EDIT: While I wasn’t able to fix the problem, I simply circumvented it. From my research, the versions of PyTorch and torchvision I was using should be compatible, so I wasn’t sure what exactly wasn’t working. Since the import kept failing, I searched for an alternative solution and found that cv2 already has an RGB-to-grayscale method, so there was no need to use PyTorch for that step. I switched to it, and my loader now seems to work.
Howdy,
I am having an issue with the following code. I am trying to implement a project to test my understanding of the first part of the Deep Learning for coders course, and I figured I’d do something with pytorch.
I have been trying to solve an exception for a while now, but I can’t seem to find what’s not working.
For my load_video method, I first transform the video into frames using cv2, and then do the transformations (to_grayscale) using torchvision.transforms.
However, I am having a weird issue where the import itself fails.
I am using torch 2.1.2 with torchvision 0.16.2.
import torchvision.transforms as transforms
is giving me the following error:
---------------------------------------------------------------------------
ImportError Traceback (most recent call last)
Cell In[26], line 1
----> 1 import torchvision.transforms as transforms
File /opt/conda/lib/python3.10/site-packages/torchvision/__init__.py:6
3 from modulefinder import Module
5 import torch
----> 6 from torchvision import _meta_registrations, datasets, io, models, ops, transforms, utils
8 from .extension import _HAS_OPS
10 try:
File /opt/conda/lib/python3.10/site-packages/torchvision/models/__init__.py:2
1 from .alexnet import *
----> 2 from .convnext import *
3 from .densenet import *
4 from .efficientnet import *
File /opt/conda/lib/python3.10/site-packages/torchvision/models/convnext.py:9
6 from torch.nn import functional as F
8 from ..ops.misc import Conv2dNormActivation, Permute
----> 9 from ..ops.stochastic_depth import StochasticDepth
10 from ..transforms._presets import ImageClassification
11 from ..utils import _log_api_usage_once
File /opt/conda/lib/python3.10/site-packages/torchvision/ops/__init__.py:23
21 from .giou_loss import generalized_box_iou_loss
22 from .misc import Conv2dNormActivation, Conv3dNormActivation, FrozenBatchNorm2d, MLP, Permute, SqueezeExcitation
---> 23 from .poolers import MultiScaleRoIAlign
24 from .ps_roi_align import ps_roi_align, PSRoIAlign
25 from .ps_roi_pool import ps_roi_pool, PSRoIPool
File /opt/conda/lib/python3.10/site-packages/torchvision/ops/poolers.py:10
7 from torchvision.ops.boxes import box_area
9 from ..utils import _log_api_usage_once
---> 10 from .roi_align import roi_align
13 # copying result_idx_in_level to a specific index in result[]
14 # is not supported by ONNX tracing yet.
15 # _onnx_merge_levels() is an implementation supported by ONNX
16 # that merges the levels to the right indices
17 @torch.jit.unused
18 def _onnx_merge_levels(levels: Tensor, unmerged_results: List[Tensor]) -> Tensor:
File /opt/conda/lib/python3.10/site-packages/torchvision/ops/roi_align.py:4
1 from typing import List, Union
3 import torch
----> 4 import torch._dynamo
5 import torch.fx
6 from torch import nn, Tensor
File /opt/conda/lib/python3.10/site-packages/torch/_dynamo/__init__.py:2
1 import torch
----> 2 from . import allowed_functions, convert_frame, eval_frame, resume_execution
3 from .backends.registry import list_backends, register_backend
4 from .convert_frame import replay
File /opt/conda/lib/python3.10/site-packages/torch/_dynamo/allowed_functions.py:23
19 np = None
22 import torch
---> 23 import torch._functorch.deprecated as deprecated_func
24 from torch.fx._symbolic_trace import is_fx_tracing
26 from . import config
File /opt/conda/lib/python3.10/site-packages/torch/_functorch/deprecated.py:1
----> 1 import torch._functorch.apis as apis
2 import torch._functorch.eager_transforms as _impl
3 import torch._functorch.make_functional as _nn_impl
File /opt/conda/lib/python3.10/site-packages/torch/_functorch/apis.py:6
1 # NOTE: We allow Dynamo to see this file (via torch/_dynamo/skipfiles.py) so that it can
2 # trace through functorch transforms.
3 # Currently, we can't allow Dynamo to see `eager_transforms.py`/`vmap.py` as that break a lot of thing
4 # and there isn't a mechanism to selectively expose only some functions (eg. grad) from a file
5 # to Dynamo.
----> 6 from torch._functorch.vmap import (vmap_impl, _check_randomness_arg,
7 Callable, in_dims_t, out_dims_t, _check_out_dims_is_int_or_int_pytree,
8 _process_batched_inputs, _chunked_vmap)
9 from torch._functorch.utils import exposed_in, argnums_t
10 import functools
ImportError: cannot import name 'vmap_impl' from 'torch._functorch.vmap' (/opt/conda/lib/python3.10/site-packages/torch/_functorch/vmap.py)
My code for reference:
def load_video(path: str) -> List[torch.Tensor]:
    """Load a video, normalize its grayscale frames, and crop the mouth region.

    Each decoded frame is converted to a single-channel grayscale image
    scaled to [0, 1], the whole clip is standardized with its global mean
    and standard deviation, and the mouth region is cropped from every
    frame in which ``detect_mouth`` reports a detection.

    Args:
        path: Path of the video file to read with ``cv2.VideoCapture``.

    Returns:
        One tensor per frame in which a mouth was found, in frame order.
        The list is empty when no frame could be decoded or no mouth was
        detected.
    """
    capture = cv2.VideoCapture(path)
    frames = []
    try:
        for _ in range(int(capture.get(cv2.CAP_PROP_FRAME_COUNT))):
            successful, frame = capture.read()
            if not successful:
                continue
            # OpenCV decodes frames as BGR, so convert with the BGR-aware
            # flag; going through a PIL image would treat the data as RGB
            # and apply the luma weights to the wrong channels.
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            # Scale to float32 in [0, 1] and keep an explicit channel axis,
            # giving each frame the layout (H, W, 1).
            frames.append(gray.astype(np.float32)[..., np.newaxis] / 255.0)
    finally:
        # Release the capture even if decoding or conversion fails.
        capture.release()

    # np.stack rejects an empty sequence (unreadable or empty video).
    if not frames:
        return []

    # Standardize the clip with statistics taken over frames, rows and columns.
    frames_stack = np.stack(frames)
    mean = np.mean(frames_stack, axis=(0, 1, 2))
    std = np.std(frames_stack, axis=(0, 1, 2))
    # A constant-intensity clip has zero spread; avoid dividing by zero.
    std[std == 0] = 1.0
    normalized_frames = (frames_stack - mean) / std

    # Extract the mouth from each normalized frame; frames without a
    # detection (empty coordinate array) are skipped.
    mouth_coordinates = [detect_mouth(frame) for frame in normalized_frames]
    extracted_mouths = []
    for coord, frame in zip(mouth_coordinates, normalized_frames):
        if coord.size > 0:
            mouth_torch = torch.from_numpy(box_and_crop(coord, frame))
            # Original author's note: change back to (H, W, C).
            # NOTE(review): this depends on the axis order box_and_crop
            # returns - confirm against that helper.
            mouth_torch = mouth_torch.permute(1, 2, 0)
            extracted_mouths.append(mouth_torch)
    return extracted_mouths