FastAI Part 1: Final Project problems (FIXED)

EDIT: I wasn’t able to fix the problem itself, but I found a way around it. From my research, the versions of PyTorch and torchvision I was using should be compatible, so I’m still not sure what exactly was broken. Since it wasn’t working, I searched for an alternative solution and found that cv2 already has an RGB-to-grayscale method, so there was no need to use torchvision for that step. I switched over to it, and my loader now seems to work.
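
For anyone hitting the same thing, this is roughly what I swapped in (a minimal sketch with a hypothetical helper name; note that cv2.VideoCapture yields BGR frames, so the conversion flag is COLOR_BGR2GRAY):

import cv2
import numpy as np

def to_grayscale(frame: np.ndarray) -> np.ndarray:
    # (H, W, 3) BGR uint8 frame -> (H, W, 1) float32 in [0, 1],
    # matching the arrays my original loop appended
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    return gray[..., None].astype(np.float32) / 255.0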

Howdy,

I am having an issue with the following code. I’m trying to implement a project to test my understanding of the first part of the Deep Learning for Coders course, and I figured I’d do something with PyTorch.

I’ve been trying to resolve an exception for a while now, but I can’t seem to find what’s not working.
In my load_video method, I first split the video into frames using cv2, and then apply the transformations (grayscale conversion) using torchvision.transforms.

But I’m running into a weird issue where the import itself fails.
I am using torch 2.1.2 with torchvision 0.16.2.
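
(One generic sanity check, in case it helps someone debugging a similar mismatch: compare the version the interpreter actually loads against what pip reports, since a partially upgraded conda environment can leave files from an older release on disk. I haven’t confirmed that’s what happened here.)

import torch

print(torch.__version__)  # the version the interpreter actually loads
print(torch.__file__)     # which site-packages installation it comes from
# then compare with the output of: pip show torch torchvision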

import torchvision.transforms as transforms

is giving me the following error:

---------------------------------------------------------------------------
ImportError                               Traceback (most recent call last)
Cell In[26], line 1
----> 1 import torchvision.transforms as transforms

File /opt/conda/lib/python3.10/site-packages/torchvision/__init__.py:6
      3 from modulefinder import Module
      5 import torch
----> 6 from torchvision import _meta_registrations, datasets, io, models, ops, transforms, utils
      8 from .extension import _HAS_OPS
     10 try:

File /opt/conda/lib/python3.10/site-packages/torchvision/models/__init__.py:2
      1 from .alexnet import *
----> 2 from .convnext import *
      3 from .densenet import *
      4 from .efficientnet import *

File /opt/conda/lib/python3.10/site-packages/torchvision/models/convnext.py:9
      6 from torch.nn import functional as F
      8 from ..ops.misc import Conv2dNormActivation, Permute
----> 9 from ..ops.stochastic_depth import StochasticDepth
     10 from ..transforms._presets import ImageClassification
     11 from ..utils import _log_api_usage_once

File /opt/conda/lib/python3.10/site-packages/torchvision/ops/__init__.py:23
     21 from .giou_loss import generalized_box_iou_loss
     22 from .misc import Conv2dNormActivation, Conv3dNormActivation, FrozenBatchNorm2d, MLP, Permute, SqueezeExcitation
---> 23 from .poolers import MultiScaleRoIAlign
     24 from .ps_roi_align import ps_roi_align, PSRoIAlign
     25 from .ps_roi_pool import ps_roi_pool, PSRoIPool

File /opt/conda/lib/python3.10/site-packages/torchvision/ops/poolers.py:10
      7 from torchvision.ops.boxes import box_area
      9 from ..utils import _log_api_usage_once
---> 10 from .roi_align import roi_align
     13 # copying result_idx_in_level to a specific index in result[]
     14 # is not supported by ONNX tracing yet.
     15 # _onnx_merge_levels() is an implementation supported by ONNX
     16 # that merges the levels to the right indices
     17 @torch.jit.unused
     18 def _onnx_merge_levels(levels: Tensor, unmerged_results: List[Tensor]) -> Tensor:

File /opt/conda/lib/python3.10/site-packages/torchvision/ops/roi_align.py:4
      1 from typing import List, Union
      3 import torch
----> 4 import torch._dynamo
      5 import torch.fx
      6 from torch import nn, Tensor

File /opt/conda/lib/python3.10/site-packages/torch/_dynamo/__init__.py:2
      1 import torch
----> 2 from . import allowed_functions, convert_frame, eval_frame, resume_execution
      3 from .backends.registry import list_backends, register_backend
      4 from .convert_frame import replay

File /opt/conda/lib/python3.10/site-packages/torch/_dynamo/allowed_functions.py:23
     19     np = None
     22 import torch
---> 23 import torch._functorch.deprecated as deprecated_func
     24 from torch.fx._symbolic_trace import is_fx_tracing
     26 from . import config

File /opt/conda/lib/python3.10/site-packages/torch/_functorch/deprecated.py:1
----> 1 import torch._functorch.apis as apis
      2 import torch._functorch.eager_transforms as _impl
      3 import torch._functorch.make_functional as _nn_impl

File /opt/conda/lib/python3.10/site-packages/torch/_functorch/apis.py:6
      1 # NOTE: We allow Dynamo to see this file (via torch/_dynamo/skipfiles.py) so that it can
      2 #       trace through functorch transforms.
      3 #       Currently, we can't allow Dynamo to see `eager_transforms.py`/`vmap.py` as that break a lot of thing
      4 #       and there isn't a mechanism to selectively expose only some functions (eg. grad) from a file
      5 #       to Dynamo.
----> 6 from torch._functorch.vmap import (vmap_impl, _check_randomness_arg,
      7                                    Callable, in_dims_t, out_dims_t, _check_out_dims_is_int_or_int_pytree,
      8                                    _process_batched_inputs, _chunked_vmap)
      9 from torch._functorch.utils import exposed_in, argnums_t
     10 import functools

ImportError: cannot import name 'vmap_impl' from 'torch._functorch.vmap' (/opt/conda/lib/python3.10/site-packages/torch/_functorch/vmap.py)

My code for reference:

import cv2
import numpy as np
import torch
import torchvision.transforms as transforms  # this is the import that fails
from typing import List

# detect_mouth and box_and_crop are helpers defined elsewhere in my notebook

def load_video(path: str) -> List[torch.Tensor]:
    capture = cv2.VideoCapture(path)
    frames = []
    frame_transform = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Grayscale(num_output_channels=1),
        transforms.ToTensor()
    ])

    # Convert each frame to a grayscale tensor, then store it as a (H, W, 1) numpy array
    for _ in range(int(capture.get(cv2.CAP_PROP_FRAME_COUNT))):
        successful, frame = capture.read()
        if successful:
            frame = frame_transform(frame)
            frames.append(frame.permute(1, 2, 0).numpy())
    capture.release()

    # Normalize the whole clip with its own mean and standard deviation
    frames_stack = np.stack(frames)
    mean = np.mean(frames_stack, axis=(0, 1, 2))
    std = np.std(frames_stack, axis=(0, 1, 2))
    normalized_frames = [(frame - mean) / std for frame in frames_stack]

    # Extract the mouth region from the normalized frames
    mouth_coordinates = [detect_mouth(frame) for frame in normalized_frames]
    extracted_mouths = []
    for coord, frame in zip(mouth_coordinates, normalized_frames):
        if coord.size > 0:
            mouth_torch = torch.from_numpy(box_and_crop(coord, frame))
            mouth_torch = mouth_torch.permute(1, 2, 0)  # change back to (H, W, C)
            extracted_mouths.append(mouth_torch)

    return extracted_mouths
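
For completeness, here’s roughly how the frame loop looks after swapping in the cv2 conversion from the edit above (a sketch using the to_grayscale helper shown there, not my exact final code):

for _ in range(int(capture.get(cv2.CAP_PROP_FRAME_COUNT))):
    successful, frame = capture.read()
    if successful:
        frames.append(to_grayscale(frame))  # no torchvision transform needed
capture.release()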