Recursion error fine-tuning a segmentation learner on Windows 10

Hi everyone,

I’ve searched around and couldn’t find an answer to this specific problem, but apologies if it has already been answered somewhere.

I’m having trouble getting a unet_learner to fine-tune on Windows 10.

When I try to run it in a Jupyter notebook, the cell with the fine-tuning step hangs and then restarts the kernel. When I run the code in a script, I get a recursion error at the fine-tuning step.

I get the same behaviour when I try to run the image segmentation example in Chapter 1 of fastbook. Here is the code I’m running:

from fastai.vision.all import *

def main():
    """Build segmentation DataLoaders, create a U-Net learner and fine-tune it.

    Relies on `path`, `BATCH_SIZE`, `IMAGE_SIZE` and `codes` being defined
    elsewhere in the script (they are not part of this snippet).
    """
    def mask_for(o):
        # The mask lives under training_masks/ and reuses the image's stem
        # and suffix, with "_mask" appended to the stem.
        return path/f"training_masks/{o.stem}_mask{o.suffix}"

    image_files = get_image_files(path/"training_images")
    per_item = [Resize(IMAGE_SIZE, pad_mode=PadMode.Border)]
    per_batch = [IntToFloatTensor(div_mask=255)]

    dls = SegmentationDataLoaders.from_label_func(
        path,
        fnames=image_files,
        label_func=mask_for,
        codes=codes,
        bs=BATCH_SIZE,
        valid_pct=0.2,
        item_tfms=per_item,
        batch_tfms=per_batch,
        num_workers=0,
    )

    learn = unet_learner(dls, resnet34)
    learn.fine_tune(8)


# Run the training entry point only when this file is executed as a script.
if __name__ == "__main__":
    main()

The trace that I get is:

Traceback (most recent call last):
  File "segmentation_example.py", line 48, in <module>
    main()
  File "segmentation_example.py", line 39, in main
    learn.fine_tune(8)
  File "C:\Users\bw42kg\AppData\Local\Continuum\anaconda3\envs\fastai\lib\site-packages\fastcore\logargs.py", line 56, in _f
    return inst if to_return else f(*args, **kwargs)
  File "C:\Users\bw42kg\AppData\Local\Continuum\anaconda3\envs\fastai\lib\site-packages\fastai\callback\schedule.py", line 161, in fine_tune
    self.fit_one_cycle(freeze_epochs, slice(base_lr), pct_start=0.99, **kwargs)
  File "C:\Users\bw42kg\AppData\Local\Continuum\anaconda3\envs\fastai\lib\site-packages\fastcore\logargs.py", line 56, in _f
    return inst if to_return else f(*args, **kwargs)
  File "C:\Users\bw42kg\AppData\Local\Continuum\anaconda3\envs\fastai\lib\site-packages\fastai\callback\schedule.py", line 113, in fit_one_cycle
    self.fit(n_epoch, cbs=ParamScheduler(scheds)+L(cbs), reset_opt=reset_opt, wd=wd)
  File "C:\Users\bw42kg\AppData\Local\Continuum\anaconda3\envs\fastai\lib\site-packages\fastcore\logargs.py", line 56, in _f
    return inst if to_return else f(*args, **kwargs)
  File "C:\Users\bw42kg\AppData\Local\Continuum\anaconda3\envs\fastai\lib\site-packages\fastai\learner.py", line 207, in fit
    self._with_events(self._do_fit, 'fit', CancelFitException, self._end_cleanup)
  File "C:\Users\bw42kg\AppData\Local\Continuum\anaconda3\envs\fastai\lib\site-packages\fastai\learner.py", line 155, in _with_events
    try:       self(f'before_{event_type}')       ;f()
  File "C:\Users\bw42kg\AppData\Local\Continuum\anaconda3\envs\fastai\lib\site-packages\fastai\learner.py", line 197, in _do_fit
    self._with_events(self._do_epoch, 'epoch', CancelEpochException)
  File "C:\Users\bw42kg\AppData\Local\Continuum\anaconda3\envs\fastai\lib\site-packages\fastai\learner.py", line 155, in _with_events
    try:       self(f'before_{event_type}')       ;f()
  File "C:\Users\bw42kg\AppData\Local\Continuum\anaconda3\envs\fastai\lib\site-packages\fastai\learner.py", line 191, in _do_epoch
    self._do_epoch_train()
  File "C:\Users\bw42kg\AppData\Local\Continuum\anaconda3\envs\fastai\lib\site-packages\fastai\learner.py", line 183, in _do_epoch_train
    self._with_events(self.all_batches, 'train', CancelTrainException)
  File "C:\Users\bw42kg\AppData\Local\Continuum\anaconda3\envs\fastai\lib\site-packages\fastai\learner.py", line 155, in _with_events
    try:       self(f'before_{event_type}')       ;f()
  File "C:\Users\bw42kg\AppData\Local\Continuum\anaconda3\envs\fastai\lib\site-packages\fastai\learner.py", line 161, in all_batches
    for o in enumerate(self.dl): self.one_batch(*o)
  File "C:\Users\bw42kg\AppData\Local\Continuum\anaconda3\envs\fastai\lib\site-packages\fastai\learner.py", line 179, in one_batch
    self._with_events(self._do_one_batch, 'batch', CancelBatchException)
  File "C:\Users\bw42kg\AppData\Local\Continuum\anaconda3\envs\fastai\lib\site-packages\fastai\learner.py", line 155, in _with_events
    try:       self(f'before_{event_type}')       ;f()
  File "C:\Users\bw42kg\AppData\Local\Continuum\anaconda3\envs\fastai\lib\site-packages\fastai\learner.py", line 164, in _do_one_batch
    self.pred = self.model(*self.xb)
  File "C:\Users\bw42kg\AppData\Local\Continuum\anaconda3\envs\fastai\lib\site-packages\torch\nn\modules\module.py", line 744, in _call_impl
    result = self.forward(*input, **kwargs)
  File "C:\Users\bw42kg\AppData\Local\Continuum\anaconda3\envs\fastai\lib\site-packages\fastai\layers.py", line 397, in forward
    nres = l(res)
  File "C:\Users\bw42kg\AppData\Local\Continuum\anaconda3\envs\fastai\lib\site-packages\torch\nn\modules\module.py", line 744, in _call_impl
    result = self.forward(*input, **kwargs)
  File "C:\Users\bw42kg\AppData\Local\Continuum\anaconda3\envs\fastai\lib\site-packages\torch\nn\modules\container.py", line 117, in forward
    input = module(input)
  File "C:\Users\bw42kg\AppData\Local\Continuum\anaconda3\envs\fastai\lib\site-packages\torch\nn\modules\module.py", line 744, in _call_impl
    result = self.forward(*input, **kwargs)
  File "C:\Users\bw42kg\AppData\Local\Continuum\anaconda3\envs\fastai\lib\site-packages\torch\nn\modules\conv.py", line 390, in forward
    return self._conv_forward(input, self.weight)
  File "C:\Users\bw42kg\AppData\Local\Continuum\anaconda3\envs\fastai\lib\site-packages\torch\nn\modules\conv.py", line 386, in _conv_forward
    return F.conv2d(input, weight, self.bias, self.stride,
  File "C:\Users\bw42kg\AppData\Local\Continuum\anaconda3\envs\fastai\lib\site-packages\fastai\torch_core.py", line 311, in __torch_function__
    if isinstance(ret, TensorBase): ret.set_meta(self, as_copy=True)
  File "C:\Users\bw42kg\AppData\Local\Continuum\anaconda3\envs\fastai\lib\site-packages\fastai\torch_core.py", line 277, in set_meta
    self.__dict__ = deepcopy(x.__dict__) if as_copy else x.__dict__
  File "C:\Users\bw42kg\AppData\Local\Continuum\anaconda3\envs\fastai\lib\copy.py", line 146, in deepcopy       
    y = copier(x, memo)
  File "C:\Users\bw42kg\AppData\Local\Continuum\anaconda3\envs\fastai\lib\copy.py", line 230, in _deepcopy_dict 
    y[deepcopy(key, memo)] = deepcopy(value, memo)
  File "C:\Users\bw42kg\AppData\Local\Continuum\anaconda3\envs\fastai\lib\copy.py", line 153, in deepcopy       
    y = copier(memo)
  File "C:\Users\bw42kg\AppData\Local\Continuum\anaconda3\envs\fastai\lib\site-packages\torch\tensor.py", line 45, in __deepcopy__
    return handle_torch_function(Tensor.__deepcopy__, relevant_args, self, memo)
 ...
  File "C:\Users\bw42kg\AppData\Local\Continuum\anaconda3\envs\fastai\lib\copy.py", line 146, in deepcopy       
    y = copier(x, memo)
  File "C:\Users\bw42kg\AppData\Local\Continuum\anaconda3\envs\fastai\lib\copy.py", line 230, in _deepcopy_dict 
    y[deepcopy(key, memo)] = deepcopy(value, memo)
  File "C:\Users\bw42kg\AppData\Local\Continuum\anaconda3\envs\fastai\lib\copy.py", line 153, in deepcopy       
    y = copier(memo)
  File "C:\Users\bw42kg\AppData\Local\Continuum\anaconda3\envs\fastai\lib\site-packages\torch\tensor.py", line 45, in __deepcopy__
    return handle_torch_function(Tensor.__deepcopy__, relevant_args, self, memo)
  File "C:\Users\bw42kg\AppData\Local\Continuum\anaconda3\envs\fastai\lib\site-packages\torch\overrides.py", line 1066, in handle_torch_function
    result = overloaded_arg.__torch_function__(public_api, types, args, kwargs)
  File "C:\Users\bw42kg\AppData\Local\Continuum\anaconda3\envs\fastai\lib\site-packages\fastai\torch_core.py", line 310, in __torch_function__
    ret = super().__torch_function__(func, types, args=args, kwargs=kwargs)
  File "C:\Users\bw42kg\AppData\Local\Continuum\anaconda3\envs\fastai\lib\site-packages\torch\tensor.py", line 1020, in __torch_function__
    if not all(issubclass(cls, t) for t in types):
  File "C:\Users\bw42kg\AppData\Local\Continuum\anaconda3\envs\fastai\lib\site-packages\torch\tensor.py", line 1020, in <genexpr>
    if not all(issubclass(cls, t) for t in types):
RecursionError: maximum recursion depth exceeded while calling a Python object

I’ve truncated the traceback in the middle, because it repeated a lot.

I’m using fastai=2.1.0 with cudatoolkit=11.0 and pytorch=1.8.0 on Windows 10. My GPU is an NVIDIA Quadro P2000.