Hi everyone,
I’ve searched around and couldn’t find an answer to this specific problem, but apologies if it has already been answered somewhere.
I’m having trouble getting a `unet_learner` to fine-tune on Windows 10.
When I try to run it in a Jupyter notebook, the cell with the fine-tuning step hangs and then restarts the kernel. When I run the code in a script, I get a recursion error at the fine-tuning step.
I get the same behaviour when I try to run the image segmentation example in Chapter 1 of fastbook. Here is the code I’m running:
from fastai.vision.all import *
def main():
    """Build segmentation DataLoaders, create a U-Net learner and fine-tune it.

    Relies on `path`, `codes`, `BATCH_SIZE` and `IMAGE_SIZE`, which are
    defined outside this snippet.
    """

    def mask_path(o):
        # Mask file for image `o`: same stem plus "_mask", same suffix,
        # located under "training_masks".
        return path/f"training_masks/{o.stem}_mask{o.suffix}"

    image_files = get_image_files(path/"training_images")
    per_item = [Resize(IMAGE_SIZE, pad_mode=PadMode.Border)]
    per_batch = [IntToFloatTensor(div_mask=255)]

    dls = SegmentationDataLoaders.from_label_func(
        path,
        bs=BATCH_SIZE,
        fnames=image_files,
        label_func=mask_path,
        codes=codes,
        item_tfms=per_item,
        batch_tfms=per_batch,
        valid_pct=0.2,
        num_workers=0,
    )

    learn = unet_learner(dls, resnet34)
    # This is the call at which the reported RecursionError is raised.
    learn.fine_tune(8)


# Run the training only when the file is executed as a script.
if __name__ == "__main__":
    main()
The trace that I get is:
Traceback (most recent call last):
File "segmentation_example.py", line 48, in <module>
main()
File "segmentation_example.py", line 39, in main
learn.fine_tune(8)
File "C:\Users\bw42kg\AppData\Local\Continuum\anaconda3\envs\fastai\lib\site-packages\fastcore\logargs.py", line 56, in _f
return inst if to_return else f(*args, **kwargs)
File "C:\Users\bw42kg\AppData\Local\Continuum\anaconda3\envs\fastai\lib\site-packages\fastai\callback\schedule.py", line 161, in fine_tune
self.fit_one_cycle(freeze_epochs, slice(base_lr), pct_start=0.99, **kwargs)
File "C:\Users\bw42kg\AppData\Local\Continuum\anaconda3\envs\fastai\lib\site-packages\fastcore\logargs.py", line 56, in _f
return inst if to_return else f(*args, **kwargs)
File "C:\Users\bw42kg\AppData\Local\Continuum\anaconda3\envs\fastai\lib\site-packages\fastai\callback\schedule.py", line 113, in fit_one_cycle
self.fit(n_epoch, cbs=ParamScheduler(scheds)+L(cbs), reset_opt=reset_opt, wd=wd)
File "C:\Users\bw42kg\AppData\Local\Continuum\anaconda3\envs\fastai\lib\site-packages\fastcore\logargs.py", line 56, in _f
return inst if to_return else f(*args, **kwargs)
File "C:\Users\bw42kg\AppData\Local\Continuum\anaconda3\envs\fastai\lib\site-packages\fastai\learner.py", line 207, in fit
self._with_events(self._do_fit, 'fit', CancelFitException, self._end_cleanup)
File "C:\Users\bw42kg\AppData\Local\Continuum\anaconda3\envs\fastai\lib\site-packages\fastai\learner.py", line 155, in _with_events
try: self(f'before_{event_type}') ;f()
File "C:\Users\bw42kg\AppData\Local\Continuum\anaconda3\envs\fastai\lib\site-packages\fastai\learner.py", line 197, in _do_fit
self._with_events(self._do_epoch, 'epoch', CancelEpochException)
File "C:\Users\bw42kg\AppData\Local\Continuum\anaconda3\envs\fastai\lib\site-packages\fastai\learner.py", line 155, in _with_events
try: self(f'before_{event_type}') ;f()
File "C:\Users\bw42kg\AppData\Local\Continuum\anaconda3\envs\fastai\lib\site-packages\fastai\learner.py", line 191, in _do_epoch
self._do_epoch_train()
File "C:\Users\bw42kg\AppData\Local\Continuum\anaconda3\envs\fastai\lib\site-packages\fastai\learner.py", line 183, in _do_epoch_train
self._with_events(self.all_batches, 'train', CancelTrainException)
File "C:\Users\bw42kg\AppData\Local\Continuum\anaconda3\envs\fastai\lib\site-packages\fastai\learner.py", line 155, in _with_events
try: self(f'before_{event_type}') ;f()
File "C:\Users\bw42kg\AppData\Local\Continuum\anaconda3\envs\fastai\lib\site-packages\fastai\learner.py", line 161, in all_batches
for o in enumerate(self.dl): self.one_batch(*o)
File "C:\Users\bw42kg\AppData\Local\Continuum\anaconda3\envs\fastai\lib\site-packages\fastai\learner.py", line 179, in one_batch
self._with_events(self._do_one_batch, 'batch', CancelBatchException)
File "C:\Users\bw42kg\AppData\Local\Continuum\anaconda3\envs\fastai\lib\site-packages\fastai\learner.py", line 155, in _with_events
try: self(f'before_{event_type}') ;f()
File "C:\Users\bw42kg\AppData\Local\Continuum\anaconda3\envs\fastai\lib\site-packages\fastai\learner.py", line 164, in _do_one_batch
self.pred = self.model(*self.xb)
File "C:\Users\bw42kg\AppData\Local\Continuum\anaconda3\envs\fastai\lib\site-packages\torch\nn\modules\module.py", line 744, in _call_impl
result = self.forward(*input, **kwargs)
File "C:\Users\bw42kg\AppData\Local\Continuum\anaconda3\envs\fastai\lib\site-packages\fastai\layers.py", line 397, in forward
nres = l(res)
File "C:\Users\bw42kg\AppData\Local\Continuum\anaconda3\envs\fastai\lib\site-packages\torch\nn\modules\module.py", line 744, in _call_impl
result = self.forward(*input, **kwargs)
File "C:\Users\bw42kg\AppData\Local\Continuum\anaconda3\envs\fastai\lib\site-packages\torch\nn\modules\container.py", line 117, in forward
input = module(input)
File "C:\Users\bw42kg\AppData\Local\Continuum\anaconda3\envs\fastai\lib\site-packages\torch\nn\modules\module.py", line 744, in _call_impl
result = self.forward(*input, **kwargs)
File "C:\Users\bw42kg\AppData\Local\Continuum\anaconda3\envs\fastai\lib\site-packages\torch\nn\modules\conv.py", line 390, in forward
return self._conv_forward(input, self.weight)
File "C:\Users\bw42kg\AppData\Local\Continuum\anaconda3\envs\fastai\lib\site-packages\torch\nn\modules\conv.py", line 386, in _conv_forward
return F.conv2d(input, weight, self.bias, self.stride,
File "C:\Users\bw42kg\AppData\Local\Continuum\anaconda3\envs\fastai\lib\site-packages\fastai\torch_core.py", line 311, in __torch_function__
if isinstance(ret, TensorBase): ret.set_meta(self, as_copy=True)
File "C:\Users\bw42kg\AppData\Local\Continuum\anaconda3\envs\fastai\lib\site-packages\fastai\torch_core.py", line 277, in set_meta
self.__dict__ = deepcopy(x.__dict__) if as_copy else x.__dict__
File "C:\Users\bw42kg\AppData\Local\Continuum\anaconda3\envs\fastai\lib\copy.py", line 146, in deepcopy
y = copier(x, memo)
File "C:\Users\bw42kg\AppData\Local\Continuum\anaconda3\envs\fastai\lib\copy.py", line 230, in _deepcopy_dict
y[deepcopy(key, memo)] = deepcopy(value, memo)
File "C:\Users\bw42kg\AppData\Local\Continuum\anaconda3\envs\fastai\lib\copy.py", line 153, in deepcopy
y = copier(memo)
File "C:\Users\bw42kg\AppData\Local\Continuum\anaconda3\envs\fastai\lib\site-packages\torch\tensor.py", line 45, in __deepcopy__
return handle_torch_function(Tensor.__deepcopy__, relevant_args, self, memo)
...
File "C:\Users\bw42kg\AppData\Local\Continuum\anaconda3\envs\fastai\lib\copy.py", line 146, in deepcopy
y = copier(x, memo)
File "C:\Users\bw42kg\AppData\Local\Continuum\anaconda3\envs\fastai\lib\copy.py", line 230, in _deepcopy_dict
y[deepcopy(key, memo)] = deepcopy(value, memo)
File "C:\Users\bw42kg\AppData\Local\Continuum\anaconda3\envs\fastai\lib\copy.py", line 153, in deepcopy
y = copier(memo)
File "C:\Users\bw42kg\AppData\Local\Continuum\anaconda3\envs\fastai\lib\site-packages\torch\tensor.py", line 45, in __deepcopy__
return handle_torch_function(Tensor.__deepcopy__, relevant_args, self, memo)
File "C:\Users\bw42kg\AppData\Local\Continuum\anaconda3\envs\fastai\lib\site-packages\torch\overrides.py", line 1066, in handle_torch_function
result = overloaded_arg.__torch_function__(public_api, types, args, kwargs)
File "C:\Users\bw42kg\AppData\Local\Continuum\anaconda3\envs\fastai\lib\site-packages\fastai\torch_core.py", line 310, in __torch_function__
ret = super().__torch_function__(func, types, args=args, kwargs=kwargs)
File "C:\Users\bw42kg\AppData\Local\Continuum\anaconda3\envs\fastai\lib\site-packages\torch\tensor.py", line 1020, in __torch_function__
if not all(issubclass(cls, t) for t in types):
File "C:\Users\bw42kg\AppData\Local\Continuum\anaconda3\envs\fastai\lib\site-packages\torch\tensor.py", line 1020, in <genexpr>
if not all(issubclass(cls, t) for t in types):
RecursionError: maximum recursion depth exceeded while calling a Python object
I’ve truncated the traceback in the middle, because it repeated a lot.
I’m using `fastai=2.1.0` with `cudatoolkit=11.0` and `pytorch=1.8.0` on Windows 10. My GPU is an NVIDIA Quadro P2000.