Help when training a Segmentation model using the high-level API

Describe the bug
In the 23_tutorial.vision.ipynb notebook, training the segmentation model fails in the section titled "Segmentation - Using the high-level API".

To Reproduce
Steps to reproduce the behavior:

  1. Open 23_tutorial.vision.ipynb in colab
  2. Run all
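
For reference, the relevant cells in that section boil down to the following (a sketch, assuming the CamVid Tiny setup that also appears later in this thread; the tutorial's actual cells may differ slightly):

```python
from fastai.vision.all import *

# Segmentation DataLoaders built with the high-level API
path = untar_data(URLs.CAMVID_TINY)
dls = SegmentationDataLoaders.from_label_func(
    path, bs=8, fnames=get_image_files(path/"images"),
    label_func=lambda o: path/'labels'/f'{o.stem}_P{o.suffix}',
    codes=np.loadtxt(path/'codes.txt', dtype=str))

learn = unet_learner(dls, resnet34)
learn.fine_tune(8)  # raises the TypeError below
```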

Expected behavior
The model trains successfully after calling learn.fine_tune().

Error
TypeError: no implementation found for 'torch.nn.functional.cross_entropy' on types that implement __torch_function__: [<class 'fastai.torch_core.TensorImage'>, <class 'fastai.torch_core.TensorMask'>]

<details><summary>click to view full stack trace</summary>

```
TypeError                                 Traceback (most recent call last)

<ipython-input> in <module>()
      1 learn = unet_learner(dls, resnet34)
----> 2 learn.fine_tune(8)

17 frames

/usr/local/lib/python3.6/dist-packages/fastai/callback/schedule.py in fine_tune(self, epochs, base_lr, freeze_epochs, lr_mult, pct_start, div, **kwargs)
    155     "Fine tune with `freeze` for `freeze_epochs` then with `unfreeze` from `epochs` using discriminative LR"
    156     self.freeze()
--> 157     self.fit_one_cycle(freeze_epochs, slice(base_lr), pct_start=0.99, **kwargs)
    158     base_lr /= 2
    159     self.unfreeze()

/usr/local/lib/python3.6/dist-packages/fastai/callback/schedule.py in fit_one_cycle(self, n_epoch, lr_max, div, div_final, pct_start, wd, moms, cbs, reset_opt)
    110     scheds = {'lr': combined_cos(pct_start, lr_max/div, lr_max, lr_max/div_final),
    111               'mom': combined_cos(pct_start, *(self.moms if moms is None else moms))}
--> 112     self.fit(n_epoch, cbs=ParamScheduler(scheds)+L(cbs), reset_opt=reset_opt, wd=wd)
    113
    114 # Cell

/usr/local/lib/python3.6/dist-packages/fastai/learner.py in fit(self, n_epoch, lr, wd, cbs, reset_opt)
    203     self.opt.set_hypers(lr=self.lr if lr is None else lr)
    204     self.n_epoch = n_epoch
--> 205     self._with_events(self._do_fit, 'fit', CancelFitException, self._end_cleanup)
    206
    207 def _end_cleanup(self): self.dl,self.xb,self.yb,self.pred,self.loss = None,(None,),(None,),None,None

/usr/local/lib/python3.6/dist-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
    152
    153 def _with_events(self, f, event_type, ex, final=noop):
--> 154     try: self(f'before_{event_type}') ;f()
    155     except ex: self(f'after_cancel_{event_type}')
    156     finally: self(f'after_{event_type}') ;final()

/usr/local/lib/python3.6/dist-packages/fastai/learner.py in _do_fit(self)
    194     for epoch in range(self.n_epoch):
    195         self.epoch=epoch
--> 196         self._with_events(self._do_epoch, 'epoch', CancelEpochException)
    197
    198 def fit(self, n_epoch, lr=None, wd=None, cbs=None, reset_opt=False):

/usr/local/lib/python3.6/dist-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
    152
    153 def _with_events(self, f, event_type, ex, final=noop):
--> 154     try: self(f'before_{event_type}') ;f()
    155     except ex: self(f'after_cancel_{event_type}')
    156     finally: self(f'after_{event_type}') ;final()

/usr/local/lib/python3.6/dist-packages/fastai/learner.py in _do_epoch(self)
    188
    189 def _do_epoch(self):
--> 190     self._do_epoch_train()
    191     self._do_epoch_validate()
    192

/usr/local/lib/python3.6/dist-packages/fastai/learner.py in _do_epoch_train(self)
    180 def _do_epoch_train(self):
    181     self.dl = self.dls.train
--> 182     self._with_events(self.all_batches, 'train', CancelTrainException)
    183
    184 def _do_epoch_validate(self, ds_idx=1, dl=None):

/usr/local/lib/python3.6/dist-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
    152
    153 def _with_events(self, f, event_type, ex, final=noop):
--> 154     try: self(f'before_{event_type}') ;f()
    155     except ex: self(f'after_cancel_{event_type}')
    156     finally: self(f'after_{event_type}') ;final()

/usr/local/lib/python3.6/dist-packages/fastai/learner.py in all_batches(self)
    158 def all_batches(self):
    159     self.n_iter = len(self.dl)
--> 160     for o in enumerate(self.dl): self.one_batch(*o)
    161
    162 def _do_one_batch(self):

/usr/local/lib/python3.6/dist-packages/fastai/learner.py in one_batch(self, i, b)
    176     self.iter = i
    177     self._split(b)
--> 178     self._with_events(self._do_one_batch, 'batch', CancelBatchException)
    179
    180 def _do_epoch_train(self):

/usr/local/lib/python3.6/dist-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
    152
    153 def _with_events(self, f, event_type, ex, final=noop):
--> 154     try: self(f'before_{event_type}') ;f()
    155     except ex: self(f'after_cancel_{event_type}')
    156     finally: self(f'after_{event_type}') ;final()

/usr/local/lib/python3.6/dist-packages/fastai/learner.py in _do_one_batch(self)
    163     self.pred = self.model(*self.xb)
    164     self('after_pred')
--> 165     if len(self.yb): self.loss = self.loss_func(self.pred, *self.yb)
    166     self('after_loss')
    167     if not self.training or not len(self.yb): return

/usr/local/lib/python3.6/dist-packages/fastai/losses.py in __call__(self, inp, targ, **kwargs)
     31     if targ.dtype in [torch.int8, torch.int16, torch.int32]: targ = targ.long()
     32     if self.flatten: inp = inp.view(-1,inp.shape[-1]) if self.is_2d else inp.view(-1)
---> 33     return self.func.__call__(inp, targ.view(-1) if self.flatten else targ, **kwargs)
     34
     35 # Cell

/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
    725         result = self._slow_forward(*input, **kwargs)
    726     else:
--> 727         result = self.forward(*input, **kwargs)
    728     for hook in itertools.chain(
    729             _global_forward_hooks.values(),

/usr/local/lib/python3.6/dist-packages/torch/nn/modules/loss.py in forward(self, input, target)
    960 def forward(self, input: Tensor, target: Tensor) -> Tensor:
    961     return F.cross_entropy(input, target, weight=self.weight,
--> 962                            ignore_index=self.ignore_index, reduction=self.reduction)
    963
    964

/usr/local/lib/python3.6/dist-packages/torch/nn/functional.py in cross_entropy(input, target, weight, size_average, ignore_index, reduce, reduction)
   2463         cross_entropy, tens_ops, input, target, weight=weight,
   2464         size_average=size_average, ignore_index=ignore_index, reduce=reduce,
--> 2465         reduction=reduction)
   2466     if size_average is not None or reduce is not None:
   2467         reduction = _Reduction.legacy_get_string(size_average, reduce)

/usr/local/lib/python3.6/dist-packages/torch/overrides.py in handle_torch_function(public_api, relevant_args, *args, **kwargs)
   1069     raise TypeError("no implementation found for '{}' on types that implement "
   1070                     '__torch_function__: {}'
--> 1071                     .format(func_name, list(map(type, overloaded_args))))
   1072
   1073 def has_torch_function(relevant_args: Iterable[Any]) -> bool:

TypeError: no implementation found for 'torch.nn.functional.cross_entropy' on types that implement __torch_function__: [<class 'fastai.torch_core.TensorImage'>, <class 'fastai.torch_core.TensorMask'>]
```

</details>
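
Until a fix is released, one possible workaround (a sketch only, not fastai's official API; `cast_loss` and `_inner` are hypothetical names) is to wrap the learner's loss function so both tensors are cast back to their common `TensorBase` class before PyTorch's `__torch_function__` dispatch sees them:

```python
from fastai.torch_core import TensorBase

# Hypothetical wrapper: cast fastai's tensor subclasses (TensorImage,
# TensorMask) to plain TensorBase, so torch.nn.functional.cross_entropy
# no longer sees two unrelated subclasses it cannot reconcile.
def cast_loss(loss_func):
    def _inner(pred, targ, **kwargs):
        return loss_func(TensorBase(pred), TensorBase(targ), **kwargs)
    return _inner

learn.loss_func = cast_loss(learn.loss_func)
```

The cast helps because, afterwards, both arguments share a single subclass, so the dispatch no longer has to choose between two unrelated overrides.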

From what I can tell, this is related to this issue:

(No solution yet)


This has now been fixed in the latest pip release, thanks!

Awesome. Super quick guys!

I'm using fastai version 2.1.7 and still get an error,
"TypeError: unsupported operand type(s) for *: 'TensorImage' and 'TensorMask'",
when I add metrics=Dice() to the CamVid Tiny segmentation code (see below).
Would you suggest opening a new issue on fastai's repo?

click to view full stack trace

============= Code ================
path = untar_data(URLs.CAMVID_TINY)
dls = SegmentationDataLoaders.from_label_func(
    path, bs=8, fnames=get_image_files(path/"images"),
    label_func=lambda o: path/'labels'/f'{o.stem}_P{o.suffix}',
    codes=np.loadtxt(path/'codes.txt', dtype=str))
learn = unet_learner(dls, resnet34, metrics=Dice())
learn.fine_tune(8)
============ Error ===================
TypeError                                 Traceback (most recent call last)

<ipython-input> in <module>()
      7
      8 learn = unet_learner(dls, resnet34, metrics=Dice())
----> 9 learn.fine_tune(8)

20 frames

/usr/local/lib/python3.6/dist-packages/fastai/callback/schedule.py in fine_tune(self, epochs, base_lr, freeze_epochs, lr_mult, pct_start, div, **kwargs)
    155     "Fine tune with `freeze` for `freeze_epochs` then with `unfreeze` from `epochs` using discriminative LR"
    156     self.freeze()
--> 157     self.fit_one_cycle(freeze_epochs, slice(base_lr), pct_start=0.99, **kwargs)
    158     base_lr /= 2
    159     self.unfreeze()

/usr/local/lib/python3.6/dist-packages/fastai/callback/schedule.py in fit_one_cycle(self, n_epoch, lr_max, div, div_final, pct_start, wd, moms, cbs, reset_opt)
    110     scheds = {'lr': combined_cos(pct_start, lr_max/div, lr_max, lr_max/div_final),
    111               'mom': combined_cos(pct_start, *(self.moms if moms is None else moms))}
--> 112     self.fit(n_epoch, cbs=ParamScheduler(scheds)+L(cbs), reset_opt=reset_opt, wd=wd)
    113
    114 # Cell

/usr/local/lib/python3.6/dist-packages/fastai/learner.py in fit(self, n_epoch, lr, wd, cbs, reset_opt)
    203     self.opt.set_hypers(lr=self.lr if lr is None else lr)
    204     self.n_epoch = n_epoch
--> 205     self._with_events(self._do_fit, 'fit', CancelFitException, self._end_cleanup)
    206
    207 def _end_cleanup(self): self.dl,self.xb,self.yb,self.pred,self.loss = None,(None,),(None,),None,None

/usr/local/lib/python3.6/dist-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
    152
    153 def _with_events(self, f, event_type, ex, final=noop):
--> 154     try: self(f'before_{event_type}') ;f()
    155     except ex: self(f'after_cancel_{event_type}')
    156     finally: self(f'after_{event_type}') ;final()

/usr/local/lib/python3.6/dist-packages/fastai/learner.py in _do_fit(self)
    194     for epoch in range(self.n_epoch):
    195         self.epoch=epoch
--> 196         self._with_events(self._do_epoch, 'epoch', CancelEpochException)
    197
    198 def fit(self, n_epoch, lr=None, wd=None, cbs=None, reset_opt=False):

/usr/local/lib/python3.6/dist-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
    152
    153 def _with_events(self, f, event_type, ex, final=noop):
--> 154     try: self(f'before_{event_type}') ;f()
    155     except ex: self(f'after_cancel_{event_type}')
    156     finally: self(f'after_{event_type}') ;final()

/usr/local/lib/python3.6/dist-packages/fastai/learner.py in _do_epoch(self)
    189 def _do_epoch(self):
    190     self._do_epoch_train()
--> 191     self._do_epoch_validate()
    192
    193 def _do_fit(self):

/usr/local/lib/python3.6/dist-packages/fastai/learner.py in _do_epoch_validate(self, ds_idx, dl)
    185     if dl is None: dl = self.dls[ds_idx]
    186     self.dl = dl
--> 187     with torch.no_grad(): self._with_events(self.all_batches, 'validate', CancelValidException)
    188
    189 def _do_epoch(self):

/usr/local/lib/python3.6/dist-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
    152
    153 def _with_events(self, f, event_type, ex, final=noop):
--> 154     try: self(f'before_{event_type}') ;f()
    155     except ex: self(f'after_cancel_{event_type}')
    156     finally: self(f'after_{event_type}') ;final()

/usr/local/lib/python3.6/dist-packages/fastai/learner.py in all_batches(self)
    158 def all_batches(self):
    159     self.n_iter = len(self.dl)
--> 160     for o in enumerate(self.dl): self.one_batch(*o)
    161
    162 def _do_one_batch(self):

/usr/local/lib/python3.6/dist-packages/fastai/learner.py in one_batch(self, i, b)
    176     self.iter = i
    177     self._split(b)
--> 178     self._with_events(self._do_one_batch, 'batch', CancelBatchException)
    179
    180 def _do_epoch_train(self):

/usr/local/lib/python3.6/dist-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
    154     try: self(f'before_{event_type}') ;f()
    155     except ex: self(f'after_cancel_{event_type}')
--> 156     finally: self(f'after_{event_type}') ;final()
    157
    158 def all_batches(self):

/usr/local/lib/python3.6/dist-packages/fastai/learner.py in __call__(self, event_name)
    130 def ordered_cbs(self, event): return [cb for cb in sort_by_run(self.cbs) if hasattr(cb, event)]
    131
--> 132 def __call__(self, event_name): L(event_name).map(self._call_one)
    133
    134 def _call_one(self, event_name):

/usr/local/lib/python3.6/dist-packages/fastcore/foundation.py in map(self, f, gen, *args, **kwargs)
    177 def range(cls, a, b=None, step=None): return cls(range_of(a, b=b, step=step))
    178
--> 179 def map(self, f, *args, gen=False, **kwargs): return self._new(map_ex(self, f, *args, gen=gen, **kwargs))
    180 def argwhere(self, f, negate=False, **kwargs): return self._new(argwhere(self, f, negate, **kwargs))
    181 def filter(self, f=noop, negate=False, gen=False, **kwargs):

/usr/local/lib/python3.6/dist-packages/fastcore/basics.py in map_ex(iterable, f, gen, *args, **kwargs)
    604     res = map(g, iterable)
    605     if gen: return res
--> 606     return list(res)
    607
    608 # Cell

/usr/local/lib/python3.6/dist-packages/fastcore/basics.py in __call__(self, *args, **kwargs)
    594     if isinstance(v,_Arg): kwargs[k] = args.pop(v.i)
    595     fargs = [args[x.i] if isinstance(x, _Arg) else x for x in self.pargs] + args[self.maxi+1:]
--> 596     return self.func(*fargs, **kwargs)
    597
    598 # Cell

/usr/local/lib/python3.6/dist-packages/fastai/learner.py in _call_one(self, event_name)
    134 def _call_one(self, event_name):
    135     assert hasattr(event, event_name), event_name
--> 136     [cb(event_name) for cb in sort_by_run(self.cbs)]
    137
    138 def _bn_bias_state(self, with_bias): return norm_bias_params(self.model, with_bias).map(self.opt.state)

/usr/local/lib/python3.6/dist-packages/fastai/learner.py in <listcomp>(.0)
    134 def _call_one(self, event_name):
    135     assert hasattr(event, event_name), event_name
--> 136     [cb(event_name) for cb in sort_by_run(self.cbs)]
    137
    138 def _bn_bias_state(self, with_bias): return norm_bias_params(self.model, with_bias).map(self.opt.state)

/usr/local/lib/python3.6/dist-packages/fastai/callback/core.py in __call__(self, event_name)
     42                (self.run_valid and not getattr(self, 'training', False)))
     43     res = None
---> 44     if self.run and _run: res = getattr(self, event_name, noop)()
     45     if event_name=='after_fit': self.run=True #Reset self.run to True at each end of fit
     46     return res

/usr/local/lib/python3.6/dist-packages/fastai/learner.py in after_batch(self)
    455     if len(self.yb) == 0: return
    456     mets = self._train_mets if self.training else self._valid_mets
--> 457     for met in mets: met.accumulate(self.learn)
    458     if not self.training: return
    459     self.lrs.append(self.opt.hypers[-1]['lr'])

/usr/local/lib/python3.6/dist-packages/fastai/metrics.py in accumulate(self, learn)
    346 def accumulate(self, learn):
    347     pred,targ = flatten_check(learn.pred.argmax(dim=self.axis), learn.y)
--> 348     pred,targ = map(TensorBase, (pred, targ))
    349     self.inter += (pred*targ).float().sum().item()
    350     self.union += (pred+targ).float().sum().item()

TypeError: unsupported operand type(s) for *: 'TensorImage' and 'TensorMask'
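
The frame above already shows a cast to `TensorBase` before the elementwise `*` and `+`; if your installed version still raises the error, a custom metric along these lines could serve as a stopgap (a sketch; `CastedDice` is a hypothetical name, not part of fastai):

```python
from fastai.torch_core import TensorBase, flatten_check
from fastai.learner import Metric

class CastedDice(Metric):
    "Dice coefficient that casts preds/targets to TensorBase (workaround sketch)"
    def __init__(self, axis=1): self.axis = axis
    def reset(self): self.inter,self.union = 0,0
    def accumulate(self, learn):
        pred,targ = flatten_check(learn.pred.argmax(dim=self.axis), learn.y)
        # Cast both to the shared base class so `*` and `+` dispatch cleanly
        pred,targ = map(TensorBase, (pred,targ))
        self.inter += (pred*targ).float().sum().item()
        self.union += (pred+targ).float().sum().item()
    @property
    def value(self): return 2.*self.inter/self.union if self.union > 0 else None
```

Passing `metrics=CastedDice()` to `unet_learner` in place of `Dice()` would then sidestep the error.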


Please! :slight_smile:

Done, issue #3041.
Thank you, Zachary.


I have the same problem here @eduguiu, with a custom dataset, but it occurs during the fit process. I don't think it needs a separate report on the issue… Do you know if the fix would be implemented at the same level?


Hi Joao, my problem also arose while training, at the very end of the first epoch.
Userr2232 came up with a workaround; see issue #3041.

The workaround shows how to pin to PyTorch 1.6 + fastai 2.0.19 (e.g. something like `pip install torch==1.6.0 fastai==2.0.19`).

Hope this helps.

Eduard
