Thanks for the help, Malcolm. Sorry, I should have specified: it was indeed this line that was throwing the error:
learn = unet_learner(dl, resnet18, n_in=4, n_out=2, pretrained=False, loss_func=loss_fn, metrics=acc_metric)
But it only happens when setting
pretrained=True
Running the below code
dl.one_batch()[0].shape
returns
torch.Size([12, 4, 384, 384])
Which looks reasonable I think, batch size = 12, bands = 4 and image size = 384x384
While poking around I have also found a couple of other problems.
Even running the tutorial in its default state appears to be somewhat problematic.
When running
learn.lr_find()
I’m getting the error
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-12-d81c6bd29d71> in <module>
----> 1 learn.lr_find()
~/fastai/fastai/callback/schedule.py in lr_find(self, start_lr, end_lr, num_it, stop_div, show_plot, suggestions)
220 n_epoch = num_it//len(self.dls.train) + 1
221 cb=LRFinder(start_lr=start_lr, end_lr=end_lr, num_it=num_it, stop_div=stop_div)
--> 222 with self.no_logging(): self.fit(n_epoch, cbs=cb)
223 if show_plot: self.recorder.plot_lr_find()
224 if suggestions:
~/fastai/fastai/learner.py in fit(self, n_epoch, lr, wd, cbs, reset_opt)
209 self.opt.set_hypers(lr=self.lr if lr is None else lr)
210 self.n_epoch = n_epoch
--> 211 self._with_events(self._do_fit, 'fit', CancelFitException, self._end_cleanup)
212
213 def _end_cleanup(self): self.dl,self.xb,self.yb,self.pred,self.loss = None,(None,),(None,),None,None
~/fastai/fastai/learner.py in _with_events(self, f, event_type, ex, final)
158
159 def _with_events(self, f, event_type, ex, final=noop):
--> 160 try: self(f'before_{event_type}'); f()
161 except ex: self(f'after_cancel_{event_type}')
162 self(f'after_{event_type}'); final()
~/fastai/fastai/learner.py in _do_fit(self)
200 for epoch in range(self.n_epoch):
201 self.epoch=epoch
--> 202 self._with_events(self._do_epoch, 'epoch', CancelEpochException)
203
204 def fit(self, n_epoch, lr=None, wd=None, cbs=None, reset_opt=False):
~/fastai/fastai/learner.py in _with_events(self, f, event_type, ex, final)
158
159 def _with_events(self, f, event_type, ex, final=noop):
--> 160 try: self(f'before_{event_type}'); f()
161 except ex: self(f'after_cancel_{event_type}')
162 self(f'after_{event_type}'); final()
~/fastai/fastai/learner.py in _do_epoch(self)
194
195 def _do_epoch(self):
--> 196 self._do_epoch_train()
197 self._do_epoch_validate()
198
~/fastai/fastai/learner.py in _do_epoch_train(self)
186 def _do_epoch_train(self):
187 self.dl = self.dls.train
--> 188 self._with_events(self.all_batches, 'train', CancelTrainException)
189
190 def _do_epoch_validate(self, ds_idx=1, dl=None):
~/fastai/fastai/learner.py in _with_events(self, f, event_type, ex, final)
158
159 def _with_events(self, f, event_type, ex, final=noop):
--> 160 try: self(f'before_{event_type}'); f()
161 except ex: self(f'after_cancel_{event_type}')
162 self(f'after_{event_type}'); final()
~/fastai/fastai/learner.py in all_batches(self)
164 def all_batches(self):
165 self.n_iter = len(self.dl)
--> 166 for o in enumerate(self.dl): self.one_batch(*o)
167
168 def _do_one_batch(self):
~/fastai/fastai/learner.py in one_batch(self, i, b)
182 self.iter = i
183 self._split(b)
--> 184 self._with_events(self._do_one_batch, 'batch', CancelBatchException)
185
186 def _do_epoch_train(self):
~/fastai/fastai/learner.py in _with_events(self, f, event_type, ex, final)
158
159 def _with_events(self, f, event_type, ex, final=noop):
--> 160 try: self(f'before_{event_type}'); f()
161 except ex: self(f'after_cancel_{event_type}')
162 self(f'after_{event_type}'); final()
~/fastai/fastai/learner.py in _do_one_batch(self)
170 self('after_pred')
171 if len(self.yb):
--> 172 self.loss_grad = self.loss_func(self.pred, *self.yb)
173 self.loss = self.loss_grad.clone()
174 self('after_loss')
<ipython-input-11-1da375f2a6ef> in loss_fn(pred, targ)
5 def loss_fn(pred, targ):
6 targ[targ==255] = 1
----> 7 return torch.nn.functional.cross_entropy(pred, targ.squeeze(1).type(torch.long))
8
9 learn = unet_learner(dl, resnet18, n_in=4, n_out=2, pretrained=False, loss_func=loss_fn, metrics=acc_metric)
~/anaconda3/envs/fastaf/lib/python3.8/site-packages/torch/nn/functional.py in cross_entropy(input, target, weight, size_average, ignore_index, reduce, reduction)
2460 tens_ops = (input, target)
2461 if any([type(t) is not Tensor for t in tens_ops]) and has_torch_function(tens_ops):
-> 2462 return handle_torch_function(
2463 cross_entropy, tens_ops, input, target, weight=weight,
2464 size_average=size_average, ignore_index=ignore_index, reduce=reduce,
~/anaconda3/envs/fastaf/lib/python3.8/site-packages/torch/overrides.py in handle_torch_function(public_api, relevant_args, *args, **kwargs)
1064
1065 func_name = '{}.{}'.format(public_api.__module__, public_api.__name__)
-> 1066 raise TypeError("no implementation found for '{}' on types that implement "
1067 '__torch_function__: {}'
1068 .format(func_name, list(map(type, overloaded_args))))
TypeError: no implementation found for 'torch.nn.functional.cross_entropy' on types that implement __torch_function__: [<class 'fastai.torch_core.TensorImage'>, <class 'fastai.torch_core.TensorMask'>]
I’m getting this same error on my local machine and on Kaggle. Considering that the code apparently worked with fastai 2.0.13 and torch 1.6.0+cu101 (taken from the Kaggle print statements), I’m guessing something in torch or fastai has changed. When searching around for the error above I found this thread, which appears to describe the same issue. Apparently it has been fixed in the latest release, which I have just installed, but I’m still getting the same error.
I have also tried modifying the script to use 3 bands, just to rule out some possible issues. I commented out the line below, which appears to result in a dataloader with 3 bands.
map_filename(red_filename, str1='red', str2='nir'),
I also changed
n_in=4
to
n_in=3
However, this returns the same error:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-13-d81c6bd29d71> in <module>
----> 1 learn.lr_find()
~/anaconda3/envs/fastaf/lib/python3.8/site-packages/fastai/callback/schedule.py in lr_find(self, start_lr, end_lr, num_it, stop_div, show_plot, suggestions)
220 n_epoch = num_it//len(self.dls.train) + 1
221 cb=LRFinder(start_lr=start_lr, end_lr=end_lr, num_it=num_it, stop_div=stop_div)
--> 222 with self.no_logging(): self.fit(n_epoch, cbs=cb)
223 if show_plot: self.recorder.plot_lr_find()
224 if suggestions:
~/anaconda3/envs/fastaf/lib/python3.8/site-packages/fastai/learner.py in fit(self, n_epoch, lr, wd, cbs, reset_opt)
209 self.opt.set_hypers(lr=self.lr if lr is None else lr)
210 self.n_epoch = n_epoch
--> 211 self._with_events(self._do_fit, 'fit', CancelFitException, self._end_cleanup)
212
213 def _end_cleanup(self): self.dl,self.xb,self.yb,self.pred,self.loss = None,(None,),(None,),None,None
~/anaconda3/envs/fastaf/lib/python3.8/site-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
158
159 def _with_events(self, f, event_type, ex, final=noop):
--> 160 try: self(f'before_{event_type}'); f()
161 except ex: self(f'after_cancel_{event_type}')
162 self(f'after_{event_type}'); final()
~/anaconda3/envs/fastaf/lib/python3.8/site-packages/fastai/learner.py in _do_fit(self)
200 for epoch in range(self.n_epoch):
201 self.epoch=epoch
--> 202 self._with_events(self._do_epoch, 'epoch', CancelEpochException)
203
204 def fit(self, n_epoch, lr=None, wd=None, cbs=None, reset_opt=False):
~/anaconda3/envs/fastaf/lib/python3.8/site-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
158
159 def _with_events(self, f, event_type, ex, final=noop):
--> 160 try: self(f'before_{event_type}'); f()
161 except ex: self(f'after_cancel_{event_type}')
162 self(f'after_{event_type}'); final()
~/anaconda3/envs/fastaf/lib/python3.8/site-packages/fastai/learner.py in _do_epoch(self)
194
195 def _do_epoch(self):
--> 196 self._do_epoch_train()
197 self._do_epoch_validate()
198
~/anaconda3/envs/fastaf/lib/python3.8/site-packages/fastai/learner.py in _do_epoch_train(self)
186 def _do_epoch_train(self):
187 self.dl = self.dls.train
--> 188 self._with_events(self.all_batches, 'train', CancelTrainException)
189
190 def _do_epoch_validate(self, ds_idx=1, dl=None):
~/anaconda3/envs/fastaf/lib/python3.8/site-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
158
159 def _with_events(self, f, event_type, ex, final=noop):
--> 160 try: self(f'before_{event_type}'); f()
161 except ex: self(f'after_cancel_{event_type}')
162 self(f'after_{event_type}'); final()
~/anaconda3/envs/fastaf/lib/python3.8/site-packages/fastai/learner.py in all_batches(self)
164 def all_batches(self):
165 self.n_iter = len(self.dl)
--> 166 for o in enumerate(self.dl): self.one_batch(*o)
167
168 def _do_one_batch(self):
~/anaconda3/envs/fastaf/lib/python3.8/site-packages/fastai/learner.py in one_batch(self, i, b)
182 self.iter = i
183 self._split(b)
--> 184 self._with_events(self._do_one_batch, 'batch', CancelBatchException)
185
186 def _do_epoch_train(self):
~/anaconda3/envs/fastaf/lib/python3.8/site-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
158
159 def _with_events(self, f, event_type, ex, final=noop):
--> 160 try: self(f'before_{event_type}'); f()
161 except ex: self(f'after_cancel_{event_type}')
162 self(f'after_{event_type}'); final()
~/anaconda3/envs/fastaf/lib/python3.8/site-packages/fastai/learner.py in _do_one_batch(self)
170 self('after_pred')
171 if len(self.yb):
--> 172 self.loss_grad = self.loss_func(self.pred, *self.yb)
173 self.loss = self.loss_grad.clone()
174 self('after_loss')
<ipython-input-12-9b2c6d2023e8> in loss_fn(pred, targ)
5 def loss_fn(pred, targ):
6 targ[targ==255] = 1
----> 7 return torch.nn.functional.cross_entropy(pred, targ.squeeze(1).type(torch.long))
8
9 learn = unet_learner(dl, resnet18, n_in=3, n_out=2, pretrained=True, loss_func=loss_fn, metrics=acc_metric).to_fp16()
~/anaconda3/envs/fastaf/lib/python3.8/site-packages/torch/nn/functional.py in cross_entropy(input, target, weight, size_average, ignore_index, reduce, reduction)
2460 tens_ops = (input, target)
2461 if any([type(t) is not Tensor for t in tens_ops]) and has_torch_function(tens_ops):
-> 2462 return handle_torch_function(
2463 cross_entropy, tens_ops, input, target, weight=weight,
2464 size_average=size_average, ignore_index=ignore_index, reduce=reduce,
~/anaconda3/envs/fastaf/lib/python3.8/site-packages/torch/overrides.py in handle_torch_function(public_api, relevant_args, *args, **kwargs)
1064
1065 func_name = '{}.{}'.format(public_api.__module__, public_api.__name__)
-> 1066 raise TypeError("no implementation found for '{}' on types that implement "
1067 '__torch_function__: {}'
1068 .format(func_name, list(map(type, overloaded_args))))
TypeError: no implementation found for 'torch.nn.functional.cross_entropy' on types that implement __torch_function__: [<class 'fastai.torch_core.TensorImage'>, <class 'fastai.torch_core.TensorMask'>]
From my limited understanding, I think this is pointing to the custom loss function as the issue, but I don’t understand it well enough to make any progress with it.
Also just linking in @cordmaur as this is his tutorial, so I figured he may like to know what’s going on.