I trained a model using mixed precision and saved the learner with learn.save("model"). Later, when I loaded the learner with learn.load("model") to continue training, I got the following error:

RuntimeError: grid_sampler(): expected input and grid to have same dtype, but input has float and grid has c10::Half

I rebuilt the learner before loading exactly as it was before training.
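For context, here is a rough sketch of my workflow (build_learner() is just a placeholder for however I construct the learner, which is identical before and after saving; mixed precision is enabled with to_fp16()):

# training session: mixed-precision training, then save model + optimizer state
learn = build_learner().to_fp16()   # build_learner() stands in for my actual learner construction
learn.fit_flat_cos(50, 1e-3, wd=1e-2, pct_start=0.1, div_final=10)
learn.save("model")

# later, in a fresh session: rebuild the learner the same way and load the weights
learn = build_learner().to_fp16()
learn.load("model")
learn.fit_flat_cos(50, 1e-3, wd=1e-2, pct_start=0.1, div_final=10)   # raises the RuntimeError below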
Any idea what I'm missing here?
The full traceback:
RuntimeError Traceback (most recent call last)
<ipython-input-36-354de1b5d9f4> in <module>()
----> 1 learn.fit_flat_cos(50, 1e-3, wd=1e-2, pct_start=0.1,div_final=10)
27 frames
/usr/local/lib/python3.7/dist-packages/fastai/callback/schedule.py in fit_flat_cos(self, n_epoch, lr, div_final, pct_start, wd, cbs, reset_opt)
133 lr = np.array([h['lr'] for h in self.opt.hypers])
134 scheds = {'lr': combined_cos(pct_start, lr, lr, lr/div_final)}
--> 135 self.fit(n_epoch, cbs=ParamScheduler(scheds)+L(cbs), reset_opt=reset_opt, wd=wd)
136
137 # Cell
/usr/local/lib/python3.7/dist-packages/fastai/learner.py in fit(self, n_epoch, lr, wd, cbs, reset_opt)
210 self.opt.set_hypers(lr=self.lr if lr is None else lr)
211 self.n_epoch = n_epoch
--> 212 self._with_events(self._do_fit, 'fit', CancelFitException, self._end_cleanup)
213
214 def _end_cleanup(self): self.dl,self.xb,self.yb,self.pred,self.loss = None,(None,),(None,),None,None
/usr/local/lib/python3.7/dist-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
158
159 def _with_events(self, f, event_type, ex, final=noop):
--> 160 try: self(f'before_{event_type}'); f()
161 except ex: self(f'after_cancel_{event_type}')
162 self(f'after_{event_type}'); final()
/usr/local/lib/python3.7/dist-packages/fastai/learner.py in _do_fit(self)
201 for epoch in range(self.n_epoch):
202 self.epoch=epoch
--> 203 self._with_events(self._do_epoch, 'epoch', CancelEpochException)
204
205 def fit(self, n_epoch, lr=None, wd=None, cbs=None, reset_opt=False):
/usr/local/lib/python3.7/dist-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
158
159 def _with_events(self, f, event_type, ex, final=noop):
--> 160 try: self(f'before_{event_type}'); f()
161 except ex: self(f'after_cancel_{event_type}')
162 self(f'after_{event_type}'); final()
/usr/local/lib/python3.7/dist-packages/fastai/learner.py in _do_epoch(self)
196 def _do_epoch(self):
197 self._do_epoch_train()
--> 198 self._do_epoch_validate()
199
200 def _do_fit(self):
/usr/local/lib/python3.7/dist-packages/fastai/learner.py in _do_epoch_validate(self, ds_idx, dl)
192 if dl is None: dl = self.dls[ds_idx]
193 self.dl = dl
--> 194 with torch.no_grad(): self._with_events(self.all_batches, 'validate', CancelValidException)
195
196 def _do_epoch(self):
/usr/local/lib/python3.7/dist-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
158
159 def _with_events(self, f, event_type, ex, final=noop):
--> 160 try: self(f'before_{event_type}'); f()
161 except ex: self(f'after_cancel_{event_type}')
162 self(f'after_{event_type}'); final()
/usr/local/lib/python3.7/dist-packages/fastai/learner.py in all_batches(self)
164 def all_batches(self):
165 self.n_iter = len(self.dl)
--> 166 for o in enumerate(self.dl): self.one_batch(*o)
167
168 def _do_one_batch(self):
/usr/local/lib/python3.7/dist-packages/fastai/data/load.py in __iter__(self)
111 if self.device is not None and multiprocessing.get_start_method().lower() == "fork":
112 b = to_device(b, self.device)
--> 113 yield self.after_batch(b)
114 self.after_iter()
115 if hasattr(self, 'it'): del(self.it)
/usr/local/lib/python3.7/dist-packages/fastcore/transform.py in __call__(self, o)
196 self.fs.append(t)
197
--> 198 def __call__(self, o): return compose_tfms(o, tfms=self.fs, split_idx=self.split_idx)
199 def __repr__(self): return f"Pipeline: {' -> '.join([f.name for f in self.fs if f.name != 'noop'])}"
200 def __getitem__(self,i): return self.fs[i]
/usr/local/lib/python3.7/dist-packages/fastcore/transform.py in compose_tfms(x, tfms, is_enc, reverse, **kwargs)
148 for f in tfms:
149 if not is_enc: f = f.decode
--> 150 x = f(x, **kwargs)
151 return x
152
/usr/local/lib/python3.7/dist-packages/fastai/vision/augment.py in __call__(self, b, split_idx, **kwargs)
33 def __call__(self, b, split_idx=None, **kwargs):
34 self.before_call(b, split_idx=split_idx)
---> 35 return super().__call__(b, split_idx=split_idx, **kwargs) if self.do else b
36
37 # Cell
/usr/local/lib/python3.7/dist-packages/fastcore/transform.py in __call__(self, x, **kwargs)
71 @property
72 def name(self): return getattr(self, '_name', _get_name(self))
---> 73 def __call__(self, x, **kwargs): return self._call('encodes', x, **kwargs)
74 def decode (self, x, **kwargs): return self._call('decodes', x, **kwargs)
75 def __repr__(self): return f'{self.name}:\nencodes: {self.encodes}decodes: {self.decodes}'
/usr/local/lib/python3.7/dist-packages/fastcore/transform.py in _call(self, fn, x, split_idx, **kwargs)
81 def _call(self, fn, x, split_idx=None, **kwargs):
82 if split_idx!=self.split_idx and self.split_idx is not None: return x
---> 83 return self._do_call(getattr(self, fn), x, **kwargs)
84
85 def _do_call(self, f, x, **kwargs):
/usr/local/lib/python3.7/dist-packages/fastcore/transform.py in _do_call(self, f, x, **kwargs)
88 ret = f.returns(x) if hasattr(f,'returns') else None
89 return retain_type(f(x, **kwargs), x, ret)
---> 90 res = tuple(self._do_call(f, x_, **kwargs) for x_ in x)
91 return retain_type(res, x)
92
/usr/local/lib/python3.7/dist-packages/fastcore/transform.py in <genexpr>(.0)
88 ret = f.returns(x) if hasattr(f,'returns') else None
89 return retain_type(f(x, **kwargs), x, ret)
---> 90 res = tuple(self._do_call(f, x_, **kwargs) for x_ in x)
91 return retain_type(res, x)
92
/usr/local/lib/python3.7/dist-packages/fastcore/transform.py in _do_call(self, f, x, **kwargs)
87 if f is None: return x
88 ret = f.returns(x) if hasattr(f,'returns') else None
---> 89 return retain_type(f(x, **kwargs), x, ret)
90 res = tuple(self._do_call(f, x_, **kwargs) for x_ in x)
91 return retain_type(res, x)
/usr/local/lib/python3.7/dist-packages/fastcore/dispatch.py in __call__(self, *args, **kwargs)
116 elif self.inst is not None: f = MethodType(f, self.inst)
117 elif self.owner is not None: f = MethodType(f, self.owner)
--> 118 return f(*args, **kwargs)
119
120 def __get__(self, inst, owner):
/usr/local/lib/python3.7/dist-packages/fastai/vision/augment.py in encodes(self, x)
397 return x.affine_coord(self.mat, coord_func, sz=self.size, mode=mode, pad_mode=self.pad_mode, align_corners=self.align_corners)
398
--> 399 def encodes(self, x:TensorImage): return self._encode(x, self.mode)
400 def encodes(self, x:TensorMask): return self._encode(x, self.mode_mask)
401 def encodes(self, x:(TensorPoint, TensorBBox)): return self._encode(x, self.mode, reverse=True)
/usr/local/lib/python3.7/dist-packages/fastai/vision/augment.py in _encode(self, x, mode, reverse)
395 def _encode(self, x, mode, reverse=False):
396 coord_func = None if len(self.coord_fs)==0 or self.split_idx else partial(compose_tfms, tfms=self.coord_fs, reverse=reverse)
--> 397 return x.affine_coord(self.mat, coord_func, sz=self.size, mode=mode, pad_mode=self.pad_mode, align_corners=self.align_corners)
398
399 def encodes(self, x:TensorImage): return self._encode(x, self.mode)
/usr/local/lib/python3.7/dist-packages/fastai/vision/augment.py in affine_coord(x, mat, coord_tfm, sz, mode, pad_mode, align_corners)
319 coords = affine_grid(mat, x.shape[:2] + size, align_corners=align_corners)
320 if coord_tfm is not None: coords = coord_tfm(coords)
--> 321 return TensorImage(_grid_sample(x, coords, mode=mode, padding_mode=pad_mode, align_corners=align_corners))
322
323 @patch
/usr/local/lib/python3.7/dist-packages/fastai/vision/augment.py in _grid_sample(x, coords, mode, padding_mode, align_corners)
304 else:
305 x = F.interpolate(x, scale_factor=1/d, mode='area')
--> 306 return F.grid_sample(x, coords, mode=mode, padding_mode=padding_mode, align_corners=align_corners)
307
308 # Cell
/usr/local/lib/python3.7/dist-packages/torch/nn/functional.py in grid_sample(input, grid, mode, padding_mode, align_corners)
3361 return handle_torch_function(
3362 grid_sample, tens_ops, input, grid, mode=mode, padding_mode=padding_mode,
-> 3363 align_corners=align_corners)
3364 if mode != 'bilinear' and mode != 'nearest':
3365 raise ValueError("nn.functional.grid_sample(): expected mode to be "
/usr/local/lib/python3.7/dist-packages/torch/overrides.py in handle_torch_function(public_api, relevant_args, *args, **kwargs)
1058 # Use `public_api` instead of `implementation` so __torch_function__
1059 # implementations can do equality/identity comparisons.
-> 1060 result = overloaded_arg.__torch_function__(public_api, types, args, kwargs)
1061
1062 if result is not NotImplemented:
/usr/local/lib/python3.7/dist-packages/fastai/torch_core.py in __torch_function__(self, func, types, args, kwargs)
327 convert=False
328 if _torch_handled(args, self._opt, func): convert,types = type(self),(torch.Tensor,)
--> 329 res = super().__torch_function__(func, types, args=args, kwargs=kwargs)
330 if convert: res = convert(res)
331 if isinstance(res, TensorBase): res.set_meta(self, as_copy=True)
/usr/local/lib/python3.7/dist-packages/torch/tensor.py in __torch_function__(cls, func, types, args, kwargs)
993
994 with _C.DisableTorchFunction():
--> 995 ret = func(*args, **kwargs)
996 return _convert(ret, cls)
997
/usr/local/lib/python3.7/dist-packages/torch/nn/functional.py in grid_sample(input, grid, mode, padding_mode, align_corners)
3389 align_corners = False
3390
-> 3391 return torch.grid_sampler(input, grid, mode_enum, padding_mode_enum, align_corners)
3392
3393
RuntimeError: grid_sampler(): expected input and grid to have same dtype, but input has float and grid has c10::Half