Hi, I'm getting errors when running lesson 1 after installing cuDNN, and I'm having a hard time debugging them. The internet says it might be a shape mismatch in a matrix multiply (matmul), but if that were the case I'd expect it to be hitting more people. I have a 3070 Ti with 8 GB of VRAM, and this lesson works with a small dataset, so I don't think it's memory.
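In case it helps anyone diagnose, here's a quick sanity check of the versions and free VRAM in my environment; these are plain PyTorch calls, nothing fastai-specific, and should help rule the memory theory in or out:

```python
import torch

print(torch.__version__)                 # PyTorch build
print(torch.version.cuda)                # CUDA version PyTorch was compiled against
print(torch.backends.cudnn.version())    # cuDNN version PyTorch actually loads
print(torch.cuda.get_device_name(0))     # should report the 3070 Ti
free, total = torch.cuda.mem_get_info()  # (free, total) bytes on the current device
print(f"{free / 2**30:.1f} GiB free of {total / 2**30:.1f} GiB")
```

The full traceback: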
```
RuntimeError Traceback (most recent call last)
Cell In[14], line 2
1 learn = vision_learner(dls, resnet18, metrics=error_rate)
----> 2 learn.fine_tune(3)
File ~/miniconda3/envs/d2l/lib/python3.9/site-packages/fastai/callback/schedule.py:165, in fine_tune(self, epochs, base_lr, freeze_epochs, lr_mult, pct_start, div, **kwargs)
163 "Fine tune with `Learner.freeze` for `freeze_epochs`, then with `Learner.unfreeze` for `epochs`, using discriminative LR."
164 self.freeze()
--> 165 self.fit_one_cycle(freeze_epochs, slice(base_lr), pct_start=0.99, **kwargs)
166 base_lr /= 2
167 self.unfreeze()
File ~/miniconda3/envs/d2l/lib/python3.9/site-packages/fastai/callback/schedule.py:119, in fit_one_cycle(self, n_epoch, lr_max, div, div_final, pct_start, wd, moms, cbs, reset_opt, start_epoch)
116 lr_max = np.array([h['lr'] for h in self.opt.hypers])
117 scheds = {'lr': combined_cos(pct_start, lr_max/div, lr_max, lr_max/div_final),
118 'mom': combined_cos(pct_start, *(self.moms if moms is None else moms))}
--> 119 self.fit(n_epoch, cbs=ParamScheduler(scheds)+L(cbs), reset_opt=reset_opt, wd=wd, start_epoch=start_epoch)
File ~/miniconda3/envs/d2l/lib/python3.9/site-packages/fastai/learner.py:256, in Learner.fit(self, n_epoch, lr, wd, cbs, reset_opt, start_epoch)
254 self.opt.set_hypers(lr=self.lr if lr is None else lr)
255 self.n_epoch = n_epoch
--> 256 self._with_events(self._do_fit, 'fit', CancelFitException, self._end_cleanup)
File ~/miniconda3/envs/d2l/lib/python3.9/site-packages/fastai/learner.py:193, in Learner._with_events(self, f, event_type, ex, final)
192 def _with_events(self, f, event_type, ex, final=noop):
--> 193 try: self(f'before_{event_type}'); f()
194 except ex: self(f'after_cancel_{event_type}')
195 self(f'after_{event_type}'); final()
File ~/miniconda3/envs/d2l/lib/python3.9/site-packages/fastai/learner.py:245, in Learner._do_fit(self)
243 for epoch in range(self.n_epoch):
244 self.epoch=epoch
--> 245 self._with_events(self._do_epoch, 'epoch', CancelEpochException)
File ~/miniconda3/envs/d2l/lib/python3.9/site-packages/fastai/learner.py:193, in Learner._with_events(self, f, event_type, ex, final)
192 def _with_events(self, f, event_type, ex, final=noop):
--> 193 try: self(f'before_{event_type}'); f()
194 except ex: self(f'after_cancel_{event_type}')
195 self(f'after_{event_type}'); final()
File ~/miniconda3/envs/d2l/lib/python3.9/site-packages/fastai/learner.py:239, in Learner._do_epoch(self)
238 def _do_epoch(self):
--> 239 self._do_epoch_train()
240 self._do_epoch_validate()
File ~/miniconda3/envs/d2l/lib/python3.9/site-packages/fastai/learner.py:231, in Learner._do_epoch_train(self)
229 def _do_epoch_train(self):
230 self.dl = self.dls.train
--> 231 self._with_events(self.all_batches, 'train', CancelTrainException)
File ~/miniconda3/envs/d2l/lib/python3.9/site-packages/fastai/learner.py:193, in Learner._with_events(self, f, event_type, ex, final)
192 def _with_events(self, f, event_type, ex, final=noop):
--> 193 try: self(f'before_{event_type}'); f()
194 except ex: self(f'after_cancel_{event_type}')
195 self(f'after_{event_type}'); final()
File ~/miniconda3/envs/d2l/lib/python3.9/site-packages/fastai/learner.py:199, in Learner.all_batches(self)
197 def all_batches(self):
198 self.n_iter = len(self.dl)
--> 199 for o in enumerate(self.dl): self.one_batch(*o)
File ~/miniconda3/envs/d2l/lib/python3.9/site-packages/fastai/learner.py:227, in Learner.one_batch(self, i, b)
225 b = self._set_device(b)
226 self._split(b)
--> 227 self._with_events(self._do_one_batch, 'batch', CancelBatchException)
File ~/miniconda3/envs/d2l/lib/python3.9/site-packages/fastai/learner.py:193, in Learner._with_events(self, f, event_type, ex, final)
192 def _with_events(self, f, event_type, ex, final=noop):
--> 193 try: self(f'before_{event_type}'); f()
194 except ex: self(f'after_cancel_{event_type}')
195 self(f'after_{event_type}'); final()
File ~/miniconda3/envs/d2l/lib/python3.9/site-packages/fastai/learner.py:205, in Learner._do_one_batch(self)
204 def _do_one_batch(self):
--> 205 self.pred = self.model(*self.xb)
206 self('after_pred')
207 if len(self.yb):
File ~/miniconda3/envs/d2l/lib/python3.9/site-packages/torch/nn/modules/module.py:1190, in Module._call_impl(self, *input, **kwargs)
1186 # If we don't have any hooks, we want to skip the rest of the logic in
1187 # this function, and just call forward.
1188 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1189 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1190 return forward_call(*input, **kwargs)
1191 # Do not call functions when jit is used
1192 full_backward_hooks, non_full_backward_hooks = [], []
File ~/miniconda3/envs/d2l/lib/python3.9/site-packages/torch/nn/modules/container.py:204, in Sequential.forward(self, input)
202 def forward(self, input):
203 for module in self:
--> 204 input = module(input)
205 return input
File ~/miniconda3/envs/d2l/lib/python3.9/site-packages/torch/nn/modules/module.py:1190, in Module._call_impl(self, *input, **kwargs)
1186 # If we don't have any hooks, we want to skip the rest of the logic in
1187 # this function, and just call forward.
1188 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1189 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1190 return forward_call(*input, **kwargs)
1191 # Do not call functions when jit is used
1192 full_backward_hooks, non_full_backward_hooks = [], []
File ~/miniconda3/envs/d2l/lib/python3.9/site-packages/torch/nn/modules/container.py:204, in Sequential.forward(self, input)
202 def forward(self, input):
203 for module in self:
--> 204 input = module(input)
205 return input
File ~/miniconda3/envs/d2l/lib/python3.9/site-packages/torch/nn/modules/module.py:1190, in Module._call_impl(self, *input, **kwargs)
1186 # If we don't have any hooks, we want to skip the rest of the logic in
1187 # this function, and just call forward.
1188 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1189 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1190 return forward_call(*input, **kwargs)
1191 # Do not call functions when jit is used
1192 full_backward_hooks, non_full_backward_hooks = [], []
File ~/miniconda3/envs/d2l/lib/python3.9/site-packages/torch/nn/modules/linear.py:114, in Linear.forward(self, input)
113 def forward(self, input: Tensor) -> Tensor:
--> 114 return F.linear(input, self.weight, self.bias)
File ~/miniconda3/envs/d2l/lib/python3.9/site-packages/fastai/torch_core.py:378, in TensorBase.__torch_function__(cls, func, types, args, kwargs)
376 if cls.debug and func.__name__ not in ('__str__','__repr__'): print(func, types, args, kwargs)
377 if _torch_handled(args, cls._opt, func): types = (torch.Tensor,)
--> 378 res = super().__torch_function__(func, types, args, ifnone(kwargs, {}))
379 dict_objs = _find_args(args) if args else _find_args(list(kwargs.values()))
380 if issubclass(type(res),TensorBase) and dict_objs: res.set_meta(dict_objs[0],as_copy=True)
File ~/miniconda3/envs/d2l/lib/python3.9/site-packages/torch/_tensor.py:1278, in Tensor.__torch_function__(cls, func, types, args, kwargs)
1275 return NotImplemented
1277 with _C.DisableTorchFunction():
-> 1278 ret = func(*args, **kwargs)
1279 if func in get_default_nowrap_functions():
1280 return ret
RuntimeError: CUDA error: CUBLAS_STATUS_INVALID_VALUE when calling `cublasSgemm( handle, opa, opb, m, n, k, &alpha, a, lda, b, ldb, &beta, c, ldc)`
```
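Since the traceback bottoms out in `F.linear`, a minimal repro that exercises the same cuBLAS path without any fastai code might help isolate it (the sizes below are arbitrary placeholders, not the actual shapes from the lesson):

```python
import torch
import torch.nn.functional as F

x = torch.randn(64, 512, device="cuda")  # fake batch of activations
w = torch.randn(10, 512, device="cuda")  # Linear weight: (out_features, in_features)
b = torch.randn(10, device="cuda")       # bias

out = F.linear(x, w, b)                  # same call that raised CUBLAS_STATUS_INVALID_VALUE above
torch.cuda.synchronize()                 # force the kernel to actually execute
print(out.shape)                         # expect torch.Size([64, 10])
```

If that fails too, the problem is in the PyTorch/CUDA/cuDNN install rather than in fastai. Also, since CUDA errors are reported asynchronously, rerunning with `CUDA_LAUNCH_BLOCKING=1` set in the environment should make the stack trace point at the kernel that actually failed.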