Hey guys, I am getting this error after running the lesson 3 notebook.
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
/opt/anaconda3/lib/python3.7/site-packages/fastai/basic_train.py in fit(epochs, learn, callbacks, metrics)
100 xb, yb = cb_handler.on_batch_begin(xb, yb)
--> 101 loss = loss_batch(learn.model, xb, yb, learn.loss_func, learn.opt, cb_handler)
102 if cb_handler.on_batch_end(loss): break
/opt/anaconda3/lib/python3.7/site-packages/fastai/basic_train.py in loss_batch(model, xb, yb, loss_func, opt, cb_handler)
25 if not is_listy(yb): yb = [yb]
---> 26 out = model(*xb)
27 out = cb_handler.on_loss_begin(out)
/opt/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
492 else:
--> 493 result = self.forward(*input, **kwargs)
494 for hook in self._forward_hooks.values():
/opt/anaconda3/lib/python3.7/site-packages/torch/nn/modules/container.py in forward(self, input)
91 for module in self._modules.values():
---> 92 input = module(input)
93 return input
/opt/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
492 else:
--> 493 result = self.forward(*input, **kwargs)
494 for hook in self._forward_hooks.values():
/opt/anaconda3/lib/python3.7/site-packages/torch/nn/modules/container.py in forward(self, input)
91 for module in self._modules.values():
---> 92 input = module(input)
93 return input
/opt/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
492 else:
--> 493 result = self.forward(*input, **kwargs)
494 for hook in self._forward_hooks.values():
/opt/anaconda3/lib/python3.7/site-packages/torch/nn/modules/container.py in forward(self, input)
91 for module in self._modules.values():
---> 92 input = module(input)
93 return input
/opt/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
492 else:
--> 493 result = self.forward(*input, **kwargs)
494 for hook in self._forward_hooks.values():
/opt/anaconda3/lib/python3.7/site-packages/torchvision/models/resnet.py in forward(self, x)
90 if self.downsample is not None:
---> 91 identity = self.downsample(x)
92
/opt/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
492 else:
--> 493 result = self.forward(*input, **kwargs)
494 for hook in self._forward_hooks.values():
/opt/anaconda3/lib/python3.7/site-packages/torch/nn/modules/container.py in forward(self, input)
91 for module in self._modules.values():
---> 92 input = module(input)
93 return input
/opt/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
492 else:
--> 493 result = self.forward(*input, **kwargs)
494 for hook in self._forward_hooks.values():
/opt/anaconda3/lib/python3.7/site-packages/torch/nn/modules/batchnorm.py in forward(self, input)
82 self.training or not self.track_running_stats,
---> 83 exponential_average_factor, self.eps)
84
/opt/anaconda3/lib/python3.7/site-packages/torch/nn/functional.py in batch_norm(input, running_mean, running_var, weight, bias, training, momentum, eps)
1696 input, weight, bias, running_mean, running_var,
-> 1697 training, momentum, eps, torch.backends.cudnn.enabled
1698 )
RuntimeError: CUDA out of memory. Tried to allocate 32.00 MiB (GPU 0; 7.43 GiB total capacity; 6.89 GiB already allocated; 10.94 MiB free; 48.76 MiB cached)
During handling of the above exception, another exception occurred:
RuntimeError Traceback (most recent call last)
<ipython-input-18-c7a9c29f9dd1> in <module>
----> 1 learn.lr_find()
2 learn.recorder.plot()
/opt/anaconda3/lib/python3.7/site-packages/fastai/train.py in lr_find(learn, start_lr, end_lr, num_it, stop_div, wd)
30 cb = LRFinder(learn, start_lr, end_lr, num_it, stop_div)
31 epochs = int(np.ceil(num_it/len(learn.data.train_dl)))
---> 32 learn.fit(epochs, start_lr, callbacks=[cb], wd=wd)
33
34 def to_fp16(learn:Learner, loss_scale:float=None, max_noskip:int=1000, dynamic:bool=True, clip:float=None,
/opt/anaconda3/lib/python3.7/site-packages/fastai/basic_train.py in fit(self, epochs, lr, wd, callbacks)
198 callbacks = [cb(self) for cb in self.callback_fns + listify(defaults.extra_callback_fns)] + listify(callbacks)
199 if defaults.extra_callbacks is not None: callbacks += defaults.extra_callbacks
--> 200 fit(epochs, self, metrics=self.metrics, callbacks=self.callbacks+callbacks)
201
202 def create_opt(self, lr:Floats, wd:Floats=0.)->None:
/opt/anaconda3/lib/python3.7/site-packages/fastai/basic_train.py in fit(epochs, learn, callbacks, metrics)
110 exception = e
111 raise
--> 112 finally: cb_handler.on_train_end(exception)
113
114 loss_func_name2activ = {'cross_entropy_loss': F.softmax, 'nll_loss': torch.exp, 'poisson_nll_loss': torch.exp,
/opt/anaconda3/lib/python3.7/site-packages/fastai/callback.py in on_train_end(self, exception)
321 def on_train_end(self, exception:Union[bool,Exception])->None:
322 "Handle end of training, `exception` is an `Exception` or False if no exceptions during training."
--> 323 self('train_end', exception=exception)
324
325 @property
/opt/anaconda3/lib/python3.7/site-packages/fastai/callback.py in __call__(self, cb_name, call_mets, **kwargs)
249 if call_mets:
250 for met in self.metrics: self._call_and_update(met, cb_name, **kwargs)
--> 251 for cb in self.callbacks: self._call_and_update(cb, cb_name, **kwargs)
252
253 def set_dl(self, dl:DataLoader):
/opt/anaconda3/lib/python3.7/site-packages/fastai/callback.py in _call_and_update(self, cb, cb_name, **kwargs)
239 def _call_and_update(self, cb, cb_name, **kwargs)->None:
240 "Call `cb_name` on `cb` and update the inner state."
--> 241 new = ifnone(getattr(cb, f'on_{cb_name}')(**self.state_dict, **kwargs), dict())
242 for k,v in new.items():
243 if k not in self.state_dict:
/opt/anaconda3/lib/python3.7/site-packages/fastai/callbacks/lr_finder.py in on_train_end(self, **kwargs)
33 def on_train_end(self, **kwargs:Any)->None:
34 "Cleanup learn model weights disturbed during LRFinder exploration."
---> 35 self.learn.load('tmp', purge=False)
36 if hasattr(self.learn.model, 'reset'): self.learn.model.reset()
37 for cb in self.callbacks:
/opt/anaconda3/lib/python3.7/site-packages/fastai/basic_train.py in load(self, file, device, strict, with_opt, purge, remove_module)
265 elif isinstance(device, int): device = torch.device('cuda', device)
266 source = self.path/self.model_dir/f'{file}.pth' if is_pathlike(file) else file
--> 267 state = torch.load(source, map_location=device)
268 if set(state.keys()) == {'model', 'opt'}:
269 model_state = state['model']
/opt/anaconda3/lib/python3.7/site-packages/torch/serialization.py in load(f, map_location, pickle_module, **pickle_load_args)
385 f = f.open('rb')
386 try:
--> 387 return _load(f, map_location, pickle_module, **pickle_load_args)
388 finally:
389 if new_fd:
/opt/anaconda3/lib/python3.7/site-packages/torch/serialization.py in _load(f, map_location, pickle_module, **pickle_load_args)
572 unpickler = pickle_module.Unpickler(f, **pickle_load_args)
573 unpickler.persistent_load = persistent_load
--> 574 result = unpickler.load()
575
576 deserialized_storage_keys = pickle_module.load(f, **pickle_load_args)
/opt/anaconda3/lib/python3.7/site-packages/torch/serialization.py in persistent_load(saved_id)
535 obj = data_type(size)
536 obj._torch_load_uninitialized = True
--> 537 deserialized_objects[root_key] = restore_location(obj, location)
538 storage = deserialized_objects[root_key]
539 if view_metadata is not None:
/opt/anaconda3/lib/python3.7/site-packages/torch/serialization.py in restore_location(storage, location)
405 elif isinstance(map_location, torch.device):
406 def restore_location(storage, location):
--> 407 return default_restore_location(storage, str(map_location))
408 else:
409 def restore_location(storage, location):
/opt/anaconda3/lib/python3.7/site-packages/torch/serialization.py in default_restore_location(storage, location)
117 def default_restore_location(storage, location):
118 for _, _, fn in _package_registry:
--> 119 result = fn(storage, location)
120 if result is not None:
121 return result
/opt/anaconda3/lib/python3.7/site-packages/torch/serialization.py in _cuda_deserialize(obj, location)
97 storage_type = getattr(torch.cuda, type(obj).__name__)
98 with torch.cuda.device(device):
---> 99 return storage_type(obj.size())
100 else:
101 return obj.cuda(device)
RuntimeError: CUDA out of memory. Tried to allocate 2.00 MiB (GPU 0; 7.43 GiB total capacity; 6.91 GiB already allocated; 2.94 MiB free; 39.62 MiB cached)
I ran the notebook as-is, without making any modifications to it, and I am using the normal GPU configuration, so I don't really know what could possibly be going wrong. This occurred after making a new databunch with the resized images, when I tried to run lr_find().
EDIT: I have also tried creating arbitrary exceptions like 1/0 in order to free the GPU VRAM. Needless to say, that didn't work either.