Learn.lr_find() and learn.fit_one_cycle kill the kernel

Hello everyone,

I am trying to build myself a simple network that can count people, trained on the Mall dataset. The input is a 640x480 photo and the output is a same-size density map.

My code is as follows:

def block(ni, nf, ks=3):
    """Convolutional block mapping `ni` input channels to `nf` output channels.

    Uses a `ks` x `ks` kernel with stride 1, so the spatial size is preserved
    (ConvLayer's default padding keeps height/width unchanged at stride 1).
    """
    return ConvLayer(ni, nf, ks, stride=1)
def get_model():
    """Build the encoder-decoder network for density-map prediction.

    Three stride-2 max-pools shrink the input by 8x, a wide bottleneck block
    follows, and three 2x upsamples restore the original resolution, ending
    with a single-channel output (the density map).
    """
    encoder = [
        block(3, 32),
        nn.MaxPool2d(2),
        block(32, 64),
        nn.MaxPool2d(2),
        block(64, 128),
        nn.MaxPool2d(2),
    ]
    bottleneck = [
        block(128, 512),
    ]
    decoder = [
        nn.Upsample(scale_factor=2),
        block(512, 128),
        nn.Upsample(scale_factor=2),
        block(128, 64),
        nn.Upsample(scale_factor=2),
        block(64, 1),
    ]
    return nn.Sequential(*encoder, *bottleneck, *decoder)

And this is my loss function:

def EuclidLoss(input, target):
    """Mean squared (Euclidean) loss between predicted and target density maps.

    Returns a scalar *tensor*, not a Python float. The original version called
    `.item()`, which detaches the value from the autograd graph and returns a
    plain float — that is exactly what crashes fastai's training loop with
    `AttributeError: 'float' object has no attribute 'clone'` (and would also
    make backpropagation impossible).
    """
    return (input - target).square().mean()

The summary shows all the information

but I could not train the model. I tried several times, but the error message always showed up:

The kernel appears to have died. It will restart automatically

The terminal windows showed nothing so I have no clue what happened.

I am running the notebook on my TUF laptop with an AMD chip and a GTX 1650 GPU. Does someone know what the reason is and how to fix this? Many thanks.

I googled it. Is it correct that you have a 4GB GPU? Then probably a batch size of 64 with the pretty big input size is making you run out of resources. Try to start with batch size 8 and see if you can increase it.

Hi Jack,

thank you for the reply. I changed the bs to 3; it did not fix the problem, but I have more information now. The error was shown as follows:

AttributeError                            Traceback (most recent call last)
<ipython-input-28-d81c6bd29d71> in <module>
----> 1 learn.lr_find()

~/.local/lib/python3.8/site-packages/fastai/callback/schedule.py in lr_find(self, start_lr, end_lr, num_it, stop_div, show_plot, suggestions)
    220     n_epoch = num_it//len(self.dls.train) + 1
    221     cb=LRFinder(start_lr=start_lr, end_lr=end_lr, num_it=num_it, stop_div=stop_div)
--> 222     with self.no_logging(): self.fit(n_epoch, cbs=cb)
    223     if show_plot: self.recorder.plot_lr_find()
    224     if suggestions:

~/.local/lib/python3.8/site-packages/fastai/learner.py in fit(self, n_epoch, lr, wd, cbs, reset_opt)
    209             self.opt.set_hypers(lr=self.lr if lr is None else lr)
    210             self.n_epoch = n_epoch
--> 211             self._with_events(self._do_fit, 'fit', CancelFitException, self._end_cleanup)
    212 
    213     def _end_cleanup(self): self.dl,self.xb,self.yb,self.pred,self.loss = None,(None,),(None,),None,None

~/.local/lib/python3.8/site-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
    158 
    159     def _with_events(self, f, event_type, ex, final=noop):
--> 160         try: self(f'before_{event_type}');  f()
    161         except ex: self(f'after_cancel_{event_type}')
    162         self(f'after_{event_type}');  final()

~/.local/lib/python3.8/site-packages/fastai/learner.py in _do_fit(self)
    200         for epoch in range(self.n_epoch):
    201             self.epoch=epoch
--> 202             self._with_events(self._do_epoch, 'epoch', CancelEpochException)
    203 
    204     def fit(self, n_epoch, lr=None, wd=None, cbs=None, reset_opt=False):

~/.local/lib/python3.8/site-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
    158 
    159     def _with_events(self, f, event_type, ex, final=noop):
--> 160         try: self(f'before_{event_type}');  f()
    161         except ex: self(f'after_cancel_{event_type}')
    162         self(f'after_{event_type}');  final()

~/.local/lib/python3.8/site-packages/fastai/learner.py in _do_epoch(self)
    194 
    195     def _do_epoch(self):
--> 196         self._do_epoch_train()
    197         self._do_epoch_validate()
    198 

~/.local/lib/python3.8/site-packages/fastai/learner.py in _do_epoch_train(self)
    186     def _do_epoch_train(self):
    187         self.dl = self.dls.train
--> 188         self._with_events(self.all_batches, 'train', CancelTrainException)
    189 
    190     def _do_epoch_validate(self, ds_idx=1, dl=None):

~/.local/lib/python3.8/site-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
    158 
    159     def _with_events(self, f, event_type, ex, final=noop):
--> 160         try: self(f'before_{event_type}');  f()
    161         except ex: self(f'after_cancel_{event_type}')
    162         self(f'after_{event_type}');  final()

~/.local/lib/python3.8/site-packages/fastai/learner.py in all_batches(self)
    164     def all_batches(self):
    165         self.n_iter = len(self.dl)
--> 166         for o in enumerate(self.dl): self.one_batch(*o)
    167 
    168     def _do_one_batch(self):

~/.local/lib/python3.8/site-packages/fastai/learner.py in one_batch(self, i, b)
    182         self.iter = i
    183         self._split(b)
--> 184         self._with_events(self._do_one_batch, 'batch', CancelBatchException)
    185 
    186     def _do_epoch_train(self):

~/.local/lib/python3.8/site-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
    158 
    159     def _with_events(self, f, event_type, ex, final=noop):
--> 160         try: self(f'before_{event_type}');  f()
    161         except ex: self(f'after_cancel_{event_type}')
    162         self(f'after_{event_type}');  final()

~/.local/lib/python3.8/site-packages/fastai/learner.py in _do_one_batch(self)
    171         if len(self.yb):
    172             self.loss_grad = self.loss_func(self.pred, *self.yb)
--> 173             self.loss = self.loss_grad.clone()
    174         self('after_loss')
    175         if not self.training or not len(self.yb): return

AttributeError: 'float' object has no attribute 'clone'

You could also try a Resize transform before moving things to the GPU so that you might fit a larger batch with your memory constraints.