Learning rate keeps changing

I’d need a reproducer for this, and for Christ’s sake, as we have said a thousand times, please post the full stack trace and not just the last error!
Thank you :slight_smile:

2 Likes
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-10-d81c6bd29d71> in <module>
----> 1 learn.lr_find()

/opt/conda/envs/fastai/lib/python3.7/site-packages/fastai2/callback/schedule.py in lr_find(self, start_lr, end_lr, num_it, stop_div, show_plot, suggestions)
    222     if suggestions:
    223         lrs,losses = tensor(self.recorder.lrs[5:-5]),tensor(self.recorder.losses[5:-5])
--> 224         lr_min = lrs[losses.argmin()].item()
    225         grads = (losses[1:]-losses[:-1]) / (lrs[1:].log()-lrs[:-1].log())
    226         lr_steep = lrs[grads.argmin()].item()

RuntimeError: cannot perform reduction function argmin on a tensor with no elements because the operation does not have an identity
1 Like

I understand better now. You have fewer than 10 losses, I’m guessing because of the big jumps at the beginning, so once the first and last 5 values are trimmed off (the `[5:-5]` slices) there is nothing left to get the lr_min from. Will add a test to remove that bug.

2 Likes

Thank you so much @sgugger, really appreciated your help.

Not sure if this is related and can help as well, but I was trying to play with Recorder.plot_lr_find() and I got this:

You can’t call a method on the class itself unless it’s a special kind of method called a classmethod. You need to call the method on an instance, like learn.recorder.plot_lr_find().

1 Like

Awesome, yes this plot_lr_find is what I was looking for. Thank you so much.

I’ve created a learner on Kaggle as follows:

learn = cnn_learner(dls, resnet18, metrics=[error_rate, accuracy])

After this I’m trying to do learn.lr_find() and it gives me the following error

---------------------------------------------------------------------------
OSError                                   Traceback (most recent call last)
/opt/conda/lib/python3.6/site-packages/fastai2/learner.py in fit(self, n_epoch, lr, wd, cbs, reset_opt)
    187             try:
--> 188                 self._do_begin_fit(n_epoch)
    189                 for epoch in range(n_epoch):

/opt/conda/lib/python3.6/site-packages/fastai2/learner.py in _do_begin_fit(self, n_epoch)
    159     def _do_begin_fit(self, n_epoch):
--> 160         self.n_epoch,self.loss = n_epoch,tensor(0.);         self('begin_fit')
    161 

/opt/conda/lib/python3.6/site-packages/fastai2/learner.py in __call__(self, event_name)
    123 
--> 124     def __call__(self, event_name): L(event_name).map(self._call_one)
    125     def _call_one(self, event_name):

/opt/conda/lib/python3.6/site-packages/fastcore/foundation.py in map(self, f, *args, **kwargs)
    371              else f.__getitem__)
--> 372         return self._new(map(g, self))
    373 

/opt/conda/lib/python3.6/site-packages/fastcore/foundation.py in _new(self, items, *args, **kwargs)
    322     def _xtra(self): return None
--> 323     def _new(self, items, *args, **kwargs): return type(self)(items, *args, use_list=None, **kwargs)
    324     def __getitem__(self, idx): return self._get(idx) if is_indexer(idx) else L(self._get(idx), use_list=None)

/opt/conda/lib/python3.6/site-packages/fastcore/foundation.py in __call__(cls, x, *args, **kwargs)
     40 
---> 41         res = super().__call__(*((x,) + args), **kwargs)
     42         res._newchk = 0

/opt/conda/lib/python3.6/site-packages/fastcore/foundation.py in __init__(self, items, use_list, match, *rest)
    313         if (use_list is not None) or not _is_array(items):
--> 314             items = list(items) if use_list else _listify(items)
    315         if match is not None:

/opt/conda/lib/python3.6/site-packages/fastcore/foundation.py in _listify(o)
    249     if isinstance(o, str) or _is_array(o): return [o]
--> 250     if is_iter(o): return list(o)
    251     return [o]

/opt/conda/lib/python3.6/site-packages/fastcore/foundation.py in __call__(self, *args, **kwargs)
    215         fargs = [args[x.i] if isinstance(x, _Arg) else x for x in self.pargs] + args[self.maxi+1:]
--> 216         return self.fn(*fargs, **kwargs)
    217 

/opt/conda/lib/python3.6/site-packages/fastai2/learner.py in _call_one(self, event_name)
    126         assert hasattr(event, event_name)
--> 127         [cb(event_name) for cb in sort_by_run(self.cbs)]
    128 

/opt/conda/lib/python3.6/site-packages/fastai2/learner.py in <listcomp>(.0)
    126         assert hasattr(event, event_name)
--> 127         [cb(event_name) for cb in sort_by_run(self.cbs)]
    128 

/opt/conda/lib/python3.6/site-packages/fastai2/callback/core.py in __call__(self, event_name)
     23                (self.run_valid and not getattr(self, 'training', False)))
---> 24         if self.run and _run: getattr(self, event_name, noop)()
     25         if event_name=='after_fit': self.run=True #Reset self.run to True at each end of fit

/opt/conda/lib/python3.6/site-packages/fastai2/callback/schedule.py in begin_fit(self)
    175         super().begin_fit()
--> 176         self.learn.save('_tmp')
    177         self.best_loss = float('inf')

/opt/conda/lib/python3.6/site-packages/fastai2/learner.py in save(self, file, **kwargs)
    264     def save(self, file, **kwargs):
--> 265         file = join_path_file(file, self.path/self.model_dir, ext='.pth')
    266         save_model(file, self.model, getattr(self,'opt',None), **kwargs)

/opt/conda/lib/python3.6/site-packages/fastcore/utils.py in join_path_file(file, path, ext)
    481     if not isinstance(file, (str, Path)): return file
--> 482     path.mkdir(parents=True, exist_ok=True)
    483     return path/f'{file}{ext}'

/opt/conda/lib/python3.6/pathlib.py in mkdir(self, mode, parents, exist_ok)
   1245         try:
-> 1246             self._accessor.mkdir(self, mode)
   1247         except FileNotFoundError:

/opt/conda/lib/python3.6/pathlib.py in wrapped(pathobj, *args)
    386         def wrapped(pathobj, *args):
--> 387             return strfunc(str(pathobj), *args)
    388         return staticmethod(wrapped)

OSError: [Errno 30] Read-only file system: '../input/rsna-hemorrhage-jpg/train_jpg/train_jpg/models'

During handling of the above exception, another exception occurred:

AttributeError                            Traceback (most recent call last)
<ipython-input-25-d81c6bd29d71> in <module>
----> 1 learn.lr_find()

/opt/conda/lib/python3.6/site-packages/fastai2/callback/schedule.py in lr_find(self, start_lr, end_lr, num_it, stop_div, show_plot, suggestions)
    221     n_epoch = num_it//len(self.dls.train) + 1
    222     cb=LRFinder(start_lr=start_lr, end_lr=end_lr, num_it=num_it, stop_div=stop_div)
--> 223     with self.no_logging(): self.fit(n_epoch, cbs=cb)
    224     if show_plot: self.recorder.plot_lr_find()
    225     if suggestions:

/opt/conda/lib/python3.6/site-packages/fastai2/learner.py in fit(self, n_epoch, lr, wd, cbs, reset_opt)
    196 
    197             except CancelFitException:             self('after_cancel_fit')
--> 198             finally:                               self('after_fit')
    199 
    200     def validate(self, ds_idx=1, dl=None, cbs=None):

/opt/conda/lib/python3.6/site-packages/fastai2/learner.py in __call__(self, event_name)
    122     def ordered_cbs(self, event): return [cb for cb in sort_by_run(self.cbs) if hasattr(cb, event)]
    123 
--> 124     def __call__(self, event_name): L(event_name).map(self._call_one)
    125     def _call_one(self, event_name):
    126         assert hasattr(event, event_name)

/opt/conda/lib/python3.6/site-packages/fastcore/foundation.py in map(self, f, *args, **kwargs)
    370              else f.format if isinstance(f,str)
    371              else f.__getitem__)
--> 372         return self._new(map(g, self))
    373 
    374     def filter(self, f, negate=False, **kwargs):

/opt/conda/lib/python3.6/site-packages/fastcore/foundation.py in _new(self, items, *args, **kwargs)
    321     @property
    322     def _xtra(self): return None
--> 323     def _new(self, items, *args, **kwargs): return type(self)(items, *args, use_list=None, **kwargs)
    324     def __getitem__(self, idx): return self._get(idx) if is_indexer(idx) else L(self._get(idx), use_list=None)
    325     def copy(self): return self._new(self.items.copy())

/opt/conda/lib/python3.6/site-packages/fastcore/foundation.py in __call__(cls, x, *args, **kwargs)
     39             return x
     40 
---> 41         res = super().__call__(*((x,) + args), **kwargs)
     42         res._newchk = 0
     43         return res

/opt/conda/lib/python3.6/site-packages/fastcore/foundation.py in __init__(self, items, use_list, match, *rest)
    312         if items is None: items = []
    313         if (use_list is not None) or not _is_array(items):
--> 314             items = list(items) if use_list else _listify(items)
    315         if match is not None:
    316             if is_coll(match): match = len(match)

/opt/conda/lib/python3.6/site-packages/fastcore/foundation.py in _listify(o)
    248     if isinstance(o, list): return o
    249     if isinstance(o, str) or _is_array(o): return [o]
--> 250     if is_iter(o): return list(o)
    251     return [o]
    252 

/opt/conda/lib/python3.6/site-packages/fastcore/foundation.py in __call__(self, *args, **kwargs)
    214             if isinstance(v,_Arg): kwargs[k] = args.pop(v.i)
    215         fargs = [args[x.i] if isinstance(x, _Arg) else x for x in self.pargs] + args[self.maxi+1:]
--> 216         return self.fn(*fargs, **kwargs)
    217 
    218 # Cell

/opt/conda/lib/python3.6/site-packages/fastai2/learner.py in _call_one(self, event_name)
    125     def _call_one(self, event_name):
    126         assert hasattr(event, event_name)
--> 127         [cb(event_name) for cb in sort_by_run(self.cbs)]
    128 
    129     def _bn_bias_state(self, with_bias): return bn_bias_params(self.model, with_bias).map(self.opt.state)

/opt/conda/lib/python3.6/site-packages/fastai2/learner.py in <listcomp>(.0)
    125     def _call_one(self, event_name):
    126         assert hasattr(event, event_name)
--> 127         [cb(event_name) for cb in sort_by_run(self.cbs)]
    128 
    129     def _bn_bias_state(self, with_bias): return bn_bias_params(self.model, with_bias).map(self.opt.state)

/opt/conda/lib/python3.6/site-packages/fastai2/callback/core.py in __call__(self, event_name)
     22         _run = (event_name not in _inner_loop or (self.run_train and getattr(self, 'training', True)) or
     23                (self.run_valid and not getattr(self, 'training', False)))
---> 24         if self.run and _run: getattr(self, event_name, noop)()
     25         if event_name=='after_fit': self.run=True #Reset self.run to True at each end of fit
     26 

/opt/conda/lib/python3.6/site-packages/fastai2/callback/progress.py in after_fit(self)
     37     def after_fit(self):
     38         if getattr(self, 'mbar', False):
---> 39             self.mbar.on_iter_end()
     40             delattr(self, 'mbar')
     41         self.learn.logger = self.old_logger

/opt/conda/lib/python3.6/site-packages/fastprogress/fastprogress.py in on_iter_end(self)
    155             total_time = format_time(time.time() - self.main_bar.start_t)
    156             self.text = f'Total time: {total_time} <p>' + self.text
--> 157         self.out.update(HTML(self.text))
    158 
    159     def add_child(self, child):

AttributeError: 'NBMasterBar' object has no attribute 'out'

Is this not the right function to use? I tried learn.recorder.plot_lr_find() and it gives me a blank plot

I’m also not able to plot validation loss. Is something wrong with my installation?
I’ve just done pip install fastai2 on Kaggle.

@sgugger sorry to tag you but I’m stuck with the above 2 errors. Can you help?

Please do not at-mention me when there are other people that can help.

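For your first error, the real error message is the first one in the traceback: `OSError: [Errno 30] Read-only file system: '../input/rsna-hemorrhage-jpg/train_jpg/train_jpg/models'`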

(the rest is an error that happened because of that).

This is because learn.lr_find tries to save your model (to reload it once it has done its mock training) but it can’t on the default path it has (data path/models by default). You need to pass model_dir=Path(...) in your call to cnn_learner, with the … being a path that points to somewhere writable.

For your second error, no one can help without seeing the code you are executing.

Thank you for the response. And sorry for the at-mention again, I only did it after waiting 20 hours. For my second error, I’ve run learn.fine_tune(5) and then I’m doing learn.recorder.plot_loss()