# Lesson 16 official topic

LR finder is only a rough guide in any situation @devforfu - generally you’ll want to try around 4x lower and 4x higher LRs too.

3 Likes

I think `ProgressCB` is fantastic, but I thought it would be better to display how long it took to finish the epoch, like `fastai`. So, I looked into some `fastprogress` source code and came up with this:

``````from time import time
from copy import copy
from torcheval.metrics import Mean

from .datasets import *
from .learner import *

# Adapted from fastprogress: https://github.com/fastai/fastprogress/blob/master/fastprogress/core.py#L7
def format_time(t):
    "Format `t` (in seconds) to (h):mm:ss"
    # Truncate to whole seconds, then split into hours / minutes / seconds.
    h, rem = divmod(int(t), 3600)
    m, s = divmod(rem, 60)
    # Only show the hours field when it is non-zero.
    if h: return f'{h}:{m:02d}:{s:02d}'
    return f'{m:02d}:{s:02d}'

class MetricsCB(Callback):
    "Track torcheval-style metrics plus the running loss, and log them (with elapsed time) after every epoch."

    def __init__(self, *ms, **metrics):
        # Positional metrics are keyed under their class name; keyword metrics keep their given name.
        for m in ms: metrics[type(m).__name__] = m
        self.metrics = metrics
        # `all_metrics` = user metrics plus a Mean() tracking the loss.
        self.all_metrics = copy(metrics)
        self.all_metrics['loss'] = self.loss = Mean()

    def _log(self, x): print(x)

    def before_fit(self, learn): learn.metrics = self

    def before_epoch(self, learn):
        # Start the epoch timer and zero every metric.
        self.start_time = time()
        for metric in self.all_metrics.values(): metric.reset()

    def after_epoch(self, learn):
        # Metric values first, then epoch number, train/valid flag, and elapsed time.
        log = {name: f'{metric.compute():.3f}' for name, metric in self.all_metrics.items()}
        log.update(epoch=learn.epoch, train=learn.model.training,
                   time=format_time(time() - self.start_time))
        self._log(log)

    def after_batch(self, learn):
        xb, yb = learn.batch
        preds = to_cpu(learn.preds)
        for metric in self.metrics.values(): metric.update(preds, to_cpu(yb))
        # Loss is weighted by batch size so the epoch mean is per-sample.
        self.loss.update(to_cpu(learn.loss), weight=len(xb))
``````

I only modified `MetricsCB` by adding one line to `before_epoch` and another to `after_epoch` to save time into the log.

Hopefully this helps.

1 Like

I am getting the following error when executing the code piece in notebook 10. I have not made any modifications to the code or the miniai library. I am setting up my local Conda environment on my M1 Mac, not sure if it’s because of that.

Any inputs would be appreciated

``````set_seed(1)
learn = fit(nn.Sequential(*cnn_layers()))
``````
``````[W ParallelNative.cpp:230] Warning: Cannot set number of intraop threads after parallel work has started or after set_num_threads call when using native parallel backend (function set_num_threads)
[W ParallelNative.cpp:230] Warning: Cannot set number of intraop threads after parallel work has started or after set_num_threads call when using native parallel backend (function set_num_threads)
[W ParallelNative.cpp:230] Warning: Cannot set number of intraop threads after parallel work has started or after set_num_threads call when using native parallel backend (function set_num_threads)
[W ParallelNative.cpp:230] Warning: Cannot set number of intraop threads after parallel work has started or after set_num_threads call when using native parallel backend (function set_num_threads)
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
Cell In[22], line 2
1 set_seed(1)
----> 2 learn = fit(nn.Sequential(*cnn_layers()))

Cell In[21], line 4, in fit(model, epochs, xtra_cbs)
1 def fit(model, epochs=1, xtra_cbs=None):
2     # setting up a high learning rate of 0.6 as an experiment
3     learn = Learner(model, dls, loss_func=F.cross_entropy, lr=0.6, cbs=cbs+fc.L(xtra_cbs))
----> 4     learn.fit(epochs)
5     return learn

File ~/fastai-2023-part2/nbs/miniai/learner.py:178, in Learner.fit(self, n_epochs, train, valid, cbs, lr)
176     if lr is None: lr = self.lr
177     if self.opt_func: self.opt = self.opt_func(self.model.parameters(), lr)
--> 178     self._fit(train, valid)
179 finally:
180     for cb in cbs: self.cbs.remove(cb)

File ~/fastai-2023-part2/nbs/miniai/learner.py:129, in with_cbs.__call__.<locals>._f(o, *args, **kwargs)
127 try:
128     o.callback(f'before_{self.nm}')
--> 129     f(o, *args, **kwargs)
130     o.callback(f'after_{self.nm}')
131 except globals()[f'Cancel{self.nm.title()}Exception']: pass

File ~/fastai-2023-part2/nbs/miniai/learner.py:166, in Learner._fit(self, train, valid)
163 @with_cbs('fit')
164 def _fit(self, train, valid):
165     for self.epoch in self.epochs:
--> 166         if train: self.one_epoch(True)

File ~/fastai-2023-part2/nbs/miniai/learner.py:161, in Learner.one_epoch(self, training)
159 self.model.train(training)
160 self.dl = self.dls.train if training else self.dls.valid
--> 161 self._one_epoch()

File ~/fastai-2023-part2/nbs/miniai/learner.py:129, in with_cbs.__call__.<locals>._f(o, *args, **kwargs)
127 try:
128     o.callback(f'before_{self.nm}')
--> 129     f(o, *args, **kwargs)
130     o.callback(f'after_{self.nm}')
131 except globals()[f'Cancel{self.nm.title()}Exception']: pass

File ~/fastai-2023-part2/nbs/miniai/learner.py:156, in Learner._one_epoch(self)
154 @with_cbs('epoch')
155 def _one_epoch(self):
--> 156     for self.iter,self.batch in enumerate(self.dl): self._one_batch()

File ~/anaconda3/lib/python3.11/site-packages/fastprogress/fastprogress.py:50, in ProgressBar.__iter__(self)
48 except Exception as e:
49     self.on_interrupt()
---> 50     raise e

File ~/anaconda3/lib/python3.11/site-packages/fastprogress/fastprogress.py:41, in ProgressBar.__iter__(self)
39 if self.total != 0: self.update(0)
40 try:
---> 41     for i,o in enumerate(self.gen):
42         if self.total and i >= self.total: break
43         yield o

627 if self._sampler_iter is None:
628     # TODO(https://github.com/pytorch/pytorch/issues/76750)
629     self._reset()  # type: ignore[call-arg]
--> 630 data = self._next_data()
631 self._num_yielded += 1
632 if self._dataset_kind == _DatasetKind.Iterable and \
633         self._IterableDataset_len_called is not None and \
634         self._num_yielded > self._IterableDataset_len_called:

1343 else:
-> 1345     return self._process_data(data)

1369 self._try_put_index()
1370 if isinstance(data, ExceptionWrapper):
-> 1371     data.reraise()
1372 return data

File ~/anaconda3/lib/python3.11/site-packages/torch/_utils.py:694, in ExceptionWrapper.reraise(self)
690 except TypeError:
691     # If the exception takes multiple arguments, don't try to
692     # instantiate since we don't know how to
693     raise RuntimeError(msg) from None
--> 694 raise exception

RuntimeError: Caught RuntimeError in DataLoader worker process 0.
Original Traceback (most recent call last):
File "/Users/anirudhg/anaconda3/lib/python3.11/site-packages/torch/utils/data/_utils/worker.py", line 308, in _worker_loop
data = fetcher.fetch(index)
^^^^^^^^^^^^^^^^^^^^
File "/Users/anirudhg/anaconda3/lib/python3.11/site-packages/torch/utils/data/_utils/fetch.py", line 54, in fetch
return self.collate_fn(data)
^^^^^^^^^^^^^^^^^^^^^
File "/Users/anirudhg/fastai-2023-part2/nbs/miniai/datasets.py", line 27, in _f
def _f(b): return get(default_collate(b))
^^^^^^^^^^^^^^^^^^
File "/Users/anirudhg/anaconda3/lib/python3.11/site-packages/torch/utils/data/_utils/collate.py", line 265, in default_collate
return collate(batch, collate_fn_map=default_collate_fn_map)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/anirudhg/anaconda3/lib/python3.11/site-packages/torch/utils/data/_utils/collate.py", line 127, in collate
return elem_type({key: collate([d[key] for d in batch], collate_fn_map=collate_fn_map) for key in elem})
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/anirudhg/anaconda3/lib/python3.11/site-packages/torch/utils/data/_utils/collate.py", line 127, in <dictcomp>
return elem_type({key: collate([d[key] for d in batch], collate_fn_map=collate_fn_map) for key in elem})
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/anirudhg/anaconda3/lib/python3.11/site-packages/torch/utils/data/_utils/collate.py", line 119, in collate
return collate_fn_map[elem_type](batch, collate_fn_map=collate_fn_map)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/anirudhg/anaconda3/lib/python3.11/site-packages/torch/utils/data/_utils/collate.py", line 160, in collate_tensor_fn
storage = elem._typed_storage()._new_shared(numel, device=elem.device)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/anirudhg/anaconda3/lib/python3.11/site-packages/torch/storage.py", line 866, in _new_shared
untyped_storage = torch.UntypedStorage._new_shared(size * self._element_size(), device=device)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/anirudhg/anaconda3/lib/python3.11/site-packages/torch/storage.py", line 260, in _new_shared
return cls._new_using_filename_cpu(size)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
RuntimeError: torch_shm_manager at "/Users/anirudhg/anaconda3/lib/python3.11/site-packages/torch/bin/torch_shm_manager": could not generate a random directory for manager socket
``````
``````type or paste code here
``````

Hi Jeremy
I am facing an error like OSError: [WinError 127] The specified procedure could not be found while doing “from miniai.learner import *”.
any idea how to fix this?

I found the solution: in Colab you only have to run `!pip install -Uqq git+https://github.com/fastai/course22p2 --no-cache-dir`.