Non-Beginner Discussion

Hey folks,
I’m trying to implement a callback that visualizes my image prediction. Here’s my code:

class VisualisePredictions(Callback):
    "Visualize predictions"
    order = ProgressCallback.order+1
    def after_epoch(self, **kwargs):
        dl=self.dls[1]
        b=dl.one_batch()
        _,_,prd=self.get_preds(dl=[b],with_decoded=True)
        dec = self.dls.after_batch.decode((TensorRawImage(prd),))[0][0]
        show_raw_image(dec) # My personalised function: takes a tensor, converts it to an np.float32 array, rescales it from the 0–65535 range down to 0–255, then displays it as an image

It was based on Learner.show_results().

It works perfectly fine when I call it before or after training,

but while fit_one_cycle() is running, the Google Colab session keeps getting stuck.

It looks like there is an endless loop or something similar.

Here’s the traceback. Any idea what I could do to solve this problem?

/usr/local/lib/python3.7/dist-packages/fastai/callback/schedule.py in fit_one_cycle(self, n_epoch, lr_max, div, div_final, pct_start, wd, moms, cbs, reset_opt)
    114     scheds = {'lr': combined_cos(pct_start, lr_max/div, lr_max, lr_max/div_final),
    115               'mom': combined_cos(pct_start, *(self.moms if moms is None else moms))}
--> 116     self.fit(n_epoch, cbs=ParamScheduler(scheds)+L(cbs), reset_opt=reset_opt, wd=wd)
    117 
    118 # Cell

/usr/local/lib/python3.7/dist-packages/fastai/learner.py in fit(self, n_epoch, lr, wd, cbs, reset_opt)
    220             self.opt.set_hypers(lr=self.lr if lr is None else lr)
    221             self.n_epoch = n_epoch
--> 222             self._with_events(self._do_fit, 'fit', CancelFitException, self._end_cleanup)
    223 
    224     def _end_cleanup(self): self.dl,self.xb,self.yb,self.pred,self.loss = None,(None,),(None,),None,None

/usr/local/lib/python3.7/dist-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
    162 
    163     def _with_events(self, f, event_type, ex, final=noop):
--> 164         try: self(f'before_{event_type}');  f()
    165         except ex: self(f'after_cancel_{event_type}')
    166         self(f'after_{event_type}');  final()

/usr/local/lib/python3.7/dist-packages/fastai/learner.py in _do_fit(self)
    211         for epoch in range(self.n_epoch):
    212             self.epoch=epoch
--> 213             self._with_events(self._do_epoch, 'epoch', CancelEpochException)
    214 
    215     def fit(self, n_epoch, lr=None, wd=None, cbs=None, reset_opt=False):

/usr/local/lib/python3.7/dist-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
    164         try: self(f'before_{event_type}');  f()
    165         except ex: self(f'after_cancel_{event_type}')
--> 166         self(f'after_{event_type}');  final()
    167 
    168     def all_batches(self):

/usr/local/lib/python3.7/dist-packages/fastai/learner.py in __call__(self, event_name)
    140 
    141     def ordered_cbs(self, event): return [cb for cb in self.cbs.sorted('order') if hasattr(cb, event)]
--> 142     def __call__(self, event_name): L(event_name).map(self._call_one)
    143 
    144     def _call_one(self, event_name):

/usr/local/lib/python3.7/dist-packages/fastcore/foundation.py in map(self, f, gen, *args, **kwargs)
    153     def range(cls, a, b=None, step=None): return cls(range_of(a, b=b, step=step))
    154 
--> 155     def map(self, f, *args, gen=False, **kwargs): return self._new(map_ex(self, f, *args, gen=gen, **kwargs))
    156     def argwhere(self, f, negate=False, **kwargs): return self._new(argwhere(self, f, negate, **kwargs))
    157     def argfirst(self, f, negate=False): return first(i for i,o in self.enumerate() if f(o))

/usr/local/lib/python3.7/dist-packages/fastcore/basics.py in map_ex(iterable, f, gen, *args, **kwargs)
    777     res = map(g, iterable)
    778     if gen: return res
--> 779     return list(res)
    780 
    781 # Cell

/usr/local/lib/python3.7/dist-packages/fastcore/basics.py in __call__(self, *args, **kwargs)
    762             if isinstance(v,_Arg): kwargs[k] = args.pop(v.i)
    763         fargs = [args[x.i] if isinstance(x, _Arg) else x for x in self.pargs] + args[self.maxi+1:]
--> 764         return self.func(*fargs, **kwargs)
    765 
    766 # Cell

/usr/local/lib/python3.7/dist-packages/fastai/learner.py in _call_one(self, event_name)
    144     def _call_one(self, event_name):
    145         if not hasattr(event, event_name): raise Exception(f'missing {event_name}')
--> 146         for cb in self.cbs.sorted('order'): cb(event_name)
    147 
    148     def _bn_bias_state(self, with_bias): return norm_bias_params(self.model, with_bias).map(self.opt.state)

/usr/local/lib/python3.7/dist-packages/fastai/callback/core.py in __call__(self, event_name)
     55         res = None
     56         if self.run and _run:
---> 57             try: res = getattr(self, event_name, noop)()
     58             except (CancelBatchException, CancelEpochException, CancelFitException, CancelStepException, CancelTrainException, CancelValidException): raise
     59             except Exception as e:

<ipython-input-23-a2093953dc27> in after_epoch(self, **kwargs)
      6         b=dl.one_batch()
      7         try:
----> 8             _,_,prd=self.get_preds(dl=[b],with_decoded=True)
      9         except TypeError as e:
     10             raise TypeError(f"problem with get_preds")

/usr/local/lib/python3.7/dist-packages/fastai/learner.py in get_preds(self, ds_idx, dl, with_input, with_decoded, with_loss, act, inner, reorder, cbs, **kwargs)
    262                 if with_decoded: res.insert(pred_i+2, getattr(self.loss_func, 'decodes', noop)(res[pred_i]))
    263             if reorder and hasattr(dl, 'get_idxs'): res = nested_reorder(res, tensor(idxs).argsort())
--> 264             return tuple(res)
    265         self._end_cleanup()
    266 

/usr/local/lib/python3.7/dist-packages/fastcore/xtras.py in __exit__(self, *args, **kwargs)
    493     def __init__(self, mgrs): self.default,self.stack = L(mgrs),ExitStack()
    494     def __enter__(self): self.default.map(self.stack.enter_context)
--> 495     def __exit__(self, *args, **kwargs): self.stack.__exit__(*args, **kwargs)
    496 
    497 # Cell

/usr/lib/python3.7/contextlib.py in __exit__(self, *exc_details)
    522                 # set-up context
    523                 fixed_ctx = exc_details[1].__context__
--> 524                 raise exc_details[1]
    525             except BaseException:
    526                 exc_details[1].__context__ = fixed_ctx

/usr/lib/python3.7/contextlib.py in __exit__(self, *exc_details)
    507             assert is_sync
    508             try:
--> 509                 if cb(*exc_details):
    510                     suppressed_exc = True
    511                     pending_raise = False

/usr/lib/python3.7/contextlib.py in _exit_wrapper(exc_type, exc, tb)
    375     def _create_exit_wrapper(cm, cm_exit):
    376         def _exit_wrapper(exc_type, exc, tb):
--> 377             return cm_exit(cm, exc_type, exc, tb)
    378         return _exit_wrapper
    379 

/usr/local/lib/python3.7/dist-packages/fastcore/xtras.py in __exit__(self, *args, **kwargs)
    493     def __init__(self, mgrs): self.default,self.stack = L(mgrs),ExitStack()
    494     def __enter__(self): self.default.map(self.stack.enter_context)
--> 495     def __exit__(self, *args, **kwargs): self.stack.__exit__(*args, **kwargs)
    496 
    497 # Cell

/usr/lib/python3.7/contextlib.py in __exit__(self, *exc_details)
    522                 # set-up context
    523                 fixed_ctx = exc_details[1].__context__
--> 524                 raise exc_details[1]
    525             except BaseException:
    526                 exc_details[1].__context__ = fixed_ctx

/usr/lib/python3.7/contextlib.py in __exit__(self, type, value, traceback)
    128                 value = type()
    129             try:
--> 130                 self.gen.throw(type, value, traceback)
    131             except StopIteration as exc:
    132                 # Suppress StopIteration *unless* it's the same exception that

/usr/local/lib/python3.7/dist-packages/fastai/learner.py in replacing_yield(o, attr, val)
     22     "Context manager to temporarily replace an attribute"
     23     old = getattr(o,attr)
---> 24     try:     yield setattr(o,attr,val)
     25     finally: setattr(o,attr,old)
     26 

/usr/lib/python3.7/contextlib.py in __exit__(self, *exc_details)
    507             assert is_sync
    508             try:
--> 509                 if cb(*exc_details):
    510                     suppressed_exc = True
    511                     pending_raise = False

/usr/lib/python3.7/contextlib.py in _exit_wrapper(exc_type, exc, tb)
    375     def _create_exit_wrapper(cm, cm_exit):
    376         def _exit_wrapper(exc_type, exc, tb):
--> 377             return cm_exit(cm, exc_type, exc, tb)
    378         return _exit_wrapper
    379 

/usr/local/lib/python3.7/dist-packages/fastai/learner.py in __exit__(self, exc_type, exc_value, tb)
    224     def _end_cleanup(self): self.dl,self.xb,self.yb,self.pred,self.loss = None,(None,),(None,),None,None
    225     def __enter__(self): self(_before_epoch); return self
--> 226     def __exit__(self, exc_type, exc_value, tb): self(_after_epoch)
    227 
    228     def validation_context(self, cbs=None, inner=False):

/usr/local/lib/python3.7/dist-packages/fastai/learner.py in __call__(self, event_name)
    140 
    141     def ordered_cbs(self, event): return [cb for cb in self.cbs.sorted('order') if hasattr(cb, event)]
--> 142     def __call__(self, event_name): L(event_name).map(self._call_one)
    143 
    144     def _call_one(self, event_name):

/usr/local/lib/python3.7/dist-packages/fastcore/foundation.py in map(self, f, gen, *args, **kwargs)
    153     def range(cls, a, b=None, step=None): return cls(range_of(a, b=b, step=step))
    154 
--> 155     def map(self, f, *args, gen=False, **kwargs): return self._new(map_ex(self, f, *args, gen=gen, **kwargs))
    156     def argwhere(self, f, negate=False, **kwargs): return self._new(argwhere(self, f, negate, **kwargs))
    157     def argfirst(self, f, negate=False): return first(i for i,o in self.enumerate() if f(o))

/usr/local/lib/python3.7/dist-packages/fastcore/basics.py in map_ex(iterable, f, gen, *args, **kwargs)
    777     res = map(g, iterable)
    778     if gen: return res
--> 779     return list(res)
    780 
    781 # Cell

/usr/local/lib/python3.7/dist-packages/fastcore/basics.py in __call__(self, *args, **kwargs)
    762             if isinstance(v,_Arg): kwargs[k] = args.pop(v.i)
    763         fargs = [args[x.i] if isinstance(x, _Arg) else x for x in self.pargs] + args[self.maxi+1:]
--> 764         return self.func(*fargs, **kwargs)
    765 
    766 # Cell

/usr/local/lib/python3.7/dist-packages/fastai/learner.py in _call_one(self, event_name)
    144     def _call_one(self, event_name):
    145         if not hasattr(event, event_name): raise Exception(f'missing {event_name}')
--> 146         for cb in self.cbs.sorted('order'): cb(event_name)
    147 
    148     def _bn_bias_state(self, with_bias): return norm_bias_params(self.model, with_bias).map(self.opt.state)

/usr/local/lib/python3.7/dist-packages/fastai/callback/core.py in __call__(self, event_name)
     55         res = None
     56         if self.run and _run:
---> 57             try: res = getattr(self, event_name, noop)()
     58             except (CancelBatchException, CancelEpochException, CancelFitException, CancelStepException, CancelTrainException, CancelValidException): raise
     59             except Exception as e:

<ipython-input-23-a2093953dc27> in after_epoch(self, **kwargs)
      6         b=dl.one_batch()
      7         try:
----> 8             _,_,prd=self.get_preds(dl=[b],with_decoded=True)
      9         except TypeError as e:
     10             raise TypeError(f"problem with get_preds")

/usr/local/lib/python3.7/dist-packages/fastai/learner.py in get_preds(self, ds_idx, dl, with_input, with_decoded, with_loss, act, inner, reorder, cbs, **kwargs)
    253         ctx_mgrs = self.validation_context(cbs=L(cbs)+[cb], inner=inner)
    254         if with_loss: ctx_mgrs.append(self.loss_not_reduced())
--> 255         with ContextManagers(ctx_mgrs):
    256             self._do_epoch_validate(dl=dl)
    257             if act is None: act = getattr(self.loss_func, 'activation', noop)

/usr/local/lib/python3.7/dist-packages/fastcore/xtras.py in __enter__(self)
    492     "Wrapper for `contextlib.ExitStack` which enters a collection of context managers"
    493     def __init__(self, mgrs): self.default,self.stack = L(mgrs),ExitStack()
--> 494     def __enter__(self): self.default.map(self.stack.enter_context)
    495     def __exit__(self, *args, **kwargs): self.stack.__exit__(*args, **kwargs)
    496 

/usr/local/lib/python3.7/dist-packages/fastcore/foundation.py in map(self, f, gen, *args, **kwargs)
    153     def range(cls, a, b=None, step=None): return cls(range_of(a, b=b, step=step))
    154 
--> 155     def map(self, f, *args, gen=False, **kwargs): return self._new(map_ex(self, f, *args, gen=gen, **kwargs))
    156     def argwhere(self, f, negate=False, **kwargs): return self._new(argwhere(self, f, negate, **kwargs))
    157     def argfirst(self, f, negate=False): return first(i for i,o in self.enumerate() if f(o))

/usr/local/lib/python3.7/dist-packages/fastcore/basics.py in map_ex(iterable, f, gen, *args, **kwargs)
    777     res = map(g, iterable)
    778     if gen: return res
--> 779     return list(res)
    780 
    781 # Cell

/usr/local/lib/python3.7/dist-packages/fastcore/basics.py in __call__(self, *args, **kwargs)
    762             if isinstance(v,_Arg): kwargs[k] = args.pop(v.i)
    763         fargs = [args[x.i] if isinstance(x, _Arg) else x for x in self.pargs] + args[self.maxi+1:]
--> 764         return self.func(*fargs, **kwargs)
    765 
    766 # Cell

/usr/lib/python3.7/contextlib.py in enter_context(self, cm)
    425         _cm_type = type(cm)
    426         _exit = _cm_type.__exit__
--> 427         result = _cm_type.__enter__(cm)
    428         self._push_cm_exit(cm, _exit)
    429         return result

/usr/local/lib/python3.7/dist-packages/fastcore/xtras.py in __enter__(self)
    492     "Wrapper for `contextlib.ExitStack` which enters a collection of context managers"
    493     def __init__(self, mgrs): self.default,self.stack = L(mgrs),ExitStack()
--> 494     def __enter__(self): self.default.map(self.stack.enter_context)
    495     def __exit__(self, *args, **kwargs): self.stack.__exit__(*args, **kwargs)
    496 

/usr/local/lib/python3.7/dist-packages/fastcore/foundation.py in map(self, f, gen, *args, **kwargs)
    153     def range(cls, a, b=None, step=None): return cls(range_of(a, b=b, step=step))
    154 
--> 155     def map(self, f, *args, gen=False, **kwargs): return self._new(map_ex(self, f, *args, gen=gen, **kwargs))
    156     def argwhere(self, f, negate=False, **kwargs): return self._new(argwhere(self, f, negate, **kwargs))
    157     def argfirst(self, f, negate=False): return first(i for i,o in self.enumerate() if f(o))

/usr/local/lib/python3.7/dist-packages/fastcore/basics.py in map_ex(iterable, f, gen, *args, **kwargs)
    777     res = map(g, iterable)
    778     if gen: return res
--> 779     return list(res)
    780 
    781 # Cell

/usr/local/lib/python3.7/dist-packages/fastcore/basics.py in __call__(self, *args, **kwargs)
    762             if isinstance(v,_Arg): kwargs[k] = args.pop(v.i)
    763         fargs = [args[x.i] if isinstance(x, _Arg) else x for x in self.pargs] + args[self.maxi+1:]
--> 764         return self.func(*fargs, **kwargs)
    765 
    766 # Cell

/usr/lib/python3.7/contextlib.py in enter_context(self, cm)
    425         _cm_type = type(cm)
    426         _exit = _cm_type.__exit__
--> 427         result = _cm_type.__enter__(cm)
    428         self._push_cm_exit(cm, _exit)
    429         return result

Interesting bug! The source of your problem is that get_preds calls the after_epoch event…
Where does it do that, you might ask? Well, this is the code of interest in get_preds:

        with ContextManagers(ctx_mgrs):
            self._do_epoch_validate(dl=dl)
            if act is None: act = getattr(self.loss_func, 'activation', noop)
            res = cb.all_tensors()
            pred_i = 1 if with_input else 0
            if res[pred_i] is not None:
                res[pred_i] = act(res[pred_i])
                if with_decoded: res.insert(pred_i+2, getattr(self.loss_func, 'decodes', noop)(res[pred_i]))
            if reorder and hasattr(dl, 'get_idxs'): res = nested_reorder(res, tensor(idxs).argsort())
            return tuple(res)
        self._end_cleanup()

Notice the context manager? When the with block finishes, Python calls the __exit__ method of each object it manages, if one is defined. In this case, for the Learner it is defined as the following:

def __exit__(self, exc_type, exc_value, tb): self(_after_epoch)

So that’s where your loop is coming from: your after_epoch calls get_preds, get_preds fires after_epoch again when its context managers exit, that calls your callback (and its get_preds) once more, and so on. You can see in the traceback that it points at exactly this __exit__ line.
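
One way to break the re-entry (just a sketch, not tested end to end, and the _busy attribute name is made up) is to guard the callback against being called from inside its own get_preds call:

class VisualisePredictions(Callback):
    "Visualize predictions, guarded against re-entry"
    order = ProgressCallback.order+1
    def after_epoch(self):
        # get_preds fires after_epoch again, so bail out if we're already inside our own call
        if getattr(self, '_busy', False): return
        self._busy = True
        try:
            dl = self.dls[1]
            b = dl.one_batch()
            _, _, prd = self.get_preds(dl=[b], with_decoded=True)
            dec = self.dls.after_batch.decode((TensorRawImage(prd),))[0][0]
            show_raw_image(dec)
        finally:
            self._busy = False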

Hope that helps!


Losing spatial relations is one part of this but I think the pooled feature vectors are not useful even for spatial-agnostic tasks. I am doubting whether the pooling operation has any semantic meaning in feature space or is just a bad bottleneck that the network has to work around.

Flattening would only work if the features were in the exact same places in both images, right? ViTs are certainly interesting (I am looking forward to learning more about them :wink:) and it does seem like one of their less talked about innovations is solving this exact problem.

Thank you very much!

I also figured out that the problem was rooted in that ContextManagers line in get_preds.

(which is why I added a try/except inside my callback)

But I still can’t tell where to begin, or what exactly could be wrong…

At first the problem was with __exit__, but then it seems that it somehow ends up calling my callback again, and then the problem is with __enter__:

/usr/local/lib/python3.7/dist-packages/fastai/learner.py in get_preds(self, ds_idx, dl, with_input, with_decoded, with_loss, act, inner, reorder, cbs, **kwargs)
    253         ctx_mgrs = self.validation_context(cbs=L(cbs)+[cb], inner=inner)
    254         if with_loss: ctx_mgrs.append(self.loss_not_reduced())
--> 255         with ContextManagers(ctx_mgrs):
    256             self._do_epoch_validate(dl=dl)
    257             if act is None: act = getattr(self.loss_func, 'activation', noop)

/usr/local/lib/python3.7/dist-packages/fastcore/xtras.py in __enter__(self)
    492     "Wrapper for `contextlib.ExitStack` which enters a collection of context managers"
    493     def __init__(self, mgrs): self.default,self.stack = L(mgrs),ExitStack()
--> 494     def __enter__(self): self.default.map(self.stack.enter_context)
    495     def __exit__(self, *args, **kwargs): self.stack.__exit__(*args, **kwargs)
    496 

My only guess is that some data is being cleared at the end of every epoch, so the data my callback accesses at that point may already be missing, and the code can’t handle that gracefully because it doesn’t expect it. I still don’t really know how to proceed from here.

Okay, good news.

By using this little guy’s help:

I managed to build my callback:

class VisualisePredictions1(Callback):
    "Visualize predictions"
    order = ProgressCallback.order+1
    def after_epoch(self, **kwargs):
        if not self.learn.training:
            with torch.no_grad():
                preds = self.learn.pred[0]
                preds = preds.detach().cpu()
                preds = TensorRawImage(preds) # My tensor
                preds = self.dls.after_batch.decode(preds)
                show_raw_image(preds) # My function

Which works nicely.
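
It gets attached like any other callback, e.g. (learner construction omitted, this is just how I call it):

# Pass the callback at fit time; learner setup not shown in this post.
learn.fit_one_cycle(5, cbs=VisualisePredictions1())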

(screenshot of the visualised prediction)

So now I’m asking for a little more help and guidance:

When calling self.learn.pred I can only get one specific image. The returned item has a shape of [1, 3, H, W],
where 3 is the number of channels and 1 is the number of predicted items.

I tried to select another index out of the valid batch, but it returned an error along the lines of “object is not subscriptable”.

How can I get the prediction for a specific batch of the validation set?
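
The rough direction I have in mind is something like this (just a sketch, the helper name is made up and I haven’t tested it):

import torch

# Run the model on one chosen batch of the validation DataLoader instead of
# relying on self.learn.pred, which only holds the last batch processed.
def preds_for_valid_batch(learn, batch_idx=0):
    learn.model.eval()
    with torch.no_grad():
        for i, b in enumerate(learn.dls.valid):
            if i == batch_idx:
                return learn.model(b[0])   # raw predictions for that batch
    raise IndexError(f"valid set has fewer than {batch_idx + 1} batches")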

Thanks


Hi, so I am trying to load a pickled (.pkl) learner in my Kaggle inference kernel that was created using the learner.export() function. When I tried loading it with load_learner(), I encountered the following error:

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
/tmp/ipykernel_75/4239519382.py in <module>
----> 1 learn = load_learner("../input/cassava-classification-training/cassava-v1.pkl")

/opt/conda/lib/python3.7/site-packages/fastai/learner.py in load_learner(fname, cpu, pickle_module)
    384     "Load a `Learner` object in `fname`, optionally putting it on the `cpu`"
    385     distrib_barrier()
--> 386     try: res = torch.load(fname, map_location='cpu' if cpu else None, pickle_module=pickle_module)
    387     except AttributeError as e:
    388         e.args = [f"Custom classes or functions exported with your `Learner` are not available in the namespace currently.\nPlease re-declare or import them before calling `load_learner`:\n\t{e.args[0]}"]

/opt/conda/lib/python3.7/site-packages/torch/serialization.py in load(f, map_location, pickle_module, **pickle_load_args)
    605                     opened_file.seek(orig_position)
    606                     return torch.jit.load(opened_file)
--> 607                 return _load(opened_zipfile, map_location, pickle_module, **pickle_load_args)
    608         return _legacy_load(opened_file, map_location, pickle_module, **pickle_load_args)
    609 

/opt/conda/lib/python3.7/site-packages/torch/serialization.py in _load(zip_file, map_location, pickle_module, pickle_file, **pickle_load_args)
    880     unpickler = UnpicklerWrapper(data_file, **pickle_load_args)
    881     unpickler.persistent_load = persistent_load
--> 882     result = unpickler.load()
    883 
    884     torch._utils._validate_loaded_sparse_tensors()

/opt/conda/lib/python3.7/site-packages/torch/serialization.py in find_class(self, mod_name, name)
    873         def find_class(self, mod_name, name):
    874             mod_name = load_module_mapping.get(mod_name, mod_name)
--> 875             return super().find_class(mod_name, name)
    876 
    877     # Load the data (which may in turn use `persistent_load` to load tensors)

AttributeError: Custom classes or functions exported with your `Learner` are not available in the namespace currently.
Please re-declare or import them before calling `load_learner`:
	Can't get attribute 'AlbumentationsTransform' on <module '__main__'>

I have made sure to import all modules that I used in my training kernel, so I am not sure what to do. Thank you in advance for your kind assistance.


Sorry, I’ve solved this. Apparently I need to re-define my albumentations transform class in the inference kernel itself; merely importing the module is not enough.
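
In other words, the pattern is roughly the following (the class body here is only an illustration; it has to match exactly what was defined in the training kernel):

import numpy as np
from fastai.vision.all import *

# Re-declare the custom transform *before* calling load_learner, so that
# unpickling can find it in __main__. The body must mirror the training kernel.
class AlbumentationsTransform(Transform):
    def __init__(self, aug): self.aug = aug
    def encodes(self, img: PILImage):
        aug_img = self.aug(image=np.array(img))['image']
        return PILImage.create(aug_img)

learn = load_learner("../input/cassava-classification-training/cassava-v1.pkl")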

Hi all, if you’re using timm pre-trained models in a Kaggle notebook and you have to submit an inference kernel with no internet access, I highly recommend the following Kaggle post.

Briefly, you need to add the following dataset to your notebook:

And now you can import timm using the following command:

import sys
sys.path.append('../input/timm-pytorch-image-models/pytorch-image-models-master')
import timm
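
With no internet, pretrained weights can’t be downloaded either, so as far as I understand they also have to come from an attached dataset, something like this (the weights path here is made up):

import sys, torch
sys.path.append('../input/timm-pytorch-image-models/pytorch-image-models-master')
import timm

# Build the architecture without downloading anything, then load a checkpoint
# that was attached to the notebook as a Kaggle dataset.
model = timm.create_model('resnet34', pretrained=False)
state = torch.load('../input/my-timm-weights/resnet34.pth', map_location='cpu')
model.load_state_dict(state)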

FYI I’ve moved your posts to this non-beginner topic, since albumentations and no-internet kaggle notebooks are both not beginner topics.


Thanks, Jeremy. TBH I got quite frustrated because I’m facing problems that I thought were easy, e.g. installing fastai in WSL and no-internet notebooks. This reminds me to be kind to myself throughout my learning journey.


FWIW - I don’t bother submitting to competitions with that restriction, because it’s too much of a pain. Maybe at some point I’ll spend the time to create a smooth workflow for it and compete in them, but it doesn’t really feel like it’s actually a great experience for learning/practicing data science skills…


Hi.

I wanted to try fastai with this dataset from Kaggle:
Mushrooms classification - Common genus’s images

There was already a fastai Notebook with a baseline using cnn_learner.

I copied it and verified that it had fastai 2.5.3
(screenshot: fastai version 2.5.3)

In that notebook, fine-tuning with 4 epochs, the error_rate result is 0.034637.

I created a notebook from scratch that installed fastai 2.6.0 and trained using vision_learner.

This notebook, fine-tuned with 4 epochs, had an error_rate result of 0.172876.

So I wonder, why the big difference between the two cases?

Thanks!


Thanks for the suggestion. The notebook submission timed out after several hours. I am now looking into other competitions that do not require offline notebook submission.

Not sure why the big difference, but the best I could do was with resnet34 and 8 epochs:

epoch train_loss valid_loss error_rate time
0 1.902429 0.960317 0.306259 01:18
epoch train_loss valid_loss error_rate time
0 1.009672 0.640608 0.212370 01:19
1 0.665421 0.581202 0.192250 01:19
2 0.411279 0.500092 0.165425 01:19
3 0.237853 0.448220 0.137109 01:20
4 0.131850 0.479541 0.137109 01:19
5 0.062552 0.449048 0.122951 01:20
6 0.037600 0.450541 0.116244 01:19
7 0.027743 0.442344 0.117735 01:20

So, I threw resnet50 and 12 epochs at it, and the best I got was about 0.089

epoch train_loss valid_loss error_rate time
0 1.688787 0.956583 0.298063 01:26
epoch train_loss valid_loss error_rate time
0 0.772206 0.579936 0.172131 01:28
1 0.474315 0.547614 0.164680 01:26
2 0.324692 0.510597 0.156483 01:25
3 0.235094 0.537281 0.150522 01:26
4 0.168212 0.505873 0.140835 01:25
5 0.116778 0.385155 0.114009 01:26
6 0.078507 0.393273 0.101341 01:26
7 0.042922 0.369393 0.103577 01:26
8 0.028393 0.358416 0.096870 01:26
9 0.018406 0.342066 0.086438 01:25
10 0.012793 0.346952 0.083458 01:25
11 0.009411 0.346108 0.088674 01:26

Ok, so, with resnet152 and 20 epochs (with the from-scratch notebook) I get 0.073 error_rate.

learn152x20 = vision_learner(dls, resnet152, metrics=error_rate)

learn152x20.fine_tune(20)

Downloading: “https://download.pytorch.org/models/resnet152-394f9c45.pth” to /root/.cache/torch/hub/checkpoints/resnet152-394f9c45.pth


epoch train_loss valid_loss error_rate time
0 1.560938 0.914907 0.257824 01:35
epoch train_loss valid_loss error_rate time
0 0.678508 0.480754 0.153502 01:42
1 0.357377 0.415922 0.124441 01:42
2 0.201083 0.467987 0.134873 01:42
3 0.181344 0.537187 0.134128 01:42
4 0.198509 0.541071 0.136364 01:42
5 0.168583 0.432575 0.119225 01:42
6 0.134007 0.501745 0.126677 01:42
7 0.125964 0.469065 0.118480 01:42
8 0.100381 0.433086 0.112519 01:42
9 0.076645 0.454373 0.108793 01:42
10 0.062590 0.420274 0.099106 01:42
11 0.037568 0.529958 0.115499 01:41
12 0.035728 0.388896 0.089419 01:43
13 0.031575 0.394728 0.084948 01:42
14 0.017722 0.363212 0.077496 01:41
15 0.011253 0.345614 0.076751 01:41
16 0.005199 0.332281 0.076006 01:42
17 0.002851 0.335153 0.073770 01:42
18 0.003276 0.328994 0.073770 01:41
19 0.003068 0.333372 0.073770 01:41

I copy/edited the original notebook, and forced fastai 2.5.3, and replicated the original results:

https://www.kaggle.com/code/jhoward/mushrooms-classification-fastai?scriptVersionId=95369186
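
(For reference, “forced fastai 2.5.3” just means pinning the version at the top of the notebook, roughly like the cell below; the exact cell isn’t copied from the notebook.)

!pip install -Uqq fastai==2.5.3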

I then did the same thing with the latest fastai, and got the same results again:

https://www.kaggle.com/code/jhoward/mushrooms-classification-fastai?scriptVersionId=95371028

So it looks like maybe there’s something different with your from-scratch rewrite?

I’m getting 404s on these, maybe not public yet.

Thanks - try now.


Thanks,

Sorry, I forgot that I forced my notebook not to duplicate the images!

I mean, I don’t know why, but when getting the images from the dataset you end up with two folders containing the same images: Mushrooms and mushrooms/Mushrooms.

It can be seen in the copy you made:

But in my copy, I only used the folder Mushrooms:
(screenshot: only the top-level Mushrooms folder is used)
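
Something along these lines is what I mean by using only one folder (paths and details are approximate, not copied from my notebook):

from fastai.vision.all import *

path = Path('../input/mushrooms-classification-common-genuss-images')

# Keep only one copy of each image: skip the duplicated mushrooms/Mushrooms
# folder and use just the top-level Mushrooms one.
files = [f for f in get_image_files(path) if 'mushrooms/Mushrooms' not in str(f)]

dls = ImageDataLoaders.from_path_func(
    path, files,
    label_func=lambda p: p.parent.name,  # the genus is the parent folder name
    valid_pct=0.2, seed=42,
    item_tfms=Resize(224))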

So I think the question now is: is the model that uses each image twice overfitting, or is the duplication acting as a form of augmentation?

Sorry and thanks again.

Oh, sorry, I hadn’t seen your answer.

I would try that.

The difference was that one training run was done with all the images duplicated.
I then asked about that in my reply to Jeremy’s post.

Thank you.

Yeah sounds like the original had a bug so it’s not actually working correctly. Sorry I missed that!