I am running fast.ai on Kaggle. It works initially, but after I change the validation set with split_by_files in the data block, it stops working and I cannot find the reason for it.
# Build the labelled image source from a CSV of filenames (without extension) and labels.
# NOTE(review): PATH and cv_idx are defined elsewhere — cv_idx is presumably the list of
# filenames (or indices) to hold out as the validation set; confirm it matches what
# split_by_files expects.
src = (ImageList.from_csv(PATH, folder='train', csv_name='train_labels.csv', suffix='.tif')
.split_by_files(cv_idx)
.label_from_df()
.add_test_folder())
# Data augmentation: flips in both axes, slight zoom/lighting, no rotation or warp.
tfms = get_transforms(do_flip=True, flip_vert=True, max_rotate=.0, max_zoom=.1,
max_lighting=0.05, max_warp=0.)
# Resize every image to 96x96 by squishing (no crop) and batch into a DataBunch.
data = (src.transform(tfms, size=96, resize_method=ResizeMethod.SQUISH)
.databunch(bs=64, path='.'))
# Normalize with ImageNet statistics (matches the pretrained backbone used below).
data.normalize(imagenet_stats);
After I run learn.lr_find(), the validation loss becomes extremely high.
So I have to create a new learner after lr_find() and it works fine.
But when I unfreeze the model and train it again, the following error comes up:
---------------------------------------------------------------------------
RecursionError Traceback (most recent call last)
<ipython-input-17-ad12488cb6aa> in <module>()
2 RLR = ReduceLROnPlateauCallback(learn, monitor='roc_score',patience = 2)
3 SAVEML = SaveModelCallback(learn, every='improvement', monitor='roc_score', name='best')
----> 4 learn.fit_one_cycle(10, slice(1e-5,1e-4), callbacks = [RLR, SAVEML], pct_start=0.1)
/opt/conda/lib/python3.6/site-packages/fastai/train.py in fit_one_cycle(learn, cyc_len, max_lr, moms, div_factor, pct_start, final_div, wd, callbacks, tot_epochs, start_epoch)
20 callbacks.append(OneCycleScheduler(learn, max_lr, moms=moms, div_factor=div_factor, pct_start=pct_start,
21 final_div=final_div, tot_epochs=tot_epochs, start_epoch=start_epoch))
---> 22 learn.fit(cyc_len, max_lr, wd=wd, callbacks=callbacks)
23
24 def lr_find(learn:Learner, start_lr:Floats=1e-7, end_lr:Floats=10, num_it:int=100, stop_div:bool=True, wd:float=None):
/opt/conda/lib/python3.6/site-packages/fastai/basic_train.py in fit(self, epochs, lr, wd, callbacks)
194 callbacks = [cb(self) for cb in self.callback_fns] + listify(callbacks)
195 if defaults.extra_callbacks is not None: callbacks += defaults.extra_callbacks
--> 196 fit(epochs, self, metrics=self.metrics, callbacks=self.callbacks+callbacks)
197
198 def create_opt(self, lr:Floats, wd:Floats=0.)->None:
/opt/conda/lib/python3.6/site-packages/fastai/basic_train.py in fit(epochs, learn, callbacks, metrics)
88 cb_handler = CallbackHandler(callbacks, metrics)
89 pbar = master_bar(range(epochs))
---> 90 cb_handler.on_train_begin(epochs, pbar=pbar, metrics=metrics)
91
92 exception=False
/opt/conda/lib/python3.6/site-packages/fastai/callback.py in on_train_begin(self, epochs, pbar, metrics)
262 self.state_dict.update(dict(n_epochs=epochs, pbar=pbar, metrics=metrics))
263 names = [(met.name if hasattr(met, 'name') else camel2snake(met.__class__.__name__)) for met in self.metrics]
--> 264 self('train_begin', metrics_names=names)
265 if self.state_dict['epoch'] != 0:
266 self.state_dict['pbar'].first_bar.total -= self.state_dict['epoch']
/opt/conda/lib/python3.6/site-packages/fastai/callback.py in __call__(self, cb_name, call_mets, **kwargs)
248 if call_mets:
249 for met in self.metrics: self._call_and_update(met, cb_name, **kwargs)
--> 250 for cb in self.callbacks: self._call_and_update(cb, cb_name, **kwargs)
251
252 def set_dl(self, dl:DataLoader):
/opt/conda/lib/python3.6/site-packages/fastai/callback.py in _call_and_update(self, cb, cb_name, **kwargs)
238 def _call_and_update(self, cb, cb_name, **kwargs)->None:
239 "Call `cb_name` on `cb` and update the inner state."
--> 240 new = ifnone(getattr(cb, f'on_{cb_name}')(**self.state_dict, **kwargs), dict())
241 for k,v in new.items():
242 if k not in self.state_dict:
/opt/conda/lib/python3.6/site-packages/fastai/callbacks/fp16.py in on_train_begin(self, **kwargs)
79 self.model_params, self.master_params = get_master(self.learn.layer_groups, self.flat_master)
80 #Changes the optimizer so that the optimization step is done in FP32.
---> 81 if self.opt is None or self.opt.n_params != self.learn.opt.n_params:
82 self.opt = self.learn.opt.new_with_params(self.master_params)
83 self.opt.load_state_dict(self.learn.opt.state_dict())
/opt/conda/lib/python3.6/site-packages/fastai/callback.py in n_params(self)
71
72 @property
---> 73 def n_params(self): return sum([len(pg['params']) for pg in self.opt.param_groups])
74
75 #Hyperparameters as properties
/opt/conda/lib/python3.6/site-packages/fastai/callback.py in __getattr__(self, k)
61
62 #Passthrough to the inner opt.
---> 63 def __getattr__(self, k:str)->Any: return getattr(self.opt, k, None)
64 def __setstate__(self,data:Any): self.__dict__.update(data)
65
... last 1 frames repeated, from the frame below ...
/opt/conda/lib/python3.6/site-packages/fastai/callback.py in __getattr__(self, k)
61
62 #Passthrough to the inner opt.
---> 63 def __getattr__(self, k:str)->Any: return getattr(self.opt, k, None)
64 def __setstate__(self,data:Any): self.__dict__.update(data)
65
RecursionError: maximum recursion depth exceeded
I found out that it works without the recursion error when I use
learn = cnn_learner(data, models.densenet161, metrics=[accuracy, roc_score], loss_func=loss_func, ps=0.5, wd=1e-1)
instead of
learn = cnn_learner(data, models.densenet161, metrics=[accuracy, roc_score], loss_func=loss_func, ps=0.5, wd=1e-1).to_fp16()
so maybe to_fp16() is causing it? I am still experimenting.
Any suggestion? Thank you all.