Lambda problem with 01_intro code

jeffbiss · January 6, 2021, 6:53pm

I’ve encountered a problem while running the 01_intro code locally in Visual Studio Code:

# Reproduction snippet: builds segmentation DataLoaders for CAMVID_TINY and
# fine-tunes a U-Net for 8 epochs. Running it locally raises the PicklingError
# shown in the traceback below.
path = untar_data(URLs.CAMVID_TINY)
dls = SegmentationDataLoaders.from_label_func(
    path, bs=8, fnames = get_image_files(path/"images"),
    # Maps an image file `o` to path/'labels'/'<stem>_P<suffix>'.
    # NOTE(review): this is the only lambda in the snippet, so it appears to be
    # the `<lambda>` on `__main__` that the PicklingError below cannot pickle.
    label_func = lambda o: path/'labels'/f'{o.stem}_P{o.suffix}',
    # Codes are read as strings from codes.txt under the dataset path.
    codes = np.loadtxt(path/'codes.txt', dtype=str)
)
print(path)
learn = unet_learner(dls, resnet34)
# The traceback below is raised from this call (cell line 9).
learn.fine_tune(8)

I get the following error, which does NOT happen on Paperspace:

PicklingError                             Traceback (most recent call last)
<ipython-input-22-60c49369e175> in <module>
      7 print(path)
      8 learn = unet_learner(dls, resnet34)
----> 9 learn.fine_tune(8)

C:\Users\jbiss\AppData\Local\Programs\Python\Python36\lib\site-packages\fastai\callback\schedule.py in fine_tune(self, epochs, base_lr, freeze_epochs, lr_mult, pct_start, div, **kwargs)
    155     "Fine tune with `freeze` for `freeze_epochs` then with `unfreeze` from `epochs` using discriminative LR"
    156     self.freeze()
--> 157     self.fit_one_cycle(freeze_epochs, slice(base_lr), pct_start=0.99, **kwargs)
    158     base_lr /= 2
    159     self.unfreeze()

C:\Users\jbiss\AppData\Local\Programs\Python\Python36\lib\site-packages\fastai\callback\schedule.py in fit_one_cycle(self, n_epoch, lr_max, div, div_final, pct_start, wd, moms, cbs, reset_opt)
    110     scheds = {'lr': combined_cos(pct_start, lr_max/div, lr_max, lr_max/div_final),
    111               'mom': combined_cos(pct_start, *(self.moms if moms is None else moms))}
--> 112     self.fit(n_epoch, cbs=ParamScheduler(scheds)+L(cbs), reset_opt=reset_opt, wd=wd)
    113 
    114 # Cell

C:\Users\jbiss\AppData\Local\Programs\Python\Python36\lib\site-packages\fastai\learner.py in fit(self, n_epoch, lr, wd, cbs, reset_opt)
    204             self.opt.set_hypers(lr=self.lr if lr is None else lr)
    205             self.n_epoch = n_epoch
--> 206             self._with_events(self._do_fit, 'fit', CancelFitException, self._end_cleanup)
    207 
    208     def _end_cleanup(self): self.dl,self.xb,self.yb,self.pred,self.loss = None,(None,),(None,),None,None

C:\Users\jbiss\AppData\Local\Programs\Python\Python36\lib\site-packages\fastai\learner.py in _with_events(self, f, event_type, ex, final)
    153 
    154     def _with_events(self, f, event_type, ex, final=noop):
--> 155         try:       self(f'before_{event_type}')       ;f()
    156         except ex: self(f'after_cancel_{event_type}')
    157         finally:   self(f'after_{event_type}')        ;final()

C:\Users\jbiss\AppData\Local\Programs\Python\Python36\lib\site-packages\fastai\learner.py in _do_fit(self)
    195         for epoch in range(self.n_epoch):
    196             self.epoch=epoch
--> 197             self._with_events(self._do_epoch, 'epoch', CancelEpochException)
    198 
    199     def fit(self, n_epoch, lr=None, wd=None, cbs=None, reset_opt=False):

C:\Users\jbiss\AppData\Local\Programs\Python\Python36\lib\site-packages\fastai\learner.py in _with_events(self, f, event_type, ex, final)
    153 
    154     def _with_events(self, f, event_type, ex, final=noop):
--> 155         try:       self(f'before_{event_type}')       ;f()
    156         except ex: self(f'after_cancel_{event_type}')
    157         finally:   self(f'after_{event_type}')        ;final()

C:\Users\jbiss\AppData\Local\Programs\Python\Python36\lib\site-packages\fastai\learner.py in _do_epoch(self)
    189 
    190     def _do_epoch(self):
--> 191         self._do_epoch_train()
    192         self._do_epoch_validate()
    193 

C:\Users\jbiss\AppData\Local\Programs\Python\Python36\lib\site-packages\fastai\learner.py in _do_epoch_train(self)
    181     def _do_epoch_train(self):
    182         self.dl = self.dls.train
--> 183         self._with_events(self.all_batches, 'train', CancelTrainException)
    184 
    185     def _do_epoch_validate(self, ds_idx=1, dl=None):

C:\Users\jbiss\AppData\Local\Programs\Python\Python36\lib\site-packages\fastai\learner.py in _with_events(self, f, event_type, ex, final)
    153 
    154     def _with_events(self, f, event_type, ex, final=noop):
--> 155         try:       self(f'before_{event_type}')       ;f()
    156         except ex: self(f'after_cancel_{event_type}')
    157         finally:   self(f'after_{event_type}')        ;final()

C:\Users\jbiss\AppData\Local\Programs\Python\Python36\lib\site-packages\fastai\learner.py in all_batches(self)
    159     def all_batches(self):
    160         self.n_iter = len(self.dl)
--> 161         for o in enumerate(self.dl): self.one_batch(*o)
    162 
    163     def _do_one_batch(self):

C:\Users\jbiss\AppData\Local\Programs\Python\Python36\lib\site-packages\fastai\data\load.py in __iter__(self)
     99         self.before_iter()
    100         self.__idxs=self.get_idxs() # called in context of main process (not workers/subprocesses)
--> 101         for b in _loaders[self.fake_l.num_workers==0](self.fake_l):
    102             if self.device is not None: b = to_device(b, self.device)
    103             yield self.after_batch(b)

C:\Users\jbiss\AppData\Local\Programs\Python\Python36\lib\site-packages\torch\utils\data\dataloader.py in __init__(self, loader)
    799             #     before it starts, and __del__ tries to join but will get:
    800             #     AssertionError: can only join a started process.
--> 801             w.start()
    802             self._index_queues.append(index_queue)
    803             self._workers.append(w)

C:\Users\jbiss\AppData\Local\Programs\Python\Python36\lib\multiprocessing\process.py in start(self)
    103                'daemonic processes are not allowed to have children'
    104         _cleanup()
--> 105         self._popen = self._Popen(self)
    106         self._sentinel = self._popen.sentinel
    107         _children.add(self)

C:\Users\jbiss\AppData\Local\Programs\Python\Python36\lib\multiprocessing\context.py in _Popen(process_obj)
    221     @staticmethod
    222     def _Popen(process_obj):
--> 223         return _default_context.get_context().Process._Popen(process_obj)
    224 
    225 class DefaultContext(BaseContext):

C:\Users\jbiss\AppData\Local\Programs\Python\Python36\lib\multiprocessing\context.py in _Popen(process_obj)
    320         def _Popen(process_obj):
    321             from .popen_spawn_win32 import Popen
--> 322             return Popen(process_obj)
    323 
    324     class SpawnContext(BaseContext):

C:\Users\jbiss\AppData\Local\Programs\Python\Python36\lib\multiprocessing\popen_spawn_win32.py in __init__(self, process_obj)
     63             try:
     64                 reduction.dump(prep_data, to_child)
---> 65                 reduction.dump(process_obj, to_child)
     66             finally:
     67                 set_spawning_popen(None)

C:\Users\jbiss\AppData\Local\Programs\Python\Python36\lib\multiprocessing\reduction.py in dump(obj, file, protocol)
     58 def dump(obj, file, protocol=None):
     59     '''Replacement for pickle.dump() using ForkingPickler.'''
---> 60     ForkingPickler(file, protocol).dump(obj)
     61 
     62 #

PicklingError: Can't pickle <function <lambda> at 0x00000288834C0F28>: attribute lookup <lambda> on __main__ failed

The thread “Pickle Error with resnext101_32x4d Pretrained models” discusses the same problem and provides an answer that doesn’t seem to work. Neither unet_learner nor Learner.fine_tune accepts purge as a parameter.

It seems that this error is beyond my control, and I do not know why it pops up locally. Is there any advice on how we can troubleshoot and resolve problems like this on our own?

jeffbiss · January 8, 2021, 4:22pm

I solved this issue with the following code that replaces the original lambda function with a named function:

# Dataset location; the image, label and codes paths below are built from it.
path = untar_data(URLs.CAMVID_TINY)
print(path)


def replace_lambda(fn):
    """Return the label file path for image file `fn`.

    The result is path/'labels'/'<stem>_P<suffix>', where stem and suffix
    come from `fn` and `path` is the module-level dataset path.
    """
    label_name = f'{fn.stem}_P{fn.suffix}'
    return path/'labels'/label_name


image_files = get_image_files(path/"images")
class_codes = np.loadtxt(path/'codes.txt', dtype=str)

dls = SegmentationDataLoaders.from_label_func(
    path,
    fnames=image_files,
    # A named module-level function is used here in place of the lambda,
    # which raised "Can't pickle <function <lambda>>" in the original code.
    label_func=replace_lambda,
    codes=class_codes,
    bs=8,
    num_workers=0,
)

learn = unet_learner(dls, resnet34)
learn.fine_tune(8)

Note the num_workers=0 argument: it needs to be there when running on Windows to prevent a BrokenPipeError.