Dataloaders error on learn.fine_tune(1)

Hi all!
I have just started lesson 1 and played around a bit with the code, but I soon got this error when running the following code:

from fastai.vision.all import *
path = untar_data(URLs.PETS)/'images'
def is_cat(x): return x[0].isupper()
dls = ImageDataLoaders.from_name_func(
path, get_image_files(path), valid_pct=0.2, seed=42,
label_func=is_cat, item_tfms=Resize(224))

learn = cnn_learner(dls, resnet34, metrics=error_rate)
learn.fine_tune(1)

error:
Empty Traceback (most recent call last)
~\anaconda3\envs\Neural Net\lib\site-packages\torch\utils\data\dataloader.py in _try_get_data(self, timeout)
778 try:
--> 779 data = self._data_queue.get(timeout=timeout)
780 return (True, data)

~\anaconda3\envs\Neural Net\lib\multiprocessing\queues.py in get(self, block, timeout)
104 if not self._poll(timeout):
--> 105 raise Empty
106 elif not self._poll():

Empty:

During handling of the above exception, another exception occurred:

RuntimeError Traceback (most recent call last)
in <module>
----> 1 learn.fine_tune(1)

~\anaconda3\envs\Neural Net\lib\site-packages\fastcore\utils.py in _f(*args, **kwargs)
452 init_args.update(log)
453 setattr(inst, 'init_args', init_args)
--> 454 return inst if to_return else f(*args, **kwargs)
455 return _f
456

~\anaconda3\envs\Neural Net\lib\site-packages\fastai\callback\schedule.py in fine_tune(self, epochs, base_lr, freeze_epochs, lr_mult, pct_start, div, **kwargs)
159 "Fine tune with freeze for freeze_epochs then with unfreeze from epochs using discriminative LR"
160 self.freeze()
--> 161 self.fit_one_cycle(freeze_epochs, slice(base_lr), pct_start=0.99, **kwargs)
162 base_lr /= 2
163 self.unfreeze()

~\anaconda3\envs\Neural Net\lib\site-packages\fastcore\utils.py in _f(*args, **kwargs)
452 init_args.update(log)
453 setattr(inst, 'init_args', init_args)
--> 454 return inst if to_return else f(*args, **kwargs)
455 return _f
456

~\anaconda3\envs\Neural Net\lib\site-packages\fastai\callback\schedule.py in fit_one_cycle(self, n_epoch, lr_max, div, div_final, pct_start, wd, moms, cbs, reset_opt)
111 scheds = {'lr': combined_cos(pct_start, lr_max/div, lr_max, lr_max/div_final),
112 'mom': combined_cos(pct_start, *(self.moms if moms is None else moms))}
--> 113 self.fit(n_epoch, cbs=ParamScheduler(scheds)+L(cbs), reset_opt=reset_opt, wd=wd)
114
115 # Cell

~\anaconda3\envs\Neural Net\lib\site-packages\fastcore\utils.py in _f(*args, **kwargs)
452 init_args.update(log)
453 setattr(inst, 'init_args', init_args)
--> 454 return inst if to_return else f(*args, **kwargs)
455 return _f
456

~\anaconda3\envs\Neural Net\lib\site-packages\fastai\learner.py in fit(self, n_epoch, lr, wd, cbs, reset_opt)
202 self.opt.set_hypers(lr=self.lr if lr is None else lr)
203 self.n_epoch,self.loss = n_epoch,tensor(0.)
--> 204 self._with_events(self._do_fit, 'fit', CancelFitException, self._end_cleanup)
205
206 def _end_cleanup(self): self.dl,self.xb,self.yb,self.pred,self.loss = None,(None,),(None,),None,None

~\anaconda3\envs\Neural Net\lib\site-packages\fastai\learner.py in _with_events(self, f, event_type, ex, final)
153
154 def _with_events(self, f, event_type, ex, final=noop):
--> 155 try: self(f'before_{event_type}') ;f()
156 except ex: self(f'after_cancel_{event_type}')
157 finally: self(f'after_{event_type}') ;final()

~\anaconda3\envs\Neural Net\lib\site-packages\fastai\learner.py in _do_fit(self)
192 for epoch in range(self.n_epoch):
193 self.epoch=epoch
--> 194 self._with_events(self._do_epoch, 'epoch', CancelEpochException)
195
196 @log_args(but='cbs')

~\anaconda3\envs\Neural Net\lib\site-packages\fastai\learner.py in _with_events(self, f, event_type, ex, final)
153
154 def _with_events(self, f, event_type, ex, final=noop):
--> 155 try: self(f'before_{event_type}') ;f()
156 except ex: self(f'after_cancel_{event_type}')
157 finally: self(f'after_{event_type}') ;final()

~\anaconda3\envs\Neural Net\lib\site-packages\fastai\learner.py in _do_epoch(self)
186
187 def _do_epoch(self):
--> 188 self._do_epoch_train()
189 self._do_epoch_validate()
190

~\anaconda3\envs\Neural Net\lib\site-packages\fastai\learner.py in _do_epoch_train(self)
178 def _do_epoch_train(self):
179 self.dl = self.dls.train
--> 180 self._with_events(self.all_batches, 'train', CancelTrainException)
181
182 def _do_epoch_validate(self, ds_idx=1, dl=None):

~\anaconda3\envs\Neural Net\lib\site-packages\fastai\learner.py in _with_events(self, f, event_type, ex, final)
153
154 def _with_events(self, f, event_type, ex, final=noop):
--> 155 try: self(f'before_{event_type}') ;f()
156 except ex: self(f'after_cancel_{event_type}')
157 finally: self(f'after_{event_type}') ;final()

~\anaconda3\envs\Neural Net\lib\site-packages\fastai\learner.py in all_batches(self)
159 def all_batches(self):
160 self.n_iter = len(self.dl)
--> 161 for o in enumerate(self.dl): self.one_batch(*o)
162
163 def _do_one_batch(self):

~\anaconda3\envs\Neural Net\lib\site-packages\fastai\data\load.py in __iter__(self)
96 self.randomize()
97 self.before_iter()
---> 98 for b in _loaders[self.fake_l.num_workers==0](self.fake_l):
99 if self.device is not None: b = to_device(b, self.device)
100 yield self.after_batch(b)

~\anaconda3\envs\Neural Net\lib\site-packages\torch\utils\data\dataloader.py in __next__(self)
361
362 def __next__(self):
--> 363 data = self._next_data()
364 self._num_yielded += 1
365 if self._dataset_kind == _DatasetKind.Iterable and \

~\anaconda3\envs\Neural Net\lib\site-packages\torch\utils\data\dataloader.py in _next_data(self)
972
973 assert not self._shutdown and self._tasks_outstanding > 0
--> 974 idx, data = self._get_data()
975 self._tasks_outstanding -= 1
976

~\anaconda3\envs\Neural Net\lib\site-packages\torch\utils\data\dataloader.py in _get_data(self)
939 else:
940 while True:
--> 941 success, data = self._try_get_data()
942 if success:
943 return data

~\anaconda3\envs\Neural Net\lib\site-packages\torch\utils\data\dataloader.py in _try_get_data(self, timeout)
790 if len(failed_workers) > 0:
791 pids_str = ', '.join(str(w.pid) for w in failed_workers)
--> 792 raise RuntimeError('DataLoader worker (pid(s) {}) exited unexpectedly'.format(pids_str))
793 if isinstance(e, queue.Empty):
794 return (False, None)

RuntimeError: DataLoader worker (pid(s) 12456, 4440, 3268, 448) exited unexpectedly

I am not running on a GPU, as I haven't really figured out how to tell Jupyter Notebook to use it.
Thanks!!!
Luigi


I am also getting the same error; it does not occur on Colab (I tried there). I am also looking into how to set n_workers=0, because I think that is the solution to this problem.

Pass n_workers=0 when building your DataLoaders to fix this. (Essentially, we stop multiprocessing.)


Thanks, it's num_workers=0 instead of n_workers=0.
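
For reference, here is the call from the first post with that argument added (just a sketch; I'm assuming num_workers is simply forwarded through **kwargs to the underlying DataLoader, which is what keeps data loading in the main process):

from fastai.vision.all import *

path = untar_data(URLs.PETS)/'images'
def is_cat(x): return x[0].isupper()

# num_workers=0 disables the worker processes, so batches are loaded in the
# main process and the "DataLoader worker exited unexpectedly" crash goes away
dls = ImageDataLoaders.from_name_func(
    path, get_image_files(path), valid_pct=0.2, seed=42,
    label_func=is_cat, item_tfms=Resize(224), num_workers=0)

learn = cnn_learner(dls, resnet34, metrics=error_rate)
learn.fine_tune(1)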


Hi all, thanks everybody for the answers. However, before posting here I checked:

doc(ImageDataLoaders.from_name_func)

and I found no argument like num_workers to pass to that function.
But I will try it.
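My guess is that num_workers just isn't named in that signature because it travels through **kwargs down to the DataLoaders/DataLoader machinery, which would explain why passing it still works. A quick way to check (only a sketch, assuming a reasonably recent fastai version):

import inspect
from fastai.vision.all import ImageDataLoaders

# If **kwargs shows up in the printed signature, extra arguments such as
# num_workers ride along in it and end up at the underlying DataLoader.
print(inspect.signature(ImageDataLoaders.from_name_func))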
Thanks

I had the same issue

num_workers=0

fixed it. Thank you.

Hi, I am having a similar issue. However, my issue arises after fine_tune, at the ClassificationInterpretation step. I tried the num_workers fix, which allowed me to fine_tune, but I get the same error on the next step, as depicted here:

https://gyazo.com/8c05694c0278ea7be138e36cef56cdd3

The odd thing is that it worked yesterday while stepping through it. Today, it no longer works.
I am running it locally on a Windows machine but, as stated, it worked yesterday. Does anyone have any suggestions as to why I'm suddenly getting this error on this step?