RuntimeError: DataLoader worker (pid(s) 1830, 1832, 1834, 1836, 1838, 1842, 1844, 1846, 1848, 1850, 1852, 1854, 1856, 1864, 1869, 1871) exited unexpectedly

learn = cnn_learner(dls, resnet34, metrics=error_rate)
learn.fine_tune(0)

This is from the notebook 23_vision.tutorial. I got the following error:

RuntimeError Traceback (most recent call last)
~/.pyenv/versions/3.8.5/lib/python3.8/site-packages/torch/utils/data/dataloader.py in _try_get_data(self, timeout)
778 try:
–> 779 data = self._data_queue.get(timeout=timeout)
780 return (True, data)

~/.pyenv/versions/3.8.5/lib/python3.8/multiprocessing/queues.py in get(self, block, timeout)
106 timeout = deadline - time.monotonic()
–> 107 if not self._poll(timeout):
108 raise Empty

~/.pyenv/versions/3.8.5/lib/python3.8/multiprocessing/connection.py in poll(self, timeout)
256 self._check_readable()
–> 257 return self._poll(timeout)
258

~/.pyenv/versions/3.8.5/lib/python3.8/multiprocessing/connection.py in _poll(self, timeout)
423 def _poll(self, timeout):
–> 424 r = wait([self], timeout)
425 return bool(r)

~/.pyenv/versions/3.8.5/lib/python3.8/multiprocessing/connection.py in wait(object_list, timeout)
930 while True:
–> 931 ready = selector.select(timeout)
932 if ready:

~/.pyenv/versions/3.8.5/lib/python3.8/selectors.py in select(self, timeout)
414 try:
–> 415 fd_event_list = self._selector.poll(timeout)
416 except InterruptedError:

~/.pyenv/versions/3.8.5/lib/python3.8/site-packages/torch/utils/data/_utils/signal_handling.py in handler(signum, frame)
65 # Python can still get and update the process status successfully.
—> 66 _error_if_any_worker_fails()
67 if previous_handler is not None:

RuntimeError: DataLoader worker (pid 1869) exited unexpectedly with exit code 1. Details are lost due to multiprocessing. Rerunning with num_workers=0 may give better error trace.

During handling of the above exception, another exception occurred:

RuntimeError Traceback (most recent call last)
in
1 learn = cnn_learner(dls, resnet34, metrics=error_rate,)
----> 2 learn.fine_tune(0)

~/.pyenv/versions/3.8.5/lib/python3.8/site-packages/fastcore/utils.py in _f(*args, **kwargs)
452 init_args.update(log)
453 setattr(inst, ‘init_args’, init_args)
–> 454 return inst if to_return else f(*args, **kwargs)
455 return _f
456

~/.pyenv/versions/3.8.5/lib/python3.8/site-packages/fastai/callback/schedule.py in fine_tune(self, epochs, base_lr, freeze_epochs, lr_mult, pct_start, div, **kwargs)
159 “Fine tune with freeze for freeze_epochs then with unfreeze from epochs using discriminative LR”
160 self.freeze()
–> 161 self.fit_one_cycle(freeze_epochs, slice(base_lr), pct_start=0.99, **kwargs)
162 base_lr /= 2
163 self.unfreeze()

~/.pyenv/versions/3.8.5/lib/python3.8/site-packages/fastcore/utils.py in _f(*args, **kwargs)
452 init_args.update(log)
453 setattr(inst, ‘init_args’, init_args)
–> 454 return inst if to_return else f(*args, **kwargs)
455 return _f
456

~/.pyenv/versions/3.8.5/lib/python3.8/site-packages/fastai/callback/schedule.py in fit_one_cycle(self, n_epoch, lr_max, div, div_final, pct_start, wd, moms, cbs, reset_opt)
111 scheds = {‘lr’: combined_cos(pct_start, lr_max/div, lr_max, lr_max/div_final),
112 ‘mom’: combined_cos(pct_start, *(self.moms if moms is None else moms))}
–> 113 self.fit(n_epoch, cbs=ParamScheduler(scheds)+L(cbs), reset_opt=reset_opt, wd=wd)
114
115 # Cell

~/.pyenv/versions/3.8.5/lib/python3.8/site-packages/fastcore/utils.py in _f(*args, **kwargs)
452 init_args.update(log)
453 setattr(inst, ‘init_args’, init_args)
–> 454 return inst if to_return else f(*args, **kwargs)
455 return _f
456

~/.pyenv/versions/3.8.5/lib/python3.8/site-packages/fastai/learner.py in fit(self, n_epoch, lr, wd, cbs, reset_opt)
202 self.opt.set_hypers(lr=self.lr if lr is None else lr)
203 self.n_epoch,self.loss = n_epoch,tensor(0.)
–> 204 self._with_events(self._do_fit, ‘fit’, CancelFitException, self._end_cleanup)
205
206 def _end_cleanup(self): self.dl,self.xb,self.yb,self.pred,self.loss = None,(None,),(None,),None,None

~/.pyenv/versions/3.8.5/lib/python3.8/site-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
153
154 def _with_events(self, f, event_type, ex, final=noop):
–> 155 try: self(f'before_{event_type}') ;f()
156 except ex: self(f'after_cancel_{event_type}')
157 finally: self(f'after_{event_type}') ;final()

~/.pyenv/versions/3.8.5/lib/python3.8/site-packages/fastai/learner.py in _do_fit(self)
192 for epoch in range(self.n_epoch):
193 self.epoch=epoch
–> 194 self._with_events(self._do_epoch, ‘epoch’, CancelEpochException)
195
196 @log_args(but=‘cbs’)

~/.pyenv/versions/3.8.5/lib/python3.8/site-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
153
154 def _with_events(self, f, event_type, ex, final=noop):
–> 155 try: self(f'before_{event_type}') ;f()
156 except ex: self(f'after_cancel_{event_type}')
157 finally: self(f'after_{event_type}') ;final()

~/.pyenv/versions/3.8.5/lib/python3.8/site-packages/fastai/learner.py in _do_epoch(self)
186
187 def _do_epoch(self):
–> 188 self._do_epoch_train()
189 self._do_epoch_validate()
190

~/.pyenv/versions/3.8.5/lib/python3.8/site-packages/fastai/learner.py in _do_epoch_train(self)
178 def _do_epoch_train(self):
179 self.dl = self.dls.train
–> 180 self._with_events(self.all_batches, ‘train’, CancelTrainException)
181
182 def _do_epoch_validate(self, ds_idx=1, dl=None):

~/.pyenv/versions/3.8.5/lib/python3.8/site-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
153
154 def _with_events(self, f, event_type, ex, final=noop):
–> 155 try: self(f'before_{event_type}') ;f()
156 except ex: self(f'after_cancel_{event_type}')
157 finally: self(f'after_{event_type}') ;final()

~/.pyenv/versions/3.8.5/lib/python3.8/site-packages/fastai/learner.py in all_batches(self)
159 def all_batches(self):
160 self.n_iter = len(self.dl)
–> 161 for o in enumerate(self.dl): self.one_batch(*o)
162
163 def _do_one_batch(self):

~/.pyenv/versions/3.8.5/lib/python3.8/site-packages/fastai/data/load.py in __iter__(self)
96 self.randomize()
97 self.before_iter()
—> 98 for b in _loaders[self.fake_l.num_workers==0](self.fake_l):
99 if self.device is not None: b = to_device(b, self.device)
100 yield self.after_batch(b)

~/.pyenv/versions/3.8.5/lib/python3.8/site-packages/torch/utils/data/dataloader.py in __next__(self)
361
362 def __next__(self):
–> 363 data = self._next_data()
364 self._num_yielded += 1
365 if self._dataset_kind == _DatasetKind.Iterable and \

~/.pyenv/versions/3.8.5/lib/python3.8/site-packages/torch/utils/data/dataloader.py in _next_data(self)
972
973 assert not self._shutdown and self._tasks_outstanding > 0
–> 974 idx, data = self._get_data()
975 self._tasks_outstanding -= 1
976

~/.pyenv/versions/3.8.5/lib/python3.8/site-packages/torch/utils/data/dataloader.py in _get_data(self)
939 else:
940 while True:
–> 941 success, data = self._try_get_data()
942 if success:
943 return data

~/.pyenv/versions/3.8.5/lib/python3.8/site-packages/torch/utils/data/dataloader.py in _try_get_data(self, timeout)
790 if len(failed_workers) > 0:
791 pids_str = ', '.join(str(w.pid) for w in failed_workers)
–> 792 raise RuntimeError(‘DataLoader worker (pid(s) {}) exited unexpectedly’.format(pids_str))
793 if isinstance(e, queue.Empty):
794 return (False, None)

RuntimeError: DataLoader worker (pid(s) 1830, 1832, 1834, 1836, 1838, 1842, 1844, 1846, 1848, 1850, 1852, 1854, 1856, 1864, 1869, 1871) exited unexpectedly

OS: MacOS
python -v: 3.8

Also, I tried this with num_workers=0 and got the same error.

I got the same error. I am running on my Mac in an Anaconda environment.
Hoping for an answer.

learner = cnn_learner(dls,resnet34,pretrained=True,metrics=error_rate)
learner.fit_one_cycle(1)
I got the error:
AttributeError: '_FakeLoader' object has no attribute 'noops'

I resolved it!
dls = ImageDataLoaders.from_name_re(path,imgs,pat,item_tfms=item_tfms,batch_tfms=batch_tfms,bs=bs,num_workers=0)
The fix is to add num_workers=0 when creating the DataLoaders.

2 Likes

thank you!
It worked :slight_smile: