Hi. The following happens on Windows, but not on Linux.
After a successful training loop (as in the 1st lesson), I try to predict either on a custom PIL image or on an element provided by the `dls`, and it fails (stack trace below), whereas if I convert the image to a PyTorch tensor and predict with `learn.model()`, it succeeds.
Stack trace:
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
c:\users\poko\anaconda3\envs\f2\lib\site-packages\fastai2\learner.py in _do_epoch_validate(self, ds_idx, dl)
173 self.dl = dl; self('begin_validate')
--> 174 with torch.no_grad(): self.all_batches()
175 except CancelValidException: self('after_cancel_validate')
c:\users\poko\anaconda3\envs\f2\lib\site-packages\fastai2\learner.py in all_batches(self)
141 self.n_iter = len(self.dl)
--> 142 for o in enumerate(self.dl): self.one_batch(*o)
143
c:\users\poko\anaconda3\envs\f2\lib\site-packages\fastai2\data\load.py in __iter__(self)
96 self.before_iter()
---> 97 for b in _loaders[self.fake_l.num_workers==0](self.fake_l):
98 if self.device is not None: b = to_device(b, self.device)
c:\users\poko\anaconda3\envs\f2\lib\site-packages\torch\utils\data\dataloader.py in __init__(self, loader)
718 # AssertionError: can only join a started process.
--> 719 w.start()
720 self._index_queues.append(index_queue)
c:\users\poko\anaconda3\envs\f2\lib\multiprocessing\process.py in start(self)
111 _cleanup()
--> 112 self._popen = self._Popen(self)
113 self._sentinel = self._popen.sentinel
c:\users\poko\anaconda3\envs\f2\lib\multiprocessing\context.py in _Popen(process_obj)
222 def _Popen(process_obj):
--> 223 return _default_context.get_context().Process._Popen(process_obj)
224
c:\users\poko\anaconda3\envs\f2\lib\multiprocessing\context.py in _Popen(process_obj)
321 from .popen_spawn_win32 import Popen
--> 322 return Popen(process_obj)
323
c:\users\poko\anaconda3\envs\f2\lib\multiprocessing\popen_spawn_win32.py in __init__(self, process_obj)
88 reduction.dump(prep_data, to_child)
---> 89 reduction.dump(process_obj, to_child)
90 finally:
c:\users\poko\anaconda3\envs\f2\lib\multiprocessing\reduction.py in dump(obj, file, protocol)
59 '''Replacement for pickle.dump() using ForkingPickler.'''
---> 60 ForkingPickler(file, protocol).dump(obj)
61
c:\users\poko\anaconda3\envs\f2\lib\site-packages\torch\multiprocessing\reductions.py in reduce_tensor(tensor)
241 event_handle,
--> 242 event_sync_required) = storage._share_cuda_()
243 tensor_offset = tensor.storage_offset()
RuntimeError: cuda runtime error (801) : operation not supported at C:\w\1\s\tmp_conda_3.7_100118\conda\conda-bld\pytorch_1579082551706\work\torch/csrc/generic/StorageSharing.cpp:245
During handling of the above exception, another exception occurred:
IndexError Traceback (most recent call last)
<ipython-input-95-3b6ef5ec6f96> in <module>
----> 1 learn.predict(img)
c:\users\poko\anaconda3\envs\f2\lib\site-packages\fastai2\learner.py in predict(self, item, rm_type_tfms, with_input)
228 def predict(self, item, rm_type_tfms=None, with_input=False):
229 dl = self.dls.test_dl([item], rm_type_tfms=rm_type_tfms)
--> 230 inp,preds,_,dec_preds = self.get_preds(dl=dl, with_input=True, with_decoded=True)
231 dec = self.dls.decode_batch((*tuplify(inp),*tuplify(dec_preds)))[0]
232 i = getattr(self.dls, 'n_inp', -1)
c:\users\poko\anaconda3\envs\f2\lib\site-packages\fastai2\learner.py in get_preds(self, ds_idx, dl, with_input, with_decoded, with_loss, act, inner, **kwargs)
216 for mgr in ctx_mgrs: stack.enter_context(mgr)
217 self(event.begin_epoch if inner else _before_epoch)
--> 218 self._do_epoch_validate(dl=dl)
219 self(event.after_epoch if inner else _after_epoch)
220 if act is None: act = getattr(self.loss_func, 'activation', noop)
c:\users\poko\anaconda3\envs\f2\lib\site-packages\fastai2\learner.py in _do_epoch_validate(self, ds_idx, dl)
175 except CancelValidException: self('after_cancel_validate')
176 finally:
--> 177 dl,*_ = change_attrs(dl, names, old, has); self('after_validate')
178
179 def fit(self, n_epoch, lr=None, wd=None, cbs=None, reset_opt=False):
c:\users\poko\anaconda3\envs\f2\lib\site-packages\fastai2\learner.py in __call__(self, event_name)
121 def ordered_cbs(self, cb_func): return [cb for cb in sort_by_run(self.cbs) if hasattr(cb, cb_func)]
122
--> 123 def __call__(self, event_name): L(event_name).map(self._call_one)
124 def _call_one(self, event_name):
125 assert hasattr(event, event_name)
c:\users\poko\anaconda3\envs\f2\lib\site-packages\fastcore\foundation.py in map(self, f, *args, **kwargs)
360 else f.format if isinstance(f,str)
361 else f.__getitem__)
--> 362 return self._new(map(g, self))
363
364 def filter(self, f, negate=False, **kwargs):
c:\users\poko\anaconda3\envs\f2\lib\site-packages\fastcore\foundation.py in _new(self, items, *args, **kwargs)
313 @property
314 def _xtra(self): return None
--> 315 def _new(self, items, *args, **kwargs): return type(self)(items, *args, use_list=None, **kwargs)
316 def __getitem__(self, idx): return self._get(idx) if is_indexer(idx) else L(self._get(idx), use_list=None)
317 def copy(self): return self._new(self.items.copy())
c:\users\poko\anaconda3\envs\f2\lib\site-packages\fastcore\foundation.py in __call__(cls, x, *args, **kwargs)
39 return x
40
---> 41 res = super().__call__(*((x,) + args), **kwargs)
42 res._newchk = 0
43 return res
c:\users\poko\anaconda3\envs\f2\lib\site-packages\fastcore\foundation.py in __init__(self, items, use_list, match, *rest)
304 if items is None: items = []
305 if (use_list is not None) or not _is_array(items):
--> 306 items = list(items) if use_list else _listify(items)
307 if match is not None:
308 if is_coll(match): match = len(match)
c:\users\poko\anaconda3\envs\f2\lib\site-packages\fastcore\foundation.py in _listify(o)
240 if isinstance(o, list): return o
241 if isinstance(o, str) or _is_array(o): return [o]
--> 242 if is_iter(o): return list(o)
243 return [o]
244
c:\users\poko\anaconda3\envs\f2\lib\site-packages\fastcore\foundation.py in __call__(self, *args, **kwargs)
206 if isinstance(v,_Arg): kwargs[k] = args.pop(v.i)
207 fargs = [args[x.i] if isinstance(x, _Arg) else x for x in self.pargs] + args[self.maxi+1:]
--> 208 return self.fn(*fargs, **kwargs)
209
210 # Cell
c:\users\poko\anaconda3\envs\f2\lib\site-packages\fastai2\learner.py in _call_one(self, event_name)
124 def _call_one(self, event_name):
125 assert hasattr(event, event_name)
--> 126 [cb(event_name) for cb in sort_by_run(self.cbs)]
127
128 def _bn_bias_state(self, with_bias): return bn_bias_params(self.model, with_bias).map(self.opt.state)
c:\users\poko\anaconda3\envs\f2\lib\site-packages\fastai2\learner.py in <listcomp>(.0)
124 def _call_one(self, event_name):
125 assert hasattr(event, event_name)
--> 126 [cb(event_name) for cb in sort_by_run(self.cbs)]
127
128 def _bn_bias_state(self, with_bias): return bn_bias_params(self.model, with_bias).map(self.opt.state)
c:\users\poko\anaconda3\envs\f2\lib\site-packages\fastai2\callback\core.py in __call__(self, event_name)
21 _run = (event_name not in _inner_loop or (self.run_train and getattr(self, 'training', True)) or
22 (self.run_valid and not getattr(self, 'training', False)))
---> 23 if self.run and _run: getattr(self, event_name, noop)()
24 if event_name=='after_fit': self.run=True #Reset self.run to True at each end of fit
25
c:\users\poko\anaconda3\envs\f2\lib\site-packages\fastai2\callback\core.py in after_validate(self)
92 def after_validate(self):
93 "Concatenate all recorded tensors"
---> 94 if self.with_input: self.inputs = detuplify(to_concat(self.inputs, dim=self.concat_dim))
95 if not self.save_preds: self.preds = detuplify(to_concat(self.preds, dim=self.concat_dim))
96 if not self.save_targs: self.targets = detuplify(to_concat(self.targets, dim=self.concat_dim))
c:\users\poko\anaconda3\envs\f2\lib\site-packages\fastai2\torch_core.py in to_concat(xs, dim)
211 def to_concat(xs, dim=0):
212 "Concat the element in `xs` (recursively if they are tuples/lists of tensors)"
--> 213 if is_listy(xs[0]): return type(xs[0])([to_concat([x[i] for x in xs], dim=dim) for i in range_of(xs[0])])
214 if isinstance(xs[0],dict): return {k: to_concat([x[k] for x in xs], dim=dim) for k in xs[0].keys()}
215 #We may receives xs that are not concatenatable (inputs of a text classifier for instance),
IndexError: list index out of range
`pdb` says `xs` is an empty list at the moment the error is raised… It seems Windows is interfering with something, but I cannot figure out what…