Hey there!
I'm coming to share an issue I'm facing. Maybe some of you are in the same situation, and I hope we can find an answer together.
As a beginner I'm practicing fastai.vision on "Urban Sound Classification" by converting the audio files to spectrograms. I hit a first CUDA runtime error when creating the ImageDataLoaders, and the fix was to pass num_workers=0 (there's a rough sketch of that setup at the end of this post). Training then went fine, and now I want to run some predictions on my test set. For now I just tried to predict a single image, and here's what I got:
img = PILImage.create('../Urban Sound Classification/Data Sources/test_spectrogram/416.png')
img = img.resize((300,300))  # resize returns a new image, so reassign it
learn.predict(TensorImage(image2tensor(img)))  # not sure if this is the correct way to pass an image
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-17-fb76c5c3289b> in <module>
----> 1 learn.predict(TensorImage(image2tensor(img)))
~\miniconda3\envs\tensorflow\lib\site-packages\fastai\learner.py in predict(self, item, rm_type_tfms, with_input)
246 def predict(self, item, rm_type_tfms=None, with_input=False):
247 dl = self.dls.test_dl([item], rm_type_tfms=rm_type_tfms, num_workers=0)
--> 248 inp,preds,_,dec_preds = self.get_preds(dl=dl, with_input=True, with_decoded=True)
249 i = getattr(self.dls, 'n_inp', -1)
250 inp = (inp,) if i==1 else tuplify(inp)
~\miniconda3\envs\tensorflow\lib\site-packages\fastai\learner.py in get_preds(self, ds_idx, dl, with_input, with_decoded, with_loss, act, inner, reorder, cbs, n_workers, **kwargs)
233 if with_loss: ctx_mgrs.append(self.loss_not_reduced())
234 with ContextManagers(ctx_mgrs):
--> 235 self._do_epoch_validate(dl=dl)
236 if act is None: act = getattr(self.loss_func, 'activation', noop)
237 res = cb.all_tensors()
~\miniconda3\envs\tensorflow\lib\site-packages\fastai\learner.py in _do_epoch_validate(self, ds_idx, dl)
186 if dl is None: dl = self.dls[ds_idx]
187 self.dl = dl
--> 188 with torch.no_grad(): self._with_events(self.all_batches, 'validate', CancelValidException)
189
190 def _do_epoch(self):
~\miniconda3\envs\tensorflow\lib\site-packages\fastai\learner.py in _with_events(self, f, event_type, ex, final)
153
154 def _with_events(self, f, event_type, ex, final=noop):
--> 155 try: self(f'before_{event_type}') ;f()
156 except ex: self(f'after_cancel_{event_type}')
157 finally: self(f'after_{event_type}') ;final()
~\miniconda3\envs\tensorflow\lib\site-packages\fastai\learner.py in all_batches(self)
159 def all_batches(self):
160 self.n_iter = len(self.dl)
--> 161 for o in enumerate(self.dl): self.one_batch(*o)
162
163 def _do_one_batch(self):
~\miniconda3\envs\tensorflow\lib\site-packages\fastai\data\load.py in __iter__(self)
101 self.randomize()
102 self.before_iter()
--> 103 for b in _loaders[self.fake_l.num_workers==0](self.fake_l):
104 if self.device is not None: b = to_device(b, self.device)
105 yield self.after_batch(b)
~\miniconda3\envs\tensorflow\lib\site-packages\torch\utils\data\dataloader.py in __init__(self, loader)
735 # before it starts, and __del__ tries to join but will get:
736 # AssertionError: can only join a started process.
--> 737 w.start()
738 self._index_queues.append(index_queue)
739 self._workers.append(w)
~\miniconda3\envs\tensorflow\lib\multiprocessing\process.py in start(self)
110 'daemonic processes are not allowed to have children'
111 _cleanup()
--> 112 self._popen = self._Popen(self)
113 self._sentinel = self._popen.sentinel
114 # Avoid a refcycle if the target function holds an indirect
~\miniconda3\envs\tensorflow\lib\multiprocessing\context.py in _Popen(process_obj)
221 @staticmethod
222 def _Popen(process_obj):
--> 223 return _default_context.get_context().Process._Popen(process_obj)
224
225 class DefaultContext(BaseContext):
~\miniconda3\envs\tensorflow\lib\multiprocessing\context.py in _Popen(process_obj)
320 def _Popen(process_obj):
321 from .popen_spawn_win32 import Popen
--> 322 return Popen(process_obj)
323
324 class SpawnContext(BaseContext):
~\miniconda3\envs\tensorflow\lib\multiprocessing\popen_spawn_win32.py in __init__(self, process_obj)
87 try:
88 reduction.dump(prep_data, to_child)
---> 89 reduction.dump(process_obj, to_child)
90 finally:
91 set_spawning_popen(None)
~\miniconda3\envs\tensorflow\lib\multiprocessing\reduction.py in dump(obj, file, protocol)
58 def dump(obj, file, protocol=None):
59 '''Replacement for pickle.dump() using ForkingPickler.'''
---> 60 ForkingPickler(file, protocol).dump(obj)
61
62 #
~\miniconda3\envs\tensorflow\lib\site-packages\torch\multiprocessing\reductions.py in reduce_tensor(tensor)
238 ref_counter_offset,
239 event_handle,
--> 240 event_sync_required) = storage._share_cuda_()
241 tensor_offset = tensor.storage_offset()
242 shared_cache[handle] = StorageWeakRef(storage)
RuntimeError: cuda runtime error (801) : operation not supported at ..\torch/csrc/generic/StorageSharing.cpp:247
The full project is on GitHub here, and the error is at the bottom of the notebook, in case it helps: https://github.com/loicrouillermonay/Machine-Learning-Projects/blob/urban-sound-classification/Urban%20Sound%20Classification/Urban%20Sound%20Classification.ipynb
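For context, here is roughly how I built the DataLoaders and trained with num_workers=0 (a minimal sketch from memory; the CSV name, column/folder names and hyperparameters are just placeholders, the exact cell is in the notebook):

from fastai.vision.all import *

# Sketch of the training setup; paths, CSV/folder names and hyperparameters are placeholders
path = Path('../Urban Sound Classification/Data Sources')
dls = ImageDataLoaders.from_csv(
    path,
    csv_fname='train.csv',        # placeholder: CSV listing spectrogram file names and labels
    folder='train_spectrogram',   # placeholder: folder containing the spectrogram PNGs
    item_tfms=Resize(300),
    num_workers=0)                # single-process loading worked around the first CUDA error
learn = cnn_learner(dls, resnet34, metrics=accuracy)
learn.fine_tune(5)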