Using fastai v1.0.20:
How should a learner be created for segmentation? I am currently attempting segmentation for this challenge: https://www.kaggle.com/c/tgs-salt-identification-challenge
Right now I have the images and the masks stored as separate PNG files.
Following the CamVid example, I attempted to create a learner with:
data = (ImageFileList.from_folder(path_trn)
.label_from_func(get_y_fn)
.random_split_by_pct()
.datasets(SegmentationDataset, classes=codes)
.transform(get_transforms(), size=96, tfm_y=True)
.databunch(bs=64))
learn = Learner.create_unet(data, models.resnet34)
However, when I call learn.fit, I get a CUDA error:
RuntimeError Traceback (most recent call last)
<ipython-input-24-bd510e07cfec> in <module>
1 CUDA_LAUNCH_BLOCKING=1
----> 2 learn.fit(1)
~/anaconda3/envs/fastai12/lib/python3.7/site-packages/fastai/basic_train.py in fit(self, epochs, lr, wd, callbacks)
160 callbacks = [cb(self) for cb in self.callback_fns] + listify(callbacks)
161 fit(epochs, self.model, self.loss_func, opt=self.opt, data=self.data, metrics=self.metrics,
--> 162 callbacks=self.callbacks+callbacks)
163
164 def create_opt(self, lr:Floats, wd:Floats=0.)->None:
~/anaconda3/envs/fastai12/lib/python3.7/site-packages/fastai/basic_train.py in fit(epochs, model, loss_func, opt, data, callbacks, metrics)
92 except Exception as e:
93 exception = e
---> 94 raise e
95 finally: cb_handler.on_train_end(exception)
96
~/anaconda3/envs/fastai12/lib/python3.7/site-packages/fastai/basic_train.py in fit(epochs, model, loss_func, opt, data, callbacks, metrics)
80 cb_handler.on_epoch_begin()
81
---> 82 for xb,yb in progress_bar(data.train_dl, parent=pbar):
83 xb, yb = cb_handler.on_batch_begin(xb, yb)
84 loss = loss_batch(model, xb, yb, loss_func, opt, cb_handler)
~/anaconda3/envs/fastai12/lib/python3.7/site-packages/fastprogress/fastprogress.py in __iter__(self)
63 self.update(0)
64 try:
---> 65 for i,o in enumerate(self._gen):
66 yield o
67 if self.auto_update: self.update(i+1)
~/anaconda3/envs/fastai12/lib/python3.7/site-packages/fastai/basic_data.py in __iter__(self)
85 y = b[1][0] if is_listy(b[1]) else b[1]
86 if not self.skip_size1 or y.size(0) != 1:
---> 87 yield self.proc_batch(b)
88
89 def one_batch(self)->Collection[Tensor]:
~/anaconda3/envs/fastai12/lib/python3.7/site-packages/fastai/basic_data.py in proc_batch(self, b)
76 def proc_batch(self,b:Tensor)->Tensor:
77 "Proces batch `b` of `TensorImage`."
---> 78 b = to_device(b, self.device)
79 for f in listify(self.tfms): b = f(b)
80 return b
~/anaconda3/envs/fastai12/lib/python3.7/site-packages/fastai/torch_core.py in to_device(b, device)
85 "Ensure `b` is on `device`."
86 device = ifnone(device, defaults.device)
---> 87 if is_listy(b): return [to_device(o, device) for o in b]
88 return b.to(device)
89
~/anaconda3/envs/fastai12/lib/python3.7/site-packages/fastai/torch_core.py in <listcomp>(.0)
85 "Ensure `b` is on `device`."
86 device = ifnone(device, defaults.device)
---> 87 if is_listy(b): return [to_device(o, device) for o in b]
88 return b.to(device)
89
~/anaconda3/envs/fastai12/lib/python3.7/site-packages/fastai/torch_core.py in to_device(b, device)
86 device = ifnone(device, defaults.device)
87 if is_listy(b): return [to_device(o, device) for o in b]
---> 88 return b.to(device)
89
90 def data_collate(batch:ItemsList)->Tensor:
RuntimeError: cuda runtime error (59) : device-side assert triggered at /opt/conda/conda-bld/pytorch-nightly_1541497631545/work/aten/src/THC/generic/THCTensorCopy.cpp:20
The data seems fine: I was able to view it properly with data.show_batch(), and the learner was created without any errors. For some reason, however, the learner fails as soon as I call learn.fit(). I am also not sure how to create a proper metric for segmentation, but I would first like to fix the CUDA error before tackling that.