I am just so confused. I have made my example follow so closely the only thing they differ on now is that they are loading different images. But I can now make the CAMVID example error out.
Can anyone replicate the example below? You only need to change the CAMVID path to fit your system.
import torchvision as tv
import torch; torch.__version__, torch.__file__
from src.dataset_builder import load_dataset_from_address
from utils.misc_utils import load_h5py
from PIL import Image
import numpy as np
import matplotlib.pylab as plt
# %pylab inline
from fastai2.basics import *
from fastai2.callback.all import *
from fastai2.vision.all import *
def seg_accuracy(input, target):
target = target.squeeze(1)
return (input.argmax(dim=1)==target).float().mean()
path = '/home/jakub/.fastai/data/camvid/images/'
dls = SegmentationDataLoaders.from_label_func(path, bs=1,
fnames = get_image_files(path),
item_tfms=RandomResizedCrop(256),
# label_func = lambda o: str(o).replace(
# '_standard_','_coco_').replace('standard','label').replace('jpg','png'),
label_func = lambda o: str(o).replace('images','labels').replace('.png','_P.png'),
codes = np.loadtxt('/home/jakub/.fastai/data/camvid/codes.txt', dtype=str),
batch_tfms=[*aug_transforms(size=(360,480)), Normalize.from_stats(*imagenet_stats)])
# +
# codes = np.loadtxt('st_codes.txt', dtype=str)
codes = np.loadtxt('/home/jakub/.fastai/data/camvid/codes.txt', dtype=str)
dls.vocab = codes
name2id = {v:k for k,v in enumerate(codes)}
void_code = name2id['Void']
def acc_camvid(input, target):
target = target.squeeze(1)
mask = target != void_code
return (input.argmax(dim=1)[mask]==target[mask]).float().mean()
# -
dls.show_batch(max_n=2, rows=1, vmin=1, vmax=30, figsize=(20, 7))
# +
opt_func = partial(Adam, lr=3e-3, wd=0.01)#, eps=1e-8)
learn = unet_learner(dls, resnet34, loss_func=CrossEntropyLossFlat(axis=1), opt_func=opt_func, path=path,
metrics=acc_camvid, n_out=6,
config = unet_config(norm_type=None, self_attention=True), wd_bn_bias=True)
# +
# lrnr = unet_learner(dls, resnet50, config=unet_config(self_attention=True),
# n_out=6, loss_func=seg_accuracy)
# -
learn.fit_one_cycle(10,3e-4)
This yields the following:
epoch train_loss valid_loss acc_camvid time
0 0.000000 00:02
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
~/daisy-gan/venv/lib/python3.6/site-packages/fastai2/learner.py in one_batch(self, i, b)
251 if not self.training: return
--> 252 self.loss.backward(); self('after_backward')
253 self.opt.step(); self('after_step')
~/daisy-gan/venv/lib/python3.6/site-packages/torch/tensor.py in backward(self, gradient, retain_graph, create_graph)
149 """
--> 150 torch.autograd.backward(self, gradient, retain_graph, create_graph)
151
~/daisy-gan/venv/lib/python3.6/site-packages/torch/autograd/__init__.py in backward(tensors, grad_tensors, retain_graph, create_graph, grad_variables)
98 tensors, grad_tensors, retain_graph, create_graph,
---> 99 allow_unreachable=True) # allow_unreachable flag
100
RuntimeError: cuda runtime error (710) : device-side assert triggered at /pytorch/aten/src/THC/generic/THCTensorMath.cu:26
During handling of the above exception, another exception occurred:
RuntimeError Traceback (most recent call last)
<ipython-input-12-474116ee9487> in <module>
----> 1 learn.fit_one_cycle(10,3e-4)
~/daisy-gan/venv/lib/python3.6/site-packages/fastai2/callback/schedule.py in fit_one_cycle(self, n_epoch, lr_max, div, div_final, pct_start, wd, moms, cbs, reset_opt)
88 scheds = {'lr': combined_cos(pct_start, lr_max/div, lr_max, lr_max/div_final),
89 'mom': combined_cos(pct_start, *(self.moms if moms is None else moms))}
---> 90 self.fit(n_epoch, cbs=ParamScheduler(scheds)+L(cbs), reset_opt=reset_opt, wd=wd)
91
92 # Cell
~/daisy-gan/venv/lib/python3.6/site-packages/fastai2/learner.py in fit(self, n_epoch, lr, wd, cbs, reset_opt)
287 try:
288 self.epoch=epoch; self('begin_epoch')
--> 289 self._do_epoch_train()
290 self._do_epoch_validate()
291 except CancelEpochException: self('after_cancel_epoch')
~/daisy-gan/venv/lib/python3.6/site-packages/fastai2/learner.py in _do_epoch_train(self)
262 try:
263 self.dl = self.dls.train; self('begin_train')
--> 264 self.all_batches()
265 except CancelTrainException: self('after_cancel_train')
266 finally: self('after_train')
~/daisy-gan/venv/lib/python3.6/site-packages/fastai2/learner.py in all_batches(self)
240 def all_batches(self):
241 self.n_iter = len(self.dl)
--> 242 for o in enumerate(self.dl): self.one_batch(*o)
243
244 def one_batch(self, i, b):
~/daisy-gan/venv/lib/python3.6/site-packages/fastai2/learner.py in one_batch(self, i, b)
254 self.opt.zero_grad()
255 except CancelBatchException: self('after_cancel_batch')
--> 256 finally: self('after_batch')
257
258 def _do_begin_fit(self, n_epoch):
~/daisy-gan/venv/lib/python3.6/site-packages/fastai2/learner.py in __call__(self, event_name)
221 def ordered_cbs(self, cb_func:str): return [cb for cb in sort_by_run(self.cbs) if hasattr(cb, cb_func)]
222
--> 223 def __call__(self, event_name): L(event_name).map(self._call_one)
224 def _call_one(self, event_name):
225 assert hasattr(event, event_name)
~/daisy-gan/venv/lib/python3.6/site-packages/fastcore/foundation.py in map(self, f, *args, **kwargs)
360 else f.format if isinstance(f,str)
361 else f.__getitem__)
--> 362 return self._new(map(g, self))
363
364 def filter(self, f, negate=False, **kwargs):
~/daisy-gan/venv/lib/python3.6/site-packages/fastcore/foundation.py in _new(self, items, *args, **kwargs)
313 @property
314 def _xtra(self): return None
--> 315 def _new(self, items, *args, **kwargs): return type(self)(items, *args, use_list=None, **kwargs)
316 def __getitem__(self, idx): return self._get(idx) if is_indexer(idx) else L(self._get(idx), use_list=None)
317 def copy(self): return self._new(self.items.copy())
~/daisy-gan/venv/lib/python3.6/site-packages/fastcore/foundation.py in __call__(cls, x, *args, **kwargs)
39 return x
40
---> 41 res = super().__call__(*((x,) + args), **kwargs)
42 res._newchk = 0
43 return res
~/daisy-gan/venv/lib/python3.6/site-packages/fastcore/foundation.py in __init__(self, items, use_list, match, *rest)
304 if items is None: items = []
305 if (use_list is not None) or not _is_array(items):
--> 306 items = list(items) if use_list else _listify(items)
307 if match is not None:
308 if is_coll(match): match = len(match)
~/daisy-gan/venv/lib/python3.6/site-packages/fastcore/foundation.py in _listify(o)
240 if isinstance(o, list): return o
241 if isinstance(o, str) or _is_array(o): return [o]
--> 242 if is_iter(o): return list(o)
243 return [o]
244
~/daisy-gan/venv/lib/python3.6/site-packages/fastcore/foundation.py in __call__(self, *args, **kwargs)
206 if isinstance(v,_Arg): kwargs[k] = args.pop(v.i)
207 fargs = [args[x.i] if isinstance(x, _Arg) else x for x in self.pargs] + args[self.maxi+1:]
--> 208 return self.fn(*fargs, **kwargs)
209
210 # Cell
~/daisy-gan/venv/lib/python3.6/site-packages/fastai2/learner.py in _call_one(self, event_name)
224 def _call_one(self, event_name):
225 assert hasattr(event, event_name)
--> 226 [cb(event_name) for cb in sort_by_run(self.cbs)]
227
228 def _bn_bias_state(self, with_bias): return bn_bias_params(self.model, with_bias).map(self.opt.state)
~/daisy-gan/venv/lib/python3.6/site-packages/fastai2/learner.py in <listcomp>(.0)
224 def _call_one(self, event_name):
225 assert hasattr(event, event_name)
--> 226 [cb(event_name) for cb in sort_by_run(self.cbs)]
227
228 def _bn_bias_state(self, with_bias): return bn_bias_params(self.model, with_bias).map(self.opt.state)
~/daisy-gan/venv/lib/python3.6/site-packages/fastai2/learner.py in __call__(self, event_name)
23 _run = (event_name not in _inner_loop or (self.run_train and getattr(self, 'training', True)) or
24 (self.run_valid and not getattr(self, 'training', False)))
---> 25 if self.run and _run: getattr(self, event_name, noop)()
26
27 @property
~/daisy-gan/venv/lib/python3.6/site-packages/fastai2/learner.py in after_batch(self)
495 if len(self.yb) == 0: return
496 mets = self._train_mets if self.training else self._valid_mets
--> 497 for met in mets: met.accumulate(self.learn)
498 if not self.training: return
499 self.lrs.append(self.opt.hypers[-1]['lr'])
~/daisy-gan/venv/lib/python3.6/site-packages/fastai2/learner.py in accumulate(self, learn)
458 def accumulate(self, learn):
459 self.count += 1
--> 460 self.val = torch.lerp(to_detach(learn.loss.mean(), gather=False), self.val, self.beta)
461 @property
462 def value(self): return self.val/(1-self.beta**self.count)
RuntimeError: CUDA error: device-side assert triggered