I’m trying to ‘debug’ my model using CAM (class activation maps) and the code from lesson 7. The difference between that code and mine is the data loader: I build the data object from a CSV file like so:
def get_data(sz):
    """Return the image-classifier data object built from the CSV labels.

    ``sz`` is forwarded to ``tfms_from_model`` together with ``f_model``
    (presumably the target image size -- confirm against the fastai API).
    """
    # Transforms are requested with crop_type=CropType.NO.
    transforms = tfms_from_model(f_model, sz, crop_type=CropType.NO)
    data = ImageClassifierData.from_csv(
        PROJECTBASEPATH,
        'cropped-images',
        CSVPATH,
        tfms=transforms,
        val_idxs=val_idxs,
    )
    return data
The CSV file contains multiclass tags for images. I get an error on this line:
learn.fit(1e-2, 1, cycle_len=1)
And the error is: RuntimeError: reduce failed to synchronize: device-side assert triggered
Does anyone have a clue what I’m doing wrong?
I updated PyTorch from version 0.3 to 0.4, but the same error still occurs.
Here’s the full stack trace from PyTorch:
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-29-126bea823c15> in <module>()
----> 1 learn.fit(1e-2, 1, cycle_len=1)
~/image-deep-learning/fastai/learner.py in fit(self, lrs, n_cycle, wds, **kwargs)
285 self.sched = None
286 layer_opt = self.get_layer_opt(lrs, wds)
--> 287 return self.fit_gen(self.model, self.data, layer_opt, n_cycle, **kwargs)
288
289 def warm_up(self, lr, wds=None):
~/image-deep-learning/fastai/learner.py in fit_gen(self, model, data, layer_opt, n_cycle, cycle_len, cycle_mult, cycle_save_name, best_save_name, use_clr, use_clr_beta, metrics, callbacks, use_wd_sched, norm_wds, wds_sched_mult, use_swa, swa_start, swa_eval_freq, **kwargs)
232 metrics=metrics, callbacks=callbacks, reg_fn=self.reg_fn, clip=self.clip, fp16=self.fp16,
233 swa_model=self.swa_model if use_swa else None, swa_start=swa_start,
--> 234 swa_eval_freq=swa_eval_freq, **kwargs)
235
236 def get_layer_groups(self): return self.models.get_layer_groups()
~/image-deep-learning/fastai/model.py in fit(model, data, n_epochs, opt, crit, metrics, callbacks, stepper, swa_model, swa_start, swa_eval_freq, **kwargs)
130 batch_num += 1
131 for cb in callbacks: cb.on_batch_begin()
--> 132 loss = model_stepper.step(V(x),V(y), epoch)
133 avg_loss = avg_loss * avg_mom + loss * (1-avg_mom)
134 debias_loss = avg_loss / (1 - avg_mom**batch_num)
~/image-deep-learning/fastai/model.py in step(self, xs, y, epoch)
52 if self.fp16: self.m.zero_grad()
53 else: self.opt.zero_grad()
---> 54 loss = raw_loss = self.crit(output, y)
55 if self.loss_scale != 1: assert(self.fp16); loss = loss*self.loss_scale
56 if self.reg_fn: loss = self.reg_fn(output, xtra, raw_loss)
~/anaconda3/envs/fastai/lib/python3.6/site-packages/torch/nn/functional.py in binary_cross_entropy(input, target, weight, size_average, reduce)
1481 weight = weight.expand(new_size)
1482
-> 1483 return torch._C._nn.binary_cross_entropy(input, target, weight, size_average, reduce)
1484
1485