Issues with CAM code from lesson 7 and multiclass classification from CSV

gorgiRankovski · July 11, 2018, 1:02pm

I’m trying to ‘debug’ my model using CAM (class activation maps) and the code from lesson 7. The difference between that code and mine is the data loader: I build the data object from a CSV file like so:

def get_data(sz):
    """Return the image-classifier data object built from the CSV labels.

    `sz` is forwarded to `tfms_from_model` as the size argument; the
    transforms are created with `CropType.NO`. The validation indices come
    from the module-level `val_idxs`.
    """
    transforms = tfms_from_model(f_model, sz, crop_type=CropType.NO)
    data = ImageClassifierData.from_csv(
        PROJECTBASEPATH,
        'cropped-images',
        CSVPATH,
        tfms=transforms,
        val_idxs=val_idxs,
    )
    return data

The CSV file contains multiclass tags for images. I get an error on this line:

# Fit with lrs=1e-2, n_cycle=1 and cycle_len=1; this is the call that raises
# the RuntimeError shown below.
learn.fit(1e-2, 1, cycle_len=1)

And the error is: RuntimeError: reduce failed to synchronize: device-side assert triggered

Does anyone have a clue what I’m doing wrong?

I updated PyTorch from version 0.3 to 0.4 and the same error occurs.

Here’s the full stack trace from PyTorch:

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-29-126bea823c15> in <module>()
----> 1 learn.fit(1e-2, 1, cycle_len=1)

~/image-deep-learning/fastai/learner.py in fit(self, lrs, n_cycle, wds, **kwargs)
    285         self.sched = None
    286         layer_opt = self.get_layer_opt(lrs, wds)
--> 287         return self.fit_gen(self.model, self.data, layer_opt, n_cycle, **kwargs)
    288 
    289     def warm_up(self, lr, wds=None):

~/image-deep-learning/fastai/learner.py in fit_gen(self, model, data, layer_opt, n_cycle, cycle_len, cycle_mult, cycle_save_name, best_save_name, use_clr, use_clr_beta, metrics, callbacks, use_wd_sched, norm_wds, wds_sched_mult, use_swa, swa_start, swa_eval_freq, **kwargs)
    232             metrics=metrics, callbacks=callbacks, reg_fn=self.reg_fn, clip=self.clip, fp16=self.fp16,
    233             swa_model=self.swa_model if use_swa else None, swa_start=swa_start,
--> 234             swa_eval_freq=swa_eval_freq, **kwargs)
    235 
    236     def get_layer_groups(self): return self.models.get_layer_groups()

~/image-deep-learning/fastai/model.py in fit(model, data, n_epochs, opt, crit, metrics, callbacks, stepper, swa_model, swa_start, swa_eval_freq, **kwargs)
    130             batch_num += 1
    131             for cb in callbacks: cb.on_batch_begin()
--> 132             loss = model_stepper.step(V(x),V(y), epoch)
    133             avg_loss = avg_loss * avg_mom + loss * (1-avg_mom)
    134             debias_loss = avg_loss / (1 - avg_mom**batch_num)

~/image-deep-learning/fastai/model.py in step(self, xs, y, epoch)
     52         if self.fp16: self.m.zero_grad()
     53         else: self.opt.zero_grad()
---> 54         loss = raw_loss = self.crit(output, y)
     55         if self.loss_scale != 1: assert(self.fp16); loss = loss*self.loss_scale
     56         if self.reg_fn: loss = self.reg_fn(output, xtra, raw_loss)

~/anaconda3/envs/fastai/lib/python3.6/site-packages/torch/nn/functional.py in binary_cross_entropy(input, target, weight, size_average, reduce)
   1481         weight = weight.expand(new_size)
   1482 
-> 1483     return torch._C._nn.binary_cross_entropy(input, target, weight, size_average, reduce)
   1484 
   1485