Gradient Blending (for multi-modal models) - In Progress

An update on this. I am not sure whether this is the proper place to post it; if you think it is necessary, I will open another topic.

I am trying to reformulate the whole pipeline to expect multi-category targets. As @morgan did, I first tried to train the individual models with both class-imbalance weights and multi-category labels.

The thing is, I can train the visual model without issue with something like:

get_x = lambda x: path/f'{x[0]}'
def get_y(r): return r[1].split(' ')  # column 1 is where the labels live (4 of them, space-separated)

batch_tfms = aug_transforms(flip_vert=False, do_flip=False, max_lighting=0.1,
                            max_zoom=1.05, max_warp=0.)

im_db_m = DataBlock(blocks     = (ImageBlock, MultiCategoryBlock),
                    get_x      = get_x,
                    get_y      = get_y,
                    splitter   = splitter,
                    item_tfms  = Resize(512),
                    batch_tfms = batch_tfms)

vis_dl_m = im_db_m.dataloaders(train_df_ready, bs=8)

vis_learn_m = cnn_learner(vis_dl_m, resnet34, metrics=accuracy_multi, pretrained=True)

vis_dl_m.loss_func = BCEWithLogitsLossFlat(weight=class_weights)

where class_weights = tensor([11.3539, 1.0000, 5.8010, 5.1732], device='cuda').
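For reference, weights in this pattern are what you get from inverse label frequencies scaled so the most frequent label is 1.0; a minimal sketch of how they can be derived (the 'Label' column name is a placeholder for wherever the space-separated labels live in train_df_ready):

import torch

# Sketch: inverse-frequency weights, scaled so the most frequent label gets 1.0.
# 'Label' stands in for the column holding the space-separated labels.
counts = train_df_ready['Label'].str.split(' ').explode().value_counts()
counts = counts.reindex(['Label1', 'Label2', 'Label3', 'Label4'])  # fix the vocab order
class_weights = torch.tensor((counts.max() / counts).to_numpy(),
                             dtype=torch.float32, device='cuda')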

Here I have to mention that my dataset currently has 4 single (mutually exclusive) classes, but I would like the model to handle merged labels in the future; that is the reason for the multi-category setup. If you think a better approach is warranted, please tell me and stop reading :stuck_out_tongue:
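To illustrate what I mean by merged labels, here is a hypothetical row and what get_y above returns for it:

row = ('img_0001.dcm', 'Label1 Label3')  # hypothetical row with two merged labels
get_y(row)  # -> ['Label1', 'Label3'], which MultiCategoryBlock then one-hot encodes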

So, for the tabular_learner I have issues. I one-hot encoded the variables as explained here, so my dataset has 4 extra True/False columns, one per label.
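The encoding step is roughly this (a sketch; df and the 'Label' column name stand in for my actual frame):

# Sketch: turn the space-separated label column into 4 boolean indicator columns.
one_hot = df['Label'].str.get_dummies(sep=' ').astype(bool)  # Label1 .. Label4
df_multi = df.join(one_hot)

With that encoding in place, I try to train like this: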

y_names = ['Label1', 'Label2', 'Label3', 'Label4']
to = TabularPandas(df_multi, procs, cat_names, cont_names,
                   y_names=y_names,
                   y_block=MultiCategoryBlock(encoded=True, vocab=y_names),
                   splits=splits)
tab_dl_m = to.dataloaders(bs=8)
tab_learn_m = tabular_learner(tab_dl_m, metrics=accuracy_multi)
tab_learn_m.loss_func = BCEWithLogitsLossFlat(weight=class_weights)
tab_learn_m.fit_one_cycle(3)

A dimension error occurs:

epoch 	train_loss 	valid_loss 	accuracy_multi 	time
0 	0.000000 	00:00

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-277-14422a88807c> in <module>
----> 1 tab_learn_m.fit_one_cycle(3)

~/anaconda3/envs/fastai2/lib/python3.8/site-packages/fastcore/utils.py in _f(*args, **kwargs)
    452         init_args.update(log)
    453         setattr(inst, 'init_args', init_args)
--> 454         return inst if to_return else f(*args, **kwargs)
    455     return _f
    456 

~/anaconda3/envs/fastai2/lib/python3.8/site-packages/fastai/callback/schedule.py in fit_one_cycle(self, n_epoch, lr_max, div, div_final, pct_start, wd, moms, cbs, reset_opt)
    111     scheds = {'lr': combined_cos(pct_start, lr_max/div, lr_max, lr_max/div_final),
    112               'mom': combined_cos(pct_start, *(self.moms if moms is None else moms))}
--> 113     self.fit(n_epoch, cbs=ParamScheduler(scheds)+L(cbs), reset_opt=reset_opt, wd=wd)
    114 
    115 # Cell

~/anaconda3/envs/fastai2/lib/python3.8/site-packages/fastcore/utils.py in _f(*args, **kwargs)
    452         init_args.update(log)
    453         setattr(inst, 'init_args', init_args)
--> 454         return inst if to_return else f(*args, **kwargs)
    455     return _f
    456 

~/anaconda3/envs/fastai2/lib/python3.8/site-packages/fastai/learner.py in fit(self, n_epoch, lr, wd, cbs, reset_opt)
    202             self.opt.set_hypers(lr=self.lr if lr is None else lr)
    203             self.n_epoch,self.loss = n_epoch,tensor(0.)
--> 204             self._with_events(self._do_fit, 'fit', CancelFitException, self._end_cleanup)
    205 
    206     def _end_cleanup(self): self.dl,self.xb,self.yb,self.pred,self.loss = None,(None,),(None,),None,None

~/anaconda3/envs/fastai2/lib/python3.8/site-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
    153 
    154     def _with_events(self, f, event_type, ex, final=noop):
--> 155         try:       self(f'before_{event_type}')       ;f()
    156         except ex: self(f'after_cancel_{event_type}')
    157         finally:   self(f'after_{event_type}')        ;final()

~/anaconda3/envs/fastai2/lib/python3.8/site-packages/fastai/learner.py in _do_fit(self)
    192         for epoch in range(self.n_epoch):
    193             self.epoch=epoch
--> 194             self._with_events(self._do_epoch, 'epoch', CancelEpochException)
    195 
    196     @log_args(but='cbs')

~/anaconda3/envs/fastai2/lib/python3.8/site-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
    153 
    154     def _with_events(self, f, event_type, ex, final=noop):
--> 155         try:       self(f'before_{event_type}')       ;f()
    156         except ex: self(f'after_cancel_{event_type}')
    157         finally:   self(f'after_{event_type}')        ;final()

~/anaconda3/envs/fastai2/lib/python3.8/site-packages/fastai/learner.py in _do_epoch(self)
    186 
    187     def _do_epoch(self):
--> 188         self._do_epoch_train()
    189         self._do_epoch_validate()
    190 

~/anaconda3/envs/fastai2/lib/python3.8/site-packages/fastai/learner.py in _do_epoch_train(self)
    178     def _do_epoch_train(self):
    179         self.dl = self.dls.train
--> 180         self._with_events(self.all_batches, 'train', CancelTrainException)
    181 
    182     def _do_epoch_validate(self, ds_idx=1, dl=None):

~/anaconda3/envs/fastai2/lib/python3.8/site-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
    153 
    154     def _with_events(self, f, event_type, ex, final=noop):
--> 155         try:       self(f'before_{event_type}')       ;f()
    156         except ex: self(f'after_cancel_{event_type}')
    157         finally:   self(f'after_{event_type}')        ;final()

~/anaconda3/envs/fastai2/lib/python3.8/site-packages/fastai/learner.py in all_batches(self)
    159     def all_batches(self):
    160         self.n_iter = len(self.dl)
--> 161         for o in enumerate(self.dl): self.one_batch(*o)
    162 
    163     def _do_one_batch(self):

~/anaconda3/envs/fastai2/lib/python3.8/site-packages/fastai/learner.py in one_batch(self, i, b)
    174         self.iter = i
    175         self._split(b)
--> 176         self._with_events(self._do_one_batch, 'batch', CancelBatchException)
    177 
    178     def _do_epoch_train(self):

~/anaconda3/envs/fastai2/lib/python3.8/site-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
    153 
    154     def _with_events(self, f, event_type, ex, final=noop):
--> 155         try:       self(f'before_{event_type}')       ;f()
    156         except ex: self(f'after_cancel_{event_type}')
    157         finally:   self(f'after_{event_type}')        ;final()

~/anaconda3/envs/fastai2/lib/python3.8/site-packages/fastai/learner.py in _do_one_batch(self)
    164         self.pred = self.model(*self.xb);                self('after_pred')
    165         if len(self.yb) == 0: return
--> 166         self.loss = self.loss_func(self.pred, *self.yb); self('after_loss')
    167         if not self.training: return
    168         self('before_backward')

~/anaconda3/envs/fastai2/lib/python3.8/site-packages/fastai/layers.py in __call__(self, inp, targ, **kwargs)
    295         if targ.dtype in [torch.int8, torch.int16, torch.int32]: targ = targ.long()
    296         if self.flatten: inp = inp.view(-1,inp.shape[-1]) if self.is_2d else inp.view(-1)
--> 297         return self.func.__call__(inp, targ.view(-1) if self.flatten else targ, **kwargs)
    298 
    299 # Cell

~/anaconda3/envs/fastai2/lib/python3.8/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
    720             result = self._slow_forward(*input, **kwargs)
    721         else:
--> 722             result = self.forward(*input, **kwargs)
    723         for hook in itertools.chain(
    724                 _global_forward_hooks.values(),

~/anaconda3/envs/fastai2/lib/python3.8/site-packages/torch/nn/modules/loss.py in forward(self, input, target)
    626 
    627     def forward(self, input: Tensor, target: Tensor) -> Tensor:
--> 628         return F.binary_cross_entropy_with_logits(input, target,
    629                                                   self.weight,
    630                                                   pos_weight=self.pos_weight,

~/anaconda3/envs/fastai2/lib/python3.8/site-packages/torch/nn/functional.py in binary_cross_entropy_with_logits(input, target, weight, size_average, reduce, reduction, pos_weight)
   2538         raise ValueError("Target size ({}) must be the same as input size ({})".format(target.size(), input.size()))
   2539 
-> 2540     return torch.binary_cross_entropy_with_logits(input, target, weight, pos_weight, reduction_enum)
   2541 
   2542 

RuntimeError: The size of tensor a (32) must match the size of tensor b (4) at non-singleton dimension 0

So somehow the tabular learner's loss expects a 32-element weight tensor. If I read BCEWithLogitsLossFlat correctly, 32 is just bs x n_labels = 8 x 4: the loss flattens both predictions and targets to 1-D before calling binary_cross_entropy_with_logits (see the layers.py frame in the traceback), so a 4-element weight cannot broadcast against 32 elements. I tried padding the class weights to a 32-element tensor, filling the extra values first with 0 and then with 1, but both options give me a similar dimension error.
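A minimal sketch of the mismatch outside fastai, assuming bs=8 and 4 labels as above:

import torch
import torch.nn.functional as F

# BCEWithLogitsLossFlat flattens an (8, 4) batch to 32 elements, so the
# 4-element per-class weight can no longer broadcast against the loss.
pred = torch.randn(8, 4).view(-1)                    # 32 flattened logits
targ = torch.randint(0, 2, (8, 4)).float().view(-1)  # 32 flattened targets
weight = torch.tensor([11.3539, 1.0000, 5.8010, 5.1732])
F.binary_cross_entropy_with_logits(pred, targ, weight=weight)
# RuntimeError: the 32-element loss and the 4-element weight do not broadcast

However, the padded 32-element version fails at the end of the epoch instead, presumably on a final partial batch where bs x n_labels is no longer 32: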

epoch 	train_loss 	valid_loss 	accuracy_multi 	time
0 	0.586030 	0.579553 	0.861255 	00:15

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-288-14422a88807c> in <module>
----> 1 tab_learn_m.fit_one_cycle(3)

[... same call stack as above, except that this time the error is raised from the validation pass ...]

~/anaconda3/envs/fastai2/lib/python3.8/site-packages/fastai/learner.py in _do_epoch(self)
    187     def _do_epoch(self):
    188         self._do_epoch_train()
--> 189         self._do_epoch_validate()
    190 

~/anaconda3/envs/fastai2/lib/python3.8/site-packages/torch/nn/functional.py in binary_cross_entropy_with_logits(input, target, weight, size_average, reduce, reduction, pos_weight)
   2538         raise ValueError("Target size ({}) must be the same as input size ({})".format(target.size(), input.size()))
   2539 
-> 2540     return torch.binary_cross_entropy_with_logits(input, target, weight, pos_weight, reduction_enum)
   2541 
   2542 

RuntimeError: The size of tensor a (4) must match the size of tensor b (32) at non-singleton dimension 0

Sadly, multi_model expects the same as tabular_learner, so I am kind of stuck. Any ideas why this is happening?
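For what it is worth, one workaround I may try next (untested, and based on my reading of fastai's BaseLoss, which appears to forward flatten and any extra kwargs down to nn.BCEWithLogitsLoss) is to keep the (bs, n_labels) shape and use pos_weight, which is defined per class:

# Untested sketch: skip the 1-D flattening so a per-class weight can broadcast.
# pos_weight has shape (n_classes,) and re-weights the positive term per label;
# flatten=False relies on BaseLoss forwarding kwargs the way I read the source.
tab_learn_m.loss_func = BCEWithLogitsLossFlat(pos_weight=class_weights, flatten=False)
tab_learn_m.fit_one_cycle(3)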