Help: Multitask learning with data block API and AWD_LSTM

Hi, I’ve been trying for about a week now to get multitask learning working on a text dataset. The model needs to predict two categorical labels.

This is my data block, which works fine: dls.one_batch() and dls.show_batch() both return the expected output.

dblock = DataBlock(
    blocks=(TextBlock.from_df('nice_text', seq_len=64), CategoryBlock, CategoryBlock),
    getters=[
        ColReader(6), # ColReader('nice_text') by column name doesn't work for me, so I use the column index
        ColReader('issue'),
        ColReader('outcome'),
    ],
    splitter=ColSplitter(),
    n_inp=1,
)    
dls = dblock.dataloaders(df, bs=4)
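
As a quick sanity check (assuming the usual from fastai2.text.all import * is in scope), a batch unpacks into one padded text tensor and the two category targets; the shapes below are illustrative, not from my actual data:

xb, y_issue, y_outcome = dls.one_batch()
xb.shape, y_issue.shape, y_outcome.shape
# e.g. (torch.Size([4, 72]), torch.Size([4]), torch.Size([4]))
# the text tensor is padded to the longest item in the batch; the targets are category indices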

I’m modifying the AWD_LSTM model by adding a multihead classifier on top of it:

class Head(Module):
    "Small MLP head mapping a 768-wide input to nc classes."
    def __init__(self, nc):
        self.fc = nn.Sequential(
            nn.Linear(768, 384),
            nn.ReLU(),
            nn.Linear(384, nc),
        )

    def forward(self, x):
        return self.fc(x)

class ModifiedModel(Module):
    "AWD_LSTM body with one classifier head per target."
    def __init__(self, arch, n=dls.c):
        self.body = arch
        self.heads = nn.ModuleList([Head(nc) for nc in n])

    def forward(self, x):
        x = self.body(x)
        return [f(x) for f in self.heads]
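
The idea is that self.body returns the encoded sequence and each Head is a small MLP applied on top of it, one per target. Purely as an illustration (the 768 width here is just whatever Head's first Linear expects; nn.Linear acts on the last dimension):

fake_enc = torch.randn(4, 64, 768)  # pretend (bs, seq_len, width) encoder output
head = Head(nc=5)
head(fake_enc).shape                # torch.Size([4, 64, 5]) -- logits per timestep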

I also have a custom loss function that combines the cross-entropy losses:

class LossCombined(Module):
    "Weighted sum of the per-head cross-entropy losses."
    def __init__(self, func=F.cross_entropy, weights=[2, 1]):
        self.func, self.w = func, weights

    def forward(self, xs, *ys):
        for i, w, x, y in zip(range(len(xs)), self.w, xs, ys):
            if i == 0: loss = w*self.func(x, y) 
            else: loss += w*self.func(x, y) 
        return loss
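
The list of head outputs comes in as xs and the two targets as *ys, and the weighted cross-entropies are summed. A toy call, just to show the expected shapes (fabricated tensors, illustrative only):

loss_func = LossCombined()
preds = [torch.randn(4, 10), torch.randn(4, 3)]              # fake logits from the two heads
targs = (torch.randint(0, 10, (4,)), torch.randint(0, 3, (4,)))
loss_func(preds, *targs)                                      # 2 * CE(issue) + 1 * CE(outcome)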

The above is based on this notebook: https://www.kaggle.com/mnpinto/bengali-ai-fastai2-starter-lb0-9598, which does multitask learning with fastai2, but on images rather than text.

awd_lstm = AWD_LSTM(len(dls.vocab[0]), 20, 10, 2)  # vocab_sz, emb_sz, n_hid, n_layers
model = ModifiedModel(awd_lstm).cuda()
learn = Learner(dls, model, loss_func=LossCombined())
learn.lr_find()

This last line, where I try to find a good learning rate, errors out: apparently my input tensor is on the GPU but the LSTM's hidden tensors are not, even though I called .cuda() on the model.
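
This is the kind of hand check I mean (assuming I'm reading the AWD_LSTM internals correctly: self.hidden looks like a plain list of tensors, so I suspect .cuda() doesn't move it):

next(model.body.parameters()).device  # cuda:0 -- the weights did move
model.body.hidden[0][0].device        # cpu    -- the recurrent hidden state apparently did not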

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-29-d81c6bd29d71> in <module>
----> 1 learn.lr_find()

/opt/conda/lib/python3.7/site-packages/fastai2/callback/schedule.py in lr_find(self, start_lr, end_lr, num_it, stop_div, show_plot, suggestions)
    226     n_epoch = num_it//len(self.dls.train) + 1
    227     cb=LRFinder(start_lr=start_lr, end_lr=end_lr, num_it=num_it, stop_div=stop_div)
--> 228     with self.no_logging(): self.fit(n_epoch, cbs=cb)
    229     if show_plot: self.recorder.plot_lr_find()
    230     if suggestions:

/opt/conda/lib/python3.7/site-packages/fastcore/utils.py in _f(*args, **kwargs)
    429         init_args.update(log)
    430         setattr(inst, 'init_args', init_args)
--> 431         return inst if to_return else f(*args, **kwargs)
    432     return _f
    433 

/opt/conda/lib/python3.7/site-packages/fastai2/learner.py in fit(self, n_epoch, lr, wd, cbs, reset_opt)
    201                     try:
    202                         self.epoch=epoch;          self('begin_epoch')
--> 203                         self._do_epoch_train()
    204                         self._do_epoch_validate()
    205                     except CancelEpochException:   self('after_cancel_epoch')

/opt/conda/lib/python3.7/site-packages/fastai2/learner.py in _do_epoch_train(self)
    173         try:
    174             self.dl = self.dls.train;                        self('begin_train')
--> 175             self.all_batches()
    176         except CancelTrainException:                         self('after_cancel_train')
    177         finally:                                             self('after_train')

/opt/conda/lib/python3.7/site-packages/fastai2/learner.py in all_batches(self)
    151     def all_batches(self):
    152         self.n_iter = len(self.dl)
--> 153         for o in enumerate(self.dl): self.one_batch(*o)
    154 
    155     def one_batch(self, i, b):

/opt/conda/lib/python3.7/site-packages/fastai2/learner.py in one_batch(self, i, b)
    157         try:
    158             self._split(b);                                  self('begin_batch')
--> 159             self.pred = self.model(*self.xb);                self('after_pred')
    160             if len(self.yb) == 0: return
    161             self.loss = self.loss_func(self.pred, *self.yb); self('after_loss')

/opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
    720             result = self._slow_forward(*input, **kwargs)
    721         else:
--> 722             result = self.forward(*input, **kwargs)
    723         for hook in itertools.chain(
    724                 _global_forward_hooks.values(),

<ipython-input-12-c1fc47959251> in forward(self, x)
      5 
      6     def forward(self, x):
----> 7         x = self.body(x)
      8         return [f(x) for f in self.heads]

/opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
    720             result = self._slow_forward(*input, **kwargs)
    721         else:
--> 722             result = self.forward(*input, **kwargs)
    723         for hook in itertools.chain(
    724                 _global_forward_hooks.values(),

/opt/conda/lib/python3.7/site-packages/fastai2/text/models/awdlstm.py in forward(self, inp, from_embeds)
    109         new_hidden = []
    110         for l, (rnn,hid_dp) in enumerate(zip(self.rnns, self.hidden_dps)):
--> 111             output, new_h = rnn(output, self.hidden[l])
    112             new_hidden.append(new_h)
    113             if l != self.n_layers - 1: output = hid_dp(output)

/opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
    720             result = self._slow_forward(*input, **kwargs)
    721         else:
--> 722             result = self.forward(*input, **kwargs)
    723         for hook in itertools.chain(
    724                 _global_forward_hooks.values(),

/opt/conda/lib/python3.7/site-packages/fastai2/text/models/awdlstm.py in forward(self, *args)
     55             #To avoid the warning that comes because the weights aren't flattened.
     56             warnings.simplefilter("ignore")
---> 57             return self.module.forward(*args)
     58 
     59     def reset(self):

/opt/conda/lib/python3.7/site-packages/torch/nn/modules/rnn.py in forward(self, input, hx)
    575         if batch_sizes is None:
    576             result = _VF.lstm(input, hx, self._flat_weights, self.bias, self.num_layers,
--> 577                               self.dropout, self.training, self.bidirectional, self.batch_first)
    578         else:
    579             result = _VF.lstm(input, batch_sizes, hx, self._flat_weights, self.bias,

RuntimeError: Input and hidden tensors are not at the same device, found input tensor at cuda:0 and hidden tensor at cpu

Any ideas?
