Hi, I’ve been trying for about a week now to do multitask learning on a text dataset. The final outputs are 2 categorical labels.
This is my data block, which works fine: dls.one_batch() and dls.show_batch() return the expected output.
dblock = DataBlock(
    blocks=(TextBlock.from_df('nice_text', seq_len=64), CategoryBlock, CategoryBlock),
    getters=[
        ColReader(6),  # ColReader('nice_text') doesn't work, so I use the column index
        ColReader('issue'),
        ColReader('outcome'),
    ],
    splitter=ColSplitter(),
    n_inp=1,
)
dls = dblock.dataloaders(df, bs=4)
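For reference, this is how I sanity-check the batch structure; with n_inp=1 I expect one input tensor followed by two target tensors (I'm not showing exact shapes since the sequence length varies with padding):

xb, y_issue, y_outcome = dls.one_batch()
xb.shape, y_issue.shape, y_outcome.shape   # input is (bs, sl); each target is (bs,)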
I’m modifying the AWD_LSTM model by adding a multihead classifier on top of it:
class Head(Module):
    def __init__(self, nc):
        self.fc = nn.Sequential(
            nn.Linear(768, 384),
            nn.ReLU(),
            nn.Linear(384, nc),
        )

    def forward(self, x):
        return self.fc(x)
class ModifiedModel(Module):
    def __init__(self, arch, n=dls.c):
        self.body = arch
        self.classifier = nn.ModuleList([Head(nc) for nc in n])

    def forward(self, x):
        x = self.body(x)
        return [f(x) for f in self.classifier]
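A toy forward pass on the CPU confirms the wrapper returns one prediction per head (all the numbers below are made up, and I build the body with emb_sz=768 so it matches the heads' first Linear layer):

body = AWD_LSTM(100, 768, 10, 2)           # toy: vocab_sz=100, emb_sz=768, n_hid=10, 2 layers
m = ModifiedModel(body, n=[5, 3])          # two heads with 5 and 3 classes
out = m(torch.randint(2, 100, (4, 20)))    # batch of 4 sequences of length 20
assert len(out) == 2                       # one output tensor per head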
I also have a custom loss function that combines the cross-entropy losses:
class LossCombined(Module):
    def __init__(self, func=F.cross_entropy, weights=[2, 1]):
        self.func, self.w = func, weights

    def forward(self, xs, *ys):
        for i, (w, x, y) in enumerate(zip(self.w, xs, ys)):
            if i == 0: loss = w * self.func(x, y)
            else:      loss += w * self.func(x, y)
        return loss
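To check the loss wiring independently of the model, it can be called with dummy predictions and targets (the class counts of 5 and 3 and the batch size of 4 are made up):

preds = [torch.randn(4, 5), torch.randn(4, 3)]               # one (bs, nc) tensor per task
ys = (torch.randint(0, 5, (4,)), torch.randint(0, 3, (4,)))
LossCombined()(preds, *ys)                                   # 2*CE(issue) + 1*CE(outcome)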
The above is based on this notebook, which does multitask learning with fastai2 (on images rather than text): https://www.kaggle.com/mnpinto/bengali-ai-fastai2-starter-lb0-9598
awd_lstm = AWD_LSTM(len(dls.vocab[0]), 20, 10, 2)
model = ModifiedModel(awd_lstm).cuda()
learn = Learner(dls, model, loss_func=LossCombined())
learn.lr_find()
This last line, where I try to find a good learning rate, errors out: apparently my input tensor is on the GPU but the hidden tensors are not, even though I called .cuda() on the model. The full traceback is below, and my guess at a fix follows it.
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-29-d81c6bd29d71> in <module>
----> 1 learn.lr_find()
/opt/conda/lib/python3.7/site-packages/fastai2/callback/schedule.py in lr_find(self, start_lr, end_lr, num_it, stop_div, show_plot, suggestions)
226 n_epoch = num_it//len(self.dls.train) + 1
227 cb=LRFinder(start_lr=start_lr, end_lr=end_lr, num_it=num_it, stop_div=stop_div)
--> 228 with self.no_logging(): self.fit(n_epoch, cbs=cb)
229 if show_plot: self.recorder.plot_lr_find()
230 if suggestions:
/opt/conda/lib/python3.7/site-packages/fastcore/utils.py in _f(*args, **kwargs)
429 init_args.update(log)
430 setattr(inst, 'init_args', init_args)
--> 431 return inst if to_return else f(*args, **kwargs)
432 return _f
433
/opt/conda/lib/python3.7/site-packages/fastai2/learner.py in fit(self, n_epoch, lr, wd, cbs, reset_opt)
201 try:
202 self.epoch=epoch; self('begin_epoch')
--> 203 self._do_epoch_train()
204 self._do_epoch_validate()
205 except CancelEpochException: self('after_cancel_epoch')
/opt/conda/lib/python3.7/site-packages/fastai2/learner.py in _do_epoch_train(self)
173 try:
174 self.dl = self.dls.train; self('begin_train')
--> 175 self.all_batches()
176 except CancelTrainException: self('after_cancel_train')
177 finally: self('after_train')
/opt/conda/lib/python3.7/site-packages/fastai2/learner.py in all_batches(self)
151 def all_batches(self):
152 self.n_iter = len(self.dl)
--> 153 for o in enumerate(self.dl): self.one_batch(*o)
154
155 def one_batch(self, i, b):
/opt/conda/lib/python3.7/site-packages/fastai2/learner.py in one_batch(self, i, b)
157 try:
158 self._split(b); self('begin_batch')
--> 159 self.pred = self.model(*self.xb); self('after_pred')
160 if len(self.yb) == 0: return
161 self.loss = self.loss_func(self.pred, *self.yb); self('after_loss')
/opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
720 result = self._slow_forward(*input, **kwargs)
721 else:
--> 722 result = self.forward(*input, **kwargs)
723 for hook in itertools.chain(
724 _global_forward_hooks.values(),
<ipython-input-12-c1fc47959251> in forward(self, x)
5
6 def forward(self, x):
----> 7 x = self.body(x)
8 return [f(x) for f in self.heads]
/opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
720 result = self._slow_forward(*input, **kwargs)
721 else:
--> 722 result = self.forward(*input, **kwargs)
723 for hook in itertools.chain(
724 _global_forward_hooks.values(),
/opt/conda/lib/python3.7/site-packages/fastai2/text/models/awdlstm.py in forward(self, inp, from_embeds)
109 new_hidden = []
110 for l, (rnn,hid_dp) in enumerate(zip(self.rnns, self.hidden_dps)):
--> 111 output, new_h = rnn(output, self.hidden[l])
112 new_hidden.append(new_h)
113 if l != self.n_layers - 1: output = hid_dp(output)
/opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
720 result = self._slow_forward(*input, **kwargs)
721 else:
--> 722 result = self.forward(*input, **kwargs)
723 for hook in itertools.chain(
724 _global_forward_hooks.values(),
/opt/conda/lib/python3.7/site-packages/fastai2/text/models/awdlstm.py in forward(self, *args)
55 #To avoid the warning that comes because the weights aren't flattened.
56 warnings.simplefilter("ignore")
---> 57 return self.module.forward(*args)
58
59 def reset(self):
/opt/conda/lib/python3.7/site-packages/torch/nn/modules/rnn.py in forward(self, input, hx)
575 if batch_sizes is None:
576 result = _VF.lstm(input, hx, self._flat_weights, self.bias, self.num_layers,
--> 577 self.dropout, self.training, self.bidirectional, self.batch_first)
578 else:
579 result = _VF.lstm(input, batch_sizes, hx, self._flat_weights, self.bias,
RuntimeError: Input and hidden tensors are not at the same device, found input tensor at cuda:0 and hidden tensor at cpu
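From reading the fastai2 AWD_LSTM source, I think self.hidden is a plain list of tensors created in reset() (which __init__ calls while the model is still on the CPU) and not a registered buffer, so .cuda() moves the weights but never the hidden state; it only lands on the right device the next time reset() is called. Would something like this be the right fix? A sketch of what I mean, giving the wrapper a reset() that delegates to the body:

class ModifiedModel(Module):
    def __init__(self, arch, n=dls.c):
        self.body = arch
        self.classifier = nn.ModuleList([Head(nc) for nc in n])

    def forward(self, x):
        x = self.body(x)
        return [f(x) for f in self.classifier]

    def reset(self): self.body.reset()   # recreate the hidden state on the weights' device

model = ModifiedModel(awd_lstm).cuda()
model.reset()                            # hidden tensors should now be on cuda:0 as well
learn = Learner(dls, model, loss_func=LossCombined())

I'd probably also want fastai's RNN reset callback (ModelReseter in fastai2.callback.rnn, if I'm reading the source right) in cbs, so reset() gets called at the start of training and validation the way text_classifier_learner sets things up.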
Any ideas?