I’m trying to train a ULMFiT model for English, but on tweets, for short-text classification. I did everything as usual, pretty much exactly as in the other Jupyter notebooks from the Language Model Zoo, but when I try to fit the model I get the following error:
RuntimeError Traceback (most recent call last)
<ipython-input-46-b544778ca021> in <module>()
----> 1 learner.fit(lrs/2, 1, wds=wd, use_clr=(32,2), cycle_len=1)
~\Anaconda3\lib\site-packages\fastai\fastai\text.py in fit(self, *args, **kwargs)
209
210 def _get_crit(self, data): return F.cross_entropy
--> 211 def fit(self, *args, **kwargs): return super().fit(*args, **kwargs, seq_first=True)
212
213 def save_encoder(self, name): save_model(self.model[0], self.get_model_path(name))
~\Anaconda3\lib\site-packages\fastai\fastai\learner.py in fit(self, lrs, n_cycle, wds, **kwargs)
300 self.sched = None
301 layer_opt = self.get_layer_opt(lrs, wds)
--> 302 return self.fit_gen(self.model, self.data, layer_opt, n_cycle, **kwargs)
303
304 def warm_up(self, lr, wds=None):
~\Anaconda3\lib\site-packages\fastai\fastai\learner.py in fit_gen(self, model, data, layer_opt, n_cycle, cycle_len, cycle_mult, cycle_save_name, best_save_name, use_clr, use_clr_beta, metrics, callbacks, use_wd_sched, norm_wds, wds_sched_mult, use_swa, swa_start, swa_eval_freq, **kwargs)
247 metrics=metrics, callbacks=callbacks, reg_fn=self.reg_fn, clip=self.clip, fp16=self.fp16,
248 swa_model=self.swa_model if use_swa else None, swa_start=swa_start,
--> 249 swa_eval_freq=swa_eval_freq, **kwargs)
250
251 def get_layer_groups(self): return self.models.get_layer_groups()
~\Anaconda3\lib\site-packages\fastai\fastai\model.py in fit(model, data, n_epochs, opt, crit, metrics, callbacks, stepper, swa_model, swa_start, swa_eval_freq, visualize, **kwargs)
139 batch_num += 1
140 for cb in callbacks: cb.on_batch_begin()
--> 141 loss = model_stepper.step(V(x),V(y), epoch)
142 avg_loss = avg_loss * avg_mom + loss * (1-avg_mom)
143 debias_loss = avg_loss / (1 - avg_mom**batch_num)
~\Anaconda3\lib\site-packages\fastai\fastai\model.py in step(self, xs, y, epoch)
48 def step(self, xs, y, epoch):
49 xtra = []
---> 50 output = self.m(*xs)
51 if isinstance(output,tuple): output,*xtra = output
52 if self.fp16: self.m.zero_grad()
~\Anaconda3\lib\site-packages\torch\nn\modules\module.py in __call__(self, *input, **kwargs)
475 result = self._slow_forward(*input, **kwargs)
476 else:
--> 477 result = self.forward(*input, **kwargs)
478 for hook in self._forward_hooks.values():
479 hook_result = hook(self, input, result)
~\Anaconda3\lib\site-packages\torch\nn\modules\container.py in forward(self, input)
89 def forward(self, input):
90 for module in self._modules.values():
---> 91 input = module(input)
92 return input
93
~\Anaconda3\lib\site-packages\torch\nn\modules\module.py in __call__(self, *input, **kwargs)
475 result = self._slow_forward(*input, **kwargs)
476 else:
--> 477 result = self.forward(*input, **kwargs)
478 for hook in self._forward_hooks.values():
479 hook_result = hook(self, input, result)
~\Anaconda3\lib\site-packages\fastai\fastai\lm_rnn.py in forward(self, input)
96 self.reset()
97 with set_grad_enabled(self.training):
---> 98 emb = self.encoder_with_dropout(input, dropout=self.dropoute if self.training else 0)
99 emb = self.dropouti(emb)
100 raw_output = emb
~\Anaconda3\lib\site-packages\torch\nn\modules\module.py in __call__(self, *input, **kwargs)
475 result = self._slow_forward(*input, **kwargs)
476 else:
--> 477 result = self.forward(*input, **kwargs)
478 for hook in self._forward_hooks.values():
479 hook_result = hook(self, input, result)
~\Anaconda3\lib\site-packages\fastai\fastai\rnn_reg.py in forward(self, words, dropout, scale)
182 X = F.embedding(words,
183 masked_embed_weight, padding_idx, self.embed.max_norm,
--> 184 self.embed.norm_type, self.embed.scale_grad_by_freq, self.embed.sparse)
185 else:
186 X = self.embed._backend.Embedding.apply(words,
~\Anaconda3\lib\site-packages\torch\nn\functional.py in embedding(input, weight, padding_idx, max_norm, norm_type, scale_grad_by_freq, sparse)
1108 with torch.no_grad():
1109 torch.embedding_renorm_(weight, input, max_norm, norm_type)
-> 1110 return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)
1111
1112
RuntimeError: Expected tensor for argument #1 'indices' to have scalar type Long; but got CUDAFloatTensor instead (while checking arguments for embedding)
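For reference, the error itself is easy to reproduce outside fastai: nn.Embedding (and the underlying torch.embedding it calls) only accepts integer (Long) index tensors. A minimal sketch, with nothing fastai-specific:

import torch
import torch.nn as nn

emb = nn.Embedding(num_embeddings=10, embedding_dim=4)

emb(torch.tensor([1, 2, 3]))     # int64 indices: works
emb(torch.tensor([1., 2., 3.]))  # float32 indices: RuntimeError: Expected tensor
                                 # for argument #1 'indices' to have scalar type Long

So somewhere in my pipeline the token ids must be ending up as floats before they reach the embedding layer.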
I already found an issue on GitHub that is supposed to have fixed this error, but it still occurs for me.
Does anyone have ideas on how to solve it?
Edit:
Found a workaround by casting the batchified data tensors of the training and validation loaders to long before fitting:

md.trn_dl.data = md.trn_dl.data.long()  # cast training token ids to int64
md.val_dl.data = md.val_dl.data.long()  # cast validation token ids to int64
md.trn_dl.data  # sanity check: the dtype should now be long
But I still don’t understand why this cast is necessary this time, when the same code worked before on other corpora.
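My best guess for why it breaks on tweets specifically (this is an assumption, not something I’ve verified in the fastai source): some tweets tokenize to empty lists, numpy infers float64 for an empty array, and concatenation then promotes the whole id stream to float before it ever reaches the DataLoader. Illustrated with made-up ids:

import numpy as np

# docs stands in for the per-tweet token-id lists after numericalization
docs = [[4, 8, 15], [], [16, 23, 42]]

# np.array([]) defaults to float64, and np.concatenate promotes the whole
# stream to float64 as soon as one document is empty:
ids = np.concatenate([np.array(d) for d in docs])
print(ids.dtype)  # float64

# Forcing int64 per document keeps the ids integer-typed end to end:
ids = np.concatenate([np.array(d, dtype=np.int64) for d in docs])
print(ids.dtype)  # int64

If that is the cause, filtering out empty tweets or forcing the dtype when building the id arrays would fix it at the source, instead of patching the DataLoader tensors afterwards.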