Using a PyTorch arch + LR Finder error

Hi everyone, I’m trying to figure out how to use a custom PyTorch model with fastai for NLP, and any tips or help would be appreciated. (I also apologize if this has been answered in any of the walk-thrus, which I haven’t finished watching yet.) I’ve already checked out the “Building a custom pytorch model” thread on the forums, but I’ve still run into trouble getting the LR Finder to work.

To test things out, I’ve copied and pasted fastai’s AWD_LSTM and LinearDecoder classes into a Jupyter notebook, renamed them awd_lstm_class and linear_decoder, and I’m trying to get them to work directly with a plain fastai Learner(). (Source: https://github.com/fastai/fastai/blob/master/fastai/text/models/awd_lstm.py)

So instead of typing:

learn = language_model_learner(data_lm, AWD_LSTM, drop_mult=0.3)
learn.lr_find()
learn.recorder.plot(skip_start=30,skip_end=12,suggestion=True)

I’ve done the following:

vocab_sz = len(data_lm.train_ds.vocab.itos)
emb_sz = 400
n_hid = 1152
n_layers = 3

# copied AWD_LSTM encoder followed by the copied LinearDecoder
model = nn.Sequential(
    awd_lstm_class(vocab_sz, emb_sz, n_hid, n_layers),
    linear_decoder(n_out=vocab_sz, n_hid=emb_sz, bias=True, output_p=0.1),
)
model = model.to(device)

learn = Learner(data_lm, model)
learn.lr_find()
learn.recorder.plot(skip_start=30,skip_end=12,suggestion=True)

Which produces the following error:

LR Finder is complete, type {learner_name}.recorder.plot() to see the graph.

----------------------------------------------------------------------
AttributeError                       Traceback (most recent call last)
<ipython-input-76-48f2c15efdff> in <module>
      1 # learn = language_model_learner(data_lm, awd_lstm, drop_mult=0.3)
      2 learn = Learner(data_lm, model)
----> 3 learn.lr_find()
      4 learn.recorder.plot(skip_start=30,skip_end=12,suggestion=True)

~/anaconda3/envs/econda/lib/python3.7/site-packages/fastai/train.py in lr_find(learn, start_lr, end_lr, num_it, stop_div, wd)
     30     cb = LRFinder(learn, start_lr, end_lr, num_it, stop_div)
     31     epochs = int(np.ceil(num_it/len(learn.data.train_dl)))
---> 32     learn.fit(epochs, start_lr, callbacks=[cb], wd=wd)
     33 
     34 def to_fp16(learn:Learner, loss_scale:float=None, max_noskip:int=1000, dynamic:bool=True, clip:float=None,

~/anaconda3/envs/econda/lib/python3.7/site-packages/fastai/basic_train.py in fit(self, epochs, lr, wd, callbacks)
    200         callbacks = [cb(self) for cb in self.callback_fns + listify(defaults.extra_callback_fns)] + listify(callbacks)
    201         self.cb_fns_registered = True
--> 202         fit(epochs, self, metrics=self.metrics, callbacks=self.callbacks+callbacks)
    203 
    204     def create_opt(self, lr:Floats, wd:Floats=0.)->None:

~/anaconda3/envs/econda/lib/python3.7/site-packages/fastai/basic_train.py in fit(epochs, learn, callbacks, metrics)
     99             for xb,yb in progress_bar(learn.data.train_dl, parent=pbar):
    100                 xb, yb = cb_handler.on_batch_begin(xb, yb)
--> 101                 loss = loss_batch(learn.model, xb, yb, learn.loss_func, learn.opt, cb_handler)
    102                 if cb_handler.on_batch_end(loss): break
    103 

~/anaconda3/envs/econda/lib/python3.7/site-packages/fastai/basic_train.py in loss_batch(model, xb, yb, loss_func, opt, cb_handler)
     28 
     29     if not loss_func: return to_detach(out), yb[0].detach()
---> 30     loss = loss_func(out, *yb)
     31 
     32     if opt is not None:

~/anaconda3/envs/econda/lib/python3.7/site-packages/fastai/layers.py in __call__(self, input, target, **kwargs)
    232 
    233     def __call__(self, input:Tensor, target:Tensor, **kwargs)->Rank0Tensor:
--> 234         input = input.transpose(self.axis,-1).contiguous()
    235         target = target.transpose(self.axis,-1).contiguous()
    236         if self.floatify: target = target.float()

AttributeError: 'tuple' object has no attribute 'transpose'

Also, I’ve noticed the following differences, but I’m not sure what to do about them:

This model's structure:
Sequential(
  (0): awd_lstm_class(
    (encoder): Embedding(60000, 400, padding_idx=1)
    (encoder_dp): EmbeddingDropout(
      (emb): Embedding(60000, 400, padding_idx=1)
    )
    (rnns): ModuleList(
      (0): WeightDropout(
        (module): LSTM(400, 1152, batch_first=True)
      )
      (1): WeightDropout(
        (module): LSTM(1152, 1152, batch_first=True)
      )
      (2): WeightDropout(
        (module): LSTM(1152, 400, batch_first=True)
      )
    )
    (input_dp): RNNDropout()
    (hidden_dps): ModuleList(
      (0): RNNDropout()
      (1): RNNDropout()
      (2): RNNDropout()
    )
  )
  (1): linear_decoder(
    (decoder): Linear(in_features=400, out_features=60000, bias=True)
    (output_dp): RNNDropout()
  )
)
fastai's original setup from language_model_learner (the Learner's repr, showing the SequentialRNN model plus its loss_func and callbacks):
model=SequentialRNN(
  (0): AWD_LSTM(
    (encoder): Embedding(60000, 400, padding_idx=1)
    (encoder_dp): EmbeddingDropout(
      (emb): Embedding(60000, 400, padding_idx=1)
    )
    (rnns): ModuleList(
      (0): WeightDropout(
        (module): LSTM(400, 1152, batch_first=True)
      )
      (1): WeightDropout(
        (module): LSTM(1152, 1152, batch_first=True)
      )
      (2): WeightDropout(
        (module): LSTM(1152, 400, batch_first=True)
      )
    )
    (input_dp): RNNDropout()
    (hidden_dps): ModuleList(
      (0): RNNDropout()
      (1): RNNDropout()
      (2): RNNDropout()
    )
  )
  (1): LinearDecoder(
    (decoder): Linear(in_features=400, out_features=60000, bias=True)
    (output_dp): RNNDropout()
  )
), opt_func=functools.partial(<class 'torch.optim.adam.Adam'>, betas=(0.9, 0.99)), loss_func=FlattenedLoss of CrossEntropyLoss(), metrics=[<function accuracy at 0x7fa11278d170>], true_wd=True, bn_wd=True, wd=0.01, train_bn=True, path=PosixPath('.'), model_dir='models', callback_fns=[functools.partial(<class 'fastai.basic_train.Recorder'>, add_time=True, silent=False)], callbacks=[RNNTrainer
learn: ...
alpha: 2.0
beta: 1.0], layer_groups=[Sequential(
  (0): WeightDropout(
    (module): LSTM(400, 1152, batch_first=True)
  )
  (1): RNNDropout()
), Sequential(
  (0): WeightDropout(
    (module): LSTM(1152, 1152, batch_first=True)
  )
  (1): RNNDropout()
), Sequential(
  (0): WeightDropout(
    (module): LSTM(1152, 400, batch_first=True)
  )
  (1): RNNDropout()
), Sequential(
  (0): Embedding(60000, 400, padding_idx=1)
  (1): EmbeddingDropout(
    (emb): Embedding(60000, 400, padding_idx=1)
  )
  (2): LinearDecoder(
    (decoder): Linear(in_features=400, out_features=60000, bias=True)
    (output_dp): RNNDropout()
  )
)], add_time=True, silent=False, cb_fns_registered=False)

If anyone could point me in the right direction and help me figure out what I’m missing or doing wrong, I’d appreciate it.

The problem is that your model returns an output that is a tuple, not a single tensor, so you need to use a loss function that handles that.
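This isn’t the full wiring that language_model_learner does for you (that also adds the RNNTrainer callback and the per-layer groups shown above), but as a minimal sketch of what “a loss function that handles the tuple” can look like, assuming your copied LinearDecoder still returns (decoded, raw_outputs, outputs) like the original:

import torch.nn.functional as F

def lm_loss(out, targ):
    # the copied LinearDecoder returns (decoded, raw_outputs, outputs);
    # only the decoded logits go into the loss
    logits = out[0] if isinstance(out, tuple) else out
    # flatten the batch and sequence dimensions before cross-entropy
    return F.cross_entropy(logits.view(-1, logits.size(-1)), targ.view(-1))

learn = Learner(data_lm, model, loss_func=lm_loss)
learn.lr_find()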


Hi sgugger, I am facing the same issue right now.
At first I didn’t pass a loss function, so I got the same error as above. Then after I added a loss_func (loss_func = nn.CrossEntropyLoss()) to the Learner, it raises:

AttributeError: 'tuple' object has no attribute 'log_softmax'

Here is the code:

loss_func = nn.CrossEntropyLoss()
learn = Learner(databunch, 
                model,
                loss_func=loss_func,
                metrics=accuracy)

and the error output:

LR Finder is complete, type {learner_name}.recorder.plot() to see the graph.
/home/linhan/anaconda3/lib/python3.7/site-packages/numpy/core/_asarray.py:83: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray
  return array(a, dtype, copy=False, order=order)
<string>:6: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-31-d81c6bd29d71> in <module>()
----> 1 learn.lr_find()

~/anaconda3/lib/python3.7/site-packages/fastai/train.py in lr_find(learn, start_lr, end_lr, num_it, stop_div, wd)
     39     cb = LRFinder(learn, start_lr, end_lr, num_it, stop_div)
     40     epochs = int(np.ceil(num_it/len(learn.data.train_dl))) * (num_distrib() or 1)
---> 41     learn.fit(epochs, start_lr, callbacks=[cb], wd=wd)
     42 
     43 def to_fp16(learn:Learner, loss_scale:float=None, max_noskip:int=1000, dynamic:bool=True, clip:float=None,

~/anaconda3/lib/python3.7/site-packages/fastai/basic_train.py in fit(self, epochs, lr, wd, callbacks)
    198         else: self.opt.lr,self.opt.wd = lr,wd
    199         callbacks = [cb(self) for cb in self.callback_fns + listify(defaults.extra_callback_fns)] + listify(callbacks)
--> 200         fit(epochs, self, metrics=self.metrics, callbacks=self.callbacks+callbacks)
    201 
    202     def create_opt(self, lr:Floats, wd:Floats=0.)->None:

~/anaconda3/lib/python3.7/site-packages/fastai/basic_train.py in fit(epochs, learn, callbacks, metrics)
     99             for xb,yb in progress_bar(learn.data.train_dl, parent=pbar):
    100                 xb, yb = cb_handler.on_batch_begin(xb, yb)
--> 101                 loss = loss_batch(learn.model, xb, yb, learn.loss_func, learn.opt, cb_handler)
    102                 if cb_handler.on_batch_end(loss): break
    103 

~/anaconda3/lib/python3.7/site-packages/fastai/basic_train.py in loss_batch(model, xb, yb, loss_func, opt, cb_handler)
     28 
     29     if not loss_func: return to_detach(out), to_detach(yb[0])
---> 30     loss = loss_func(out, *yb)
     31 
     32     if opt is not None:

~/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
    548             result = self._slow_forward(*input, **kwargs)
    549         else:
--> 550             result = self.forward(*input, **kwargs)
    551         for hook in self._forward_hooks.values():
    552             hook_result = hook(self, input, result)

~/anaconda3/lib/python3.7/site-packages/torch/nn/modules/loss.py in forward(self, input, target)
    930     def forward(self, input, target):
    931         return F.cross_entropy(input, target, weight=self.weight,
--> 932                                ignore_index=self.ignore_index, reduction=self.reduction)
    933 
    934 

~/anaconda3/lib/python3.7/site-packages/torch/nn/functional.py in cross_entropy(input, target, weight, size_average, ignore_index, reduce, reduction)
   2315     if size_average is not None or reduce is not None:
   2316         reduction = _Reduction.legacy_get_string(size_average, reduce)
-> 2317     return nll_loss(log_softmax(input, 1), target, weight, None, ignore_index, None, reduction)
   2318 
   2319 

~/anaconda3/lib/python3.7/site-packages/torch/nn/functional.py in log_softmax(input, dim, _stacklevel, dtype)
   1533         dim = _get_softmax_dim('log_softmax', input.dim(), _stacklevel)
   1534     if dtype is None:
-> 1535         ret = input.log_softmax(dim)
   1536     else:
   1537         ret = input.log_softmax(dim, dtype=dtype)

AttributeError: 'tuple' object has no attribute 'log_softmax'

Thank you so much for your reply.

Based on the error message, the problem is in the data bunch. You should check what comes out of your data bunch: it should be a tuple of tensors for one batch (at least in fastai2).
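For example, one quick way to inspect a single batch (a sketch assuming the fastai v1 API your traceback points to and the databunch variable from your post; the VisibleDeprecationWarning about ragged nested sequences hints that items in a batch may not all have the same length):

xb, yb = databunch.one_batch()
print(type(xb), type(yb))    # each should be a torch.Tensor, not a tuple/list
print(xb.shape, yb.shape)    # shapes should be consistent across the batch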