Using a PyTorch arch + LR Finder error

Hi everyone, I’m trying to figure out how to use a custom PyTorch model with fastai for NLP, and any tips or help would be appreciated. (I also apologize if this has been answered in any of the walk-thrus, which I haven’t finished watching yet.) I’ve already checked out the “Building a custom pytorch model” thread on the forums, but I’ve still run into trouble getting the LR Finder to work.

To test things out, I’ve copied and pasted fastai’s AWD_LSTM and LinearDecoder classes into a Jupyter notebook, renamed them awd_lstm_class and linear_decoder, and I’m trying to get them to work directly with a plain fastai Learner(). (Source: https://github.com/fastai/fastai/blob/master/fastai/text/models/awd_lstm.py)

So instead of typing:

learn = language_model_learner(data_lm, AWD_LSTM, drop_mult=0.3)
learn.lr_find()
learn.recorder.plot(skip_start=30,skip_end=12,suggestion=True)

I’ve done the following:

vocab_sz = len(data_lm.train_ds.vocab.itos)
emb_sz = 400
n_hid = 1152
n_layers = 3

# copied AWD_LSTM encoder followed by the copied LinearDecoder
model = nn.Sequential(
    awd_lstm_class(vocab_sz, emb_sz, n_hid, n_layers),
    linear_decoder(n_out=vocab_sz, n_hid=emb_sz, bias=True, output_p=0.1),
)
model = model.to(device)

learn = Learner(data_lm, model)
learn.lr_find()
learn.recorder.plot(skip_start=30,skip_end=12,suggestion=True)

Which produces the following error:

LR Finder is complete, type {learner_name}.recorder.plot() to see the graph.

----------------------------------------------------------------------
AttributeError                       Traceback (most recent call last)
<ipython-input-76-48f2c15efdff> in <module>
      1 # learn = language_model_learner(data_lm, awd_lstm, drop_mult=0.3)
      2 learn = Learner(data_lm, model)
----> 3 learn.lr_find()
      4 learn.recorder.plot(skip_start=30,skip_end=12,suggestion=True)

~/anaconda3/envs/econda/lib/python3.7/site-packages/fastai/train.py in lr_find(learn, start_lr, end_lr, num_it, stop_div, wd)
     30     cb = LRFinder(learn, start_lr, end_lr, num_it, stop_div)
     31     epochs = int(np.ceil(num_it/len(learn.data.train_dl)))
---> 32     learn.fit(epochs, start_lr, callbacks=[cb], wd=wd)
     33 
     34 def to_fp16(learn:Learner, loss_scale:float=None, max_noskip:int=1000, dynamic:bool=True, clip:float=None,

~/anaconda3/envs/econda/lib/python3.7/site-packages/fastai/basic_train.py in fit(self, epochs, lr, wd, callbacks)
    200         callbacks = [cb(self) for cb in self.callback_fns + listify(defaults.extra_callback_fns)] + listify(callbacks)
    201         self.cb_fns_registered = True
--> 202         fit(epochs, self, metrics=self.metrics, callbacks=self.callbacks+callbacks)
    203 
    204     def create_opt(self, lr:Floats, wd:Floats=0.)->None:

~/anaconda3/envs/econda/lib/python3.7/site-packages/fastai/basic_train.py in fit(epochs, learn, callbacks, metrics)
     99             for xb,yb in progress_bar(learn.data.train_dl, parent=pbar):
    100                 xb, yb = cb_handler.on_batch_begin(xb, yb)
--> 101                 loss = loss_batch(learn.model, xb, yb, learn.loss_func, learn.opt, cb_handler)
    102                 if cb_handler.on_batch_end(loss): break
    103 

~/anaconda3/envs/econda/lib/python3.7/site-packages/fastai/basic_train.py in loss_batch(model, xb, yb, loss_func, opt, cb_handler)
     28 
     29     if not loss_func: return to_detach(out), yb[0].detach()
---> 30     loss = loss_func(out, *yb)
     31 
     32     if opt is not None:

~/anaconda3/envs/econda/lib/python3.7/site-packages/fastai/layers.py in __call__(self, input, target, **kwargs)
    232 
    233     def __call__(self, input:Tensor, target:Tensor, **kwargs)->Rank0Tensor:
--> 234         input = input.transpose(self.axis,-1).contiguous()
    235         target = target.transpose(self.axis,-1).contiguous()
    236         if self.floatify: target = target.float()

AttributeError: 'tuple' object has no attribute 'transpose'

Also, I’ve noticed the following differences, but I’m not sure what to do about them:

This model's structure:
Sequential(
  (0): awd_lstm_class(
    (encoder): Embedding(60000, 400, padding_idx=1)
    (encoder_dp): EmbeddingDropout(
      (emb): Embedding(60000, 400, padding_idx=1)
    )
    (rnns): ModuleList(
      (0): WeightDropout(
        (module): LSTM(400, 1152, batch_first=True)
      )
      (1): WeightDropout(
        (module): LSTM(1152, 1152, batch_first=True)
      )
      (2): WeightDropout(
        (module): LSTM(1152, 400, batch_first=True)
      )
    )
    (input_dp): RNNDropout()
    (hidden_dps): ModuleList(
      (0): RNNDropout()
      (1): RNNDropout()
      (2): RNNDropout()
    )
  )
  (1): linear_decoder(
    (decoder): Linear(in_features=400, out_features=60000, bias=True)
    (output_dp): RNNDropout()
  )
)
fastai's original setup from language_model_learner (the Learner's repr, showing the SequentialRNN model plus its loss_func and callbacks):
model=SequentialRNN(
  (0): AWD_LSTM(
    (encoder): Embedding(60000, 400, padding_idx=1)
    (encoder_dp): EmbeddingDropout(
      (emb): Embedding(60000, 400, padding_idx=1)
    )
    (rnns): ModuleList(
      (0): WeightDropout(
        (module): LSTM(400, 1152, batch_first=True)
      )
      (1): WeightDropout(
        (module): LSTM(1152, 1152, batch_first=True)
      )
      (2): WeightDropout(
        (module): LSTM(1152, 400, batch_first=True)
      )
    )
    (input_dp): RNNDropout()
    (hidden_dps): ModuleList(
      (0): RNNDropout()
      (1): RNNDropout()
      (2): RNNDropout()
    )
  )
  (1): LinearDecoder(
    (decoder): Linear(in_features=400, out_features=60000, bias=True)
    (output_dp): RNNDropout()
  )
), opt_func=functools.partial(<class 'torch.optim.adam.Adam'>, betas=(0.9, 0.99)), loss_func=FlattenedLoss of CrossEntropyLoss(), metrics=[<function accuracy at 0x7fa11278d170>], true_wd=True, bn_wd=True, wd=0.01, train_bn=True, path=PosixPath('.'), model_dir='models', callback_fns=[functools.partial(<class 'fastai.basic_train.Recorder'>, add_time=True, silent=False)], callbacks=[RNNTrainer
learn: ...
alpha: 2.0
beta: 1.0], layer_groups=[Sequential(
  (0): WeightDropout(
    (module): LSTM(400, 1152, batch_first=True)
  )
  (1): RNNDropout()
), Sequential(
  (0): WeightDropout(
    (module): LSTM(1152, 1152, batch_first=True)
  )
  (1): RNNDropout()
), Sequential(
  (0): WeightDropout(
    (module): LSTM(1152, 400, batch_first=True)
  )
  (1): RNNDropout()
), Sequential(
  (0): Embedding(60000, 400, padding_idx=1)
  (1): EmbeddingDropout(
    (emb): Embedding(60000, 400, padding_idx=1)
  )
  (2): LinearDecoder(
    (decoder): Linear(in_features=400, out_features=60000, bias=True)
    (output_dp): RNNDropout()
  )
)], add_time=True, silent=False, cb_fns_registered=False)

If anyone could point me in the right direction and help me figure out what I’m missing or doing wrong, I’d appreciate it.

The problem is that your model returns an output that is a tuple, not a single tensor, so you need to use a loss function that handles that.
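This isn’t the full wiring that language_model_learner does for you (that also adds the RNNTrainer callback and the per-layer groups shown above), but as a minimal sketch of what “a loss function that handles the tuple” can look like, assuming your copied LinearDecoder still returns (decoded, raw_outputs, outputs) like the original:

import torch.nn.functional as F

def lm_loss(out, targ):
    # the copied LinearDecoder returns (decoded, raw_outputs, outputs);
    # only the decoded logits go into the loss
    logits = out[0] if isinstance(out, tuple) else out
    # flatten the batch and sequence dimensions before cross-entropy
    return F.cross_entropy(logits.view(-1, logits.size(-1)), targ.view(-1))

learn = Learner(data_lm, model, loss_func=lm_loss)
learn.lr_find()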


Hi sgugger, I am facing the same issue right now.
At first I didn’t pass a loss function, so I got the same error as above. Then after I added a loss_func (loss_func = nn.CrossEntropyLoss()) to the Learner, it raises:

AttributeError: 'tuple' object has no attribute 'log_softmax'

Here is the code:

loss_func = nn.CrossEntropyLoss()
learn = Learner(databunch, 
                model,
                loss_func=loss_func,
                metrics=accuracy)

and the error output:

LR Finder is complete, type {learner_name}.recorder.plot() to see the graph.
/home/linhan/anaconda3/lib/python3.7/site-packages/numpy/core/_asarray.py:83: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray
  return array(a, dtype, copy=False, order=order)
<string>:6: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-31-d81c6bd29d71> in <module>()
----> 1 learn.lr_find()

~/anaconda3/lib/python3.7/site-packages/fastai/train.py in lr_find(learn, start_lr, end_lr, num_it, stop_div, wd)
     39     cb = LRFinder(learn, start_lr, end_lr, num_it, stop_div)
     40     epochs = int(np.ceil(num_it/len(learn.data.train_dl))) * (num_distrib() or 1)
---> 41     learn.fit(epochs, start_lr, callbacks=[cb], wd=wd)
     42 
     43 def to_fp16(learn:Learner, loss_scale:float=None, max_noskip:int=1000, dynamic:bool=True, clip:float=None,

~/anaconda3/lib/python3.7/site-packages/fastai/basic_train.py in fit(self, epochs, lr, wd, callbacks)
    198         else: self.opt.lr,self.opt.wd = lr,wd
    199         callbacks = [cb(self) for cb in self.callback_fns + listify(defaults.extra_callback_fns)] + listify(callbacks)
--> 200         fit(epochs, self, metrics=self.metrics, callbacks=self.callbacks+callbacks)
    201 
    202     def create_opt(self, lr:Floats, wd:Floats=0.)->None:

~/anaconda3/lib/python3.7/site-packages/fastai/basic_train.py in fit(epochs, learn, callbacks, metrics)
     99             for xb,yb in progress_bar(learn.data.train_dl, parent=pbar):
    100                 xb, yb = cb_handler.on_batch_begin(xb, yb)
--> 101                 loss = loss_batch(learn.model, xb, yb, learn.loss_func, learn.opt, cb_handler)
    102                 if cb_handler.on_batch_end(loss): break
    103 

~/anaconda3/lib/python3.7/site-packages/fastai/basic_train.py in loss_batch(model, xb, yb, loss_func, opt, cb_handler)
     28 
     29     if not loss_func: return to_detach(out), to_detach(yb[0])
---> 30     loss = loss_func(out, *yb)
     31 
     32     if opt is not None:

~/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
    548             result = self._slow_forward(*input, **kwargs)
    549         else:
--> 550             result = self.forward(*input, **kwargs)
    551         for hook in self._forward_hooks.values():
    552             hook_result = hook(self, input, result)

~/anaconda3/lib/python3.7/site-packages/torch/nn/modules/loss.py in forward(self, input, target)
    930     def forward(self, input, target):
    931         return F.cross_entropy(input, target, weight=self.weight,
--> 932                                ignore_index=self.ignore_index, reduction=self.reduction)
    933 
    934 

~/anaconda3/lib/python3.7/site-packages/torch/nn/functional.py in cross_entropy(input, target, weight, size_average, ignore_index, reduce, reduction)
   2315     if size_average is not None or reduce is not None:
   2316         reduction = _Reduction.legacy_get_string(size_average, reduce)
-> 2317     return nll_loss(log_softmax(input, 1), target, weight, None, ignore_index, None, reduction)
   2318 
   2319 

~/anaconda3/lib/python3.7/site-packages/torch/nn/functional.py in log_softmax(input, dim, _stacklevel, dtype)
   1533         dim = _get_softmax_dim('log_softmax', input.dim(), _stacklevel)
   1534     if dtype is None:
-> 1535         ret = input.log_softmax(dim)
   1536     else:
   1537         ret = input.log_softmax(dim, dtype=dtype)

AttributeError: 'tuple' object has no attribute 'log_softmax'

Thank you so much for your reply.

Based on the error message, the problem is in the data bunch. You should check what comes out of your data bunch: it should be a tuple of tensors for one batch (at least in fastai2).
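For example, one quick way to inspect a single batch (a sketch assuming the fastai v1 API your traceback points to and the databunch variable from your post; the VisibleDeprecationWarning about ragged nested sequences hints that items in a batch may not all have the same length):

xb, yb = databunch.one_batch()
print(type(xb), type(yb))    # each should be a torch.Tensor, not a tuple/list
print(xb.shape, yb.shape)    # shapes should be consistent across the batch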