ValueError: input batch_size and target batch_size

Hi, I’m using fastai to fine-tune a DistilBERT model, and the error occurs when the loss is calculated. Could you give me some advice on how to deal with it?

My code:

tokenizer = AutoTokenizer.from_pretrained("cl-tohoku/bert-base-japanese-whole-word-masking")
distil_model = AutoModel.from_pretrained("bandainamco-mirai/distilbert-base-japanese") 

class DistilBertClassifier(torch.nn.Module):
  def __init__(self):
    super(DistilBertClassifier, self).__init__()
    self.distil_bert = distil_model
    self.linear = nn.Linear(768, NUM_CLASSES)
    nn.init.normal_(self.linear.weight, std=0.02)
    nn.init.normal_(self.linear.bias, 0)

  def forward(self, input_ids):
    vec, _ = self.distil_bert(input_ids)
    vec = vec[:,0,:]
    out = self.linear(vec)
    return out

model = DistilBertClassifier()
learn = Learner(dls,
                HF_BaseModelWrapper(model),
                opt_func=partial(Adam, decouple_wd=True),
                loss_func=CrossEntropyLossFlat(),
                metrics=[accuracy, F1Score(average='macro')],
                cbs=[HF_BaseModelCallback],
                splitter=hf_splitter)

The error:

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
~/tweets/test.py in <module>
     113 learn.freeze()
     114 # learn.lr_find(suggestions=True)
---> 115 learn.fit_one_cycle(EPOCH)

~/anaconda3/lib/python3.8/site-packages/fastcore/logargs.py in _f(*args, **kwargs)
     54         init_args.update(log)
     55         setattr(inst, 'init_args', init_args)
---> 56         return inst if to_return else f(*args, **kwargs)
     57     return _f

~/anaconda3/lib/python3.8/site-packages/fastai/callback/schedule.py in fit_one_cycle(self, n_epoch, lr_max, div, div_final, pct_start, wd, moms, cbs, reset_opt)
    111     scheds = {'lr': combined_cos(pct_start, lr_max/div, lr_max, lr_max/div_final),
    112               'mom': combined_cos(pct_start, *(self.moms if moms is None else moms))}
--> 113     self.fit(n_epoch, cbs=ParamScheduler(scheds)+L(cbs), reset_opt=reset_opt, wd=wd)
    114 
    115 # Cell

~/anaconda3/lib/python3.8/site-packages/fastcore/logargs.py in _f(*args, **kwargs)
     54         init_args.update(log)
     55         setattr(inst, 'init_args', init_args)
---> 56         return inst if to_return else f(*args, **kwargs)
     57     return _f

~/anaconda3/lib/python3.8/site-packages/fastai/learner.py in fit(self, n_epoch, lr, wd, cbs, reset_opt)
    205             self.opt.set_hypers(lr=self.lr if lr is None else lr)
    206             self.n_epoch,self.loss = n_epoch,tensor(0.)
--> 207             self._with_events(self._do_fit, 'fit', CancelFitException, self._end_cleanup)
    208 
    209     def _end_cleanup(self): self.dl,self.xb,self.yb,self.pred,self.loss = None,(None,),(None,),None,None

~/anaconda3/lib/python3.8/site-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
    153 
    154     def _with_events(self, f, event_type, ex, final=noop):
--> 155         try:       self(f'before_{event_type}')       ;f()
    156         except ex: self(f'after_cancel_{event_type}')
    157         finally:   self(f'after_{event_type}')        ;final()

~/anaconda3/lib/python3.8/site-packages/fastai/learner.py in _do_fit(self)
    195         for epoch in range(self.n_epoch):
    196             self.epoch=epoch
--> 197             self._with_events(self._do_epoch, 'epoch', CancelEpochException)
    198 
    199     @log_args(but='cbs')

~/anaconda3/lib/python3.8/site-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
    153 
    154     def _with_events(self, f, event_type, ex, final=noop):
--> 155         try:       self(f'before_{event_type}')       ;f()
    156         except ex: self(f'after_cancel_{event_type}')
    157         finally:   self(f'after_{event_type}')        ;final()

~/anaconda3/lib/python3.8/site-packages/fastai/learner.py in _do_epoch(self)
    189 
    190     def _do_epoch(self):
--> 191         self._do_epoch_train()
    192         self._do_epoch_validate()
    193 

~/anaconda3/lib/python3.8/site-packages/fastai/learner.py in _do_epoch_train(self)
    181     def _do_epoch_train(self):
    182         self.dl = self.dls.train
--> 183         self._with_events(self.all_batches, 'train', CancelTrainException)
    184 
    185     def _do_epoch_validate(self, ds_idx=1, dl=None):

~/anaconda3/lib/python3.8/site-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
    153 
    154     def _with_events(self, f, event_type, ex, final=noop):
--> 155         try:       self(f'before_{event_type}')       ;f()
    156         except ex: self(f'after_cancel_{event_type}')
    157         finally:   self(f'after_{event_type}')        ;final()

~/anaconda3/lib/python3.8/site-packages/fastai/learner.py in all_batches(self)
    159     def all_batches(self):
    160         self.n_iter = len(self.dl)
--> 161         for o in enumerate(self.dl): self.one_batch(*o)
    162 
    163     def _do_one_batch(self):

~/anaconda3/lib/python3.8/site-packages/fastai/learner.py in one_batch(self, i, b)
    177         self.iter = i
    178         self._split(b)
--> 179         self._with_events(self._do_one_batch, 'batch', CancelBatchException)
    180 
    181     def _do_epoch_train(self):

~/anaconda3/lib/python3.8/site-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
    153 
    154     def _with_events(self, f, event_type, ex, final=noop):
--> 155         try:       self(f'before_{event_type}')       ;f()
    156         except ex: self(f'after_cancel_{event_type}')
    157         finally:   self(f'after_{event_type}')        ;final()

~/anaconda3/lib/python3.8/site-packages/fastai/learner.py in _do_one_batch(self)
    164         self.pred = self.model(*self.xb)
    165         self('after_pred')
--> 166         if len(self.yb): self.loss = self.loss_func(self.pred, *self.yb)
    167         self('after_loss')
    168         if not self.training or not len(self.yb): return

~/anaconda3/lib/python3.8/site-packages/fastai/layers.py in __call__(self, inp, targ, **kwargs)
    295         if targ.dtype in [torch.int8, torch.int16, torch.int32]: targ = targ.long()
    296         if self.flatten: inp = inp.view(-1,inp.shape[-1]) if self.is_2d else inp.view(-1)
--> 297         return self.func.__call__(inp, targ.view(-1) if self.flatten else targ, **kwargs)
    298 
    299 # Cell

~/anaconda3/lib/python3.8/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
    720             result = self._slow_forward(*input, **kwargs)
    721         else:
--> 722             result = self.forward(*input, **kwargs)
    723         for hook in itertools.chain(
    724                 _global_forward_hooks.values(),

~/anaconda3/lib/python3.8/site-packages/torch/nn/modules/loss.py in forward(self, input, target)
    945 
    946     def forward(self, input: Tensor, target: Tensor) -> Tensor:
--> 947         return F.cross_entropy(input, target, weight=self.weight,
    948                                ignore_index=self.ignore_index, reduction=self.reduction)
    949 

~/anaconda3/lib/python3.8/site-packages/torch/nn/functional.py in cross_entropy(input, target, weight, size_average, ignore_index, reduce, reduction)
   2420     if size_average is not None or reduce is not None:
   2421         reduction = _Reduction.legacy_get_string(size_average, reduce)
-> 2422     return nll_loss(log_softmax(input, 1), target, weight, None, ignore_index, None, reduction)
   2423 
   2424 

~/anaconda3/lib/python3.8/site-packages/torch/nn/functional.py in nll_loss(input, target, weight, size_average, ignore_index, reduce, reduction)
   2213 
   2214     if input.size(0) != target.size(0):
-> 2215         raise ValueError('Expected input batch_size ({}) to match target batch_size ({}).'
   2216                          .format(input.size(0), target.size(0)))
   2217     if dim == 2:

ValueError: Expected input batch_size (1) to match target batch_size (64).

Since you mention the loss, could you show how you calculate it? Perhaps you need to reshape your inputs?

I use fastai’s built-in loss function CrossEntropyLossFlat. Yes, I want to reshape my input to match the target size, but I don’t know how to do that :sob:

When I look at functional.py on GitHub, I see this check (if input.size(0) != target.size(0)) appears only once, in the nll_loss function (line 2260).
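
For what it’s worth, the mismatch is easy to reproduce outside fastai. A minimal sketch (the shapes come from the error message; the tensors are made up for illustration):

import torch
import torch.nn.functional as F

# what cross_entropy expects for single-label classification:
logits  = torch.randn(64, 2)             # [batch_size, n_classes]
targets = torch.randint(0, 2, (64,))     # [batch_size] of class indices
print(F.cross_entropy(logits, targets))  # works: returns a scalar loss

# what the traceback says is actually happening:
bad_logits = torch.randn(1, 2)           # input batch_size 1
F.cross_entropy(bad_logits, targets)     # ValueError: Expected input batch_size (1)
                                         # to match target batch_size (64).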

Can you paste your whole code? I would like to see how dls is created, since I see it passed into the Learner constructor, and also how you train. I think I can be of more help then.

Here is the whole code. I use a library called blurr, which is a wrapper around transformers and fastai. I didn’t show the code that builds df, because there is a lot of text-preprocessing code involved.

# %%
from transformers import AutoModel, AutoTokenizer, AutoConfig
from fastai.text.all import *
from blurr.data.all import *
from blurr.modeling.all import *
import torch
from torch import nn
import torch.nn.functional as F

# %%
NUM_CLASSES = 2
BS = 64
MAX_LEN = 128
EPOCH = 5

# %%
hf_arch = 'distilbert'
tokenizer = AutoTokenizer.from_pretrained("cl-tohoku/bert-base-japanese-whole-word-masking")
distil_model = AutoModel.from_pretrained("bandainamco-mirai/distilbert-base-japanese") 

# %%
class DistilBertClassifier(torch.nn.Module):
  def __init__(self):
    super(DistilBertClassifier, self).__init__()
    self.distil_bert = distil_model
    self.linear = nn.Linear(768, NUM_CLASSES)
    nn.init.normal_(self.linear.weight, std=0.02)
    nn.init.normal_(self.linear.bias, 0)

  def forward(self, input_ids):
    vec, _ = self.distil_bert(input_ids)
    vec = vec[:,0,:]
    # shape here is [batch_size, 768]
    out = self.linear(vec)
    # shape here is [batch_size, 2]
    return out

# %%
blocks = (HF_TextBlock(hf_arch=hf_arch,
                       hf_tokenizer=tokenizer, 
                       padding='max_length', 
                       max_length=MAX_LEN), CategoryBlock)

dblock = DataBlock(blocks=blocks,
                   get_x=ColReader('data'), get_y=ColReader('label'),
                   splitter=IndexSplitter(range(len(df['val'][0]))))

dls = dblock.dataloaders(pd.DataFrame(data={
    'data': df['val'][0].tolist() + df['review'][0].tolist(),
    'label': df['val'][1].tolist() + df['review'][1].tolist()
}), bs=BS)

# %%
model = DistilBertClassifier()
learn = Learner(dls,
                HF_BaseModelWrapper(model),
                opt_func=partial(Adam, decouple_wd=True),
                loss_func=CrossEntropyLossFlat(),
                metrics=[accuracy, F1Score(average='macro')],
                cbs=[HF_BaseModelCallback],
                splitter=hf_splitter)

learn.create_opt()
learn.freeze()
learn.fit_one_cycle(EPOCH)
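
In case it helps, this is how I inspect what the DataLoaders yield (a quick sketch; as far as I understand, with blurr the inputs come through as a dict of tensors):

# grab one batch from the training DataLoader and compare shapes
xb, yb = dls.one_batch()
print(type(xb))   # with blurr this should be the tokenizer inputs (a dict of tensors)
print(yb.shape)   # expecting torch.Size([64]) for the labels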

def forward(self, input_ids):
  vec, _ = self.distil_bert(input_ids)
  vec = vec[:,0,:]
  # shape here is [batch_size, 768]
  out = self.linear(vec)
  # shape here is [batch_size, 2]
  return out

I was trying to replicate the forward function you had written. You say the highlighted line produces an output of [batch_size, 2]. Is it not [BS, MAX_LEN, NUM_CLASSES], i.e. (64, 128, 2)?

Can you print the shape using print(out.shape)?


The shape of input_ids is [bs, max_len].
After passing it through the model we get the hidden states, and slicing out the first token gives a fixed 768-dimensional vector per example, i.e. [bs, 768].
After the linear layer we get [bs, 2]. I have printed it; the shape is [bs, 2] here.
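
As a sanity check of the head alone, this is roughly what I ran (a sketch with a hypothetical dummy batch that bypasses the transformer):

# feed fake pooled vectors straight into the classification head
vec = torch.randn(BS, 768)   # stand-in for the first-token output of DistilBERT
out = model.linear(vec)
print(out.shape)             # torch.Size([64, 2])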

OK.
For some reason I haven’t been able to run fastai since this morning: when I import vision, I get the error “ModuleNotFoundError: No module named ‘fastai.callback.all’; ‘fastai.callback’ is not a package”.

Can you change your loss function from CrossEntropyLossFlat() to CrossEntropyLossFlat(is_2d=False)? I just want to see what message (if any) you get.
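
For context, the flatten step in fastai’s layers.py (visible in your traceback above) is:

if self.flatten: inp = inp.view(-1,inp.shape[-1]) if self.is_2d else inp.view(-1)

so with is_2d=False the predictions are flattened to 1-D instead of [N, n_classes].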

Another error occurred. Here is the full error message.

Traceback (most recent call last):
  File "/home/zchelllo/tweets/test.py", line 116, in <module>
    learn.fit_one_cycle(EPOCH)
  File "/home/zchelllo/anaconda3/lib/python3.8/site-packages/fastcore/logargs.py", line 56, in _f
    return inst if to_return else f(*args, **kwargs)
  File "/home/zchelllo/anaconda3/lib/python3.8/site-packages/fastai/callback/schedule.py", line 113, in fit_one_cycle
    self.fit(n_epoch, cbs=ParamScheduler(scheds)+L(cbs), reset_opt=reset_opt, wd=wd)
  File "/home/zchelllo/anaconda3/lib/python3.8/site-packages/fastcore/logargs.py", line 56, in _f
    return inst if to_return else f(*args, **kwargs)
  File "/home/zchelllo/anaconda3/lib/python3.8/site-packages/fastai/learner.py", line 207, in fit
    self._with_events(self._do_fit, 'fit', CancelFitException, self._end_cleanup)
  File "/home/zchelllo/anaconda3/lib/python3.8/site-packages/fastai/learner.py", line 155, in _with_events
    try:       self(f'before_{event_type}')       ;f()
  File "/home/zchelllo/anaconda3/lib/python3.8/site-packages/fastai/learner.py", line 197, in _do_fit
    self._with_events(self._do_epoch, 'epoch', CancelEpochException)
  File "/home/zchelllo/anaconda3/lib/python3.8/site-packages/fastai/learner.py", line 155, in _with_events
    try:       self(f'before_{event_type}')       ;f()
  File "/home/zchelllo/anaconda3/lib/python3.8/site-packages/fastai/learner.py", line 191, in _do_epoch
    self._do_epoch_train()
  File "/home/zchelllo/anaconda3/lib/python3.8/site-packages/fastai/learner.py", line 183, in _do_epoch_train
    self._with_events(self.all_batches, 'train', CancelTrainException)
  File "/home/zchelllo/anaconda3/lib/python3.8/site-packages/fastai/learner.py", line 155, in _with_events
    try:       self(f'before_{event_type}')       ;f()
  File "/home/zchelllo/anaconda3/lib/python3.8/site-packages/fastai/learner.py", line 161, in all_batches
    for o in enumerate(self.dl): self.one_batch(*o)
  File "/home/zchelllo/anaconda3/lib/python3.8/site-packages/fastai/learner.py", line 179, in one_batch
    self._with_events(self._do_one_batch, 'batch', CancelBatchException)
  File "/home/zchelllo/anaconda3/lib/python3.8/site-packages/fastai/learner.py", line 155, in _with_events
    try:       self(f'before_{event_type}')       ;f()
  File "/home/zchelllo/anaconda3/lib/python3.8/site-packages/fastai/learner.py", line 166, in _do_one_batch
    if len(self.yb): self.loss = self.loss_func(self.pred, *self.yb)
  File "/home/zchelllo/anaconda3/lib/python3.8/site-packages/fastai/layers.py", line 297, in __call__
    return self.func.__call__(inp, targ.view(-1) if self.flatten else targ, **kwargs)
  File "/home/zchelllo/anaconda3/lib/python3.8/site-packages/torch/nn/modules/module.py", line 722, in _call_impl
    result = self.forward(*input, **kwargs)
  File "/home/zchelllo/anaconda3/lib/python3.8/site-packages/torch/nn/modules/loss.py", line 947, in forward
    return F.cross_entropy(input, target, weight=self.weight,
  File "/home/zchelllo/anaconda3/lib/python3.8/site-packages/torch/nn/functional.py", line 2422, in cross_entropy
    return nll_loss(log_softmax(input, 1), target, weight, None, ignore_index, None, reduction)
  File "/home/zchelllo/anaconda3/lib/python3.8/site-packages/torch/nn/functional.py", line 1591, in log_softmax
    ret = input.log_softmax(dim)
IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1)
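
That IndexError is consistent with the flatten step quoted above: with is_2d=False the predictions are flattened to 1-D, so log_softmax(input, 1) has no dimension 1 to work on. A minimal repro sketch:

import torch
import torch.nn.functional as F

flat = torch.randn(64)       # 1-D logits, as produced by inp.view(-1)
F.log_softmax(flat, dim=1)   # IndexError: Dimension out of range
                             # (expected to be in range of [-1, 0], but got 1)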

Oh OK, sorry those approaches didn’t work.
I am still facing issues with fastai on my end; let me dig into this once things are up and running again.

OK, thank you very much.
In the meantime I’ve actually switched to Hugging Face’s Trainer, which is more integrated and easier to use, and it runs fine. Take your time.