ValueError: input batch_size and target batch_size

Hi, I’m using fastai to fine-tune a DistilBERT model, and the error occurs when the loss is calculated. Could you give me some advice on how to deal with it?

My code:

tokenizer = AutoTokenizer.from_pretrained("cl-tohoku/bert-base-japanese-whole-word-masking")
distil_model = AutoModel.from_pretrained("bandainamco-mirai/distilbert-base-japanese") 

class DistilBertClassifier(torch.nn.Module):
  def __init__(self):
    super(DistilBertClassifier, self).__init__()
    self.distil_bert = distil_model
    self.linear = nn.Linear(768, NUM_CLASSES)
    nn.init.normal_(self.linear.weight, std=0.02)
    nn.init.normal_(self.linear.bias, 0)

  def forward(self, input_ids):
    vec, _ = self.distil_bert(input_ids)
    vec = vec[:,0,:]
    out = self.linear(vec)
    return out

model = DistilBertClassifier()
learn = Learner(dls,
                HF_BaseModelWrapper(model),
                opt_func=partial(Adam, decouple_wd=True),
                loss_func=CrossEntropyLossFlat(),
                metrics=[accuracy, F1Score(average='macro')],
                cbs=[HF_BaseModelCallback],
                splitter=hf_splitter)

The error:

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
~/tweets/test.py in <module>
     113 learn.freeze()
     114 # learn.lr_find(suggestions=True)
---> 115 learn.fit_one_cycle(EPOCH)

~/anaconda3/lib/python3.8/site-packages/fastcore/logargs.py in _f(*args, **kwargs)
     54         init_args.update(log)
     55         setattr(inst, 'init_args', init_args)
---> 56         return inst if to_return else f(*args, **kwargs)
     57     return _f

~/anaconda3/lib/python3.8/site-packages/fastai/callback/schedule.py in fit_one_cycle(self, n_epoch, lr_max, div, div_final, pct_start, wd, moms, cbs, reset_opt)
    111     scheds = {'lr': combined_cos(pct_start, lr_max/div, lr_max, lr_max/div_final),
    112               'mom': combined_cos(pct_start, *(self.moms if moms is None else moms))}
--> 113     self.fit(n_epoch, cbs=ParamScheduler(scheds)+L(cbs), reset_opt=reset_opt, wd=wd)
    114 
    115 # Cell

~/anaconda3/lib/python3.8/site-packages/fastcore/logargs.py in _f(*args, **kwargs)
     54         init_args.update(log)
     55         setattr(inst, 'init_args', init_args)
---> 56         return inst if to_return else f(*args, **kwargs)
     57     return _f

~/anaconda3/lib/python3.8/site-packages/fastai/learner.py in fit(self, n_epoch, lr, wd, cbs, reset_opt)
    205             self.opt.set_hypers(lr=self.lr if lr is None else lr)
    206             self.n_epoch,self.loss = n_epoch,tensor(0.)
--> 207             self._with_events(self._do_fit, 'fit', CancelFitException, self._end_cleanup)
    208 
    209     def _end_cleanup(self): self.dl,self.xb,self.yb,self.pred,self.loss = None,(None,),(None,),None,None

~/anaconda3/lib/python3.8/site-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
    153 
    154     def _with_events(self, f, event_type, ex, final=noop):
--> 155         try:       self(f'before_{event_type}')       ;f()
    156         except ex: self(f'after_cancel_{event_type}')
    157         finally:   self(f'after_{event_type}')        ;final()

~/anaconda3/lib/python3.8/site-packages/fastai/learner.py in _do_fit(self)
    195         for epoch in range(self.n_epoch):
    196             self.epoch=epoch
--> 197             self._with_events(self._do_epoch, 'epoch', CancelEpochException)
    198 
    199     @log_args(but='cbs')

~/anaconda3/lib/python3.8/site-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
    153 
    154     def _with_events(self, f, event_type, ex, final=noop):
--> 155         try:       self(f'before_{event_type}')       ;f()
    156         except ex: self(f'after_cancel_{event_type}')
    157         finally:   self(f'after_{event_type}')        ;final()

~/anaconda3/lib/python3.8/site-packages/fastai/learner.py in _do_epoch(self)
    189 
    190     def _do_epoch(self):
--> 191         self._do_epoch_train()
    192         self._do_epoch_validate()
    193 

~/anaconda3/lib/python3.8/site-packages/fastai/learner.py in _do_epoch_train(self)
    181     def _do_epoch_train(self):
    182         self.dl = self.dls.train
--> 183         self._with_events(self.all_batches, 'train', CancelTrainException)
    184 
    185     def _do_epoch_validate(self, ds_idx=1, dl=None):

~/anaconda3/lib/python3.8/site-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
    153 
    154     def _with_events(self, f, event_type, ex, final=noop):
--> 155         try:       self(f'before_{event_type}')       ;f()
    156         except ex: self(f'after_cancel_{event_type}')
    157         finally:   self(f'after_{event_type}')        ;final()

~/anaconda3/lib/python3.8/site-packages/fastai/learner.py in all_batches(self)
    159     def all_batches(self):
    160         self.n_iter = len(self.dl)
--> 161         for o in enumerate(self.dl): self.one_batch(*o)
    162 
    163     def _do_one_batch(self):

~/anaconda3/lib/python3.8/site-packages/fastai/learner.py in one_batch(self, i, b)
    177         self.iter = i
    178         self._split(b)
--> 179         self._with_events(self._do_one_batch, 'batch', CancelBatchException)
    180 
    181     def _do_epoch_train(self):

~/anaconda3/lib/python3.8/site-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
    153 
    154     def _with_events(self, f, event_type, ex, final=noop):
--> 155         try:       self(f'before_{event_type}')       ;f()
    156         except ex: self(f'after_cancel_{event_type}')
    157         finally:   self(f'after_{event_type}')        ;final()

~/anaconda3/lib/python3.8/site-packages/fastai/learner.py in _do_one_batch(self)
    164         self.pred = self.model(*self.xb)
    165         self('after_pred')
--> 166         if len(self.yb): self.loss = self.loss_func(self.pred, *self.yb)
    167         self('after_loss')
    168         if not self.training or not len(self.yb): return

~/anaconda3/lib/python3.8/site-packages/fastai/layers.py in __call__(self, inp, targ, **kwargs)
    295         if targ.dtype in [torch.int8, torch.int16, torch.int32]: targ = targ.long()
    296         if self.flatten: inp = inp.view(-1,inp.shape[-1]) if self.is_2d else inp.view(-1)
--> 297         return self.func.__call__(inp, targ.view(-1) if self.flatten else targ, **kwargs)
    298 
    299 # Cell

~/anaconda3/lib/python3.8/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
    720             result = self._slow_forward(*input, **kwargs)
    721         else:
--> 722             result = self.forward(*input, **kwargs)
    723         for hook in itertools.chain(
    724                 _global_forward_hooks.values(),

~/anaconda3/lib/python3.8/site-packages/torch/nn/modules/loss.py in forward(self, input, target)
    945 
    946     def forward(self, input: Tensor, target: Tensor) -> Tensor:
--> 947         return F.cross_entropy(input, target, weight=self.weight,
    948                                ignore_index=self.ignore_index, reduction=self.reduction)
    949 

~/anaconda3/lib/python3.8/site-packages/torch/nn/functional.py in cross_entropy(input, target, weight, size_average, ignore_index, reduce, reduction)
   2420     if size_average is not None or reduce is not None:
   2421         reduction = _Reduction.legacy_get_string(size_average, reduce)
-> 2422     return nll_loss(log_softmax(input, 1), target, weight, None, ignore_index, None, reduction)
   2423 
   2424 

~/anaconda3/lib/python3.8/site-packages/torch/nn/functional.py in nll_loss(input, target, weight, size_average, ignore_index, reduce, reduction)
   2213 
   2214     if input.size(0) != target.size(0):
-> 2215         raise ValueError('Expected input batch_size ({}) to match target batch_size ({}).'
   2216                          .format(input.size(0), target.size(0)))
   2217     if dim == 2:

ValueError: Expected input batch_size (1) to match target batch_size (64).

Since you mention the loss, could you show how you calculate it? Perhaps you need to reshape your inputs?

I use fastai’s built-in loss function CrossEntropyLossFlat. Yes, I want to reshape my input to match the target size, but I don’t know how to do that :sob:

When I look at functional.py on GitHub, I see this check (if input.size(0) != target.size(0)) appears only once, in the nll_loss function (line 2260).
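
For what it’s worth, the mismatch is easy to reproduce outside fastai. A minimal sketch (the shapes come from the error message; the tensors are made up for illustration):

import torch
import torch.nn.functional as F

# what cross_entropy expects for single-label classification:
logits  = torch.randn(64, 2)             # [batch_size, n_classes]
targets = torch.randint(0, 2, (64,))     # [batch_size] of class indices
print(F.cross_entropy(logits, targets))  # works: returns a scalar loss

# what the traceback says is actually happening:
bad_logits = torch.randn(1, 2)           # input batch_size 1
F.cross_entropy(bad_logits, targets)     # ValueError: Expected input batch_size (1)
                                         # to match target batch_size (64).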

Can you paste your whole code? I would like to see how dls is created, since I see it passed into the Learner constructor, and also how you train. I think I can be of more help then.

Here is the whole code. I use a library called blurr, which is a wrapper around transformers and fastai. I didn’t show the code that builds df, because there is a lot of text-preprocessing code involved.

# %%
from transformers import AutoModel, AutoTokenizer, AutoConfig
from fastai.text.all import *
from blurr.data.all import *
from blurr.modeling.all import *
import torch
from torch import nn
import torch.nn.functional as F

# %%
NUM_CLASSES = 2
BS = 64
MAX_LEN = 128
EPOCH = 5

# %%
hf_arch = 'distilbert'
tokenizer = AutoTokenizer.from_pretrained("cl-tohoku/bert-base-japanese-whole-word-masking")
distil_model = AutoModel.from_pretrained("bandainamco-mirai/distilbert-base-japanese") 

# %%
class DistilBertClassifier(torch.nn.Module):
  def __init__(self):
    super(DistilBertClassifier, self).__init__()
    self.distil_bert = distil_model
    self.linear = nn.Linear(768, NUM_CLASSES)
    nn.init.normal_(self.linear.weight, std=0.02)
    nn.init.normal_(self.linear.bias, 0)

  def forward(self, input_ids):
    vec, _ = self.distil_bert(input_ids)
    vec = vec[:,0,:]
    # shape here is [batch_size, 768]
    out = self.linear(vec)
    # shape here is [batch_size, 2]
    return out

# %%
blocks = (HF_TextBlock(hf_arch=hf_arch,
                       hf_tokenizer=tokenizer, 
                       padding='max_length', 
                       max_length=MAX_LEN), CategoryBlock)

dblock = DataBlock(blocks=blocks,
                   get_x=ColReader('data'), get_y=ColReader('label'),
                   splitter=IndexSplitter(range(len(df['val'][0]))))

dls = dblock.dataloaders(pd.DataFrame(data={
    'data': df['val'][0].tolist() + df['review'][0].tolist(),
    'label': df['val'][1].tolist() + df['review'][1].tolist()
}), bs=BS)

# %%
model = DistilBertClassifier()
learn = Learner(dls,
                HF_BaseModelWrapper(model),
                opt_func=partial(Adam, decouple_wd=True),
                loss_func=CrossEntropyLossFlat(),
                metrics=[accuracy, F1Score(average='macro')],
                cbs=[HF_BaseModelCallback],
                splitter=hf_splitter)

learn.create_opt()
learn.freeze()
learn.fit_one_cycle(EPOCH)
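
In case it helps, this is how I inspect what the DataLoaders yield (a quick sketch; as far as I understand, with blurr the inputs come through as a dict of tensors):

# grab one batch from the training DataLoader and compare shapes
xb, yb = dls.one_batch()
print(type(xb))   # with blurr this should be the tokenizer inputs (a dict of tensors)
print(yb.shape)   # expecting torch.Size([64]) for the labels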

def forward(self, input_ids):
  vec, _ = self.distil_bert(input_ids)
  vec = vec[:,0,:]
  # shape here is [batch_size, 768]
  out = self.linear(vec)
  # shape here is [batch_size, 2]
  return out

I was trying to replicate the forward function you had written. You say the highlighted line produces an output of [batch_size, 2]. Is it not [BS, MAX_LEN, NUM_CLASSES], i.e. (64, 128, 2)?

Can you print the shape using print(out.shape)?


The shape of input_ids is [bs, max_len].
After passing it through the model we get the hidden states, and slicing out the first token gives a fixed 768-dimensional vector per example, i.e. [bs, 768].
After the linear layer we get [bs, 2]. I have printed it; the shape is [bs, 2] here.
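
As a sanity check of the head alone, this is roughly what I ran (a sketch with a hypothetical dummy batch that bypasses the transformer):

# feed fake pooled vectors straight into the classification head
vec = torch.randn(BS, 768)   # stand-in for the first-token output of DistilBERT
out = model.linear(vec)
print(out.shape)             # torch.Size([64, 2])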

OK.
For some reason I haven’t been able to run fastai since this morning: when I import vision, I get the error “ModuleNotFoundError: No module named ‘fastai.callback.all’; ‘fastai.callback’ is not a package”.

Can you change your loss function from CrossEntropyLossFlat() to CrossEntropyLossFlat(is_2d=False)? I just want to see what message (if any) you get.
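
For context, the flatten step in fastai’s layers.py (visible in your traceback above) is:

if self.flatten: inp = inp.view(-1,inp.shape[-1]) if self.is_2d else inp.view(-1)

so with is_2d=False the predictions are flattened to 1-D instead of [N, n_classes].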

Another error occurred. Here is the full error message.

Traceback (most recent call last):
  File "/home/zchelllo/tweets/test.py", line 116, in <module>
    learn.fit_one_cycle(EPOCH)
  File "/home/zchelllo/anaconda3/lib/python3.8/site-packages/fastcore/logargs.py", line 56, in _f
    return inst if to_return else f(*args, **kwargs)
  File "/home/zchelllo/anaconda3/lib/python3.8/site-packages/fastai/callback/schedule.py", line 113, in fit_one_cycle
    self.fit(n_epoch, cbs=ParamScheduler(scheds)+L(cbs), reset_opt=reset_opt, wd=wd)
  File "/home/zchelllo/anaconda3/lib/python3.8/site-packages/fastcore/logargs.py", line 56, in _f
    return inst if to_return else f(*args, **kwargs)
  File "/home/zchelllo/anaconda3/lib/python3.8/site-packages/fastai/learner.py", line 207, in fit
    self._with_events(self._do_fit, 'fit', CancelFitException, self._end_cleanup)
  File "/home/zchelllo/anaconda3/lib/python3.8/site-packages/fastai/learner.py", line 155, in _with_events
    try:       self(f'before_{event_type}')       ;f()
  File "/home/zchelllo/anaconda3/lib/python3.8/site-packages/fastai/learner.py", line 197, in _do_fit
    self._with_events(self._do_epoch, 'epoch', CancelEpochException)
  File "/home/zchelllo/anaconda3/lib/python3.8/site-packages/fastai/learner.py", line 155, in _with_events
    try:       self(f'before_{event_type}')       ;f()
  File "/home/zchelllo/anaconda3/lib/python3.8/site-packages/fastai/learner.py", line 191, in _do_epoch
    self._do_epoch_train()
  File "/home/zchelllo/anaconda3/lib/python3.8/site-packages/fastai/learner.py", line 183, in _do_epoch_train
    self._with_events(self.all_batches, 'train', CancelTrainException)
  File "/home/zchelllo/anaconda3/lib/python3.8/site-packages/fastai/learner.py", line 155, in _with_events
    try:       self(f'before_{event_type}')       ;f()
  File "/home/zchelllo/anaconda3/lib/python3.8/site-packages/fastai/learner.py", line 161, in all_batches
    for o in enumerate(self.dl): self.one_batch(*o)
  File "/home/zchelllo/anaconda3/lib/python3.8/site-packages/fastai/learner.py", line 179, in one_batch
    self._with_events(self._do_one_batch, 'batch', CancelBatchException)
  File "/home/zchelllo/anaconda3/lib/python3.8/site-packages/fastai/learner.py", line 155, in _with_events
    try:       self(f'before_{event_type}')       ;f()
  File "/home/zchelllo/anaconda3/lib/python3.8/site-packages/fastai/learner.py", line 166, in _do_one_batch
    if len(self.yb): self.loss = self.loss_func(self.pred, *self.yb)
  File "/home/zchelllo/anaconda3/lib/python3.8/site-packages/fastai/layers.py", line 297, in __call__
    return self.func.__call__(inp, targ.view(-1) if self.flatten else targ, **kwargs)
  File "/home/zchelllo/anaconda3/lib/python3.8/site-packages/torch/nn/modules/module.py", line 722, in _call_impl
    result = self.forward(*input, **kwargs)
  File "/home/zchelllo/anaconda3/lib/python3.8/site-packages/torch/nn/modules/loss.py", line 947, in forward
    return F.cross_entropy(input, target, weight=self.weight,
  File "/home/zchelllo/anaconda3/lib/python3.8/site-packages/torch/nn/functional.py", line 2422, in cross_entropy
    return nll_loss(log_softmax(input, 1), target, weight, None, ignore_index, None, reduction)
  File "/home/zchelllo/anaconda3/lib/python3.8/site-packages/torch/nn/functional.py", line 1591, in log_softmax
    ret = input.log_softmax(dim)
IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1)
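
That IndexError is consistent with the flatten step quoted above: with is_2d=False the predictions are flattened to 1-D, so log_softmax(input, 1) has no dimension 1 to work on. A minimal repro sketch:

import torch
import torch.nn.functional as F

flat = torch.randn(64)       # 1-D logits, as produced by inp.view(-1)
F.log_softmax(flat, dim=1)   # IndexError: Dimension out of range
                             # (expected to be in range of [-1, 0], but got 1)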

Oh OK, sorry those approaches didn’t work.
I am still facing issues with fastai on my end; let me dig into this once things are up and running again.

OK, thank you very much.
In the meantime I’ve actually switched to Hugging Face’s Trainer, which is more integrated and easier to use, and it runs fine. Take your time.