DataBunch has a different batch size than the one provided

I'm making a DataBunch for text data.
My last batch, however, has the wrong size (40 instead of 64).

bs=64
test_reg = TextList.from_df(df=df_test, cols='tweet', path=path, vocab=data_lm.vocab)
data_reg = (TextList.from_df(df=df_train, path=path, cols='tweet', vocab=data_lm.vocab)
            .split_by_rand_pct()
            .label_from_df(cols='offensive_language', label_cls=FloatList)
            .add_test(test_reg)
            .databunch(bs=bs))

Code to print the batch shapes:

it = iter(data_reg.valid_dl)
while True:
  try:
    x, y = next(it)
    print(x.shape, y.shape)
  except StopIteration:
    break

I get a last batch of size 40 even though I set bs=64:

torch.Size([40, 5]) torch.Size([40])

All batch shapes:

torch.Size([64, 99]) torch.Size([64])
torch.Size([64, 42]) torch.Size([64])
torch.Size([64, 39]) torch.Size([64])
torch.Size([64, 38]) torch.Size([64])
torch.Size([64, 36]) torch.Size([64])
torch.Size([64, 35]) torch.Size([64])
torch.Size([64, 34]) torch.Size([64])
torch.Size([64, 33]) torch.Size([64])
torch.Size([64, 32]) torch.Size([64])
torch.Size([64, 31]) torch.Size([64])
torch.Size([64, 30]) torch.Size([64])
torch.Size([64, 30]) torch.Size([64])
torch.Size([64, 29]) torch.Size([64])
torch.Size([64, 28]) torch.Size([64])
torch.Size([64, 27]) torch.Size([64])
torch.Size([64, 26]) torch.Size([64])
torch.Size([64, 25]) torch.Size([64])
torch.Size([64, 24]) torch.Size([64])
torch.Size([64, 23]) torch.Size([64])
torch.Size([64, 23]) torch.Size([64])
torch.Size([64, 22]) torch.Size([64])
torch.Size([64, 21]) torch.Size([64])
torch.Size([64, 21]) torch.Size([64])
torch.Size([64, 20]) torch.Size([64])
torch.Size([64, 19]) torch.Size([64])
torch.Size([64, 18]) torch.Size([64])
torch.Size([64, 18]) torch.Size([64])
torch.Size([64, 17]) torch.Size([64])
torch.Size([64, 16]) torch.Size([64])
torch.Size([64, 16]) torch.Size([64])
torch.Size([64, 15]) torch.Size([64])
torch.Size([64, 14]) torch.Size([64])
torch.Size([64, 14]) torch.Size([64])
torch.Size([64, 13]) torch.Size([64])
torch.Size([64, 12]) torch.Size([64])
torch.Size([64, 11]) torch.Size([64])
torch.Size([64, 11]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 8]) torch.Size([64])
torch.Size([64, 7]) torch.Size([64])
torch.Size([40, 5]) torch.Size([40])

Please help me with this.
If there is a way to drop the last batch, then I'll be fine with that.
Thanks a lot.

What do you mean they are 40? They're all 64, aren't they? (The shape is (bs, seq_len).)

Yes, and the last batch is of size (40, 5). (Please scroll down that list.)
(Sorry, I didn't explain it properly; I'll put in an edit.)

Ah yes. The validation set doesn't drop the last partial batch, and your data didn't split evenly into batches of 64. That's what's happening :slight_smile: (dropping the last partial batch only happens in the training DataLoader)
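If you just want the partial batch gone, drop_last=True is one option; here is a minimal sketch with a plain PyTorch DataLoader (a hypothetical toy dataset, you'd still need to adapt this to the fastai DataBunch):

import torch
from torch.utils.data import DataLoader, TensorDataset

# Hypothetical dataset of 104 items: with bs=64 the second batch would have 40 items.
ds = TensorDataset(torch.arange(104).unsqueeze(1))
dl = DataLoader(ds, batch_size=64, drop_last=True)  # the partial batch of 40 is discarded
print([b[0].shape[0] for b in dl])  # [64]

The more robust fix, though, is to make the model accept whatever batch size arrives, since dropping validation data skews the metrics slightly.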

So what should I do? My model expects a batch size of 64, so fit gives an error.

Can you show the error please?

It seems to be an error when the sequence is fed to my RNN; this is because I use a hidden state that has a batch size of 64 from the time I create the tensor.

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-69-c57d89101931> in <module>()
----> 1 learn_reg.fit(5, 1e-2)

17 frames
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/rnn.py in check_hidden_size(self, hx, expected_hidden_size, msg)
    174         # type: (Tensor, Tuple[int, int, int], str) -> None
    175         if hx.size() != expected_hidden_size:
--> 176             raise RuntimeError(msg.format(expected_hidden_size, tuple(hx.size())))
    177 
    178     def check_forward_args(self, input, hidden, batch_sizes):

RuntimeError: Expected hidden size (1, 40, 50), got (1, 64, 50)

This is the RNN:

class my_gru(nn.Module):
  def __init__(self, nh, bs, dropout=0, num_layers=1):
    super().__init__()
    self.nh, self.bs, self.num_layers = nh, bs, num_layers
    self.rnn = nn.GRU(self.nh, self.nh, num_layers, batch_first=True, dropout=dropout)
    # The hidden state is created once, with the fixed batch size `bs`
    if torch.cuda.is_available(): self.h = torch.zeros(self.num_layers, self.bs, self.nh, dtype=torch.double).cuda()
    else: self.h = torch.zeros(self.num_layers, self.bs, self.nh, dtype=torch.double)

  def forward(self, x):
    res, h = self.rnn(x, self.h)
    self.h = h.detach()  # carry the state forward without backpropagating across batches
    return res

  def reset(self):
    print('yo im the worst')
    with torch.no_grad():
      if torch.cuda.is_available(): self.h = torch.zeros(self.num_layers, self.bs, self.nh, dtype=torch.double).cuda()
      else: self.h = torch.zeros(self.num_layers, self.bs, self.nh, dtype=torch.double)
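
Since the hidden state is created once with a fixed bs, any batch of a different size (like the final 40-item validation batch) triggers the mismatch. One way around it, shown here as a sketch rather than the canonical fastai approach, is to let the hidden state follow the batch size of whatever tensor comes in:

import torch
from torch import nn

class my_gru_dynamic(nn.Module):
  """Variant of my_gru whose hidden state is sized from the incoming batch."""
  def __init__(self, nh, dropout=0, num_layers=1):
    super().__init__()
    self.nh, self.num_layers = nh, num_layers
    self.rnn = nn.GRU(nh, nh, num_layers, batch_first=True, dropout=dropout)
    self.h = None  # created lazily on the first forward pass

  def forward(self, x):
    bs = x.shape[0]
    # (Re)create the hidden state whenever the batch size changes,
    # e.g. for the final partial validation batch of 40.
    if self.h is None or self.h.shape[1] != bs:
      self.h = x.new_zeros(self.num_layers, bs, self.nh)
    res, h = self.rnn(x, self.h)
    self.h = h.detach()  # keep the state but cut the gradient history
    return res

  def reset(self):
    self.h = None

Because new_zeros inherits the dtype and device from x, the separate CUDA and double-precision branches aren't needed anymore.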

This is my language model:

LangModel(
  (encoder): LabEncoder(
    (rnn): my_gru(
      (rnn): GRU(50, 50, batch_first=True)
    )
    (encoder): Sequential(
      (0): Embedding(7368, 50)
      (1): my_gru(
        (rnn): GRU(50, 50, batch_first=True)
      )
    )
  )
  (decoder): LabDecoder(
    (decoder): Sequential(
      (0): BatchNorm1dFlat(50, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (1): Linear(in_features=50, out_features=7368, bias=True)
    )
  )
)

I pass this model's encoder to my regression model after wrapping it in a SentenceEncoder, as is done in Lesson 12 (MultiBatchEncoder in the fastai source). I didn't exactly understand how they output the same sequence length, so in my version I just concatenate a tensor of zeros to pad the sequence length up to a hard-coded max length.

class SentenceEncoder(nn.Module):
  def __init__(self, module, bptt, pad_idx=1, max_len=1000):
    super().__init__()
    self.bptt, self.module, self.pad_idx, self.max_len = bptt, module, pad_idx, max_len

  def concat(self, arrs, bs):
    # Stitch the per-chunk outputs back together along the sequence dimension
    return torch.cat([t for t in arrs], dim=1)

  def make_to_max_len(self, ip):
    # Pad the activations with zeros along the sequence dimension up to max_len
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    padd = torch.zeros(ip.shape[0], self.max_len - ip.shape[1], ip.shape[2], device=device, dtype=torch.double)
    return torch.cat([ip, padd], dim=1)

  def forward(self, input):
    bs, sl = input.size()
    self.module.bs = bs
    self.module.reset()
    outputs = []
    # Feed the sequence through the encoder one bptt-sized chunk at a time
    for i in range(0, sl, self.bptt):
      o = self.module(input[:, i: min(i + self.bptt, sl)])
      outputs.append(o)

    ops = self.concat(outputs, bs)
    three_d = self.make_to_max_len(ops)
    # Flatten to (bs, max_len * nh) for the linear head
    return three_d.view(ops.shape[0], -1)
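
For reference, a quick shape check on make_to_max_len (a hypothetical snippet; the module argument isn't used by this method), showing that it pads the activations out to max_len along the sequence dimension before everything is flattened for the linear head:

import torch

enc = SentenceEncoder(module=None, bptt=70, max_len=1000)
device = 'cuda' if torch.cuda.is_available() else 'cpu'
dummy = torch.zeros(2, 7, 50, dtype=torch.double, device=device)  # (bs, seq_len, nh)
print(enc.make_to_max_len(dummy).shape)  # torch.Size([2, 1000, 50])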

This is the final model:

class RegModel(nn.Module):
  def __init__(self, learn_lm, y_range=[-0.5, 3.5]):
    super(RegModel, self).__init__()
    self.learn_lm = learn_lm
    self.encoder = SentenceEncoder(learn_lm.model.encoder, learn_lm.data.bptt, learn_lm.data.vocab.stoi[PAD])
    # Linear head over the flattened, padded encoder output
    self.plc = nn.Sequential(
      nn.Linear(self.encoder.max_len*50, 1).double(),
    )
    self.y_range = y_range

  def forward(self, x):
    print(x.shape)  # debug: incoming batch shape
    x = self.encoder(x)
    x = self.plc(x)
    # Squash the output into y_range (note: this scales by the range width but never adds y_range[0])
    x = torch.sigmoid(x)
    x = x*(self.y_range[1] - self.y_range[0])
    x = torch.squeeze(x)
    return x.type(torch.float)