DataBunch has a different batch size than the one provided

I'm making a DataBunch for text data.
My last batch, however, has the wrong size (40 instead of 64).

bs=64
test_reg = TextList.from_df(df=df_test, cols='tweet', path=path, vocab=data_lm.vocab)
data_reg = (TextList.from_df(df=df_train, path=path, cols='tweet', vocab=data_lm.vocab)
            .split_by_rand_pct()
            .label_from_df(cols='offensive_language', label_cls=FloatList)
            .add_test(test_reg)
            .databunch(bs=bs))

Code to print the batch shapes:

it = iter(data_reg.valid_dl)
while True:
  try:
    x, y = next(it)
    print(x.shape, y.shape)
  except StopIteration:
    break

I get a last batch of size 40 even though I set bs=64:

torch.Size([40, 5]) torch.Size([40])

All batch shapes:

torch.Size([64, 99]) torch.Size([64])
torch.Size([64, 42]) torch.Size([64])
torch.Size([64, 39]) torch.Size([64])
torch.Size([64, 38]) torch.Size([64])
torch.Size([64, 36]) torch.Size([64])
torch.Size([64, 35]) torch.Size([64])
torch.Size([64, 34]) torch.Size([64])
torch.Size([64, 33]) torch.Size([64])
torch.Size([64, 32]) torch.Size([64])
torch.Size([64, 31]) torch.Size([64])
torch.Size([64, 30]) torch.Size([64])
torch.Size([64, 30]) torch.Size([64])
torch.Size([64, 29]) torch.Size([64])
torch.Size([64, 28]) torch.Size([64])
torch.Size([64, 27]) torch.Size([64])
torch.Size([64, 26]) torch.Size([64])
torch.Size([64, 25]) torch.Size([64])
torch.Size([64, 24]) torch.Size([64])
torch.Size([64, 23]) torch.Size([64])
torch.Size([64, 23]) torch.Size([64])
torch.Size([64, 22]) torch.Size([64])
torch.Size([64, 21]) torch.Size([64])
torch.Size([64, 21]) torch.Size([64])
torch.Size([64, 20]) torch.Size([64])
torch.Size([64, 19]) torch.Size([64])
torch.Size([64, 18]) torch.Size([64])
torch.Size([64, 18]) torch.Size([64])
torch.Size([64, 17]) torch.Size([64])
torch.Size([64, 16]) torch.Size([64])
torch.Size([64, 16]) torch.Size([64])
torch.Size([64, 15]) torch.Size([64])
torch.Size([64, 14]) torch.Size([64])
torch.Size([64, 14]) torch.Size([64])
torch.Size([64, 13]) torch.Size([64])
torch.Size([64, 12]) torch.Size([64])
torch.Size([64, 11]) torch.Size([64])
torch.Size([64, 11]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 8]) torch.Size([64])
torch.Size([64, 7]) torch.Size([64])
torch.Size([40, 5]) torch.Size([40])

Please help me with this.
If there is a way to drop the last batch, then I'll be fine with that.
Thanks a lot.

What do you mean they are 40? They're all 64, aren't they? (The shape is (bs, seq_len).)

Yes, and the last batch is of size (40, 5). (Please scroll down that list.)
(Sorry, I didn't explain it properly; I'll put in an edit.)

Ah yes. The validation set doesn't drop the last partial batch, and your data didn't split evenly into batches of 64. That's what's happening :slight_smile: (dropping the last partial batch only happens in the training DataLoader)
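If you just want the partial batch gone, drop_last=True is one option; here is a minimal sketch with a plain PyTorch DataLoader (a hypothetical toy dataset, you'd still need to adapt this to the fastai DataBunch):

import torch
from torch.utils.data import DataLoader, TensorDataset

# Hypothetical dataset of 104 items: with bs=64 the second batch would have 40 items.
ds = TensorDataset(torch.arange(104).unsqueeze(1))
dl = DataLoader(ds, batch_size=64, drop_last=True)  # the partial batch of 40 is discarded
print([b[0].shape[0] for b in dl])  # [64]

The more robust fix, though, is to make the model accept whatever batch size arrives, since dropping validation data skews the metrics slightly.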

So what should I do? My model expects a batch size of 64, so fit gives an error.

Can you show the error please?

It seems to be an error when the sequence is fed to my RNN; this is because I use a hidden state that has a batch size of 64 from the time I create the tensor.

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-69-c57d89101931> in <module>()
----> 1 learn_reg.fit(5, 1e-2)

17 frames
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/rnn.py in check_hidden_size(self, hx, expected_hidden_size, msg)
    174         # type: (Tensor, Tuple[int, int, int], str) -> None
    175         if hx.size() != expected_hidden_size:
--> 176             raise RuntimeError(msg.format(expected_hidden_size, tuple(hx.size())))
    177 
    178     def check_forward_args(self, input, hidden, batch_sizes):

RuntimeError: Expected hidden size (1, 40, 50), got (1, 64, 50)

This is the RNN:

class my_gru(nn.Module):
  def __init__(self, nh, bs, dropout=0, num_layers=1):
    super().__init__()
    self.nh, self.bs, self.num_layers = nh, bs, num_layers
    self.rnn = nn.GRU(self.nh, self.nh, num_layers, batch_first=True, dropout=dropout)
    # The hidden state is created once, with the fixed batch size `bs`
    if torch.cuda.is_available(): self.h = torch.zeros(self.num_layers, self.bs, self.nh, dtype=torch.double).cuda()
    else: self.h = torch.zeros(self.num_layers, self.bs, self.nh, dtype=torch.double)

  def forward(self, x):
    res, h = self.rnn(x, self.h)
    self.h = h.detach()  # carry the state forward without backpropagating across batches
    return res

  def reset(self):
    print('yo im the worst')
    with torch.no_grad():
      if torch.cuda.is_available(): self.h = torch.zeros(self.num_layers, self.bs, self.nh, dtype=torch.double).cuda()
      else: self.h = torch.zeros(self.num_layers, self.bs, self.nh, dtype=torch.double)
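
Since the hidden state is created once with a fixed bs, any batch of a different size (like the final 40-item validation batch) triggers the mismatch. One way around it, shown here as a sketch rather than the canonical fastai approach, is to let the hidden state follow the batch size of whatever tensor comes in:

import torch
from torch import nn

class my_gru_dynamic(nn.Module):
  """Variant of my_gru whose hidden state is sized from the incoming batch."""
  def __init__(self, nh, dropout=0, num_layers=1):
    super().__init__()
    self.nh, self.num_layers = nh, num_layers
    self.rnn = nn.GRU(nh, nh, num_layers, batch_first=True, dropout=dropout)
    self.h = None  # created lazily on the first forward pass

  def forward(self, x):
    bs = x.shape[0]
    # (Re)create the hidden state whenever the batch size changes,
    # e.g. for the final partial validation batch of 40.
    if self.h is None or self.h.shape[1] != bs:
      self.h = x.new_zeros(self.num_layers, bs, self.nh)
    res, h = self.rnn(x, self.h)
    self.h = h.detach()  # keep the state but cut the gradient history
    return res

  def reset(self):
    self.h = None

Because new_zeros inherits the dtype and device from x, the separate CUDA and double-precision branches aren't needed anymore.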

This is my language model:

LangModel(
  (encoder): LabEncoder(
    (rnn): my_gru(
      (rnn): GRU(50, 50, batch_first=True)
    )
    (encoder): Sequential(
      (0): Embedding(7368, 50)
      (1): my_gru(
        (rnn): GRU(50, 50, batch_first=True)
      )
    )
  )
  (decoder): LabDecoder(
    (decoder): Sequential(
      (0): BatchNorm1dFlat(50, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (1): Linear(in_features=50, out_features=7368, bias=True)
    )
  )
)

I pass this model's encoder to my regression model after wrapping it in a SentenceEncoder, as is done in Lesson 12 (MultiBatchEncoder in the fastai source). I didn't exactly understand how they output the same sequence length, so in my version I just concatenate a tensor of zeros to pad the sequence length up to a hard-coded max length.

class SentenceEncoder(nn.Module):
  def __init__(self, module, bptt, pad_idx=1, max_len=1000):
    super().__init__()
    self.bptt, self.module, self.pad_idx, self.max_len = bptt, module, pad_idx, max_len

  def concat(self, arrs, bs):
    # Stitch the per-chunk outputs back together along the sequence dimension
    return torch.cat([t for t in arrs], dim=1)

  def make_to_max_len(self, ip):
    # Pad the activations with zeros along the sequence dimension up to max_len
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    padd = torch.zeros(ip.shape[0], self.max_len - ip.shape[1], ip.shape[2], device=device, dtype=torch.double)
    return torch.cat([ip, padd], dim=1)

  def forward(self, input):
    bs, sl = input.size()
    self.module.bs = bs
    self.module.reset()
    outputs = []
    # Feed the sequence through the encoder one bptt-sized chunk at a time
    for i in range(0, sl, self.bptt):
      o = self.module(input[:, i: min(i + self.bptt, sl)])
      outputs.append(o)

    ops = self.concat(outputs, bs)
    three_d = self.make_to_max_len(ops)
    # Flatten to (bs, max_len * nh) for the linear head
    return three_d.view(ops.shape[0], -1)
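
For reference, a quick shape check on make_to_max_len (a hypothetical snippet; the module argument isn't used by this method), showing that it pads the activations out to max_len along the sequence dimension before everything is flattened for the linear head:

import torch

enc = SentenceEncoder(module=None, bptt=70, max_len=1000)
device = 'cuda' if torch.cuda.is_available() else 'cpu'
dummy = torch.zeros(2, 7, 50, dtype=torch.double, device=device)  # (bs, seq_len, nh)
print(enc.make_to_max_len(dummy).shape)  # torch.Size([2, 1000, 50])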

This is the final model:

class RegModel(nn.Module):
  def __init__(self, learn_lm, y_range=[-0.5, 3.5]):
    super(RegModel, self).__init__()
    self.learn_lm = learn_lm
    self.encoder = SentenceEncoder(learn_lm.model.encoder, learn_lm.data.bptt, learn_lm.data.vocab.stoi[PAD])
    # Linear head over the flattened, padded encoder output
    self.plc = nn.Sequential(
      nn.Linear(self.encoder.max_len*50, 1).double(),
    )
    self.y_range = y_range

  def forward(self, x):
    print(x.shape)  # debug: incoming batch shape
    x = self.encoder(x)
    x = self.plc(x)
    # Squash the output into y_range (note: this scales by the range width but never adds y_range[0])
    x = torch.sigmoid(x)
    x = x*(self.y_range[1] - self.y_range[0])
    x = torch.squeeze(x)
    return x.type(torch.float)