RuntimeError: The size of tensor a (115) must match the size of tensor b (64) at non-singleton dimension 1

Hi everybody :slightly_smiling_face:
I have been facing this batch size problem for the past few weeks.
fastai version : 1.0.61
transformers version : 3.0.2
torch version : 1.7.0

I am trying to predict on text with Hugging Face transformers using RoBERTa. It works when the batch size is 1, but for any other batch size I get the following error.

In [5]:

----> 1 learner.lr_find()

~/jupyter_py3/lib/python3.6/site-packages/fastai/train.py in lr_find(learn, start_lr, end_lr, num_it, stop_div, wd)
39 cb = LRFinder(learn, start_lr, end_lr, num_it, stop_div)
40 epochs = int(np.ceil(num_it/len(learn.data.train_dl))) * (num_distrib() or 1)
---> 41 learn.fit(epochs, start_lr, callbacks=[cb], wd=wd)
42
43 def to_fp16(learn:Learner, loss_scale:float=None, max_noskip:int=1000, dynamic:bool=True, clip:float=None,

~/jupyter_py3/lib/python3.6/site-packages/fastai/basic_train.py in fit(self, epochs, lr, wd, callbacks)
198 else: self.opt.lr,self.opt.wd = lr,wd
199 callbacks = [cb(self) for cb in self.callback_fns + listify(defaults.extra_callback_fns)] + listify(callbacks)
--> 200 fit(epochs, self, metrics=self.metrics, callbacks=self.callbacks+callbacks)
201
202 def create_opt(self, lr:Floats, wd:Floats=0.)->None:

~/jupyter_py3/lib/python3.6/site-packages/fastai/basic_train.py in fit(epochs, learn, callbacks, metrics)
99 for xb,yb in progress_bar(learn.data.train_dl, parent=pbar):
100 xb, yb = cb_handler.on_batch_begin(xb, yb)
--> 101 loss = loss_batch(learn.model, xb, yb, learn.loss_func, learn.opt, cb_handler)
102 if cb_handler.on_batch_end(loss): break
103

~/jupyter_py3/lib/python3.6/site-packages/fastai/basic_train.py in loss_batch(model, xb, yb, loss_func, opt, cb_handler)
28
29 if not loss_func: return to_detach(out), to_detach(yb[0])
---> 30 loss = loss_func(out, *yb)
31
32 if opt is not None:

~/jupyter_py3/lib/python3.6/site-packages/torch/nn/functional.py in mse_loss(input, target, size_average, reduce, reduction)
2657 reduction = _Reduction.legacy_get_string(size_average, reduce)
2658
-> 2659 expanded_input, expanded_target = torch.broadcast_tensors(input, target)
2660 return torch._C._nn.mse_loss(expanded_input, expanded_target, _Reduction.get_enum(reduction))
2661

~/jupyter_py3/lib/python3.6/site-packages/torch/functional.py in broadcast_tensors(*tensors)
69 if any(type(t) is not Tensor for t in tensors) and has_torch_function(tensors):
70 return handle_torch_function(broadcast_tensors, tensors, *tensors)
---> 71 return _VF.broadcast_tensors(tensors) # type: ignore
72
73

RuntimeError: The size of tensor a (6) must match the size of tensor b (2) at non-singleton dimension 1

Can anyone please help me?