Hey all,
I’m trying to recreate the simple NN for logistic regression in PyTorch on the MNIST_TINY dataset (following lesson4-mnist_sgd.ipynb from the ml1 course) using the new fastai v1 library; however, I’m getting a size mismatch when the input data reaches the first linear layer.
from fastai import *
from fastai.vision import *
import torch.nn as nn
# Fully connected classifier (784 -> 100 -> 100 -> 10) with ReLU between the
# linear layers and a LogSoftmax output, moved to the GPU with .cuda().
net = nn.Sequential(
# This layer expects inputs whose last dimension is 28*28 = 784. Nothing in
# this Sequential flattens the batch first, and the traceback below fails in
# a Linear layer with "m1: [5376 x 28], m2: [784 x 100]" -- the [784 x 100]
# operand matches this layer's transposed weight.
# NOTE(review): 5376 = 64 * 3 * 28, so the batch is presumably shaped
# (64, 3, 28, 28) -- confirm by inspecting one batch from data.train_dl.
nn.Linear(28*28, 100),
nn.ReLU(),
nn.Linear(100, 100),
nn.ReLU(),
nn.Linear(100, 10),
# NOTE(review): no `dim` argument is passed; PyTorch documents the implicit
# dimension choice as deprecated -- dim=1 is probably intended, confirm.
nn.LogSoftmax()
).cuda()
# Fetch the MNIST_TINY dataset and build the data bunch from its folder layout.
path = untar_data(URLs.MNIST_TINY)
data = ImageDataBunch.from_folder(path)
# Negative log-likelihood loss, paired with the LogSoftmax output of `net`.
loss=nn.NLLLoss()
metrics=[accuracy]
# SGD over the network's parameters; the second positional argument (1e-1)
# is the learning rate.
opt=optim.SGD(net.parameters(), 1e-1, momentum=0.9, weight_decay=1e-3)
# Train for 5 epochs; this is the call that raises the RuntimeError shown in
# the traceback below.
fit(model=net, data=data, epochs=5, loss_func=loss, opt=opt, metrics=metrics)
I get the following error:
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-7-5a60934d98ae> in <module>
----> 1 fit(model=net, data=data, epochs=5, loss_func=loss, opt=opt, metrics=metrics)
~/anaconda3/envs/fastaiv1/lib/python3.7/site-packages/fastai/basic_train.py in fit(epochs, model, loss_func, opt, data, callbacks, metrics)
92 except Exception as e:
93 exception = e
---> 94 raise e
95 finally: cb_handler.on_train_end(exception)
96
~/anaconda3/envs/fastaiv1/lib/python3.7/site-packages/fastai/basic_train.py in fit(epochs, model, loss_func, opt, data, callbacks, metrics)
82 for xb,yb in progress_bar(data.train_dl, parent=pbar):
83 xb, yb = cb_handler.on_batch_begin(xb, yb)
---> 84 loss = loss_batch(model, xb, yb, loss_func, opt, cb_handler)
85 if cb_handler.on_batch_end(loss): break
86
~/anaconda3/envs/fastaiv1/lib/python3.7/site-packages/fastai/basic_train.py in loss_batch(model, xb, yb, loss_func, opt, cb_handler)
16 if not is_listy(xb): xb = [xb]
17 if not is_listy(yb): yb = [yb]
---> 18 out = model(*xb)
19 out = cb_handler.on_loss_begin(out)
20
~/anaconda3/envs/fastaiv1/lib/python3.7/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
487 result = self._slow_forward(*input, **kwargs)
488 else:
--> 489 result = self.forward(*input, **kwargs)
490 for hook in self._forward_hooks.values():
491 hook_result = hook(self, input, result)
~/anaconda3/envs/fastaiv1/lib/python3.7/site-packages/torch/nn/modules/container.py in forward(self, input)
90 def forward(self, input):
91 for module in self._modules.values():
---> 92 input = module(input)
93 return input
94
~/anaconda3/envs/fastaiv1/lib/python3.7/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
487 result = self._slow_forward(*input, **kwargs)
488 else:
--> 489 result = self.forward(*input, **kwargs)
490 for hook in self._forward_hooks.values():
491 hook_result = hook(self, input, result)
~/anaconda3/envs/fastaiv1/lib/python3.7/site-packages/torch/nn/modules/linear.py in forward(self, input)
65 @weak_script_method
66 def forward(self, input):
---> 67 return F.linear(input, self.weight, self.bias)
68
69 def extra_repr(self):
~/anaconda3/envs/fastaiv1/lib/python3.7/site-packages/torch/nn/functional.py in linear(input, weight, bias)
1352 ret = torch.addmm(torch.jit._unwrap_optional(bias), input, weight.t())
1353 else:
-> 1354 output = input.matmul(weight.t())
1355 if bias is not None:
1356 output += torch.jit._unwrap_optional(bias)
RuntimeError: size mismatch, m1: [5376 x 28], m2: [784 x 100] at /opt/conda/conda-bld/pytorch-nightly_1544092528828/work/aten/src/THC/generic/THCTensorMathBlas.cu:266
The fit routine works fine if I modify the NN to:
# Variant of the network for which the fit routine is reported to run without
# the size-mismatch error.
net = nn.Sequential(
# in_features=28 matches the last dimension of the un-flattened input (the
# "28" in "m1: [5376 x 28]" from the traceback above).
nn.Linear(28,100),
nn.ReLU(),
nn.Linear(100, 100),
nn.ReLU(),
nn.Linear(100, 10),
# Collapses every dimension after the batch dimension into one, but only
# after the linear layers have already been applied.
# NOTE(review): if the input is (64, 3, 28, 28), the output here would be
# (64, 3*28*10) = 840 scores per image rather than 10. The flatten likely
# belongs before the first layer, with in_features = 3*28*28 -- confirm.
Lambda(lambda x: x.view(x.size(0),-1)),
# NOTE(review): no `dim` argument is passed; PyTorch documents the implicit
# dimension choice as deprecated -- dim=1 is probably intended, confirm.
nn.LogSoftmax()
).cuda()
Though I’m not 100% sure this is correct. In the previous version of the notebook, the first linear layer took in a 28x28 pixel image, so I understood why the dimensions were (28*28, 100). Here I’m not certain what the 5376 x 28 represents. I believe it’s related to a batch size of 64 (perhaps the default of the ImageDataBunch class?) and 3 channels, but I can’t quite put the pieces together.