Understanding dimensions of ImageDataBunch

Hey all,

I’m trying to recreate the simple NN for logistic regression in PyTorch for the MNIST_TINY dataset (following lesson4-mnist_sgd.ipynb from the ml1 course) in the new fastai v1 library; however, I’m receiving a size mismatch when the input data reaches the first linear layer.

from fastai import *
from fastai.vision import *
import torch.nn as nn

# Fully connected classifier: 784 -> 100 -> 100 -> 10 with ReLU between the
# linear layers and a log-softmax output, moved to the GPU via .cuda().
# NOTE(review): nothing in this stack flattens the input before the first
# Linear, so that layer is applied to whatever the last input dimension is.
net = nn.Sequential(
    nn.Linear(28*28, 100),  # in_features = 28*28 = 784
    nn.ReLU(),
    nn.Linear(100, 100),
    nn.ReLU(),
    nn.Linear(100, 10),  # 10 outputs
    nn.LogSoftmax()  # NOTE(review): no explicit dim argument is passed
).cuda()

# Fetch the MNIST_TINY dataset (presumably downloads/extracts it and returns
# the local path - confirm against the fastai docs) and build the data bunch
# from its folder layout.
path = untar_data(URLs.MNIST_TINY)
# Only the path is passed; batch size, image size and channel handling are
# left at the library defaults - TODO confirm what those defaults are.
data = ImageDataBunch.from_folder(path)

# Negative log-likelihood loss, used with the LogSoftmax output of `net`.
loss=nn.NLLLoss()
metrics=[accuracy]
# SGD over the network parameters: learning rate 1e-1, momentum 0.9,
# weight decay 1e-3.
opt=optim.SGD(net.parameters(), 1e-1, momentum=0.9, weight_decay=1e-3)

# Train for 5 epochs; this is the call that raises the size mismatch shown in the traceback.
fit(model=net, data=data, epochs=5, loss_func=loss, opt=opt, metrics=metrics)

I get the following error:

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-7-5a60934d98ae> in <module>
----> 1 fit(model=net, data=data, epochs=5, loss_func=loss, opt=opt, metrics=metrics)

~/anaconda3/envs/fastaiv1/lib/python3.7/site-packages/fastai/basic_train.py in fit(epochs, model, loss_func, opt, data, callbacks, metrics)
     92     except Exception as e:
     93         exception = e
---> 94         raise e
     95     finally: cb_handler.on_train_end(exception)
     96 

~/anaconda3/envs/fastaiv1/lib/python3.7/site-packages/fastai/basic_train.py in fit(epochs, model, loss_func, opt, data, callbacks, metrics)
     82             for xb,yb in progress_bar(data.train_dl, parent=pbar):
     83                 xb, yb = cb_handler.on_batch_begin(xb, yb)
---> 84                 loss = loss_batch(model, xb, yb, loss_func, opt, cb_handler)
     85                 if cb_handler.on_batch_end(loss): break
     86 

~/anaconda3/envs/fastaiv1/lib/python3.7/site-packages/fastai/basic_train.py in loss_batch(model, xb, yb, loss_func, opt, cb_handler)
     16     if not is_listy(xb): xb = [xb]
     17     if not is_listy(yb): yb = [yb]
---> 18     out = model(*xb)
     19     out = cb_handler.on_loss_begin(out)
     20 

~/anaconda3/envs/fastaiv1/lib/python3.7/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
    487             result = self._slow_forward(*input, **kwargs)
    488         else:
--> 489             result = self.forward(*input, **kwargs)
    490         for hook in self._forward_hooks.values():
    491             hook_result = hook(self, input, result)

~/anaconda3/envs/fastaiv1/lib/python3.7/site-packages/torch/nn/modules/container.py in forward(self, input)
     90     def forward(self, input):
     91         for module in self._modules.values():
---> 92             input = module(input)
     93         return input
     94 

~/anaconda3/envs/fastaiv1/lib/python3.7/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
    487             result = self._slow_forward(*input, **kwargs)
    488         else:
--> 489             result = self.forward(*input, **kwargs)
    490         for hook in self._forward_hooks.values():
    491             hook_result = hook(self, input, result)

~/anaconda3/envs/fastaiv1/lib/python3.7/site-packages/torch/nn/modules/linear.py in forward(self, input)
     65     @weak_script_method
     66     def forward(self, input):
---> 67         return F.linear(input, self.weight, self.bias)
     68 
     69     def extra_repr(self):

~/anaconda3/envs/fastaiv1/lib/python3.7/site-packages/torch/nn/functional.py in linear(input, weight, bias)
   1352         ret = torch.addmm(torch.jit._unwrap_optional(bias), input, weight.t())
   1353     else:
-> 1354         output = input.matmul(weight.t())
   1355         if bias is not None:
   1356             output += torch.jit._unwrap_optional(bias)

RuntimeError: size mismatch, m1: [5376 x 28], m2: [784 x 100] at /opt/conda/conda-bld/pytorch-nightly_1544092528828/work/aten/src/THC/generic/THCTensorMathBlas.cu:266

The fit routine works fine if I modify the NN to:

# Variant of the network whose first Linear takes 28 input features instead of
# 28*28, with a reshape inserted just before the log-softmax.
# NOTE(review): because the reshape comes after the last Linear, the flattened
# width is larger than 10 whenever the input has more than two dimensions -
# confirm this is the intended output shape.
net = nn.Sequential(
    nn.Linear(28,100),  # in_features = 28
    nn.ReLU(),
    nn.Linear(100, 100),
    nn.ReLU(),
    nn.Linear(100, 10),
    # Keep the first dimension and collapse all remaining dimensions into one.
    Lambda(lambda x: x.view(x.size(0),-1)),
    nn.LogSoftmax()  # NOTE(review): no explicit dim argument is passed
).cuda()

Though I’m not 100% sure this is correct. In the previous version of the notebook, the first linear layer took in a 28x28 pixel image, so I understood why the dimensions were (28*28, 100). Here I’m not certain what the 5376 x 28 represents. I believe it’s related to a batch size of 64 (perhaps the default of the ImageDataBunch class?) and 3 channels (64 × 3 × 28 = 5376), but I can’t quite put the pieces together.