Chapter 4 Further Research: Learner Matrix Multiplication Error

alejandrodgb · June 30, 2022, 2:05am

Hi, I am working on the further research section of chapter 4. I have built my own implementation of the Learner but keep getting a matrix multiplication problem (RuntimeError: mat1 and mat2 shapes cannot be multiplied (5376x28 and 784x30)). Here is my code:

Learner

class Lrner:
    """Minimal training-loop wrapper around a model, optimizer, loss and metric.

    Batches are handed to the model exactly as the data loaders yield them;
    no reshaping or flattening is performed here, so the trailing dimension
    of each ``xb`` must already match what the model's first layer expects.

    Args:
        dls: Indexable pair of data loaders. ``dls[0]`` is iterated for
            training and ``dls[1]`` for validation; both must yield
            ``(xb, yb)`` batches.
        model: Callable model exposing ``parameters()``.
        opt: Optimizer factory, called as ``opt(model.parameters(), lr=lr)``.
        loss_fn: Callable ``loss_fn(preds, yb)`` whose result supports
            ``backward()``.
        metric: Callable ``metric(preds, yb)`` returning one tensor per batch.
        lr: Learning rate passed to the optimizer factory. Defaults to 0.1,
            the value that used to be hard-coded.
    """

    def __init__(self, dls, model, opt, loss_fn, metric, lr=0.1):
        self.dl_train = dls[0]
        self.dl_valid = dls[1]
        self.model = model
        self.opt = opt(self.model.parameters(), lr=lr)
        self.loss = loss_fn
        self.metric = metric

    def cal_grad(self, xb, yb):
        """Run a forward pass on one batch and backpropagate its loss."""
        preds = self.model(xb)
        loss = self.loss(preds, yb)
        loss.backward()

    def train_epoch(self):
        """Take one optimizer step per training batch."""
        for xb, yb in self.dl_train:
            self.cal_grad(xb, yb)
            self.opt.step()
            # Clear gradients so the next batch does not accumulate onto them.
            self.opt.zero_grad()

    def validate_epoch(self):
        """Return the mean of the per-batch metric, rounded to 4 decimals."""
        # Validation never calls backward(), so skip autograd bookkeeping.
        with torch.no_grad():
            accs = [self.metric(self.model(xb), yb) for xb, yb in self.dl_valid]
        return round(torch.stack(accs).mean().item(), 4)

    def fit(self, epochs):
        """Train for ``epochs`` epochs, printing the validation metric after each."""
        for _ in range(epochs):
            self.train_epoch()
            print(self.validate_epoch(), end=' ')

Model

# Two fully connected layers with a ReLU in between and a Sigmoid on the
# single output. The first layer takes 28*28 = 784 features per sample.
simple_net = nn.Sequential(
    nn.Linear(in_features=28 * 28, out_features=30),
    nn.ReLU(),
    nn.Linear(in_features=30, out_features=1),
    nn.Sigmoid(),
)

Dataloaders

I am using the same dataloaders created in the chapter 4 exercises
# Lrner indexes this object as dls[0] (training) and dls[1] (validation);
# presumably those correspond to dl and valid_dl respectively — confirm.
dls = DataLoaders(dl, valid_dl)

Error

I instantiate my class:
# Positional arguments map to Lrner(dls, model, opt, loss_fn, metric).
# NOTE(review): simple_net ends in a one-unit Sigmoid, while F.cross_entropy
# normally takes multi-class logits — confirm this loss (and the accuracy
# metric) actually suits the model.
learner = Lrner(dls, simple_net, SGD, F.cross_entropy, accuracy)

I check the model:
In: learner.model
Out:

Sequential(
  (0): Linear(in_features=784, out_features=30, bias=True)
  (1): ReLU()
  (2): Linear(in_features=30, out_features=1, bias=True)
  (3): Sigmoid()
)

I try to fit it for one epoch:
In: learner.fit(1)
Out:

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
/var/folders/f1/bcv5hnqs1rd944j8hkw5gyb40000gn/T/ipykernel_3189/3857249191.py in <module>
----> 1 learner.fit(1)

/var/folders/f1/bcv5hnqs1rd944j8hkw5gyb40000gn/T/ipykernel_3189/1438402721.py in fit(self, epochs)
     25     def fit(self, epochs):
     26         for i in range(epochs):
---> 27             self.train_epoch()
     28             print(self.validate_epoch(),end=' ')

/var/folders/f1/bcv5hnqs1rd944j8hkw5gyb40000gn/T/ipykernel_3189/1438402721.py in train_epoch(self)
     15     def train_epoch(self):
     16         for xb, yb in self.dl_train:
---> 17             self.cal_grad(xb, yb)
     18             self.opt.step()
     19             self.opt.zero_grad()

/var/folders/f1/bcv5hnqs1rd944j8hkw5gyb40000gn/T/ipykernel_3189/1438402721.py in cal_grad(self, xb, yb)
      9 
     10     def cal_grad(self, xb, yb):
---> 11         preds = self.model(xb)
     12         loss = self.loss(preds, yb)
     13         loss.backward()

~/opt/anaconda3/envs/fastai/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
   1108         if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1109                 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1110             return forward_call(*input, **kwargs)
   1111         # Do not call functions when jit is used
   1112         full_backward_hooks, non_full_backward_hooks = [], []

~/opt/anaconda3/envs/fastai/lib/python3.7/site-packages/torch/nn/modules/container.py in forward(self, input)
    139     def forward(self, input):
    140         for module in self:
--> 141             input = module(input)
    142         return input
    143 

~/opt/anaconda3/envs/fastai/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
   1108         if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1109                 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1110             return forward_call(*input, **kwargs)
   1111         # Do not call functions when jit is used
   1112         full_backward_hooks, non_full_backward_hooks = [], []

~/opt/anaconda3/envs/fastai/lib/python3.7/site-packages/torch/nn/modules/linear.py in forward(self, input)
    101 
    102     def forward(self, input: Tensor) -> Tensor:
--> 103         return F.linear(input, self.weight, self.bias)
    104 
    105     def extra_repr(self) -> str:

~/opt/anaconda3/envs/fastai/lib/python3.7/site-packages/fastai/torch_core.py in __torch_function__(self, func, types, args, kwargs)
    339         convert=False
    340         if _torch_handled(args, self._opt, func): convert,types = type(self),(torch.Tensor,)
--> 341         res = super().__torch_function__(func, types, args=args, kwargs=kwargs)
    342         if convert: res = convert(res)
    343         if isinstance(res, TensorBase): res.set_meta(self, as_copy=True)

~/opt/anaconda3/envs/fastai/lib/python3.7/site-packages/torch/_tensor.py in __torch_function__(cls, func, types, args, kwargs)
   1140 
   1141         with _C.DisableTorchFunction():
-> 1142             ret = func(*args, **kwargs)
   1143             if func in get_default_nowrap_functions():
   1144                 return ret

RuntimeError: mat1 and mat2 shapes cannot be multiplied (5376x28 and 784x30)

I understand that the error refers to a multiplication of two matrices. I assume the (784x30) matrix is the weight matrix of the first layer, nn.Linear(28*28,30), but I have no idea where the (5376x28) matrix comes from.

Troubleshooting

I have tried testing with a single fully connected linear layer, nn.Linear(28*28,1), and I also copied the code from this response in the fastai forums, but I haven’t been able to get anywhere.

Would anyone know where the (5376x28) matrix comes from and how to fix this?

alejandrodgb · June 30, 2022, 3:18am

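Figured it out. The dataloaders I was passing in had been created with ImageDataLoaders rather than being the original dls from the chapter, so each batch was a stack of 28x28 images instead of flattened rows of 784 values. nn.Linear only multiplies along the last dimension, so the (5376x28) shape is consistent with a batch of 64 three-channel 28x28 images being treated as 64 x 3 x 28 = 5376 rows of 28 values each.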

# NOTE(review): presumably this is the ImageDataLoaders-based definition of
# dls referred to in the reply, which yields image batches rather than
# flattened 784-value rows — confirm.
dls = ImageDataLoaders.from_folder(path,num_workers=0)