Dimension and mse() error while running lr_find(learn)


Since I am using .npz image files, I have defined a custom data generator like this:

class data_gen(torch.utils.data.Dataset):
    """Dataset that serves one image tensor per .npz file.

    Each item is read from the archive's ``'x'`` entry and returned as a
    float tensor with a leading axis of size 1, i.e. shape
    ``(1, x.shape[0], x.shape[1])``. Only the image is returned; no
    target is paired with it.
    """

    def __init__(self, files):
        # Indexable collection of .npz paths, one archive per sample.
        self.files = files

    def __len__(self):
        """Return the number of samples, i.e. the number of file paths."""
        return len(self.files)

    def __getitem__(self, i):
        """Load the i-th archive and return its ``'x'`` array as a float tensor."""
        archive = np.load(self.files[i], allow_pickle=True)
        pixels = archive['x']
        # Prepend a size-1 axis in front of the first two dimensions; the
        # reshape only succeeds when the array holds exactly
        # shape[0] * shape[1] elements.
        rows, cols = pixels.shape[0], pixels.shape[1]
        stacked = np.reshape(pixels, (1, rows, cols))
        return torch.from_numpy(stacked).float()

My model was inspired by the one here. The model is as follows:

  (0): Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(1, 1))
  (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU(inplace=True)
  (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (4): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (5): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (downsample): Sequential(
        (0): Conv2d(64, 128, kernel_size=(1, 1), stride=(2, 2), bias=False)
        (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (1): BasicBlock(
      (conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): BasicBlock(
      (conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): BasicBlock(
      (conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (6): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (downsample): Sequential(
        (0): Conv2d(128, 256, kernel_size=(1, 1), stride=(2, 2), bias=False)
        (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (1): BasicBlock(
      (conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): BasicBlock(
      (conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): BasicBlock(
      (conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (4): BasicBlock(
      (conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): BasicBlock(
      (conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (CodeIn): Conv2d(256, 32, kernel_size=(2, 2), stride=(1, 1))
  (CodeOut): Sequential(
    (0): UpSample(
      (conv): Conv2d(32, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (ReLU): ReLU(inplace=True)
    (BN): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (Upsample0): Sequential(
    (0): UpSample(
      (conv): Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (ReLU): ReLU(inplace=True)
    (BN): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (Upsample1): Sequential(
    (0): UpSample(
      (conv): Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (ReLU): ReLU(inplace=True)
    (BN): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (Upsample2): Sequential(
    (0): UpSample(
      (conv): Conv2d(64, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (ReLU): ReLU(inplace=True)
    (BN): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (Upsample3): Sequential(
    (0): UpSample(
      (conv): Conv2d(32, 1, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (Sigmoid): Sigmoid()

After defining the DataLoader:

# Wrap the training and test file lists in the custom Dataset.
train_ds = data_gen(X_train)
test_ds = data_gen(X_test)
# Build fastai DataLoaders from the two datasets, with batches of batch_size placed on 'cuda:0'.
dls = DataLoaders.from_dsets(train_ds, test_ds, bs=batch_size, device='cuda:0')

When I use one batch of the data, like this b = dls.one_batch(), I do get what I expect:
m(b).size() = torch.Size([8, 1, 512, 512])
But when I run

# Learner combining the DataLoaders, the model m and a mean-squared-error loss.
learn = Learner(dls=dls,model=m,loss_func=F.mse_loss)

and then call learn.summary(), I get the following error:

RuntimeError                              Traceback (most recent call last)
<ipython-input-37-ea924a07f8d6> in <module>
      1 # learn.final_record
----> 2 learn.summary()

~/anaconda3/envs/fastai/lib/python3.8/site-packages/fastai/callback/hook.py in summary(self)
    187     "Print a summary of the model, optimizer and loss function."
    188     xb = self.dls.train.one_batch()[:self.dls.train.n_inp]
--> 189     res = module_summary(self, *xb)
    190     res += f"Optimizer used: {self.opt_func}\nLoss function: {self.loss_func}\n\n"
    191     if self.opt is not None:

~/anaconda3/envs/fastai/lib/python3.8/site-packages/fastai/callback/hook.py in module_summary(learn, *xb)
    162     #  thus are not counted inside the summary
    163     #TODO: find a way to have them counted in param number somehow
--> 164     infos = layer_info(learn, *xb)
    165     n,bs = 64,find_bs(xb)
    166     inp_sz = _print_shapes(apply(lambda x:x.shape, xb), bs)

~/anaconda3/envs/fastai/lib/python3.8/site-packages/fastai/callback/hook.py in layer_info(learn, *xb)
    148     with Hooks(flatten_model(learn.model), _track) as h:
    149         batch = apply(lambda o:o[:1], xb)
--> 150         with learn: r = learn.get_preds(dl=[batch], inner=True, reorder=False)
    151         return h.stored

~/anaconda3/envs/fastai/lib/python3.8/site-packages/fastai/learner.py in get_preds(self, ds_idx, dl, with_input, with_decoded, with_loss, act, inner, reorder, cbs, n_workers, **kwargs)
    233         if with_loss: ctx_mgrs.append(self.loss_not_reduced())
    234         with ContextManagers(ctx_mgrs):
--> 235             self._do_epoch_validate(dl=dl)
    236             if act is None: act = getattr(self.loss_func, 'activation', noop)
    237             res = cb.all_tensors()

~/anaconda3/envs/fastai/lib/python3.8/site-packages/fastai/learner.py in _do_epoch_validate(self, ds_idx, dl)
    186         if dl is None: dl = self.dls[ds_idx]
    187         self.dl = dl
--> 188         with torch.no_grad(): self._with_events(self.all_batches, 'validate', CancelValidException)
    190     def _do_epoch(self):

~/anaconda3/envs/fastai/lib/python3.8/site-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
    154     def _with_events(self, f, event_type, ex, final=noop):
--> 155         try:       self(f'before_{event_type}')       ;f()
    156         except ex: self(f'after_cancel_{event_type}')
    157         finally:   self(f'after_{event_type}')        ;final()

~/anaconda3/envs/fastai/lib/python3.8/site-packages/fastai/learner.py in all_batches(self)
    159     def all_batches(self):
    160         self.n_iter = len(self.dl)
--> 161         for o in enumerate(self.dl): self.one_batch(*o)
    163     def _do_one_batch(self):

~/anaconda3/envs/fastai/lib/python3.8/site-packages/fastai/learner.py in one_batch(self, i, b)
    177         self.iter = i
    178         self._split(b)
--> 179         self._with_events(self._do_one_batch, 'batch', CancelBatchException)
    181     def _do_epoch_train(self):

~/anaconda3/envs/fastai/lib/python3.8/site-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
    154     def _with_events(self, f, event_type, ex, final=noop):
--> 155         try:       self(f'before_{event_type}')       ;f()
    156         except ex: self(f'after_cancel_{event_type}')
    157         finally:   self(f'after_{event_type}')        ;final()

~/anaconda3/envs/fastai/lib/python3.8/site-packages/fastai/learner.py in _do_one_batch(self)
    163     def _do_one_batch(self):
--> 164         self.pred = self.model(*self.xb)
    165         self('after_pred')
    166         if len(self.yb): self.loss = self.loss_func(self.pred, *self.yb)

~/anaconda3/envs/fastai/lib/python3.8/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
    720             result = self._slow_forward(*input, **kwargs)
    721         else:
--> 722             result = self.forward(*input, **kwargs)
    723         for hook in itertools.chain(
    724                 _global_forward_hooks.values(),

~/anaconda3/envs/fastai/lib/python3.8/site-packages/torch/nn/modules/container.py in forward(self, input)
    115     def forward(self, input):
    116         for module in self:
--> 117             input = module(input)
    118         return input

~/anaconda3/envs/fastai/lib/python3.8/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
    720             result = self._slow_forward(*input, **kwargs)
    721         else:
--> 722             result = self.forward(*input, **kwargs)
    723         for hook in itertools.chain(
    724                 _global_forward_hooks.values(),

~/anaconda3/envs/fastai/lib/python3.8/site-packages/torch/nn/modules/conv.py in forward(self, input)
    418     def forward(self, input: Tensor) -> Tensor:
--> 419         return self._conv_forward(input, self.weight)
    421 class Conv3d(_ConvNd):

~/anaconda3/envs/fastai/lib/python3.8/site-packages/torch/nn/modules/conv.py in _conv_forward(self, input, weight)
    413                             weight, self.bias, self.stride,
    414                             _pair(0), self.dilation, self.groups)
--> 415         return F.conv2d(input, weight, self.bias, self.stride,
    416                         self.padding, self.dilation, self.groups)

RuntimeError: Expected 4-dimensional input for 4-dimensional weight [64, 1, 7, 7], but got 3-dimensional input of size [1, 512, 512] instead

So I changed the input image dimension in the data generator to (1,1,img.shape[0], img.shape[1]). Then, for learn.summary(), I get:

Sequential (Input shape: ['1 x 1 x 512 x 512'])
Layer (type)         Output Shape         Param #    Trainable 
Conv2d               1 x 64 x 254 x 254   3,200      True      
BatchNorm2d          1 x 64 x 254 x 254   128        True      
ReLU                 1 x 64 x 254 x 254   0          False     
MaxPool2d            1 x 64 x 127 x 127   0          False     
Conv2d               1 x 64 x 127 x 127   36,864     True      
BatchNorm2d          1 x 64 x 127 x 127   128        True      
ReLU                 1 x 64 x 127 x 127   0          False     
Conv2d               1 x 64 x 127 x 127   36,864     True      
BatchNorm2d          1 x 64 x 127 x 127   128        True      
Conv2d               1 x 64 x 127 x 127   36,864     True      
BatchNorm2d          1 x 64 x 127 x 127   128        True      
ReLU                 1 x 64 x 127 x 127   0          False     
Conv2d               1 x 64 x 127 x 127   36,864     True      
BatchNorm2d          1 x 64 x 127 x 127   128        True      
Conv2d               1 x 64 x 127 x 127   36,864     True      
BatchNorm2d          1 x 64 x 127 x 127   128        True      
ReLU                 1 x 64 x 127 x 127   0          False     
Conv2d               1 x 64 x 127 x 127   36,864     True      
BatchNorm2d          1 x 64 x 127 x 127   128        True      
Conv2d               1 x 128 x 64 x 64    73,728     True      
BatchNorm2d          1 x 128 x 64 x 64    256        True      
ReLU                 1 x 128 x 64 x 64    0          False     
Conv2d               1 x 128 x 64 x 64    147,456    True      
BatchNorm2d          1 x 128 x 64 x 64    256        True      
Conv2d               1 x 128 x 64 x 64    8,192      True      
BatchNorm2d          1 x 128 x 64 x 64    256        True      
Conv2d               1 x 128 x 64 x 64    147,456    True      
BatchNorm2d          1 x 128 x 64 x 64    256        True      
ReLU                 1 x 128 x 64 x 64    0          False     
Conv2d               1 x 128 x 64 x 64    147,456    True      
BatchNorm2d          1 x 128 x 64 x 64    256        True      
Conv2d               1 x 128 x 64 x 64    147,456    True      
BatchNorm2d          1 x 128 x 64 x 64    256        True      
ReLU                 1 x 128 x 64 x 64    0          False     
Conv2d               1 x 128 x 64 x 64    147,456    True      
BatchNorm2d          1 x 128 x 64 x 64    256        True      
Conv2d               1 x 128 x 64 x 64    147,456    True      
BatchNorm2d          1 x 128 x 64 x 64    256        True      
ReLU                 1 x 128 x 64 x 64    0          False     
Conv2d               1 x 128 x 64 x 64    147,456    True      
BatchNorm2d          1 x 128 x 64 x 64    256        True      
Conv2d               1 x 256 x 32 x 32    294,912    True      
BatchNorm2d          1 x 256 x 32 x 32    512        True      
ReLU                 1 x 256 x 32 x 32    0          False     
Conv2d               1 x 256 x 32 x 32    589,824    True      
BatchNorm2d          1 x 256 x 32 x 32    512        True      
Conv2d               1 x 256 x 32 x 32    32,768     True      
BatchNorm2d          1 x 256 x 32 x 32    512        True      
Conv2d               1 x 256 x 32 x 32    589,824    True      
BatchNorm2d          1 x 256 x 32 x 32    512        True      
ReLU                 1 x 256 x 32 x 32    0          False     
Conv2d               1 x 256 x 32 x 32    589,824    True      
BatchNorm2d          1 x 256 x 32 x 32    512        True      
Conv2d               1 x 256 x 32 x 32    589,824    True      
BatchNorm2d          1 x 256 x 32 x 32    512        True      
ReLU                 1 x 256 x 32 x 32    0          False     
Conv2d               1 x 256 x 32 x 32    589,824    True      
BatchNorm2d          1 x 256 x 32 x 32    512        True      
Conv2d               1 x 256 x 32 x 32    589,824    True      
BatchNorm2d          1 x 256 x 32 x 32    512        True      
ReLU                 1 x 256 x 32 x 32    0          False     
Conv2d               1 x 256 x 32 x 32    589,824    True      
BatchNorm2d          1 x 256 x 32 x 32    512        True      
Conv2d               1 x 256 x 32 x 32    589,824    True      
BatchNorm2d          1 x 256 x 32 x 32    512        True      
ReLU                 1 x 256 x 32 x 32    0          False     
Conv2d               1 x 256 x 32 x 32    589,824    True      
BatchNorm2d          1 x 256 x 32 x 32    512        True      
Conv2d               1 x 256 x 32 x 32    589,824    True      
BatchNorm2d          1 x 256 x 32 x 32    512        True      
ReLU                 1 x 256 x 32 x 32    0          False     
Conv2d               1 x 256 x 32 x 32    589,824    True      
BatchNorm2d          1 x 256 x 32 x 32    512        True      
Conv2d               1 x 32 x 31 x 31     32,800     True      
Conv2d               1 x 256 x 64 x 64    73,984     True      
ReLU                 1 x 256 x 64 x 64    0          False     
BatchNorm2d          1 x 256 x 64 x 64    512        True      
Conv2d               1 x 128 x 128 x 128  295,040    True      
ReLU                 1 x 128 x 128 x 128  0          False     
BatchNorm2d          1 x 128 x 128 x 128  256        True      
Conv2d               1 x 64 x 256 x 256   73,792     True      
ReLU                 1 x 64 x 256 x 256   0          False     
BatchNorm2d          1 x 64 x 256 x 256   128        True      
Conv2d               1 x 32 x 512 x 512   18,464     True      
ReLU                 1 x 32 x 512 x 512   0          False     
BatchNorm2d          1 x 32 x 512 x 512   64         True      
Conv2d               1 x 1 x 512 x 512    289        True      
Sigmoid              1 x 1 x 512 x 512    0          False     

Then, when I ran the code, I was able to get rid of the dimension error but got this:

TypeError                                 Traceback (most recent call last)
<ipython-input-68-b2851d5d28a2> in <module>
      7 #     print(i.shape)
      8 #     count+=1
----> 9 lr_find(learn)

~/anaconda3/envs/fastai/lib/python3.8/site-packages/fastai/callback/schedule.py in lr_find(self, start_lr, end_lr, num_it, stop_div, show_plot, suggestions)
    226     n_epoch = num_it//len(self.dls.train) + 1
    227     cb=LRFinder(start_lr=start_lr, end_lr=end_lr, num_it=num_it, stop_div=stop_div)
--> 228     with self.no_logging(): self.fit(n_epoch, cbs=cb)
    229     if show_plot: self.recorder.plot_lr_find()
    230     if suggestions:

~/anaconda3/envs/fastai/lib/python3.8/site-packages/fastcore/utils.py in _f(*args, **kwargs)
    471         init_args.update(log)
    472         setattr(inst, 'init_args', init_args)
--> 473         return inst if to_return else f(*args, **kwargs)
    474     return _f

~/anaconda3/envs/fastai/lib/python3.8/site-packages/fastai/learner.py in fit(self, n_epoch, lr, wd, cbs, reset_opt)
    205             self.opt.set_hypers(lr=self.lr if lr is None else lr)
    206             self.n_epoch,self.loss = n_epoch,tensor(0.)
--> 207             self._with_events(self._do_fit, 'fit', CancelFitException, self._end_cleanup)
    209     def _end_cleanup(self): self.dl,self.xb,self.yb,self.pred,self.loss = None,(None,),(None,),None,None

~/anaconda3/envs/fastai/lib/python3.8/site-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
    154     def _with_events(self, f, event_type, ex, final=noop):
--> 155         try:       self(f'before_{event_type}')       ;f()
    156         except ex: self(f'after_cancel_{event_type}')
    157         finally:   self(f'after_{event_type}')        ;final()

~/anaconda3/envs/fastai/lib/python3.8/site-packages/fastai/learner.py in _do_fit(self)
    195         for epoch in range(self.n_epoch):
    196             self.epoch=epoch
--> 197             self._with_events(self._do_epoch, 'epoch', CancelEpochException)
    199     @log_args(but='cbs')

~/anaconda3/envs/fastai/lib/python3.8/site-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
    154     def _with_events(self, f, event_type, ex, final=noop):
--> 155         try:       self(f'before_{event_type}')       ;f()
    156         except ex: self(f'after_cancel_{event_type}')
    157         finally:   self(f'after_{event_type}')        ;final()

~/anaconda3/envs/fastai/lib/python3.8/site-packages/fastai/learner.py in _do_epoch(self)
    190     def _do_epoch(self):
--> 191         self._do_epoch_train()
    192         self._do_epoch_validate()

~/anaconda3/envs/fastai/lib/python3.8/site-packages/fastai/learner.py in _do_epoch_train(self)
    181     def _do_epoch_train(self):
    182         self.dl = self.dls.train
--> 183         self._with_events(self.all_batches, 'train', CancelTrainException)
    185     def _do_epoch_validate(self, ds_idx=1, dl=None):

~/anaconda3/envs/fastai/lib/python3.8/site-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
    154     def _with_events(self, f, event_type, ex, final=noop):
--> 155         try:       self(f'before_{event_type}')       ;f()
    156         except ex: self(f'after_cancel_{event_type}')
    157         finally:   self(f'after_{event_type}')        ;final()

~/anaconda3/envs/fastai/lib/python3.8/site-packages/fastai/learner.py in all_batches(self)
    159     def all_batches(self):
    160         self.n_iter = len(self.dl)
--> 161         for o in enumerate(self.dl): self.one_batch(*o)
    163     def _do_one_batch(self):

~/anaconda3/envs/fastai/lib/python3.8/site-packages/fastai/learner.py in one_batch(self, i, b)
    177         self.iter = i
    178         self._split(b)
--> 179         self._with_events(self._do_one_batch, 'batch', CancelBatchException)
    181     def _do_epoch_train(self):

~/anaconda3/envs/fastai/lib/python3.8/site-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
    154     def _with_events(self, f, event_type, ex, final=noop):
--> 155         try:       self(f'before_{event_type}')       ;f()
    156         except ex: self(f'after_cancel_{event_type}')
    157         finally:   self(f'after_{event_type}')        ;final()

~/anaconda3/envs/fastai/lib/python3.8/site-packages/fastai/learner.py in _do_one_batch(self)
    164         self.pred = self.model(*self.xb)
    165         self('after_pred')
--> 166         if len(self.yb): self.loss = self.loss_func(self.pred, *self.yb)
    167         self('after_loss')
    168         if not self.training or not len(self.yb): return

TypeError: mse_loss() takes from 2 to 5 positional arguments but 8 were given

What should I do?

@sgugger @muellerzr, what do you suggest? Since my image files are of .npz type, how can I proceed?

Hi Sarvagya,

You are more likely to receive help if you post the complete stack trace and the definition for model. Please see

Hope this helps you to get the help you are asking for. :slightly_smiling_face:

1 Like


Thank you for the response. I have edited my question in the required way. I hope the post is now appropriate.

Good morning (from where I live),

Thanks for making a great revision of your original question! It could serve as a model for how to ask the forums effectively.

learn.summary() shows that your model is working as you expect. It receives a batch of a single image sized [bs(1),channels,h,w] and outputs a batch of size [bs(1), channels, h,w]. That’s good to know!

What I see is that data_gen’s __getitem__, derived from Dataset, should return a tuple of one input sample and its target, while yours is returning only the sample. This is probably confusing the fastai training loop and producing a misleading error. Give this theory a try and let us know what happens. :slightly_smiling_face:

1 Like

I changed the return to this: return (img, img) and it worked like a charm for training.

However, for m(b).size() where b = dls.one_batch() and m is the model, I get the following error:

TypeError                                 Traceback (most recent call last)
<ipython-input-21-fe2ee2f7575c> in <module>
      1 # autoencoder(b)
----> 2 m(b).size()

~/anaconda3/envs/fastai/lib/python3.8/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
    720             result = self._slow_forward(*input, **kwargs)
    721         else:
--> 722             result = self.forward(*input, **kwargs)
    723         for hook in itertools.chain(
    724                 _global_forward_hooks.values(),

~/anaconda3/envs/fastai/lib/python3.8/site-packages/torch/nn/modules/container.py in forward(self, input)
    115     def forward(self, input):
    116         for module in self:
--> 117             input = module(input)
    118         return input

~/anaconda3/envs/fastai/lib/python3.8/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
    720             result = self._slow_forward(*input, **kwargs)
    721         else:
--> 722             result = self.forward(*input, **kwargs)
    723         for hook in itertools.chain(
    724                 _global_forward_hooks.values(),

~/anaconda3/envs/fastai/lib/python3.8/site-packages/torch/nn/modules/conv.py in forward(self, input)
    418     def forward(self, input: Tensor) -> Tensor:
--> 419         return self._conv_forward(input, self.weight)
    421 class Conv3d(_ConvNd):

~/anaconda3/envs/fastai/lib/python3.8/site-packages/torch/nn/modules/conv.py in _conv_forward(self, input, weight)
    413                             weight, self.bias, self.stride,
    414                             _pair(0), self.dilation, self.groups)
--> 415         return F.conv2d(input, weight, self.bias, self.stride,
    416                         self.padding, self.dilation, self.groups)

TypeError: conv2d(): argument 'input' (position 1) must be Tensor, not tuple

What should I do?

one_batch returns a tuple of (input batch, target batch), not the input batch alone. The correct usage is

x,y = dls.one_batch()

Some advice on what you should do: All of your bugs involved not understanding and checking the shapes of variables. Try to learn the expected shapes for a Dataset item, a batch, an image input, a target, a model output, a weight matrix, etc. Then as you go along in the notebook, predict in your mind and then check the shapes of the variables at each step. This practice will help you both to understand the overall process and to find many bugs.

P.S. I am not sure about this fix.

return (img, img)

Do you want your model to predict its exact input? The second element of the tuple should be the target, like a category or segmentation map.

Edit: Oops, never mind… I see you are training some kind of auto-encoder.

I am literally being so stupid :sweat_smile: :sweat_smile:

Yeah. I want to extract dimensionally reduced features of the image for some unsupervised learning.

I have been there too. Thanks for letting me help. :slightly_smiling_face:

1 Like