I am trying to do some transfer learning from categorical models to scalar (regression) models. The images are the same; I would just like to apply a model trained with the categorical learner to scalar data. Broadly, I think this is similar to transfer learning from a model trained on 2 classes to data with a different number of classes.
- I created categorical data sets and their scalar equivalents. I then trained a resnet50 ConvLearner on the categorical data and saved it to disk.
- Next, I generated a ConvLearner with the same settings, but with the scalar data. Loading my 2-category model weights into the ConvLearner that has been primed with scalar-shaped data produces two errors:
size mismatch for 1.8.weight: copying a param of torch.Size([2, 512]) from checkpoint, where the shape is torch.Size([1, 512]) in current model.
size mismatch for 1.8.bias: copying a param of torch.Size([2]) from checkpoint, where the shape is torch.Size([1]) in current model.
That error makes sense: the current model's final layer has 1 output, while the saved weights come from a model with 2 outputs.
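As an aside, the raw PyTorch workaround for this kind of mismatch is to drop the offending keys from the checkpoint and load the rest non-strictly. A sketch, assuming the saved file holds a plain state dict (learn_scalar here stands for the scalar-data ConvLearner above; the key names come from the error, and the exact path depends on where fastai saved the model):

import torch

state = torch.load('models/categorical.mod.pth', map_location='cpu')
# Drop the 2-class head weights that no longer fit a 1-output model
for key in ('1.8.weight', '1.8.bias'):
    state.pop(key, None)
# strict=False tolerates the now-missing head parameters
learn_scalar.model.load_state_dict(state, strict=False)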
- Rather than patching the checkpoint, though, I instead create a ConvLearner with the original 2-category data, load the saved weights, and then reshape the last layer, swap in the scalar data, and replace the loss_func with MSE:
learn_scalar = ConvLearner(data,  # Note: load with data that is categorical-compatible
                           models.resnet50,
                           metrics=[exp_rmspe],
                           ps=0.8,
                           callback_fns=ShowGraph)
learn_scalar.load(name='categorical.mod')

# Replace the last layer to have 1 output instead of 2
removed = list(learn_scalar.model.children())[:-1]
learn_scalar.model = torch.nn.Sequential(*removed)
learn_scalar.model = torch.nn.Sequential(learn_scalar.model, torch.nn.Linear(512, 1))
learn_scalar.model.to('cuda')

# Replace the categorical data with the scalar data
learn_scalar.data = data_scalar

# Replace the loss function with MSELossFlat() instead of cross-entropy
learn_scalar.loss_func = MSELossFlat()
However, when I train, I now get a size-mismatch error inside the model's forward pass:
# Train the last layer
learn_scalar.fit_one_cycle(1, slice(3e-2, 2e-1), pct_start=0.05)
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-138-02adda90e460> in <module>()
1 # Train the last layer
----> 2 learn_scalar.fit_one_cycle(1, slice(3e-2, 2e-1), pct_start=0.05)
/app/fastai/fastai/train.py in fit_one_cycle(learn, cyc_len, max_lr, moms, div_factor, pct_start, wd, callbacks, **kwargs)
17 callbacks.append(OneCycleScheduler(learn, max_lr, moms=moms, div_factor=div_factor,
18 pct_start=pct_start, **kwargs))
---> 19 learn.fit(cyc_len, max_lr, wd=wd, callbacks=callbacks)
20
21 def lr_find(learn:Learner, start_lr:Floats=1e-7, end_lr:Floats=10, num_it:int=100, **kwargs:Any):
/app/fastai/fastai/basic_train.py in fit(self, epochs, lr, wd, callbacks)
135 callbacks = [cb(self) for cb in self.callback_fns] + listify(callbacks)
136 fit(epochs, self.model, self.loss_func, opt=self.opt, data=self.data, metrics=self.metrics,
--> 137 callbacks=self.callbacks+callbacks)
138
139 def create_opt(self, lr:Floats, wd:Floats=0.)->None:
/app/fastai/fastai/basic_train.py in fit(epochs, model, loss_func, opt, data, callbacks, metrics)
87 except Exception as e:
88 exception = e
---> 89 raise e
90 finally: cb_handler.on_train_end(exception)
91
/app/fastai/fastai/basic_train.py in fit(epochs, model, loss_func, opt, data, callbacks, metrics)
77 for xb,yb in progress_bar(data.train_dl, parent=pbar):
78 xb, yb = cb_handler.on_batch_begin(xb, yb)
---> 79 loss = loss_batch(model, xb, yb, loss_func, opt, cb_handler)[0]
80 if cb_handler.on_batch_end(loss): break
81
/app/fastai/fastai/basic_train.py in loss_batch(model, xb, yb, loss_func, opt, cb_handler)
16 if not is_listy(xb): xb = [xb]
17 if not is_listy(yb): yb = [yb]
---> 18 out = model(*xb)
19 out = cb_handler.on_loss_begin(out)
20
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
475 result = self._slow_forward(*input, **kwargs)
476 else:
--> 477 result = self.forward(*input, **kwargs)
478 for hook in self._forward_hooks.values():
479 hook_result = hook(self, input, result)
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/container.py in forward(self, input)
90 def forward(self, input):
91 for module in self._modules.values():
---> 92 input = module(input)
93 return input
94
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
475 result = self._slow_forward(*input, **kwargs)
476 else:
--> 477 result = self.forward(*input, **kwargs)
478 for hook in self._forward_hooks.values():
479 hook_result = hook(self, input, result)
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/linear.py in forward(self, input)
61
62 def forward(self, input):
---> 63 return F.linear(input, self.weight, self.bias)
64
65 def extra_repr(self):
/usr/local/lib/python3.6/dist-packages/torch/nn/functional.py in linear(input, weight, bias)
1138 return torch.addmm(bias, input, weight.t())
1139
-> 1140 output = input.matmul(weight.t())
1141 if bias is not None:
1142 output += bias
RuntimeError: size mismatch, m1: [1048576 x 4], m2: [512 x 1] at /pytorch/aten/src/THC/generic/THCTensorMathBlas.cu:266
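To see what the new head actually receives, here is a quick shape check (diagnostic sketch; it re-derives the trunk the same way as above and runs it on one batch of the scalar data):

# Run everything except the new Linear(512, 1) on one batch and
# inspect the shape that reaches the final layer
xb, yb = next(iter(data_scalar.train_dl))
trunk = torch.nn.Sequential(*list(learn_scalar.model.children())[:-1])
print(trunk(xb.cuda()).shape)  # I would expect [bs, 512]; is it 4-D instead?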
I am guessing there is an additional modification to the model that I need to make (or that my modification was way off). Any guidance?
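For concreteness: should the surgery instead replace only the final Linear inside the existing head, leaving the pooling/flatten layers in front of it intact? A sketch of what I mean (untested; the indexing assumes the model is Sequential(body, head) with the 2-output Linear as the head's last child, which the 1.8.weight key in the first error suggests):

# Swap just the head's final Linear(512, 2) for a 1-output layer,
# keeping the rest of the fastai head (pooling, flatten, dropout) in place
learn_scalar.model[-1][-1] = torch.nn.Linear(512, 1)
learn_scalar.model.to('cuda')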