Hey everyone,
I have been running into an error while training on a new databunch with my already trained RetinaNet model. I originally trained the RetinaNet on a dataset with 3 classes and saved the learner with learner.save. Now I want to reuse the weights of that trained model to train on a new dataset. The new dataset has only 2 classes, and the number of training images has dropped from about 3k to roughly 100. Can anyone explain how I can fine-tune the pretrained RetinaNet on this new data?
PFA, the code for the already trained model:
batch_size = 64
do_flip = True
flip_vert = True
max_rotate = 90
max_zoom = 1.1
max_lighting = 0.2
max_warp = 0.2
p_affine = 0.75
p_lighting = 0.75
tfms = get_transforms(do_flip=do_flip,
                      flip_vert=flip_vert,
                      max_rotate=max_rotate,
                      max_zoom=max_zoom,
                      max_lighting=max_lighting,
                      max_warp=max_warp,
                      p_affine=p_affine,
                      p_lighting=p_lighting)
train, valid = ObjectItemListSlide(train_images), ObjectItemListSlide(valid_images)
item_list = ItemLists(".", train, valid)
lls = item_list.label_from_func(lambda x: x.y, label_cls=SlideObjectCategoryList)
lls = lls.transform(tfms, tfm_y=True, size=patch_size)
data = lls.databunch(bs=batch_size, collate_fn=bb_pad_collate,num_workers=0).normalize()
backbone = "ResNet34" #["ResNet18", "ResNet34", "ResNet50", "ResNet101", "ResNet150"]
backbone_model = models.resnet18
if backbone == "ResNet34":
backbone_model = models.resnet34
if backbone == "ResNet50":
backbone_model = models.resnet50
if backbone == "ResNet101":
backbone_model = models.resnet101
if backbone == "ResNet150":
backbone_model = models.resnet150
pre_trained_on_imagenet = True
encoder = create_body(models.resnet34, pre_trained_on_imagenet, -2)
loss_function = "FocalLoss"
if loss_function == "FocalLoss":
    crit = RetinaNetFocalLoss(anchors)
channels = 128
final_bias = -4
n_conv = 3
model = RetinaNet(encoder, n_classes=data.train_ds.c,
                  n_anchors=len(scales) * len(ratios),
                  sizes=[size[0] for size in sizes],
                  chs=channels,  # number of channels in the hidden layers of the heads
                  final_bias=final_bias,
                  n_conv=n_conv  # number of hidden conv layers in the heads
                  )
voc = PascalVOCMetric(anchors, patch_size, [str(i) for i in data.train_ds.y.classes[1:]])
voc
learn = Learner(data, model, loss_func=crit,
                callback_fns=[BBMetrics, ShowGraph, CSVLogger, partial(GradientClipping, clip=5.0)],
                metrics=[voc])
learn.split([model.encoder[6], model.c5top5])
learn.freeze_to(-2)
learn.lr_find()
learn.recorder.plot(suggestion=True)
plt.savefig('lr_finder_plot_bs64_400+.png')
from fastai.callbacks import SaveModelCallback
# Train with SaveModelCallback so the weights with the best train_loss are kept
learn.unfreeze()
max_learning_rate = 1e-3
cyc_len = 1000
learn.fit_one_cycle(cyc_len, max_learning_rate,
                    callbacks=[SaveModelCallback(learn, monitor='train_loss', name='best_train_loss')])
learn.save("trained_model_1000", return_path=True,with_opt=True)
And this is the code for loading my new databunch and building the learner:
batch_size = 16
do_flip = True
flip_vert = True
max_rotate = 90
max_zoom = 1.1
max_lighting = 0.2
max_warp = 0.2
p_affine = 0.75
p_lighting = 0.75
tfms = get_transforms(do_flip=do_flip,
                      flip_vert=flip_vert,
                      max_rotate=max_rotate,
                      max_zoom=max_zoom,
                      max_lighting=max_lighting,
                      max_warp=max_warp,
                      p_affine=p_affine,
                      p_lighting=p_lighting)
train, valid = ObjectItemListSlide(train_images), ObjectItemListSlide(valid_images)
item_list = ItemLists(".", train, valid)
lls = item_list.label_from_func(lambda x: x.y, label_cls=SlideObjectCategoryList)
lls = lls.transform(tfms, tfm_y=True, size=patch_size)
data = lls.databunch(bs=batch_size, collate_fn=bb_pad_collate,num_workers=0).normalize()
backbone = "ResNet34" #["ResNet18", "ResNet34", "ResNet50", "ResNet101", "ResNet150"]
backbone_model = models.resnet18
if backbone == "ResNet34":
backbone_model = models.resnet34
if backbone == "ResNet50":
backbone_model = models.resnet50
if backbone == "ResNet101":
backbone_model = models.resnet101
if backbone == "ResNet150":
backbone_model = models.resnet150
pre_trained_on_imagenet = True
encoder = create_body(backbone_model, pre_trained_on_imagenet, -2)
loss_function = "FocalLoss"
if loss_function == "FocalLoss":
    crit = RetinaNetFocalLoss(anchors)
channels = 128
final_bias = -4
n_conv = 3
model = RetinaNet(encoder, n_classes=data.train_ds.c,
                  n_anchors=len(scales) * len(ratios),
                  sizes=[size[0] for size in sizes],
                  chs=channels,  # number of channels in the hidden layers of the heads
                  final_bias=final_bias,
                  n_conv=n_conv  # number of hidden conv layers in the heads
                  )
voc = PascalVOCMetric(anchors, patch_size, [str(i) for i in data.train_ds.y.classes[1:]])
voc
learn = Learner(data, model, loss_func=crit,
                callback_fns=[BBMetrics, ShowGraph, CSVLogger, partial(GradientClipping, clip=5.0)],
                metrics=[voc])
learn.split([model.encoder[6], model.c5top5])
learn.freeze_to(-2)
# Here I load the model that was previously saved with torch.save(learn.model, PATH)
learn.model = torch.load('/content/drive/MyDrive/model.pt', map_location=torch.device('cpu'))
# learn.model.eval()
max_learning_rate = 1e-3
cyc_len = 50
batch_size = 64  # note: only used in the print below; the databunch above was built with bs=16
learn.fit_one_cycle(cyc_len, max_learning_rate,
                    callbacks=[SaveModelCallback(learn, monitor='train_loss', name='best_train_loss')])
I am getting this error when I call learn.fit_one_cycle:
Starting Training with n= 50 epochs with batch_size= 64
epoch train_loss valid_loss pascal_voc_metric BBloss focal_loss AP-Mitosis time
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-27-2cf0edfb1663> in <module>
4 print("\n Starting Training with n=",cyc_len,"epochs with batch_size=",batch_size,"\n")
5 learn.fit_one_cycle(cyc_len, max_learning_rate,callbacks=[SaveModelCallback(learn, monitor='train_loss',
----> 6 name='best_train_loss')])
/usr/local/lib/python3.7/dist-packages/fastai/train.py in fit_one_cycle(learn, cyc_len, max_lr, moms, div_factor, pct_start, final_div, wd, callbacks, tot_epochs, start_epoch)
21 callbacks.append(OneCycleScheduler(learn, max_lr, moms=moms, div_factor=div_factor, pct_start=pct_start,
22 final_div=final_div, tot_epochs=tot_epochs, start_epoch=start_epoch))
---> 23 learn.fit(cyc_len, max_lr, wd=wd, callbacks=callbacks)
24
25 def fit_fc(learn:Learner, tot_epochs:int=1, lr:float=defaults.lr, moms:Tuple[float,float]=(0.95,0.85), start_pct:float=0.72,
/usr/local/lib/python3.7/dist-packages/fastai/basic_train.py in fit(self, epochs, lr, wd, callbacks)
198 else: self.opt.lr,self.opt.wd = lr,wd
199 callbacks = [cb(self) for cb in self.callback_fns + listify(defaults.extra_callback_fns)] + listify(callbacks)
--> 200 fit(epochs, self, metrics=self.metrics, callbacks=self.callbacks+callbacks)
201
202 def create_opt(self, lr:Floats, wd:Floats=0.)->None:
/usr/local/lib/python3.7/dist-packages/fastai/basic_train.py in fit(epochs, learn, callbacks, metrics)
104 if not cb_handler.skip_validate and not learn.data.empty_val:
105 val_loss = validate(learn.model, learn.data.valid_dl, loss_func=learn.loss_func,
--> 106 cb_handler=cb_handler, pbar=pbar)
107 else: val_loss=None
108 if cb_handler.on_epoch_end(val_loss): break
/usr/local/lib/python3.7/dist-packages/fastai/basic_train.py in validate(model, dl, loss_func, cb_handler, pbar, average, n_batch)
61 if not is_listy(yb): yb = [yb]
62 nums.append(first_el(yb).shape[0])
---> 63 if cb_handler and cb_handler.on_batch_end(val_losses[-1]): break
64 if n_batch and (len(nums)>=n_batch): break
65 nums = np.array(nums, dtype=np.float32)
/usr/local/lib/python3.7/dist-packages/fastai/callback.py in on_batch_end(self, loss)
306 "Handle end of processing one batch with `loss`."
307 self.state_dict['last_loss'] = loss
--> 308 self('batch_end', call_mets = not self.state_dict['train'])
309 if self.state_dict['train']:
310 self.state_dict['iteration'] += 1
/usr/local/lib/python3.7/dist-packages/fastai/callback.py in __call__(self, cb_name, call_mets, **kwargs)
248 "Call through to all of the `CallbakHandler` functions."
249 if call_mets:
--> 250 for met in self.metrics: self._call_and_update(met, cb_name, **kwargs)
251 for cb in self.callbacks: self._call_and_update(cb, cb_name, **kwargs)
252
/usr/local/lib/python3.7/dist-packages/fastai/callback.py in _call_and_update(self, cb, cb_name, **kwargs)
239 def _call_and_update(self, cb, cb_name, **kwargs)->None:
240 "Call `cb_name` on `cb` and update the inner state."
--> 241 new = ifnone(getattr(cb, f'on_{cb_name}')(**self.state_dict, **kwargs), dict())
242 for k,v in new.items():
243 if k not in self.state_dict:
/usr/local/lib/python3.7/dist-packages/object_detection_fastai/callbacks/callbacks.py in on_batch_end(self, last_output, last_target, **kwargs)
153 num_boxes = len(bbox_gt) * 3
154 for box, cla, scor in list(zip(bbox_pred, preds, scores))[:num_boxes]:
--> 155 temp = BoundingBox(imageName=str(self.imageCounter), classId=self.metric_names_original[cla], x=box[0], y=box[1],
156 w=box[2], h=box[3], typeCoordinates=CoordinatesType.Absolute, classConfidence=scor,
157 bbType=BBType.Detected, format=BBFormat.XYWH, imgSize=(self.size, self.size))
IndexError: list index out of range
Can anyone tell me what might be causing this issue, or how to resolve it?
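In case it helps with the diagnosis, this is the kind of quick check I can add right before learn.fit_one_cycle (just a sketch using the names from my code above; the classifier index 3 comes from the state_dict error further down):

# classes of the new 2-class databunch vs. the names the metric was built from
print("databunch classes:", data.train_ds.y.classes)
print("metric class names:", [str(i) for i in data.train_ds.y.classes[1:]])
# the final conv of the classification head determines how many classes the loaded model predicts
print("classification head:", learn.model.classifier[3])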
P.S. I have also tried saving the state_dict with torch.save(learn.model.state_dict(), PATH) and loading it via load_state_dict, but that gives me a classifier weight-size mismatch error (which makes sense, since the two heads have different numbers of classes):
learn.model.load_state_dict(torch.load("/content/drive/MyDrive/myModel.pth",map_location=torch.device('cpu')))
The error:
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-56-229e34fc2c49> in <module>
---> 46 learn.model.load_state_dict(torch.load("/content/drive/MyDrive/myModel.pth",map_location=torch.device('cpu')))
47 #learn.model.eval()
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in load_state_dict(self, state_dict, strict)
1496 if len(error_msgs) > 0:
1497 raise RuntimeError('Error(s) in loading state_dict for {}:\n\t{}'.format(
-> 1498 self.__class__.__name__, "\n\t".join(error_msgs)))
1499 return _IncompatibleKeys(missing_keys, unexpected_keys)
1500
RuntimeError: Error(s) in loading state_dict for RetinaNet:
size mismatch for classifier.3.weight: copying a param with shape torch.Size([3, 128, 3, 3]) from checkpoint, the shape in current model is torch.Size([2, 128, 3, 3]).
size mismatch for classifier.3.bias: copying a param with shape torch.Size([3]) from checkpoint, the shape in current model is torch.Size([2]).
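The only workaround I have come up with so far (a rough, untested sketch; the classifier.3 key names are taken from the error message just above) is to drop the mismatched classifier keys from the 3-class checkpoint and load everything else non-strictly, so that only the new 2-class head stays randomly initialised and gets retrained:

state = torch.load("/content/drive/MyDrive/myModel.pth", map_location=torch.device('cpu'))
# drop the final classification conv, whose shape depends on the number of classes
state = {k: v for k, v in state.items() if not k.startswith("classifier.3")}
# strict=False loads all matching weights and leaves the 2-class head as initialised
missing, unexpected = learn.model.load_state_dict(state, strict=False)
print("missing keys:", missing)
print("unexpected keys:", unexpected)

Would that be the right way to transfer the backbone weights, or is there a cleaner fastai approach?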
Thanks everyone for this wonderful forum.