I’m creating an SSD model with fastai v1. My version of SSD takes hook outputs from the backbone, applies some convolutions on top of them, and produces the results. My model also has a custom head after the backbone. But I don’t know how to apply discriminative learning rates to the different parts of this model.
class SSD_Model(nn.Module):
    """SSD-style detector that predicts from a custom head and from hooked backbone layers.

    The model has two groups of prediction branches:

    * ``conv_blocks`` / ``out_blocks``: a chain of ``ResBlock``s applied to the
      final backbone output, each followed by an ``OutConv`` (one pair per
      entry in ``grids``).
    * ``backbone_convs``: one ``nn.Sequential`` per hooked backbone layer,
      applied to the activation captured by the matching hook in ``outputs``.

    All three containers are ``nn.ModuleList``s, so their layers are registered
    submodules: they appear in ``parameters()`` / ``state_dict()`` and follow
    ``.to()``, ``.train()`` and ``.eval()``. Plain Python lists would hide them
    from the optimizer and from any layer-group split of the model.

    ``forward`` returns ``(classification_results, bbox_results)``, two lists
    that hold the head outputs first, followed by the hooked-branch outputs.
    """

    def __init__(self, grids = [4, 2, 1], backbone_grids = [28, 14, 7],
                 backbone = models.xresnet34, num_categories = None, anchors_per_cell = 9
                 ):
        super().__init__()
        self.num_categories = num_categories
        self.anchors_per_cell = anchors_per_cell
        # Drop the last three children of the backbone constructor's output.
        self.backbone = nn.Sequential(*list(backbone().children())[:-3])
        # Copy the arguments so the instance never aliases the shared default lists.
        self.backbone_grids = list(backbone_grids)
        self.grids = list(grids)
        self.hook_convs()
        self.setup_out_convs()
        if torch.cuda.is_available():
            self.device = torch.device('cuda')
        else:
            self.device = torch.device('cpu')
        self.setup_device()

    def forward(self, x):
        """Run the backbone and every branch; return two lists of outputs."""
        classification_results = []
        bbox_results = []
        # The backbone pass also refreshes the activations stored by the hooks.
        out = self.backbone(x)
        for resblock, outblock in zip(self.conv_blocks, self.out_blocks):
            out = resblock(out)
            clas_out, bbox_out = outblock(out)
            classification_results.append(clas_out)
            bbox_results.append(bbox_out)
        for idx, branch in enumerate(self.backbone_convs):
            current_hook = self.outputs[idx].stored
            clas_out, bbox_out = branch(current_hook)
            classification_results.append(clas_out)
            bbox_results.append(bbox_out)
        return classification_results, bbox_results

    def register_hooks(self):
        """Attach an output hook to each backbone layer picked by ``model_idx_for_gridsize``."""
        self.outputs = []
        self.backbone_idxs = model_idx_for_gridsize(self.backbone)
        for _, index in self.backbone_idxs:
            # detach = False is passed so the stored activation is not detached.
            self.outputs.append(hook_output(self.backbone[index], detach = False))

    def module_list_for_hooks(self):
        """Register the hooks and create one empty ``nn.Sequential`` branch per hook."""
        self.register_hooks()
        # ModuleList (not a plain list) so the branches are registered submodules.
        self.backbone_convs = nn.ModuleList([nn.Sequential() for _ in self.outputs])

    def hook_convs(self, num_id_convs = 1):
        """Fill every hooked branch with ``num_id_convs`` ResBlocks plus a final OutConv.

        ResBlock widths start at 256 and are halved after each block, never
        going below 64. Each block consumes the previous block's output width,
        and the OutConv consumes the width of the last block (or the hook's
        channel count when ``num_id_convs`` is 0).
        """
        self.module_list_for_hooks()
        # The dummy pass exists only to populate the hooks so channel counts
        # can be read; no autograd graph is needed for that.
        with torch.no_grad():
            self.backbone(self.dummy_batch)
        for idx, branch in enumerate(self.backbone_convs):
            in_channels = self.outputs[idx].stored.shape[1]
            nf = 256
            for i in range(num_id_convs):
                branch.add_module(f'idconv{i}', ResBlock(in_channels, nf, stride = 1))
                in_channels = nf
                if nf > 64:
                    nf //= 2  # integer halving keeps channel counts as ints
            branch.add_module(f'out_conv{max(num_id_convs - 1, 0)}',
                              OutConv(in_channels, num_categories = self.num_categories,
                                      num_anchorbxs = self.anchors_per_cell))

    @property
    def dummy_batch(self, input_shape = 224):
        """Random batch of two 3-channel square images used for shape probing."""
        return torch.rand((2, 3, input_shape, input_shape))

    @property
    def backbone_final_channel(self):
        """Channel count of the backbone output, measured with a dummy forward pass."""
        with torch.no_grad():
            _, channel, _, _ = self.backbone(self.dummy_batch).shape
        return channel

    def setup_out_convs(self):
        """Build the custom head: one stride-2 ResBlock and one OutConv per entry in ``grids``."""
        nf = 256
        conv_blocks, out_blocks = [], []
        for idx, _ in enumerate(self.grids):
            is_first = idx == 0
            conv_blocks.append(ResBlock(
                self.backbone_final_channel if is_first else nf, nf, stride = 2,
                downsampler = None if is_first else nn.AvgPool2d(2, 2, ceil_mode = True)))
            out_blocks.append(OutConv(nf, num_categories = self.num_categories,
                                      num_anchorbxs = self.anchors_per_cell))
        # Register both chains so their parameters are trainable and movable.
        self.conv_blocks = nn.ModuleList(conv_blocks)
        self.out_blocks = nn.ModuleList(out_blocks)

    def setup_device(self):
        """Move the whole model (backbone and all registered branches) to ``self.device``."""
        self.to(self.device)
Can somebody advise me on how to apply discriminative learning rates to the different parts of this model, which isn’t sequential?
Note: This model is working and giving me the desired output of the appropriate shapes.
My model’s output shapes, without flattening them:
[torch.Size([5, 189, 4, 4]),
torch.Size([5, 189, 2, 2]),
torch.Size([5, 189, 1, 1]),
torch.Size([5, 189, 28, 28]),
torch.Size([5, 189, 14, 14]),
torch.Size([5, 189, 7, 7])],
[torch.Size([5, 36, 4, 4]),
torch.Size([5, 36, 2, 2]),
torch.Size([5, 36, 1, 1]),
torch.Size([5, 36, 28, 28]),
torch.Size([5, 36, 14, 14]),
torch.Size([5, 36, 7, 7])])
I’m using six grids: three are hooked from the backbone, and the other three come from a custom head on top of the backbone.
This is my architecture diagram.