Model Troubles

Hi all, I’m working on implementing NTS-Net in fastai at the moment. The authors provide the code, so I’m working off of that. When I try to train, I run into an error in the forward method. Here is the code up to where it fails:

import numpy as np
import torch.nn as nn
import torch.nn.functional as F

# ProposalNet, generate_default_anchor_maps, and resnet come from the NTS-Net
# source; FC_NUMS and CAT_NUM are configuration constants.
class attention_net(nn.Module):
    def __init__(self, topN=4):
        super(attention_net, self).__init__()
        self.pretrained_model = resnet.resnet50(pretrained=True)
        self.pretrained_model.avgpool = nn.AdaptiveAvgPool2d(1)
        # self.pretrained_model.fc = nn.Linear(512 * 4, 200)
        self.pretrained_model.fc = nn.Linear(512 * 4, FC_NUMS)
        self.proposal_net = ProposalNet()
        self.topN = topN
        # self.concat_net = nn.Linear(2048 * (CAT_NUM + 1), 200)
        self.concat_net = nn.Linear(2048 * (CAT_NUM + 1), FC_NUMS)
        # self.partcls_net = nn.Linear(512 * 4, 200)
        self.partcls_net = nn.Linear(512 * 4, FC_NUMS)
        _, edge_anchors, _ = generate_default_anchor_maps()
        self.pad_side = 224
        self.edge_anchors = (edge_anchors + 224).astype(np.int)

    def forward(self, x):
        resnet_out, rpn_feature, feature = self.pretrained_model(x)
        x_pad = F.pad(x, (self.pad_side, self.pad_side, self.pad_side, self.pad_side), mode='constant', value=0)
        batch = x.size(0)
        # we will reshape rpn to shape: batch * nb_anchor
        rpn_score = self.proposal_net(rpn_feature.detach())
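        # for each image, stack [rpn score, the 4 anchor box coords, anchor index]
        # column-wise; every array must have the same number of rows (one per anchor)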
        all_cdds = [
            np.concatenate((x.reshape(-1, 1), self.edge_anchors.copy(), np.arange(0, len(x)).reshape(-1, 1)), axis=1)
            for x in rpn_score.data.cpu().numpy()]

The error traces back to that list comprehension, with:
all the input array dimensions except for the concatenation axis must match exactly
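
For reference, here is a minimal sketch (with made-up shapes) of how numpy produces that message: every array passed to np.concatenate(axis=1) must have the same number of rows, so if the flattened per-image rpn_score vector and edge_anchors disagree on the anchor count, it fails exactly like this:

import numpy as np

scores = np.random.rand(1614, 1)    # hypothetical anchor count from the proposal net
anchors = np.random.rand(1344, 4)   # hypothetical number of default edge anchors
index = np.arange(len(scores)).reshape(-1, 1)

np.concatenate((scores, anchors, index), axis=1)
# ValueError: all the input array dimensions except for the concatenation axis
# must match exactly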

When I first implemented this in pure PyTorch it trained without issues, so I don’t quite know what changed in fastai or what needs to be changed.

Could you also let me know the best practices for debugging models like this, or point me in the right direction if possible? :slight_smile:

Thanks!

Edit: here is the source code I am using:

Again, I did get this working in PyTorch after some edits; I’m just trying to get it into fastai now.

This looks like a tensor dimension mismatch.

First, you can look at the model summary to see if anything already stands out there.

If that is not helpful, you can put a pdb.set_trace() or a debugger layer in the forward method before the concatenation, run a dummy batch through the network, and then check the shapes of the tensors that go into the concatenation, as well as the concatenation operation itself.
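
For example, a minimal sketch could look like this, added inside attention_net.forward right before the all_cdds list comprehension:

import pdb

rpn_np = rpn_score.data.cpu().numpy()
print(rpn_np.shape, self.edge_anchors.shape)  # the anchor counts must agree
pdb.set_trace()                               # inspect the shapes interactively

Then push a single dummy batch through the model, e.g. (assuming fastai v1, where the DataBunch has a one_batch method):

xb, yb = data.one_batch()
learn.model(xb.cuda())  # assumes the model is on the GPU; move xb to the model's device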

I hope this is helpful. If you have questions, just ask. :slight_smile:

@MicPie Thank you so much! It was a dimension mismatch. Now, though, I receive this:

/usr/local/lib/python3.6/dist-packages/fastai/basic_train.py in fit(self, epochs, lr, wd, callbacks)
    197         callbacks = [cb(self) for cb in self.callback_fns + listify(defaults.extra_callback_fns)] + listify(callbacks)
    198         if defaults.extra_callbacks is not None: callbacks += defaults.extra_callbacks
--> 199         fit(epochs, self, metrics=self.metrics, callbacks=self.callbacks+callbacks)
    200 
    201     def create_opt(self, lr:Floats, wd:Floats=0.)->None:

/usr/local/lib/python3.6/dist-packages/fastai/basic_train.py in fit(epochs, learn, callbacks, metrics)
     99             for xb,yb in progress_bar(learn.data.train_dl, parent=pbar):
    100                 xb, yb = cb_handler.on_batch_begin(xb, yb)
--> 101                 loss = loss_batch(learn.model, xb, yb, learn.loss_func, learn.opt, cb_handler)
    102                 if cb_handler.on_batch_end(loss): break
    103 

/usr/local/lib/python3.6/dist-packages/fastai/basic_train.py in loss_batch(model, xb, yb, loss_func, opt, cb_handler)
     31 
     32     if opt is not None:
---> 33         loss,skip_bwd = cb_handler.on_backward_begin(loss)
     34         if not skip_bwd:                     loss.backward()
     35         if not cb_handler.on_backward_end(): opt.step()

/usr/local/lib/python3.6/dist-packages/fastai/callback.py in on_backward_begin(self, loss)
    290         self.smoothener.add_value(loss.detach().cpu())
    291         self.state_dict['last_loss'], self.state_dict['smooth_loss'] = loss, self.smoothener.smooth
--> 292         self('backward_begin', call_mets=False)
    293         return self.state_dict['last_loss'], self.state_dict['skip_bwd']
    294 

/usr/local/lib/python3.6/dist-packages/fastai/callback.py in __call__(self, cb_name, call_mets, **kwargs)
    249         if call_mets:
    250             for met in self.metrics: self._call_and_update(met, cb_name, **kwargs)
--> 251         for cb in self.callbacks: self._call_and_update(cb, cb_name, **kwargs)
    252 
    253     def set_dl(self, dl:DataLoader):

/usr/local/lib/python3.6/dist-packages/fastai/callback.py in _call_and_update(self, cb, cb_name, **kwargs)
    239     def _call_and_update(self, cb, cb_name, **kwargs)->None:
    240         "Call `cb_name` on `cb` and update the inner state."
--> 241         new = ifnone(getattr(cb, f'on_{cb_name}')(**self.state_dict, **kwargs), dict())
    242         for k,v in new.items():
    243             if k not in self.state_dict:

/usr/local/lib/python3.6/dist-packages/fastai/basic_train.py in on_backward_begin(self, smooth_loss, **kwargs)
    468         self.losses.append(smooth_loss)
    469         if self.pbar is not None and hasattr(self.pbar,'child'):
--> 470             self.pbar.child.comment = f'{smooth_loss:.4f}'
    471 
    472     def on_epoch_end(self, epoch:int, num_batch:int, smooth_loss:Tensor,

/usr/local/lib/python3.6/dist-packages/torch/tensor.py in __format__(self, format_spec)
    385         if self.dim() == 0:
    386             return self.item().__format__(format_spec)
--> 387         return object.__format__(self, format_spec)
    388 
    389     def __ipow__(self, other):

TypeError: unsupported format string passed to Tensor.__format__

To me it looks like something may be wrong with the loss function or callbacks?
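
For what it's worth, the bottom of that traceback shows the smoothed loss reaching the progress bar is not a 0-dim tensor, so f'{smooth_loss:.4f}' falls through to object.__format__ and raises. A minimal sketch of the same failure (the vector loss below is just a stand-in for a loss function that returns an unreduced tensor):

import torch

scalar_loss = torch.tensor(0.5)
print(f'{scalar_loss:.4f}')              # works: dim() == 0, so .item() is formatted

vector_loss = torch.tensor([0.5, 0.7])   # e.g. a loss returned without reduction to a scalar
print(f'{vector_loss:.4f}')              # TypeError: unsupported format string passed to Tensor.__format__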