Custom loss function giving nans?

I’m working on a project integrating custom pytorch objects in to the fastai training api. Right now I’m a little confused about using a custom loss function. Two problems, firstly I’m just getting nans in the learner and secondly it won’t print out from my print statements for debugging. Anybody have any idea what’s going on?

Custom loss function:

def multitaskloss(preds, targets):
    loss = 0
    for ii, targ_loc in enumerate(targets['locs']):
        loss += nn.MSELoss(preds[0][ii], targ_loc)

    for ii, targ_label in enumerate(targets['labels']):
        loss += nn.BCEWithLogitsLoss(preds[1][ii], targ_label)
    return loss

Some more of the code in case it’s helpful

class llni_Dataset(
    def __init__(self, df):
        self.X = df[cont_names]
        self.y = df[targets]     
    def __len__(self): return len(self.y)
    def __getitem__(self, idx): 
        # convert X
        X = torch.as_tensor(self.X.loc[idx].values, dtype=torch.float32)
        # set up the target
        target = {}
        target['locs'] = torch.as_tensor(self.y.loc[idx, target_locs], dtype=torch.float32)
        target['labels'] = torch.as_tensor(self.y.loc[idx, target_labels].map(mapdict), dtype=torch.int64)        
        return X, target

train_ds = llni_Dataset(df.loc[~df['is_valid']])
valid_ds = llni_Dataset(df.loc[df['is_valid']].reset_index(drop = True))

# set up the PyTorch dataloaders
train_dl = DataLoader(train_ds, batch_size=32, shuffle=True, num_workers=2)
valid_dl = DataLoader(valid_ds, batch_size=32, shuffle=True, num_workers=2)

# set up the fastai dataloaders
dls = DataLoaders(train_dl, valid_dl, device = 'cuda')

class Net(nn.Module):
    def __init__(self, input_size):
        super(Net, self).__init__()
        self.lin1 = nn.Linear(input_size, 200)
        self.lin2 = nn.Linear(200, 100)
        self.lin3 = nn.Linear(100, 50)
        self.lin4_1 = nn.Linear(50, 13)
        self.lin4_2 = nn.Linear(50, 13)
        self.bn1 = nn.BatchNorm1d(200)
        self.bn2 = nn.BatchNorm1d(100)
        self.bn3 = nn.BatchNorm1d(50)
        self.drops = nn.Dropout(0.2)
    def forward(self, x):
        x = self.bn1(F.relu(self.lin1(x)))
        x = self.drops(x)
        x = self.bn2(F.relu(self.lin2(x)))
        x = self.drops(x)
        x = self.bn3(F.relu(self.lin3(x)))
        x = self.drops(x)
        labels = self.lin4_1(x)
        locs = self.lin4_2(x)
        return locs, labels
net = Net(len(cont_names))
opt_func = partial(OptimWrapper, opt=torch.optim.Adam)

learn = Learner(dls, model = net, loss_func = multitaskloss, opt_func=opt_func)

which gives

What do your print statements give you? I would also print preds[0][ii] and preds[1][ii] to make sure those look how you would expect.

The other thing I would confirm is that you are using your loss functions correctly. IIRC, you need to create that function before passing preds and targets in. So it would be more like:

nn.MSELOSS()(preds[0][ii], targ_loc)

I’m not 100% though but those are the first to things I would try, then I would break it down and pass one batch through the loss functions and see what it returns.