Target output mismatch

Hi, I’m trying to create a training loop based on this lecture but for structural data using the MixedInputClass. The training loop runs just fine for the training data set but for the validation data set, the length of the prediction variable (prds) equals the length of the prediction variable in the training data set and not the length of the input variables. I would be very grateful if someone could tell me how to debug this issue. Here’s my code

import pandas as pd 
import datetime
import glob as glob
import re
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from IPython.display import display, HTML, display_html
import seaborn as sns
from fastai.metrics import*
from fastai.model import*
from fastai.dataset import*
import torch.nn as nn
from fastai.structured import *
from fastai.column_data import *

cat_vars = ['month','year','weekofyear','dayofweek','weekday','quarter','days_in_month','is_month_start','is_month_end','is_quarter_start','is_quarter_end','is_year_start','is_year_end']
contin_vars = ['discount','lag_price','quantity','mrp']

dep = 'price'

train_set = agg_data[:336]
test_set = agg_data[336:420]

#declaring categorical variables as categorical variables
for v in cat_vars: 
      train_set[v] = train_set[v].astype('category').cat.as_ordered()

apply_cats(test_set, train_set)

#n is the size of the train set
n = len(train_set); n

df, y, nas, mapper = proc_df(train_set, 'price', skip_flds=['date'],do_scale=True)
yl = np.log(y)

df_test, y_test, nas, mapper = proc_df(test_set, 'price', do_scale=True, skip_flds=['date'],mapper=mapper, na_dict=nas) 

samp_size = n 
train_ratio = 0.80
train_size = int(samp_size * train_ratio); train_size
val_idx = list(range(train_size, len(train_set)))

#putting the data in a data object
md = ColumnarModelData.from_data_frame(path, val_idx, df , yl.astype(np.float32), cat_flds=cat_vars, bs=20,test_df=df_test)

cat_sz = [(c, len(train_set[c].cat.categories)+1) for c in cat_vars]
emb_szs = [(c, min(50, (c)//2)) for _,c in cat_sz]

#self def pre-reqs

def inv_y(a): return np.exp(a)

def exp_rmspe(y_pred, targ):
         targ = inv_y(targ)
         pct_var = (targ - inv_y(y_pred))/targ
         return math.sqrt((pct_var**2).mean())

max_log_y = np.max(yl)
y_range = (0, max_log_y*1.2) 
def emb_init(x):
      x = x.weight.data
      sc = 2/(x.size(1)+1)
      x.uniform_(-sc,sc)

class MixedInputModel(nn.Module):
"""Model able to handle inputs consisting of both categorical and continuous variables.
Args:
   emb_szs (list of int): List of embedding size
   n_cont (int): Number of continuous variables in inputs
   emb_drop (float): Dropout applied to the output of embedding
   out_sz (int): Size of model's output.
   szs (list of int): List of hidden variables sizes
   drops (list of float): List of dropout applied to hidden variables
   y_range (list of float): Min and max of `y`. y_range[0] = min, y_range[1] = max.
   use_bn (bool): If use BatchNorm, set ``True``
   is_reg (bool): If regression, set ``True``
   is_multi (bool): If multi-label classification, set ``True``
"""
def __init__(self, emb_szs, n_cont, emb_drop, out_sz, szs, drops,y_range=None, use_bn=False, is_reg=True, is_multi=False):
    super().__init__() #constructor
    for i,(c,s) in enumerate(emb_szs): assert c > 1, f"cardinality must be >=2, got emb_szs[{i}]: ({c},{s})"
    if is_reg==False and is_multi==False: assert out_sz >= 2, "For classification with out_sz=1, use is_multi=True"
    self.embs = nn.ModuleList([nn.Embedding(c, s) for c,s in emb_szs])# go through each tuple, grab no. of cats and sz of embedding and construct the embeddings
    for emb in self.embs: emb_init(emb)
    n_emb = sum(e.embedding_dim for e in self.embs)
    self.n_emb, self.n_cont=n_emb, n_cont

    szs = [n_emb+n_cont] + szs
    self.lins = nn.ModuleList([
        nn.Linear(szs[i], szs[i+1]) for i in range(len(szs)-1)]) #list of lin layers:[1000,500] = no. of activations for lin lays; here lin layer goes from size szs[i] to szs[i+1]
    self.bns = nn.ModuleList([nn.BatchNorm1d(sz) for sz in szs[1:]])
    for o in self.lins: kaiming_normal(o.weight.data) #initialization
    self.outp = nn.Linear(szs[-1], out_sz) #final linear layer activation : fin layer has out_sz as its size
    kaiming_normal(self.outp.weight.data)

    self.emb_drop = nn.Dropout(emb_drop)
    self.drops = nn.ModuleList([nn.Dropout(drop) for drop in drops]) #dropouts: we have list of how much drop out to apply to each layer -> this goes through each thing in that list (for drop in drops)and create a droput layer for it(nn.Dropout(drop))
    self.bn = nn.BatchNorm1d(n_cont)
    self.use_bn,self.y_range = use_bn,y_range
    self.is_reg = is_reg
    self.is_multi = is_multi

#def forward(self, x_cat, x_cont):
def forward(self, cats, conts):
    if self.n_emb != 0:
        x = [e(x_cat[:,i]) for i,e in enumerate(self.embs)] #go through each embedding layer ; e(x_cat[:,i]) : treat like a fn , so call with the ith categorical var
        x = torch.cat(x, 1)#concat them all together
        x = self.emb_drop(x)#put that through dropout
    if self.n_cont != 0:
        x2 = self.bn(V(x_cont))
        x = torch.cat([x, x2], 1) if self.n_emb != 0 else x2
    for l,d,b in zip(self.lins, self.drops, self.bns): #go through each linear layer l
        x = F.relu(l(x)) # applyReLU_to_it(call it)
        if self.use_bn: x = b(x)
        x = d(x) #apply dropout to it
    x = self.outp(x) #apply final linear layer: has size out_sz (in ex it is =1)
    if not self.is_reg:
        if self.is_multi:
            x = F.sigmoid(x)
        else:
            x = F.log_softmax(x)
    elif self.y_range: #if y_range para is passed:
        x = F.sigmoid(x) #makes colaborative filtering better
        x = x*(self.y_range[1] - self.y_range[0]) #sales are going to be greater than 0 but less than max ; thus multiply sigmoid with range
        x = x+self.y_range[0] 
    return x

 net = MixedInputModel(emb_szs,len(contin_vars), 0.04, 1, [100,50], [0.001,0.01] ,y_range=y_range, use_bn=True, is_reg=True, is_multi=False)
 lssss = F.cross_entropy()
 learning_rate = 1e-2
 opt = optim.SGD(net.parameters(),lr = learning_rate,momentum = 0.9, weight_decay = 1e-3)

for epoch in range(1): #???????????????????????????
    losses=[]
    dl = iter(md.trn_dl)##?????????????????
for t in range(len(list(md.trn_dl))): #number of batches
  l = next(dl)
  x_cat, x_cont,y = l
  #a. Forward pass: compu
  y_pred = net(V(x_cat),V(x_cont))
  ls = lssss(y_pred, V(y))
  losses.append(ls) 

  # b.Use the optimizer object to zero all of the gradients for the variables to be updated (which are the learnable weights of the model)
  opt.zero_grad()

  #c. Backward pass: compute gradient of the loss with respect to model parameters
  ls.backward()

  #d. Calling the step function on an Optimizer makes an update to its parameters
  opt.step()
  
losses_val =[]
vali_dl = iter(md.val_dl)    
for tt in range(len(list(md.val_dl))):
  vdl = next(vali_dl)
  xv_cat,xv_cont,y_val = vdl
  prds = net(V(xv_cat),V(xv_cont))
  ls_val = lssss(prds, V(y_val))
  losses_val.append(ls_val)