I attempted my own custom nn.Module and inserted it into the lesson3 notebook, but the results are much worse with the same network parameters. What am I doing wrong?
class net2(torch.nn.Module):
    def __init__(self, dftrain, dfvalid, trainY, actual, contin_vars, cat_vars):
        super(net2, self).__init__()
        self.n, self.nv = dftrain.shape[0], dfvalid.shape[0]
        self.contin_vars, self.cat_vars = contin_vars, cat_vars
        y = trainY[:, None]
        yv = actual[:, None]
        x_c, xv_c = self.normalize_inputs(dftrain, dfvalid, contin_vars)
        x_cat, xv_cat = dftrain[cat_vars].values, dfvalid[cat_vars].values
        self.x_c = torch.FloatTensor(x_c)
        self.x_cat = torch.LongTensor(x_cat)
        self.xv_c = torch.FloatTensor(xv_c)
        self.xv_cat = torch.LongTensor(xv_cat)
        self.y = torch.FloatTensor(y)
        self.yv = torch.FloatTensor(yv)
        # Embedding dimensions for the categoricals:
        # one (cardinality, width) pair per column, width = min(50, num_codes // 2)
        emb_dims = []
        num_weights = 0
        for i, myNm in enumerate(self.cat_vars):
            num_codes = len(torch.unique(self.x_cat[:, i])) + 1
            cats = min(50, num_codes // 2)
            emb_dims.append((num_codes, cats))
            num_weights += cats
            print(myNm, num_codes, cats)
        self.emb_dims, self.num_weights = emb_dims, num_weights
        self.define_architecture()
        self.initialize_parameters()
    def initialize_parameters(self):
        for emb_layer in self.emb_layers:
            emb_layer.weight.data.uniform_(0, 0.05)
        # Weight init only; biases keep PyTorch's defaults
        torch.nn.init.normal_(self.linear_2.weight, 0, 0.05)
        torch.nn.init.normal_(self.linear_3.weight, 0, 0.05)
        torch.nn.init.normal_(self.linear_4.weight, 0, 0.05)
    def forward(self, cats, conts):
        # Embedding lookup per categorical column, concatenated, then dropout
        x = [emb_layer(cats[:, i]) for i, emb_layer in enumerate(self.emb_layers)]
        x = torch.cat(x, 1)
        x = self.dropout_1(x)
        # Batch norm for the continuous columns is defined but not applied
        #conts = self.batchnorm_1(conts)
        x = torch.cat([x, conts], 1)
        # Linear layers; relu_2 and relu_3 are Dropout modules (see define_architecture)
        lin2 = self.linear_2(x)
        relu2 = self.relu_2(lin2)
        lin3 = self.linear_3(relu2)
        relu3 = self.relu_3(lin3)
        # Final linear output (a sigmoid was tried here earlier and commented out)
        lin4 = self.linear_4(relu3)
        return lin4
    def define_architecture(self):
        # One embedding layer per categorical variable, followed by dropout
        self.emb_layers = torch.nn.ModuleList([torch.nn.Embedding(c, d)
                                               for c, d in self.emb_dims])
        self.dropout_1 = torch.nn.Dropout(0.04)
        # Batch norm for the continuous inputs (currently unused in forward)
        self.batchnorm_1 = torch.nn.BatchNorm1d(len(self.contin_vars))
        self.linear_2 = torch.nn.Linear(self.num_weights + len(self.contin_vars), 1000)
        # Despite the names, relu_2 and relu_3 are Dropout layers; with the ReLUs
        # commented out there is no nonlinearity between the linear layers
        self.relu_2 = torch.nn.Dropout(0.001)
        #self.relu_2 = torch.nn.ReLU()
        self.linear_3 = torch.nn.Linear(1000, 500)
        self.relu_3 = torch.nn.Dropout(0.01)
        #self.relu_3 = torch.nn.ReLU()
        self.linear_4 = torch.nn.Linear(500, 1)
    def normalize_inputs(self, dftrain, dfvalid, contin_vars):
        # Column means/stds computed on the training set only
        self.mymeans = dftrain[contin_vars].apply(np.mean, axis=0)
        self.mysds = dftrain[contin_vars].apply(np.std, axis=0)
        mymeans_np = self.mymeans.values[None, :]
        mysds_np = self.mysds.values[None, :]
        x_c = dftrain[contin_vars].values
        xv_c = dfvalid[contin_vars].values
        if 1 == 0:  # standardization currently switched off
            x_c = (x_c - mymeans_np) / mysds_np
            xv_c = (xv_c - mymeans_np) / mysds_np
        return x_c, xv_c
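
Before training, a quick shape check can confirm the module wires up as intended. This is only a minimal smoke test on synthetic data; the frame, the column names (cat_a, cont_a, ...) and the sizes are made up for illustration:

import numpy as np
import pandas as pd
import torch

# Synthetic frame: two categorical and two continuous columns
df = pd.DataFrame({
    'cat_a': np.random.randint(0, 5, 256),
    'cat_b': np.random.randint(0, 3, 256),
    'cont_a': np.random.randn(256),
    'cont_b': np.random.randn(256),
})
y = np.random.randn(256).astype(np.float32)

model = net2(df, df, y, y, ['cont_a', 'cont_b'], ['cat_a', 'cat_b'])
out = model(model.x_cat[:32], model.x_c[:32])
assert out.shape == (32, 1)  # one prediction per row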
The class is then trained with:
net = net2(dftrain, dftrain, trainY, trainY, contin_vars, cat_vars)  # training frame doubles as validation
optimizer = torch.optim.SGD(net.parameters(), lr=1e-3)
bs = 128
num_batches = dftrain.shape[0] // bs
last_batch_size = dftrain.shape[0] - num_batches * bs
for epoch in range(1):
    for t in range(num_batches + 1):
        start = t * bs
        end = start + last_batch_size if t == num_batches else (t + 1) * bs
        y_pred = net(net.x_cat[start:end, :], net.x_c[start:end, :])
        loss = loss_fn(y_pred, net.y[start:end])  # loss_fn as defined in the lesson3 notebook
        if t % 1000 == 0:
            print(t, loss.item())
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
    y_pred = net(net.x_cat, net.x_c)
    #yv_pred = net(net.xv_cat, net.xv_c)
    print(epoch, exp_rmspe(y_pred, net.y).item())  #, exp_rmspe(yv_pred, net.yv).item()
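
One caveat about the epoch-end numbers above: because the network contains Dropout layers, the final exp_rmspe is computed with dropout still active. A minimal sketch of evaluating in inference mode instead (still assuming exp_rmspe from the lesson3 notebook):

# Evaluate with dropout disabled and no gradient tracking
net.eval()
with torch.no_grad():
    y_pred = net(net.x_cat, net.x_c)
    yv_pred = net(net.xv_cat, net.xv_c)
    print(exp_rmspe(y_pred, net.y).item(), exp_rmspe(yv_pred, net.yv).item())
net.train()  # back to training mode before the next epoch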