Hi, I have been working on some classification datasets from Kaggle, but my structured learner model is not performing as expected. Since I was more focused on the model's performance, I borrowed the feature-engineering steps from an existing kernel in each competition and built on top of them.

But Random Forest and XGBoost always finish slightly ahead of the columnar NN module. For example:

1. Titanic: XGBoost = 0.82, Random Forest = 0.81, structured learner = 0.80
2. West Nile Virus Prediction: XGBoost = 0.73, Random Forest = 0.72, structured learner = 0.69

Here is the code I used.

```
class MixedInputModel(nn.Module):
    def __init__(self, emb_szs, n_cont, emb_drop, out_sz, szs, drops,
                 y_range=None, use_bn=False):
        super().__init__()
        self.embs = nn.ModuleList([nn.Embedding(c, s) for c, s in emb_szs])
        for emb in self.embs: emb_init(emb)
        n_emb = sum(e.embedding_dim for e in self.embs)
        self.n_emb, self.n_cont = n_emb, n_cont

        szs = [n_emb + n_cont] + szs
        self.lins = nn.ModuleList([
            nn.Linear(szs[i], szs[i+1]) for i in range(len(szs)-1)])
        self.bns = nn.ModuleList([
            nn.BatchNorm1d(sz) for sz in szs[1:]])
        for o in self.lins: kaiming_normal(o.weight.data)
        self.outp = nn.Linear(szs[-1], out_sz)
        kaiming_normal(self.outp.weight.data)
        self.emb_drop = nn.Dropout(emb_drop)
        self.drops = nn.ModuleList([nn.Dropout(drop) for drop in drops])
        self.bn = nn.BatchNorm1d(n_cont)
        self.use_bn, self.y_range = use_bn, y_range

    def forward(self, x_cat, x_cont):
        if self.n_emb != 0:
            x = [e(x_cat[:, i]) for i, e in enumerate(self.embs)]
            x = torch.cat(x, 1)
            x = self.emb_drop(x)
        if self.n_cont != 0:
            x2 = self.bn(x_cont)
            x = torch.cat([x, x2], 1) if self.n_emb != 0 else x2
        for l, d, b in zip(self.lins, self.drops, self.bns):
            x = F.relu(l(x))
            if self.use_bn: x = b(x)
            x = d(x)
        x = self.outp(x)
        if self.y_range:
            x = F.sigmoid(x)
            x = x * (self.y_range[1] - self.y_range[0])
            x = x + self.y_range[0]
        return x
```
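To make the data flow concrete, here is a self-contained sketch (plain PyTorch, independent of the fastai model above) of how the forward pass concatenates the embedding outputs with the continuous columns; the cardinalities, embedding sizes, and batch are made up for illustration:

```python
import torch
import torch.nn as nn

# Two hypothetical categorical columns: cardinality 3 -> dim 2, cardinality 10 -> dim 5
embs = nn.ModuleList([nn.Embedding(3, 2), nn.Embedding(10, 5)])

x_cat = torch.tensor([[0, 4], [2, 9]])  # batch of 2 rows, one index per cat column
x_cont = torch.randn(2, 4)              # 4 continuous columns

# Look up each column's embedding and concatenate along the feature axis
x = torch.cat([e(x_cat[:, i]) for i, e in enumerate(embs)], 1)  # shape [2, 2+5]
x = torch.cat([x, x_cont], 1)                                   # shape [2, 2+5+4]
print(x.shape)
```

The first linear layer therefore sees `n_emb + n_cont` input features, which is why `szs = [n_emb+n_cont] + szs` above.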

```
md = ColumnarModelData.from_data_frames('/tmp', trn_df, val_df, trn_y[0].astype(np.int64), val_y[0].astype(np.int64), cats, 5, test_df=df_test)

model = MixedInputModel(emb_szs, n_cont=len(df.columns)-len(cats), emb_drop=0, out_sz=3, szs=[500], drops=[0.5], use_bn=True).cuda()

bm = BasicModel(model, 'binary_classifier')
```
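For completeness, `emb_szs` is not defined in the snippet; I built it with the usual fastai rule of thumb (half the cardinality plus one, capped at 50). The cardinalities below are just an example, not the real ones:

```python
# Hypothetical cardinalities of the categorical columns (illustrative only)
cat_cardinalities = [3, 10, 7]

# fastai-style rule of thumb: embedding dim = min(50, (cardinality + 1) // 2)
emb_szs = [(c, min(50, (c + 1) // 2)) for c in cat_cardinalities]
print(emb_szs)  # [(3, 2), (10, 5), (7, 4)]
```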

```
class StructuredLearner(Learner):
    def __init__(self, data, models, **kwargs):
        super().__init__(data, models, **kwargs)
        self.crit = F.mse_loss

learn = StructuredLearner(md, bm)
learn.crit = F.binary_cross_entropy
```

Is there anything I can change in my model to make it produce better results? I can't figure out whether this is caused by a problem in the input parameters I'm passing, or by how the predictive power is split between the continuous and categorical features.