Hi all,
to practice and get deeper understanding i tried to use a simple sequential model in tabular learner
now i used the titanic dataset with data cleaning and minimal feature engineering
my code is available in a colab here: Google Colab
from pathlib import Path
from fastai.tabular.all import *
df = pd.read_csv('https://gist.githubusercontent.com/teamtom/3c62c2cd71f3bd7017596aa1e16847b2/raw/844da8ca7204897a29a7f62fc3eb6c19f95214b4/titanic_train.csv')
modes = df.mode().iloc[0]
df.fillna(modes, inplace=True)
df['LogFare'] = np.log(df['Fare']+1)
df = pd.get_dummies(df, columns=["Sex","Pclass","Embarked"])
splits = RandomSplitter(seed=42)(df)
dls = TabularPandas(
df, splits=splits, procs=[Normalize],
cat_names=[],
cont_names=['Age', 'SibSp', 'Parch', 'LogFare', 'Sex_female', 'Sex_male', 'Pclass_1', 'Pclass_2', 'Pclass_3', 'Embarked_C', 'Embarked_Q', 'Embarked_S'],
y_names='Survived', y_block=CategoryBlock()
).dataloaders(bs=64)
import torch.nn as nn
class NNet(nn.Module):
def __init__(self):
super(NNet, self).__init__()
self.nnet = nn.Sequential(
nn.Linear(12,10),
nn.ReLU(),
nn.Linear(10,10),
nn.ReLU(),
nn.Linear(10,1),
nn.Sigmoid()
)
def forward(self, _, x):
return self.nnet(x.view(-1,12))
model = NNet()
learn = Learner(dls, model=model, metrics=accuracy, loss_func=BCELossFlat(), cbs=ShowGraphCallback())
learn.fit(10, lr=0.03)
# i got this
epoch train_loss valid_loss accuracy time
0 0.403331 0.412062 0.595506 00:00
1 0.397318 0.387288 0.595506 00:00
2 0.392440 0.387936 0.595506 00:00
3 0.389725 0.391229 0.595506 00:00
4 0.387327 0.384920 0.595506 00:00
5 0.386091 0.381721 0.595506 00:00
6 0.384088 0.387873 0.595506 00:00
7 0.382418 0.382825 0.595506 00:00
8 0.378637 0.387598 0.595506 00:00
9 0.377786 0.384555 0.595506 00:00
the code runs without errors but there should be an issue somewhere because loss doesnât improve much and accuracy doesnât change at all
what am i doing wrong? what is the issue with my experiment?
thank you!