I have built an MLP with a standard Learner (also tried a TabularLearner but did not help with this problem). It trains properly and I can save the resulting model. However any attempt to get inferences from it fails with:
TypeError: list indices must be integers or slices, not list
Full stack trace after the code. This is the full training script, with the inference line at the end. I have a separate inference script, but tried adding the predict line here in case I messed up the model setup for inference somehow:
import pandas as pd
from fastai.tabular.all import *
import torch.nn as nn
import torch.nn.functional as F
source = '/Users/pierznewton-john/repos/gauquelin-ai/app/data/prepared.csv'
df = pd.read_csv(source)
# Continuous independent vars come after col 39
continuous_cols = df.columns[39:].tolist()
# get targets
target_cols = df.columns[:39].tolist()
# Split the dataset into training and validation sets
splits = RandomSplitter(0.1, 42)(range_of(df))
# Define preprocessing steps
procs = [Categorify, FillMissing, Normalize]
df[continuous_cols] = df[continuous_cols].astype(float)
for col in target_cols:
df[col] = df[col].astype(float)
# Create the TabularPandas object
to = TabularPandas(df, procs=procs, cont_names=continuous_cols, y_names=target_cols, splits=splits, y_block=MultiCategoryBlock())
# Create the DataLoaders
dls = to.dataloaders(bs=64) # Adjust batch size as needed
class SimpleMLP(nn.Module):
def __init__(self, input_size, layer_sizes=[100, 50], output_size=39):
super(SimpleMLP, self).__init__()
self.layers = nn.ModuleList()
last_size = input_size
for size in layer_sizes:
self.layers.append(nn.Linear(last_size, size))
last_size = size
self.output_layer = nn.Linear(last_size, output_size)
def forward(self, x_cat, x_cont):
# x_cat is empty and can be ignored
x = x_cont
for layer in self.layers:
x = F.relu(layer(x))
x = self.output_layer(x)
return x
def multi_label_accuracy(preds, targets, thresh=0.5):
"Compute accuracy for multi-label classification"
preds = preds.sigmoid() # Apply sigmoid to get probabilities
preds = (preds > thresh).float() # Convert to binary predictions
correct = (preds == targets).float() # Compare predictions with targets
return correct.mean() # Return mean accuracy
input_size = len(dls.cont_names) # Assuming only continuous features are used as input
output_size = 39
model = SimpleMLP(input_size=265, layer_sizes=[100, 50], output_size=39)
learn = Learner(dls, model, loss_func=nn.BCEWithLogitsLoss(), metrics=[multi_label_accuracy])
learn.fit_one_cycle(10)
val_loss, val_accuracy = learn.validate()
print(f"Validation loss: {val_loss}")
print(f"Validation accuracy: {val_accuracy}")
learn.model_dir = "trained"
learn.save('my_model')
# This fails...
row, clas, probs = learn.predict(df.iloc[0])
row.show()
Full error:
Traceback (most recent call last):
File "/Users/pedroponting/repos/gauquelin-ai/app/training/trainer.py", line 70, in <module>
row, clas, probs = learn.predict(df.iloc[0])
File "/Users/pedroponting/.pyenv/versions/3.9.8/lib/python3.9/site-packages/fastai/learner.py", line 320, in predict
dl = self.dls.test_dl([item], rm_type_tfms=rm_type_tfms, num_workers=0)
File "/Users/pedroponting/.pyenv/versions/3.9.8/lib/python3.9/site-packages/fastai/tabular/data.py", line 54, in test_dl
if process: to.process()
File "/Users/pedroponting/.pyenv/versions/3.9.8/lib/python3.9/site-packages/fastai/tabular/core.py", line 180, in process
def process(self): self.procs(self)
File "/Users/pedroponting/.pyenv/versions/3.9.8/lib/python3.9/site-packages/fastcore/transform.py", line 208, in __call__
def __call__(self, o): return compose_tfms(o, tfms=self.fs, split_idx=self.split_idx)
File "/Users/pedroponting/.pyenv/versions/3.9.8/lib/python3.9/site-packages/fastcore/transform.py", line 158, in compose_tfms
x = f(x, **kwargs)
File "/Users/pedroponting/.pyenv/versions/3.9.8/lib/python3.9/site-packages/fastcore/transform.py", line 81, in __call__
def __call__(self, x, **kwargs): return self._call('encodes', x, **kwargs)
File "/Users/pedroponting/.pyenv/versions/3.9.8/lib/python3.9/site-packages/fastcore/transform.py", line 107, in _call
super()._call(fn,x,split_idx,**kwargs)
File "/Users/pedroponting/.pyenv/versions/3.9.8/lib/python3.9/site-packages/fastcore/transform.py", line 91, in _call
return self._do_call(getattr(self, fn), x, **kwargs)
File "/Users/pedroponting/.pyenv/versions/3.9.8/lib/python3.9/site-packages/fastcore/transform.py", line 97, in _do_call
return retain_type(f(x, **kwargs), x, ret)
File "/Users/pedroponting/.pyenv/versions/3.9.8/lib/python3.9/site-packages/fastcore/dispatch.py", line 120, in __call__
return f(*args, **kwargs)
File "/Users/pedroponting/.pyenv/versions/3.9.8/lib/python3.9/site-packages/fastai/tabular/core.py", line 308, in encodes
missing = pd.isnull(to.conts)
File "/Users/pedroponting/.pyenv/versions/3.9.8/lib/python3.9/site-packages/fastai/tabular/core.py", line 208, in f
def f(o): return o[list(getattr(o,nm+'_names'))]
File "/Users/pedroponting/.pyenv/versions/3.9.8/lib/python3.9/site-packages/fastcore/foundation.py", line 88, in __getitem__
def __getitem__(self, k): return self.items[list(k) if isinstance(k,CollBase) else k]
TypeError: list indices must be integers or slices, not list