Tabular inference in production with test_dl

Hi, I'm failing to set up a Tabular model for inference.

learn = load_learner(path/'model.pkl')  # previously exported with .export()
test_df = inderence_dataset_prep()      # custom func that returns the inference DataFrame
test_dl = learn.dls.test_dl(test_df)
learn.get_preds(dl=test_dl)

This returns the error:

AttributeError: 'DataFrame' object has no attribute 'with_cont'

Thank you!


Does your inderence_dataset_prep return a DataFrame?

Also, what version of fastai are you using?

Can you also try doing the following:

to = learn.dls.train_ds.new(test_df)  # build a new TabularPandas from the training set, sharing its procs
to.process()                          # apply Categorify / FillMissing / Normalize to the new data
dl = TabDataLoader(to)                # wrap it in a tabular DataLoader
learn.get_preds(dl=dl)

Hi, Zachary!

My fastai version is 2.0.13.
Yes, inderence_dataset_prep returns a DataFrame.

Zachary, now it’s better:

error:
AssertionError: nan values in price but not in setup training set

Do I have to specify the procs for to again?

If so, does it forget my category dictionary, normalization stats, and fill-missing policy?

My procs were procs=[Categorify, FillMissing, Normalize].
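
(For what it's worth, a quick way to check what the exported learner kept. This is a sketch against fastai 2.0.x internals, so the attribute names classes, na_dict, means and stds are assumptions from that version:)

for proc in learn.dls.train_ds.procs.fs:                        # the fitted TabularProcs ship with the export
    print(type(proc).__name__)
    if hasattr(proc, 'classes'): print(proc.classes)            # Categorify: category dictionary
    if hasattr(proc, 'na_dict'): print(proc.na_dict)            # FillMissing: per-column fill values
    if hasattr(proc, 'means'):   print(proc.means, proc.stds)   # Normalize: training statistics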

When specifying cpu=False:

learn = load_learner(models_path/'model.pkl', cpu=False)
learn.get_preds(dl=dl) 

gives this error:

TypeError: object of type 'NoneType' has no len()

How do I run the model in GPU mode?
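
(One possible workaround, as a sketch rather than something verified on this fastai version: it assumes DataLoaders exposes a settable device attribute. Load on the CPU as usual and move things to the GPU by hand:)

import torch
from fastai.tabular.all import *

learn = load_learner(models_path/'model.pkl')      # load on the CPU (the default)
if torch.cuda.is_available():
    learn.model = learn.model.cuda()               # move the network to the GPU
    learn.dls.device = torch.device('cuda')        # assumption: device setter exists on DataLoaders

test_dl = learn.dls.test_dl(test_df)               # new batches should follow learn.dls.device
preds, _ = learn.get_preds(dl=test_dl)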

What’s happening here is that fastai preprocesses the training data and records which columns can contain missing values and which cannot. So in the training set you initially used, if you expect values to be missing at inference time, you should add a dummy row with np.nan in the columns that may at some point be missing. Note: this could also affect how inference goes, since those values are never “really” trained on (one sample doesn’t do much at all), but that is why you have the error.
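
To make that concrete, here is a sketch of the dummy-row idea (df and cont_names stand in for your own training DataFrame and continuous columns):

import numpy as np
import pandas as pd

# append one dummy row whose continuous columns are NaN before building TabularPandas,
# so FillMissing learns a fill value (and na indicator) for each of those columns
dummy = df.iloc[[0]].copy()
dummy[cont_names] = np.nan
df = pd.concat([df, dummy], ignore_index=True)

# procs=[Categorify, FillMissing, Normalize] will then know how to handle NaNs
# in those columns when they appear in the inference set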


I am having the exact same issue. Code and traceback below:

Code:

import pandas as pd
import numpy as np
from fastai.tabular.all import *

train_features = pd.read_csv('train_features.csv')
train_targets = pd.read_csv('train_targets_scored.csv')
test_features = pd.read_csv('test_features.csv')

cat_names = ['cp_type', 'cp_time', 'cp_dose']
cont_names = [x for x in train_features.columns if x not in cat_names]

procs = [Categorify, FillMissing, Normalize]
splits = RandomSplitter(valid_pct=0.2)(range_of(train_features))

data = pd.concat([train_features, train_targets], axis=1)

to = TabularPandas(data, procs=procs,
                   cat_names = cat_names,
                   cont_names = cont_names,
                   y_names=train_targets.columns.to_list(),
                   y_block=MultiCategoryBlock(),
                   splits=splits)

trn_dl = TabDataLoader(to.train, bs=64, shuffle=True, drop_last=True)
val_dl = TabDataLoader(to.valid, bs=128)

dls = DataLoaders(trn_dl, val_dl)
learn = tabular_learner(dls, layers=[300, 250], n_out=206)
learn.fit_one_cycle(1, 0.00363078061491251)

All of the above runs with no problem, but getting predictions on my test set (not part of to, the TabularPandas object) is giving me issues. I tried two things:

1. Using the learner's test_dl:
dl = learn.dls.test_dl(test_features)

learn.get_preds(dl=dl)

This gives the following traceback:

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-7-68608cc7e448> in <module>()
      1 dl = learn.dls.test_dl(test_features)
      2 
----> 3 learn.get_preds(dl=dl)

14 frames
/usr/local/lib/python3.6/dist-packages/fastai/learner.py in get_preds(self, ds_idx, dl, with_input, with_decoded, with_loss, act, inner, reorder, cbs, **kwargs)
    233         if with_loss: ctx_mgrs.append(self.loss_not_reduced())
    234         with ContextManagers(ctx_mgrs):
--> 235             self._do_epoch_validate(dl=dl)
    236             if act is None: act = getattr(self.loss_func, 'activation', noop)
    237             res = cb.all_tensors()

/usr/local/lib/python3.6/dist-packages/fastai/learner.py in _do_epoch_validate(self, ds_idx, dl)
    186         if dl is None: dl = self.dls[ds_idx]
    187         self.dl = dl
--> 188         with torch.no_grad(): self._with_events(self.all_batches, 'validate', CancelValidException)
    189 
    190     def _do_epoch(self):

/usr/local/lib/python3.6/dist-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
    153 
    154     def _with_events(self, f, event_type, ex, final=noop):
--> 155         try:       self(f'before_{event_type}')       ;f()
    156         except ex: self(f'after_cancel_{event_type}')
    157         finally:   self(f'after_{event_type}')        ;final()

/usr/local/lib/python3.6/dist-packages/fastai/learner.py in all_batches(self)
    159     def all_batches(self):
    160         self.n_iter = len(self.dl)
--> 161         for o in enumerate(self.dl): self.one_batch(*o)
    162 
    163     def _do_one_batch(self):

/usr/local/lib/python3.6/dist-packages/fastai/data/load.py in __iter__(self)
    102         for b in _loaders[self.fake_l.num_workers==0](self.fake_l):
    103             if self.device is not None: b = to_device(b, self.device)
--> 104             yield self.after_batch(b)
    105         self.after_iter()
    106         if hasattr(self, 'it'): del(self.it)

/usr/local/lib/python3.6/dist-packages/fastcore/transform.py in __call__(self, o)
    196         self.fs.append(t)
    197 
--> 198     def __call__(self, o): return compose_tfms(o, tfms=self.fs, split_idx=self.split_idx)
    199     def __repr__(self): return f"Pipeline: {' -> '.join([f.name for f in self.fs if f.name != 'noop'])}"
    200     def __getitem__(self,i): return self.fs[i]

/usr/local/lib/python3.6/dist-packages/fastcore/transform.py in compose_tfms(x, tfms, is_enc, reverse, **kwargs)
    148     for f in tfms:
    149         if not is_enc: f = f.decode
--> 150         x = f(x, **kwargs)
    151     return x
    152 

/usr/local/lib/python3.6/dist-packages/fastcore/transform.py in __call__(self, x, **kwargs)
    111     "A transform that always take tuples as items"
    112     _retain = True
--> 113     def __call__(self, x, **kwargs): return self._call1(x, '__call__', **kwargs)
    114     def decode(self, x, **kwargs):   return self._call1(x, 'decode', **kwargs)
    115     def _call1(self, x, name, **kwargs):

/usr/local/lib/python3.6/dist-packages/fastcore/transform.py in _call1(self, x, name, **kwargs)
    114     def decode(self, x, **kwargs):   return self._call1(x, 'decode', **kwargs)
    115     def _call1(self, x, name, **kwargs):
--> 116         if not _is_tuple(x): return getattr(super(), name)(x, **kwargs)
    117         y = getattr(super(), name)(list(x), **kwargs)
    118         if not self._retain: return y

/usr/local/lib/python3.6/dist-packages/fastcore/transform.py in __call__(self, x, **kwargs)
     71     @property
     72     def name(self): return getattr(self, '_name', _get_name(self))
---> 73     def __call__(self, x, **kwargs): return self._call('encodes', x, **kwargs)
     74     def decode  (self, x, **kwargs): return self._call('decodes', x, **kwargs)
     75     def __repr__(self): return f'{self.name}:\nencodes: {self.encodes}decodes: {self.decodes}'

/usr/local/lib/python3.6/dist-packages/fastcore/transform.py in _call(self, fn, x, split_idx, **kwargs)
     81     def _call(self, fn, x, split_idx=None, **kwargs):
     82         if split_idx!=self.split_idx and self.split_idx is not None: return x
---> 83         return self._do_call(getattr(self, fn), x, **kwargs)
     84 
     85     def _do_call(self, f, x, **kwargs):

/usr/local/lib/python3.6/dist-packages/fastcore/transform.py in _do_call(self, f, x, **kwargs)
     87             if f is None: return x
     88             ret = f.returns_none(x) if hasattr(f,'returns_none') else None
---> 89             return retain_type(f(x, **kwargs), x, ret)
     90         res = tuple(self._do_call(f, x_, **kwargs) for x_ in x)
     91         return retain_type(res, x)

/usr/local/lib/python3.6/dist-packages/fastcore/dispatch.py in __call__(self, *args, **kwargs)
    110         if not f: return args[0]
    111         if self.inst is not None: f = MethodType(f, self.inst)
--> 112         return f(*args, **kwargs)
    113 
    114     def __get__(self, inst, owner):

/usr/local/lib/python3.6/dist-packages/fastai/tabular/core.py in encodes(self, to)
    321 
    322     def encodes(self, to):
--> 323         if not to.with_cont: res = (tensor(to.cats).long(),)
    324         else: res = (tensor(to.cats).long(),tensor(to.conts).float())
    325         ys = [n for n in to.y_names if n in to.items.columns]

/usr/local/lib/python3.6/dist-packages/pandas/core/generic.py in __getattr__(self, name)
   5272             if self._info_axis._can_hold_identifiers_and_holds_name(name):
   5273                 return self[name]
-> 5274             return object.__getattribute__(self, name)
   5275 
   5276     def __setattr__(self, name: str, value) -> None:

AttributeError: 'DataFrame' object has no attribute 'with_cont'

One thing I noticed here is that in the second-to-last frame of the traceback, the highlighted line, if not to.with_cont: res = (tensor(to.cats).long(),), might be the cause: when I run to.with_cont it outputs True, so I don’t think this line should run.
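
(A quick way to poke at that, as a sketch; it assumes the test DataLoader keeps its underlying TabularPandas as dl.dataset:)

dl = learn.dls.test_dl(test_features)
print(type(dl.dataset))                                   # what ReadTabBatch will actually receive
print(getattr(dl.dataset, 'with_cont', 'no with_cont'))   # True when continuous columns are present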

2. Zach’s bit of code from post #2
to = learn.dls.train_ds.new(test_features)
to.process()
dl = TabDataLoader(to)
learn.get_preds(dl=dl)

This gives the following traceback:

---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-8-adcc56cec048> in <module>()
      1 to = learn.dls.train_ds.new(test_features)
----> 2 to.process()
      3 dl = TabDataLoader(top_k_accuracy)
      4 learn.get_preds(dl=dl)

8 frames
/usr/local/lib/python3.6/dist-packages/fastai/tabular/core.py in process(self)
    176     def show(self, max_n=10, **kwargs): display_df(self.new(self.all_cols[:max_n]).decode().items)
    177     def setup(self): self.procs.setup(self)
--> 178     def process(self): self.procs(self)
    179     def loc(self): return self.items.loc
    180     def iloc(self): return _TabIloc(self)

/usr/local/lib/python3.6/dist-packages/fastcore/transform.py in __call__(self, o)
    196         self.fs.append(t)
    197 
--> 198     def __call__(self, o): return compose_tfms(o, tfms=self.fs, split_idx=self.split_idx)
    199     def __repr__(self): return f"Pipeline: {' -> '.join([f.name for f in self.fs if f.name != 'noop'])}"
    200     def __getitem__(self,i): return self.fs[i]

/usr/local/lib/python3.6/dist-packages/fastcore/transform.py in compose_tfms(x, tfms, is_enc, reverse, **kwargs)
    148     for f in tfms:
    149         if not is_enc: f = f.decode
--> 150         x = f(x, **kwargs)
    151     return x
    152 

/usr/local/lib/python3.6/dist-packages/fastcore/transform.py in __call__(self, x, **kwargs)
     71     @property
     72     def name(self): return getattr(self, '_name', _get_name(self))
---> 73     def __call__(self, x, **kwargs): return self._call('encodes', x, **kwargs)
     74     def decode  (self, x, **kwargs): return self._call('decodes', x, **kwargs)
     75     def __repr__(self): return f'{self.name}:\nencodes: {self.encodes}decodes: {self.decodes}'

/usr/local/lib/python3.6/dist-packages/fastcore/transform.py in _call(self, fn, x, split_idx, **kwargs)
     81     def _call(self, fn, x, split_idx=None, **kwargs):
     82         if split_idx!=self.split_idx and self.split_idx is not None: return x
---> 83         return self._do_call(getattr(self, fn), x, **kwargs)
     84 
     85     def _do_call(self, f, x, **kwargs):

/usr/local/lib/python3.6/dist-packages/fastcore/transform.py in _do_call(self, f, x, **kwargs)
     87             if f is None: return x
     88             ret = f.returns_none(x) if hasattr(f,'returns_none') else None
---> 89             return retain_type(f(x, **kwargs), x, ret)
     90         res = tuple(self._do_call(f, x_, **kwargs) for x_ in x)
     91         return retain_type(res, x)

/usr/local/lib/python3.6/dist-packages/fastcore/dispatch.py in __call__(self, *args, **kwargs)
    110         if not f: return args[0]
    111         if self.inst is not None: f = MethodType(f, self.inst)
--> 112         return f(*args, **kwargs)
    113 
    114     def __get__(self, inst, owner):

/usr/local/lib/python3.6/dist-packages/fastai/data/transforms.py in encodes(self, o)
    257             self.vocab = CategoryMap(list(vals), add_na=self.add_na)
    258 
--> 259     def encodes(self, o): return TensorMultiCategory([self.vocab.o2i[o_] for o_ in o])
    260     def decodes(self, o): return MultiCategory      ([self.vocab    [o_] for o_ in o])
    261 

/usr/local/lib/python3.6/dist-packages/fastai/data/transforms.py in <listcomp>(.0)
    257             self.vocab = CategoryMap(list(vals), add_na=self.add_na)
    258 
--> 259     def encodes(self, o): return TensorMultiCategory([self.vocab.o2i[o_] for o_ in o])
    260     def decodes(self, o): return MultiCategory      ([self.vocab    [o_] for o_ in o])
    261 

KeyError: 'cp_type'

cp_type is one of the categorical variables in cat_names above.

@randy912, you have your list of y_names as the entire DataFrame’s columns, not just the y variables.

No, y_names is only the DataFrame columns for train_targets, not for data.

I found a couple of kernels from the Lish-MOA Kaggle competition using fastai, and I solved my issues by removing the y_block=MultiCategoryBlock() parameter from the TabularPandas call.
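
For anyone following along, this is roughly what the working setup looks like with that change (same names as the earlier post, letting TabularPandas infer the target block from y_names; a sketch, not re-run here):

to = TabularPandas(data, procs=procs,
                   cat_names=cat_names,
                   cont_names=cont_names,
                   y_names=train_targets.columns.to_list(),  # no y_block argument
                   splits=splits)

trn_dl = TabDataLoader(to.train, bs=64, shuffle=True, drop_last=True)
val_dl = TabDataLoader(to.valid, bs=128)
dls = DataLoaders(trn_dl, val_dl)

learn = tabular_learner(dls, layers=[300, 250], n_out=206)
learn.fit_one_cycle(1, 0.00363078061491251)

dl = learn.dls.test_dl(test_features)   # now preprocesses the test set without error
preds, _ = learn.get_preds(dl=dl)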
