EDIT: I forgot to include the line of code that caused this error:
data = tabular_data_from_df('./', train_df, valid_df, dep_var, test_df=df_test, tfms=tfms, cat_names=cat_names)
I have `train_df`, `valid_df`, and `test_df`. `test_df` has all the same columns as the other two, with the sole exception of `'target'`, which is the dependent variable. It seems to be raising a `KeyError` because it wants `test_df` to have that column. But when I fill it with 0s, I get nothing but 0s as predictions after training. This error does not happen when I leave out the `test_df=df_test` argument.
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
~/anaconda3/envs/fastai/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
2524 try:
-> 2525 return self._engine.get_loc(key)
2526 except KeyError:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'target'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-16-3700e55ea338> in <module>()
----> 1 data = tabular_data_from_df('./', train_df, valid_df, dep_var, test_df=df_test, tfms=tfms, cat_names=cat_names)
~/code/fastai/fastai/tabular/data.py in tabular_data_from_df(path, train_df, valid_df, dep_var, test_df, tfms, cat_names, cont_names, stats, log_output, **kwargs)
79 if test_df is not None:
80 datasets.append(TabularDataset.from_dataframe(test_df, dep_var, train_ds.tfms, train_ds.cat_names,
---> 81 train_ds.cont_names, train_ds.stats, log_output))
82 return DataBunch.create(*datasets, path=path, **kwargs)
83
~/code/fastai/fastai/tabular/data.py in from_dataframe(cls, df, dep_var, tfms, cat_names, cont_names, stats, log_output)
63 tfms[i] = tfm
64 cat_names, cont_names = tfm.cat_names, tfm.cont_names
---> 65 ds = cls(df, dep_var, cat_names, cont_names, stats, log_output)
66 ds.tfms,ds.cat_names,ds.cont_names = tfms,cat_names,cont_names
67 return ds
~/code/fastai/fastai/tabular/data.py in __init__(self, df, dep_var, cat_names, cont_names, stats, log_output)
22 def __init__(self, df:DataFrame, dep_var:str, cat_names:OptStrList=None, cont_names:OptStrList=None,
23 stats:OptStats=None, log_output:bool=False):
---> 24 if not is_numeric_dtype(df[dep_var]): df[dep_var] = df[dep_var].cat.astype(np.int64)
25 self.y = np2model_tensor(df[dep_var].values)
26 if log_output: self.y = torch.log(self.y.float())
~/anaconda3/envs/fastai/lib/python3.6/site-packages/pandas/core/frame.py in __getitem__(self, key)
2137 return self._getitem_multilevel(key)
2138 else:
-> 2139 return self._getitem_column(key)
2140
2141 def _getitem_column(self, key):
~/anaconda3/envs/fastai/lib/python3.6/site-packages/pandas/core/frame.py in _getitem_column(self, key)
2144 # get column
2145 if self.columns.is_unique:
-> 2146 return self._get_item_cache(key)
2147
2148 # duplicate columns & possible reduce dimensionality
~/anaconda3/envs/fastai/lib/python3.6/site-packages/pandas/core/generic.py in _get_item_cache(self, item)
1840 res = cache.get(item)
1841 if res is None:
-> 1842 values = self._data.get(item)
1843 res = self._box_item_values(item, values)
1844 cache[item] = res
~/anaconda3/envs/fastai/lib/python3.6/site-packages/pandas/core/internals.py in get(self, item, fastpath)
3841
3842 if not isna(item):
-> 3843 loc = self.items.get_loc(item)
3844 else:
3845 indexer = np.arange(len(self.items))[isna(self.items)]
~/anaconda3/envs/fastai/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
2525 return self._engine.get_loc(key)
2526 except KeyError:
-> 2527 return self._engine.get_loc(self._maybe_cast_indexer(key))
2528
2529 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'target'
I know `fastai/tabular/data.py` was updated recently, and I think I’m on the latest for a pip-installed dev copy.
Here’s the output of `show_install(1)`:
platform : Linux-4.4.0-137-generic-x86_64-with-debian-stretch-sid
distro : Ubuntu 16.04 Xenial Xerus
python : 3.6.4
fastai : 1.0.6.dev0
torch : 1.0.0.dev20181007
torch cuda : Not available
torch cuda
nvcc cuda : Unknown
torch gpus
no supported gpus found on this system
I’d love for this to be something obvious that I missed or am doing wrong.