I was trying out the Tabular Data module to solve a problem at my workplace. The idea of using embeddings for categorical variables is brilliant.
I trained for one cycle and have got very good results on the validation set so far.
However, I am unable to test on the test set. I ran the following command:
learner.predict(learner.data.test_ds)
I got the following KeyError:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
TypeError: an integer is required
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
~/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
2524 try:
-> 2525 return self._engine.get_loc(key)
2526 except KeyError:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
KeyError: 'AWBWeight'
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
TypeError: an integer is required
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-77-46ef6cd0da53> in <module>()
----> 1 learner.predict(learner.data.test_ds)
~/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/fastai/basic_train.py in predict(self, item, **kwargs)
357 def predict(self, item:ItemBase, **kwargs):
358 "Return predicted class, label and probabilities for `item`."
--> 359 batch = self.data.one_item(item)
360 res = self.pred_batch(batch=batch)
361 pred,x = res[0],batch[0]
~/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/fastai/basic_data.py in one_item(self, item, detach, denorm, cpu)
178 "Get `item` into a batch. Optionally `detach` and `denorm`."
179 ds = self.single_ds
--> 180 with ds.set_item(item):
181 return self.one_batch(ds_type=DatasetType.Single, detach=detach, denorm=denorm, cpu=cpu)
182
~/anaconda3/envs/pytorch_p36/lib/python3.6/contextlib.py in __enter__(self)
79 def __enter__(self):
80 try:
---> 81 return next(self.gen)
82 except StopIteration:
83 raise RuntimeError("generator didn't yield") from None
~/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/fastai/data_block.py in set_item(self, item)
593 def set_item(self,item):
594 "For inference, will briefly replace the dataset with one that only contains `item`."
--> 595 self.item = self.x.process_one(item)
596 yield None
597 self.item = None
~/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/fastai/data_block.py in process_one(self, item, processor)
80 if processor is not None: self.processor = processor
81 self.processor = listify(self.processor)
---> 82 for p in self.processor: item = p.process_one(item)
83 return item
84
~/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/fastai/tabular/data.py in process_one(self, item)
44 def process_one(self, item):
45 df = pd.DataFrame([item,item])
---> 46 for proc in self.procs: proc(df, test=True)
47 if len(self.cat_names) != 0:
48 codes = np.stack([c.cat.codes.values for n,c in df[self.cat_names].items()], 1).astype(np.int64) + 1
~/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/fastai/tabular/transform.py in __call__(self, df, test)
122 "Apply the correct function to `df` depending on `test`."
123 func = self.apply_test if test else self.apply_train
--> 124 func(df)
125
126 def apply_train(self, df:DataFrame):
~/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/fastai/tabular/transform.py in apply_test(self, df)
175 if name+'_na' not in self.cat_names: self.cat_names.append(name+'_na')
176 df.loc[:,name] = df.loc[:,name].fillna(self.na_dict[name])
--> 177 elif pd.isnull(df[name]).sum() != 0:
178 raise Exception(f"""There are nan values in field {name} but there were none in the training set.
179 Please fix those manually.""")
~/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/pandas/core/frame.py in __getitem__(self, key)
2137 return self._getitem_multilevel(key)
2138 else:
-> 2139 return self._getitem_column(key)
2140
2141 def _getitem_column(self, key):
~/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/pandas/core/frame.py in _getitem_column(self, key)
2144 # get column
2145 if self.columns.is_unique:
-> 2146 return self._get_item_cache(key)
2147
2148 # duplicate columns & possible reduce dimensionality
~/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/pandas/core/generic.py in _get_item_cache(self, item)
1840 res = cache.get(item)
1841 if res is None:
-> 1842 values = self._data.get(item)
1843 res = self._box_item_values(item, values)
1844 cache[item] = res
~/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/pandas/core/internals.py in get(self, item, fastpath)
3841
3842 if not isna(item):
-> 3843 loc = self.items.get_loc(item)
3844 else:
3845 indexer = np.arange(len(self.items))[isna(self.items)]
~/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
2525 return self._engine.get_loc(key)
2526 except KeyError:
-> 2527 return self._engine.get_loc(self._maybe_cast_indexer(key))
2528
2529 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
KeyError: 'AWBWeight'