Hi Etienne,
Thanks a lot for your wonderful work and for kindly sharing it with the rest of us (and special thanks to sgugger). I coincidentally found myself following the exact path you followed. I was wondering whether you were able to use the learn.predict method after training?
I am using a model with only tabular and image data, and I trained it successfully, but I have had no luck with predict. I was calling it in an effort to compile a list of predictions on the validation set and do a manual “plot_top_losses” for inspection.
The funny thing is that the error I’m getting is a KeyError from pd.Categorical, called by fastai’s Categorify proc on the data. Below is what I think is causing the problem: the creation of a dataframe from two copies of the TabularLine (fastai.tabular.data line 45). I tried temporarily replacing the TabularLine inside my MixedItem with a custom object to make those two lines “happy”, but ended up creating more problems that I understand even less. I’ve included the full traceback from when I call predict.
Final note: I’m looking to see if I made some obvious mistake. Otherwise, I think the answer to my (and any) MixedItemList-related question is to wait for v2. Also, apologies for the long post.
~/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/fastai/tabular/data.py in process_one(self, item)
44 def process_one(self, item):
45 df = pd.DataFrame([item,item])
---> 46 for proc in self.procs: proc(df, test=True)
Partial code (which is proven to work with images only) and the full traceback are below:
dup_valid_dl = copy.deepcopy(data.valid_dl)
for item in dup_valid_dl.dataset:
# print(f'{item}')
data_item, label = item
img_item, tab_item = data_item.obj[0], data_item.obj[1]
print(f'Tab_Item:\t{tab_item}')
print(f'Img_Item:\t{img_item}')
print(f'Label:\t{label}')
predicted = learn.predict( (data_item) )[0].data[0]
# print(f'{predicted}')
# truth = label.data
# error = truth-predicted
# predicted_list.append([item, np.abs(error), truth, predicted])
# print(f'{truth:.3f}-{predicted:.3f}={error:.3f}')
# print(f'======================================================================================')
predicted_list = sorted(predicted_list, key=lambda x: -x[1])
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
~/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
2656 try:
-> 2657 return self._engine.get_loc(key)
2658 except KeyError:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index_class_helper.pxi in pandas._libs.index.Int64Engine._check_type()
KeyError: 'port_id'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-175-383315a78990> in <module>()
20 print(my_df)
21
---> 22 predicted = learn.predict( (data_item) )[0].data[0]
23 # print(f'{predicted}')
24 # truth = label.data
~/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/fastai/basic_train.py in predict(self, item, return_x, batch_first, with_dropout, **kwargs)
372 def predict(self, item:ItemBase, return_x:bool=False, batch_first:bool=True, with_dropout:bool=False, **kwargs):
373 "Return predicted class, label and probabilities for `item`."
--> 374 batch = self.data.one_item(item)
375 res = self.pred_batch(batch=batch, with_dropout=with_dropout)
376 raw_pred,x = grab_idx(res,0,batch_first=batch_first),batch[0]
~/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/fastai/basic_data.py in one_item(self, item, detach, denorm, cpu)
178 "Get `item` into a batch. Optionally `detach` and `denorm`."
179 ds = self.single_ds
--> 180 with ds.set_item(item):
181 return self.one_batch(ds_type=DatasetType.Single, detach=detach, denorm=denorm, cpu=cpu)
182
~/anaconda3/envs/pytorch_p36/lib/python3.6/contextlib.py in __enter__(self)
79 def __enter__(self):
80 try:
---> 81 return next(self.gen)
82 except StopIteration:
83 raise RuntimeError("generator didn't yield") from None
~/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/fastai/data_block.py in set_item(self, item)
606 def set_item(self,item):
607 "For inference, will briefly replace the dataset with one that only contains `item`."
--> 608 self.item = self.x.process_one(item)
609 yield None
610 self.item = None
~/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/fastai/data_block.py in process_one(self, item, processor)
88 if processor is not None: self.processor = processor
89 self.processor = listify(self.processor)
---> 90 for p in self.processor: item = p.process_one(item)
91 return item
92
~/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/fastai/data_block.py in process_one(self, item)
754 res = []
755 for procs, i in zip(self.procs, item):
--> 756 for p in procs: i = p.process_one(i)
757 res.append(i)
758 return res
~/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/fastai/tabular/data.py in process_one(self, item)
44 def process_one(self, item):
45 df = pd.DataFrame([item,item])
---> 46 for proc in self.procs: proc(df, test=True)
47 if len(self.cat_names) != 0:
48 codes = np.stack([c.cat.codes.values for n,c in df[self.cat_names].items()], 1).astype(np.int64) + 1
~/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/fastai/tabular/transform.py in __call__(self, df, test)
122 "Apply the correct function to `df` depending on `test`."
123 func = self.apply_test if test else self.apply_train
--> 124 func(df)
125
126 def apply_train(self, df:DataFrame):
~/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/fastai/tabular/transform.py in apply_test(self, df)
143 "Transform `self.cat_names` columns in categorical using the codes decided in `apply_train`."
144 for n in self.cat_names:
--> 145 df.loc[:,n] = pd.Categorical(df[n], categories=self.categories[n], ordered=True)
146
147 FillStrategy = IntEnum('FillStrategy', 'MEDIAN COMMON CONSTANT')
~/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/pandas/core/frame.py in __getitem__(self, key)
2925 if self.columns.nlevels > 1:
2926 return self._getitem_multilevel(key)
-> 2927 indexer = self.columns.get_loc(key)
2928 if is_integer(indexer):
2929 indexer = [indexer]
~/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
2657 return self._engine.get_loc(key)
2658 except KeyError:
-> 2659 return self._engine.get_loc(self._maybe_cast_indexer(key))
2660 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
2661 if indexer.ndim > 1 or indexer.size > 1:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index_class_helper.pxi in pandas._libs.index.Int64Engine._check_type()
KeyError: 'port_id'