Custom ItemList, getting ForkingPickler broken pipe

Hi Etienne,

Thanks a lot for your wonderful work and also for kindly sharing it with the rest of us (and special thanks to sgugger). I coincidentally found myself following the exact path you followed. I was wondering if you were able to use the learn.predict method after training?

I am using a model with only tabular and image data, and I trained it successfully. But no luck with predict. This was sort of an effort to compile a list of predictions on the validation set and “plot_top_losses” manually for inspection :slight_smile:

But the funny thing is that the error I’m getting is a KeyError from pd.Categorical, called by fastai’s Categorify proc on the data. Below is what I think is causing the problem: the creation of a dataframe from two copies of the TabularLine (fastai.tabular.data line 45). I tried to temporarily replace the TabularLine inside my MixedItem with a custom object to make those two lines “happy”, but ended up creating more problems that I understand even less. I’ve included the full traceback from when I call predict.

Final note: I’m looking to see if I made some obvious mistake. Otherwise, I think the answer to my (and any) MixedItemList-related question is to wait for v2 :slight_smile: Also, apologies for the long post.

~/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/fastai/tabular/data.py in process_one(self, item)
     44     def process_one(self, item):
     45         df = pd.DataFrame([item,item])
---> 46         for proc in self.procs: proc(df, test=True)

Partial code (which is proven to work with Images only) and the full traceback are below:

# Reproduction snippet: walk the validation dataset item by item and call
# learn.predict on each one, so that per-item errors can be ranked manually.
# Work on a deep copy of the validation dataloader rather than the original.
dup_valid_dl = copy.deepcopy(data.valid_dl)
for item in dup_valid_dl.dataset:
#     print(f'{item}')
     # Each dataset element unpacks into an (input, label) pair.
     data_item, label = item
     # The mixed input keeps its parts in .obj: index 0 is bound to the image
     # item and index 1 to the tabular item.
     img_item, tab_item = data_item.obj[0], data_item.obj[1]
     print(f'Tab_Item:\t{tab_item}')
     print(f'Img_Item:\t{img_item}')
     print(f'Label:\t{label}')
     # This is the call that raises KeyError: 'port_id' in the traceback below.
     # Note: the extra parentheses around data_item do not build a tuple; the
     # item is passed to predict as-is.
     predicted = learn.predict( (data_item) )[0].data[0]
#     print(f'{predicted}')
#     truth = label.data
#     error = truth-predicted
#     predicted_list.append([item, np.abs(error), truth, predicted])
#     print(f'{truth:.3f}-{predicted:.3f}={error:.3f}')
#     print(f'======================================================================================')
# Sort in descending order of element 1 of each entry (the absolute error in
# the commented-out append above).
# NOTE(review): predicted_list is never initialised in this snippet and the
# append is commented out; presumably it is defined in the omitted part of the
# notebook cell -- confirm, otherwise this line raises NameError.
predicted_list = sorted(predicted_list, key=lambda x: -x[1])
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
~/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
   2656             try:
-> 2657                 return self._engine.get_loc(key)
   2658             except KeyError:

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas/_libs/index_class_helper.pxi in pandas._libs.index.Int64Engine._check_type()

KeyError: 'port_id'

During handling of the above exception, another exception occurred:

KeyError                                  Traceback (most recent call last)
<ipython-input-175-383315a78990> in <module>()
     20     print(my_df)
     21 
---> 22     predicted = learn.predict( (data_item) )[0].data[0]
     23 #     print(f'{predicted}')
     24 #     truth = label.data

~/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/fastai/basic_train.py in predict(self, item, return_x, batch_first, with_dropout, **kwargs)
    372     def predict(self, item:ItemBase, return_x:bool=False, batch_first:bool=True, with_dropout:bool=False, **kwargs):
    373         "Return predicted class, label and probabilities for `item`."
--> 374         batch = self.data.one_item(item)
    375         res = self.pred_batch(batch=batch, with_dropout=with_dropout)
    376         raw_pred,x = grab_idx(res,0,batch_first=batch_first),batch[0]

~/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/fastai/basic_data.py in one_item(self, item, detach, denorm, cpu)
    178         "Get `item` into a batch. Optionally `detach` and `denorm`."
    179         ds = self.single_ds
--> 180         with ds.set_item(item):
    181             return self.one_batch(ds_type=DatasetType.Single, detach=detach, denorm=denorm, cpu=cpu)
    182 

~/anaconda3/envs/pytorch_p36/lib/python3.6/contextlib.py in __enter__(self)
     79     def __enter__(self):
     80         try:
---> 81             return next(self.gen)
     82         except StopIteration:
     83             raise RuntimeError("generator didn't yield") from None

~/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/fastai/data_block.py in set_item(self, item)
    606     def set_item(self,item):
    607         "For inference, will briefly replace the dataset with one that only contains `item`."
--> 608         self.item = self.x.process_one(item)
    609         yield None
    610         self.item = None

~/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/fastai/data_block.py in process_one(self, item, processor)
     88         if processor is not None: self.processor = processor
     89         self.processor = listify(self.processor)
---> 90         for p in self.processor: item = p.process_one(item)
     91         return item
     92 

~/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/fastai/data_block.py in process_one(self, item)
    754         res = []
    755         for procs, i in zip(self.procs, item):
--> 756             for p in procs: i = p.process_one(i)
    757             res.append(i)
    758         return res

~/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/fastai/tabular/data.py in process_one(self, item)
     44     def process_one(self, item):
     45         df = pd.DataFrame([item,item])
---> 46         for proc in self.procs: proc(df, test=True)
     47         if len(self.cat_names) != 0:
     48             codes = np.stack([c.cat.codes.values for n,c in df[self.cat_names].items()], 1).astype(np.int64) + 1

~/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/fastai/tabular/transform.py in __call__(self, df, test)
    122         "Apply the correct function to `df` depending on `test`."
    123         func = self.apply_test if test else self.apply_train
--> 124         func(df)
    125 
    126     def apply_train(self, df:DataFrame):

~/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/fastai/tabular/transform.py in apply_test(self, df)
    143         "Transform `self.cat_names` columns in categorical using the codes decided in `apply_train`."
    144         for n in self.cat_names:
--> 145             df.loc[:,n] = pd.Categorical(df[n], categories=self.categories[n], ordered=True)
    146 
    147 FillStrategy = IntEnum('FillStrategy', 'MEDIAN COMMON CONSTANT')

~/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/pandas/core/frame.py in __getitem__(self, key)
   2925             if self.columns.nlevels > 1:
   2926                 return self._getitem_multilevel(key)
-> 2927             indexer = self.columns.get_loc(key)
   2928             if is_integer(indexer):
   2929                 indexer = [indexer]

~/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
   2657                 return self._engine.get_loc(key)
   2658             except KeyError:
-> 2659                 return self._engine.get_loc(self._maybe_cast_indexer(key))
   2660         indexer = self.get_indexer([key], method=method, tolerance=tolerance)
   2661         if indexer.ndim > 1 or indexer.size > 1:

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas/_libs/index_class_helper.pxi in pandas._libs.index.Int64Engine._check_type()

KeyError: 'port_id'