I’ve been getting this error ever since I upgraded to fastai 1.0.38. I get the same error on 1.0.37, but not on 1.0.36.
However, my Learner results are really bad in 1.0.36, so maybe this error is trying to keep me from doing something stupid.
I pushed a minimal test case to: https://github.com/matanhershberg/fastai-houses/tree/tabular-data-bunch-issue
The notebook in question is called Houses.ipynb
Any help would be greatly appreciated.
Matan
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
~/miniconda3/envs/houses/lib/python3.7/site-packages/fastai/data_block.py in process_one(self, item)
277 def process_one(self,item):
--> 278 try: return self.c2i[item] if item is not None else None
279 except:
KeyError: 229456
During handling of the above exception, another exception occurred:
Exception Traceback (most recent call last)
<ipython-input-8-2f10f34fa878> in <module>
----> 1 data = TabularDataBunch.from_df(path, df, dep_var, valid_idx=valid_idx, procs=procs, cat_names=cat_names)
~/miniconda3/envs/houses/lib/python3.7/site-packages/fastai/tabular/data.py in from_df(cls, path, df, dep_var, valid_idx, procs, cat_names, cont_names, classes, test_df, **kwargs)
93 src = (TabularList.from_df(df, path=path, cat_names=cat_names, cont_names=cont_names, procs=procs)
94 .split_by_idx(valid_idx)
---> 95 .label_from_df(cols=dep_var, classes=classes))
96 if test_df is not None: src.add_test(TabularList.from_df(test_df, cat_names=cat_names, cont_names=cont_names,
97 processor = src.train.x.processor))
~/miniconda3/envs/houses/lib/python3.7/site-packages/fastai/data_block.py in _inner(*args, **kwargs)
391 self.valid = fv(*args, **kwargs)
392 self.__class__ = LabelLists
--> 393 self.process()
394 return self
395 return _inner
~/miniconda3/envs/houses/lib/python3.7/site-packages/fastai/data_block.py in process(self)
438 "Process the inner datasets."
439 xp,yp = self.get_processors()
--> 440 for i,ds in enumerate(self.lists): ds.process(xp, yp, filter_missing_y=i==0)
441 return self
442
~/miniconda3/envs/houses/lib/python3.7/site-packages/fastai/data_block.py in process(self, xp, yp, filter_missing_y)
563 def process(self, xp=None, yp=None, filter_missing_y:bool=False):
564 "Launch the processing on `self.x` and `self.y` with `xp` and `yp`."
--> 565 self.y.process(yp)
566 if filter_missing_y and (getattr(self.x, 'filter_missing_y', None)):
567 filt = array([o is None for o in self.y])
~/miniconda3/envs/houses/lib/python3.7/site-packages/fastai/data_block.py in process(self, processor)
66 if processor is not None: self.processor = processor
67 self.processor = listify(self.processor)
---> 68 for p in self.processor: p.process(self)
69 return self
70
~/miniconda3/envs/houses/lib/python3.7/site-packages/fastai/data_block.py in process(self, ds)
284 ds.classes = self.classes
285 ds.c2i = self.c2i
--> 286 super().process(ds)
287
288 def __getstate__(self): return {'classes':self.classes}
~/miniconda3/envs/houses/lib/python3.7/site-packages/fastai/data_block.py in process(self, ds)
36 def __init__(self, ds:Collection=None): self.ref_ds = ds
37 def process_one(self, item:Any): return item
---> 38 def process(self, ds:Collection): ds.items = array([self.process_one(item) for item in ds.items])
39
40 class ItemList():
~/miniconda3/envs/houses/lib/python3.7/site-packages/fastai/data_block.py in <listcomp>(.0)
36 def __init__(self, ds:Collection=None): self.ref_ds = ds
37 def process_one(self, item:Any): return item
---> 38 def process(self, ds:Collection): ds.items = array([self.process_one(item) for item in ds.items])
39
40 class ItemList():
~/miniconda3/envs/houses/lib/python3.7/site-packages/fastai/data_block.py in process_one(self, item)
278 try: return self.c2i[item] if item is not None else None
279 except:
--> 280 raise Exception("Your validation data contains a label that isn't present in the training set, please fix your data.")
281
282 def process(self, ds):
Exception: Your validation data contains a label that isn't present in the training set, please fix your data.