I’m trying to run the Rossmann notebook. I’ve downloaded the files with extra data from the referenced link.
It runs fine up to this step (cell number 78):
md = ColumnarModelData.from_data_frame(PATH, val_idx, df, yl.astype(np.float32), cat_flds=cat_vars, bs=128,
test_df=df_test)
The error says that some columns are missing from the DataFrame:
KeyError: "['PromoInterval' 'Events' 'Promo_fw' 'Promo_bw' 'StateHoliday_fw' 'StateHoliday_bw' 'SchoolHoliday_fw'\n 'SchoolHoliday_bw'] not in index"
How do I fix this? I would like to run the notebook through to the end.
Here is the complete stack trace:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-78-9df421910b1c> in <module>()
1 md = ColumnarModelData.from_data_frame(PATH, val_idx, df, yl.astype(np.float32), cat_flds=cat_vars, bs=128,
----> 2 test_df=df_test)
d:\dev\fastai\fastai\courses\dl1\fastai\column_data.py in from_data_frame(cls, path, val_idxs, df, y, cat_flds, bs, test_df)
64 def from_data_frame(cls, path, val_idxs, df, y, cat_flds, bs, test_df=None):
65 ((val_df, trn_df), (val_y, trn_y)) = split_by_idx(val_idxs, df, y)
---> 66 return cls.from_data_frames(path, trn_df, val_df, trn_y, val_y, cat_flds, bs, test_df=test_df)
67
68 def get_learner(self, emb_szs, n_cont, emb_drop, out_sz, szs, drops,
d:\dev\fastai\fastai\courses\dl1\fastai\column_data.py in from_data_frames(cls, path, trn_df, val_df, trn_y, val_y, cat_flds, bs, test_df)
58 def from_data_frames(cls, path, trn_df, val_df, trn_y, val_y, cat_flds, bs, test_df=None):
59 test_ds = ColumnarDataset.from_data_frame(test_df, cat_flds) if test_df is not None else None
---> 60 return cls(path, ColumnarDataset.from_data_frame(trn_df, cat_flds, trn_y),
61 ColumnarDataset.from_data_frame(val_df, cat_flds, val_y), bs, test_ds=test_ds)
62
d:\dev\fastai\fastai\courses\dl1\fastai\column_data.py in from_data_frame(cls, df, cat_flds, y)
39 @classmethod
40 def from_data_frame(cls, df, cat_flds, y=None):
---> 41 return cls.from_data_frames(df[cat_flds], df.drop(cat_flds, axis=1), y)
42
43
D:\Anaconda3\envs\fastai\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
2131 if isinstance(key, (Series, np.ndarray, Index, list)):
2132 # either boolean or fancy integer index
-> 2133 return self._getitem_array(key)
2134 elif isinstance(key, DataFrame):
2135 return self._getitem_frame(key)
D:\Anaconda3\envs\fastai\lib\site-packages\pandas\core\frame.py in _getitem_array(self, key)
2175 return self._take(indexer, axis=0, convert=False)
2176 else:
-> 2177 indexer = self.loc._convert_to_indexer(key, axis=1)
2178 return self._take(indexer, axis=1, convert=True)
2179
D:\Anaconda3\envs\fastai\lib\site-packages\pandas\core\indexing.py in _convert_to_indexer(self, obj, axis, is_setter)
1267 if mask.any():
1268 raise KeyError('{mask} not in index'
-> 1269 .format(mask=objarr[mask]))
1270
1271 return _values_from_object(indexer)
KeyError: "['PromoInterval' 'Events' 'Promo_fw' 'Promo_bw' 'StateHoliday_fw' 'StateHoliday_bw' 'SchoolHoliday_fw'\n 'SchoolHoliday_bw'] not in index"