Error running the lesson 3 Rossmann notebook: KeyError: "['PromoInterval' 'Events' ...] not in index"

I’m trying to run the Rossmann notebook. I’ve downloaded the files with extra data from the referenced link.

It runs fine up to this step (input cell 78):

md = ColumnarModelData.from_data_frame(PATH, val_idx, df, yl.astype(np.float32), cat_flds=cat_vars, bs=128,
                                       test_df=df_test)

The error says that some of the columns listed in cat_vars are not present in the DataFrame:

KeyError: "['PromoInterval' 'Events' 'Promo_fw' 'Promo_bw' 'StateHoliday_fw' 'StateHoliday_bw' 'SchoolHoliday_fw'\n 'SchoolHoliday_bw'] not in index"

How do I fix this? I would like to run the notebook through to the end.

Here is the complete stack trace:

---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-78-9df421910b1c> in <module>()
      1 md = ColumnarModelData.from_data_frame(PATH, val_idx, df, yl.astype(np.float32), cat_flds=cat_vars, bs=128,
----> 2                                        test_df=df_test)

d:\dev\fastai\fastai\courses\dl1\fastai\column_data.py in from_data_frame(cls, path, val_idxs, df, y, cat_flds, bs, test_df)
     64     def from_data_frame(cls, path, val_idxs, df, y, cat_flds, bs, test_df=None):
     65         ((val_df, trn_df), (val_y, trn_y)) = split_by_idx(val_idxs, df, y)
---> 66         return cls.from_data_frames(path, trn_df, val_df, trn_y, val_y, cat_flds, bs, test_df=test_df)
     67 
     68     def get_learner(self, emb_szs, n_cont, emb_drop, out_sz, szs, drops,

d:\dev\fastai\fastai\courses\dl1\fastai\column_data.py in from_data_frames(cls, path, trn_df, val_df, trn_y, val_y, cat_flds, bs, test_df)
     58     def from_data_frames(cls, path, trn_df, val_df, trn_y, val_y, cat_flds, bs, test_df=None):
     59         test_ds = ColumnarDataset.from_data_frame(test_df, cat_flds) if test_df is not None else None
---> 60         return cls(path, ColumnarDataset.from_data_frame(trn_df, cat_flds, trn_y),
     61                     ColumnarDataset.from_data_frame(val_df, cat_flds, val_y), bs, test_ds=test_ds)
     62 

d:\dev\fastai\fastai\courses\dl1\fastai\column_data.py in from_data_frame(cls, df, cat_flds, y)
     39     @classmethod
     40     def from_data_frame(cls, df, cat_flds, y=None):
---> 41         return cls.from_data_frames(df[cat_flds], df.drop(cat_flds, axis=1), y)
     42 
     43 

D:\Anaconda3\envs\fastai\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
   2131         if isinstance(key, (Series, np.ndarray, Index, list)):
   2132             # either boolean or fancy integer index
-> 2133             return self._getitem_array(key)
   2134         elif isinstance(key, DataFrame):
   2135             return self._getitem_frame(key)

D:\Anaconda3\envs\fastai\lib\site-packages\pandas\core\frame.py in _getitem_array(self, key)
   2175             return self._take(indexer, axis=0, convert=False)
   2176         else:
-> 2177             indexer = self.loc._convert_to_indexer(key, axis=1)
   2178             return self._take(indexer, axis=1, convert=True)
   2179 

D:\Anaconda3\envs\fastai\lib\site-packages\pandas\core\indexing.py in _convert_to_indexer(self, obj, axis, is_setter)
   1267                 if mask.any():
   1268                     raise KeyError('{mask} not in index'
-> 1269                                    .format(mask=objarr[mask]))
   1270 
   1271                 return _values_from_object(indexer)

KeyError: "['PromoInterval' 'Events' 'Promo_fw' 'Promo_bw' 'StateHoliday_fw' 'StateHoliday_bw' 'SchoolHoliday_fw'\n 'SchoolHoliday_bw'] not in index"

Update: I just found the answer in another post, "Rossmann - NaN in processed dataframe".

1 Like