I’m almost done re-using the Rossmann notebook on my own structured data.
Near the end of the notebook, on this line:
md = ColumnarModelData.from_data_frame(PATH, val_idx, df, y.astype(np.float32), cat_flds=cat_vars, bs=128, test_df=df_test)
I get the error message: “[‘date’] not in index”
Full error message at the end of my post.
I don’t have a proper date-formatted field in my structured data; instead I have a Unix timestamp, stored as an integer, that I use as a date. So I set the index of my df and df_test to that field, called ‘date’. For example:
df_test.index returns
Int64Index([1533822671448, 1533822727393, 1533822789815, 1533822851463, 1533822908572, 1533822970676, 1533823034248, 1533823091596, 1533823147904, 1533823210017, ... 1539507790976, 1539507849436, 1539507907015, 1539507969443, 1539508028040, 1539508091164, 1539508148007, 1539508208619, 1539508270114, 1539508332120], dtype='int64', name='date', length=94637)
I’m not sure what I should change. Maybe there is a problem with val_idx?
Complete error message:
KeyError Traceback (most recent call last)
in ()
1 #y below is gain_loss / cat_flds=cat_vars is to tell which fields are categories
2 md = ColumnarModelData.from_data_frame(PATH, val_idx, df, y.astype(np.float32), cat_flds=cat_vars, bs=128,
----> 3 test_df=df_test)~/fastai/courses/dl1/fastai/column_data.py in from_data_frame(cls, path, val_idxs, df, y, cat_flds, bs, is_reg, is_multi, test_df) 72 def from_data_frame(cls, path, val_idxs, df, y, cat_flds, bs, is_reg=True, is_multi=False, test_df=None): 73 ((val_df, trn_df), (val_y, trn_y)) = split_by_idx(val_idxs, df, y) ---> 74 return cls.from_data_frames(path, trn_df, val_df, trn_y, val_y, cat_flds, bs, is_reg, is_multi, test_df=test_df) 75 76 def get_learner(self, emb_szs, n_cont, emb_drop, out_sz, szs, drops, ~/fastai/courses/dl1/fastai/column_data.py in from_data_frames(cls, path, trn_df, val_df, trn_y, val_y, cat_flds, bs, is_reg, is_multi, test_df) 64 @classmethod 65 def from_data_frames(cls, path, trn_df, val_df, trn_y, val_y, cat_flds, bs, is_reg, is_multi, test_df=None): ---> 66 trn_ds = ColumnarDataset.from_data_frame(trn_df, cat_flds, trn_y, is_reg, is_multi) 67 val_ds = ColumnarDataset.from_data_frame(val_df, cat_flds, val_y, is_reg, is_multi) 68 test_ds = ColumnarDataset.from_data_frame(test_df, cat_flds, None, is_reg, is_multi) if test_df is not None else None ~/fastai/courses/dl1/fastai/column_data.py in from_data_frame(cls, df, cat_flds, y, is_reg, is_multi) 45 @classmethod 46 def from_data_frame(cls, df, cat_flds, y=None, is_reg=True, is_multi=False): ---> 47 return cls.from_data_frames(df[cat_flds], df.drop(cat_flds, axis=1), y, is_reg, is_multi) 48 49 ~/anaconda3/envs/fastai/lib/python3.6/site-packages/pandas/core/frame.py in __getitem__(self, key) 2680 if isinstance(key, (Series, np.ndarray, Index, list)): 2681 # either boolean or fancy integer index -> 2682 return self._getitem_array(key) 2683 elif isinstance(key, DataFrame): 2684 return self._getitem_frame(key) ~/anaconda3/envs/fastai/lib/python3.6/site-packages/pandas/core/frame.py in _getitem_array(self, key) 2724 return self._take(indexer, axis=0) 2725 else: -> 2726 indexer = self.loc._convert_to_indexer(key, axis=1) 2727 return self._take(indexer, axis=1) 2728 
~/anaconda3/envs/fastai/lib/python3.6/site-packages/pandas/core/indexing.py in _convert_to_indexer(self, obj, axis, is_setter) 1325 if mask.any(): 1326 raise KeyError('{mask} not in index' -> 1327 .format(mask=objarr[mask])) 1328 1329 return com._values_from_object(indexer) KeyError: "['date'] not in index"