Edit: I made a stupid typo and used the wrong dataframe. The library is working as it should.
I have a column of strings and use train_cats(df) to convert it to a categorical column. I don’t understand why I then have to replace the column with its integer category codes by hand, like this:
df.garageCode = df.garageCode.cat.codes
Otherwise, the following call fails to create the model data:
md = ColumnarModelData.from_data_frame(PATH, val_idx, df, yl.astype(np.float32), cat_flds=cat_vars, bs=128)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-62-07ea0cb85ec0> in <module>()
----> 1 md = ColumnarModelData.from_data_frame(PATH, val_idx, df, yl.astype(np.float32), cat_flds=cat_vars, bs=128)
~/fastai/courses/ml1/fastai/column_data.py in from_data_frame(cls, path, val_idxs, df, y, cat_flds, bs, is_reg, is_multi, test_df)
71 def from_data_frame(cls, path, val_idxs, df, y, cat_flds, bs, is_reg=True, is_multi=False, test_df=None):
72 ((val_df, trn_df), (val_y, trn_y)) = split_by_idx(val_idxs, df, y)
---> 73 return cls.from_data_frames(path, trn_df, val_df, trn_y, val_y, cat_flds, bs, is_reg, is_multi, test_df=test_df)
74
75 def get_learner(self, emb_szs, n_cont, emb_drop, out_sz, szs, drops,
~/fastai/courses/ml1/fastai/column_data.py in from_data_frames(cls, path, trn_df, val_df, trn_y, val_y, cat_flds, bs, is_reg, is_multi, test_df)
65 def from_data_frames(cls, path, trn_df, val_df, trn_y, val_y, cat_flds, bs, is_reg, is_multi, test_df=None):
66 test_ds = ColumnarDataset.from_data_frame(test_df, cat_flds, None, is_reg, is_multi) if test_df is not None else None
---> 67 return cls(path, ColumnarDataset.from_data_frame(trn_df, cat_flds, trn_y, is_reg, is_multi),
68 ColumnarDataset.from_data_frame(val_df, cat_flds, val_y, is_reg, is_multi), bs, test_ds=test_ds)
69
~/fastai/courses/ml1/fastai/column_data.py in from_data_frame(cls, df, cat_flds, y, is_reg, is_multi)
45 @classmethod
46 def from_data_frame(cls, df, cat_flds, y=None, is_reg=True, is_multi=False):
---> 47 return cls.from_data_frames(df[cat_flds], df.drop(cat_flds, axis=1), y, is_reg, is_multi)
48
49
~/fastai/courses/ml1/fastai/column_data.py in from_data_frames(cls, df_cat, df_cont, y, is_reg, is_multi)
41 cat_cols = [c.values for n,c in df_cat.items()]
42 cont_cols = [c.values for n,c in df_cont.items()]
---> 43 return cls(cat_cols, cont_cols, y, is_reg, is_multi)
44
45 @classmethod
~/fastai/courses/ml1/fastai/column_data.py in __init__(self, cats, conts, y, is_reg, is_multi)
24 def __init__(self, cats, conts, y, is_reg, is_multi):
25 n = len(cats[0]) if cats else len(conts[0])
---> 26 self.cats = np.stack(cats, 1).astype(np.int64) if cats else np.zeros((n,1))
27 self.conts = np.stack(conts, 1).astype(np.float32) if conts else np.zeros((n,1))
28 self.y = np.zeros((n,1)) if y is None else y
ValueError: invalid literal for int() with base 10: 'NORREPORT'