I am working on duplicating the Rossmann notebook. I ran that notebook with fastai v1 code just a month ago and got it to run to completion on Colab with an exp_RMSPE score of ~0.109.
Now (1/23/2020), when I try to duplicate that notebook, I run into an error when I execute:
data = (TabularList.from_df(df, path=path, cat_names=cat_vars, cont_names=cont_vars, procs=procs,)
.split_by_idx(valid_idx)
.label_from_df(cols=dep_var, label_cls=FloatList, log=True)
.add_test(TabularList.from_df(test_df, path=path, cat_names=cat_vars, cont_names=cont_vars))
.databunch())
The error is:
AttributeError Traceback (most recent call last)
in ()
1 data = (TabularList.from_df(df, path=path, cat_names=cat_vars, cont_names=cont_vars, procs=procs,)
2 .split_by_idx(valid_idx)
----> 3 .label_from_df(cols=dep_var, label_cls=FloatList, log=True)
4 .add_test(TabularList.from_df(test_df, path=path, cat_names=cat_vars, cont_names=cont_vars))
5 .databunch())
7 frames
/usr/local/lib/python3.6/dist-packages/fastai/data_block.py in _inner(*args, **kwargs)
478 self.valid = fv(*args, from_item_lists=True, **kwargs)
479 self.__class__ = LabelLists
--> 480 self.process()
481 return self
482 return _inner
/usr/local/lib/python3.6/dist-packages/fastai/data_block.py in process(self)
532 "Process the inner datasets."
533 xp,yp = self.get_processors()
--> 534 for ds,n in zip(self.lists, ['train','valid','test']): ds.process(xp, yp, name=n)
535 #progress_bar clear the outputs so in some case warnings issued during processing disappear.
536 for ds in self.lists:
/usr/local/lib/python3.6/dist-packages/fastai/data_block.py in process(self, xp, yp, name, max_warn_items)
712 p.warns = []
713 self.x,self.y = self.x[~filt],self.y[~filt]
–> 714 self.x.process(xp)
715 return self
716
/usr/local/lib/python3.6/dist-packages/fastai/data_block.py in process(self, processor)
82 if processor is not None: self.processor = processor
83 self.processor = listify(self.processor)
—> 84 for p in self.processor: p.process(self)
85 return self
86
/usr/local/lib/python3.6/dist-packages/fastai/tabular/data.py in process(self, ds)
64 #cat and cont names may have been changed by transform (like Fill_NA)
65 proc = proc(ds.cat_names, ds.cont_names)
—> 66 proc(ds.inner_df)
67 ds.cat_names,ds.cont_names = proc.cat_names,proc.cont_names
68 self.procs[i] = proc
/usr/local/lib/python3.6/dist-packages/fastai/tabular/transform.py in __call__(self, df, test)
122 "Apply the correct function to `df` depending on `test`."
123 func = self.apply_test if test else self.apply_train
--> 124 func(df)
125
126 def apply_train(self, df:DataFrame):
/usr/local/lib/python3.6/dist-packages/fastai/tabular/transform.py in apply_train(self, df)
137 self.categories = {}
138 for n in self.cat_names:
--> 139 df.loc[:,n] = df.loc[:,n].astype('category').cat.as_ordered()
140 self.categories[n] = df[n].cat.categories
141
/usr/local/lib/python3.6/dist-packages/pandas/core/generic.py in __getattr__(self, name)
5177 if self._info_axis._can_hold_identifiers_and_holds_name(name):
5178 return self[name]
-> 5179 return object.__getattribute__(self, name)
5180
5181 def __setattr__(self, name, value):
AttributeError: 'DataFrame' object has no attribute 'cat'
When I looked into it on Stack Overflow, I found this:
"The `.cat` attribute is a categorical accessor associated with the categorical dtype,"
which is what the fastai code seems to set up (via `astype('category')`) before calling `.cat`.
Did something change in the most recent pandas version (0.25.3)? Am I doing something wrong?