Does anyone know how to deal with the following error when building a fastai `TabularList`?
----------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-8-ca8ea327f64f> in <module>
19 .split_by_idx(valid_indx)
20 # .split_by_rand_pct(0.2)
---> 21 .label_from_df(cols=dep_var)
22 .add_test(test,label=0)
23 .databunch())
~/anaconda3/envs/econda/lib/python3.7/site-packages/fastai/data_block.py in _inner(*args, **kwargs)
475 self.valid = fv(*args, from_item_lists=True, **kwargs)
476 self.__class__ = LabelLists
--> 477 self.process()
478 return self
479 return _inner
~/anaconda3/envs/econda/lib/python3.7/site-packages/fastai/data_block.py in process(self)
529 "Process the inner datasets."
530 xp,yp = self.get_processors()
--> 531 for ds,n in zip(self.lists, ['train','valid','test']): ds.process(xp, yp, name=n)
532 #progress_bar clear the outputs so in some case warnings issued during processing disappear.
533 for ds in self.lists:
~/anaconda3/envs/econda/lib/python3.7/site-packages/fastai/data_block.py in process(self, xp, yp, name)
709 p.warns = []
710 self.x,self.y = self.x[~filt],self.y[~filt]
--> 711 self.x.process(xp)
712 return self
713
~/anaconda3/envs/econda/lib/python3.7/site-packages/fastai/data_block.py in process(self, processor)
81 if processor is not None: self.processor = processor
82 self.processor = listify(self.processor)
---> 83 for p in self.processor: p.process(self)
84 return self
85
~/anaconda3/envs/econda/lib/python3.7/site-packages/fastai/tabular/data.py in process(self, ds)
71 self.cat_names,self.cont_names = ds.cat_names,ds.cont_names
72 if len(ds.cat_names) != 0:
---> 73 ds.codes = np.stack([c.cat.codes.values for n,c in ds.inner_df[ds.cat_names].items()], 1).astype(np.int64) + 1
74 self.classes = ds.classes = OrderedDict({n:np.concatenate([['#na#'],c.cat.categories.values])
75 for n,c in ds.inner_df[ds.cat_names].items()})
~/anaconda3/envs/econda/lib/python3.7/site-packages/fastai/tabular/data.py in <listcomp>(.0)
71 self.cat_names,self.cont_names = ds.cat_names,ds.cont_names
72 if len(ds.cat_names) != 0:
---> 73 ds.codes = np.stack([c.cat.codes.values for n,c in ds.inner_df[ds.cat_names].items()], 1).astype(np.int64) + 1
74 self.classes = ds.classes = OrderedDict({n:np.concatenate([['#na#'],c.cat.categories.values])
75 for n,c in ds.inner_df[ds.cat_names].items()})
~/anaconda3/envs/econda/lib/python3.7/site-packages/pandas/core/generic.py in __getattr__(self, name)
5174 or name in self._accessors
5175 ):
-> 5176 return object.__getattribute__(self, name)
5177 else:
5178 if self._info_axis._can_hold_identifiers_and_holds_name(name):
~/anaconda3/envs/econda/lib/python3.7/site-packages/pandas/core/accessor.py in __get__(self, obj, cls)
173 # we're accessing the attribute of the class, i.e., Dataset.geo
174 return self._accessor
--> 175 accessor_obj = self._accessor(obj)
176 # Replace the property with the accessor object. Inspired by:
177 # http://www.pydanny.com/cached-property.html
~/anaconda3/envs/econda/lib/python3.7/site-packages/pandas/core/arrays/categorical.py in __init__(self, data)
2591
2592 def __init__(self, data):
-> 2593 self._validate(data)
2594 self._parent = data.values
2595 self._index = data.index
~/anaconda3/envs/econda/lib/python3.7/site-packages/pandas/core/arrays/categorical.py in _validate(data)
2601 if not is_categorical_dtype(data.dtype):
2602 raise AttributeError(
-> 2603 "Can only use .cat accessor with a " "'category' dtype"
2604 )
2605
AttributeError: Can only use .cat accessor with a 'category' dtype
I tried running my code without `procs`, using my own custom function (below) to set the column dtypes instead:
def convert_types(df, cat_names, cont_names):
    """Return a copy of *df* with categorical and continuous columns recast.

    Args:
        df: pandas DataFrame containing every column named below.
        cat_names: column names to cast to the pandas ``'category'`` dtype.
        cont_names: column names to cast to ``'float32'``.

    Returns:
        A new DataFrame with the requested dtypes; *df* itself is not modified.

    Raises:
        ValueError: if a column name appears in both *cat_names* and
            *cont_names*.
    """
    # A name present in both lists would silently end up float32 (the later
    # mapping entry wins), and any later ``.cat`` access on that column fails
    # with "Can only use .cat accessor with a 'category' dtype".
    cont_set = set(cont_names)
    overlap = [name for name in cat_names if name in cont_set]
    if overlap:
        raise ValueError(
            f"columns listed as both categorical and continuous: {overlap}"
        )

    dtypes = {name: 'category' for name in cat_names}
    dtypes.update({name: 'float32' for name in cont_names})
    return df.astype(dtypes)
But that didn’t seem to fix anything.
Here is some of my code:
def convert_types(df, cat_names, cont_names):
    """Return a copy of *df* with categorical and continuous columns recast.

    Args:
        df: pandas DataFrame containing every column named below.
        cat_names: column names to cast to the pandas ``'category'`` dtype.
        cont_names: column names to cast to ``'float32'``.

    Returns:
        A new DataFrame with the requested dtypes; *df* itself is not modified.

    Raises:
        ValueError: if a column name appears in both *cat_names* and
            *cont_names*.
    """
    # A name present in both lists would silently end up float32 (the later
    # mapping entry wins), and any later ``.cat`` access on that column fails
    # with "Can only use .cat accessor with a 'category' dtype".
    cont_set = set(cont_names)
    overlap = [name for name in cat_names if name in cont_set]
    if overlap:
        raise ValueError(
            f"columns listed as both categorical and continuous: {overlap}"
        )

    dtypes = {name: 'category' for name in cat_names}
    dtypes.update({name: 'float32' for name in cont_names})
    return df.astype(dtypes)
# Cast both frames up front: cat_names -> 'category'; cont_names and dep_var -> 'float32'.
# NOTE(review): dep_var is passed for test_df as well, so test_df presumably
# contains the dependent column too — confirm.
df = convert_types(df, cat_names, cont_names+[dep_var])
test_df = convert_types(test_df, cat_names, cont_names+[dep_var])
# Fractions of df's rows used to position the test and validation cut points
test_pct = 0
valid_pct = 0.2
# Integer row offsets (not boolean masks) delimiting the splits.
# NOTE(review): because of the +1, cut_test is 1 when test_pct is 0, so row 0
# falls outside the validation range built below — confirm the offset is intended.
cut_test = int(test_pct * len(df))+1
cut_valid = int(valid_pct*len(df))+cut_test
# Test Dataframe: Use this if no separate test df provided
# test_df = df.iloc[cut_test:cut_valid].copy()
valid_indx = range(cut_test,cut_valid) # positions [cut_test, cut_valid), passed to split_by_idx as the validation set
# Initialize Test Data
# NOTE(review): no `procs` argument is passed to either TabularList, so the
# frames are presumably expected to already carry the dtypes set by
# convert_types — confirm.
test = TabularList.from_df(test_df, cat_names=cat_names, cont_names=cont_names)
# Initialize All Data: split by explicit validation indices, label from the
# dep_var column, attach the test list, then build the databunch.
data = (TabularList.from_df(df=df, path=path, cat_names=cat_names, cont_names=cont_names)
.split_by_idx(valid_indx)
# .split_by_rand_pct(0.2)
.label_from_df(cols=dep_var)
# label=0 is presumably a dummy label for the test items — confirm
.add_test(test,label=0)
.databunch())