I have a regular pandas DataFrame with the following columns and data types:
kundensegment category
kalkDB2 float64
angebotsjahr category
angebotsvolumen category
vertriebsweg category
angebotsstatus category
produkt category
region category
ausschreibung category
dtype: object
I have defined categorical and continuous variables like so:
cat_vars = ['kundensegment', 'angebotsjahr', 'angebotsvolumen', 'vertriebsweg', 'angebotsstatus', 'produkt', 'region', 'ausschreibung']
cont_vars = ['kalkDB2']
When I try to build a TabularPandas object with `to = TabularPandas(df, cat, cat_vars)`,
I get the following error:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-105-7179139bdcc4> in <module>
----> 1 to = TabularPandas(df, cat, cat_vars)
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/fastai2/tabular/core.py in __init__(self, df, procs, cat_names, cont_names, y_names, block_y, splits, do_setup, device)
121 self.cat_names,self.cont_names,self.procs = L(cat_names),L(cont_names),Pipeline(procs)
122 self.split = len(splits[0])
--> 123 if do_setup: self.setup()
124
125 def new(self, df):
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/fastai2/tabular/core.py in setup(self)
132 def decode_row(self, row): return self.new(pd.DataFrame(row).T).decode().items.iloc[0]
133 def show(self, max_n=10, **kwargs): display_df(self.new(self.all_cols[:max_n]).decode().items)
--> 134 def setup(self): self.procs.setup(self)
135 def process(self): self.procs(self)
136 def loc(self): return self.items.loc
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/fastcore/transform.py in setup(self, items, train_setup)
177 tfms = self.fs[:]
178 self.fs.clear()
--> 179 for t in tfms: self.add(t,items, train_setup)
180
181 def add(self,t, items=None, train_setup=False):
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/fastcore/transform.py in add(self, t, items, train_setup)
180
181 def add(self,t, items=None, train_setup=False):
--> 182 t.setup(items, train_setup)
183 self.fs.append(t)
184
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/fastai2/tabular/core.py in setup(self, items, train_setup)
178 super().setup(getattr(items,'train',items), train_setup=False)
179 # Procs are called as soon as data is available
--> 180 return self(items.items if isinstance(items,Datasets) else items)
181
182 # Cell
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/fastcore/transform.py in __call__(self, x, **kwargs)
70 @property
71 def name(self): return getattr(self, '_name', _get_name(self))
---> 72 def __call__(self, x, **kwargs): return self._call('encodes', x, **kwargs)
73 def decode (self, x, **kwargs): return self._call('decodes', x, **kwargs)
74 def __repr__(self): return f'{self.name}: {self.encodes} {self.decodes}'
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/fastcore/transform.py in _call(self, fn, x, split_idx, **kwargs)
94 "A `Transform` that modifies in-place and just returns whatever it's passed"
95 def _call(self, fn, x, split_idx=None, **kwargs):
---> 96 super()._call(fn,x,split_idx,**kwargs)
97 return x
98
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/fastcore/transform.py in _call(self, fn, x, split_idx, **kwargs)
81 if split_idx!=self.split_idx and self.split_idx is not None: return x
82 f = getattr(self, fn)
---> 83 if not _is_tuple(x): return self._do_call(f, x, **kwargs)
84 res = tuple(self._do_call(f, x_, **kwargs) for x_ in x)
85 return retain_type(res, x)
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/fastcore/transform.py in _do_call(self, f, x, **kwargs)
86
87 def _do_call(self, f, x, **kwargs):
---> 88 return x if f is None else retain_type(f(x, **kwargs), x, f.returns_none(x))
89
90 add_docs(Transform, decode="Delegate to `decodes` to undo transform", setup="Delegate to `setups` to set up transform")
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/fastcore/dispatch.py in __call__(self, *args, **kwargs)
96 if not f: return args[0]
97 if self.inst is not None: f = MethodType(f, self.inst)
---> 98 return f(*args, **kwargs)
99
100 def __get__(self, inst, owner):
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/fastai2/tabular/core.py in encodes(self, to)
194 self.classes = {n:CategoryMap(to.iloc[:,n].items, add_na=(n in to.cat_names)) for n in to.cat_names}
195
--> 196 def encodes(self, to): to.transform(to.cat_names, partial(_apply_cats, self.classes, 1))
197 def decodes(self, to): to.transform(to.cat_names, partial(_decode_cats, self.classes))
198 def __getitem__(self,k): return self.classes[k]
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/fastai2/tabular/core.py in transform(self, cols, f, all_col)
155 def transform(self, cols, f, all_col=True):
156 if not all_col: cols = [c for c in cols if c in self.items.columns]
--> 157 if len(cols) > 0: self[cols] = self[cols].transform(f)
158
159 # Cell
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/fastcore/foundation.py in __getitem__(self, k)
268 def __init__(self, items): self.items = items
269 def __len__(self): return len(self.items)
--> 270 def __getitem__(self, k): return self.items[k]
271 def __setitem__(self, k, v): self.items[list(k) if isinstance(k,CollBase) else k] = v
272 def __delitem__(self, i): del(self.items[i])
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/pandas/core/frame.py in __getitem__(self, key)
2686 return self._getitem_multilevel(key)
2687 else:
-> 2688 return self._getitem_column(key)
2689
2690 def _getitem_column(self, key):
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/pandas/core/frame.py in _getitem_column(self, key)
2693 # get column
2694 if self.columns.is_unique:
-> 2695 return self._get_item_cache(key)
2696
2697 # duplicate columns & possible reduce dimensionality
/anaconda/envs/azureml_py36/lib/python3.6/site-packages/pandas/core/generic.py in _get_item_cache(self, item)
2485 """Return the cached item, item represents a label indexer."""
2486 cache = self._item_cache
-> 2487 res = cache.get(item)
2488 if res is None:
2489 values = self._data.get(item)
TypeError: unhashable type: 'L'
I can’t figure out why fastai fails to build the TabularPandas object. Any hints?