Hello,
I am trying to use a Tabular model, but I keep getting an error. When I define the databunch like this:
data = (TabularList.from_df(df_new, cat_names=cat_names, cont_names=cont_names, procs=procs)
.split_by_idx(list(range(800,1000)))
.label_from_df(cols=dep_var, label_cls=FloatList)
I get the error
KeyError Traceback (most recent call last)
c:\users\sosc\appdata\local\programs\python\python37\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2656 try:
-> 2657 return self._engine.get_loc(key)
2658 except KeyError:pandas_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: ‘a’
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
in
2 # .split_by_idx(list(idx))
3 .split_by_idx(list(range(800,1000)))
----> 4 .label_from_df(cols=dep_var, label_cls=FloatList)
5 # .add_test(test)
6 # .databunch()c:\users\sosc\appdata\local\programs\python\python37\lib\site-packages\fastai\data_block.py in _inner(*args, **kwargs)
475 self.valid = fv(*args, from_item_lists=True, **kwargs)
476 self.__class__ = LabelLists
–> 477 self.process()
478 return self
479 return _innerc:\users\sosc\appdata\local\programs\python\python37\lib\site-packages\fastai\data_block.py in process(self)
529 “Process the inner datasets.”
530 xp,yp = self.get_processors()
–> 531 for ds,n in zip(self.lists, [‘train’,‘valid’,‘test’]): ds.process(xp, yp, name=n)
532 #progress_bar clear the outputs so in some case warnings issued during processing disappear.
533 for ds in self.lists:c:\users\sosc\appdata\local\programs\python\python37\lib\site-packages\fastai\data_block.py in process(self, xp, yp, name)
709 p.warns = []
710 self.x,self.y = self.x[~filt],self.y[~filt]
–> 711 self.x.process(xp)
712 return self
713c:\users\sosc\appdata\local\programs\python\python37\lib\site-packages\fastai\data_block.py in process(self, processor)
81 if processor is not None: self.processor = processor
82 self.processor = listify(self.processor)
—> 83 for p in self.processor: p.process(self)
84 return self
85c:\users\sosc\appdata\local\programs\python\python37\lib\site-packages\fastai\tabular\data.py in process(self, ds)
66 #cat and cont names may have been changed by transform (like Fill_NA)
67 proc = proc(ds.cat_names, ds.cont_names)
—> 68 proc(ds.inner_df)
69 ds.cat_names,ds.cont_names = proc.cat_names,proc.cont_names
70 self.procs[i] = procc:\users\sosc\appdata\local\programs\python\python37\lib\site-packages\fastai\tabular\transform.py in call(self, df, test)
122 “Apply the correct function to `df` depending on `test`.”
123 func = self.apply_test if test else self.apply_train
–> 124 func(df)
125
126 def apply_train(self, df:DataFrame):c:\users\sosc\appdata\local\programs\python\python37\lib\site-packages\fastai\tabular\transform.py in apply_train(self, df)
157 self.na_dict = {}
158 for name in self.cont_names:
–> 159 if pd.isnull(df[name]).sum():
160 if self.add_col:
161 df[name+’_na’] = pd.isnull(df[name])c:\users\sosc\appdata\local\programs\python\python37\lib\site-packages\pandas\core\frame.py in getitem(self, key)
2925 if self.columns.nlevels > 1:
2926 return self._getitem_multilevel(key)
-> 2927 indexer = self.columns.get_loc(key)
2928 if is_integer(indexer):
2929 indexer = [indexer]c:\users\sosc\appdata\local\programs\python\python37\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2657 return self._engine.get_loc(key)
2658 except KeyError:
-> 2659 return self._engine.get_loc(self._maybe_cast_indexer(key))
2660 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
2661 if indexer.ndim > 1 or indexer.size > 1:pandas_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: ‘a’
When I define the databunch with only the continuous variables:
data = (TabularList.from_df(df_new, cont_names=cont_names)
.split_by_idx(list(range(800,1000)))
.label_from_df(cols=dep_var, label_cls=FloatList)
I get the error
AttributeError Traceback (most recent call last)
in
2 # .split_by_idx(list(idx))
3 .split_by_idx(list(range(800,1000)))
----> 4 .label_from_df(cols=dep_var, label_cls=FloatList)
5 # .add_test(test)
6 # .databunch()c:\users\sosc\appdata\local\programs\python\python37\lib\site-packages\fastai\data_block.py in _inner(*args, **kwargs)
475 self.valid = fv(*args, from_item_lists=True, **kwargs)
476 self.__class__ = LabelLists
–> 477 self.process()
478 return self
479 return _innerc:\users\sosc\appdata\local\programs\python\python37\lib\site-packages\fastai\data_block.py in process(self)
529 “Process the inner datasets.”
530 xp,yp = self.get_processors()
–> 531 for ds,n in zip(self.lists, [‘train’,‘valid’,‘test’]): ds.process(xp, yp, name=n)
532 #progress_bar clear the outputs so in some case warnings issued during processing disappear.
533 for ds in self.lists:c:\users\sosc\appdata\local\programs\python\python37\lib\site-packages\fastai\data_block.py in process(self, xp, yp, name)
709 p.warns = []
710 self.x,self.y = self.x[~filt],self.y[~filt]
–> 711 self.x.process(xp)
712 return self
713c:\users\sosc\appdata\local\programs\python\python37\lib\site-packages\fastai\data_block.py in process(self, processor)
81 if processor is not None: self.processor = processor
82 self.processor = listify(self.processor)
—> 83 for p in self.processor: p.process(self)
84 return self
85c:\users\sosc\appdata\local\programs\python\python37\lib\site-packages\fastai\tabular\data.py in process(self, ds)
77 else: ds.codes,ds.classes,self.classes,cat_cols = None,None,None,[]
78 if len(ds.cont_names) != 0:
—> 79 ds.conts = np.stack([c.astype(‘float32’).values for n,c in ds.inner_df[ds.cont_names].items()], 1)
80 cont_cols = list(ds.inner_df[ds.cont_names].columns.values)
81 else: ds.conts,cont_cols = None,[]c:\users\sosc\appdata\local\programs\python\python37\lib\site-packages\fastai\tabular\data.py in (.0)
77 else: ds.codes,ds.classes,self.classes,cat_cols = None,None,None,[]
78 if len(ds.cont_names) != 0:
—> 79 ds.conts = np.stack([c.astype(‘float32’).values for n,c in ds.inner_df[ds.cont_names].items()], 1)
80 cont_cols = list(ds.inner_df[ds.cont_names].columns.values)
81 else: ds.conts,cont_cols = None,[]AttributeError: ‘float’ object has no attribute ‘astype’
I have looked into the source code, and I find that when I define
temp= (TabularList.from_df(df_new, cont_names=cont_names)
.split_by_idx(list(idx))
.split_by_idx(list(range(800,1000)))
and call
temp.get_label_cls
it returns the function
<function fastai.data_block.ItemLists.__getattr__.<locals>._inner(*args, **kwargs)>
I have looked into the source code but I can’t understand why this doesn’t work.
The example from part 1, lesson 4 works perfectly, so the libraries and dependencies work.
More information about data:
dep_var = "ro"
cont_names = "at"
cat_names = ["Month"]
procs = [FillMissing, Categorify, Normalize]
df_new.head()
returns
||ro|at|Month|
|0|659.5086|1.7277|dec|
|1|659.5152|1.8265|dec|
|2|659.5218|1.8368|dec|
|3|659.5284|1.8082|dec|
|4|659.5350|1.7563|dec|
and
df_new.dtypes
returns
ro float64
at float64
Month object
dtype: object
There are no NA’s, nulls or NaN in data.
I hope someone has the solution. I have looked through the forum, but I haven’t seen anyone who has met this problem. There was a thread about a wrong type when using fit, but that solution did not fit this problem.
Regards,
s
Edit
Solution
It worked for me to define dep_var, cont_names and cat_names as lists, i.e. by typing:
dep_var = ['ro']
cont_names = ['at']
cat_names = ['Month']
procs = [FillMissing, Categorify, Normalize]
I don’t know why this works. Is it because Python, fastai or PyTorch behaves differently on lists and strings? I would very much like to learn why this is the case, so please respond if you know the answer.