Tabular error: AttributeError: 'float' object has no attribute 'astype' (Edited with solution)

Hello,

I am trying to use a Tabular model, but I keep getting errors. When I define the databunch like this:

data = (TabularList.from_df(df_new, cat_names=cat_names, cont_names=cont_names, procs=procs)
.split_by_idx(list(range(800,1000)))
.label_from_df(cols=dep_var, label_cls=FloatList)

I get the following error:


KeyError Traceback (most recent call last)
c:\users\sosc\appdata\local\programs\python\python37\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2656 try:
-> 2657 return self._engine.get_loc(key)
2658 except KeyError:

pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

KeyError: ‘a’

During handling of the above exception, another exception occurred:

KeyError Traceback (most recent call last)
in
2 # .split_by_idx(list(idx))
3 .split_by_idx(list(range(800,1000)))
----> 4 .label_from_df(cols=dep_var, label_cls=FloatList)
5 # .add_test(test)
6 # .databunch()

c:\users\sosc\appdata\local\programs\python\python37\lib\site-packages\fastai\data_block.py in _inner(*args, **kwargs)
475 self.valid = fv(*args, from_item_lists=True, **kwargs)
476 self.__class__ = LabelLists
–> 477 self.process()
478 return self
479 return _inner

c:\users\sosc\appdata\local\programs\python\python37\lib\site-packages\fastai\data_block.py in process(self)
529 “Process the inner datasets.”
530 xp,yp = self.get_processors()
–> 531 for ds,n in zip(self.lists, [‘train’,‘valid’,‘test’]): ds.process(xp, yp, name=n)
532 #progress_bar clear the outputs so in some case warnings issued during processing disappear.
533 for ds in self.lists:

c:\users\sosc\appdata\local\programs\python\python37\lib\site-packages\fastai\data_block.py in process(self, xp, yp, name)
709 p.warns = []
710 self.x,self.y = self.x[~filt],self.y[~filt]
–> 711 self.x.process(xp)
712 return self
713

c:\users\sosc\appdata\local\programs\python\python37\lib\site-packages\fastai\data_block.py in process(self, processor)
81 if processor is not None: self.processor = processor
82 self.processor = listify(self.processor)
—> 83 for p in self.processor: p.process(self)
84 return self
85

c:\users\sosc\appdata\local\programs\python\python37\lib\site-packages\fastai\tabular\data.py in process(self, ds)
66 #cat and cont names may have been changed by transform (like Fill_NA)
67 proc = proc(ds.cat_names, ds.cont_names)
—> 68 proc(ds.inner_df)
69 ds.cat_names,ds.cont_names = proc.cat_names,proc.cont_names
70 self.procs[i] = proc

c:\users\sosc\appdata\local\programs\python\python37\lib\site-packages\fastai\tabular\transform.py in __call__(self, df, test)
122 “Apply the correct function to df depending on test.”
123 func = self.apply_test if test else self.apply_train
–> 124 func(df)
125
126 def apply_train(self, df:DataFrame):

c:\users\sosc\appdata\local\programs\python\python37\lib\site-packages\fastai\tabular\transform.py in apply_train(self, df)
157 self.na_dict = {}
158 for name in self.cont_names:
–> 159 if pd.isnull(df[name]).sum():
160 if self.add_col:
161 df[name+’_na’] = pd.isnull(df[name])

c:\users\sosc\appdata\local\programs\python\python37\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
2925 if self.columns.nlevels > 1:
2926 return self._getitem_multilevel(key)
-> 2927 indexer = self.columns.get_loc(key)
2928 if is_integer(indexer):
2929 indexer = [indexer]

c:\users\sosc\appdata\local\programs\python\python37\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2657 return self._engine.get_loc(key)
2658 except KeyError:
-> 2659 return self._engine.get_loc(self._maybe_cast_indexer(key))
2660 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
2661 if indexer.ndim > 1 or indexer.size > 1:

pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

KeyError: ‘a’

When I define the databunch with only continuous variables, like this:

data = (TabularList.from_df(df_new, cont_names=cont_names)
.split_by_idx(list(range(800,1000)))
.label_from_df(cols=dep_var, label_cls=FloatList)

I get the error


AttributeError Traceback (most recent call last)
in
2 # .split_by_idx(list(idx))
3 .split_by_idx(list(range(800,1000)))
----> 4 .label_from_df(cols=dep_var, label_cls=FloatList)
5 # .add_test(test)
6 # .databunch()

c:\users\sosc\appdata\local\programs\python\python37\lib\site-packages\fastai\data_block.py in _inner(*args, **kwargs)
475 self.valid = fv(*args, from_item_lists=True, **kwargs)
476 self.__class__ = LabelLists
–> 477 self.process()
478 return self
479 return _inner

c:\users\sosc\appdata\local\programs\python\python37\lib\site-packages\fastai\data_block.py in process(self)
529 “Process the inner datasets.”
530 xp,yp = self.get_processors()
–> 531 for ds,n in zip(self.lists, [‘train’,‘valid’,‘test’]): ds.process(xp, yp, name=n)
532 #progress_bar clear the outputs so in some case warnings issued during processing disappear.
533 for ds in self.lists:

c:\users\sosc\appdata\local\programs\python\python37\lib\site-packages\fastai\data_block.py in process(self, xp, yp, name)
709 p.warns = []
710 self.x,self.y = self.x[~filt],self.y[~filt]
–> 711 self.x.process(xp)
712 return self
713

c:\users\sosc\appdata\local\programs\python\python37\lib\site-packages\fastai\data_block.py in process(self, processor)
81 if processor is not None: self.processor = processor
82 self.processor = listify(self.processor)
—> 83 for p in self.processor: p.process(self)
84 return self
85

c:\users\sosc\appdata\local\programs\python\python37\lib\site-packages\fastai\tabular\data.py in process(self, ds)
77 else: ds.codes,ds.classes,self.classes,cat_cols = None,None,None,[]
78 if len(ds.cont_names) != 0:
—> 79 ds.conts = np.stack([c.astype(‘float32’).values for n,c in ds.inner_df[ds.cont_names].items()], 1)
80 cont_cols = list(ds.inner_df[ds.cont_names].columns.values)
81 else: ds.conts,cont_cols = None,[]

c:\users\sosc\appdata\local\programs\python\python37\lib\site-packages\fastai\tabular\data.py in (.0)
77 else: ds.codes,ds.classes,self.classes,cat_cols = None,None,None,[]
78 if len(ds.cont_names) != 0:
—> 79 ds.conts = np.stack([c.astype(‘float32’).values for n,c in ds.inner_df[ds.cont_names].items()], 1)
80 cont_cols = list(ds.inner_df[ds.cont_names].columns.values)
81 else: ds.conts,cont_cols = None,[]

AttributeError: ‘float’ object has no attribute ‘astype’

I have looked into the source code, and I find that when I define

temp= (TabularList.from_df(df_new, cont_names=cont_names)
.split_by_idx(list(idx))
.split_by_idx(list(range(800,1000)))

and calling

 temp.get_label_cls

I see that it returns the function

<function fastai.data_block.ItemLists.__getattr__.<locals>._inner(*args, **kwargs)>

I have looked into the source code but I can’t understand why this doesn’t work.

The example from part 1, lesson 4 works perfectly, so the libraries and dependencies are working.

More information about data:

dep_var = "ro"
cont_names = "at"
cat_names = ["Month"]
procs = [FillMissing, Categorify, Normalize]

df_new.head()

returns

||ro|at|Month|
|0|659.5086|1.7277|dec|
|1|659.5152|1.8265|dec|
|2|659.5218|1.8368|dec|
|3|659.5284|1.8082|dec|
|4|659.5350|1.7563|dec|

and

df_new.dtypes

returns

ro       float64
at       float64
Month     object
dtype: object

There are no NAs, nulls or NaNs in the data.

I hope someone has the solution. I have looked through the forum, but I haven’t seen anyone who has run into this problem. There was a thread about a wrong type when using fit, but that solution did not apply to this problem.

Regards,
s

Edit

Solution

It worked for me to define dep_var, cont_names and cat_names as lists, i.e. by typing:

dep_var = ['ro']
cont_names = ['at']
cat_names = ['Month']
procs = [FillMissing, Categorify, Normalize]

I don’t know why this works. Is it because Python, fastai or PyTorch behave differently on lists and strings? I would very much like to learn why this is the case, so please respond if you know the answer.

Solution

It worked for me to define dep_var, cont_names and cat_names as lists, i.e. by typing:

dep_var = ['ro']
cont_names = ['at']
cat_names = ['Month']
procs = [FillMissing, Categorify, Normalize]

I don’t know why this works. Is it because Python, fastai or PyTorch behave differently on lists and strings? I would very much like to learn why this is the case, so please respond if you know the answer.

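The variables are expected to be lists, because the transforms iterate over them. I believe the dependent variable is the only exception. That’s why you were getting the KeyError for ‘a’: a string is a sequence of characters, so looping over the string ‘at’ yields ‘a’ and then ‘t’, and the lookup df[‘a’] fails because there is no column named ‘a’. The AttributeError most likely has the same root cause: indexing the DataFrame with the string ‘at’ instead of the list [‘at’] returns a single Series rather than a DataFrame, so .items() yields individual float values instead of columns, and a float has no astype method.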

1 Like