TypeError from None dates

MathiasKindberg · December 14, 2020, 3:25pm

Edit: Looks like I have fixed it for now by manually finding the affected columns and converting them to float:

df['date_Week'] = df['date_Week'].astype(float)

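This seems like a leaky abstraction between Fast.ai and Pandas in the week conversion: the week column comes back as the nullable `UInt32` integer dtype, whose missing values are `<NA>` rather than `nan`, and that is what triggers the error below. The conversion back to float is described in the documentation on the experimental nullable integer type here: https://pandas.pydata.org/pandas-docs/stable/user_guide/integer_na.html#integer-na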

Original post:

I’m essentially following Chapter 9 (tabular data), but using my own data, to get a baseline from which to start working with Fast.ai.

I’m running into issues with missing dates.

According to https://docs.fast.ai/tabular.core.html#add_datepart, None dates should be handled, but for me the week part seems to be wrong. Is there an easy workaround for this, or am I missing something?

The data is loaded into TabularPandas pretty much exactly as given in the book.

# Earlier defining my data
make_date(df, 'date')
df = add_datepart(df, 'date')

# Later loading it.

splits = RandomSplitter()(range_of(sd_df))
procs = [Categorify, FillMissing]
cont, cat = cont_cat_split(df, 1, dep_var=dep_var)
to = TabularPandas(df, procs, cat, cont, y_names=dep_var, splits=splits)

The weird week part.

df['date_Week'].unique()

<IntegerArray>
[<NA>,   13,   17,    5,   22,   15,   31,   44,   47,   11,   27,   52,   49,
40,   23,   43,   26,   48,   12,   37,   36,    1,   30,   25,   42,   45,
28,   46,   33,   41,    7,   29,   39,   21,    6,   24,   16,    3,   50,
19,    8,    2,    9,   38,   51,   10,   35,   14,    4,   32,   34,   18,
20,   53]
Length: 54, dtype: UInt32

The day part, on the other hand, seems to be working correctly, giving:

df['date_day'].unique()

array([nan, 30., 21.,  6.,  1., 14., 31., 28., 15.,  3.,  5., 12., 22., 23., 29., 19., 11., 27.,  7., 20., 25., 16., 18.,  8., 17., 24., 10., 13.,  4.,  2., 26.,  9.])

The entire error is as follows:

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-27-d26047aee92e> in <module>
    8 procs = [Categorify, FillMissing]
    9 cont, cat = cont_cat_split(df, 1, dep_var=dep_var)
---> 10 to = TabularPandas(df, procs, cat, cont, y_names=dep_var, splits=splits)
    11 
    12 save_pickle('./tabular_data.pkl',to)

~\Anaconda3\envs\fastai\lib\site-packages\fastai\tabular\core.py in __init__(self, df, procs, cat_names, cont_names, y_names, y_block, splits, do_setup, device, inplace, reduce_memory)
    163         self.cat_names,self.cont_names,self.procs = L(cat_names),L(cont_names),Pipeline(procs)
    164         self.split = len(df) if splits is None else len(splits[0])
--> 165         if do_setup: self.setup()
    166 
    167     def new(self, df):

~\Anaconda3\envs\fastai\lib\site-packages\fastai\tabular\core.py in setup(self)
    174     def decode_row(self, row): return self.new(pd.DataFrame(row).T).decode().items.iloc[0]
    175     def show(self, max_n=10, **kwargs): display_df(self.new(self.all_cols[:max_n]).decode().items)
--> 176     def setup(self): self.procs.setup(self)
    177     def process(self): self.procs(self)
    178     def loc(self): return self.items.loc

~\Anaconda3\envs\fastai\lib\site-packages\fastcore\transform.py in setup(self, items, train_setup)
    190         tfms = self.fs[:]
    191         self.fs.clear()
--> 192         for t in tfms: self.add(t,items, train_setup)
    193 
    194     def add(self,t, items=None, train_setup=False):

~\Anaconda3\envs\fastai\lib\site-packages\fastcore\transform.py in add(self, t, items, train_setup)
    193 
    194     def add(self,t, items=None, train_setup=False):
--> 195         t.setup(items, train_setup)
    196         self.fs.append(t)
    197 

~\Anaconda3\envs\fastai\lib\site-packages\fastai\tabular\core.py in setup(self, items, train_setup)
    219     "Base class to write a non-lazy tabular processor for dataframes"
    220     def setup(self, items=None, train_setup=False): #TODO: properly deal with train_setup
--> 221         super().setup(getattr(items,'train',items), train_setup=False)
    222         # Procs are called as soon as data is available
    223         return self(items.items if isinstance(items,Datasets) else items)

~\Anaconda3\envs\fastai\lib\site-packages\fastcore\transform.py in setup(self, items, train_setup)
    77     def setup(self, items=None, train_setup=False):
    78         train_setup = train_setup if self.train_setup is None else self.train_setup
---> 79         return self.setups(getattr(items, 'train', items) if train_setup else items)
    80 
    81     def _call(self, fn, x, split_idx=None, **kwargs):

~\Anaconda3\envs\fastai\lib\site-packages\fastcore\dispatch.py in __call__(self, *args, **kwargs)
    115         elif self.inst is not None: f = MethodType(f, self.inst)
    116         elif self.owner is not None: f = MethodType(f, self.owner)
--> 117         return f(*args, **kwargs)
    118 
    119     def __get__(self, inst, owner):

~\Anaconda3\envs\fastai\lib\site-packages\fastai\tabular\core.py in setups(self, to)
    238     order = 1
    239     def setups(self, to):
--> 240         store_attr(classes={n:CategoryMap(to.iloc[:,n].items, add_na=(n in to.cat_names)) for n in to.cat_names})
    241 
    242     def encodes(self, to): to.transform(to.cat_names, partial(_apply_cats, self.classes, 1))

~\Anaconda3\envs\fastai\lib\site-packages\fastai\tabular\core.py in <dictcomp>(.0)
    238     order = 1
    239     def setups(self, to):
--> 240         store_attr(classes={n:CategoryMap(to.iloc[:,n].items, add_na=(n in to.cat_names)) for n in to.cat_names})
    241 
    242     def encodes(self, to): to.transform(to.cat_names, partial(_apply_cats, self.classes, 1))

~\Anaconda3\envs\fastai\lib\site-packages\fastai\data\transforms.py in __init__(self, col, sort, add_na, strict)
    212             if not hasattr(col,'unique'): col = L(col, use_list=True)
    213             # `o==o` is the generalized definition of non-NaN used by Pandas
--> 214             items = L(o for o in col.unique() if o==o)
    215             if sort: items = items.sorted()
    216         self.items = '#na#' + items if add_na else items

~\Anaconda3\envs\fastai\lib\site-packages\fastcore\foundation.py in __call__(cls, x, *args, **kwargs)
    120     def __call__(cls, x=None, *args, **kwargs):
    121         if not args and not kwargs and x is not None and isinstance(x,cls): return x
--> 122         return super().__call__(x, *args, **kwargs)
    123 
    124 # Cell

~\Anaconda3\envs\fastai\lib\site-packages\fastcore\foundation.py in __init__(self, items, use_list, match, *rest)
    128     def __init__(self, items=None, *rest, use_list=False, match=None):
    129         if (use_list is not None) or not is_array(items):
--> 130             items = listify(items, *rest, use_list=use_list, match=match)
    131         super().__init__(items)
    132 

~\Anaconda3\envs\fastai\lib\site-packages\fastcore\basics.py in listify(o, use_list, match, *rest)
    54     elif isinstance(o, list): res = o
    55     elif isinstance(o, str) or is_array(o): res = [o]
---> 56     elif is_iter(o): res = list(o)
    57     else: res = [o]
    58     if match is not None:

~\Anaconda3\envs\fastai\lib\site-packages\fastai\data\transforms.py in <genexpr>(.0)
    212             if not hasattr(col,'unique'): col = L(col, use_list=True)
    213             # `o==o` is the generalized definition of non-NaN used by Pandas
--> 214             items = L(o for o in col.unique() if o==o)
    215             if sort: items = items.sorted()
    216         self.items = '#na#' + items if add_na else items

pandas\_libs\missing.pyx in pandas._libs.missing.NAType.__bool__()

TypeError: boolean value of NA is ambiguous