Tabular data fails on to.dataloaders(bs=64)

Hi
I’m trying to see if I can do some ML on tennis betting predictions. The data going in has the following column types:

<class 'pandas.core.frame.DataFrame'>
Int64Index: 18944 entries, 1 to 26374
Data columns (total 13 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   Tournament           18944 non-null  object 
 1   Category1            18944 non-null  object 
 2   TournamentType1      18944 non-null  object 
 3   MenWomen             18944 non-null  object 
 4   ZConf                18944 non-null  float64
 5   ZWinFav              18944 non-null  float64
 6   ZWinDog              18944 non-null  float64
 7   BWinFav              18944 non-null  float64
 8   BWinDog              18944 non-null  float64
 9   BWinWig              18944 non-null  Float64
 10  BWinSpread           18944 non-null  float64
 11  ZWinFavMinusBWinFav  18944 non-null  float64
 12  BetResult            18944 non-null  Int64  
dtypes: Float64(1), Int64(1), float64(7), object(4)
memory usage: 2.6+ MB

I load the data into TabularPandas, and then when I try dls=to.dataloaders(bs=64) it fails. Here is some code:

to = TabularPandas(df2, procs=[Categorify, FillMissing,Normalize], cat_names=['Tournament', 'Category1','TournamentType1', 'MenWomen'],
                   cont_names=['ZConf','ZWinFav', 'ZWinDog', 'BWinFav','BWinDog','BWinWig','BWinSpread','ZWinFavMinusBWinFav'], y_names='BetResult', 
                   splits = RandomSplitter(valid_pct=0.2)(range_of(df2)))
to.xs.iloc[:2]

works - I see two rows with what seems to be correct data

but when I try

dls=to.dataloaders(bs=64)

Could not do one pass in your dataloader, there is something wrong in it. Please see the stack trace below:
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[132], line 1
----> 1 dls=to.dataloaders(bs=64)

File C:\DataVenv\DataCarpentary\venv\Lib\site-packages\fastai\data\core.py:337, in FilteredBase.dataloaders(self, bs, shuffle_train, shuffle, val_shuffle, n, path, dl_type, dl_kwargs, device, drop_last, val_bs, **kwargs)
    335 dl = dl_type(self.subset(0), **merge(kwargs,def_kwargs, dl_kwargs[0]))
    336 def_kwargs = {'bs':bs if val_bs is None else val_bs,'shuffle':val_shuffle,'n':None,'drop_last':False}
--> 337 dls = [dl] + [dl.new(self.subset(i), **merge(kwargs,def_kwargs,val_kwargs,dl_kwargs[i]))
    338               for i in range(1, self.n_subsets)]
    339 return self._dbunch_type(*dls, path=path, device=device)

File C:\DataVenv\DataCarpentary\venv\Lib\site-packages\fastai\data\core.py:337, in <listcomp>(.0)
    335 dl = dl_type(self.subset(0), **merge(kwargs,def_kwargs, dl_kwargs[0]))
    336 def_kwargs = {'bs':bs if val_bs is None else val_bs,'shuffle':val_shuffle,'n':None,'drop_last':False}
--> 337 dls = [dl] + [dl.new(self.subset(i), **merge(kwargs,def_kwargs,val_kwargs,dl_kwargs[i]))
    338               for i in range(1, self.n_subsets)]
    339 return self._dbunch_type(*dls, path=path, device=device)

File C:\DataVenv\DataCarpentary\venv\Lib\site-packages\fastai\data\core.py:97, in TfmdDL.new(self, dataset, cls, **kwargs)
     95 if not hasattr(self, '_n_inp') or not hasattr(self, '_types'):
     96     try:
---> 97         self._one_pass()
     98         res._n_inp,res._types = self._n_inp,self._types
     99     except Exception as e: 

File C:\DataVenv\DataCarpentary\venv\Lib\site-packages\fastai\data\core.py:80, in TfmdDL._one_pass(self)
     78 b = self.do_batch([self.do_item(None)])
     79 if self.device is not None: b = to_device(b, self.device)
---> 80 its = self.after_batch(b)
     81 self._n_inp = 1 if not isinstance(its, (list,tuple)) or len(its)==1 else len(its)-1
     82 self._types = explode_types(its)

File C:\DataVenv\DataCarpentary\venv\Lib\site-packages\fastcore\transform.py:208, in Pipeline.__call__(self, o)
--> 208 def __call__(self, o): return compose_tfms(o, tfms=self.fs, split_idx=self.split_idx)

File C:\DataVenv\DataCarpentary\venv\Lib\site-packages\fastcore\transform.py:158, in compose_tfms(x, tfms, is_enc, reverse, **kwargs)
    156 for f in tfms:
    157     if not is_enc: f = f.decode
--> 158     x = f(x, **kwargs)
    159 return x

File C:\DataVenv\DataCarpentary\venv\Lib\site-packages\fastcore\transform.py:121, in ItemTransform.__call__(self, x, **kwargs)
--> 121 def __call__(self, x, **kwargs): return self._call1(x, '__call__', **kwargs)

File C:\DataVenv\DataCarpentary\venv\Lib\site-packages\fastcore\transform.py:124, in ItemTransform._call1(self, x, name, **kwargs)
    123 def _call1(self, x, name, **kwargs):
--> 124     if not _is_tuple(x): return getattr(super(), name)(x, **kwargs)
    125     y = getattr(super(), name)(list(x), **kwargs)
    126     if not self._retain: return y

File C:\DataVenv\DataCarpentary\venv\Lib\site-packages\fastcore\transform.py:81, in Transform.__call__(self, x, **kwargs)
---> 81 def __call__(self, x, **kwargs): return self._call('encodes', x, **kwargs)

File C:\DataVenv\DataCarpentary\venv\Lib\site-packages\fastcore\transform.py:91, in Transform._call(self, fn, x, split_idx, **kwargs)
     89 def _call(self, fn, x, split_idx=None, **kwargs):
     90     if split_idx!=self.split_idx and self.split_idx is not None: return x
---> 91     return self._do_call(getattr(self, fn), x, **kwargs)

File C:\DataVenv\DataCarpentary\venv\Lib\site-packages\fastcore\transform.py:97, in Transform._do_call(self, f, x, **kwargs)
     95     if f is None: return x
     96     ret = f.returns(x) if hasattr(f,'returns') else None
---> 97     return retain_type(f(x, **kwargs), x, ret)
     98 res = tuple(self._do_call(f, x_, **kwargs) for x_ in x)
     99 return retain_type(res, x)

File C:\DataVenv\DataCarpentary\venv\Lib\site-packages\fastcore\dispatch.py:120, in TypeDispatch.__call__(self, *args, **kwargs)
    118 elif self.inst is not None: f = MethodType(f, self.inst)
    119 elif self.owner is not None: f = MethodType(f, self.owner)
--> 120 return f(*args, **kwargs)

File C:\DataVenv\DataCarpentary\venv\Lib\site-packages\fastai\tabular\core.py:327, in ReadTabBatch.encodes(self, to)
    325 def encodes(self, to):
    326     if not to.with_cont: res = (tensor(to.cats).long(),)
--> 327     else: res = (tensor(to.cats).long(),tensor(to.conts).float())
    328     ys = [n for n in to.y_names if n in to.items.columns]
    329     if len(ys) == len(to.y_names): res = res + (tensor(to.targ),)

File C:\DataVenv\DataCarpentary\venv\Lib\site-packages\fastai\torch_core.py:152, in tensor(x, *rest, **kwargs)
    146     if len(rest): x = (x,)+rest
    147     # There was a Pytorch bug in dataloader using num_workers>0. Haven't confirmed if fixed
    148     # if isinstance(x, (tuple,list)) and len(x)==0: return tensor(0)
    149     res = (x if isinstance(x, Tensor)
    150            else torch.tensor(x, **kwargs) if isinstance(x, (tuple,list,numbers.Number))
    151            else _array2tensor(x, **kwargs) if isinstance(x, ndarray)
--> 152            else as_tensor(x.values, **kwargs) if isinstance(x, (pd.Series, pd.DataFrame))
    153 #            else as_tensor(array(x, **kwargs)) if hasattr(x, '__array__') or is_iter(x)
    154            else _array2tensor(array(x), **kwargs))
    155     if res.dtype is torch.float64: return res.float()
    156     return res

TypeError: can't convert np.ndarray of type numpy.object_. The only supported types are: float64, float32, float16, complex64, complex128, int64, int32, int16, int8, uint8, and bool.

So I have checked that the categorical columns only contain values or null.

Category1 contains 974 values of ‘USA Open’. I’ll switch this to ‘USAOpen’; my thought is maybe it is seeing ‘USA Open’ as an array? lol. Fixed that, same error.

to.xs.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 18944 entries, 21824 to 5315
Data columns (total 12 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   Tournament           18944 non-null  int8   
 1   Category1            18944 non-null  int8   
 2   TournamentType1      18944 non-null  int8   
 3   MenWomen             18944 non-null  int8   
 4   ZConf                18944 non-null  float64
 5   ZWinFav              18944 non-null  float64
 6   ZWinDog              18944 non-null  float64
 7   BWinFav              18944 non-null  float64
 8   BWinDog              18944 non-null  float64
 9   BWinWig              18944 non-null  Float64
 10  BWinSpread           18944 non-null  float64
 11  ZWinFavMinusBWinFav  18944 non-null  float64
dtypes: Float64(1), float64(7), int8(4)

So I’m not sure where it is seeing an array. I’m trying to explore this `to` object. Like where is the ‘y-column’? Maybe to.ys.info()? Yes, there it is.

I have also tried changing the datatypes going in from object to string, as shown below, but I still get the same issue:

<class 'pandas.core.frame.DataFrame'>
Int64Index: 18944 entries, 1 to 26374
Data columns (total 13 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   Tournament           18944 non-null  |S80   
 1   Category1            18944 non-null  |S80   
 2   TournamentType1      18944 non-null  |S80   
 3   MenWomen             18944 non-null  |S80   
 4   ZConf                18944 non-null  float64
 5   ZWinFav              18944 non-null  float64
 6   ZWinDog              18944 non-null  float64
 7   BWinFav              18944 non-null  float64
 8   BWinDog              18944 non-null  float64
 9   BWinWig              18944 non-null  Float64
 10  BWinSpread           18944 non-null  float64
 11  ZWinFavMinusBWinFav  18944 non-null  float64
 12  BetResult            18944 non-null  object 
dtypes: Float64(1), bytes640(4), float64(7), object(1)
memory usage: 7.7+ MB

Actually maybe the error message changes


---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[46], line 1
----> 1 dls=to.dataloaders(bs=64)

File C:\DataVenv\DataCarpentary\venv\Lib\site-packages\fastai\data\core.py:337, in FilteredBase.dataloaders(self, bs, shuffle_train, shuffle, val_shuffle, n, path, dl_type, dl_kwargs, device, drop_last, val_bs, **kwargs)
    335 dl = dl_type(self.subset(0), **merge(kwargs,def_kwargs, dl_kwargs[0]))
    336 def_kwargs = {'bs':bs if val_bs is None else val_bs,'shuffle':val_shuffle,'n':None,'drop_last':False}
--> 337 dls = [dl] + [dl.new(self.subset(i), **merge(kwargs,def_kwargs,val_kwargs,dl_kwargs[i]))
    338               for i in range(1, self.n_subsets)]
    339 return self._dbunch_type(*dls, path=path, device=device)

File C:\DataVenv\DataCarpentary\venv\Lib\site-packages\fastai\data\core.py:337, in <listcomp>(.0)
    335 dl = dl_type(self.subset(0), **merge(kwargs,def_kwargs, dl_kwargs[0]))
    336 def_kwargs = {'bs':bs if val_bs is None else val_bs,'shuffle':val_shuffle,'n':None,'drop_last':False}
--> 337 dls = [dl] + [dl.new(self.subset(i), **merge(kwargs,def_kwargs,val_kwargs,dl_kwargs[i]))
    338               for i in range(1, self.n_subsets)]
    339 return self._dbunch_type(*dls, path=path, device=device)

File C:\DataVenv\DataCarpentary\venv\Lib\site-packages\fastai\data\core.py:97, in TfmdDL.new(self, dataset, cls, **kwargs)
     95 if not hasattr(self, '_n_inp') or not hasattr(self, '_types'):
     96     try:
---> 97         self._one_pass()
     98         res._n_inp,res._types = self._n_inp,self._types
     99     except Exception as e: 

File C:\DataVenv\DataCarpentary\venv\Lib\site-packages\fastai\data\core.py:80, in TfmdDL._one_pass(self)
     78 b = self.do_batch([self.do_item(None)])
     79 if self.device is not None: b = to_device(b, self.device)
---> 80 its = self.after_batch(b)
     81 self._n_inp = 1 if not isinstance(its, (list,tuple)) or len(its)==1 else len(its)-1
     82 self._types = explode_types(its)

File C:\DataVenv\DataCarpentary\venv\Lib\site-packages\fastcore\transform.py:208, in Pipeline.__call__(self, o)
--> 208 def __call__(self, o): return compose_tfms(o, tfms=self.fs, split_idx=self.split_idx)

File C:\DataVenv\DataCarpentary\venv\Lib\site-packages\fastcore\transform.py:158, in compose_tfms(x, tfms, is_enc, reverse, **kwargs)
    156 for f in tfms:
    157     if not is_enc: f = f.decode
--> 158     x = f(x, **kwargs)
    159 return x

File C:\DataVenv\DataCarpentary\venv\Lib\site-packages\fastcore\transform.py:121, in ItemTransform.__call__(self, x, **kwargs)
--> 121 def __call__(self, x, **kwargs): return self._call1(x, '__call__', **kwargs)

File C:\DataVenv\DataCarpentary\venv\Lib\site-packages\fastcore\transform.py:124, in ItemTransform._call1(self, x, name, **kwargs)
    123 def _call1(self, x, name, **kwargs):
--> 124     if not _is_tuple(x): return getattr(super(), name)(x, **kwargs)
    125     y = getattr(super(), name)(list(x), **kwargs)
    126     if not self._retain: return y

File C:\DataVenv\DataCarpentary\venv\Lib\site-packages\fastcore\transform.py:81, in Transform.__call__(self, x, **kwargs)
---> 81 def __call__(self, x, **kwargs): return self._call('encodes', x, **kwargs)

File C:\DataVenv\DataCarpentary\venv\Lib\site-packages\fastcore\transform.py:91, in Transform._call(self, fn, x, split_idx, **kwargs)
     89 def _call(self, fn, x, split_idx=None, **kwargs):
     90     if split_idx!=self.split_idx and self.split_idx is not None: return x
---> 91     return self._do_call(getattr(self, fn), x, **kwargs)

File C:\DataVenv\DataCarpentary\venv\Lib\site-packages\fastcore\transform.py:97, in Transform._do_call(self, f, x, **kwargs)
     95     if f is None: return x
     96     ret = f.returns(x) if hasattr(f,'returns') else None
---> 97     return retain_type(f(x, **kwargs), x, ret)
     98 res = tuple(self._do_call(f, x_, **kwargs) for x_ in x)
     99 return retain_type(res, x)

File C:\DataVenv\DataCarpentary\venv\Lib\site-packages\fastcore\dispatch.py:120, in TypeDispatch.__call__(self, *args, **kwargs)
    118 elif self.inst is not None: f = MethodType(f, self.inst)
    119 elif self.owner is not None: f = MethodType(f, self.owner)
--> 120 return f(*args, **kwargs)

File C:\DataVenv\DataCarpentary\venv\Lib\site-packages\fastai\tabular\core.py:327, in ReadTabBatch.encodes(self, to)
    325 def encodes(self, to):
    326     if not to.with_cont: res = (tensor(to.cats).long(),)
--> 327     else: res = (tensor(to.cats).long(),tensor(to.conts).float())
    328     ys = [n for n in to.y_names if n in to.items.columns]
    329     if len(ys) == len(to.y_names): res = res + (tensor(to.targ),)

File C:\DataVenv\DataCarpentary\venv\Lib\site-packages\fastai\torch_core.py:152, in tensor(x, *rest, **kwargs)
    146     if len(rest): x = (x,)+rest
    147     # There was a Pytorch bug in dataloader using num_workers>0. Haven't confirmed if fixed
    148     # if isinstance(x, (tuple,list)) and len(x)==0: return tensor(0)
    149     res = (x if isinstance(x, Tensor)
    150            else torch.tensor(x, **kwargs) if isinstance(x, (tuple,list,numbers.Number))
    151            else _array2tensor(x, **kwargs) if isinstance(x, ndarray)
--> 152            else as_tensor(x.values, **kwargs) if isinstance(x, (pd.Series, pd.DataFrame))
    153 #            else as_tensor(array(x, **kwargs)) if hasattr(x, '__array__') or is_iter(x)
    154            else _array2tensor(array(x), **kwargs))
    155     if res.dtype is torch.float64: return res.float()
    156     return res

TypeError: can't convert np.ndarray of type numpy.object_. The only supported types are: float64, float32, float16, complex64, complex128, int64, int32, int16, int8, uint8, and bool.

Any help appreciated. Thanks