Hi
I’m trying to see if I can do some ML on tennis betting predictions. The data going in has the following types:
<class 'pandas.core.frame.DataFrame'>
Int64Index: 18944 entries, 1 to 26374
Data columns (total 13 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Tournament 18944 non-null object
1 Category1 18944 non-null object
2 TournamentType1 18944 non-null object
3 MenWomen 18944 non-null object
4 ZConf 18944 non-null float64
5 ZWinFav 18944 non-null float64
6 ZWinDog 18944 non-null float64
7 BWinFav 18944 non-null float64
8 BWinDog 18944 non-null float64
9 BWinWig 18944 non-null Float64
10 BWinSpread 18944 non-null float64
11 ZWinFavMinusBWinFav 18944 non-null float64
12 BetResult 18944 non-null Int64
dtypes: Float64(1), Int64(1), float64(7), object(4)
memory usage: 2.6+ MB
I load the data into TabularPandas, and then when I try dls=to.dataloaders(bs=64) it fails. Here is some code:
to = TabularPandas(df2, procs=[Categorify, FillMissing,Normalize], cat_names=['Tournament', 'Category1','TournamentType1', 'MenWomen'],
cont_names=['ZConf','ZWinFav', 'ZWinDog', 'BWinFav','BWinDog','BWinWig','BWinSpread','ZWinFavMinusBWinFav'], y_names='BetResult',
splits = RandomSplitter(valid_pct=0.2)(range_of(df2)))
to.xs.iloc[:2]
This works - I see two rows with what seems to be correct data.
But when I try the following, it fails:
dls=to.dataloaders(bs=64)
Could not do one pass in your dataloader, there is something wrong in it. Please see the stack trace below:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Cell In[132], line 1
----> 1 dls=to.dataloaders(bs=64)
File C:\DataVenv\DataCarpentary\venv\Lib\site-packages\fastai\data\core.py:337, in FilteredBase.dataloaders(self, bs, shuffle_train, shuffle, val_shuffle, n, path, dl_type, dl_kwargs, device, drop_last, val_bs, **kwargs)
335 dl = dl_type(self.subset(0), **merge(kwargs,def_kwargs, dl_kwargs[0]))
336 def_kwargs = {'bs':bs if val_bs is None else val_bs,'shuffle':val_shuffle,'n':None,'drop_last':False}
--> 337 dls = [dl] + [dl.new(self.subset(i), **merge(kwargs,def_kwargs,val_kwargs,dl_kwargs[i]))
338 for i in range(1, self.n_subsets)]
339 return self._dbunch_type(*dls, path=path, device=device)
File C:\DataVenv\DataCarpentary\venv\Lib\site-packages\fastai\data\core.py:337, in <listcomp>(.0)
335 dl = dl_type(self.subset(0), **merge(kwargs,def_kwargs, dl_kwargs[0]))
336 def_kwargs = {'bs':bs if val_bs is None else val_bs,'shuffle':val_shuffle,'n':None,'drop_last':False}
--> 337 dls = [dl] + [dl.new(self.subset(i), **merge(kwargs,def_kwargs,val_kwargs,dl_kwargs[i]))
338 for i in range(1, self.n_subsets)]
339 return self._dbunch_type(*dls, path=path, device=device)
File C:\DataVenv\DataCarpentary\venv\Lib\site-packages\fastai\data\core.py:97, in TfmdDL.new(self, dataset, cls, **kwargs)
95 if not hasattr(self, '_n_inp') or not hasattr(self, '_types'):
96 try:
---> 97 self._one_pass()
98 res._n_inp,res._types = self._n_inp,self._types
99 except Exception as e:
File C:\DataVenv\DataCarpentary\venv\Lib\site-packages\fastai\data\core.py:80, in TfmdDL._one_pass(self)
78 b = self.do_batch([self.do_item(None)])
79 if self.device is not None: b = to_device(b, self.device)
---> 80 its = self.after_batch(b)
81 self._n_inp = 1 if not isinstance(its, (list,tuple)) or len(its)==1 else len(its)-1
82 self._types = explode_types(its)
File C:\DataVenv\DataCarpentary\venv\Lib\site-packages\fastcore\transform.py:208, in Pipeline.__call__(self, o)
--> 208 def __call__(self, o): return compose_tfms(o, tfms=self.fs, split_idx=self.split_idx)
File C:\DataVenv\DataCarpentary\venv\Lib\site-packages\fastcore\transform.py:158, in compose_tfms(x, tfms, is_enc, reverse, **kwargs)
156 for f in tfms:
157 if not is_enc: f = f.decode
--> 158 x = f(x, **kwargs)
159 return x
File C:\DataVenv\DataCarpentary\venv\Lib\site-packages\fastcore\transform.py:121, in ItemTransform.__call__(self, x, **kwargs)
--> 121 def __call__(self, x, **kwargs): return self._call1(x, '__call__', **kwargs)
File C:\DataVenv\DataCarpentary\venv\Lib\site-packages\fastcore\transform.py:124, in ItemTransform._call1(self, x, name, **kwargs)
123 def _call1(self, x, name, **kwargs):
--> 124 if not _is_tuple(x): return getattr(super(), name)(x, **kwargs)
125 y = getattr(super(), name)(list(x), **kwargs)
126 if not self._retain: return y
File C:\DataVenv\DataCarpentary\venv\Lib\site-packages\fastcore\transform.py:81, in Transform.__call__(self, x, **kwargs)
---> 81 def __call__(self, x, **kwargs): return self._call('encodes', x, **kwargs)
File C:\DataVenv\DataCarpentary\venv\Lib\site-packages\fastcore\transform.py:91, in Transform._call(self, fn, x, split_idx, **kwargs)
89 def _call(self, fn, x, split_idx=None, **kwargs):
90 if split_idx!=self.split_idx and self.split_idx is not None: return x
---> 91 return self._do_call(getattr(self, fn), x, **kwargs)
File C:\DataVenv\DataCarpentary\venv\Lib\site-packages\fastcore\transform.py:97, in Transform._do_call(self, f, x, **kwargs)
95 if f is None: return x
96 ret = f.returns(x) if hasattr(f,'returns') else None
---> 97 return retain_type(f(x, **kwargs), x, ret)
98 res = tuple(self._do_call(f, x_, **kwargs) for x_ in x)
99 return retain_type(res, x)
File C:\DataVenv\DataCarpentary\venv\Lib\site-packages\fastcore\dispatch.py:120, in TypeDispatch.__call__(self, *args, **kwargs)
118 elif self.inst is not None: f = MethodType(f, self.inst)
119 elif self.owner is not None: f = MethodType(f, self.owner)
--> 120 return f(*args, **kwargs)
File C:\DataVenv\DataCarpentary\venv\Lib\site-packages\fastai\tabular\core.py:327, in ReadTabBatch.encodes(self, to)
325 def encodes(self, to):
326 if not to.with_cont: res = (tensor(to.cats).long(),)
--> 327 else: res = (tensor(to.cats).long(),tensor(to.conts).float())
328 ys = [n for n in to.y_names if n in to.items.columns]
329 if len(ys) == len(to.y_names): res = res + (tensor(to.targ),)
File C:\DataVenv\DataCarpentary\venv\Lib\site-packages\fastai\torch_core.py:152, in tensor(x, *rest, **kwargs)
146 if len(rest): x = (x,)+rest
147 # There was a Pytorch bug in dataloader using num_workers>0. Haven't confirmed if fixed
148 # if isinstance(x, (tuple,list)) and len(x)==0: return tensor(0)
149 res = (x if isinstance(x, Tensor)
150 else torch.tensor(x, **kwargs) if isinstance(x, (tuple,list,numbers.Number))
151 else _array2tensor(x, **kwargs) if isinstance(x, ndarray)
--> 152 else as_tensor(x.values, **kwargs) if isinstance(x, (pd.Series, pd.DataFrame))
153 # else as_tensor(array(x, **kwargs)) if hasattr(x, '__array__') or is_iter(x)
154 else _array2tensor(array(x), **kwargs))
155 if res.dtype is torch.float64: return res.float()
156 return res
TypeError: can't convert np.ndarray of type numpy.object_. The only supported types are: float64, float32, float16, complex64, complex128, int64, int32, int16, int8, uint8, and bool.
So I have checked that the categorical columns only contain values or nulls.
Category1 contains 974 values of ‘USA Open’; I’ll switch this to ‘USAOpen’, my thought being that maybe it is seeing ‘USA Open’ as an array? lol. Fixed that, but I still get the same error.
to.xs.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 18944 entries, 21824 to 5315
Data columns (total 12 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Tournament 18944 non-null int8
1 Category1 18944 non-null int8
2 TournamentType1 18944 non-null int8
3 MenWomen 18944 non-null int8
4 ZConf 18944 non-null float64
5 ZWinFav 18944 non-null float64
6 ZWinDog 18944 non-null float64
7 BWinFav 18944 non-null float64
8 BWinDog 18944 non-null float64
9 BWinWig 18944 non-null Float64
10 BWinSpread 18944 non-null float64
11 ZWinFavMinusBWinFav 18944 non-null float64
dtypes: Float64(1), float64(7), int8(4)
So I’m not sure where it is seeing an array. I’m trying to explore this `to` (TabularPandas) object. Like, where is the ‘y-column’? Maybe to.ys.info()? Yes, there it is.
I have also tried changing the datatypes going in from object to string, as shown below, but I still get the same issue:
<class 'pandas.core.frame.DataFrame'>
Int64Index: 18944 entries, 1 to 26374
Data columns (total 13 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Tournament 18944 non-null |S80
1 Category1 18944 non-null |S80
2 TournamentType1 18944 non-null |S80
3 MenWomen 18944 non-null |S80
4 ZConf 18944 non-null float64
5 ZWinFav 18944 non-null float64
6 ZWinDog 18944 non-null float64
7 BWinFav 18944 non-null float64
8 BWinDog 18944 non-null float64
9 BWinWig 18944 non-null Float64
10 BWinSpread 18944 non-null float64
11 ZWinFavMinusBWinFav 18944 non-null float64
12 BetResult 18944 non-null object
dtypes: Float64(1), bytes640(4), float64(7), object(1)
memory usage: 7.7+ MB
Actually, maybe the error message changes:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Cell In[46], line 1
----> 1 dls=to.dataloaders(bs=64)
File C:\DataVenv\DataCarpentary\venv\Lib\site-packages\fastai\data\core.py:337, in FilteredBase.dataloaders(self, bs, shuffle_train, shuffle, val_shuffle, n, path, dl_type, dl_kwargs, device, drop_last, val_bs, **kwargs)
335 dl = dl_type(self.subset(0), **merge(kwargs,def_kwargs, dl_kwargs[0]))
336 def_kwargs = {'bs':bs if val_bs is None else val_bs,'shuffle':val_shuffle,'n':None,'drop_last':False}
--> 337 dls = [dl] + [dl.new(self.subset(i), **merge(kwargs,def_kwargs,val_kwargs,dl_kwargs[i]))
338 for i in range(1, self.n_subsets)]
339 return self._dbunch_type(*dls, path=path, device=device)
File C:\DataVenv\DataCarpentary\venv\Lib\site-packages\fastai\data\core.py:337, in <listcomp>(.0)
335 dl = dl_type(self.subset(0), **merge(kwargs,def_kwargs, dl_kwargs[0]))
336 def_kwargs = {'bs':bs if val_bs is None else val_bs,'shuffle':val_shuffle,'n':None,'drop_last':False}
--> 337 dls = [dl] + [dl.new(self.subset(i), **merge(kwargs,def_kwargs,val_kwargs,dl_kwargs[i]))
338 for i in range(1, self.n_subsets)]
339 return self._dbunch_type(*dls, path=path, device=device)
File C:\DataVenv\DataCarpentary\venv\Lib\site-packages\fastai\data\core.py:97, in TfmdDL.new(self, dataset, cls, **kwargs)
95 if not hasattr(self, '_n_inp') or not hasattr(self, '_types'):
96 try:
---> 97 self._one_pass()
98 res._n_inp,res._types = self._n_inp,self._types
99 except Exception as e:
File C:\DataVenv\DataCarpentary\venv\Lib\site-packages\fastai\data\core.py:80, in TfmdDL._one_pass(self)
78 b = self.do_batch([self.do_item(None)])
79 if self.device is not None: b = to_device(b, self.device)
---> 80 its = self.after_batch(b)
81 self._n_inp = 1 if not isinstance(its, (list,tuple)) or len(its)==1 else len(its)-1
82 self._types = explode_types(its)
File C:\DataVenv\DataCarpentary\venv\Lib\site-packages\fastcore\transform.py:208, in Pipeline.__call__(self, o)
--> 208 def __call__(self, o): return compose_tfms(o, tfms=self.fs, split_idx=self.split_idx)
File C:\DataVenv\DataCarpentary\venv\Lib\site-packages\fastcore\transform.py:158, in compose_tfms(x, tfms, is_enc, reverse, **kwargs)
156 for f in tfms:
157 if not is_enc: f = f.decode
--> 158 x = f(x, **kwargs)
159 return x
File C:\DataVenv\DataCarpentary\venv\Lib\site-packages\fastcore\transform.py:121, in ItemTransform.__call__(self, x, **kwargs)
--> 121 def __call__(self, x, **kwargs): return self._call1(x, '__call__', **kwargs)
File C:\DataVenv\DataCarpentary\venv\Lib\site-packages\fastcore\transform.py:124, in ItemTransform._call1(self, x, name, **kwargs)
123 def _call1(self, x, name, **kwargs):
--> 124 if not _is_tuple(x): return getattr(super(), name)(x, **kwargs)
125 y = getattr(super(), name)(list(x), **kwargs)
126 if not self._retain: return y
File C:\DataVenv\DataCarpentary\venv\Lib\site-packages\fastcore\transform.py:81, in Transform.__call__(self, x, **kwargs)
---> 81 def __call__(self, x, **kwargs): return self._call('encodes', x, **kwargs)
File C:\DataVenv\DataCarpentary\venv\Lib\site-packages\fastcore\transform.py:91, in Transform._call(self, fn, x, split_idx, **kwargs)
89 def _call(self, fn, x, split_idx=None, **kwargs):
90 if split_idx!=self.split_idx and self.split_idx is not None: return x
---> 91 return self._do_call(getattr(self, fn), x, **kwargs)
File C:\DataVenv\DataCarpentary\venv\Lib\site-packages\fastcore\transform.py:97, in Transform._do_call(self, f, x, **kwargs)
95 if f is None: return x
96 ret = f.returns(x) if hasattr(f,'returns') else None
---> 97 return retain_type(f(x, **kwargs), x, ret)
98 res = tuple(self._do_call(f, x_, **kwargs) for x_ in x)
99 return retain_type(res, x)
File C:\DataVenv\DataCarpentary\venv\Lib\site-packages\fastcore\dispatch.py:120, in TypeDispatch.__call__(self, *args, **kwargs)
118 elif self.inst is not None: f = MethodType(f, self.inst)
119 elif self.owner is not None: f = MethodType(f, self.owner)
--> 120 return f(*args, **kwargs)
File C:\DataVenv\DataCarpentary\venv\Lib\site-packages\fastai\tabular\core.py:327, in ReadTabBatch.encodes(self, to)
325 def encodes(self, to):
326 if not to.with_cont: res = (tensor(to.cats).long(),)
--> 327 else: res = (tensor(to.cats).long(),tensor(to.conts).float())
328 ys = [n for n in to.y_names if n in to.items.columns]
329 if len(ys) == len(to.y_names): res = res + (tensor(to.targ),)
File C:\DataVenv\DataCarpentary\venv\Lib\site-packages\fastai\torch_core.py:152, in tensor(x, *rest, **kwargs)
146 if len(rest): x = (x,)+rest
147 # There was a Pytorch bug in dataloader using num_workers>0. Haven't confirmed if fixed
148 # if isinstance(x, (tuple,list)) and len(x)==0: return tensor(0)
149 res = (x if isinstance(x, Tensor)
150 else torch.tensor(x, **kwargs) if isinstance(x, (tuple,list,numbers.Number))
151 else _array2tensor(x, **kwargs) if isinstance(x, ndarray)
--> 152 else as_tensor(x.values, **kwargs) if isinstance(x, (pd.Series, pd.DataFrame))
153 # else as_tensor(array(x, **kwargs)) if hasattr(x, '__array__') or is_iter(x)
154 else _array2tensor(array(x), **kwargs))
155 if res.dtype is torch.float64: return res.float()
156 return res
TypeError: can't convert np.ndarray of type numpy.object_. The only supported types are: float64, float32, float16, complex64, complex128, int64, int32, int16, int8, uint8, and bool.
Any help appreciated. Thanks