I am experiencing a problem with the DataBlock.datasets()
using a list of tuples (numpy.ndarray, str) as a source. Using fastai2 v0.0.10, I was able to create dataloaders using both Datasets
and DataBlock
classes. When I upgraded to v0.0.11, this issue appeared.
I updated all my transforms that were using as_item=True
to ItemTransform
. By doing so, I was able to create a dataset using Datasets
class. Datasets.vocab
works fine and I can plot a timeseries from my dataset using show_at
.
My items are basically a list of tuples. The tuples are numpy.ndarray
and string
. The error occurs in the Categorize.setups()
(in fastai2.data.transforms.py
). It says TypeError: unhashable type: 'numpy.ndarray'
. It looks like the numpy array is passed in instead of the strings. I suspect that it has to do with the DataBlock.getters()
but none of my attempts were successful.
The source code and the error stack trace are below:
class TensorTS(TensorBase):
    "A `TensorBase` subclass holding a 2D timeseries array; `show` plots it"
    # ctx: optional plotting context (presumably a matplotlib axis -- TODO confirm);
    # chs: which channels to display; leg: whether to draw a legend.
    # Body elided in this excerpt ("....").
    def show(self, ctx=None, title=None, chs=None, leg=True, **kwargs):
        ....
class ToTensorTS(ItemTransform):
    "Take an `(ndarray, label)` tuple and wrap the array in a `TensorTS`"
    def encodes(self, xy):
        arr, _ = xy
        return TensorTS(arr)
def TSBlock():
    "`TransformBlock` for timeseries: converts a numpy array into a `TensorTS`"
    block = TransformBlock(type_tfms=ToTensorTS())
    return block
class LabelTS(ItemTransform):
    "Take an `(ndarray, label)` tuple and return the label"
    def encodes(self, xy):
        _, label = xy
        return label
from operator import itemgetter

# Use picklable `itemgetter`s instead of lambdas: lambdas cannot be pickled,
# which breaks multiprocessing DataLoaders (notably on Windows, as here).
getters = [itemgetter(0), itemgetter(1)]

# NOTE(review): each item is an `(ndarray, label)` tuple and the getters above
# already split it, yet `ToTensorTS`/`LabelTS` are `ItemTransform`s that ALSO
# unpack the tuple. The two mechanisms conflict -- that would explain why raw
# arrays reach `Categorize.setups` -- confirm which layer should do the split
# (either keep the getters with plain transforms, or drop the getters and let
# the ItemTransforms unpack).
tsdb = DataBlock(blocks=(TSBlock, CategoryBlock),
                 get_items=get_ts_items,
                 getters=getters,
                 splitter=RandomSplitter(seed=seed),
                 batch_tfms=batch_tfms)

tsdb.datasets(fnames, verbose=True)
=======================
Collecting items from [Path('C:/Users/fh/.fastai/data/NATOPS/NATOPS_TRAIN.arff'), Path('C:/Users/fh/.fastai/data/NATOPS/NATOPS_TEST.arff')]
Found 360 items
2 datasets of sizes 288,72
Setting up Pipeline: <lambda> -> ToTensorTSBlock
TYPE of xy : <class 'list'>
LENTGH of xy : 2
[array([-0.540579, -0.54101 , -0.540603, -0.540807, -0.540564, -0.540681,
-0.540665, -0.541065, -0.540593, -0.540723, -0.54044 , -0.540232,
-0.540191, -0.540209, -0.53981 , -0.539904, -0.540259, -0.540194,
-0.54002 , -0.54033 , -0.546852, -0.551316, -0.553834, -0.558104,
-0.560738, -0.558948, -0.559712, -0.565038, -0.560855, -0.55511 ,
-0.553586, -0.55047 , -0.555965, -0.554922, -0.55378 , -0.557211,
-0.556262, -0.558439, -0.560581, -0.560134, -0.55657 , -0.564141,
-0.56727 , -0.568937, -0.572611, -0.570396, -0.569147, -0.564811,
-0.56305 , -0.566314, -0.553712], dtype=float32), '2']
TYPE of x : <class 'numpy.ndarray'>
TYPE of y : <class 'str'>
x : [-0.540579 -0.54101 -0.540603 -0.540807 -0.540564 -0.540681 -0.540665
-0.541065 -0.540593 -0.540723 -0.54044 -0.540232 -0.540191 -0.540209
-0.53981 -0.539904 -0.540259 -0.540194 -0.54002 -0.54033 -0.546852
-0.551316 -0.553834 -0.558104 -0.560738 -0.558948 -0.559712 -0.565038
-0.560855 -0.55511 -0.553586 -0.55047 -0.555965 -0.554922 -0.55378
-0.557211 -0.556262 -0.558439 -0.560581 -0.560134 -0.55657 -0.564141
-0.56727 -0.568937 -0.572611 -0.570396 -0.569147 -0.564811 -0.56305
-0.566314 -0.553712]
y : 2
Setting up Pipeline: <lambda> -> Categorize
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
in
----> 1 tsdb.datasets(fnames, verbose=True)
c:\users\fh\dev\fastai2\fastai2\fastai2\data\block.py in datasets(self, source, verbose)
93 splits = (self.splitter or RandomSplitter())(items)
94 pv(f"{len(splits)} datasets of sizes {','.join([str(len(s)) for s in splits])}", verbose)
---> 95 return Datasets(items, tfms=self._combine_type_tfms(), splits=splits, dl_type=self.dl_type, n_inp=self.n_inp, verbose=verbose)
96
97 def dataloaders(self, source, path='.', verbose=False, **kwargs):
c:\users\fh\dev\fastai2\fastai2\fastai2\data\core.py in __init__(self, items, tfms, tls, n_inp, dl_type, **kwargs)
259 def __init__(self, items=None, tfms=None, tls=None, n_inp=None, dl_type=None, **kwargs):
260 super().__init__(dl_type=dl_type)
--> 261 self.tls = L(tls if tls else [TfmdLists(items, t, **kwargs) for t in L(ifnone(tfms,[None]))])
262 self.n_inp = (1 if len(self.tls)==1 else len(self.tls)-1) if n_inp is None else n_inp
263
c:\users\fh\dev\fastai2\fastai2\fastai2\data\core.py in (.0)
259 def __init__(self, items=None, tfms=None, tls=None, n_inp=None, dl_type=None, **kwargs):
260 super().__init__(dl_type=dl_type)
--> 261 self.tls = L(tls if tls else [TfmdLists(items, t, **kwargs) for t in L(ifnone(tfms,[None]))])
262 self.n_inp = (1 if len(self.tls)==1 else len(self.tls)-1) if n_inp is None else n_inp
263
c:\users\fh\dev\fastai2\fastcore\fastcore\foundation.py in __call__(cls, x, *args, **kwargs)
39 return x
40
---> 41 res = super().__call__(*((x,) + args), **kwargs)
42 res._newchk = 0
43 return res
c:\users\fh\dev\fastai2\fastai2\fastai2\data\core.py in __init__(self, items, tfms, use_list, do_setup, split_idx, train_setup, splits, types, verbose)
200 if do_setup:
201 pv(f"Setting up {self.tfms}", verbose)
--> 202 self.setup(train_setup=train_setup)
203
204 def _new(self, items, **kwargs): return super()._new(items, tfms=self.tfms, do_setup=False, types=self.types, **kwargs)
c:\users\fh\dev\fastai2\fastai2\fastai2\data\core.py in setup(self, train_setup)
213
214 def setup(self, train_setup=True):
--> 215 self.tfms.setup(self, train_setup)
216 if len(self) != 0:
217 x = super().__getitem__(0) if self.splits is None else super().__getitem__(self.splits[0])[0]
c:\users\fh\dev\fastai2\fastcore\fastcore\transform.py in setup(self, items, train_setup)
177 tfms = self.fs[:]
178 self.fs.clear()
--> 179 for t in tfms: self.add(t,items, train_setup)
180
181 def add(self,t, items=None, train_setup=False):
c:\users\fh\dev\fastai2\fastcore\fastcore\transform.py in add(self, t, items, train_setup)
180
181 def add(self,t, items=None, train_setup=False):
--> 182 t.setup(items, train_setup)
183 self.fs.append(t)
184
c:\users\fh\dev\fastai2\fastcore\fastcore\transform.py in setup(self, items, train_setup)
76 def setup(self, items=None, train_setup=False):
77 train_setup = train_setup if self.train_setup is None else self.train_setup
---> 78 return self.setups(getattr(items, 'train', items) if train_setup else items)
79
80 def _call(self, fn, x, split_idx=None, **kwargs):
c:\users\fh\dev\fastai2\fastcore\fastcore\dispatch.py in __call__(self, *args, **kwargs)
96 if not f: return args[0]
97 if self.inst is not None: f = MethodType(f, self.inst)
---> 98 return f(*args, **kwargs)
99
100 def __get__(self, inst, owner):
c:\users\fh\dev\fastai2\fastai2\fastai2\data\transforms.py in setups(self, dsets)
184
185 def setups(self, dsets):
--> 186 if self.vocab is None and dsets is not None: self.vocab = CategoryMap(dsets, add_na=self.add_na)
187 self.c = len(self.vocab)
188
c:\users\fh\dev\fastai2\fastai2\fastai2\data\transforms.py in __init__(self, col, sort, add_na)
169 if not hasattr(col,'unique'): col = L(col, use_list=True)
170 # `o==o` is the generalized definition of non-NaN used by Pandas
--> 171 items = L(o for o in col.unique() if o==o)
172 if sort: items = items.sorted()
173 self.items = '#na#' + items if add_na else items
c:\users\fh\dev\fastai2\fastcore\fastcore\foundation.py in unique(self)
372 return self._new(i for i,o in enumerate(self) if f(o))
373
--> 374 def unique(self): return L(dict.fromkeys(self).keys())
375 def enumerate(self): return L(enumerate(self))
376 def val2idx(self): return {v:k for k,v in self.enumerate()}
TypeError: unhashable type: 'numpy.ndarray'
======================
Thank you in advance for your help