Hi @sgugger,
I’ve noticed a change has been made in fastcore (commit 1cea43e01c277b96b24a0586353089aca1ccd650)
that breaks the datablock logic I was applying. I’m not sure whether this is a bug or whether I need to use an alternative approach.
X and y are numpy arrays (float and int respectively).
Until now this:
X = np.random.rand(5, 1, 2)
y = np.random.randint(1, 3, 5)
items = L(X,y).zip()
TfmdLists(items, [ItemGetter(1)])[0]
returned a single int. Now it raises this error:
TypeError Traceback (most recent call last)
<ipython-input-33-c94f8c4a2ea5> in <module>()
2 y = np.random.randint(1, 3, 5)
3 items = L(X,y).zip()
----> 4 TfmdLists(items, ItemGetter(1))[0]
3 frames
/usr/local/lib/python3.6/dist-packages/fastcore/foundation.py in __call__(cls, x, *args, **kwargs)
39 return x
40
---> 41 res = super().__call__(*((x,) + args), **kwargs)
42 res._newchk = 0
43 return res
/usr/local/lib/python3.6/dist-packages/fastai2/data/core.py in __init__(self, items, tfms, use_list, do_setup, split_idx, train_setup, splits, types, verbose)
216 if do_setup:
217 pv(f"Setting up {self.tfms}", verbose)
--> 218 self.setup(train_setup=train_setup)
219
220 def _new(self, items, split_idx=None, **kwargs):
/usr/local/lib/python3.6/dist-packages/fastai2/data/core.py in setup(self, train_setup)
238 for f in self.tfms.fs:
239 self.types.append(getattr(f, 'input_types', type(x)))
--> 240 x = f(x)
241 self.types.append(type(x))
242 types = L(t if is_listy(t) else [t] for t in self.types).concat().unique()
/usr/local/lib/python3.6/dist-packages/fastcore/transform.py in __call__(self, x, **kwargs)
102 def __call__(self, x, **kwargs):
103 if not _is_tuple(x): return super().__call__(x, **kwargs)
--> 104 return retain_type(tuple(super().__call__(list(x), **kwargs)), x)
105
106 def decode(self, x, **kwargs):
TypeError: 'numpy.int64' object is not iterable
This behavior breaks the DataBlock I was using:
getters = [ItemGetter(0), ItemGetter(1)]
dblock = DataBlock(blocks=(TransformBlock, CategoryBlock()),
getters=getters,
splitter=IndexSplitter(splits[1]),
item_tfms=None,
batch_tfms=None)
source = itemify(X_in_memory, y_in_memory)
dls = dblock.dataloaders(source, bs=64, val_bs=128, num_workers=0)
This is the error I get:
KeyError Traceback (most recent call last)
<ipython-input-35-77456207c564> in <module>()
6 batch_tfms=None)
7 source = itemify(X_in_memory, y_in_memory)
----> 8 dls = dblock.dataloaders(source, bs=64, val_bs=128, num_workers=0)
13 frames
/usr/local/lib/python3.6/dist-packages/fastai2/data/block.py in dataloaders(self, source, path, verbose, **kwargs)
105
106 def dataloaders(self, source, path='.', verbose=False, **kwargs):
--> 107 dsets = self.datasets(source)
108 kwargs = {**self.dls_kwargs, **kwargs, 'verbose': verbose}
109 return dsets.dataloaders(path=path, after_item=self.item_tfms, after_batch=self.batch_tfms, **kwargs)
/usr/local/lib/python3.6/dist-packages/fastai2/data/block.py in datasets(self, source, verbose)
102 splits = (self.splitter or RandomSplitter())(items)
103 pv(f"{len(splits)} datasets of sizes {','.join([str(len(s)) for s in splits])}", verbose)
--> 104 return Datasets(items, tfms=self._combine_type_tfms(), splits=splits, dl_type=self.dl_type, n_inp=self.n_inp, verbose=verbose)
105
106 def dataloaders(self, source, path='.', verbose=False, **kwargs):
/usr/local/lib/python3.6/dist-packages/fastai2/data/core.py in __init__(self, items, tfms, tls, n_inp, dl_type, **kwargs)
278 def __init__(self, items=None, tfms=None, tls=None, n_inp=None, dl_type=None, **kwargs):
279 super().__init__(dl_type=dl_type)
--> 280 self.tls = L(tls if tls else [TfmdLists(items, t, **kwargs) for t in L(ifnone(tfms,[None]))])
281 self.n_inp = ifnone(n_inp, max(1, len(self.tls)-1))
282
/usr/local/lib/python3.6/dist-packages/fastai2/data/core.py in <listcomp>(.0)
278 def __init__(self, items=None, tfms=None, tls=None, n_inp=None, dl_type=None, **kwargs):
279 super().__init__(dl_type=dl_type)
--> 280 self.tls = L(tls if tls else [TfmdLists(items, t, **kwargs) for t in L(ifnone(tfms,[None]))])
281 self.n_inp = ifnone(n_inp, max(1, len(self.tls)-1))
282
/usr/local/lib/python3.6/dist-packages/fastcore/foundation.py in __call__(cls, x, *args, **kwargs)
39 return x
40
---> 41 res = super().__call__(*((x,) + args), **kwargs)
42 res._newchk = 0
43 return res
/usr/local/lib/python3.6/dist-packages/fastai2/data/core.py in __init__(self, items, tfms, use_list, do_setup, split_idx, train_setup, splits, types, verbose)
216 if do_setup:
217 pv(f"Setting up {self.tfms}", verbose)
--> 218 self.setup(train_setup=train_setup)
219
220 def _new(self, items, split_idx=None, **kwargs):
/usr/local/lib/python3.6/dist-packages/fastai2/data/core.py in setup(self, train_setup)
238 for f in self.tfms.fs:
239 self.types.append(getattr(f, 'input_types', type(x)))
--> 240 x = f(x)
241 self.types.append(type(x))
242 types = L(t if is_listy(t) else [t] for t in self.types).concat().unique()
/usr/local/lib/python3.6/dist-packages/fastcore/transform.py in __call__(self, x, **kwargs)
70 @property
71 def name(self): return getattr(self, '_name', _get_name(self))
---> 72 def __call__(self, x, **kwargs): return self._call('encodes', x, **kwargs)
73 def decode (self, x, **kwargs): return self._call('decodes', x, **kwargs)
74 def __repr__(self): return f'{self.name}: {self.encodes} {self.decodes}'
/usr/local/lib/python3.6/dist-packages/fastcore/transform.py in _call(self, fn, x, split_idx, **kwargs)
80 def _call(self, fn, x, split_idx=None, **kwargs):
81 if split_idx!=self.split_idx and self.split_idx is not None: return x
---> 82 return self._do_call(getattr(self, fn), x, **kwargs)
83
84 def _do_call(self, f, x, **kwargs):
/usr/local/lib/python3.6/dist-packages/fastcore/transform.py in _do_call(self, f, x, **kwargs)
85 if not _is_tuple(x):
86 return x if f is None else retain_type(f(x, **kwargs), x, f.returns_none(x))
---> 87 res = tuple(self._do_call(f, x_, **kwargs) for x_ in x)
88 return retain_type(res, x)
89
/usr/local/lib/python3.6/dist-packages/fastcore/transform.py in <genexpr>(.0)
85 if not _is_tuple(x):
86 return x if f is None else retain_type(f(x, **kwargs), x, f.returns_none(x))
---> 87 res = tuple(self._do_call(f, x_, **kwargs) for x_ in x)
88 return retain_type(res, x)
89
/usr/local/lib/python3.6/dist-packages/fastcore/transform.py in _do_call(self, f, x, **kwargs)
84 def _do_call(self, f, x, **kwargs):
85 if not _is_tuple(x):
---> 86 return x if f is None else retain_type(f(x, **kwargs), x, f.returns_none(x))
87 res = tuple(self._do_call(f, x_, **kwargs) for x_ in x)
88 return retain_type(res, x)
/usr/local/lib/python3.6/dist-packages/fastcore/dispatch.py in __call__(self, *args, **kwargs)
96 if not f: return args[0]
97 if self.inst is not None: f = MethodType(f, self.inst)
---> 98 return f(*args, **kwargs)
99
100 def __get__(self, inst, owner):
/usr/local/lib/python3.6/dist-packages/fastai2/data/transforms.py in encodes(self, o)
217 self.c = len(self.vocab)
218
--> 219 def encodes(self, o): return TensorCategory(self.vocab.o2i[o])
220 def decodes(self, o): return Category (self.vocab [o])
221
KeyError: 3
Is this a bug, or is it supposed to work this way?