Issue with ItemTransform returning tuples

Hi @sgugger,

I’ve noticed that a change made in fastcore (commit 1cea43e01c277b96b24a0586353089aca1ccd650)
breaks the datablock logic I was using. I’m not sure whether this is a bug or whether I need to use a different approach.

X and y are numpy arrays (float and int respectively).
Until now this:

X = np.random.rand(5, 1, 2)
y = np.random.randint(1, 3, 5)
items = L(X,y).zip()
TfmdLists(items, [ItemGetter(1)])[0]

returned a single int. Now it raises this error:

TypeError                                 Traceback (most recent call last)
<ipython-input-33-c94f8c4a2ea5> in <module>()
      2 y = np.random.randint(1, 3, 5)
      3 items = L(X,y).zip()
----> 4 TfmdLists(items, ItemGetter(1))[0]

3 frames
/usr/local/lib/python3.6/dist-packages/fastcore/foundation.py in __call__(cls, x, *args, **kwargs)
     39             return x
     40
---> 41         res = super().__call__(*((x,) + args), **kwargs)
     42         res._newchk = 0
     43         return res

/usr/local/lib/python3.6/dist-packages/fastai2/data/core.py in __init__(self, items, tfms, use_list, do_setup, split_idx, train_setup, splits, types, verbose)
    216         if do_setup:
    217             pv(f"Setting up {self.tfms}", verbose)
--> 218             self.setup(train_setup=train_setup)
    219
    220     def _new(self, items, split_idx=None, **kwargs):

/usr/local/lib/python3.6/dist-packages/fastai2/data/core.py in setup(self, train_setup)
    238             for f in self.tfms.fs:
    239                 self.types.append(getattr(f, 'input_types', type(x)))
--> 240                 x = f(x)
    241             self.types.append(type(x))
    242         types = L(t if is_listy(t) else [t] for t in self.types).concat().unique()

/usr/local/lib/python3.6/dist-packages/fastcore/transform.py in __call__(self, x, **kwargs)
    102     def __call__(self, x, **kwargs):
    103         if not _is_tuple(x): return super().__call__(x, **kwargs)
--> 104         return retain_type(tuple(super().__call__(list(x), **kwargs)), x)
    105
    106     def decode(self, x, **kwargs):

TypeError: 'numpy.int64' object is not iterable

This behavior breaks the DataBlock I was using:

getters = [ItemGetter(0), ItemGetter(1)]
dblock = DataBlock(blocks=(TransformBlock, CategoryBlock()),
                   getters=getters,
                   splitter=IndexSplitter(splits[1]),
                   item_tfms=None,
                   batch_tfms=None)
source = itemify(X_in_memory, y_in_memory)
dls = dblock.dataloaders(source, bs=64, val_bs=128, num_workers=0)

This is the error I get:

KeyError                                  Traceback (most recent call last)
<ipython-input-35-77456207c564> in <module>()
      6                    batch_tfms=None)
      7 source = itemify(X_in_memory, y_in_memory)
----> 8 dls = dblock.dataloaders(source, bs=64, val_bs=128, num_workers=0)

13 frames
/usr/local/lib/python3.6/dist-packages/fastai2/data/block.py in dataloaders(self, source, path, verbose, **kwargs)
    105 
    106     def dataloaders(self, source, path='.', verbose=False, **kwargs):
--> 107         dsets = self.datasets(source)
    108         kwargs = {**self.dls_kwargs, **kwargs, 'verbose': verbose}
    109         return dsets.dataloaders(path=path, after_item=self.item_tfms, after_batch=self.batch_tfms, **kwargs)

/usr/local/lib/python3.6/dist-packages/fastai2/data/block.py in datasets(self, source, verbose)
    102         splits = (self.splitter or RandomSplitter())(items)
    103         pv(f"{len(splits)} datasets of sizes {','.join([str(len(s)) for s in splits])}", verbose)
--> 104         return Datasets(items, tfms=self._combine_type_tfms(), splits=splits, dl_type=self.dl_type, n_inp=self.n_inp, verbose=verbose)
    105 
    106     def dataloaders(self, source, path='.', verbose=False, **kwargs):

/usr/local/lib/python3.6/dist-packages/fastai2/data/core.py in __init__(self, items, tfms, tls, n_inp, dl_type, **kwargs)
    278     def __init__(self, items=None, tfms=None, tls=None, n_inp=None, dl_type=None, **kwargs):
    279         super().__init__(dl_type=dl_type)
--> 280         self.tls = L(tls if tls else [TfmdLists(items, t, **kwargs) for t in L(ifnone(tfms,[None]))])
    281         self.n_inp = ifnone(n_inp, max(1, len(self.tls)-1))
    282 

/usr/local/lib/python3.6/dist-packages/fastai2/data/core.py in <listcomp>(.0)
    278     def __init__(self, items=None, tfms=None, tls=None, n_inp=None, dl_type=None, **kwargs):
    279         super().__init__(dl_type=dl_type)
--> 280         self.tls = L(tls if tls else [TfmdLists(items, t, **kwargs) for t in L(ifnone(tfms,[None]))])
    281         self.n_inp = ifnone(n_inp, max(1, len(self.tls)-1))
    282 

/usr/local/lib/python3.6/dist-packages/fastcore/foundation.py in __call__(cls, x, *args, **kwargs)
     39             return x
     40 
---> 41         res = super().__call__(*((x,) + args), **kwargs)
     42         res._newchk = 0
     43         return res

/usr/local/lib/python3.6/dist-packages/fastai2/data/core.py in __init__(self, items, tfms, use_list, do_setup, split_idx, train_setup, splits, types, verbose)
    216         if do_setup:
    217             pv(f"Setting up {self.tfms}", verbose)
--> 218             self.setup(train_setup=train_setup)
    219 
    220     def _new(self, items, split_idx=None, **kwargs):

/usr/local/lib/python3.6/dist-packages/fastai2/data/core.py in setup(self, train_setup)
    238             for f in self.tfms.fs:
    239                 self.types.append(getattr(f, 'input_types', type(x)))
--> 240                 x = f(x)
    241             self.types.append(type(x))
    242         types = L(t if is_listy(t) else [t] for t in self.types).concat().unique()

/usr/local/lib/python3.6/dist-packages/fastcore/transform.py in __call__(self, x, **kwargs)
     70     @property
     71     def name(self): return getattr(self, '_name', _get_name(self))
---> 72     def __call__(self, x, **kwargs): return self._call('encodes', x, **kwargs)
     73     def decode  (self, x, **kwargs): return self._call('decodes', x, **kwargs)
     74     def __repr__(self): return f'{self.name}: {self.encodes} {self.decodes}'

/usr/local/lib/python3.6/dist-packages/fastcore/transform.py in _call(self, fn, x, split_idx, **kwargs)
     80     def _call(self, fn, x, split_idx=None, **kwargs):
     81         if split_idx!=self.split_idx and self.split_idx is not None: return x
---> 82         return self._do_call(getattr(self, fn), x, **kwargs)
     83 
     84     def _do_call(self, f, x, **kwargs):

/usr/local/lib/python3.6/dist-packages/fastcore/transform.py in _do_call(self, f, x, **kwargs)
     85         if not _is_tuple(x):
     86             return x if f is None else retain_type(f(x, **kwargs), x, f.returns_none(x))
---> 87         res = tuple(self._do_call(f, x_, **kwargs) for x_ in x)
     88         return retain_type(res, x)
     89 

/usr/local/lib/python3.6/dist-packages/fastcore/transform.py in <genexpr>(.0)
     85         if not _is_tuple(x):
     86             return x if f is None else retain_type(f(x, **kwargs), x, f.returns_none(x))
---> 87         res = tuple(self._do_call(f, x_, **kwargs) for x_ in x)
     88         return retain_type(res, x)
     89 

/usr/local/lib/python3.6/dist-packages/fastcore/transform.py in _do_call(self, f, x, **kwargs)
     84     def _do_call(self, f, x, **kwargs):
     85         if not _is_tuple(x):
---> 86             return x if f is None else retain_type(f(x, **kwargs), x, f.returns_none(x))
     87         res = tuple(self._do_call(f, x_, **kwargs) for x_ in x)
     88         return retain_type(res, x)

/usr/local/lib/python3.6/dist-packages/fastcore/dispatch.py in __call__(self, *args, **kwargs)
     96         if not f: return args[0]
     97         if self.inst is not None: f = MethodType(f, self.inst)
---> 98         return f(*args, **kwargs)
     99 
    100     def __get__(self, inst, owner):

/usr/local/lib/python3.6/dist-packages/fastai2/data/transforms.py in encodes(self, o)
    217         self.c = len(self.vocab)
    218 
--> 219     def encodes(self, o): return TensorCategory(self.vocab.o2i[o])
    220     def decodes(self, o): return Category      (self.vocab    [o])
    221 

KeyError: 3

Is this a bug, or is it supposed to work this way?

I can’t read your stack traces. In general, please copy them without any adjustment and put them between ``` (like I did in the edit of your post, but it’s still not readable because you changed some things in it).

I can’t say anything about your second error since I don’t know what TSTensorBlock is, but your first snippet of code works perfectly for me. Are you sure you have an editable install of fastcore / fastai2?

Sorry about that. It’s the first time I’ve reported an issue :slight_smile:

I have updated the previous post. I hope you can read the error now.

I’ve replaced TSTensorBlock with TransformBlock, as it does the same thing.

On my home PC it works fine. There I have fastai2 0.0.16 and fastcore 0.1.17 installed.
The issue appears in Colab, where I have fastai2 0.0.17 and fastcore 0.1.17.

Eh, my fastcore editable install was broken, so I wasn’t seeing your bug. I know why it’s happening, will follow up with a fix soon.

And this is fixed in fastcore master.

Wow, that was fast, man!! :clap: :clap: :clap:

I’ve just tested it and it works again.

Thanks a lot for your help and your continuous support Sylvain!! Have a great day!

1 Like

Hi guys,

I don’t know if it’s related, but I get a similar error, and this is the only post my search turned up that mentions this error:

TypeError: 'numpy.int64' object is not iterable

Here is the full error:

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-27-a69b020813b3> in <module>
----> 1 dls = TextDataLoaders.from_df(df2)

~/anaconda3/lib/python3.7/site-packages/fastai2/text/data.py in from_df(cls, df, path, valid_pct, seed, text_col, label_col, label_delim, y_block, text_vocab, is_lm, valid_col, tok_tfm, seq_len, **kwargs)
    235                            get_y=None if is_lm else ColReader(label_col, label_delim=label_delim),
    236                            splitter=splitter)
--> 237         return cls.from_dblock(dblock, df, path=path, seq_len=seq_len, **kwargs)
    238 
    239     @classmethod

~/anaconda3/lib/python3.7/site-packages/fastai2/data/core.py in from_dblock(cls, dblock, source, path, bs, val_bs, shuffle_train, device, **kwargs)
    161     @classmethod
    162     def from_dblock(cls, dblock, source, path='.',  bs=64, val_bs=None, shuffle_train=True, device=None, **kwargs):
--> 163         return dblock.dataloaders(source, path=path, bs=bs, val_bs=val_bs, shuffle_train=shuffle_train, device=device, **kwargs)
    164 
    165     _docs=dict(__getitem__="Retrieve `DataLoader` at `i` (`0` is training, `1` is validation)",

~/anaconda3/lib/python3.7/site-packages/fastai2/data/block.py in dataloaders(self, source, path, verbose, **kwargs)
    105 
    106     def dataloaders(self, source, path='.', verbose=False, **kwargs):
--> 107         dsets = self.datasets(source)
    108         kwargs = {**self.dls_kwargs, **kwargs, 'verbose': verbose}
    109         return dsets.dataloaders(path=path, after_item=self.item_tfms, after_batch=self.batch_tfms, **kwargs)

~/anaconda3/lib/python3.7/site-packages/fastai2/data/block.py in datasets(self, source, verbose)
    102         splits = (self.splitter or RandomSplitter())(items)
    103         pv(f"{len(splits)} datasets of sizes {','.join([str(len(s)) for s in splits])}", verbose)
--> 104         return Datasets(items, tfms=self._combine_type_tfms(), splits=splits, dl_type=self.dl_type, n_inp=self.n_inp, verbose=verbose)
    105 
    106     def dataloaders(self, source, path='.', verbose=False, **kwargs):

~/anaconda3/lib/python3.7/site-packages/fastai2/data/core.py in __init__(self, items, tfms, tls, n_inp, dl_type, **kwargs)
    278     def __init__(self, items=None, tfms=None, tls=None, n_inp=None, dl_type=None, **kwargs):
    279         super().__init__(dl_type=dl_type)
--> 280         self.tls = L(tls if tls else [TfmdLists(items, t, **kwargs) for t in L(ifnone(tfms,[None]))])
    281         self.n_inp = ifnone(n_inp, max(1, len(self.tls)-1))
    282 

~/anaconda3/lib/python3.7/site-packages/fastai2/data/core.py in <listcomp>(.0)
    278     def __init__(self, items=None, tfms=None, tls=None, n_inp=None, dl_type=None, **kwargs):
    279         super().__init__(dl_type=dl_type)
--> 280         self.tls = L(tls if tls else [TfmdLists(items, t, **kwargs) for t in L(ifnone(tfms,[None]))])
    281         self.n_inp = ifnone(n_inp, max(1, len(self.tls)-1))
    282 

~/anaconda3/lib/python3.7/site-packages/fastcore/foundation.py in __call__(cls, x, *args, **kwargs)
     39             return x
     40 
---> 41         res = super().__call__(*((x,) + args), **kwargs)
     42         res._newchk = 0
     43         return res

~/anaconda3/lib/python3.7/site-packages/fastai2/data/core.py in __init__(self, items, tfms, use_list, do_setup, split_idx, train_setup, splits, types, verbose)
    216         if do_setup:
    217             pv(f"Setting up {self.tfms}", verbose)
--> 218             self.setup(train_setup=train_setup)
    219 
    220     def _new(self, items, split_idx=None, **kwargs):

~/anaconda3/lib/python3.7/site-packages/fastai2/data/core.py in setup(self, train_setup)
    238             for f in self.tfms.fs:
    239                 self.types.append(getattr(f, 'input_types', type(x)))
--> 240                 x = f(x)
    241             self.types.append(type(x))
    242         types = L(t if is_listy(t) else [t] for t in self.types).concat().unique()

~/anaconda3/lib/python3.7/site-packages/fastcore/transform.py in __call__(self, x, **kwargs)
     70     @property
     71     def name(self): return getattr(self, '_name', _get_name(self))
---> 72     def __call__(self, x, **kwargs): return self._call('encodes', x, **kwargs)
     73     def decode  (self, x, **kwargs): return self._call('decodes', x, **kwargs)
     74     def __repr__(self): return f'{self.name}: {self.encodes} {self.decodes}'

~/anaconda3/lib/python3.7/site-packages/fastcore/transform.py in _call(self, fn, x, split_idx, **kwargs)
     80     def _call(self, fn, x, split_idx=None, **kwargs):
     81         if split_idx!=self.split_idx and self.split_idx is not None: return x
---> 82         return self._do_call(getattr(self, fn), x, **kwargs)
     83 
     84     def _do_call(self, f, x, **kwargs):

~/anaconda3/lib/python3.7/site-packages/fastcore/transform.py in _do_call(self, f, x, **kwargs)
     84     def _do_call(self, f, x, **kwargs):
     85         if not _is_tuple(x):
---> 86             return x if f is None else retain_type(f(x, **kwargs), x, f.returns_none(x))
     87         res = tuple(self._do_call(f, x_, **kwargs) for x_ in x)
     88         return retain_type(res, x)

~/anaconda3/lib/python3.7/site-packages/fastcore/dispatch.py in __call__(self, *args, **kwargs)
     96         if not f: return args[0]
     97         if self.inst is not None: f = MethodType(f, self.inst)
---> 98         return f(*args, **kwargs)
     99 
    100     def __get__(self, inst, owner):

~/anaconda3/lib/python3.7/site-packages/fastai2/text/data.py in encodes(self, o)
     40             self.o2i = defaultdict(int, {v:k for k,v in enumerate(self.vocab) if v != 'xxfake'})
     41 
---> 42     def encodes(self, o): return TensorText(tensor([self.o2i  [o_] for o_ in o]))
     43     def decodes(self, o): return L(self.vocab[o_] for o_ in o if self.vocab[o_] != PAD)
     44 

TypeError: 'numpy.int64' object is not iterable

I’m just trying to load a dataset from a CSV file, which can be found here:

Here is the code (I just load train.csv and subset it) :slight_smile:

df2 = df2[['comment_text', 'toxic']]
dls = TextDataLoaders.from_df(df2)