Random_split_by_pct > IndexError

After updating the library (on Google Cloud), my data_block API code no longer works: the random_split_by_pct() call raises an error. Checking with path_img.ls(), the directory looks correct: a bunch of folders with images.

The error: IndexError: index 0 is out of bounds for axis 0 with size 0

Here is the complete Traceback:


---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-10-78f0ff0ee252> in <module>
      1 data = (ImageItemList.from_folder(path_img, extensions='.jpg')
      2        .use_partial_data(sample_pct = .75, seed = 17)
----> 3        .random_split_by_pct(valid_pct=0.2, seed = 4)
      4        .label_from_folder()
      5        .transform(tfms, size=299)

/opt/anaconda3/lib/python3.7/site-packages/fastai/data_block.py in _inner(*args, **kwargs)
    427         assert isinstance(fv, Callable)
    428         def _inner(*args, **kwargs):
--> 429             self.train = ft(*args, **kwargs)
    430             assert isinstance(self.train, LabelList)
    431             kwargs['label_cls'] = self.train.y.__class__

/opt/anaconda3/lib/python3.7/site-packages/fastai/data_block.py in label_from_folder(self, label_cls, **kwargs)
    260     def label_from_folder(self, label_cls:Callable=None, **kwargs)->'LabelList':
    261         "Give a label to each filename depending on its folder."
--> 262         return self.label_from_func(func=lambda o: o.parts[-2], label_cls=label_cls, **kwargs)
    263 
    264     def label_from_re(self, pat:str, full_path:bool=False, label_cls:Callable=None, **kwargs)->'LabelList':

/opt/anaconda3/lib/python3.7/site-packages/fastai/data_block.py in label_from_func(self, func, label_cls, **kwargs)
    256     def label_from_func(self, func:Callable, label_cls:Callable=None, **kwargs)->'LabelList':
    257         "Apply `func` to every input to get its label."
--> 258         return self.label_from_list([func(o) for o in self.items], label_cls=label_cls, **kwargs)
    259 
    260     def label_from_folder(self, label_cls:Callable=None, **kwargs)->'LabelList':

/opt/anaconda3/lib/python3.7/site-packages/fastai/data_block.py in label_from_list(self, labels, label_cls, **kwargs)
    232         "Label `self.items` with `labels`."
    233         labels = array(labels, dtype=object)
--> 234         label_cls = self.get_label_cls(labels, label_cls=label_cls, **kwargs)
    235         y = label_cls(labels, path=self.path, **kwargs)
    236         res = self._label_list(x=self, y=y)

/opt/anaconda3/lib/python3.7/site-packages/fastai/data_block.py in get_label_cls(self, labels, label_cls, label_delim, **kwargs)
    222         if label_cls is not None:               return label_cls
    223         if self.label_cls is not None:          return self.label_cls
--> 224         it = index_row(labels,0)
    225         if label_delim is not None:             return MultiCategoryList
    226         if isinstance(it, (float, np.float32)): return FloatList

/opt/anaconda3/lib/python3.7/site-packages/fastai/core.py in index_row(a, idxs)
    238         if isinstance(res,(pd.DataFrame,pd.Series)): return res.copy()
    239         return res
--> 240     return a[idxs]
    241 
    242 def func_args(func)->bool:

IndexError: index 0 is out of bounds for axis 0 with size 0

I don’t have a clue! I tried no split() as well and changed ImageItemList to ImageList already.
It seems so straightforward; what am I missing?

Please, any help is welcome! :confused:

Hi,
First, try to confirm whether the images have actually been read.
You can do it step by step:
data = ImageItemList.from_folder(path_img, extensions='.jpg')
data = data.split_by_rand_pct(valid_pct=0.2, seed=4)  # instead of random_split_by_pct

Yep, that's it!
I checked

data = ImageItemList.from_folder(path)
I was not reading any images. Then I remounted my drive, and it worked.