(I could not find the same issue on the forum so I started a new post. Please let me know if this was already discussed somewhere.)
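I was trying to use `ImageDataBunch.from_df` on a sampled dataframe and encountered an error. It looks like it's because `np.concatenate` only works on a Series whose index starts from zero: after `df.sample(100)` the labels no longer have an index entry `0`, so the lookup inside `np.concatenate(labels)` fails with `KeyError: 0` (see the traceback below). Code to reproduce: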
path = untar_data(URLs.PLANET)
df = pd.read_csv(path/'labels.csv', index_col=0)
df = df.sample(100)
data = ImageDataBunch.from_df(path, folder='train', sep=' ', df=df, suffix='.jpg', ds_tfms=get_transforms(), tfms=imagenet_norm, size=224)
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-30-028e7e0fa75b> in <module>
1 df = pd.read_csv(path/'labels.csv', index_col=0)
2 df = df.sample(100)
----> 3 data = ImageDataBunch.from_df(path, folder='train', sep=' ', df=df, suffix='.jpg', ds_tfms=get_transforms(), tfms=imagenet_norm, size=224)
~/git/fastai/fastai/vision/data.py in from_df(cls, path, df, folder, sep, valid_pct, fn_col, label_col, test, suffix, **kwargs)
284 fnames, labels = _df_to_fns_labels(df, suffix=suffix, label_delim=sep, fn_col=fn_col, label_col=label_col)
285 if sep:
--> 286 classes = uniqueify(np.concatenate(labels))
287 datasets = ImageMultiDataset.from_folder(path, folder, fnames, labels, valid_pct=valid_pct, classes=classes)
288 if test: datasets.append(ImageMultiDataset.from_single_folder(path/test, classes=datasets[0].classes))
~/anaconda3/envs/fastaidev/lib/python3.7/site-packages/pandas/core/series.py in __getitem__(self, key)
765 key = com._apply_if_callable(key, self)
766 try:
--> 767 result = self.index.get_value(self, key)
768
769 if not is_scalar(result):
~/anaconda3/envs/fastaidev/lib/python3.7/site-packages/pandas/core/indexes/base.py in get_value(self, series, key)
3116 try:
3117 return self._engine.get_value(s, k,
-> 3118 tz=getattr(series.dtype, 'tz', None))
3119 except KeyError as e1:
3120 if len(self) > 0 and self.inferred_type in ['integer', 'boolean']:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_value()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_value()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
KeyError: 0
I have made a notebook showing the error and suggesting a possible change: https://gist.github.com/yang-zhang/0d74788f82c210a75fd0f549be948b98