BK201
February 16, 2019, 9:18am
1
Why is label_from_df not working in this case?
I am trying to load labels from this dataframe after making this ImageItemList from a csv file. Why is this happening?
df.head()
           Image         Id
0  0000e88ab.jpg  w_f48451c
1  0001f9222.jpg  w_c3d896a
2  00029d126.jpg  w_20df2c5
3  00050a15a.jpg  new_whale
4  0005c1ef8.jpg  new_whale
dataset = ImageItemList.from_csv(path/'train', csv_name='../train.csv')
dataset.xtra.head()
           Image         Id
0  0000e88ab.jpg  w_f48451c
1  0001f9222.jpg  w_c3d896a
2  00029d126.jpg  w_20df2c5
3  00050a15a.jpg  new_whale
4  0005c1ef8.jpg  new_whale
The fastai version is 1.0.42
from fastai.version import __version__
print(__version__)
1.0.42
dataset.label_from_df(cols='Id')
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
~/anaconda3/envs/Torch10cuda/lib/python3.7/site-packages/IPython/core/formatters.py in __call__(self, obj)
700 type_pprinters=self.type_printers,
701 deferred_pprinters=self.deferred_printers)
--> 702 printer.pretty(obj)
703 printer.flush()
704 return stream.getvalue()
~/anaconda3/envs/Torch10cuda/lib/python3.7/site-packages/IPython/lib/pretty.py in pretty(self, obj)
400 if cls is not object \
401 and callable(cls.__dict__.get('__repr__')):
--> 402 return _repr_pprint(obj, self, cycle)
403
404 return _default_pprint(obj, self, cycle)
~/anaconda3/envs/Torch10cuda/lib/python3.7/site-packages/IPython/lib/pretty.py in _repr_pprint(obj, p, cycle)
695 """A pprint that just redirects to the normal repr function."""
696 # Find newlines and replace them with p.break_()
--> 697 output = repr(obj)
698 for idx,output_line in enumerate(output.splitlines()):
699 if idx:
~/anaconda3/envs/Torch10cuda/lib/python3.7/site-packages/fastai/data_block.py in __repr__(self)
534 def __repr__(self)->str:
535 x = f'{self.x}' # force this to happen first
--> 536 return f'{self.__class__.__name__}\ny: {self.y}\nx: {x}'
537 def predict(self, res):
538 "Delegates predict call on `res` to `self.y`."
~/anaconda3/envs/Torch10cuda/lib/python3.7/site-packages/fastai/data_block.py in __repr__(self)
59 return self.items[i]
60 def __repr__(self)->str:
---> 61 items = [self[i] for i in range(min(5,len(self.items)))]
62 return f'{self.__class__.__name__} ({len(self.items)} items)\n{items}...\nPath: {self.path}'
63
~/anaconda3/envs/Torch10cuda/lib/python3.7/site-packages/fastai/data_block.py in <listcomp>(.0)
59 return self.items[i]
60 def __repr__(self)->str:
---> 61 items = [self[i] for i in range(min(5,len(self.items)))]
62 return f'{self.__class__.__name__} ({len(self.items)} items)\n{items}...\nPath: {self.path}'
63
~/anaconda3/envs/Torch10cuda/lib/python3.7/site-packages/fastai/data_block.py in __getitem__(self, idxs)
92 def __getitem__(self,idxs:int)->Any:
93 idxs = try_int(idxs)
---> 94 if isinstance(idxs, numbers.Integral): return self.get(idxs)
95 else: return self.new(self.items[idxs], xtra=index_row(self.xtra, idxs))
96
~/anaconda3/envs/Torch10cuda/lib/python3.7/site-packages/fastai/data_block.py in get(self, i)
320 o = self.items[i]
321 if o is None: return None
--> 322 return Category(o, self.classes[o])
323
324 def analyze_pred(self, pred, thresh:float=0.5): return pred.argmax()
TypeError: 'NoneType' object is not subscriptable
BK201
February 16, 2019, 3:54pm
2
OK! I got it. It seems very dumb in hindsight: I need to first split the ImageItemList into training and validation sets before calling label_from_df. The data block API only works in a particular order.
sgugger
February 16, 2019, 7:19pm
3
Yes, it used to throw a clear error message for that. Let me try to see why it doesn’t in this case.