I am trying to run the lesson3-imdb notebook (the most recent version available, as of ~2018-12-13), but when I get to the last part about classification I cannot create the data_clas object, even after deleting the ~/.fastai/data/imdb folder and letting it be untar’ed again.
It seems as if the imdb data set is not in the format that the cell is expecting, but the imdb data works fine in the first part of the notebook.
Any ideas? Thank you!
Here is the stacktrace again as text:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
~/fastaiv1/notebooks/03.imdb/fastai/data_block.py in process_one(self, item)
277 def process_one(self,item):
--> 278 try: return self.c2i[item] if item is not None else None
279 except:
KeyError: 'unsup'
During handling of the above exception, another exception occurred:
Exception Traceback (most recent call last)
<ipython-input-20-10dab77f990b> in <module>
3 .split_by_folder(valid='test')
4 #split by train and valid folder (that only keeps 'train' and 'test' so no need to filter)
----> 5 .label_from_folder(classes=['neg', 'pos'])
6 #label them all with their folders
7 .databunch(bs=bs))
~/fastaiv1/notebooks/03.imdb/fastai/data_block.py in _inner(*args, **kwargs)
391 self.valid = fv(*args, **kwargs)
392 self.__class__ = LabelLists
--> 393 self.process()
394 return self
395 return _inner
~/fastaiv1/notebooks/03.imdb/fastai/data_block.py in process(self)
438 "Process the inner datasets."
439 xp,yp = self.get_processors()
--> 440 for i,ds in enumerate(self.lists): ds.process(xp, yp, filter_missing_y=i==0)
441 return self
442
~/fastaiv1/notebooks/03.imdb/fastai/data_block.py in process(self, xp, yp, filter_missing_y)
563 def process(self, xp=None, yp=None, filter_missing_y:bool=False):
564 "Launch the processing on `self.x` and `self.y` with `xp` and `yp`."
--> 565 self.y.process(yp)
566 if filter_missing_y and (getattr(self.x, 'filter_missing_y', None)):
567 filt = array([o is None for o in self.y])
~/fastaiv1/notebooks/03.imdb/fastai/data_block.py in process(self, processor)
66 if processor is not None: self.processor = processor
67 self.processor = listify(self.processor)
---> 68 for p in self.processor: p.process(self)
69 return self
70
~/fastaiv1/notebooks/03.imdb/fastai/data_block.py in process(self, ds)
284 ds.classes = self.classes
285 ds.c2i = self.c2i
--> 286 super().process(ds)
287
288 def __getstate__(self): return {'classes':self.classes}
~/fastaiv1/notebooks/03.imdb/fastai/data_block.py in process(self, ds)
36 def __init__(self, ds:Collection=None): self.ref_ds = ds
37 def process_one(self, item:Any): return item
---> 38 def process(self, ds:Collection): ds.items = array([self.process_one(item) for item in ds.items])
39
40 class ItemList():
~/fastaiv1/notebooks/03.imdb/fastai/data_block.py in <listcomp>(.0)
36 def __init__(self, ds:Collection=None): self.ref_ds = ds
37 def process_one(self, item:Any): return item
---> 38 def process(self, ds:Collection): ds.items = array([self.process_one(item) for item in ds.items])
39
40 class ItemList():
~/fastaiv1/notebooks/03.imdb/fastai/data_block.py in process_one(self, item)
278 try: return self.c2i[item] if item is not None else None
279 except:
--> 280 raise Exception("Your validation data contains a label that isn't present in the training set, please fix your data.")
281
282 def process(self, ds):
Exception: Your validation data contains a label that isn't present in the training set, please fix your data.