Hi,
I am new to this forum and to deep learning. I am trying to run a classification problem using the code from Lesson 4, and I get the error below. I know it's something to do with the data in the files, but I do not know how to handle this up front:
UnicodeDecodeError Traceback (most recent call last)
in ()
4 .filter_by_folder(include=['train', 'test'])
5 #We may have other temp folders that contain text files so we only keep what’s in train and test
----> 6 .random_split_by_pct(0.1)
7 #We randomly split and keep 10% (10,000 reviews) for validation
8 .label_for_lm()
/usr/local/lib/python3.6/dist-packages/fastai/data_block.py in _inner(*args, **kwargs)
423 self.valid = fv(*args, **kwargs)
424 self.__class__ = LabelLists
--> 425 self.process()
426 return self
427 return _inner
/usr/local/lib/python3.6/dist-packages/fastai/data_block.py in process(self)
470 "Process the inner datasets."
471 xp,yp = self.get_processors()
--> 472 for ds,n in zip(self.lists, ['train','valid','test']): ds.process(xp, yp, name=n)
473 #progress_bar clear the outputs so in some case warnings issued during processing disappear.
474 for ds in self.lists:
/usr/local/lib/python3.6/dist-packages/fastai/data_block.py in process(self, xp, yp, name)
625 p.warns = []
626 self.x,self.y = self.x[~filt],self.y[~filt]
--> 627 self.x.process(xp)
628 return self
629
/usr/local/lib/python3.6/dist-packages/fastai/data_block.py in process(self, processor)
66 if processor is not None: self.processor = processor
67 self.processor = listify(self.processor)
---> 68 for p in self.processor: p.process(self)
69 return self
70
/usr/local/lib/python3.6/dist-packages/fastai/data_block.py in process(self, ds)
36 def __init__(self, ds:Collection=None): self.ref_ds = ds
37 def process_one(self, item:Any): return item
---> 38 def process(self, ds:Collection): ds.items = array([self.process_one(item) for item in ds.items])
39
40 class ItemList():
/usr/local/lib/python3.6/dist-packages/fastai/data_block.py in (.0)
36 def __init__(self, ds:Collection=None): self.ref_ds = ds
37 def process_one(self, item:Any): return item
---> 38 def process(self, ds:Collection): ds.items = array([self.process_one(item) for item in ds.items])
39
40 class ItemList():
/usr/local/lib/python3.6/dist-packages/fastai/text/data.py in process_one(self, item)
301 "`PreProcessor` that opens the filenames and read the texts."
302 def process_one(self,item):
--> 303 return open_text(item) if isinstance(item, Path) else item
304
305 class TextList(ItemList):
/usr/local/lib/python3.6/dist-packages/fastai/text/data.py in open_text(fn, enc)
266 def open_text(fn:PathOrStr, enc='utf-8'):
267 "Read the text in `fn`."
--> 268 with open(fn,'r', encoding = enc) as f: return ''.join(f.readlines())
269
270 class Text(ItemBase):
/usr/lib/python3.6/codecs.py in decode(self, input, final)
319 # decode input (taking the buffer into account)
320 data = self.buffer + input
--> 321 (result, consumed) = self._buffer_decode(data, self.errors, final)
322 # keep undecoded input until the next call
323 self.buffer = data[consumed:]
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xae in position 185: invalid start byte