# Root data directory, plus the training / validation sub-directories
# expressed relative to that root (validation reads from the `test` folder).
PATH = 'data/aclImdb/'
TRN_PATH = 'train/all/'
VAL_PATH = 'test/all/'

# Full paths: root directory followed by the relative sub-directory.
TRN = PATH + TRN_PATH
VAL = PATH + VAL_PATH
README imdb.vocab imdbEr.txt test/ train/
# Text field: lower-cases its input and tokenizes it with `spacy_tok`.
TEXT = data.Field(tokenize=spacy_tok, lower=True)

# Batch size, and `bptt` (presumably the back-prop-through-time sequence
# length -- confirm against LanguageModelData).
bs = 64
bptt = 70

# Relative sub-directory for each split; the test split reuses the
# validation folder.
FILES = {
    'train': TRN_PATH,
    'validation': VAL_PATH,
    'test': VAL_PATH,
}

# Any keyword LanguageModelData does not name itself (here `min_freq`) is
# forwarded to `field.build_vocab`, per the signature in the traceback below.
# NOTE(review): this call is where the UnicodeDecodeError below is raised.
# The loader opens each file with a bare `open(p)`, so the text is decoded
# with the process default encoding (ASCII here) -- running under a UTF-8
# locale should avoid it; confirm in the target environment.
md = LanguageModelData(PATH, TEXT, bs=bs, bptt=bptt, min_freq=10, **FILES)
UnicodeDecodeError: 'ascii' codec can't decode byte 0xc3 in position 916: ordinal not in range(128)
---------------------------------------------------------------------------
UnicodeDecodeError Traceback (most recent call last)
<ipython-input-...> in <module>()
----> 1 md = LanguageModelData(PATH, TEXT, **FILES, bs=bs, bptt=bptt, min_freq=10)
~/app/DeepLogs/fastai/courses/dl1/fastai/nlp.py in __init__(self, path, field, train, validation, test, bs, bptt, **kwargs)
191 self.path,self.bs = path,bs
192 self.trn_ds,self.val_ds,self.test_ds = ConcatTextDataset.splits(
--> 193 path, text_field=field, train=train, validation=validation, test=test)
194 field.build_vocab(self.trn_ds, **kwargs)
195 self.pad_idx = field.vocab.stoi[field.pad_token]
/usr/local/lib/python3.6/dist-packages/torchtext/data/dataset.py in splits(cls, path, root, train, validation, test, **kwargs)
67 path = cls.download(root)
68 train_data = None if train is None else cls(
---> 69 os.path.join(path, train), **kwargs)
70 val_data = None if validation is None else cls(
71 os.path.join(path, validation), **kwargs)
~/app/DeepLogs/fastai/courses/dl1/fastai/nlp.py in __init__(self, path, text_field, newline_eos, **kwargs)
180 else: paths=[path]
181 for p in paths:
--> 182 for line in open(p): text += text_field.preprocess(line)
183 if newline_eos: text.append('<eos>')
184
/usr/lib/python3.6/encodings/ascii.py in decode(self, input, final)
24 class IncrementalDecoder(codecs.IncrementalDecoder):
25 def decode(self, input, final=False):
---> 26 return codecs.ascii_decode(input, self.errors)[0]
27
28 class StreamWriter(Codec,codecs.StreamWriter):
UnicodeDecodeError: 'ascii' codec can't decode byte 0xc3 in position 916: ordinal not in range(128)