I was going over the NLP chapter of the book, and when I got to subword tokenization I encountered an issue whose cause I don’t understand.
The error occurs after the definition of the `subword` function in the 10_nlp chapter of the book. I’m hoping someone can help me figure out what’s going on or what I’m doing wrong.
I’ve installed sentencepiece and tried reinstalling it, but that did not help. I’m currently working on Paperspace Gradient.
Thanks a lot for your help
def subword(sz):
    """Fit a subword tokenizer with vocabulary size `sz` and preview its output.

    The tokenizer is set up on the notebook-level `txts`, then applied to a
    one-element list holding the notebook-level `txt`. At most the first 40
    items of the first result are joined with single spaces and returned.
    """
    tokenizer = SubwordTokenizer(vocab_sz=sz)
    # setup() is the call that fails in the traceback: it trains the
    # sentencepiece model before any text can be tokenized.
    tokenizer.setup(txts)
    tokens = first(tokenizer([txt]))
    return ' '.join(tokens[:40])
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-17-d9dfabdbc5e1> in <module>
----> 1 subword(116)
<ipython-input-15-8b64e195bd76> in subword(sz)
1 def subword(sz):
2 sp = SubwordTokenizer(vocab_sz=sz)
----> 3 sp.setup(txts)
4 return ' '.join(first(sp([txt]))[:40])
/opt/conda/envs/fastai/lib/python3.7/site-packages/fastai2/text/core.py in setup(self, items, rules)
356 for t in progress_bar(maps(*rules, items), total=len(items), leave=False):
357 f.write(f'{t}\n')
--> 358 sp_model = self.train(raw_text_path)
359 self.tok = SentencePieceProcessor()
360 self.tok.Load(str(sp_model))
/opt/conda/envs/fastai/lib/python3.7/site-packages/fastai2/text/core.py in train(self, raw_text_path)
344 f"--character_coverage={self.char_coverage} --model_type={self.model_type}",
345 f"--unk_id={len(spec_tokens)} --pad_id=-1 --bos_id=-1 --eos_id=-1",
--> 346 f"--user_defined_symbols={','.join(spec_tokens)}"]))
347 raw_text_path.unlink()
348 return self.cache_dir/'spm.model'
/opt/conda/envs/fastai/lib/python3.7/site-packages/sentencepiece.py in Train(arg, **kwargs)
402 """Train Sentencepiece model. Accept both kwargs and legacy string arg."""
403 if arg is not None and type(arg) is str:
--> 404 return SentencePieceTrainer._TrainFromString(arg)
405
406 def _encode(value):
/opt/conda/envs/fastai/lib/python3.7/site-packages/sentencepiece.py in _TrainFromString(arg)
380 @staticmethod
381 def _TrainFromString(arg):
--> 382 return _sentencepiece.SentencePieceTrainer__TrainFromString(arg)
383
384 @staticmethod
RuntimeError: Permission denied: ""tmp/spm".model": No such file or directory Error #2