Beginning of NLP

juliaroz · November 12, 2018, 12:19pm

You can run some terminal commands in the notebook, for example:
!pip install fastai
!pip install torch
Adjust the commands to the libs / versions you need.

howkhang · November 12, 2018, 12:35pm

Try this from the course docs.

cudawarped · November 20, 2018, 6:31pm

Do you mean that to calculate the perplexity for a language model I need to loop through the validation set word by word and compare the predicted word to the next word I am going to use to get a prediction?

raghavanm · November 24, 2018, 6:21am

I am trying to do similar thing.
I am trying to create TextLMDataBunch from a csv file with only one column of text and its failing with this error.
TLMDB = TextLMDataBunch.from_csv(path=’./LM_Train/’, csv_name=‘trainLM.txt’ ,bs=40)

_RemoteTraceback Traceback (most recent call last)
_RemoteTraceback:
“”"
Traceback (most recent call last):
File “/opt/anaconda3/lib/python3.7/concurrent/futures/process.py”, line 232, in _process_worker
r = call_item.fn(*call_item.args, **call_item.kwargs)
File “/opt/anaconda3/lib/python3.7/concurrent/futures/process.py”, line 191, in _process_chunk
return [fn(*args) for args in chunk]
File “/opt/anaconda3/lib/python3.7/concurrent/futures/process.py”, line 191, in
return [fn(*args) for args in chunk]
File “/opt/anaconda3/lib/python3.7/site-packages/fastai/text/transform.py”, line 103, in _process_all_1
return [self.process_text(t, tok) for t in texts]
File “/opt/anaconda3/lib/python3.7/site-packages/fastai/text/transform.py”, line 103, in
return [self.process_text(t, tok) for t in texts]
File “/opt/anaconda3/lib/python3.7/site-packages/fastai/text/transform.py”, line 94, in process_text
for rule in self.pre_rules: t = rule(t)
File “/opt/anaconda3/lib/python3.7/site-packages/fastai/text/transform.py”, line 58, in fix_html
x = x.replace(’#39;’, “’”).replace(‘amp;’, ‘&’).replace(’#146;’, “’”).replace(
AttributeError: ‘float’ object has no attribute ‘replace’
“”"

The above exception was the direct cause of the following exception:

AttributeError Traceback (most recent call last)
in
----> 1 TLMDB = TextLMDataBunch.from_csv(path=’./LM_Train/’, csv_name=‘trainLM.txt’ ,bs=40)

/opt/anaconda3/lib/python3.7/site-packages/fastai/text/data.py in from_csv(cls, path, csv_name, valid_pct, test, tokenizer, vocab, classes, header, text_cols, label_cols, label_delim, **kwargs)
172 test_df = None if test is None else pd.read_csv(Path(path)/test, header=header)
173 return cls.from_df(path, train_df, valid_df, test_df, tokenizer, vocab, classes, text_cols,
–> 174 label_cols, label_delim, **kwargs)
175
176 @classmethod

/opt/anaconda3/lib/python3.7/site-packages/fastai/text/data.py in from_df(cls, path, train_df, valid_df, test_df, tokenizer, vocab, classes, text_cols, label_cols, label_delim, **kwargs)
157 src = ItemLists(path, TextList.from_df(train_df, path, cols=text_cols, processor=processor),
158 TextList.from_df(valid_df, path, cols=text_cols, processor=processor))
–> 159 src = src.label_for_lm() if cls==TextLMDataBunch else src.label_from_df(cols=label_cols, classes=classes, sep=label_delim)
160 if test_df is not None: src.add_test(TextList.from_df(test_df, path, cols=text_cols))
161 return src.databunch(**kwargs)

/opt/anaconda3/lib/python3.7/site-packages/fastai/data_block.py in _inner(*args, **kwargs)
316 self.valid = fv(*args, **kwargs)
317 self.class = LabelLists
–> 318 self.process()
319 return self
320 return _inner

/opt/anaconda3/lib/python3.7/site-packages/fastai/data_block.py in process(self)
360 def process(self):
361 xp,yp = self.get_processors()
–> 362 for i,ds in enumerate(self.lists): ds.process(xp, yp, filter_missing_y=i==0)
363 return self
364

/opt/anaconda3/lib/python3.7/site-packages/fastai/data_block.py in process(self, xp, yp, filter_missing_y)
428 filt = array([o is None for o in self.y])
429 if filt.sum()>0: self.x,self.y = self.x[~filt],self.y[~filt]
–> 430 self.x.process(xp)
431 return self
432

/opt/anaconda3/lib/python3.7/site-packages/fastai/data_block.py in process(self, processor)
58 if processor is not None: self.processor = processor
59 self.processor = listify(self.processor)
—> 60 for p in self.processor: p.process(self)
61 return self
62

/opt/anaconda3/lib/python3.7/site-packages/fastai/text/data.py in process(self, ds)
278 tokens = []
279 for i in progress_bar(range(0,len(ds),self.chunksize), leave=False):
–> 280 tokens += self.tokenizer.process_all(ds.items[i:i+self.chunksize])
281 ds.items = tokens
282

/opt/anaconda3/lib/python3.7/site-packages/fastai/text/transform.py in process_all(self, texts)
107 if self.n_cpus <= 1: return self._process_all_1(texts)
108 with ProcessPoolExecutor(self.n_cpus) as e:
–> 109 return sum(e.map(self._process_all_1, partition_by_cores(texts, self.n_cpus)), [])
110
111 class Vocab():

/opt/anaconda3/lib/python3.7/concurrent/futures/process.py in _chain_from_iterable_of_lists(iterable)
474 careful not to keep references to yielded objects.
475 “”"
–> 476 for element in iterable:
477 element.reverse()
478 while element:

/opt/anaconda3/lib/python3.7/concurrent/futures/_base.py in result_iterator()
584 # Careful not to keep a reference to the popped future
585 if timeout is None:
–> 586 yield fs.pop().result()
587 else:
588 yield fs.pop().result(end_time - time.time())

/opt/anaconda3/lib/python3.7/concurrent/futures/_base.py in result(self, timeout)
423 raise CancelledError()
424 elif self._state == FINISHED:
–> 425 return self.__get_result()
426
427 self._condition.wait(timeout)

/opt/anaconda3/lib/python3.7/concurrent/futures/_base.py in __get_result(self)
382 def __get_result(self):
383 if self._exception:
–> 384 raise self._exception
385 else:
386 return self._result

AttributeError: ‘float’ object has no attribute ‘replace’

raghavanm · November 24, 2018, 8:24am

TL = (TextList.from_df(text_data_frame, Path(), cols=[‘statement’])
.random_split_by_pct(0.1)
.label_for_lm()
.databunch(bs=44))
@sgugger Could you please take a look? Thanks in advance. This problem seems to be recurring.
Error
Screen Shot 2018-11-24 at 12.23.33 AM.png1596×844 85.3 KB

Error message:

_RemoteTraceback Traceback (most recent call last)
_RemoteTraceback:
“”"
Traceback (most recent call last):
File “/opt/anaconda3/lib/python3.7/concurrent/futures/process.py”, line 232, in _process_worker
r = call_item.fn(*call_item.args, **call_item.kwargs)
File “/opt/anaconda3/lib/python3.7/concurrent/futures/process.py”, line 191, in _process_chunk
return [fn(*args) for args in chunk]
File “/opt/anaconda3/lib/python3.7/concurrent/futures/process.py”, line 191, in
return [fn(*args) for args in chunk]
File “/opt/anaconda3/lib/python3.7/site-packages/fastai/text/transform.py”, line 103, in _process_all_1
return [self.process_text(t, tok) for t in texts]
File “/opt/anaconda3/lib/python3.7/site-packages/fastai/text/transform.py”, line 103, in
return [self.process_text(t, tok) for t in texts]
File “/opt/anaconda3/lib/python3.7/site-packages/fastai/text/transform.py”, line 94, in process_text
for rule in self.pre_rules: t = rule(t)
File “/opt/anaconda3/lib/python3.7/site-packages/fastai/text/transform.py”, line 58, in fix_html
x = x.replace(’#39;’, “’”).replace(‘amp;’, ‘&’).replace(’#146;’, “’”).replace(
AttributeError: ‘float’ object has no attribute ‘replace’
“”"

The above exception was the direct cause of the following exception:

AttributeError Traceback (most recent call last)
in
2 #TLMDB = TextLMDataBunch.from_df(train_df=train_lm_df, valid_df=valid_lm_df,)
3 TL = (TextList.from_df(text_data_frame, Path(), cols=[‘statement’])
----> 4 .random_split_by_pct(0.1)
5 .label_for_lm()
6 .databunch(bs=44))