I was following the "Quick Start: Training an IMDb sentiment model with ULMFiT" tutorial from https://docs.fast.ai/text.html.
I can run all the commands that come before this step:
# Language model data
data_lm = TextLMDataBunch.from_csv(path, 'texts.csv')
and I have encountered the following error:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
~\Anaconda3\lib\site-packages\pandas\core\ops.py in na_op(x, y)
1011 try:
-> 1012 result = expressions.evaluate(op, str_rep, x, y, **eval_kwargs)
1013 except TypeError:
~\Anaconda3\lib\site-packages\pandas\core\computation\expressions.py in evaluate(op, op_str, a, b, use_numexpr, **eval_kwargs)
204 if use_numexpr:
--> 205 return _evaluate(op, op_str, a, b, **eval_kwargs)
206 return _evaluate_standard(op, op_str, a, b)
~\Anaconda3\lib\site-packages\pandas\core\computation\expressions.py in _evaluate_numexpr(op, op_str, a, b, truediv, reversed, **eval_kwargs)
119 if result is None:
--> 120 result = _evaluate_standard(op, op_str, a, b)
121
~\Anaconda3\lib\site-packages\pandas\core\computation\expressions.py in _evaluate_standard(op, op_str, a, b, **eval_kwargs)
64 with np.errstate(all='ignore'):
---> 65 return op(a, b)
66
~\Anaconda3\lib\site-packages\pandas\core\ops.py in radd(left, right)
112 def radd(left, right):
--> 113 return right + left
114
TypeError: ufunc 'add' did not contain a loop with signature matching types dtype('<U6') dtype('<U6') dtype('<U6')
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
<ipython-input-5-9201f37b2988> in <module>()
1 # Language model data
----> 2 data_lm = TextLMDataBunch.from_csv('',filename)
~\Anaconda3\lib\site-packages\fastai\text\data.py in from_csv(cls, path, csv_name, valid_pct, test, tokenizer, vocab, classes, header, text_cols, label_cols, label_delim, **kwargs)
180 test_df = None if test is None else pd.read_csv(Path(path)/test, header=header)
181 return cls.from_df(path, train_df, valid_df, test_df, tokenizer, vocab, classes, text_cols,
--> 182 label_cols, label_delim, **kwargs)
183
184 @classmethod
~\Anaconda3\lib\site-packages\fastai\text\data.py in from_df(cls, path, train_df, valid_df, test_df, tokenizer, vocab, classes, text_cols, label_cols, label_delim, **kwargs)
165 src = ItemLists(path, TextList.from_df(train_df, path, cols=text_cols, processor=processor),
166 TextList.from_df(valid_df, path, cols=text_cols, processor=processor))
--> 167 src = src.label_for_lm() if cls==TextLMDataBunch else src.label_from_df(cols=label_cols, classes=classes, sep=label_delim)
168 if test_df is not None: src.add_test(TextList.from_df(test_df, path, cols=text_cols))
169 return src.databunch(**kwargs)
~\Anaconda3\lib\site-packages\fastai\data_block.py in _inner(*args, **kwargs)
391 self.valid = fv(*args, **kwargs)
392 self.__class__ = LabelLists
--> 393 self.process()
394 return self
395 return _inner
~\Anaconda3\lib\site-packages\fastai\data_block.py in process(self)
438 "Process the inner datasets."
439 xp,yp = self.get_processors()
--> 440 for i,ds in enumerate(self.lists): ds.process(xp, yp, filter_missing_y=i==0)
441 return self
442
~\Anaconda3\lib\site-packages\fastai\data_block.py in process(self, xp, yp, filter_missing_y)
567 filt = array([o is None for o in self.y])
568 if filt.sum()>0: self.x,self.y = self.x[~filt],self.y[~filt]
--> 569 self.x.process(xp)
570 return self
571
~\Anaconda3\lib\site-packages\fastai\data_block.py in process(self, processor)
66 if processor is not None: self.processor = processor
67 self.processor = listify(self.processor)
---> 68 for p in self.processor: p.process(self)
69 return self
70
~\Anaconda3\lib\site-packages\fastai\text\data.py in process(self, ds)
237 def process_one(self, item): return self.tokenizer._process_all_1([item])[0]
238 def process(self, ds):
--> 239 ds.items = _join_texts(ds.items, self.mark_fields)
240 tokens = []
241 for i in progress_bar(range(0,len(ds),self.chunksize), leave=False):
~\Anaconda3\lib\site-packages\fastai\text\data.py in _join_texts(texts, mark_fields)
324 if is1d(texts): texts = texts[:,None]
325 df = pd.DataFrame({i:texts[:,i] for i in range(texts.shape[1])})
--> 326 text_col = f'{BOS} {FLD} {1} ' + df[0] if mark_fields else f'{BOS} ' + df[0]
327 for i in range(1,len(df.columns)):
328 text_col += (f' {FLD} {i+1} ' if mark_fields else ' ') + df[i]
~\Anaconda3\lib\site-packages\pandas\core\ops.py in wrapper(left, right)
1067 rvalues = rvalues.values
1068
-> 1069 result = safe_na_op(lvalues, rvalues)
1070 return construct_result(left, result,
1071 index=left.index, name=res_name, dtype=None)
~\Anaconda3\lib\site-packages\pandas\core\ops.py in safe_na_op(lvalues, rvalues)
1031 try:
1032 with np.errstate(all='ignore'):
-> 1033 return na_op(lvalues, rvalues)
1034 except Exception:
1035 if is_object_dtype(lvalues):
~\Anaconda3\lib\site-packages\pandas\core\ops.py in na_op(x, y)
1021 result = np.empty(len(x), dtype=x.dtype)
1022 mask = notna(x)
-> 1023 result[mask] = op(x[mask], y)
1024
1025 result, changed = maybe_upcast_putmask(result, ~mask, np.nan)
~\Anaconda3\lib\site-packages\pandas\core\ops.py in radd(left, right)
111
112 def radd(left, right):
--> 113 return right + left
114
115
TypeError: ufunc 'add' did not contain a loop with signature matching types dtype('<U6') dtype('<U6') dtype('<U6')