Issue when running TextLMDataBunch.from_csv

raymonduui · December 26, 2018, 4:23pm

I was following the "Quick Start: Training an IMDb sentiment model with ULMFiT" tutorial at https://docs.fast.ai/text.html.

I can run all the commands before this step:

Language model data

data_lm = TextLMDataBunch.from_csv(path, 'texts.csv')

but when I run it, I get the following error:
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
~\Anaconda3\lib\site-packages\pandas\core\ops.py in na_op(x, y)
   1011         try:
-> 1012             result = expressions.evaluate(op, str_rep, x, y, **eval_kwargs)
   1013         except TypeError:

~\Anaconda3\lib\site-packages\pandas\core\computation\expressions.py in evaluate(op, op_str, a, b, use_numexpr, **eval_kwargs)
    204     if use_numexpr:
--> 205         return _evaluate(op, op_str, a, b, **eval_kwargs)
    206     return _evaluate_standard(op, op_str, a, b)

~\Anaconda3\lib\site-packages\pandas\core\computation\expressions.py in _evaluate_numexpr(op, op_str, a, b, truediv, reversed, **eval_kwargs)
    119     if result is None:
--> 120         result = _evaluate_standard(op, op_str, a, b)
    121 

~\Anaconda3\lib\site-packages\pandas\core\computation\expressions.py in _evaluate_standard(op, op_str, a, b, **eval_kwargs)
     64     with np.errstate(all='ignore'):
---> 65         return op(a, b)
     66 

~\Anaconda3\lib\site-packages\pandas\core\ops.py in radd(left, right)
    112 def radd(left, right):
--> 113     return right + left
    114 

TypeError: ufunc 'add' did not contain a loop with signature matching types dtype('<U6') dtype('<U6') dtype('<U6')

During handling of the above exception, another exception occurred:

TypeError                                 Traceback (most recent call last)
<ipython-input-5-9201f37b2988> in <module>()
      1 # Language model data
----> 2 data_lm = TextLMDataBunch.from_csv('',filename)

~\Anaconda3\lib\site-packages\fastai\text\data.py in from_csv(cls, path, csv_name, valid_pct, test, tokenizer, vocab, classes, header, text_cols, label_cols, label_delim, **kwargs)
    180         test_df = None if test is None else pd.read_csv(Path(path)/test, header=header)
    181         return cls.from_df(path, train_df, valid_df, test_df, tokenizer, vocab, classes, text_cols,
--> 182                            label_cols, label_delim, **kwargs)
    183 
    184     @classmethod

~\Anaconda3\lib\site-packages\fastai\text\data.py in from_df(cls, path, train_df, valid_df, test_df, tokenizer, vocab, classes, text_cols, label_cols, label_delim, **kwargs)
    165         src = ItemLists(path, TextList.from_df(train_df, path, cols=text_cols, processor=processor),
    166                         TextList.from_df(valid_df, path, cols=text_cols, processor=processor))
--> 167         src = src.label_for_lm() if cls==TextLMDataBunch else src.label_from_df(cols=label_cols, classes=classes, sep=label_delim)
    168         if test_df is not None: src.add_test(TextList.from_df(test_df, path, cols=text_cols))
    169         return src.databunch(**kwargs)

~\Anaconda3\lib\site-packages\fastai\data_block.py in _inner(*args, **kwargs)
    391             self.valid = fv(*args, **kwargs)
    392             self.__class__ = LabelLists
--> 393             self.process()
    394             return self
    395         return _inner

~\Anaconda3\lib\site-packages\fastai\data_block.py in process(self)
    438         "Process the inner datasets."
    439         xp,yp = self.get_processors()
--> 440         for i,ds in enumerate(self.lists): ds.process(xp, yp, filter_missing_y=i==0)
    441         return self
    442 

~\Anaconda3\lib\site-packages\fastai\data_block.py in process(self, xp, yp, filter_missing_y)
    567             filt = array([o is None for o in self.y])
    568             if filt.sum()>0: self.x,self.y = self.x[~filt],self.y[~filt]
--> 569         self.x.process(xp)
    570         return self
    571 

~\Anaconda3\lib\site-packages\fastai\data_block.py in process(self, processor)
     66         if processor is not None: self.processor = processor
     67         self.processor = listify(self.processor)
---> 68         for p in self.processor: p.process(self)
     69         return self
     70 

~\Anaconda3\lib\site-packages\fastai\text\data.py in process(self, ds)
    237     def process_one(self, item):  return self.tokenizer._process_all_1([item])[0]
    238     def process(self, ds):
--> 239         ds.items = _join_texts(ds.items, self.mark_fields)
    240         tokens = []
    241         for i in progress_bar(range(0,len(ds),self.chunksize), leave=False):

~\Anaconda3\lib\site-packages\fastai\text\data.py in _join_texts(texts, mark_fields)
    324     if is1d(texts): texts = texts[:,None]
    325     df = pd.DataFrame({i:texts[:,i] for i in range(texts.shape[1])})
--> 326     text_col = f'{BOS} {FLD} {1} ' + df[0] if mark_fields else  f'{BOS} ' + df[0]
    327     for i in range(1,len(df.columns)):
    328         text_col += (f' {FLD} {i+1} ' if mark_fields else ' ') + df[i]

~\Anaconda3\lib\site-packages\pandas\core\ops.py in wrapper(left, right)
   1067             rvalues = rvalues.values
   1068 
-> 1069         result = safe_na_op(lvalues, rvalues)
   1070         return construct_result(left, result,
   1071                                 index=left.index, name=res_name, dtype=None)

~\Anaconda3\lib\site-packages\pandas\core\ops.py in safe_na_op(lvalues, rvalues)
   1031         try:
   1032             with np.errstate(all='ignore'):
-> 1033                 return na_op(lvalues, rvalues)
   1034         except Exception:
   1035             if is_object_dtype(lvalues):

~\Anaconda3\lib\site-packages\pandas\core\ops.py in na_op(x, y)
   1021                 result = np.empty(len(x), dtype=x.dtype)
   1022                 mask = notna(x)
-> 1023                 result[mask] = op(x[mask], y)
   1024 
   1025             result, changed = maybe_upcast_putmask(result, ~mask, np.nan)

~\Anaconda3\lib\site-packages\pandas\core\ops.py in radd(left, right)
    111 
    112 def radd(left, right):
--> 113     return right + left
    114 
    115 

TypeError: ufunc 'add' did not contain a loop with signature matching types dtype('<U6') dtype('<U6') dtype('<U6')