I’m trying to build a regressor for tabular data. When I create a databunch as follows:
data = (TabularList.from_df(train_df, path=path, cat_names=cat_names, cont_names=cont_names, procs=procs)
.random_split_by_pct(valid_pct=0.2)
.label_from_df(cols=dep_var)
.add_test(test, label=0)
.databunch())
I get the following error:
ERROR:root:An unexpected error occurred while tokenizing input
The following traceback may be corrupted or invalid
The error message is: ('EOF in multi-line string', (1, 42))
---------------------------------------------------------------------------
Exception Traceback (most recent call last)
<ipython-input-15-06fcbb6e7104> in <module>()
2 .split_by_idx(list(range(800,1000)))
3 .label_from_df(cols=dep_var)
----> 4 .add_test(test, label=0)
5 .databunch())
/usr/local/lib/python3.6/dist-packages/fastai/data_block.py in add_test(self, items, label)
433 if label is None: label = self.train[0][1].obj
434 labels = [label for _ in range_of(items)]
--> 435 if isinstance(items, ItemList): self.test = self.valid.new(items.items, labels, xtra=items.xtra)
436 else: self.test = self.valid.new(items, labels)
437 return self
/usr/local/lib/python3.6/dist-packages/fastai/data_block.py in new(self, x, y, **kwargs)
473 return self.__class__(x, y, tfms=self.tfms, tfm_y=self.tfm_y, **self.tfmargs)
474 else:
--> 475 return self.new(self.x.new(x, **kwargs), self.y.new(y, **kwargs)).process()
476
477 def __getattr__(self,k:str)->Any:
/usr/local/lib/python3.6/dist-packages/fastai/data_block.py in process(self, xp, yp, filter_missing_y)
519 filt = array([o is None for o in self.y])
520 if filt.sum()>0: self.x,self.y = self.x[~filt],self.y[~filt]
--> 521 self.x.process(xp)
522 return self
523
/usr/local/lib/python3.6/dist-packages/fastai/data_block.py in process(self, processor)
65 if processor is not None: self.processor = processor
66 self.processor = listify(self.processor)
---> 67 for p in self.processor: p.process(self)
68 return self
69
/usr/local/lib/python3.6/dist-packages/fastai/tabular/data.py in process(self, ds)
60 return
61 for i,proc in enumerate(self.procs):
---> 62 if isinstance(proc, TabularProc): proc(ds.xtra, test=True)
63 else:
64 #cat and cont names may have been changed by transform (like Fill_NA)
/usr/local/lib/python3.6/dist-packages/fastai/tabular/transform.py in __call__(self, df, test)
30 "Apply the correct function to `df` depending on `test`."
31 func = self.apply_test if test else self.apply_train
---> 32 func(df)
33
34 def apply_train(self, df:DataFrame):
/usr/local/lib/python3.6/dist-packages/fastai/tabular/transform.py in apply_test(self, df)
81 elif pd.isnull(df[name]).sum() != 0:
82 raise Exception(f"""There are nan values in field {name} but there were none in the training set.
---> 83 Please fix those manually.""")
84
85 class Normalize(TabularProc):
Exception: There are nan values in field BsmtFinSF2 but there were none in the training set.
Please fix those manually.
How can I fix this? Any hints would be appreciated.