I have a dataset very similar to Rossmann. I am getting an error at this step:
valid_idx = range(70,88)
path = os.path.join("./","data")
trim_df = df[cat_vars+cont_vars+[dep_var]]
data = (TabularList.from_df(trim_df, path=path, cat_names=cat_vars, cont_names=cont_vars, procs=procs)
.split_by_idx(valid_idx)
.label_from_df(cols=dep_var)
.databunch())
Here’s the error. I would love to understand what I am doing wrong:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-66-eb2da1ad29dd> in <module>()
4 data = (TabularList.from_df(trim_df, path=path, cat_names=cat_vars, cont_names=cont_vars, procs=procs)
5 .split_by_idx(valid_idx)
----> 6 .label_from_df(cols=dep_var, label_cls=FloatList, log=True)
7 .databunch())
/usr/local/lib/python3.6/dist-packages/fastai/data_block.py in _inner(*args, **kwargs)
427 assert isinstance(fv, Callable)
428 def _inner(*args, **kwargs):
--> 429 self.train = ft(*args, **kwargs)
430 assert isinstance(self.train, LabelList)
431 kwargs['label_cls'] = self.train.y.__class__
/usr/local/lib/python3.6/dist-packages/fastai/data_block.py in label_from_df(self, cols, label_cls, **kwargs)
239 def label_from_df(self, cols:IntsOrStrs=1, label_cls:Callable=None, **kwargs):
240 "Label `self.items` from the values in `cols` in `self.xtra`."
--> 241 labels = self.xtra.iloc[:,df_names_to_idx(cols, self.xtra)]
242 assert labels.isna().sum().sum() == 0, f"You have NaN values in column(s) {cols} of your dataframe, please fix it."
243 if is_listy(cols) and len(cols) > 1 and (label_cls is None or label_cls == MultiCategoryList):
/usr/local/lib/python3.6/dist-packages/pandas/core/indexing.py in __getitem__(self, key)
1365 except (KeyError, IndexError):
1366 pass
-> 1367 return self._getitem_tuple(key)
1368 else:
1369 # we by definition only have the 0th axis
/usr/local/lib/python3.6/dist-packages/pandas/core/indexing.py in _getitem_tuple(self, tup)
1751 continue
1752
-> 1753 retval = getattr(retval, self.name)._getitem_axis(key, axis=axis)
1754
1755 # if the dim was reduced, then pass a lower-dim the next time
/usr/local/lib/python3.6/dist-packages/pandas/core/indexing.py in _getitem_axis(self, key, axis)
1813 if is_bool_indexer(key):
1814 self._has_valid_type(key, axis)
-> 1815 return self._getbool_axis(key, axis=axis)
1816
1817 # a list of integers
/usr/local/lib/python3.6/dist-packages/pandas/core/indexing.py in _getbool_axis(self, key, axis)
1387 labels = self.obj._get_axis(axis)
1388 key = check_bool_indexer(labels, key)
-> 1389 inds, = key.nonzero()
1390 try:
1391 return self.obj._take(inds, axis=axis, convert=False)
ValueError: too many values to unpack (expected 1)
Thank you