I am trying to build a language model. The CSV file has only one column, which contains the comments the language model is to be trained on. However, I’m getting the following error when I run this code:
data_lm = TextLMDataBunch.from_csv(path, "data_.csv")
IndexError: positional indexers are out-of-bounds
This is the complete stack trace:
IndexError Traceback (most recent call last)
in
----> 1 data_lm = TextLMDataBunch.from_csv(path, "data_.csv")
~/miniconda3/envs/dask-scipy/lib/python3.6/site-packages/fastai/text/data.py in from_csv(cls, path, csv_name, valid_pct, test, tokenizer, vocab, classes, delimiter, header, text_cols, label_cols, label_delim, chunksize, max_vocab, min_freq, mark_fields, include_bos, include_eos, **kwargs)
221 label_cols=label_cols, label_delim=label_delim, chunksize=chunksize, max_vocab=max_vocab,
222 min_freq=min_freq, mark_fields=mark_fields,
--> 223 include_bos=include_bos, include_eos=include_eos, **kwargs)
224
225 @classmethod
~/miniconda3/envs/dask-scipy/lib/python3.6/site-packages/fastai/text/data.py in from_df(cls, path, train_df, valid_df, test_df, tokenizer, vocab, classes, text_cols, label_cols, label_delim, chunksize, max_vocab, min_freq, mark_fields, include_bos, include_eos, **kwargs)
197 include_bos=include_bos, include_eos=include_eos)
198 if classes is None and is_listy(label_cols) and len(label_cols) > 1: classes = label_cols
--> 199 src = ItemLists(path, TextList.from_df(train_df, path, cols=text_cols, processor=processor),
200 TextList.from_df(valid_df, path, cols=text_cols, processor=processor))
201 if cls==TextLMDataBunch: src = src.label_for_lm()
~/miniconda3/envs/dask-scipy/lib/python3.6/site-packages/fastai/data_block.py in from_df(cls, df, path, cols, processor, **kwargs)
121 def from_df(cls, df:DataFrame, path:PathOrStr='.', cols:IntsOrStrs=0, processor:PreProcessors=None, **kwargs)->'ItemList':
122 "Create an `ItemList` in `path` from the inputs in the `cols` of `df`."
--> 123 inputs = df.iloc[:,df_names_to_idx(cols, df)]
124 assert inputs.isna().sum().sum() == 0, f"You have NaN values in column(s) {cols} of your dataframe, please fix it."
125 res = cls(items=_maybe_squeeze(inputs.values), path=path, inner_df=df, processor=processor, **kwargs)
~/miniconda3/envs/dask-scipy/lib/python3.6/site-packages/pandas/core/indexing.py in __getitem__(self, key)
1492 except (KeyError, IndexError, AttributeError):
1493 pass
-> 1494 return self._getitem_tuple(key)
1495 else:
1496 # we by definition only have the 0th axis
~/miniconda3/envs/dask-scipy/lib/python3.6/site-packages/pandas/core/indexing.py in _getitem_tuple(self, tup)
2141 def _getitem_tuple(self, tup):
2142
-> 2143 self._has_valid_tuple(tup)
2144 try:
2145 return self._getitem_lowerdim(tup)
~/miniconda3/envs/dask-scipy/lib/python3.6/site-packages/pandas/core/indexing.py in _has_valid_tuple(self, key)
221 raise IndexingError('Too many indexers')
222 try:
--> 223 self._validate_key(k, i)
224 except ValueError:
225 raise ValueError("Location based indexing can only have "
~/miniconda3/envs/dask-scipy/lib/python3.6/site-packages/pandas/core/indexing.py in _validate_key(self, key, axis)
2079
2080 if len(arr) and (arr.max() >= len_axis or arr.min() < -len_axis):
-> 2081 raise IndexError("positional indexers are out-of-bounds")
2082 else:
2083 raise ValueError("Can only index by location with "
This is what the data looks like.
Could someone please tell me the possible cause and a solution? If I’ve left anything out, or over- or under-emphasized a specific point, let me know in the comments.