@dangraf I can’t recreate the error in Colab on the regular install (I haven’t tried dev yet).
Edit: Okay, now I can. It's a bug in the dev version.
@sgugger it seems to come from the fact that idxs is a very long tuple of row indices when grabbing a batch, whereas simply building a TabularPandas passes in something different. The root of the bug is _TabIloc; I put a print statement on idxs like so:
#export
class _TabIloc:
    "Get/set rows by iloc and cols by name"
    def __init__(self,to): self.to = to
    def __getitem__(self, idxs):
        df = self.to.items
        print(idxs)
        if isinstance(idxs,tuple):
            rows,cols = idxs
            cols = df.columns.isin(cols) if is_listy(cols) else df.columns.get_loc(cols)
        else: rows,cols = idxs,slice(None)
        return self.to.new(df.iloc[rows, cols])
What gets printed when just doing a TabularPandas (the expected behavior):
to = TabularPandas(df_main, procs, cat_names, cont_names, y_names="salary", splits=splits)
(slice(None, None, None), 'workclass')
(slice(None, None, None), 'education')
(slice(None, None, None), 'marital-status')
(slice(None, None, None), 'occupation')
(slice(None, None, None), 'relationship')
(slice(None, None, None), 'race')
(slice(None, None, None), 'age_na')
(slice(None, None, None), 'fnlwgt_na')
(slice(None, None, None), 'education-num_na')
(slice(None, None, None), 'salary')
Behavior on dls.one_batch():
(1829, 6000, 4754, 3678, 823, 4682, 3525, 3136, 4430, 6376, 3077, 5487, 4382, 1594, 3501, 4306, 258, 7924, 6271, 7174, 5970, 1363, 7407, 4908, 2201, 7369, 3305, 7116, 499, 4439, 5406, 4046, 3743, 6204, 639, 1232, 3675, 256, 5134, 4411, 7563, 6902, 5661, 3314, 1243, 5573, 3327, 750, 6232, 3363, 2840, 5906, 4775, 7995, 4008, 3089, 7674, 4214, 5414, 5955, 7726, 3045, 7570, 3432)
ValueError Traceback (most recent call last)
<ipython-input-90-ccb93b9fbe07> in <module>()
----> 1 dls.one_batch()
9 frames
/usr/local/lib/python3.6/dist-packages/fastai2/data/load.py in one_batch(self)
128 def one_batch(self):
129 if self.n is not None and len(self)==0: raise ValueError(f'This DataLoader does not contain any batches')
--> 130 with self.fake_l.no_multiproc(): res = first(self)
131 if hasattr(self, 'it'): delattr(self, 'it')
132 return res
/usr/local/lib/python3.6/dist-packages/fastcore/utils.py in first(x)
174 def first(x):
175 "First element of `x`, or None if missing"
--> 176 try: return next(iter(x))
177 except StopIteration: return None
178
/usr/local/lib/python3.6/dist-packages/fastai2/data/load.py in __iter__(self)
95 self.randomize()
96 self.before_iter()
---> 97 for b in _loaders[self.fake_l.num_workers==0](self.fake_l):
98 if self.device is not None: b = to_device(b, self.device)
99 yield self.after_batch(b)
/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py in __next__(self)
343
344 def __next__(self):
--> 345 data = self._next_data()
346 self._num_yielded += 1
347 if self._dataset_kind == _DatasetKind.Iterable and \
/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py in _next_data(self)
383 def _next_data(self):
384 index = self._next_index() # may raise StopIteration
--> 385 data = self._dataset_fetcher.fetch(index) # may raise StopIteration
386 if self._pin_memory:
387 data = _utils.pin_memory.pin_memory(data)
/usr/local/lib/python3.6/dist-packages/torch/utils/data/_utils/fetch.py in fetch(self, possibly_batched_index)
32 raise StopIteration
33 else:
---> 34 data = next(self.dataset_iter)
35 return self.collate_fn(data)
36
/usr/local/lib/python3.6/dist-packages/fastai2/data/load.py in create_batches(self, samps)
104 self.it = iter(self.dataset) if self.dataset is not None else None
105 res = filter(lambda o:o is not None, map(self.do_item, samps))
--> 106 yield from map(self.do_batch, self.chunkify(res))
107
108 def new(self, dataset=None, cls=None, **kwargs):
/usr/local/lib/python3.6/dist-packages/fastai2/data/load.py in do_batch(self, b)
125 def create_item(self, s): return next(self.it) if s is None else self.dataset[s]
126 def create_batch(self, b): return (fa_collate,fa_convert)[self.prebatched](b)
--> 127 def do_batch(self, b): return self.retain(self.create_batch(self.before_batch(b)), b)
128 def one_batch(self):
129 if self.n is not None and len(self)==0: raise ValueError(f'This DataLoader does not contain any batches')
<ipython-input-46-cad3c12e3ff5> in create_batch(self, b)
6 super().__init__(dataset, bs=bs, shuffle=shuffle, after_batch=after_batch, num_workers=num_workers, **kwargs)
7
----> 8 def create_batch(self, b): return self.dataset.iloc[b]
9
10 TabularPandas._dl_type = TabDataLoader
<ipython-input-52-8847abb6a04f> in __getitem__(self, idxs)
6 print(idxs)
7 if isinstance(idxs,tuple):
----> 8 rows,cols = idxs
9 cols = df.columns.isin(cols) if is_listy(cols) else df.columns.get_loc(cols)
10 else: rows,cols = idxs,slice(None)
ValueError: too many values to unpack (expected 2)
Hope that helps with debugging (as I'm unsure what to do here).
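The only rough idea I have (just a sketch, I haven't verified this is where the real fix belongs) is to convert the batch of indices to a list in my custom create_batch before it hits iloc, so _TabIloc's isinstance(idxs, tuple) check doesn't try to unpack it as (rows, cols):

def create_batch(self, b):
    # b arrives as a plain tuple of row indices, which _TabIloc.__getitem__
    # mistakes for a (rows, cols) pair; passing a list sends it down the
    # `else: rows,cols = idxs,slice(None)` branch instead
    return self.dataset.iloc[list(b)]

With that, df.iloc gets the list of row indices plus slice(None) for the columns, which pandas handles fine, but I'm not sure that's the proper fix for the library.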