I want to get predictions from a tabular learner on the Kaggle housing dataset. However, when I run
dl = learn.dls.test_dl(test)
res= learn.get_preds(dl=dl)
I get an error:
IndexError Traceback (most recent call last)
in
1 dl = learn.dls.test_dl(test)
----> 2 res= learn.get_preds(dl=dl)
C:\ProgramData\Miniconda3\lib\site-packages\fastai2\learner.py in get_preds(self, ds_idx, dl, with_input, with_decoded, with_loss, act, inner, **kwargs)
217 for mgr in ctx_mgrs: stack.enter_context(mgr)
218 self(event.begin_epoch if inner else _before_epoch)
–> 219 self._do_epoch_validate(dl=dl)
220 self(event.after_epoch if inner else _after_epoch)
221 if act is None: act = getattr(self.loss_func, ‘activation’, noop)C:\ProgramData\Miniconda3\lib\site-packages\fastai2\learner.py in _do_epoch_validate(self, ds_idx, dl)
173 dl,old,has = change_attrs(dl, names, [False,False])
174 self.dl = dl; self(‘begin_validate’)
–> 175 with torch.no_grad(): self.all_batches()
176 except CancelValidException: self(‘after_cancel_validate’)
177 finally:C:\ProgramData\Miniconda3\lib\site-packages\fastai2\learner.py in all_batches(self)
141 def all_batches(self):
142 self.n_iter = len(self.dl)
–> 143 for o in enumerate(self.dl): self.one_batch(*o)
144
145 def one_batch(self, i, b):C:\ProgramData\Miniconda3\lib\site-packages\fastai2\learner.py in one_batch(self, i, b)
147 try:
148 self._split(b); self(‘begin_batch’)
–> 149 self.pred = self.model(*self.xb); self(‘after_pred’)
150 if len(self.yb) == 0: return
151 self.loss = self.loss_func(self.pred, *self.yb); self(‘after_loss’)C:\ProgramData\Miniconda3\lib\site-packages\torch\nn\modules\module.py in call(self, *input, **kwargs)
548 result = self._slow_forward(*input, **kwargs)
549 else:
–> 550 result = self.forward(*input, **kwargs)
551 for hook in self._forward_hooks.values():
552 hook_result = hook(self, input, result)C:\ProgramData\Miniconda3\lib\site-packages\fastai2\tabular\model.py in forward(self, x_cat, x_cont)
46 def forward(self, x_cat, x_cont=None):
47 if self.n_emb != 0:
—> 48 x = [e(x_cat[:,i]) for i,e in enumerate(self.embeds)]
49 x = torch.cat(x, 1)
50 x = self.emb_drop(x)C:\ProgramData\Miniconda3\lib\site-packages\fastai2\tabular\model.py in (.0)
46 def forward(self, x_cat, x_cont=None):
47 if self.n_emb != 0:
—> 48 x = [e(x_cat[:,i]) for i,e in enumerate(self.embeds)]
49 x = torch.cat(x, 1)
50 x = self.emb_drop(x)C:\ProgramData\Miniconda3\lib\site-packages\torch\nn\modules\module.py in call(self, *input, **kwargs)
548 result = self._slow_forward(*input, **kwargs)
549 else:
–> 550 result = self.forward(*input, **kwargs)
551 for hook in self._forward_hooks.values():
552 hook_result = hook(self, input, result)C:\ProgramData\Miniconda3\lib\site-packages\torch\nn\modules\sparse.py in forward(self, input)
112 return F.embedding(
113 input, self.weight, self.padding_idx, self.max_norm,
–> 114 self.norm_type, self.scale_grad_by_freq, self.sparse)
115
116 def extra_repr(self):C:\ProgramData\Miniconda3\lib\site-packages\torch\nn\functional.py in embedding(input, weight, padding_idx, max_norm, norm_type, scale_grad_by_freq, sparse)
1722 # remove once script supports set_grad_enabled
1723 no_grad_embedding_renorm(weight, input, max_norm, norm_type)
-> 1724 return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)
1725
1726
IndexError: index out of range in self
The error is caused by the categorical feature ‘MiscFeature’, which takes the value ‘Shed’ in the test set. This value also occurs in the training set:
df['MiscFeature'].value_counts(dropna=False)
NaN 1373
Shed 40
Othr 1
Name: MiscFeature, dtype: int64
Setting all the values of this feature to np.nan works:
test['MiscFeature']=np.nan
dl = learn.dls.test_dl(test)
res= learn.get_preds(dl=dl)
But I don’t understand why the error was generated in the first place. The df has other categorical columns as well, and it doesn’t trip on them.