Tabular data: Tensors must have same number of dimensions

Hi there,

I'm trying to do binary classification but have run into a problem.

Data:
It’s a tabular dataset, train: 90000 x 2, validation: 10000 x 1

My code looks like this:

data = TabularDataBunch.from_df(df=df, path='.', cat_names=cat_vars, cont_names=cont_vars, procs=procs,
                                dep_var=dep_var, test_df=test,
                                valid_idx=valid_idx, device=torch.device('cpu'))
learn = tabular_learner(data, layers=[100,20], ps=None,
                        emb_drop=0.0, metrics=auc_roc_score)
learn.fit_one_cycle(2, 1e-3, wd=0.2)

Error message:

RuntimeError Traceback (most recent call last)
in
----> 1 learn = main(debug=False)

in main(debug)
44 df = pd.read_csv('engineered_df.csv')
45 with timer("Fastai deep learning framework"):
---> 46 kfold_fastai_deeplearning(df, categorical_feature=None)

in kfold_fastai_deeplearning(data, categorical_feature)
700 # learn.recorder.plot()
701 print (learn.model)
--> 702 learn.fit_one_cycle(2, 1e-3, wd=0.2) # weight decay
703 break
704 # oof_preds[valid_idx] = clf.predict_proba(valid_x, num_iteration=clf.best_iteration_)[:, 1]

~/.local/lib/python3.7/site-packages/fastai/train.py in fit_one_cycle(learn, cyc_len, max_lr, moms, div_factor, pct_start, final_div, wd, callbacks, tot_epochs, start_epoch)
21 callbacks.append(OneCycleScheduler(learn, max_lr, moms=moms, div_factor=div_factor, pct_start=pct_start,
22 final_div=final_div, tot_epochs=tot_epochs, start_epoch=start_epoch))
---> 23 learn.fit(cyc_len, max_lr, wd=wd, callbacks=callbacks)
24
25 def fit_fc(learn:Learner, tot_epochs:int=1, lr:float=defaults.lr, moms:Tuple[float,float]=(0.95,0.85), start_pct:float=0.72,

~/.local/lib/python3.7/site-packages/fastai/basic_train.py in fit(self, epochs, lr, wd, callbacks)
198 else: self.opt.lr,self.opt.wd = lr,wd
199 callbacks = [cb(self) for cb in self.callback_fns + listify(defaults.extra_callback_fns)] + listify(callbacks)
--> 200 fit(epochs, self, metrics=self.metrics, callbacks=self.callbacks+callbacks)
201
202 def create_opt(self, lr:Floats, wd:Floats=0.)->None:

~/.local/lib/python3.7/site-packages/fastai/basic_train.py in fit(epochs, learn, callbacks, metrics)
104 if not cb_handler.skip_validate and not learn.data.empty_val:
105 val_loss = validate(learn.model, learn.data.valid_dl, loss_func=learn.loss_func,
--> 106 cb_handler=cb_handler, pbar=pbar)
107 else: val_loss=None
108 if cb_handler.on_epoch_end(val_loss): break

~/.local/lib/python3.7/site-packages/fastai/basic_train.py in validate(model, dl, loss_func, cb_handler, pbar, average, n_batch)
61 if not is_listy(yb): yb = [yb]
62 nums.append(first_el(yb).shape[0])
---> 63 if cb_handler and cb_handler.on_batch_end(val_losses[-1]): break
64 if n_batch and (len(nums)>=n_batch): break
65 nums = np.array(nums, dtype=np.float32)

~/.local/lib/python3.7/site-packages/fastai/callback.py in on_batch_end(self, loss)
306 "Handle end of processing one batch with loss."
307 self.state_dict['last_loss'] = loss
--> 308 self('batch_end', call_mets = not self.state_dict['train'])
309 if self.state_dict['train']:
310 self.state_dict['iteration'] += 1

~/.local/lib/python3.7/site-packages/fastai/callback.py in __call__(self, cb_name, call_mets, **kwargs)
248 "Call through to all of the CallbakHandler functions."
249 if call_mets:
--> 250 for met in self.metrics: self._call_and_update(met, cb_name, **kwargs)
251 for cb in self.callbacks: self._call_and_update(cb, cb_name, **kwargs)
252

~/.local/lib/python3.7/site-packages/fastai/callback.py in _call_and_update(self, cb, cb_name, **kwargs)
239 def _call_and_update(self, cb, cb_name, **kwargs)->None:
240 "Call cb_name on cb and update the inner state."
--> 241 new = ifnone(getattr(cb, f'on_{cb_name}')(**self.state_dict, **kwargs), dict())
242 for k,v in new.items():
243 if k not in self.state_dict:

~/.local/lib/python3.7/site-packages/fastai/callback.py in on_batch_end(self, last_output, last_target, **kwargs)
342 if not is_listy(last_target): last_target=[last_target]
343 self.count += first_el(last_target).size(0)
--> 344 val = self.func(last_output, *last_target)
345 if self.world:
346 val = val.clone()

~/.local/lib/python3.7/site-packages/fastai/metrics.py in auc_roc_score(input, targ)
278 def auc_roc_score(input:Tensor, targ:Tensor):
279 "Computes the area under the receiver operator characteristic (ROC) curve using the trapezoid method. Restricted binary classification tasks."
--> 280 fpr, tpr = roc_curve(input, targ)
281 d = fpr[1:] - fpr[:-1]
282 sl1, sl2 = [slice(None)], [slice(None)]

~/.local/lib/python3.7/site-packages/fastai/metrics.py in roc_curve(input, targ)
296 fps = (1 + threshold_idxs - tps)
297 if tps[0] != 0 or fps[0] != 0:
--> 298 fps = torch.cat((LongTensor([0]), fps))
299 tps = torch.cat((LongTensor([0]), tps))
300 fpr, tpr = fps.float() / fps[-1], tps.float() / tps[-1]

RuntimeError: invalid argument 0: Tensors must have same number of dimensions: got 1 and 2 at /pytorch/aten/src/TH/generic/THTensor.cpp:680

I searched this forum for solutions, but all of the similar errors I found relate to image classification problems.

Any ideas would be appreciated!

Are there any categories that show up in your validation set that aren't in your training set? Also, does it work without using AUROC (i.e. with something like cross entropy)?
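
Something along these lines (a rough sketch, reusing the df, cat_vars, valid_idx and data names from your post, and using accuracy for the second check) would test both:

# Any category values in the validation rows that never appear in the training rows?
train_df, valid_df = df.drop(df.index[valid_idx]), df.iloc[valid_idx]
for c in cat_vars:
    unseen = set(valid_df[c].unique()) - set(train_df[c].unique())
    if unseen: print(c, unseen)

# Same learner, but with accuracy instead of auc_roc_score
learn = tabular_learner(data, layers=[100,20], ps=None, emb_drop=0.0, metrics=accuracy)
learn.fit_one_cycle(2, 1e-3, wd=0.2)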

@muellerzr
Hi Mueller, thanks for your reply!

  1. No, everything that shows up in the validation set also shows up in the training set.
  2. Yes, accuracy works. Could it be because something in the confusion matrix is null?

When you plot the confusion matrix (post-training, with accuracy), is it null?

I'm wondering if the dimension mismatch has anything to do with the size of your tabular dataset (train: 90000 x 2, validation: 10000 x 1). Did you mean 90000 rows with 1 y value (which can take 2 states for binary classification)?
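
A quick sanity check along those lines (just a sketch, using the names from your snippet) could be:

print(df.shape)                    # total rows x columns before the split
print(len(valid_idx))              # should be 10000 if that's your validation size
print(df[dep_var].nunique())       # should be 2 for binary classification
print(df[dep_var].value_counts())  # class balance of the target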


I found the cause of the problem: the dataset is so imbalanced that for some batches there is no '1'.

@muellerzr @beecoder thanks for your advice
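
In case someone else runs into this: since the metric is evaluated per validation batch (as the traceback shows), one workaround (a rough sketch, assuming fastai v1 and scikit-learn, reusing the data object from above) is to train with a per-batch-safe metric and compute ROC AUC once over the whole validation set afterwards:

from sklearn.metrics import roc_auc_score

learn = tabular_learner(data, layers=[100,20], ps=None, emb_drop=0.0, metrics=accuracy)
learn.fit_one_cycle(2, 1e-3, wd=0.2)

# Predictions for the full validation set, so the positive class can't be
# entirely missing the way it can be in a single batch
preds, targs = learn.get_preds(ds_type=DatasetType.Valid)
print(roc_auc_score(targs.numpy(), preds[:, 1].numpy()))  # column 1 assumed to be the positive class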
