Moving from TabularList to TabularPandas: "Char" dtype error on fit

I place this here as it may have more focus, although it’s not strictly a 2022P2 issue.


0.00% [0/5 00:00<?]
epoch 	train_loss 	valid_loss 	time

0.00% [0/252 00:00<?]

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
Input In [115], in <cell line: 2>()
      1 #%pdb?
----> 2 learn.fit_one_cycle(5, 1e-2)

File ~/mambaforge/envs/bk/lib/python3.10/site-packages/fastai/callback/schedule.py:119, in fit_one_cycle(self, n_epoch, lr_max, div, div_final, pct_start, wd, moms, cbs, reset_opt, start_epoch)
    116 lr_max = np.array([h['lr'] for h in self.opt.hypers])
    117 scheds = {'lr': combined_cos(pct_start, lr_max/div, lr_max, lr_max/div_final),
    118           'mom': combined_cos(pct_start, *(self.moms if moms is None else moms))}
--> 119 self.fit(n_epoch, cbs=ParamScheduler(scheds)+L(cbs), reset_opt=reset_opt, wd=wd, start_epoch=start_epoch)

File ~/mambaforge/envs/bk/lib/python3.10/site-packages/fastai/learner.py:256, in Learner.fit(self, n_epoch, lr, wd, cbs, reset_opt, start_epoch)
    254 self.opt.set_hypers(lr=self.lr if lr is None else lr)
    255 self.n_epoch = n_epoch
--> 256 self._with_events(self._do_fit, 'fit', CancelFitException, self._end_cleanup)

File ~/mambaforge/envs/bk/lib/python3.10/site-packages/fastai/learner.py:193, in Learner._with_events(self, f, event_type, ex, final)
    192 def _with_events(self, f, event_type, ex, final=noop):
--> 193     try: self(f'before_{event_type}');  f()
    194     except ex: self(f'after_cancel_{event_type}')
    195     self(f'after_{event_type}');  final()

File ~/mambaforge/envs/bk/lib/python3.10/site-packages/fastai/learner.py:245, in Learner._do_fit(self)
    243 for epoch in range(self.n_epoch):
    244     self.epoch=epoch
--> 245     self._with_events(self._do_epoch, 'epoch', CancelEpochException)

File ~/mambaforge/envs/bk/lib/python3.10/site-packages/fastai/learner.py:193, in Learner._with_events(self, f, event_type, ex, final)
    192 def _with_events(self, f, event_type, ex, final=noop):
--> 193     try: self(f'before_{event_type}');  f()
    194     except ex: self(f'after_cancel_{event_type}')
    195     self(f'after_{event_type}');  final()

File ~/mambaforge/envs/bk/lib/python3.10/site-packages/fastai/learner.py:239, in Learner._do_epoch(self)
    238 def _do_epoch(self):
--> 239     self._do_epoch_train()
    240     self._do_epoch_validate()

File ~/mambaforge/envs/bk/lib/python3.10/site-packages/fastai/learner.py:231, in Learner._do_epoch_train(self)
    229 def _do_epoch_train(self):
    230     self.dl = self.dls.train
--> 231     self._with_events(self.all_batches, 'train', CancelTrainException)

File ~/mambaforge/envs/bk/lib/python3.10/site-packages/fastai/learner.py:193, in Learner._with_events(self, f, event_type, ex, final)
    192 def _with_events(self, f, event_type, ex, final=noop):
--> 193     try: self(f'before_{event_type}');  f()
    194     except ex: self(f'after_cancel_{event_type}')
    195     self(f'after_{event_type}');  final()

File ~/mambaforge/envs/bk/lib/python3.10/site-packages/fastai/learner.py:199, in Learner.all_batches(self)
    197 def all_batches(self):
    198     self.n_iter = len(self.dl)
--> 199     for o in enumerate(self.dl): self.one_batch(*o)

File ~/mambaforge/envs/bk/lib/python3.10/site-packages/fastai/learner.py:227, in Learner.one_batch(self, i, b)
    225 b = self._set_device(b)
    226 self._split(b)
--> 227 self._with_events(self._do_one_batch, 'batch', CancelBatchException)

File ~/mambaforge/envs/bk/lib/python3.10/site-packages/fastai/learner.py:193, in Learner._with_events(self, f, event_type, ex, final)
    192 def _with_events(self, f, event_type, ex, final=noop):
--> 193     try: self(f'before_{event_type}');  f()
    194     except ex: self(f'after_cancel_{event_type}')
    195     self(f'after_{event_type}');  final()

File ~/mambaforge/envs/bk/lib/python3.10/site-packages/fastai/learner.py:212, in Learner._do_one_batch(self)
    210 self('after_loss')
    211 if not self.training or not len(self.yb): return
--> 212 self._with_events(self._backward, 'backward', CancelBackwardException)
    213 self._with_events(self._step, 'step', CancelStepException)
    214 self.opt.zero_grad()

File ~/mambaforge/envs/bk/lib/python3.10/site-packages/fastai/learner.py:193, in Learner._with_events(self, f, event_type, ex, final)
    192 def _with_events(self, f, event_type, ex, final=noop):
--> 193     try: self(f'before_{event_type}');  f()
    194     except ex: self(f'after_cancel_{event_type}')
    195     self(f'after_{event_type}');  final()

File ~/mambaforge/envs/bk/lib/python3.10/site-packages/fastai/learner.py:201, in Learner._backward(self)
--> 201 def _backward(self): self.loss_grad.backward()

File ~/mambaforge/envs/bk/lib/python3.10/site-packages/torch/_tensor.py:396, in Tensor.backward(self, gradient, retain_graph, create_graph, inputs)
    387 if has_torch_function_unary(self):
    388     return handle_torch_function(
    389         Tensor.backward,
    390         (self,),
   (...)
    394         create_graph=create_graph,
    395         inputs=inputs)
--> 396 torch.autograd.backward(self, gradient, retain_graph, create_graph, inputs=inputs)

File ~/mambaforge/envs/bk/lib/python3.10/site-packages/torch/autograd/__init__.py:173, in backward(tensors, grad_tensors, retain_graph, create_graph, grad_variables, inputs)
    168     retain_graph = create_graph
    170 # The reason we repeat same the comment below is that
    171 # some Python versions print out the first line of a multi-line function
    172 # calls in the traceback and some print out the last line
--> 173 Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
    174     tensors, grad_tensors_, retain_graph, create_graph, inputs,
    175     allow_unreachable=True, accumulate_grad=True)

RuntimeError: Found dtype Char but expected Float


Trace above, debug of last entry of trace below

> /home/amamba/mambaforge/envs/bk/lib/python3.10/site-packages/torch/autograd/__init__.py(173)backward()
    171     # some Python versions print out the first line of a multi-line function
    172     # calls in the traceback and some print out the last line
--> 173     Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
    174         tensors, grad_tensors_, retain_graph, create_graph, inputs,
    175         allow_unreachable=True, accumulate_grad=True)  # Calls into the C++ engine to run the backward pass

ipdb> pp tensors
(tensor(1002.9806, device='cuda:0', grad_fn=<MseLossBackward0>),)
ipdb> pp grad_tensors_
(tensor(1., device='cuda:0'),)
ipdb> pp retain_graph
False
ipdb> pp create_graph
False
ipdb> pp inputs
()
ipdb> pp grad_variables
None

I can’t find a char data type anywhere in my data, and there are very few variables in the traceback to investigate — although I may be a bit naive about the Python debugger.
In general terms: when I use Categorify in my TabularPandas procs I get this error; when I don’t use any procs at all I instead get an `AttributeError: classes`.
All my cat_vars have been categorized individually, and all cont_vars have been normalised individually,
in the data frame before applying them to the TabularPandas.
I probably need to work at a deeper level of the code than the more readily usable tabular functions in the current version. Also, I am moving from Python 3.6, where the code ran for many years, to 3.10, as I want to experiment with some Torch Distributions.

Jeremy if this topic is in the wrong place please move it to a better one

Apologies — I should have mentioned how I resolved my issues above. Unfortunately for me my browser tech is getting left behind and is not supported on many web sites, too numerous to mention.

However, I used the link below to move forward with my project. Sorry, I can’t remember precisely how I did it, but the information there was extremely helpful.

A-walk-with-fastai2-tabular