Hello everybody!
I’ve been trying to apply the tabular_learner model to the Riid competition on Kaggle. But I’m running into some weird issues where on validation my model is running into a Char instead of a Float? I’ve been smashing my head against the wall trying to figure this out. I thought it was running into a ‘#na#’ because a validation category wasn’t in the training set. But I reduced the example to include only two validation items, both of which contain values that are seen in the training set.
dep_var = 'answered_correctly'
simple = X[['user_id', 'user_mean', 'answered_correctly']]
cont, cat = cont_cat_split(simple, 1, dep_var=dep_var)
cont, cat
# (['user_mean'], ['user_id'])
# Create a split for a really small validation set
split = int(0.99999 * len(simple))
train_index = simple.iloc[:split].index
test_index = simple.iloc[split:].index
splits = (list(train_index), list(test_index))
simple.loc[train_index].tail()
"""
user_id user_mean answered_correctly
4022163 0.716216 1
4022163 0.716216 1
"""
# So the validation and training sets both contain the user_id 4022163
simple.loc[test_index]
"""
user_id user_mean answered_correctly
4022163 0.716216 1
4022163 0.716216 1
"""
procs = [Categorify, FillMissing, Normalize]
to = TabularPandas(simple, procs, cat, cont, y_names=dep_var, splits=splits)
dls = to.dataloaders(2)
learn = tabular_learner(dls, t_range=(0,1), layers=[500, 250], n_out=1, loss_func=F.mse_loss)
learn.fit_one_cycle(5, 1e-2)
The stack trace I get for this is
epoch train_loss valid_loss time
0 0.620800 00:00
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-192-24cb127eeafb> in <module>
----> 1 learn.fit_one_cycle(5, 1e-2)
~/anaconda3/envs/fastai2/lib/python3.8/site-packages/fastcore/logargs.py in _f(*args, **kwargs)
54 init_args.update(log)
55 setattr(inst, 'init_args', init_args)
---> 56 return inst if to_return else f(*args, **kwargs)
57 return _f
~/.local/lib/python3.8/site-packages/fastai/callback/schedule.py in fit_one_cycle(self, n_epoch, lr_max, div, div_final, pct_start, wd, moms, cbs, reset_opt)
111 scheds = {'lr': combined_cos(pct_start, lr_max/div, lr_max, lr_max/div_final),
112 'mom': combined_cos(pct_start, *(self.moms if moms is None else moms))}
--> 113 self.fit(n_epoch, cbs=ParamScheduler(scheds)+L(cbs), reset_opt=reset_opt, wd=wd)
114
115 # Cell
~/anaconda3/envs/fastai2/lib/python3.8/site-packages/fastcore/logargs.py in _f(*args, **kwargs)
54 init_args.update(log)
55 setattr(inst, 'init_args', init_args)
---> 56 return inst if to_return else f(*args, **kwargs)
57 return _f
~/.local/lib/python3.8/site-packages/fastai/learner.py in fit(self, n_epoch, lr, wd, cbs, reset_opt)
205 self.opt.set_hypers(lr=self.lr if lr is None else lr)
206 self.n_epoch = n_epoch
--> 207 self._with_events(self._do_fit, 'fit', CancelFitException, self._end_cleanup)
208
209 def _end_cleanup(self): self.dl,self.xb,self.yb,self.pred,self.loss = None,(None,),(None,),None,None
~/.local/lib/python3.8/site-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
153
154 def _with_events(self, f, event_type, ex, final=noop):
--> 155 try: self(f'before_{event_type}') ;f()
156 except ex: self(f'after_cancel_{event_type}')
157 finally: self(f'after_{event_type}') ;final()
~/.local/lib/python3.8/site-packages/fastai/learner.py in _do_fit(self)
195 for epoch in range(self.n_epoch):
196 self.epoch=epoch
--> 197 self._with_events(self._do_epoch, 'epoch', CancelEpochException)
198
199 @log_args(but='cbs')
~/.local/lib/python3.8/site-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
153
154 def _with_events(self, f, event_type, ex, final=noop):
--> 155 try: self(f'before_{event_type}') ;f()
156 except ex: self(f'after_cancel_{event_type}')
157 finally: self(f'after_{event_type}') ;final()
~/.local/lib/python3.8/site-packages/fastai/learner.py in _do_epoch(self)
189
190 def _do_epoch(self):
--> 191 self._do_epoch_train()
192 self._do_epoch_validate()
193
~/.local/lib/python3.8/site-packages/fastai/learner.py in _do_epoch_train(self)
181 def _do_epoch_train(self):
182 self.dl = self.dls.train
--> 183 self._with_events(self.all_batches, 'train', CancelTrainException)
184
185 def _do_epoch_validate(self, ds_idx=1, dl=None):
~/.local/lib/python3.8/site-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
153
154 def _with_events(self, f, event_type, ex, final=noop):
--> 155 try: self(f'before_{event_type}') ;f()
156 except ex: self(f'after_cancel_{event_type}')
157 finally: self(f'after_{event_type}') ;final()
~/.local/lib/python3.8/site-packages/fastai/learner.py in all_batches(self)
159 def all_batches(self):
160 self.n_iter = len(self.dl)
--> 161 for o in enumerate(self.dl): self.one_batch(*o)
162
163 def _do_one_batch(self):
~/.local/lib/python3.8/site-packages/fastai/learner.py in one_batch(self, i, b)
177 self.iter = i
178 self._split(b)
--> 179 self._with_events(self._do_one_batch, 'batch', CancelBatchException)
180
181 def _do_epoch_train(self):
~/.local/lib/python3.8/site-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
153
154 def _with_events(self, f, event_type, ex, final=noop):
--> 155 try: self(f'before_{event_type}') ;f()
156 except ex: self(f'after_cancel_{event_type}')
157 finally: self(f'after_{event_type}') ;final()
~/.local/lib/python3.8/site-packages/fastai/learner.py in _do_one_batch(self)
168 if not self.training or not len(self.yb): return
169 self('before_backward')
--> 170 self._backward()
171 self('after_backward')
172 self._step()
~/.local/lib/python3.8/site-packages/fastai/learner.py in _backward(self)
150
151 def _step(self): self.opt.step()
--> 152 def _backward(self): self.loss.backward()
153
154 def _with_events(self, f, event_type, ex, final=noop):
~/.local/lib/python3.8/site-packages/torch/tensor.py in backward(self, gradient, retain_graph, create_graph)
183 products. Defaults to ``False``.
184 """
--> 185 torch.autograd.backward(self, gradient, retain_graph, create_graph)
186
187 def register_hook(self, hook):
~/.local/lib/python3.8/site-packages/torch/autograd/__init__.py in backward(tensors, grad_tensors, retain_graph, create_graph, grad_variables)
123 retain_graph = create_graph
124
--> 125 Variable._execution_engine.run_backward(
126 tensors, grad_tensors, retain_graph, create_graph,
127 allow_unreachable=True) # allow_unreachable flag
RuntimeError: Found dtype Char but expected Float
Exception raised from compute_types at /pytorch/aten/src/ATen/native/TensorIterator.cpp:183 (most recent call first):
frame #0: c10::Error::Error(c10::SourceLocation, std::string) + 0x42 (0x7fda948cc1e2 in /home/gannon/.local/lib/python3.8/site-packages/torch/lib/libc10.so)
frame #1: at::TensorIterator::compute_types(at::TensorIteratorConfig const&) + 0x259 (0x7fdad07b7849 in /home/gannon/.local/lib/python3.8/site-packages/torch/lib/libtorch_cpu.so)
frame #2: at::TensorIterator::build(at::TensorIteratorConfig&) + 0x6b (0x7fdad07bafeb in /home/gannon/.local/lib/python3.8/site-packages/torch/lib/libtorch_cpu.so)
frame #3: at::TensorIterator::TensorIterator(at::TensorIteratorConfig&) + 0xdd (0x7fdad07bb65d in /home/gannon/.local/lib/python3.8/site-packages/torch/lib/libtorch_cpu.so)
frame #4: at::native::mse_loss_backward_out(at::Tensor&, at::Tensor const&, at::Tensor const&, at::Tensor const&, long) + 0x18a (0x7fdad06202ba in /home/gannon/.local/lib/python3.8/site-packages/torch/lib/libtorch_cpu.so)
frame #5: at::native::mse_loss_backward(at::Tensor const&, at::Tensor const&, at::Tensor const&, long) + 0x90 (0x7fdad061cce0 in /home/gannon/.local/lib/python3.8/site-packages/torch/lib/libtorch_cpu.so)
frame #6: <unknown function> + 0x10fa2f9 (0x7fdad0a2f2f9 in /home/gannon/.local/lib/python3.8/site-packages/torch/lib/libtorch_cpu.so)
frame #7: <unknown function> + 0xa9ac76 (0x7fdad03cfc76 in /home/gannon/.local/lib/python3.8/site-packages/torch/lib/libtorch_cpu.so)
frame #8: at::mse_loss_backward(at::Tensor const&, at::Tensor const&, at::Tensor const&, long) + 0x119 (0x7fdad0adf949 in /home/gannon/.local/lib/python3.8/site-packages/torch/lib/libtorch_cpu.so)
frame #9: <unknown function> + 0x2e03469 (0x7fdad2738469 in /home/gannon/.local/lib/python3.8/site-packages/torch/lib/libtorch_cpu.so)
frame #10: <unknown function> + 0xa9ac76 (0x7fdad03cfc76 in /home/gannon/.local/lib/python3.8/site-packages/torch/lib/libtorch_cpu.so)
frame #11: at::mse_loss_backward(at::Tensor const&, at::Tensor const&, at::Tensor const&, long) + 0x119 (0x7fdad0adf949 in /home/gannon/.local/lib/python3.8/site-packages/torch/lib/libtorch_cpu.so)
frame #12: torch::autograd::generated::MseLossBackward::apply(std::vector<at::Tensor, std::allocator<at::Tensor> >&&) + 0x1af (0x7fdad26740cf in /home/gannon/.local/lib/python3.8/site-packages/torch/lib/libtorch_cpu.so)
frame #13: <unknown function> + 0x3375bb7 (0x7fdad2caabb7 in /home/gannon/.local/lib/python3.8/site-packages/torch/lib/libtorch_cpu.so)
frame #14: torch::autograd::Engine::evaluate_function(std::shared_ptr<torch::autograd::GraphTask>&, torch::autograd::Node*, torch::autograd::InputBuffer&, std::shared_ptr<torch::autograd::ReadyQueue> const&) + 0x1400 (0x7fdad2ca6400 in /home/gannon/.local/lib/python3.8/site-packages/torch/lib/libtorch_cpu.so)
frame #15: torch::autograd::Engine::thread_main(std::shared_ptr<torch::autograd::GraphTask> const&) + 0x451 (0x7fdad2ca6fa1 in /home/gannon/.local/lib/python3.8/site-packages/torch/lib/libtorch_cpu.so)
frame #16: torch::autograd::Engine::execute_with_graph_task(std::shared_ptr<torch::autograd::GraphTask> const&, std::shared_ptr<torch::autograd::Node>) + 0x37c (0x7fdad2ca46bc in /home/gannon/.local/lib/python3.8/site-packages/torch/lib/libtorch_cpu.so)
frame #17: torch::autograd::python::PythonEngine::execute_with_graph_task(std::shared_ptr<torch::autograd::GraphTask> const&, std::shared_ptr<torch::autograd::Node>) + 0x3c (0x7fdae043acac in /home/gannon/.local/lib/python3.8/site-packages/torch/lib/libtorch_python.so)
frame #18: torch::autograd::Engine::execute(std::vector<torch::autograd::Edge, std::allocator<torch::autograd::Edge> > const&, std::vector<at::Tensor, std::allocator<at::Tensor> > const&, bool, bool, std::vector<torch::autograd::Edge, std::allocator<torch::autograd::Edge> > const&) + 0x803 (0x7fdad2ca39f3 in /home/gannon/.local/lib/python3.8/site-packages/torch/lib/libtorch_cpu.so)
frame #19: torch::autograd::python::PythonEngine::execute(std::vector<torch::autograd::Edge, std::allocator<torch::autograd::Edge> > const&, std::vector<at::Tensor, std::allocator<at::Tensor> > const&, bool, bool, std::vector<torch::autograd::Edge, std::allocator<torch::autograd::Edge> > const&) + 0x4e (0x7fdae043aaae in /home/gannon/.local/lib/python3.8/site-packages/torch/lib/libtorch_python.so)
frame #20: THPEngine_run_backward(THPEngine*, _object*, _object*) + 0x984 (0x7fdae043b6c4 in /home/gannon/.local/lib/python3.8/site-packages/torch/lib/libtorch_python.so)
frame #21: PyCFunction_Call + 0x56 (0x55a157eb3f76 in /home/gannon/anaconda3/envs/fastai2/bin/python)
frame #22: _PyObject_MakeTpCall + 0x22f (0x55a157e7185f in /home/gannon/anaconda3/envs/fastai2/bin/python)
frame #23: _PyEval_EvalFrameDefault + 0x11d0 (0x55a157ef5b90 in /home/gannon/anaconda3/envs/fastai2/bin/python)
frame #24: _PyEval_EvalCodeWithName + 0x2d2 (0x55a157ebea92 in /home/gannon/anaconda3/envs/fastai2/bin/python)
frame #25: _PyFunction_Vectorcall + 0x1e3 (0x55a157ebf943 in /home/gannon/anaconda3/envs/fastai2/bin/python)
frame #26: <unknown function> + 0x10077f (0x55a157e3477f in /home/gannon/anaconda3/envs/fastai2/bin/python)
frame #27: _PyEval_EvalCodeWithName + 0x2d2 (0x55a157ebea92 in /home/gannon/anaconda3/envs/fastai2/bin/python)
frame #28: _PyFunction_Vectorcall + 0x1e3 (0x55a157ebf943 in /home/gannon/anaconda3/envs/fastai2/bin/python)
frame #29: <unknown function> + 0x10075e (0x55a157e3475e in /home/gannon/anaconda3/envs/fastai2/bin/python)
frame #30: _PyFunction_Vectorcall + 0x10b (0x55a157ebf86b in /home/gannon/anaconda3/envs/fastai2/bin/python)
frame #31: <unknown function> + 0x10075e (0x55a157e3475e in /home/gannon/anaconda3/envs/fastai2/bin/python)
frame #32: <unknown function> + 0x18bc0b (0x55a157ebfc0b in /home/gannon/anaconda3/envs/fastai2/bin/python)
frame #33: <unknown function> + 0xfeb84 (0x55a157e32b84 in /home/gannon/anaconda3/envs/fastai2/bin/python)
frame #34: _PyEval_EvalCodeWithName + 0x2d2 (0x55a157ebea92 in /home/gannon/anaconda3/envs/fastai2/bin/python)
frame #35: _PyFunction_Vectorcall + 0x1e3 (0x55a157ebf943 in /home/gannon/anaconda3/envs/fastai2/bin/python)
frame #36: <unknown function> + 0x10075e (0x55a157e3475e in /home/gannon/anaconda3/envs/fastai2/bin/python)
frame #37: _PyFunction_Vectorcall + 0x10b (0x55a157ebf86b in /home/gannon/anaconda3/envs/fastai2/bin/python)
frame #38: <unknown function> + 0x18be79 (0x55a157ebfe79 in /home/gannon/anaconda3/envs/fastai2/bin/python)
frame #39: PyVectorcall_Call + 0x71 (0x55a157e71041 in /home/gannon/anaconda3/envs/fastai2/bin/python)
frame #40: _PyEval_EvalFrameDefault + 0x1fdb (0x55a157ef699b in /home/gannon/anaconda3/envs/fastai2/bin/python)
frame #41: <unknown function> + 0x18bc0b (0x55a157ebfc0b in /home/gannon/anaconda3/envs/fastai2/bin/python)
frame #42: <unknown function> + 0xfeb84 (0x55a157e32b84 in /home/gannon/anaconda3/envs/fastai2/bin/python)
frame #43: _PyEval_EvalCodeWithName + 0x2d2 (0x55a157ebea92 in /home/gannon/anaconda3/envs/fastai2/bin/python)
frame #44: _PyFunction_Vectorcall + 0x1e3 (0x55a157ebf943 in /home/gannon/anaconda3/envs/fastai2/bin/python)
frame #45: <unknown function> + 0x10075e (0x55a157e3475e in /home/gannon/anaconda3/envs/fastai2/bin/python)
frame #46: _PyFunction_Vectorcall + 0x10b (0x55a157ebf86b in /home/gannon/anaconda3/envs/fastai2/bin/python)
frame #47: <unknown function> + 0x10075e (0x55a157e3475e in /home/gannon/anaconda3/envs/fastai2/bin/python)
frame #48: <unknown function> + 0x18bc0b (0x55a157ebfc0b in /home/gannon/anaconda3/envs/fastai2/bin/python)
frame #49: <unknown function> + 0xfeb84 (0x55a157e32b84 in /home/gannon/anaconda3/envs/fastai2/bin/python)
frame #50: _PyEval_EvalCodeWithName + 0x2d2 (0x55a157ebea92 in /home/gannon/anaconda3/envs/fastai2/bin/python)
frame #51: _PyFunction_Vectorcall + 0x1e3 (0x55a157ebf943 in /home/gannon/anaconda3/envs/fastai2/bin/python)
frame #52: <unknown function> + 0x10075e (0x55a157e3475e in /home/gannon/anaconda3/envs/fastai2/bin/python)
frame #53: <unknown function> + 0x18bc0b (0x55a157ebfc0b in /home/gannon/anaconda3/envs/fastai2/bin/python)
frame #54: <unknown function> + 0xfeb84 (0x55a157e32b84 in /home/gannon/anaconda3/envs/fastai2/bin/python)
frame #55: _PyEval_EvalCodeWithName + 0x2d2 (0x55a157ebea92 in /home/gannon/anaconda3/envs/fastai2/bin/python)
frame #56: _PyFunction_Vectorcall + 0x1e3 (0x55a157ebf943 in /home/gannon/anaconda3/envs/fastai2/bin/python)
frame #57: <unknown function> + 0x10075e (0x55a157e3475e in /home/gannon/anaconda3/envs/fastai2/bin/python)
frame #58: _PyEval_EvalCodeWithName + 0x2d2 (0x55a157ebea92 in /home/gannon/anaconda3/envs/fastai2/bin/python)
frame #59: _PyFunction_Vectorcall + 0x1e3 (0x55a157ebf943 in /home/gannon/anaconda3/envs/fastai2/bin/python)
frame #60: PyVectorcall_Call + 0x71 (0x55a157e71041 in /home/gannon/anaconda3/envs/fastai2/bin/python)
frame #61: _PyEval_EvalFrameDefault + 0x1fdb (0x55a157ef699b in /home/gannon/anaconda3/envs/fastai2/bin/python)
frame #62: _PyEval_EvalCodeWithName + 0x7df (0x55a157ebef9f in /home/gannon/anaconda3/envs/fastai2/bin/python)
frame #63: <unknown function> + 0x18bd20 (0x55a157ebfd20 in /home/gannon/anaconda3/envs/fastai2/bin/python)
Can anyone offer any insights into this problem? I found one thread where someone was running into a similar problem, but the solution offered was that the validation set contained categorical values not present in the training set, so they were marked as ‘%na’. But that doesn’t seem to be the case here?
Also if anyone has issues with how I’m presenting this problem, I’m definitely open to improve on that too. I read through the debugging suggestions thread, but I feel like this is still pretty ugly and maybe not that helpful for anyone trying to help?
Thanks a lot