Tabular learner giving error on dtype

I’m trying to create and train a tabular learner, but I seem to be having some trouble. The learner itself can be created, but when I search for a learning rate, I get an error that appears to be related to datatypes. I’ve looked through the df I’m using, and it contains only floats (except for the dep_var column, which is a boolean and treated as a categorical variable). My code and the full error stack trace are below — can anyone shed some light on this?
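For reference, here’s how I verified the dtypes (dep_var holds the name of my dependent column):

print(df_nn_final.dtypes.value_counts())  # all float64...
print(df_nn_final[dep_var].dtype)         # ...except the dependent variable, which is bool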

EDIT: I’m running this on a Paperspace instance, if that makes any difference.

import pandas as pd
import torch.nn.functional as F
from fastai.tabular.all import *

df_nn_final = pd.read_csv(filePath)

# The first 80% of the df is allocated to our training set, the rest is validation
cutoff = int(len(df_nn_final) * 0.8)
splits = (list(range(cutoff)), list(range(cutoff, len(df_nn_final))))

# list.remove() mutates in place and returns None, so build the list first
cont_nn = list(df_nn_final.columns)
cont_nn.remove(dep_var)
cat_nn = [dep_var]
procs_nn = [Categorify]
to_nn = TabularPandas(df_nn_final, procs_nn, cat_nn, cont_nn,
                      splits=splits, y_names=dep_var)

dls = to_nn.dataloaders(1024)
learn = tabular_learner(dls, layers=[500,250],
                        n_out=1, loss_func=F.mse_loss)
lr_min,lr_steep = learn.lr_find()

Running the above code gives the following error:


RuntimeError                              Traceback (most recent call last)
<ipython-input> in <module>
----> 1 lr_min,lr_steep = learn.lr_find()

/opt/conda/envs/fastai/lib/python3.8/site-packages/fastai/callback/schedule.py in lr_find(self, start_lr, end_lr, num_it, stop_div, show_plot, suggestions)
    222     n_epoch = num_it//len(self.dls.train) + 1
    223     cb=LRFinder(start_lr=start_lr, end_lr=end_lr, num_it=num_it, stop_div=stop_div)
--> 224     with self.no_logging(): self.fit(n_epoch, cbs=cb)
    225     if show_plot: self.recorder.plot_lr_find()
    226     if suggestions:

/opt/conda/envs/fastai/lib/python3.8/site-packages/fastai/learner.py in fit(self, n_epoch, lr, wd, cbs, reset_opt)
    203             self.opt.set_hypers(lr=self.lr if lr is None else lr)
    204             self.n_epoch = n_epoch
--> 205             self._with_events(self._do_fit, 'fit', CancelFitException, self._end_cleanup)
    206
    207     def _end_cleanup(self): self.dl,self.xb,self.yb,self.pred,self.loss = None,(None,),(None,),None,None

/opt/conda/envs/fastai/lib/python3.8/site-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
    152
    153     def _with_events(self, f, event_type, ex, final=noop):
--> 154         try: self(f'before_{event_type}') ;f()
    155         except ex: self(f'after_cancel_{event_type}')
    156         finally: self(f'after_{event_type}') ;final()

/opt/conda/envs/fastai/lib/python3.8/site-packages/fastai/learner.py in _do_fit(self)
    194         for epoch in range(self.n_epoch):
    195             self.epoch=epoch
--> 196             self._with_events(self._do_epoch, 'epoch', CancelEpochException)
    197
    198     def fit(self, n_epoch, lr=None, wd=None, cbs=None, reset_opt=False):

/opt/conda/envs/fastai/lib/python3.8/site-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
    152
    153     def _with_events(self, f, event_type, ex, final=noop):
--> 154         try: self(f'before_{event_type}') ;f()
    155         except ex: self(f'after_cancel_{event_type}')
    156         finally: self(f'after_{event_type}') ;final()

/opt/conda/envs/fastai/lib/python3.8/site-packages/fastai/learner.py in _do_epoch(self)
    188
    189     def _do_epoch(self):
--> 190         self._do_epoch_train()
    191         self._do_epoch_validate()
    192

/opt/conda/envs/fastai/lib/python3.8/site-packages/fastai/learner.py in _do_epoch_train(self)
    180     def _do_epoch_train(self):
    181         self.dl = self.dls.train
--> 182         self._with_events(self.all_batches, 'train', CancelTrainException)
    183
    184     def _do_epoch_validate(self, ds_idx=1, dl=None):

/opt/conda/envs/fastai/lib/python3.8/site-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
    152
    153     def _with_events(self, f, event_type, ex, final=noop):
--> 154         try: self(f'before_{event_type}') ;f()
    155         except ex: self(f'after_cancel_{event_type}')
    156         finally: self(f'after_{event_type}') ;final()

/opt/conda/envs/fastai/lib/python3.8/site-packages/fastai/learner.py in all_batches(self)
    158     def all_batches(self):
    159         self.n_iter = len(self.dl)
--> 160         for o in enumerate(self.dl): self.one_batch(*o)
    161
    162     def _do_one_batch(self):

/opt/conda/envs/fastai/lib/python3.8/site-packages/fastai/learner.py in one_batch(self, i, b)
    176         self.iter = i
    177         self._split(b)
--> 178         self._with_events(self._do_one_batch, 'batch', CancelBatchException)
    179
    180     def _do_epoch_train(self):

/opt/conda/envs/fastai/lib/python3.8/site-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
    152
    153     def _with_events(self, f, event_type, ex, final=noop):
--> 154         try: self(f'before_{event_type}') ;f()
    155         except ex: self(f'after_cancel_{event_type}')
    156         finally: self(f'after_{event_type}') ;final()

/opt/conda/envs/fastai/lib/python3.8/site-packages/fastai/learner.py in _do_one_batch(self)
    167         if not self.training or not len(self.yb): return
    168         self('before_backward')
--> 169         self._backward()
    170         self('after_backward')
    171         self._step()

/opt/conda/envs/fastai/lib/python3.8/site-packages/fastai/learner.py in _backward(self)
    149
    150     def _step(self): self.opt.step()
--> 151     def _backward(self): self.loss.backward()
    152
    153     def _with_events(self, f, event_type, ex, final=noop):

/opt/conda/envs/fastai/lib/python3.8/site-packages/torch/tensor.py in backward(self, gradient, retain_graph, create_graph)
    219                 retain_graph=retain_graph,
    220                 create_graph=create_graph)
--> 221         torch.autograd.backward(self, gradient, retain_graph, create_graph)
    222
    223     def register_hook(self, hook):

/opt/conda/envs/fastai/lib/python3.8/site-packages/torch/autograd/__init__.py in backward(tensors, grad_tensors, retain_graph, create_graph, grad_variables)
    128         retain_graph = create_graph
    129
--> 130     Variable._execution_engine.run_backward(
    131         tensors, grad_tensors_, retain_graph, create_graph,
    132         allow_unreachable=True)  # allow_unreachable flag

RuntimeError: Found dtype Char but expected Float

Is your task classification or regression? If it’s the latter, you need to change the dtype of your dependent variable with:

df_nn_final[dep_var] = df_nn_final[dep_var].astype(np.float32)

Otherwise, you need to pass y_block=CategoryBlock() to your TabularPandas call to specify that you’re doing classification rather than regression.
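For the classification route, a minimal sketch of what that might look like, reusing the variable names from your snippet (with a categorical target you’d also drop n_out=1 and loss_func=F.mse_loss, since tabular_learner can then infer the output size and default to CrossEntropyLossFlat):

to_nn = TabularPandas(df_nn_final, procs_nn, cat_nn, cont_nn,
                      splits=splits, y_names=dep_var,
                      y_block=CategoryBlock())
dls = to_nn.dataloaders(1024)
learn = tabular_learner(dls, layers=[500,250])  # loss and n_out inferred from the targets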

Please do let me know if this helped!

Borna,

This worked - thank you! I am now able to train this model.

I now get ‘nan’ for my validation loss at every step, though I suspect that’s a separate problem. You wouldn’t happen to have any other suggestions, would you?

I don’t see anything wrong with the snippet you’ve posted. Maybe you could also share the rest of your code and what your DataFrame looks like?
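In the meantime, one common culprit worth ruling out (just a guess without seeing the data): NaNs in the inputs or unnormalized continuous columns can both produce nan losses. Something like this would be a quick check:

print(df_nn_final.isna().sum().sum())            # any missing values in the inputs?
procs_nn = [Categorify, FillMissing, Normalize]  # let fastai fill NaNs and normalize the continuous columns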