MultiCategoryBlock for Tabular non-bool values

I’ve figured out how to prep/train/export/load/predict a model with a single float y_value, but as soon as I add in a y_block param to TabularPandas of…

y_block=MultiCategoryBlock(encoded=True, vocab=y_values)

…the dataloaders (seen via dls = tab_df.dataloaders(bs=4096); dls.show_batch()) show that the float value has been converted to a bool (every value becomes just 1.0). This is with a single y_value; multiple y_values throw an error, so I was trying to get MultiCategoryBlock working with just one value to see whether I was doing something wrong.
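
For reference, this is roughly the setup that produces it (a minimal sketch; the column names here are placeholders rather than my real ones):

y_values = ['target_a']  # placeholder: a single float column I want to predict
tab_df = TabularPandas(df, procs=[Categorify, FillMissing, Normalize],
                       cat_names=cat_names, cont_names=cont_names,
                       y_names=y_values,
                       y_block=MultiCategoryBlock(encoded=True, vocab=y_values),
                       splits=RandomSplitter(valid_pct=0.2)(range_of(df)))
dls = tab_df.dataloaders(bs=4096)
dls.show_batch()  # the float target is shown as 1.0 in every row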

Is this a known issue? Is it intentional that MultiCategoryBlock can only be used with bool values? The docs definitely only use it with bools: https://docs.fast.ai/tabular.core.html#Multi-label-categories

I find it extremely unlikely that I’m the first person ever to want to predict more than one value at once, so it’s much more likely that I’m just doing something wrong and misunderstanding how it’s supposed to be used. Any pointers in the right direction would be very much appreciated.

Yes, MultiCategoryBlock is for discrete (multi-label) categories. For predicting one or more continuous values there is RegressionBlock, which is probably what you want.
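
Something along these lines should work (an untested sketch, reusing the placeholder names from above):

tab_df = TabularPandas(df, procs=[Categorify, FillMissing, Normalize],
                       cat_names=cat_names, cont_names=cont_names,
                       y_names=y_values,              # one or more continuous target columns
                       y_block=RegressionBlock(),
                       splits=RandomSplitter(valid_pct=0.2)(range_of(df)))
dls = tab_df.dataloaders(bs=4096)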

That seems perfect, thanks. Even adding extra y_names works now, with the predictions coming back as floats (which I assume I can safely treat as confidence levels, i.e. 0.0847 is a False with 91.53% confidence).
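
To spell out how I’m reading those raw outputs (my own interpretation, not something from the fastai docs):

raw = 0.0847                             # raw model output for one target
pred = raw >= 0.5                        # below 0.5 I treat it as False, otherwise True
confidence = raw if pred else 1 - raw    # 1 - 0.0847 = 0.9153, i.e. 91.53% confident it's False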

The next issue to tackle is the error I mentioned before.
This works fine:
learn = tabular_learner(dls)
This gives an error when fitting:
learn = tabular_learner(dls, metrics=error_rate)

The dls come from here:

tab_df = TabularPandas(df, procs=[Categorify, FillMissing, Normalize],
                       cat_names = list_with_24_categories,
                       cont_names = list_with_2_continuous_ranges,
                       y_names=y_names_6_values,
                       y_block=RegressionBlock,
                       splits=RandomSplitter(valid_pct=0.2)(range_of(df))
                      )
dls = tab_df.dataloaders(bs=64*6)

Code causing an error:

learning_rate = 5e-2
learn.fit_one_cycle(1, learning_rate)

Error:

AssertionError                            Traceback (most recent call last)
<ipython-input-106-24445d14d700> in <module>
      1 learning_rate = 5e-2
----> 2 learn.fit_one_cycle(1, learning_rate)
      3 #learn.fit(2, learning_rate)

E:\ProgramFiles\anaconda\lib\site-packages\fastai\callback\schedule.py in fit_one_cycle(self, n_epoch, lr_max, div, div_final, pct_start, wd, moms, cbs, reset_opt)
    110     scheds = {'lr': combined_cos(pct_start, lr_max/div, lr_max, lr_max/div_final),
    111               'mom': combined_cos(pct_start, *(self.moms if moms is None else moms))}
--> 112     self.fit(n_epoch, cbs=ParamScheduler(scheds)+L(cbs), reset_opt=reset_opt, wd=wd)
    113 
    114 # Cell

E:\ProgramFiles\anaconda\lib\site-packages\fastai\learner.py in fit(self, n_epoch, lr, wd, cbs, reset_opt)
    209             self.opt.set_hypers(lr=self.lr if lr is None else lr)
    210             self.n_epoch = n_epoch
--> 211             self._with_events(self._do_fit, 'fit', CancelFitException, self._end_cleanup)
    212 
    213     def _end_cleanup(self): self.dl,self.xb,self.yb,self.pred,self.loss = None,(None,),(None,),None,None

E:\ProgramFiles\anaconda\lib\site-packages\fastai\learner.py in _with_events(self, f, event_type, ex, final)
    158 
    159     def _with_events(self, f, event_type, ex, final=noop):
--> 160         try: self(f'before_{event_type}');  f()
    161         except ex: self(f'after_cancel_{event_type}')
    162         self(f'after_{event_type}');  final()

E:\ProgramFiles\anaconda\lib\site-packages\fastai\learner.py in _do_fit(self)
    200         for epoch in range(self.n_epoch):
    201             self.epoch=epoch
--> 202             self._with_events(self._do_epoch, 'epoch', CancelEpochException)
    203 
    204     def fit(self, n_epoch, lr=None, wd=None, cbs=None, reset_opt=False):

E:\ProgramFiles\anaconda\lib\site-packages\fastai\learner.py in _with_events(self, f, event_type, ex, final)
    158 
    159     def _with_events(self, f, event_type, ex, final=noop):
--> 160         try: self(f'before_{event_type}');  f()
    161         except ex: self(f'after_cancel_{event_type}')
    162         self(f'after_{event_type}');  final()

E:\ProgramFiles\anaconda\lib\site-packages\fastai\learner.py in _do_epoch(self)
    195     def _do_epoch(self):
    196         self._do_epoch_train()
--> 197         self._do_epoch_validate()
    198 
    199     def _do_fit(self):

E:\ProgramFiles\anaconda\lib\site-packages\fastai\learner.py in _do_epoch_validate(self, ds_idx, dl)
    191         if dl is None: dl = self.dls[ds_idx]
    192         self.dl = dl
--> 193         with torch.no_grad(): self._with_events(self.all_batches, 'validate', CancelValidException)
    194 
    195     def _do_epoch(self):

E:\ProgramFiles\anaconda\lib\site-packages\fastai\learner.py in _with_events(self, f, event_type, ex, final)
    158 
    159     def _with_events(self, f, event_type, ex, final=noop):
--> 160         try: self(f'before_{event_type}');  f()
    161         except ex: self(f'after_cancel_{event_type}')
    162         self(f'after_{event_type}');  final()

E:\ProgramFiles\anaconda\lib\site-packages\fastai\learner.py in all_batches(self)
    164     def all_batches(self):
    165         self.n_iter = len(self.dl)
--> 166         for o in enumerate(self.dl): self.one_batch(*o)
    167 
    168     def _do_one_batch(self):

E:\ProgramFiles\anaconda\lib\site-packages\fastai\learner.py in one_batch(self, i, b)
    182         self.iter = i
    183         self._split(b)
--> 184         self._with_events(self._do_one_batch, 'batch', CancelBatchException)
    185 
    186     def _do_epoch_train(self):

E:\ProgramFiles\anaconda\lib\site-packages\fastai\learner.py in _with_events(self, f, event_type, ex, final)
    160         try: self(f'before_{event_type}');  f()
    161         except ex: self(f'after_cancel_{event_type}')
--> 162         self(f'after_{event_type}');  final()
    163 
    164     def all_batches(self):

E:\ProgramFiles\anaconda\lib\site-packages\fastai\learner.py in __call__(self, event_name)
    139 
    140     def ordered_cbs(self, event): return [cb for cb in self.cbs.sorted('order') if hasattr(cb, event)]
--> 141     def __call__(self, event_name): L(event_name).map(self._call_one)
    142 
    143     def _call_one(self, event_name):

E:\ProgramFiles\anaconda\lib\site-packages\fastcore\foundation.py in map(self, f, gen, *args, **kwargs)
    152     def range(cls, a, b=None, step=None): return cls(range_of(a, b=b, step=step))
    153 
--> 154     def map(self, f, *args, gen=False, **kwargs): return self._new(map_ex(self, f, *args, gen=gen, **kwargs))
    155     def argwhere(self, f, negate=False, **kwargs): return self._new(argwhere(self, f, negate, **kwargs))
    156     def filter(self, f=noop, negate=False, gen=False, **kwargs):

E:\ProgramFiles\anaconda\lib\site-packages\fastcore\basics.py in map_ex(iterable, f, gen, *args, **kwargs)
    664     res = map(g, iterable)
    665     if gen: return res
--> 666     return list(res)
    667 
    668 # Cell

E:\ProgramFiles\anaconda\lib\site-packages\fastcore\basics.py in __call__(self, *args, **kwargs)
    649             if isinstance(v,_Arg): kwargs[k] = args.pop(v.i)
    650         fargs = [args[x.i] if isinstance(x, _Arg) else x for x in self.pargs] + args[self.maxi+1:]
--> 651         return self.func(*fargs, **kwargs)
    652 
    653 # Cell

E:\ProgramFiles\anaconda\lib\site-packages\fastai\learner.py in _call_one(self, event_name)
    143     def _call_one(self, event_name):
    144         if not hasattr(event, event_name): raise Exception(f'missing {event_name}')
--> 145         for cb in self.cbs.sorted('order'): cb(event_name)
    146 
    147     def _bn_bias_state(self, with_bias): return norm_bias_params(self.model, with_bias).map(self.opt.state)

E:\ProgramFiles\anaconda\lib\site-packages\fastai\callback\core.py in __call__(self, event_name)
     42                (self.run_valid and not getattr(self, 'training', False)))
     43         res = None
---> 44         if self.run and _run: res = getattr(self, event_name, noop)()
     45         if event_name=='after_fit': self.run=True #Reset self.run to True at each end of fit
     46         return res

E:\ProgramFiles\anaconda\lib\site-packages\fastai\learner.py in after_batch(self)
    492         if len(self.yb) == 0: return
    493         mets = self._train_mets if self.training else self._valid_mets
--> 494         for met in mets: met.accumulate(self.learn)
    495         if not self.training: return
    496         self.lrs.append(self.opt.hypers[-1]['lr'])

E:\ProgramFiles\anaconda\lib\site-packages\fastai\learner.py in accumulate(self, learn)
    414     def accumulate(self, learn):
    415         bs = find_bs(learn.yb)
--> 416         self.total += learn.to_detach(self.func(learn.pred, *learn.yb))*bs
    417         self.count += bs
    418     @property

E:\ProgramFiles\anaconda\lib\site-packages\fastai\metrics.py in error_rate(inp, targ, axis)
    103 def error_rate(inp, targ, axis=-1):
    104     "1 - `accuracy`"
--> 105     return 1 - accuracy(inp, targ, axis=axis)
    106 
    107 # Cell

E:\ProgramFiles\anaconda\lib\site-packages\fastai\metrics.py in accuracy(inp, targ, axis)
     97 def accuracy(inp, targ, axis=-1):
     98     "Compute accuracy with `targ` when `pred` is bs * n_classes"
---> 99     pred,targ = flatten_check(inp.argmax(dim=axis), targ)
    100     return (pred == targ).float().mean()
    101 

E:\ProgramFiles\anaconda\lib\site-packages\fastai\torch_core.py in flatten_check(inp, targ)
    799     "Check that `out` and `targ` have the same number of elements and flatten them."
    800     inp,targ = TensorBase(inp.contiguous()).view(-1),TensorBase(targ.contiguous()).view(-1)
--> 801     test_eq(len(inp), len(targ))
    802     return inp,targ

E:\ProgramFiles\anaconda\lib\site-packages\fastcore\test.py in test_eq(a, b)
     33 def test_eq(a,b):
     34     "`test` that `a==b`"
---> 35     test(a,b,equals, '==')
     36 
     37 # Cell

E:\ProgramFiles\anaconda\lib\site-packages\fastcore\test.py in test(a, b, cmp, cname)
     23     "`assert` that `cmp(a,b)`; display inputs and `cname or cmp.__name__` if it fails"
     24     if cname is None: cname=cmp.__name__
---> 25     assert cmp(a,b),f"{cname}:\n{a}\n{b}"
     26 
     27 # Cell

AssertionError: ==:
384
2304

I don’t think it’s a coincidence that 384 * 6 (my number of y_names) = 2304. Are metrics only useful with a single y_name? Or am I doing something else wrong?
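
For what it’s worth, here is where I think the 384 vs 2304 comes from, mimicking what accuracy/flatten_check do in the traceback above with the shapes from my batch:

import torch

bs, n_targets = 64*6, 6                    # bs = 384 from the dls above, 6 y_names
pred = torch.randn(bs, n_targets)          # model output: one float per target column
targ = torch.randn(bs, n_targets)          # targets produced by RegressionBlock

print(pred.argmax(dim=-1).view(-1).shape)  # torch.Size([384]) - argmax collapses the 6 columns
print(targ.view(-1).shape)                 # torch.Size([2304]) - targets keep all 6 columns
# flatten_check asserts the two lengths match, hence the AssertionError: 384 vs 2304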