Reuse some layers for multi-label classification

Hi,

I am using the text_classifier_learner function to train a multi-label classifier. My labels have a kind of hierarchy, i.e. they have the form “A_i#B_j”, with A_i and B_j belonging to two different lists. My idea was to first train a classifier on the first part of the labels (the “parents”), then reuse that classifier to predict the full labels (i.e. A#B = parent#child). To do so, I’d like to reuse all the layers of my first classifier except the last one, since the second classifier has more classes than the first. The following code ran without any error:

multilabel_classifier_1 = text_classifier_learner(data_clas_1, drop_mult=1.5)
# ... training ...

# Take all the weights except those of the last linear layer
weights = multilabel_classifier_1.model.state_dict()
weights = {k: v for k, v in weights.items() if k not in ['1.layers.6.weight', '1.layers.6.bias']}

# Keep the freshly initialized last layer of the second classifier,
# since it has a different output size
multilabel_classifier_2 = text_classifier_learner(data_clas_2, drop_mult=1)
weights_2 = multilabel_classifier_2.model.state_dict()
weights_2 = {k: v for k, v in weights_2.items() if k in ['1.layers.6.weight', '1.layers.6.bias']}

weights.update(weights_2)
multilabel_classifier_2.model.load_state_dict(weights)
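
As a side note, the same transfer can be written more compactly with the strict=False flag of PyTorch’s load_state_dict, which simply skips keys that are absent from the supplied dict. A sketch, assuming the same layer key names as above:

# Drop the last linear layer's weights, then load non-strictly so that
# classifier 2 keeps its own freshly initialized final layer.
last_layer_keys = {'1.layers.6.weight', '1.layers.6.bias'}
weights = {k: v for k, v in multilabel_classifier_1.model.state_dict().items()
           if k not in last_layer_keys}
multilabel_classifier_2.model.load_state_dict(weights, strict=False)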

However, when I try to train multilabel_classifier_2, I get this error:

RuntimeError Traceback (most recent call last)
in <module>()
----> 1 multilabel_classifier.fit_one_cycle(1, 1e-2)

~/anaconda3/envs/fastai/lib/python3.7/site-packages/fastai/train.py in fit_one_cycle(learn, cyc_len, max_lr, moms, div_factor, pct_start, wd, callbacks, **kwargs)
19 callbacks.append(OneCycleScheduler(learn, max_lr, moms=moms, div_factor=div_factor,
20 pct_start=pct_start, **kwargs))
---> 21 learn.fit(cyc_len, max_lr, wd=wd, callbacks=callbacks)
22
23 def lr_find(learn:Learner, start_lr:Floats=1e-7, end_lr:Floats=10, num_it:int=100, stop_div:bool=True, **kwargs:Any):

~/anaconda3/envs/fastai/lib/python3.7/site-packages/fastai/basic_train.py in fit(self, epochs, lr, wd, callbacks)
164 callbacks = [cb(self) for cb in self.callback_fns] + listify(callbacks)
165 fit(epochs, self.model, self.loss_func, opt=self.opt, data=self.data, metrics=self.metrics,
---> 166 callbacks=self.callbacks+callbacks)
167
168 def create_opt(self, lr:Floats, wd:Floats=0.)->None:

~/anaconda3/envs/fastai/lib/python3.7/site-packages/fastai/basic_train.py in fit(epochs, model, loss_func, opt, data, callbacks, metrics)
92 except Exception as e:
93 exception = e
---> 94 raise e
95 finally: cb_handler.on_train_end(exception)
96

~/anaconda3/envs/fastai/lib/python3.7/site-packages/fastai/basic_train.py in fit(epochs, model, loss_func, opt, data, callbacks, metrics)
87 if hasattr(data,'valid_dl') and data.valid_dl is not None and len(data.valid_ds.items) > 0:
88 val_loss = validate(model, data.valid_dl, loss_func=loss_func,
---> 89 cb_handler=cb_handler, pbar=pbar)
90 else: val_loss=None
91 if cb_handler.on_epoch_end(val_loss): break

~/anaconda3/envs/fastai/lib/python3.7/site-packages/fastai/basic_train.py in validate(model, dl, loss_func, cb_handler, pbar, average, n_batch)
52 if not is_listy(yb): yb = [yb]
53 nums.append(yb[0].shape[0])
---> 54 if cb_handler and cb_handler.on_batch_end(val_losses[-1]): break
55 if n_batch and (len(nums)>=n_batch): break
56 nums = np.array(nums, dtype=np.float32)

~/anaconda3/envs/fastai/lib/python3.7/site-packages/fastai/callback.py in on_batch_end(self, loss)
237 "Handle end of processing one batch with loss."
238 self.state_dict['last_loss'] = loss
---> 239 stop = np.any(self('batch_end', not self.state_dict['train']))
240 if self.state_dict['train']:
241 self.state_dict['iteration'] += 1

~/anaconda3/envs/fastai/lib/python3.7/site-packages/fastai/callback.py in __call__(self, cb_name, call_mets, **kwargs)
185 def __call__(self, cb_name, call_mets=True, **kwargs)->None:
186 "Call through to all of the CallbakHandler functions."
---> 187 if call_mets: [getattr(met, f'on_{cb_name}')(**self.state_dict, **kwargs) for met in self.metrics]
188 return [getattr(cb, f'on_{cb_name}')(**self.state_dict, **kwargs) for cb in self.callbacks]
189

~/anaconda3/envs/fastai/lib/python3.7/site-packages/fastai/callback.py in <listcomp>(.0)
185 def __call__(self, cb_name, call_mets=True, **kwargs)->None:
186 "Call through to all of the CallbakHandler functions."
---> 187 if call_mets: [getattr(met, f'on_{cb_name}')(**self.state_dict, **kwargs) for met in self.metrics]
188 return [getattr(cb, f'on_{cb_name}')(**self.state_dict, **kwargs) for cb in self.callbacks]
189

~/anaconda3/envs/fastai/lib/python3.7/site-packages/fastai/callback.py in on_batch_end(self, last_output, last_target, **kwargs)
272 if not is_listy(last_target): last_target=[last_target]
273 self.count += last_target[0].size(0)
---> 274 self.val += last_target[0].size(0) * self.func(last_output, *last_target).detach().cpu()
275
276 def on_epoch_end(self, **kwargs):

~/anaconda3/envs/fastai/lib/python3.7/site-packages/fastai/metrics.py in accuracy(input, targs)
37 input = input.argmax(dim=-1).view(n,-1)
38 targs = targs.view(n,-1)
---> 39 return (input==targs).float().mean()
40
41 def error_rate(input:Tensor, targs:Tensor)->Rank0Tensor:

RuntimeError: Expected object of scalar type Long but got scalar type Float for argument #2 'other'

My two classifiers have the same structure except for the output size of the last layer:

SequentialRNN(
  (0): MultiBatchRNNCore(
    (encoder): Embedding(17229, 400, padding_idx=1)
    (encoder_dp): EmbeddingDropout(
      (emb): Embedding(17229, 400, padding_idx=1)
    )
    (rnns): ModuleList(
      (0): WeightDropout(
        (module): LSTM(400, 1150)
      )
      (1): WeightDropout(
        (module): LSTM(1150, 1150)
      )
      (2): WeightDropout(
        (module): LSTM(1150, 400)
      )
    )
    (input_dp): RNNDropout()
    (hidden_dps): ModuleList(
      (0): RNNDropout()
      (1): RNNDropout()
      (2): RNNDropout()
    )
  )
  (1): PoolingLinearClassifier(
    (layers): Sequential(
      (0): BatchNorm1d(1200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (1): Dropout(p=0.4)
      (2): Linear(in_features=1200, out_features=50, bias=True)
      (3): ReLU(inplace)
      (4): BatchNorm1d(50, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (5): Dropout(p=0.1)
      (6): Linear(in_features=50, out_features=13 or 7, bias=True)
    )
  )
)
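
For reference, the ‘1.layers.6.*’ key names used in the snippet above can be read off by listing the state dict; a quick check, with the shapes one would expect from the structure printed here:

for name, tensor in multilabel_classifier_1.model.state_dict().items():
    print(name, tuple(tensor.shape))
# ...
# 1.layers.6.weight (7, 50)   <- last linear layer (out_features=7 here)
# 1.layers.6.bias (7,)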

I’m working with PyTorch v1 and fastai v1. Has anyone run into a similar error before? I found this, but it doesn’t seem to apply to my problem.

Thanks :slight_smile:

Sorry, I just forgot to set:

multilabel_classifier_2.metrics = [accuracy_thresh]

on the second classifier… It had nothing to do with reusing the layers: the default accuracy metric compares Long integer class targets against the argmax of the predictions, whereas multi-label targets are one-hot Float tensors, which is exactly the type mismatch in the error above.
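
For anyone who hits the same RuntimeError, here is a minimal sketch of the mismatch with made-up tensors (plain PyTorch, not the fastai internals):

import torch

preds = torch.randn(4, 13)        # raw outputs for 4 samples, 13 classes
targs = torch.zeros(4, 13)        # multi-label targets are one-hot Floats
targs[0, 2] = 1.

# accuracy-style comparison: Long argmax vs Float one-hot -> RuntimeError
# (preds.argmax(dim=-1).view(4, -1) == targs)

# accuracy_thresh-style comparison: threshold the sigmoid and compare
# values of the same type instead
acc = ((preds.sigmoid() > 0.5) == (targs > 0.5)).float().mean()
print(acc)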