Cadene models are not working with fastai v2

I was trying to put together a quick port of the Cadene models from fastai v1 to fastai v2.

I tried to generate a Learner object with an se_resnext50_32x4d model. The Learner object was created successfully; however, running lr_find (or any fit command, for that matter) raises the following error:

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-73-4726d92214e5> in <module>
----> 1 lrf = learn.lr_find()

/opt/anaconda3/lib/python3.7/site-packages/fastai2/callback/schedule.py in lr_find(self, start_lr, end_lr, num_it, stop_div, show_plot)
    178     n_epoch = num_it//len(self.dbunch.train_dl) + 1
    179     cb=LRFinder(start_lr=start_lr, end_lr=end_lr, num_it=num_it, stop_div=stop_div)
--> 180     with self.no_logging(): self.fit(n_epoch, cbs=cb)
    181     if show_plot: self.recorder.plot_lr_find()

/opt/anaconda3/lib/python3.7/site-packages/fastai2/learner.py in fit(self, n_epoch, lr, wd, cbs, reset_opt)
    250                     try:
    251                         self.epoch=epoch;          self('begin_epoch')
--> 252                         self._do_epoch_train()
    253                         self._do_epoch_validate()
    254                     except CancelEpochException:   self('after_cancel_epoch')

/opt/anaconda3/lib/python3.7/site-packages/fastai2/learner.py in _do_epoch_train(self)
    228         try:
    229             self.dl = self.dbunch.train_dl;                  self('begin_train')
--> 230             self.all_batches()
    231         except CancelTrainException:                         self('after_cancel_train')
    232         finally:                                             self('after_train')

/opt/anaconda3/lib/python3.7/site-packages/fastai2/learner.py in all_batches(self)
    206     def all_batches(self):
    207         self.n_iter = len(self.dl)
--> 208         for o in enumerate(self.dl): self.one_batch(*o)
    209 
    210     def one_batch(self, i, b):

/opt/anaconda3/lib/python3.7/site-packages/fastai2/learner.py in one_batch(self, i, b)
    217             if not self.training: return
    218             self.loss.backward();                            self('after_backward')
--> 219             self.opt.step();                                 self('after_step')
    220             self.opt.zero_grad()
    221         except CancelBatchException:                         self('after_cancel_batch')

/opt/anaconda3/lib/python3.7/site-packages/fastai2/optimizer.py in step(self)
     62 
     63     def step(self):
---> 64         for p,pg,state,hyper in self.all_params(with_grad=True):
     65             for stat in self.stats:    state = stat(state, p, **hyper)
     66             for step in self.steppers: step(p, **{**state, **hyper})

/opt/anaconda3/lib/python3.7/site-packages/fastai2/optimizer.py in all_params(self, n, with_grad)
     14     def all_params(self, n=slice(None), with_grad=False):
     15         res = L((p,pg,self.state[p],hyper) for pg,hyper in zip(self.param_groups[n],self.hypers[n]) for p in pg)
---> 16         return L(o for o in res if o[0].grad is not None) if with_grad else res
     17 
     18     def _set_require_grad(self, rg, p,pg,state,h): p.requires_grad_(rg or state.get('force_train', False))

/opt/anaconda3/lib/python3.7/site-packages/fastai2/core/foundation.py in __call__(cls, x, *args, **kwargs)
     40             return x
     41 
---> 42         res = super().__call__(*((x,) + args), **kwargs)
     43         res._newchk = 0
     44         return res

/opt/anaconda3/lib/python3.7/site-packages/fastai2/core/foundation.py in __init__(self, items, use_list, match, *rest)
    287         if items is None: items = []
    288         if (use_list is not None) or not _is_array(items):
--> 289             items = list(items) if use_list else _listify(items)
    290         if match is not None:
    291             if is_coll(match): match = len(match)

/opt/anaconda3/lib/python3.7/site-packages/fastai2/core/foundation.py in _listify(o)
    223     if isinstance(o, list): return o
    224     if isinstance(o, str) or _is_array(o): return [o]
--> 225     if is_iter(o): return list(o)
    226     return [o]
    227 

/opt/anaconda3/lib/python3.7/site-packages/fastai2/optimizer.py in <genexpr>(.0)
     14     def all_params(self, n=slice(None), with_grad=False):
     15         res = L((p,pg,self.state[p],hyper) for pg,hyper in zip(self.param_groups[n],self.hypers[n]) for p in pg)
---> 16         return L(o for o in res if o[0].grad is not None) if with_grad else res
     17 
     18     def _set_require_grad(self, rg, p,pg,state,h): p.requires_grad_(rg or state.get('force_train', False))

/opt/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py in __getattr__(self, name)
    583                 return modules[name]
    584         raise AttributeError("'{}' object has no attribute '{}'".format(
--> 585             type(self).__name__, name))
    586 
    587     def __setattr__(self, name, value):

AttributeError: 'Sequential' object has no attribute 'grad'

It seems that the optimizer is somehow being passed the torch modules rather than their parameters (output from learn.opt.param_groups):

(#1) [(#2) [Sequential(
  (0): SEResNeXtBottleneck(
    (conv1): Conv2d(512, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=32, bias=False)
    (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv3): Conv2d(512, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (se_module): SEModule(
      (avg_pool): AdaptiveAvgPool2d(output_size=1)
      (fc1): Conv2d(1024, 64, kernel_size=(1, 1), stride=(1, 1))
      (relu): ReLU(inplace=True)
      (fc2): Conv2d(64, 1024, kernel_size=(1, 1), stride=(1, 1))
      (sigmoid): Sigmoid()
    )
    (downsample): Sequential(
      (0): Conv2d(512, 1024, kernel_size=(1, 1), stride=(2, 2), bias=False)
      (1): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (1): SEResNeXtBottleneck(
    (conv1): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
    (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv3): Conv2d(512, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (se_module): SEModule(
      (avg_pool): AdaptiveAvgPool2d(output_size=1)
      (fc1): Conv2d(1024, 64, kernel_size=(1, 1), stride=(1, 1))
      (relu): ReLU(inplace=True)
      (fc2): Conv2d(64, 1024, kernel_size=(1, 1), stride=(1, 1))
      (sigmoid): Sigmoid()
    )
  )
  (2): SEResNeXtBottleneck(
    (conv1): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
    (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv3): Conv2d(512, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (se_module): SEModule(
      (avg_pool): AdaptiveAvgPool2d(output_size=1)
      (fc1): Conv2d(1024, 64, kernel_size=(1, 1), stride=(1, 1))
      (relu): ReLU(inplace=True)
      (fc2): Conv2d(64, 1024, kernel_size=(1, 1), stride=(1, 1))
      (sigmoid): Sigmoid()
    )
  )
  (3): SEResNeXtBottleneck(
    (conv1): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
    (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv3): Conv2d(512, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (se_module): SEModule(
      (avg_pool): AdaptiveAvgPool2d(output_size=1)
      (fc1): Conv2d(1024, 64, kernel_size=(1, 1), stride=(1, 1))
      (relu): ReLU(inplace=True)
      (fc2): Conv2d(64, 1024, kernel_size=(1, 1), stride=(1, 1))
      (sigmoid): Sigmoid()
    )
  )
  (4): SEResNeXtBottleneck(
    (conv1): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
    (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv3): Conv2d(512, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (se_module): SEModule(
      (avg_pool): AdaptiveAvgPool2d(output_size=1)
      (fc1): Conv2d(1024, 64, kernel_size=(1, 1), stride=(1, 1))
      (relu): ReLU(inplace=True)
      (fc2): Conv2d(64, 1024, kernel_size=(1, 1), stride=(1, 1))
      (sigmoid): Sigmoid()
    )
  )
  (5): SEResNeXtBottleneck(
    (conv1): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
    (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv3): Conv2d(512, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (se_module): SEModule(
      (avg_pool): AdaptiveAvgPool2d(output_size=1)
      (fc1): Conv2d(1024, 64, kernel_size=(1, 1), stride=(1, 1))
      (relu): ReLU(inplace=True)
      (fc2): Conv2d(64, 1024, kernel_size=(1, 1), stride=(1, 1))
      (sigmoid): Sigmoid()
    )
  )
),Sequential(
  (0): AdaptiveConcatPool2d(
    (ap): AdaptiveAvgPool2d(output_size=1)
    (mp): AdaptiveMaxPool2d(output_size=1)
  )
  (1): Flatten()
  (2): BatchNorm1d(4096, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (3): Dropout(p=0.25, inplace=False)
  (4): Linear(in_features=4096, out_features=512, bias=True)
  (5): ReLU(inplace=True)
  (6): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (7): Dropout(p=0.5, inplace=False)
  (8): Linear(in_features=512, out_features=6, bias=True)
)]]
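
The AttributeError makes sense once you see that param_groups is holding nn.Module objects: a module has no .grad attribute, only its parameters (tensors) do. A quick standalone PyTorch check, not fastai2-specific, just to illustrate the failure mode:

import torch
from torch import nn

m = nn.Sequential(nn.Conv2d(3, 8, 3), nn.BatchNorm2d(8))
print(hasattr(m, 'grad'))   # False: an nn.Module has no .grad, which is exactly the error above
p = next(m.parameters())
print(hasattr(p, 'grad'))   # True: parameters have .grad (None until backward is called)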

This occurred with other Cadene models as well.

I will post code later tonight, but essentially I ported the Cadene model over to fastai v2, used create_cnn_model to create a model, and passed all the arguments to Learner.

There is no reason they should work, as nothing has been done for that yet. The models supported are the ones in model_meta, so torchvision only for now, unless you pass custom cuts and splitters.
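
For reference, "cut" and "splitter" here mean roughly the following; this is a plain-PyTorch illustration with a torchvision resnet and a hypothetical grouping, not the fastai2 internals:

import torch.nn as nn
import torchvision.models as tvm

# "cut" slices off the original classifier head; "split" groups the remaining
# parameters so different groups can get different learning rates.
body = nn.Sequential(*list(tvm.resnet18(pretrained=False).children())[:-2])   # cut=-2
param_groups = [list(body[:6].parameters()), list(body[6:].parameters())]     # an arbitrary split
print(len(param_groups[0]), len(param_groups[1]))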

I did pass the meta information for cut and split.

@sgugger here is a code snippet:

import importlib
from torch import nn  # needed for nn.Sequential in get_model below
def try_import(module):
    "Try to import `module`. Returns module's object on success, None on failure"
    try: return importlib.import_module(module)
    except: return None

pretrainedmodels = try_import('pretrainedmodels')

meta = {'cut': -2, 'split': lambda m: (m[0][3], m[1])}
def get_model(model_name:str, pretrained:bool, seq:bool=True, pname:str='imagenet', **kwargs):
    pretrained = pname if pretrained else None
    model = getattr(pretrainedmodels, model_name)(pretrained=pretrained, **kwargs)
    return nn.Sequential(*model.children()) if seq else model

def se_resnext50_32x4d(pretrained:bool=False):  return get_model('se_resnext50_32x4d', pretrained)

 
def get_learner():
    # get_data, loss_func, opt_func and metrics are defined elsewhere in the notebook
    dbch = get_data(256,128)
    model = create_cnn_model(se_resnext50_32x4d,get_c(dbch),cut=meta['cut'], pretrained=True)
    learn = Learner(dbch, model, splitter=meta['split'], loss_func=loss_func, opt_func=opt_func, metrics=metrics)
    #learn = cnn_learner(dbch,resnet152,loss_func=loss_func,opt_func=opt_func,metrics=metrics)
    return learn

learn = get_learner()
lrf = learn.lr_find()
print(learn.opt.param_groups)

Is there something I am doing wrong?

Looks like you’re not flattening the model, hence param_groups contains modules rather than individual parameters.
Not familiar with v2 yet, but in v1 you’d do something like:
return nn.Sequential(*flatten_model(model)) if seq else model
in get_model.

Unfortunately, this does not work. Also, I used the same return statement from the cnn_learner source code in v2.

I can confirm that custom splitters, when defined similarly to the ones in vision.learner, work fine now.

So will the above code work fine now?

I would suggest checking the examples in vision.learner to see how split funcs are defined.

It’s something like L(m[0][:3], m[0][3:], m[1:]).map(params), not m[0][:3], m[0][3:], m[1:].
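
Following that advice, a sketch of what the corrected splitter could look like for this model; it assumes L and params are already imported from fastai2 (as used by the splitters in vision.learner), and the m[0][:3] / m[0][3:] boundary is just an assumed split point:

def seresnext_split(m):
    # return groups of *parameters* via .map(params), not the modules themselves;
    # the split point between m[0][:3] and m[0][3:] is an assumption, adjust as needed
    return L(m[0][:3], m[0][3:], m[1:]).map(params)

meta = {'cut': -2, 'split': seresnext_split}

Passing that as splitter to Learner should give param_groups made of tensors instead of modules.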


Thanks, I will look into it!
