Updating ResNet to ResNeSt with Fastai

I’m trying to use the new ResNeSt architecture in Fastai and I’m getting an error I don’t quite understand. BTW, here is the new and improved ResNet if you want to take a look:

It looks super promising. The error when I try to train is “TypeError: conv2d(): argument ‘input’ (position 1) must be Tensor, not bool”. Here is the full traceback:

TypeError                                 Traceback (most recent call last)
in
     20 freeze = True
     21 unfreeze = False
---> 22 learn, data = fit(bs, size, findLR, lr, epoch, save, partial, load, unfreeze, freeze, justLoad, cb)
     23
     24 # load = 'magicDetail_02-nguyenv7-NoGan_B__5-457807'

in fit(bs, size, findLR, lr, epoch, save, partial, load, unfreeze, freeze, justLoad, cb)
     55 base_loss = F.l1_loss
     56
---> 57 body = create_body(arch, pretrained=True).cuda().eval()
     58 requires_grad(body, False)
     59

~\Anaconda3\envs\Fastai_02\lib\site-packages\fastai\vision\learner.py in create_body(arch, pretrained, cut)
     54 def create_body(arch:Callable, pretrained:bool=True, cut:Optional[Union[int, Callable]]=None):
     55     "Cut off the body of a typically pretrained model at cut (int) or cut the model as specified by cut(model) (function)."
---> 56     model = arch(pretrained)
     57     cut = ifnone(cut, cnn_config(arch)['cut'])
     58     if cut is None:

~\Anaconda3\envs\Fastai_02\lib\site-packages\torch\nn\modules\module.py in __call__(self, *input, **kwargs)
    530             result = self._slow_forward(*input, **kwargs)
    531         else:
--> 532             result = self.forward(*input, **kwargs)
    533         for hook in self._forward_hooks.values():
    534             hook_result = hook(self, input, result)

~/.cache\torch\hub\zhanghang1989_ResNeSt_master\resnest\torch\resnet.py in forward(self, x)
    286
    287     def forward(self, x):
--> 288         x = self.conv1(x)
    289         x = self.bn1(x)
    290         x = self.relu(x)

~\Anaconda3\envs\Fastai_02\lib\site-packages\torch\nn\modules\module.py in __call__(self, *input, **kwargs)
    530             result = self._slow_forward(*input, **kwargs)
    531         else:
--> 532             result = self.forward(*input, **kwargs)
    533         for hook in self._forward_hooks.values():
    534             hook_result = hook(self, input, result)

~\Anaconda3\envs\Fastai_02\lib\site-packages\torch\nn\modules\container.py in forward(self, input)
     98     def forward(self, input):
     99         for module in self:
--> 100             input = module(input)
    101         return input
    102

~\Anaconda3\envs\Fastai_02\lib\site-packages\torch\nn\modules\module.py in __call__(self, *input, **kwargs)
    530             result = self._slow_forward(*input, **kwargs)
    531         else:
--> 532             result = self.forward(*input, **kwargs)
    533         for hook in self._forward_hooks.values():
    534             hook_result = hook(self, input, result)

~\Anaconda3\envs\Fastai_02\lib\site-packages\torch\nn\modules\conv.py in forward(self, input)
    343
    344     def forward(self, input):
--> 345         return self.conv2d_forward(input, self.weight)
    346
    347 class Conv3d(_ConvNd):

~\Anaconda3\envs\Fastai_02\lib\site-packages\torch\nn\modules\conv.py in conv2d_forward(self, input, weight)
    340                         _pair(0), self.dilation, self.groups)
    341         return F.conv2d(input, weight, self.bias, self.stride,
--> 342                         self.padding, self.dilation, self.groups)
    343
    344     def forward(self, input):

TypeError: conv2d(): argument 'input' (position 1) must be Tensor, not bool


How are you declaring the model and building the Learner? (We can’t do much more than speculate without seeing how you set everything up.)

Yes, I am loading the model according to their docs. Here is the code; you can see in the top line that I changed

arch = models.resnet50

to

arch = torch.hub.load('zhanghang1989/ResNeSt', 'resnest50', pretrained=True)

#arch = models.resnet50
arch = torch.hub.load('zhanghang1989/ResNeSt', 'resnest50', pretrained=True)
src = ImageImageList.from_folder(path_56).use_partial_data(sample_pct = partial, seed= 34).split_by_rand_pct(0.1, seed = 42)

data = src.label_from_func(lambda x: hrpath/x.name)\
.transform(get_transforms( do_flip = True,flip_vert = True), tfm_y = True, size= size)\
.databunch(bs = bs).normalize(imagenet_stats, do_y = True)

data.c = 3
data.valid_dl = data.valid_dl.new(shuffle=True)
t = data.valid_ds[0][1].data
t1 = torch.stack([t,t])

t.shape
t1.shape

# Define the gram matrix equation : x*x'
def gram_mat(x:torch.Tensor) -> torch.Tensor: 
    n, c, h, w =  x.size()

    # flatten the spatial dimensions: (n, c, h*w)
    x = x.view(n, c, -1)

    return x@x.transpose(1,2)/(c*h*w)

gram_mat(t1)

base_loss = F.l1_loss

body = create_body(arch, pretrained=True).cuda().eval()
requires_grad(body, False)

m_sizes = model_sizes(body, (224,224))
m_sizes

list_idx = [i for i,x in enumerate(m_sizes) if i<len(m_sizes)-1 and not(x == m_sizes[i+1])]
list_idx.append(len(m_sizes)-1)


class FeatureLoss(nn.Module):
    def __init__(self, feat_body, layer_idx, layer_weight):
        super().__init__()
        self.feat_body = feat_body
        self.loss_features = [self.feat_body[i] for i in layer_idx]
        self.hooks = hook_outputs(self.loss_features, detach=False)
        self.weight = layer_weight

        self.metric_names = ['pixel_L1'] +\
        [f'feature_L1_{i}' for i in layer_idx] + [f'feature_L1_Gram_{i}' for i in layer_idx]
        print(self.metric_names)

    def get_features(self, x, clone=False):
        self.feat_body(x)
        return [o.clone() if clone else o for o in self.hooks.stored]

    def forward(self, x_input, target):
        out_feat = self.get_features(target, clone=True)
        in_feat = self.get_features(x_input)

        self.f_losses = [base_loss(x_input, target)]

        self.f_losses += [base_loss(f_in, f_out)*w \
                          for f_in,f_out,w in zip(in_feat, out_feat,self.weight)]

        self.f_losses += [base_loss(gram_mat(f_in), gram_mat(f_out))*(w*1e3) \
                          for f_in,f_out,w in zip(in_feat, out_feat,self.weight)] # more weight for this

        self.metrics = dict(zip(self.metric_names, self.f_losses))

        return sum(self.f_losses)



feat_loss = FeatureLoss(body, list_idx, [15,2,5,5,0.3])
t1_c = t1.cuda()
feat_loss(t1_c, t1_c)
wd = 1e-3
learn = unet_learner(data, arch, wd = wd, loss_func= feat_loss,\
                     callback_fns=LossMetrics, blur=True, norm_type=NormType.Weight).to_fp16()

The issue is you’re calling unet_learner. unet_learner is only for the base ResNets, as fastai does many special things with it. I’d follow the unet_learner source code to see how it’s built and make a custom Learner instead.
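
Roughly, something along these lines might work. This is an untested sketch for fastai v1 that loosely follows the unet_learner source: resnest50_arch is a wrapper I’m assuming you’d write so create_body gets a callable rather than an already-instantiated model (create_body calls arch(pretrained) internally, which is exactly where your bool is coming from), and cut=-2, the img_size, and the DynamicUnet keyword arguments are assumptions that may need adjusting for your fastai version. data, size, wd and feat_loss are the objects from your code above.

import torch
from fastai.vision import *
from fastai.vision.models.unet import DynamicUnet
from fastai.callbacks import LossMetrics

def resnest50_arch(pretrained=True):
    # wrapper so create_body can call arch(pretrained) itself
    # instead of receiving an already-built model
    return torch.hub.load('zhanghang1989/ResNeSt', 'resnest50', pretrained=pretrained)

# cut=-2 drops the average pool + fc head; verify against the ResNeSt layout
body = create_body(resnest50_arch, pretrained=True, cut=-2)

# mirror what unet_learner builds internally (fastai v1)
model = DynamicUnet(body, n_classes=data.c, img_size=(size, size),
                    blur=True, norm_type=NormType.Weight)
learn = Learner(data, model, wd=wd, loss_func=feat_loss,
                callback_fns=LossMetrics).to_fp16()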

Thanks so much! There is still so much I need to learn.

Has anyone made progress modifying unet_learner to use resnest?

Hi, I just got the same error, but I am using cnn_learner. Did you solve the problem?

As Zach said, you can’t use cnn_learner or unet_learner to initialize your learner. Here’s what I did for a fastai2 classification model (I guess the steps should be similar for fastai v1, but you might have to make some adjustments). Maybe not the cleanest code, but it works :)

You have to adjust a couple of things to get a “custom” pretrained model to work with fastai.

  1. Load the pretrained model
  2. Get the layer to cut off the head
  3. Create body
  4. Create a new head with the right number of outputs / classes
  5. Create Model
  6. Create Learner (with splitter function)
# load pretrained resnest-model
resnest = torch.hub.load('zhanghang1989/ResNeSt', 'resnest50', pretrained=True)

# get cut-layer
ll = list(enumerate(resnest.children()))
cut = next(i for i,o in reversed(ll) if has_pool_type(o))

# create body
body = nn.Sequential(*list(resnest.children())[:cut])

#get number of classes / outputs from dataloaders (dls)
n_out = dls.c

# create head (concat_pool doubles nf because of AdaptiveConcatPool2d)
concat_pool = True
nf = num_features_model(nn.Sequential(*body.children())) * (2 if concat_pool else 1)
head = create_head(nf, n_out, concat_pool=concat_pool)

# create model
model = nn.Sequential(body, head)

# init new head
apply_init(model[1], nn.init.kaiming_normal_)

# define splitter for discriminative learning rates (three parameter groups)
def _resnet_split(m): return L(m[0][:6], m[0][6:], m[1:]).map(params)

# create learner
learn = Learner(dls, model, splitter=_resnet_split, metrics=accuracy)

Florian


Thanks. This is very interesting. When you do this, how do you specify layer groups for fit_one_cycle? Is that defined automatically in fastai, or specified by the user?

Found it in the docs sample for fastai. Thanks.
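
For anyone else who lands here: as far as I can tell, the splitter passed to Learner is what defines the parameter groups, and fit_one_cycle with a slice learning rate spreads discriminative rates across them. A rough sketch, assuming the learn object from Florian’s snippet above (fastai2); the epoch counts and learning rates are just placeholders:

# freeze everything except the last parameter group (the new head) and train it
learn.freeze()
learn.fit_one_cycle(3, 1e-3)

# unfreeze and train all groups with discriminative learning rates:
# the lowest rate goes to the earliest group, the highest to the head
learn.unfreeze()
learn.fit_one_cycle(3, lr_max=slice(1e-5, 1e-3))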