Dear @j.laute, thank you for your response. Sorry for my late reply; I was out of the country over the weekend.
Here is the information you asked for:
state.keys()
odict_keys(['0.weight', '1.weight', '1.bias', '1.running_mean', '1.running_var', '1.num_batches_tracked', '4.0.conv1.weight', '4.0.bn1.weight', '4.0.bn1.bias', '4.0.bn1.running_mean', '4.0.bn1.running_var', '4.0.bn1.num_batches_tracked', '4.0.conv2.weight', '4.0.bn2.weight', '4.0.bn2.bias', '4.0.bn2.running_mean', '4.0.bn2.running_var', '4.0.bn2.num_batches_tracked', '4.1.conv1.weight', '4.1.bn1.weight', '4.1.bn1.bias', '4.1.bn1.running_mean', '4.1.bn1.running_var', '4.1.bn1.num_batches_tracked', '4.1.conv2.weight', '4.1.bn2.weight', '4.1.bn2.bias', '4.1.bn2.running_mean', '4.1.bn2.running_var', '4.1.bn2.num_batches_tracked', '4.2.conv1.weight', '4.2.bn1.weight', '4.2.bn1.bias', '4.2.bn1.running_mean', '4.2.bn1.running_var', '4.2.bn1.num_batches_tracked', '4.2.conv2.weight', '4.2.bn2.weight', '4.2.bn2.bias', '4.2.bn2.running_mean', '4.2.bn2.running_var', '4.2.bn2.num_batches_tracked', '5.0.conv1.weight', '5.0.bn1.weight', '5.0.bn1.bias', '5.0.bn1.running_mean', '5.0.bn1.running_var', '5.0.bn1.num_batches_tracked', '5.0.conv2.weight', '5.0.bn2.weight', '5.0.bn2.bias', '5.0.bn2.running_mean', '5.0.bn2.running_var', '5.0.bn2.num_batches_tracked', '5.0.downsample.0.weight', '5.0.downsample.1.weight', '5.0.downsample.1.bias', '5.0.downsample.1.running_mean', '5.0.downsample.1.running_var', '5.0.downsample.1.num_batches_tracked', '5.1.conv1.weight', '5.1.bn1.weight', '5.1.bn1.bias', '5.1.bn1.running_mean', '5.1.bn1.running_var', '5.1.bn1.num_batches_tracked', '5.1.conv2.weight', '5.1.bn2.weight', '5.1.bn2.bias', '5.1.bn2.running_mean', '5.1.bn2.running_var', '5.1.bn2.num_batches_tracked', '5.2.conv1.weight', '5.2.bn1.weight', '5.2.bn1.bias', '5.2.bn1.running_mean', '5.2.bn1.running_var', '5.2.bn1.num_batches_tracked', '5.2.conv2.weight', '5.2.bn2.weight', '5.2.bn2.bias', '5.2.bn2.running_mean', '5.2.bn2.running_var', '5.2.bn2.num_batches_tracked', '5.3.conv1.weight', '5.3.bn1.weight', '5.3.bn1.bias', '5.3.bn1.running_mean', '5.3.bn1.running_var', 
'5.3.bn1.num_batches_tracked', '5.3.conv2.weight', '5.3.bn2.weight', '5.3.bn2.bias', '5.3.bn2.running_mean', '5.3.bn2.running_var', '5.3.bn2.num_batches_tracked', '6.0.conv1.weight', '6.0.bn1.weight', '6.0.bn1.bias', '6.0.bn1.running_mean', '6.0.bn1.running_var', '6.0.bn1.num_batches_tracked', '6.0.conv2.weight', '6.0.bn2.weight', '6.0.bn2.bias', '6.0.bn2.running_mean', '6.0.bn2.running_var', '6.0.bn2.num_batches_tracked', '6.0.downsample.0.weight', '6.0.downsample.1.weight', '6.0.downsample.1.bias', '6.0.downsample.1.running_mean', '6.0.downsample.1.running_var', '6.0.downsample.1.num_batches_tracked', '6.1.conv1.weight', '6.1.bn1.weight', '6.1.bn1.bias', '6.1.bn1.running_mean', '6.1.bn1.running_var', '6.1.bn1.num_batches_tracked', '6.1.conv2.weight', '6.1.bn2.weight', '6.1.bn2.bias', '6.1.bn2.running_mean', '6.1.bn2.running_var', '6.1.bn2.num_batches_tracked', '6.2.conv1.weight', '6.2.bn1.weight', '6.2.bn1.bias', '6.2.bn1.running_mean', '6.2.bn1.running_var', '6.2.bn1.num_batches_tracked', '6.2.conv2.weight', '6.2.bn2.weight', '6.2.bn2.bias', '6.2.bn2.running_mean', '6.2.bn2.running_var', '6.2.bn2.num_batches_tracked', '6.3.conv1.weight', '6.3.bn1.weight', '6.3.bn1.bias', '6.3.bn1.running_mean', '6.3.bn1.running_var', '6.3.bn1.num_batches_tracked', '6.3.conv2.weight', '6.3.bn2.weight', '6.3.bn2.bias', '6.3.bn2.running_mean', '6.3.bn2.running_var', '6.3.bn2.num_batches_tracked', '6.4.conv1.weight', '6.4.bn1.weight', '6.4.bn1.bias', '6.4.bn1.running_mean', '6.4.bn1.running_var', '6.4.bn1.num_batches_tracked', '6.4.conv2.weight', '6.4.bn2.weight', '6.4.bn2.bias', '6.4.bn2.running_mean', '6.4.bn2.running_var', '6.4.bn2.num_batches_tracked', '6.5.conv1.weight', '6.5.bn1.weight', '6.5.bn1.bias', '6.5.bn1.running_mean', '6.5.bn1.running_var', '6.5.bn1.num_batches_tracked', '6.5.conv2.weight', '6.5.bn2.weight', '6.5.bn2.bias', '6.5.bn2.running_mean', '6.5.bn2.running_var', '6.5.bn2.num_batches_tracked', '7.0.conv1.weight', '7.0.bn1.weight', '7.0.bn1.bias', 
'7.0.bn1.running_mean', '7.0.bn1.running_var', '7.0.bn1.num_batches_tracked', '7.0.conv2.weight', '7.0.bn2.weight', '7.0.bn2.bias', '7.0.bn2.running_mean', '7.0.bn2.running_var', '7.0.bn2.num_batches_tracked', '7.0.downsample.0.weight', '7.0.downsample.1.weight', '7.0.downsample.1.bias', '7.0.downsample.1.running_mean', '7.0.downsample.1.running_var', '7.0.downsample.1.num_batches_tracked', '7.1.conv1.weight', '7.1.bn1.weight', '7.1.bn1.bias', '7.1.bn1.running_mean', '7.1.bn1.running_var', '7.1.bn1.num_batches_tracked', '7.1.conv2.weight', '7.1.bn2.weight', '7.1.bn2.bias', '7.1.bn2.running_mean', '7.1.bn2.running_var', '7.1.bn2.num_batches_tracked', '7.2.conv1.weight', '7.2.bn1.weight', '7.2.bn1.bias', '7.2.bn1.running_mean', '7.2.bn1.running_var', '7.2.bn1.num_batches_tracked', '7.2.conv2.weight', '7.2.bn2.weight', '7.2.bn2.bias', '7.2.bn2.running_mean', '7.2.bn2.running_var', '7.2.bn2.num_batches_tracked', '10.weight', '10.bias', '10.running_mean', '10.running_var', '10.num_batches_tracked', '12.weight', '12.bias', '14.weight', '14.bias', '14.running_mean', '14.running_var', '14.num_batches_tracked', '16.weight', '16.bias'])
model.state_dict().keys()
odict_keys(['conv1.weight', 'bn1.weight', 'bn1.bias', 'bn1.running_mean', 'bn1.running_var', 'bn1.num_batches_tracked', 'layer1.0.conv1.weight', 'layer1.0.bn1.weight', 'layer1.0.bn1.bias', 'layer1.0.bn1.running_mean', 'layer1.0.bn1.running_var', 'layer1.0.bn1.num_batches_tracked', 'layer1.0.conv2.weight', 'layer1.0.bn2.weight', 'layer1.0.bn2.bias', 'layer1.0.bn2.running_mean', 'layer1.0.bn2.running_var', 'layer1.0.bn2.num_batches_tracked', 'layer1.1.conv1.weight', 'layer1.1.bn1.weight', 'layer1.1.bn1.bias', 'layer1.1.bn1.running_mean', 'layer1.1.bn1.running_var', 'layer1.1.bn1.num_batches_tracked', 'layer1.1.conv2.weight', 'layer1.1.bn2.weight', 'layer1.1.bn2.bias', 'layer1.1.bn2.running_mean', 'layer1.1.bn2.running_var', 'layer1.1.bn2.num_batches_tracked', 'layer1.2.conv1.weight', 'layer1.2.bn1.weight', 'layer1.2.bn1.bias', 'layer1.2.bn1.running_mean', 'layer1.2.bn1.running_var', 'layer1.2.bn1.num_batches_tracked', 'layer1.2.conv2.weight', 'layer1.2.bn2.weight', 'layer1.2.bn2.bias', 'layer1.2.bn2.running_mean', 'layer1.2.bn2.running_var', 'layer1.2.bn2.num_batches_tracked', 'layer2.0.conv1.weight', 'layer2.0.bn1.weight', 'layer2.0.bn1.bias', 'layer2.0.bn1.running_mean', 'layer2.0.bn1.running_var', 'layer2.0.bn1.num_batches_tracked', 'layer2.0.conv2.weight', 'layer2.0.bn2.weight', 'layer2.0.bn2.bias', 'layer2.0.bn2.running_mean', 'layer2.0.bn2.running_var', 'layer2.0.bn2.num_batches_tracked', 'layer2.0.downsample.0.weight', 'layer2.0.downsample.1.weight', 'layer2.0.downsample.1.bias', 'layer2.0.downsample.1.running_mean', 'layer2.0.downsample.1.running_var', 'layer2.0.downsample.1.num_batches_tracked', 'layer2.1.conv1.weight', 'layer2.1.bn1.weight', 'layer2.1.bn1.bias', 'layer2.1.bn1.running_mean', 'layer2.1.bn1.running_var', 'layer2.1.bn1.num_batches_tracked', 'layer2.1.conv2.weight', 'layer2.1.bn2.weight', 'layer2.1.bn2.bias', 'layer2.1.bn2.running_mean', 'layer2.1.bn2.running_var', 'layer2.1.bn2.num_batches_tracked', 'layer2.2.conv1.weight', 
'layer2.2.bn1.weight', 'layer2.2.bn1.bias', 'layer2.2.bn1.running_mean', 'layer2.2.bn1.running_var', 'layer2.2.bn1.num_batches_tracked', 'layer2.2.conv2.weight', 'layer2.2.bn2.weight', 'layer2.2.bn2.bias', 'layer2.2.bn2.running_mean', 'layer2.2.bn2.running_var', 'layer2.2.bn2.num_batches_tracked', 'layer2.3.conv1.weight', 'layer2.3.bn1.weight', 'layer2.3.bn1.bias', 'layer2.3.bn1.running_mean', 'layer2.3.bn1.running_var', 'layer2.3.bn1.num_batches_tracked', 'layer2.3.conv2.weight', 'layer2.3.bn2.weight', 'layer2.3.bn2.bias', 'layer2.3.bn2.running_mean', 'layer2.3.bn2.running_var', 'layer2.3.bn2.num_batches_tracked', 'layer3.0.conv1.weight', 'layer3.0.bn1.weight', 'layer3.0.bn1.bias', 'layer3.0.bn1.running_mean', 'layer3.0.bn1.running_var', 'layer3.0.bn1.num_batches_tracked', 'layer3.0.conv2.weight', 'layer3.0.bn2.weight', 'layer3.0.bn2.bias', 'layer3.0.bn2.running_mean', 'layer3.0.bn2.running_var', 'layer3.0.bn2.num_batches_tracked', 'layer3.0.downsample.0.weight', 'layer3.0.downsample.1.weight', 'layer3.0.downsample.1.bias', 'layer3.0.downsample.1.running_mean', 'layer3.0.downsample.1.running_var', 'layer3.0.downsample.1.num_batches_tracked', 'layer3.1.conv1.weight', 'layer3.1.bn1.weight', 'layer3.1.bn1.bias', 'layer3.1.bn1.running_mean', 'layer3.1.bn1.running_var', 'layer3.1.bn1.num_batches_tracked', 'layer3.1.conv2.weight', 'layer3.1.bn2.weight', 'layer3.1.bn2.bias', 'layer3.1.bn2.running_mean', 'layer3.1.bn2.running_var', 'layer3.1.bn2.num_batches_tracked', 'layer3.2.conv1.weight', 'layer3.2.bn1.weight', 'layer3.2.bn1.bias', 'layer3.2.bn1.running_mean', 'layer3.2.bn1.running_var', 'layer3.2.bn1.num_batches_tracked', 'layer3.2.conv2.weight', 'layer3.2.bn2.weight', 'layer3.2.bn2.bias', 'layer3.2.bn2.running_mean', 'layer3.2.bn2.running_var', 'layer3.2.bn2.num_batches_tracked', 'layer3.3.conv1.weight', 'layer3.3.bn1.weight', 'layer3.3.bn1.bias', 'layer3.3.bn1.running_mean', 'layer3.3.bn1.running_var', 'layer3.3.bn1.num_batches_tracked', 'layer3.3.conv2.weight', 
'layer3.3.bn2.weight', 'layer3.3.bn2.bias', 'layer3.3.bn2.running_mean', 'layer3.3.bn2.running_var', 'layer3.3.bn2.num_batches_tracked', 'layer3.4.conv1.weight', 'layer3.4.bn1.weight', 'layer3.4.bn1.bias', 'layer3.4.bn1.running_mean', 'layer3.4.bn1.running_var', 'layer3.4.bn1.num_batches_tracked', 'layer3.4.conv2.weight', 'layer3.4.bn2.weight', 'layer3.4.bn2.bias', 'layer3.4.bn2.running_mean', 'layer3.4.bn2.running_var', 'layer3.4.bn2.num_batches_tracked', 'layer3.5.conv1.weight', 'layer3.5.bn1.weight', 'layer3.5.bn1.bias', 'layer3.5.bn1.running_mean', 'layer3.5.bn1.running_var', 'layer3.5.bn1.num_batches_tracked', 'layer3.5.conv2.weight', 'layer3.5.bn2.weight', 'layer3.5.bn2.bias', 'layer3.5.bn2.running_mean', 'layer3.5.bn2.running_var', 'layer3.5.bn2.num_batches_tracked', 'layer4.0.conv1.weight', 'layer4.0.bn1.weight', 'layer4.0.bn1.bias', 'layer4.0.bn1.running_mean', 'layer4.0.bn1.running_var', 'layer4.0.bn1.num_batches_tracked', 'layer4.0.conv2.weight', 'layer4.0.bn2.weight', 'layer4.0.bn2.bias', 'layer4.0.bn2.running_mean', 'layer4.0.bn2.running_var', 'layer4.0.bn2.num_batches_tracked', 'layer4.0.downsample.0.weight', 'layer4.0.downsample.1.weight', 'layer4.0.downsample.1.bias', 'layer4.0.downsample.1.running_mean', 'layer4.0.downsample.1.running_var', 'layer4.0.downsample.1.num_batches_tracked', 'layer4.1.conv1.weight', 'layer4.1.bn1.weight', 'layer4.1.bn1.bias', 'layer4.1.bn1.running_mean', 'layer4.1.bn1.running_var', 'layer4.1.bn1.num_batches_tracked', 'layer4.1.conv2.weight', 'layer4.1.bn2.weight', 'layer4.1.bn2.bias', 'layer4.1.bn2.running_mean', 'layer4.1.bn2.running_var', 'layer4.1.bn2.num_batches_tracked', 'layer4.2.conv1.weight', 'layer4.2.bn1.weight', 'layer4.2.bn1.bias', 'layer4.2.bn1.running_mean', 'layer4.2.bn1.running_var', 'layer4.2.bn1.num_batches_tracked', 'layer4.2.conv2.weight', 'layer4.2.bn2.weight', 'layer4.2.bn2.bias', 'layer4.2.bn2.running_mean', 'layer4.2.bn2.running_var', 'layer4.2.bn2.num_batches_tracked', 'fc.weight', 'fc.bias'])
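I can see that the two sets of keys are not the same: the saved state uses numeric indices (e.g. '0.weight', '4.0.conv1.weight') and ends with '10.*' to '16.*' entries, whereas the model uses named layers (e.g. 'conv1.weight', 'layer1.0.conv1.weight') and ends with 'fc.weight' and 'fc.bias'. Can you please tell me what I can do?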
I set `precompute` to `False` before fitting the model, but not right before saving. Does that make any difference? Anyway, I will try that in the meantime and see.
Thank you for your time.