Turn off eval mode for JIT exported model

I’ve trained a Fastai model and saved/loaded it as a PyTorch JIT model. I’d like to experiment with Monte Carlo Dropout, so I’m curious about how to turn off eval mode so that my prediction outputs are non-deterministic.

I’m using the resnet18 architecture (the fastai head adds dropout layers), but when I do this:
model = model.train()

The predictions I get from calling my model like this are still deterministic:
predict_values = model(img_tensor)

Can anyone spot what I’m missing?

Edit: Found my answer in the torch.jit.trace docs: “In the returned ScriptModule, operations that have different behaviors in training and eval modes will always behave as if it is in the mode it was in during tracing, no matter which mode the ScriptModule is in.”
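
To make this concrete, here is a minimal toy sketch (my own example, not the fastai model above) showing that the train/eval mode gets baked in at trace time for a module with an nn.Dropout layer:

import torch
import torch.nn as nn

# toy module with dropout
net = nn.Sequential(nn.Linear(8, 8), nn.Dropout(p=0.5))
x = torch.ones(1, 8)

# traced in eval mode: dropout is recorded as a no-op
net.eval()
traced_eval = torch.jit.trace(net, x)
traced_eval.train()  # has no effect on the traced graph
print(torch.equal(traced_eval(x), traced_eval(x)))  # True: still deterministic

# traced in train mode: dropout stays active
# (check_trace=False just silences the trace checker's nondeterminism warning)
net.train()
traced_train = torch.jit.trace(net, x, check_trace=False)
traced_train.eval()  # also has no effect
print(torch.equal(traced_train(x), traced_train(x)))  # almost certainly False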

Could you share your code for training and prediction using Monte Carlo Dropout with the fastai library?

Sure. Not sure how correct this is…

# assuming learn = a trained fastai cnn learner (resnet18 backbone)
import torch
import torch.nn as nn

def apply_dropout(m):
    # put only the dropout layers back into train mode, leaving batchnorm etc. in eval
    if isinstance(m, nn.Dropout):
        m.train()

learn.model.eval()
learn.model.apply(apply_dropout)

# trace with dropout active so the stochastic behaviour is baked into the exported graph
# (the tracer may warn about nondeterministic nodes; that is expected here)
trace_input = torch.ones(1, 3, 224, 224).cuda()
jit_model = torch.jit.trace(learn.model, trace_input)

output_path = str(learn.path/'model_jit_mcdo.pth')
torch.jit.save(jit_model, output_path)
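
# (my addition, not from the original post) sanity check that dropout was
# baked in as active: two passes on the same input should disagree
check_input = torch.ones(1, 3, 224, 224).cuda()
print('dropout active in traced model:',
      not torch.allclose(jit_model(check_input), jit_model(check_input)))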

# load the class labels saved alongside the model
with open('./classes.txt', 'r') as f:
    classes = f.read().splitlines()

if torch.cuda.is_available():
    print("Running model on GPU")
    model = torch.jit.load(output_path, map_location=torch.device('cuda'))
else:
    print("Running model on CPU")
    model = torch.jit.load(output_path, map_location=torch.device('cpu'))

# package an image for serving
import io
import math
from glob import glob
from io import BytesIO

import numpy as np
import PIL.Image

img_paths = glob('/path/to/jpgs/*.jpg')
test_img_path = np.random.choice(img_paths)
img_pil = PIL.Image.open(test_img_path)
buffer = BytesIO()
img_pil.save(buffer, 'PNG', quality=75)
img_bytes = buffer.getvalue()

# the server receives img_bytes and opens it as a PIL image
img_pil_server = PIL.Image.open(io.BytesIO(img_bytes))

# pool of light test-time augmentations; RandomChoice picks one per MCDO sample
from torchvision import transforms

mcdo_tfms = [transforms.RandomHorizontalFlip(p=0.5),
             transforms.RandomRotation(15),
             transforms.ColorJitter(brightness=0.05),
             transforms.ColorJitter(contrast=0.05),
             transforms.ColorJitter(saturation=0.05),
             transforms.ColorJitter(hue=0.05)]

# preprocessing pipeline: resize, one random augmentation, tensor, normalise
preprocess = transforms.Compose([
    transforms.Resize(224),
    transforms.RandomChoice(mcdo_tfms),
    transforms.ToTensor(),
    transforms.Normalize(  # ImageNet stats
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225])
])
def preprocess_img(img_pil_server):
    img_tensor = preprocess(img_pil_server)
    img_tensor = img_tensor.unsqueeze(0)
    if torch.cuda.is_available():
        return img_tensor.to(torch.device('cuda'))
    return img_tensor

# number of MCDO forward passes to sample
N = 100
prob_lists = []
for i in range(N):
    img_tensor = preprocess_img(img_pil_server)
    predict_values = model(img_tensor).tolist()[0]
    # sigmoid each logit to get independent per-class probabilities
    prob_values = [1 / (1 + math.exp(-v)) for v in predict_values]
    prob_lists.append(prob_values)

# convert probability lists to array
results = np.array(prob_lists)

# calculate MCDO probabilities
mcdo_averages = np.mean(results, axis=0)
mcdo_prob_dict = {k:round(v,4) for k,v in zip(classes,mcdo_averages)}
print(f'\nMCDO probabilities are {mcdo_prob_dict}')

# per-class standard deviation across the N samples
per_class_std_population = np.std(results, axis=0, ddof=0)  # population std (not used below)
per_class_std_sample = np.std(results, axis=0, ddof=1)      # sample std (Bessel's correction)
per_class_std_sample_dict = {k:round(v,4) for k,v in zip(classes,per_class_std_sample)}
print(f'\nUnbiased standard deviations: {per_class_std_sample_dict}')
# average the per-class variances, then take the square root (the RMS of the sample stds)
average_std = round(np.sqrt(np.mean(np.multiply(per_class_std_sample, per_class_std_sample))), 4)
print(f'Average standard deviation: {average_std}')

per_class_standard_errors = [std/np.sqrt(N) for std in per_class_std_sample]
per_class_standard_errors_dict = {k:round(v,4) for k,v in zip(classes,per_class_standard_errors)}
print(f'\nSE of the means: {per_class_standard_errors_dict}')