Turn off eval mode for JIT exported model

I’ve trained a Fastai model and saved/loaded it as a PyTorch JIT model. I’d like to experiment with Monte Carlo Dropout, so I’m curious about how to turn off eval mode so that my prediction outputs are non-deterministic.

I’m using the resnet18 architecture (the fastai head adds dropout layers), but when I do this:
model = model.train()

The predictions I get from calling my model like this are still deterministic:
predict_values = model(img_tensor)

Can anyone spot what I’m missing?

Edit: Found my answer in the torch.jit.trace docs: “In the returned ScriptModule, operations that have different behaviors in training and eval modes will always behave as if it is in the mode it was in during tracing, no matter which mode the ScriptModule is in.”
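
To make this concrete, here is a minimal toy sketch (my own example, not the fastai model above) showing that the train/eval mode gets baked in at trace time for a module with an nn.Dropout layer:

import torch
import torch.nn as nn

# toy module with dropout
net = nn.Sequential(nn.Linear(8, 8), nn.Dropout(p=0.5))
x = torch.ones(1, 8)

# traced in eval mode: dropout is recorded as a no-op
net.eval()
traced_eval = torch.jit.trace(net, x)
traced_eval.train()  # has no effect on the traced graph
print(torch.equal(traced_eval(x), traced_eval(x)))  # True: still deterministic

# traced in train mode: dropout stays active
# (check_trace=False just silences the trace checker's nondeterminism warning)
net.train()
traced_train = torch.jit.trace(net, x, check_trace=False)
traced_train.eval()  # also has no effect
print(torch.equal(traced_train(x), traced_train(x)))  # almost certainly False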

Could you share your code for training and prediction using Monte Carlo Dropout with the fastai library?

Sure. Not sure how correct this is…

# assuming learn = a trained fastai cnn learner (resnet18 backbone)
import torch
import torch.nn as nn

def apply_dropout(m):
    # put only the dropout layers back into train mode, leaving batchnorm etc. in eval
    if isinstance(m, nn.Dropout):
        m.train()

learn.model.eval()
learn.model.apply(apply_dropout)

# trace with dropout active so the stochastic behaviour is baked into the exported graph
# (the tracer may warn about nondeterministic nodes; that is expected here)
trace_input = torch.ones(1, 3, 224, 224).cuda()
jit_model = torch.jit.trace(learn.model, trace_input)

output_path = str(learn.path/'model_jit_mcdo.pth')
torch.jit.save(jit_model, output_path)
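
# (my addition, not from the original post) sanity check that dropout was
# baked in as active: two passes on the same input should disagree
check_input = torch.ones(1, 3, 224, 224).cuda()
print('dropout active in traced model:',
      not torch.allclose(jit_model(check_input), jit_model(check_input)))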

# load the class labels saved alongside the model
with open('./classes.txt', 'r') as f:
    classes = f.read().splitlines()

if torch.cuda.is_available():
    print("Running model on GPU")
    model = torch.jit.load(output_path, map_location=torch.device('cuda'))
else:
    print("Running model on CPU")
    model = torch.jit.load(output_path, map_location=torch.device('cpu'))

# package an image for serving
import io
import math
from glob import glob
from io import BytesIO

import numpy as np
import PIL.Image

img_paths = glob('/path/to/jpgs/*.jpg')
test_img_path = np.random.choice(img_paths)
img_pil = PIL.Image.open(test_img_path)
buffer = BytesIO()
img_pil.save(buffer, 'PNG', quality=75)
img_bytes = buffer.getvalue()

# the server receives img_bytes and opens it as a PIL image
img_pil_server = PIL.Image.open(io.BytesIO(img_bytes))

# pool of light test-time augmentations; RandomChoice picks one per MCDO sample
from torchvision import transforms

mcdo_tfms = [transforms.RandomHorizontalFlip(p=0.5),
             transforms.RandomRotation(15),
             transforms.ColorJitter(brightness=0.05),
             transforms.ColorJitter(contrast=0.05),
             transforms.ColorJitter(saturation=0.05),
             transforms.ColorJitter(hue=0.05)]

# preprocessing pipeline: resize, one random augmentation, tensor, normalise
preprocess = transforms.Compose([
    transforms.Resize(224),
    transforms.RandomChoice(mcdo_tfms),
    transforms.ToTensor(),
    transforms.Normalize(  # ImageNet stats
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225])
])
def preprocess_img(img_pil_server):
    img_tensor = preprocess(img_pil_server)
    img_tensor = img_tensor.unsqueeze(0)
    if torch.cuda.is_available():
        return img_tensor.to(torch.device('cuda'))
    return img_tensor

# number of MCDO forward passes to sample
N = 100
prob_lists = []
for i in range(N):
    img_tensor = preprocess_img(img_pil_server)
    predict_values = model(img_tensor).tolist()[0]
    # sigmoid each logit to get independent per-class probabilities
    prob_values = [1 / (1 + math.exp(-v)) for v in predict_values]
    prob_lists.append(prob_values)

# convert probability lists to array
results = np.array(prob_lists)

# calculate MCDO probabilities
mcdo_averages = np.mean(results, axis=0)
mcdo_prob_dict = {k:round(v,4) for k,v in zip(classes,mcdo_averages)}
print(f'\nMCDO probabilities are {mcdo_prob_dict}')

# per-class standard deviation across the N samples
per_class_std_population = np.std(results, axis=0, ddof=0)  # population std (not used below)
per_class_std_sample = np.std(results, axis=0, ddof=1)      # sample std (Bessel's correction)
per_class_std_sample_dict = {k:round(v,4) for k,v in zip(classes,per_class_std_sample)}
print(f'\nUnbiased standard deviations: {per_class_std_sample_dict}')
# average the per-class variances, then take the square root (the RMS of the sample stds)
average_std = round(np.sqrt(np.mean(np.multiply(per_class_std_sample, per_class_std_sample))), 4)
print(f'Average standard deviation: {average_std}')

per_class_standard_errors = [std/np.sqrt(N) for std in per_class_std_sample]
per_class_standard_errors_dict = {k:round(v,4) for k,v in zip(classes,per_class_standard_errors)}
print(f'\nSE of the means: {per_class_standard_errors_dict}')