Inference with vision transformer model is different when using `learn.predict` and 'learn.model(inp)`

Code

basic_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.ConvertImageDtype(torch.float32),
    transforms.Resize((224,224)),
    transforms.Normalize(0,1),
    # transforms.Normalize(0,1),
    
])

def get_image(path):
    im = Image.open(path).convert('RGB')
    im = basic_transforms(im)
    return im.cuda()

for file in glob.glob("/notebooks/Inference Data/segmented_0padding_right_orientation/segmentation_103/*"):
    im_t = get_image(file)
    im_t = im_t.unsqueeze(0)
    print(im_t.shape)
    out = learn.predict(np.asarray(Image.open(file).convert('RGB').resize((224,224))))
    print(out)
    learn.model.eval()
    out = learn.model(im_t).cpu().detach().numpy()
    print(out)
    out_class = np.argmax(out)+1
    print(out_class) 
    break

Output:

torch.Size([1, 3, 224, 224])

('21', TensorImage(20), TensorImage([5.4576e-07, 1.7090e-07, 1.1458e-07, 2.7867e-07, 2.1852e-08, 2.4930e-07,
        3.6307e-07, 9.3603e-08, 8.4553e-07, 1.0110e-07, 4.7366e-07, 7.4483e-07,
        1.9120e-07, 6.4455e-07, 2.5059e-07, 1.2636e-06, 1.1804e-06, 7.0810e-07,
        6.8887e-06, 5.2062e-07, 9.9998e-01, 6.8552e-06, 1.7026e-06]))
[[-4.359714   -3.3154442  -2.116347   -3.1533966  -3.7310774  -4.8925233
  -3.3176816  -4.7674007  -3.1554716  -3.16898    -5.6131873  -3.6482942
  -2.2679214  -0.44155395 -3.388968   -4.0285454  -1.850384   -2.2328491
   2.415163   -3.3518667  -3.3467255   0.22249539 -3.323499  ]]
19

The output is different when using predict function and when I pass the input to the trained PyTorch model directly
I created a custom PyTorch model to train
Please help me figure out where I am going wrong

Model code for reference

class baseModel(nn.Module):
    def __init__(self, model_name, pretrained=True):
        super().__init__()
        self.model_ft = timm.create_model(model_name, pretrained=pretrained, num_classes=0,in_chans=3)
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.LazyLinear(1024),
            nn.BatchNorm1d(1024),
            nn.ReLU(),
            nn.Dropout(p=0.3),
            nn.Linear(1024,512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(p=0.3),
            nn.Linear(512,256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(p=0.3),
            nn.Linear(256,23),
        )

    def forward(self, x):
        x_features = self.model_ft(x)
        return self.classifier(x_features)

model = baseModel('vit_small_patch16_224', pretrained=False)