Pascal VOC Single Image Testing


(Zachary Mueller) #1

I want to run inference on a single picture that my model has never seen before, both to get the bounding boxes (BBs) for it and to measure how long the model takes to make its prediction. How would I go about this?

Thanks,

Zach


(Zachary Mueller) #2

I realized I did not provide enough details when posting this. Here is essentially all the code I have found to be required for the model to run:

#!pip install fastai==0.7.0
#!pip install torchtext==0.2.3
#from google.colab import drive
#drive.mount('/content/drive')
from fastai.conv_learner import *
from fastai.dataset import *

from pathlib import Path
import json
from PIL import ImageDraw, ImageFont
from matplotlib import patches, patheffects

from scipy.special import expit

# Backbone architecture, input image size and batch size used below.
RESNET = resnet34
sz, bs = 224, 12

# Dataset locations; the CSV files live under PATH/tmp.
PATH = Path('drive/My Drive/Datasets/~~/Pascal_VOC')
JPEGS = 'JPEGImages'
CSV_BB = PATH/'tmp/largest-bbox.csv'
CSV = PATH/'tmp/largest-labels.csv'

# Load the annotation file; it is read for its 'categories' and 'images' lists.
with open(PATH/'Annotations/labels.json', 'r') as f:
    data = json.load(f)

# category id -> category name
category = dict((entry['id'], entry['name']) for entry in data['categories'])

# image id -> file name, plus the image ids in file order
filename, imageIDs = {}, []
for image_entry in data['images']:
    filename[image_entry['id']] = image_entry['file_name']
    imageIDs.append(image_entry['id'])

# Indices returned by get_cv_idxs for a collection of len(filename) items;
# they are passed as `val_idxs` when the image data is built.
val_idxs = get_cv_idxs(len(filename))



class ConcatLblDataset(Dataset):
    """Wrap a dataset so every item carries a second label next to its target.

    ``ds`` is an indexable dataset yielding ``(x, y)`` pairs and ``y2`` is an
    indexable collection of extra labels. Item ``i`` of this dataset is
    ``(x, (y, y2[i]))``; the length is the length of ``ds``.
    """

    def __init__(self, ds, y2):
        self.ds, self.y2 = ds, y2

    def __len__(self):
        return len(self.ds)

    def __getitem__(self, i):
        # Pull the wrapped item apart and re-pack it with the extra label.
        sample = self.ds[i]
        inputs, target = sample
        return inputs, (target, self.y2[i])

def test_convert_bbox_proper_output(bbox): 
    """
    converts to xy format that was inputed
    """
    return np.array([bbox[0], 
                     bbox[1], 
                     bbox[3] + bbox[1], 
                     bbox[2] + bbox[0]]) 
    
# Augmentations for the training images. Each one is given
# tfm_y=TfmType.COORD so the target coordinates are transformed along with
# the image.
augs = [RandomFlip(tfm_y=TfmType.COORD),
        RandomRotate(30, tfm_y=TfmType.COORD),
        RandomLighting(0.1, 0.1, tfm_y=TfmType.COORD)]

# Create Transform and image classifier data from CSV.
# Both datasets are built with the SAME val_idxs: the class labels taken from
# name_data are attached item-by-item to bbox_data below, so the two
# train/validation splits must be identical. Leaving val_idxs off name_data
# only works if fastai's default split happens to coincide with val_idxs.
transform = tfms_from_model(RESNET, sz, crop_type=CropType.NO, tfm_y=TfmType.COORD, aug_tfms=augs)
bbox_data = ImageClassifierData.from_csv(PATH, JPEGS, CSV_BB, tfms=transform, bs=bs,
                                         continuous=True, val_idxs=val_idxs)
name_data = ImageClassifierData.from_csv(PATH, JPEGS, CSV, tfms=tfms_from_model(RESNET, sz),
                                         val_idxs=val_idxs)

# Replace the datasets behind the bounding-box data loaders with versions
# whose targets are (bbox, class label) pairs. image_data is an alias of
# bbox_data, so bbox_data's loaders are modified in place.
image_data = bbox_data
image_data.trn_dl.dataset = ConcatLblDataset(bbox_data.trn_ds, name_data.trn_y)
image_data.val_dl.dataset = ConcatLblDataset(bbox_data.val_ds, name_data.val_y)

# Custom head placed on top of the backbone. The final layer emits
# 4 + len(category) values (presumably 4 box values followed by one score
# per category -- TODO confirm against the loss used in training).
n_outputs = 4 + len(category)
head_layers = [
    Flatten(),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(25088, 256),
    nn.ReLU(),
    nn.BatchNorm1d(256),
    nn.Dropout(0.5),
    nn.Linear(256, n_outputs),
]
regression_head = nn.Sequential(*head_layers)

# Backbone + custom head.
base_model = ConvnetBuilder(RESNET, 0, 0, 0, custom_head=regression_head)

# Learner over the (already re-wired) bounding-box data, using Adam.
image_data = bbox_data
trainer = ConvLearner(image_data, base_model)
trainer.opt_fn = optim.Adam

# Restore the saved state named 'rs_34_98_2_2_6_2019'.
trainer.load('rs_34_98_2_2_6_2019')

# Run the learner's predict() (presumably over the validation set -- TODO
# confirm) and keep the raw outputs.
y = trainer.predict()

# Take the first four outputs of the item at index 2 and squash them into
# (0, 1) with the logistic sigmoid.
b = expit(y[2][:4])

# Per-coordinate scale factors. 1627 and 971 are presumably the pixel
# dimensions of the original image and 224 the network input size -- TODO
# confirm. NOTE: this rebinds `transform`, which previously held the fastai
# transforms.
long_side, short_side = 1627, 971
long_scale = long_side * ((long_side - 224) / long_side)
short_scale = short_side * ((short_side - 224) / short_side)
transform = [long_scale, short_scale, short_scale, long_scale]

# Scale the squashed outputs, then rearrange them into the output layout.
bb = b * transform
bb_dd = test_convert_bbox_proper_output(bb)

Doing this gives me the four points describing where the object is, and I can view how that looks. But how do I load a custom image for which I have no prior information (no annotations or labels)?