I’m trying to load in the drop4
model at the end of the pascal-multi notebook and use this to predict over a new image (of my own) that it has never seen before. I was able to do this for the dogs-cats model but it’s a bit harder for this one I think because of the transforms and custom head.
I’m able to load in the model but then when I predict over the new image it gives me unexpected results - like not bounding box coordinates but other arrays that I’m not sure how to interpret.
Here’s my process:
Load in the pascal-multi drop4
model
# --- Rebuild the pascal-multi data object (same transforms as training) ---
PATH_pascal = Path('data/pascal')
trn_j = json.load((PATH_pascal / 'pascal_train2007.json').open())  # COCO-style annotation json
IMAGES,ANNOTATIONS,CATEGORIES = ['images', 'annotations', 'categories']
FILE_NAME,ID,IMG_ID,CAT_ID,BBOX = 'file_name','id','image_id','category_id','bbox'
cats = dict((o[ID], o['name']) for o in trn_j[CATEGORIES])    # category id -> name
trn_fns = dict((o[ID], o[FILE_NAME]) for o in trn_j[IMAGES])  # image id -> filename
trn_ids = [o[ID] for o in trn_j[IMAGES]]
JPEGS_pascal = 'VOCdevkit2/VOC2007/JPEGImages'
IMG_PATH_pascal = PATH_pascal/JPEGS_pascal
MC_CSV = PATH_pascal/'tmp/mc.csv'  # multi-label csv written earlier in the notebook
# TfmType.COORD makes each augmentation transform the target boxes with the image.
aug_tfms = [RandomRotate(3, p=0.5, tfm_y=TfmType.COORD),
            RandomLighting(0.05, 0.05, tfm_y=TfmType.COORD),
            RandomFlip(tfm_y=TfmType.COORD)]
# NOTE(review): tfms_from_model returns a (train_tfms, val_tfms) TUPLE -- calling
# tfms_pascal(...) directly raises "'tuple' object is not callable" (see below).
tfms_pascal = tfms_from_model(arch, sz, crop_type=CropType.NO,
                              tfm_y=TfmType.COORD, aug_tfms=aug_tfms)
md_pascal = ImageClassifierData.from_csv(PATH_pascal, JPEGS_pascal,
                                         MC_CSV, tfms=tfms_pascal, bs=bs)
id2cat = list(cats.values())  # index -> category name; OutConv sizes its head from this
class StdConv(nn.Module):
    """3x3 convolution block: Conv2d -> ReLU -> BatchNorm -> Dropout.

    Args:
        nin: input channel count.
        nout: output channel count.
        stride: conv stride (2 halves the grid; 1 preserves it).
        drop: dropout probability applied after batch-norm.
    """
    def __init__(self, nin, nout, stride=2, drop=0.1):
        super().__init__()
        self.conv = nn.Conv2d(nin, nout, 3, stride=stride, padding=1)
        self.bn = nn.BatchNorm2d(nout)
        self.drop = nn.Dropout(drop)

    def forward(self, x):
        # Same pipeline as the lesson code, spelled out step by step:
        # note ReLU runs *before* batch-norm here.
        out = self.conv(x)
        out = F.relu(out)
        out = self.bn(out)
        return self.drop(out)
def flatten_conv(x, k):
    """Flatten conv activations (bs, nf, gx, gy) into (bs, gx*gy*k, nf//k).

    Moves channels last, then folds the grid cells and the k anchors per
    cell into one "anchor" dimension, leaving nf//k values per anchor.
    """
    batch = x.size(0)
    per_anchor = x.size(1) // k
    channels_last = x.permute(0, 2, 3, 1)
    return channels_last.reshape(batch, -1, per_anchor)
class OutConv(nn.Module):
    """Per-grid output head producing class scores and box activations.

    For each of the k anchors per cell it emits len(id2cat)+1 class
    scores (the +1 is background) and 4 bounding-box activations.
    """
    def __init__(self, k, nin, bias):
        super().__init__()
        self.k = k
        n_clas_out = (len(id2cat) + 1) * k
        self.oconv1 = nn.Conv2d(nin, n_clas_out, 3, padding=1)
        self.oconv2 = nn.Conv2d(nin, 4 * k, 3, padding=1)
        # Initialise the classifier bias (e.g. -4.) so logits start low.
        self.oconv1.bias.data.zero_().add_(bias)

    def forward(self, x):
        clas = flatten_conv(self.oconv1(x), self.k)
        bbox = flatten_conv(self.oconv2(x), self.k)
        return [clas, bbox]
drop=0.4  # dropout probability shared by the head's Dropout and every StdConv

class SSD_MultiHead(nn.Module):
    """SSD detection head placed on top of the conv backbone.

    Predictions are emitted from three successively coarser feature grids
    (after sconv1, sconv2, sconv3) and concatenated along the anchor
    dimension. This is exactly what predict_array returns for this model:
    a pair [class scores of shape (bs, n_anchors, len(id2cat)+1),
    box activations of shape (bs, n_anchors, 4)] -- e.g. the
    (1, 189, 21) / (1, 189, 4) arrays seen at inference time.
    """
    def __init__(self, k, bias):
        super().__init__()
        self.drop = nn.Dropout(drop)
        self.sconv0 = StdConv(512,256, stride=1, drop=drop)  # stride 1: keeps backbone grid size
        self.sconv1 = StdConv(256,256, drop=drop)            # stride 2: halves the grid
        self.sconv2 = StdConv(256,256, drop=drop)
        self.sconv3 = StdConv(256,256, drop=drop)
        # NOTE(review): out0 is registered but never used in forward(); it must
        # stay here anyway so the module matches the saved 'drop4' weights.
        self.out0 = OutConv(k, 256, bias)
        self.out1 = OutConv(k, 256, bias)
        self.out2 = OutConv(k, 256, bias)
        self.out3 = OutConv(k, 256, bias)

    def forward(self, x):
        x = self.drop(F.relu(x))
        x = self.sconv0(x)
        x = self.sconv1(x)
        o1c,o1l = self.out1(x)  # finest prediction grid
        x = self.sconv2(x)
        o2c,o2l = self.out2(x)
        x = self.sconv3(x)
        o3c,o3l = self.out3(x)  # coarsest prediction grid
        # Concatenate all scales: [class scores, box activations].
        return [torch.cat([o1c,o2c,o3c], dim=1),
                torch.cat([o1l,o2l,o3l], dim=1)]
k = 9  # anchors per grid cell (presumably zooms x aspect ratios -- per the notebook)
head_reg4 = SSD_MultiHead(k, -4.)  # -4. bias: classifier logits start near "background"
models = ConvnetBuilder(arch, 0, 0, 0, custom_head=head_reg4)  # backbone + SSD head
learn_pascal = ConvLearner(md_pascal, models)
learn_pascal.opt_fn = optim.Adam
learn_pascal.load('drop4')  # architecture above must match the saved weights exactly
This all works fine.
Then I try to pass it an image of my own, which is of shape (224, 224, 3):
learn_pascal.predict_array(tfms_pascal(open_image(PATH+fn))[None])
This command crashes my kernel and I have to restart it.
Then I try to apply different transforms and predict:
trn_tfms, val_tfms = tfms_from_model(arch, sz)  # unpack the (train, val) transform pair
# val_tfms(...)[None] yields a (1, 3, sz, sz) batch; the model then returns its raw
# SSD activations: [class scores (bs, n_anchors, n_classes+1), boxes (bs, n_anchors, 4)].
learn_pascal.predict_array(val_tfms(open_image(PATH+fn))[None])
val_tfms(open_image(PATH+fn))[None]
transforms it to shape (1,3,224,224)
This actually allows a prediction, but it returns 2 arrays, where the first is of shape (1, 189, 21)
and the second is of shape (1, 189, 4)
. And I’m not sure what these arrays even are.
If I just run: tfms_pascal(open_image(PATH+fn))
I get error:
TypeError: 'tuple' object is not callable
Anyone have an idea how to pass this model a single image for prediction?
Or, if the prediction here is correct how might I interpret the arrays?
I’ve also attempted to save it as torch model and predict from there:
torch.save(learn_pascal.model,'learn_pascal.pt')  # persist the full nn.Module (arch + weights)
model = torch.load('learn_pascal.pt')
model = model.eval()  # inference mode: disables dropout, freezes batch-norm stats
_, tfms = tfms_from_model(arch, sz)  # keep only the validation-time transforms
image = Variable(torch.Tensor(tfms(open_image(PATH+fn))[None]))  # [None] adds the batch dim
pred = model(image)  # raw head outputs: [class scores, box activations], as above
This returns the same thing as the two arrays mentioned above.