I have been trying to match the accuracy of models converted to ONNX with the original Fastai model, but I am not able to reproduce the same numbers. The problem is a difference between the preprocessing done by Fastai and what I have been doing for the ONNX input.
For FastAI v2, I am doing the following for a simple 2-class image classification task:
Read and open the image, and convert to RGB: Image.open().convert('RGB')
Resize the image to 224, keeping the aspect ratio the same, with resample mode 2
Normalize the image by dividing by 255
I want to replicate default preprocessing done by Fastai without using fastai in production.
muellerzr
(Zachary Mueller)
July 20, 2020, 10:18am
2
Did you train by normalizing on ImageNet? This may be where your mismatch is. You need to normalize by those statistics
No, I assumed it was trained on imagenet stats by default, but that was not the case.
I re-encoded the tensors returned by one_batch to see where I was going wrong, and I was able to reproduce the images by just multiplying the tensor by 255.
muellerzr
(Zachary Mueller)
July 20, 2020, 11:37am
4
How did you build your DataBlock/DataLoaders?
I think I used the simplest variant among all the options that are there
dls = ImageDataLoaders.from_df(df.loc[:,['frname','label','is_valid']],
path=imagepath, valid_col='is_valid',
label_col='label', seed=42, item_tfms=Resize(224))
I am using following loop for inference from ONNX:
mn=np.asarray([0.485, 0.456, 0.406])
std =np.asarray([0.229, 0.224, 0.225])
for each in tqdm(range(0,4080,4)):
a=[]
for each1 in jpgs[each:each+4]:
image_read_resize=PIL.Image.open(each1.as_posix()).convert('RGB')#,(224,224))
image_read_resize=image_read_resize.resize((287,224),resample=2) #maintain aspect to original image
image_rgb_224=np.array(image_read_resize)[:,32:-31,:] #convert to 224x224
image_move_axis=np.moveaxis(image_rgb_224,2,0)
image_norm=image_move_axis.astype(np.float32)/255
a.append(image_norm)
gets=ort_sess.run(['491'],{'input.1':a})
muellerzr
(Zachary Mueller)
July 20, 2020, 12:15pm
7
Are you using cnn_learner
? If so it adds normalizing from imagenet via this line:
# Cell
@log_args(to_return=True, but_as=Learner.__init__)
@delegates(Learner.__init__)
def cnn_learner(dls, arch, loss_func=None, pretrained=True, cut=None, splitter=None,
                y_range=None, config=None, n_out=None, normalize=True, **kwargs):
    "Build a convnet style learner from `dls` and `arch`"
    # Fall back to an empty architecture config when the caller gives none.
    config = {} if config is None else config
    # Per-architecture metadata (cut point, param splitter, normalization stats).
    meta = model_meta.get(arch, _default_meta)
    # Infer the number of outputs from the data when not supplied explicitly.
    if n_out is None: n_out = get_c(dls)
    assert n_out, "`n_out` is not defined, and could not be infered from data, set `dls.c` or pass `n_out`"
    # Attach the pretrained model's normalization stats to the dataloaders.
    if normalize: _add_norm(dls, meta, pretrained)
    # `y_range` may alternatively be supplied through the config dict.
    if y_range is None: y_range = config.pop('y_range', None)
    cut_pt = ifnone(cut, meta['cut'])
    model = create_cnn_model(arch, n_out, cut_pt, pretrained, y_range=y_range, **config)
    split_fn = ifnone(splitter, meta['split'])
    learn = Learner(dls, model, loss_func=loss_func, splitter=split_fn, **kwargs)
    # Pretrained backbones start frozen so only the head trains at first.
    if pretrained: learn.freeze()
    return learn
# Cell
@delegates(models.unet.DynamicUnet.__init__)
def unet_config(**kwargs):
"Convenience function to easily create a config for `DynamicUnet`"
You can further check your stats via dls.after_batch.normalize.mean
, std (IIRC)
1 Like
I wanted to use Mobilenet, but since it is not available in Fastai2, I am using Learner:
learn = Learner(dls,model,metrics=[error_rate,accuracy])
1 Like
I suspect there is a difference between my resizing method and the fastai resize procedure during batch creation.
I am not able to find where the image open and resize procedures are called in the pipeline.
This is because I have verified the reverse transformation for normalization with np.allclose.
muellerzr
(Zachary Mueller)
July 20, 2020, 12:35pm
10
The first is in the type transforms, the second is after_item
. (It’s just PILImage.create
)
muellerzr:
PILImage.create
But it just inherits the PILBase class and calls load_image
# Display defaults passed through to `show_image` by `show` below.
_show_args = {'cmap':'viridis'}
# Default open mode used by `create` via `load_image`: force 3-channel RGB.
_open_args = {'mode': 'RGB'}
@classmethod
def create(cls, fn:(Path,str,Tensor,ndarray,bytes), **kwargs)->None:
    "Open an `Image` from path `fn`"
    # Dispatch on the input type; the order matters — TensorImage/TensorMask
    # appear to be Tensor subclasses, so they must be handled before the
    # generic Tensor case. NOTE(review): despite the `->None` annotation this
    # returns a `cls` instance.
    if isinstance(fn,TensorImage): fn = fn.permute(1,2,0).type(torch.uint8)
    if isinstance(fn, TensorMask): fn = fn.type(torch.uint8)
    if isinstance(fn,Tensor): fn = fn.numpy()
    if isinstance(fn,ndarray): return cls(Image.fromarray(fn))
    if isinstance(fn,bytes): fn = io.BytesIO(fn)
    # Fallback: treat `fn` as a path/file-like and open with the class's
    # default open args (mode='RGB') merged with any caller overrides.
    return cls(load_image(fn, **merge(cls._open_args, kwargs)))
def show(self, ctx=None, **kwargs):
    "Display this image via `show_image`, using `_show_args` as defaults that `kwargs` may override."
    opts = merge(self._show_args, kwargs)
    return show_image(self, ctx=ctx, **opts)
def __repr__(self):
    # e.g. "PILImage mode=RGB size=640x480"
    dims = "x".join(str(d) for d in self.size)
    return f'{self.__class__.__name__} mode={self.mode} size={dims}'
# Cell
# Concrete image type: a straight subclass of `PILBase` with no overrides,
# so `create`, `show` and `__repr__` behave exactly as defined in the base.
class PILImage(PILBase): pass
which just opens the image and converts the mode
@patch
def resize_max(x: Image.Image, resample=0, max_px=None, max_h=None, max_w=None):
    "`resize` `x` to `max_px`, or `max_h`, or `max_w`"
    # NOTE(review): `x.shape`, `x.n_px` and `x.reshape` are fastai patches on
    # PIL images, so `h,w` here looks like (height, width) — confirm against
    # the fastai source.
    h,w = x.shape
    # Scale both dims by sqrt(max_px/n_px) so the total pixel count hits max_px.
    if max_px and x.n_px>max_px: h,w = Tuple(h,w).mul(math.sqrt(max_px/x.n_px))
    # Cap height, then width, rescaling the other dim to keep the aspect ratio.
    if max_h and h>max_h: h,w = (max_h ,max_h*w/h)
    if max_w and w>max_w: h,w = (max_w*h/w,max_w )
    return x.reshape(round(h), round(w), resample=resample)
# Cell
def load_image(fn, mode=None, **kwargs):
    "Open and load a `PIL.Image` and convert to `mode`"
    img = Image.open(fn, **kwargs)
    # Force the lazy decode now, then rebuild the image object so it no
    # longer holds on to the open file.
    img.load()
    img = img._new(img.im)
    if mode: img = img.convert(mode)
    return img
# Cell
def image2tensor(img):
"Transform image to byte tensor in `c*h*w` dim order."
res = tensor(img)
The Resize(224) transform in the dataloader is calling crop_pad:
super().before_call(b, split_idx)
w,h = self.orig_sz
if not split_idx: self.tl = (random.randint(0,w-self.cp_size[0]), random.randint(0,h-self.cp_size[1]))
# Cell
# Generates a class whose attributes Squish/Crop/Pad hold their lowercase
# names, giving tab-completion and typo-proofing for resize-method strings.
mk_class('ResizeMethod', **{o:o.lower() for o in ['Squish', 'Crop', 'Pad']},
doc="All possible resize method as attributes to get tab-completion and typo-proofing")
# Cell
@delegates()
class Resize(RandTransform):
split_idx = None
mode,mode_mask,order = Image.BILINEAR,Image.NEAREST,1
"Resize image to `size` using `method`"
def __init__(self, size, method=ResizeMethod.Crop, pad_mode=PadMode.Reflection,
resamples=(Image.BILINEAR, Image.NEAREST), **kwargs):
super().__init__(**kwargs)
# `size` may be an int or a pair; `_process_sz` normalizes it. The default
# method is Crop with reflection padding when the image is too small.
self.size,self.pad_mode,self.method = _process_sz(size),pad_mode,method
# Resample filters: first for images (bilinear), second for masks (nearest).
self.mode,self.mode_mask = resamples
def before_call(self, b, split_idx):
I think this function is responsible for the resize
from torchvision.transforms.functional import pad as tvpad
# Cell
# Generates a class with Zeros/Border/Reflection attributes holding their
# lowercase names (tab-completion / typo-proofing for padding-mode strings).
mk_class('PadMode', **{o:o.lower() for o in ['Zeros', 'Border', 'Reflection']},
doc="All possible padding mode as attributes to get tab-completion and typo-proofing")
# Cell
# Maps fastai pad-mode names to the equivalent torchvision `pad` padding_mode.
_pad_modes = {'zeros': 'constant', 'border': 'edge', 'reflection': 'reflect'}
@patch
def _do_crop_pad(x:Image.Image, sz, tl, orig_sz,
pad_mode=PadMode.Zeros, resize_mode=Image.BILINEAR, resize_to=None):
if any(tl.ge(0)):
# At least one dim is inside the image, so needs to be cropped
c = tl.max(0)
x = x.crop((*c, *c.add(sz).min(orig_sz)))
if any(tl.lt(0)):
# At least one dim is outside the image, so needs to be padded
p = (-tl).max(0)
f = (sz-orig_sz-p).max(0)
x = tvpad(x, (*p, *f), padding_mode=_pad_modes[pad_mode])
I was able to reproduce similar results by using:
Resize using PIL, using cv2 doesn’t reproduce same results
During resize, maintain the aspect ratio and then crop the longer dimension accordingly; I was not sure why this step increased the similarity of the reproduction.
Still I was not able to reproduce exact matrices:
There are differences in the matrices we get from PIL and CV2 image reads (though the difference is very small, it does not return True in a comparison of the 2 image tensors). In the production pipeline I am getting matrices from a different source, hence I cannot actually change this step.