IndexError when trying to view data-augmented images

Hi,
I’m using the part 2, lesson 8 basics to try to train a model that can recognize upper-body joints in images. My data comes from the FLIC dataset with annotated joints (both wrists, both elbows, both shoulders, and the head), for a total of 14 values (7 x/y coordinate pairs). My CSV consists of the filename and a space-separated list of the coordinates (in y/x format, although I’ve also tried x/y format with the same results).
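For context, each row is built by flattening the 7 (y, x) joint pairs into one space-separated string, roughly like this (a minimal sketch only; fname and joints are hypothetical placeholders for one image's annotation):

# Sketch: flatten 7 (y, x) joint pairs into the space-separated CSV value.
# fname (a string) and joints (a list of 7 (y, x) tuples) are hypothetical
# placeholders standing in for one image's FLIC annotation.
row = fname + ',' + ' '.join(str(v) for joint in joints for v in joint)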

Everything works great when I follow the bounding-box example exactly to augment the bounding-box dataset, but when I try to augment the joints dataset, I get an IndexError on the line x, y = next(iter(md.aug_dl)). Without augmentation, it seems to work fine.

f_model = resnet34
sz = 224
bs = 64

augs = [RandomFlip(tfm_y=TfmType.COORD),
        RandomRotate(30, tfm_y=TfmType.COORD),
        RandomLighting(0.1,0.1, tfm_y=TfmType.COORD)]

tfms = tfms_from_model(f_model, sz, crop_type=CropType.NO, tfm_y=TfmType.COORD, aug_tfms=augs)
md = ImageClassifierData.from_csv(dataPath, imagePath, newTrainCsvPath, tfms=tfms, continuous=True, bs=4)

def show_img(im, figsize=None, ax=None):
    if not ax: fig, ax = plt.subplots(figsize=figsize)
    ax.imshow(im)
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)
    return ax

idx = 3
fig, axes = plt.subplots(3, 3, figsize=(9, 9))
for i, ax in enumerate(axes.flat):
    x, y = next(iter(md.aug_dl))  # <--- IndexError here
    #ima = md.val_ds.denorm(to_np(x))[idx]
    #b = to_np(y[idx])
    ##print(y[idx])
    #print(b)
    #show_img(ima, ax=ax)
    #plot_joints(ax, b)

---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-13-ccf9064a3abb> in <module>
      2 fig, axes = plt.subplots(3, 3, figsize = (9, 9))
      3 for i, ax in enumerate( axes.flat ):
----> 4     x, y= next(iter(md.aug_dl))
      5     #ima = md.val_ds.denorm( to_np( x ))[idx]
      6     #b = to_np( y[idx] )

~/fastai/courses/dl2/fastai/dataloader.py in __iter__(self)
     86                 # avoid py3.6 issue where queue is infinite and can result in memory exhaustion
     87                 for c in chunk_iter(iter(self.batch_sampler), self.num_workers*10):
---> 88                     for batch in e.map(self.get_batch, c):
     89                         yield get_tensor(batch, self.pin_memory, self.half)
     90 

~/anaconda3/envs/fastai/lib/python3.6/concurrent/futures/_base.py in result_iterator()
    584                     # Careful not to keep a reference to the popped future
    585                     if timeout is None:
--> 586                         yield fs.pop().result()
    587                     else:
    588                         yield fs.pop().result(end_time - time.time())

~/anaconda3/envs/fastai/lib/python3.6/concurrent/futures/_base.py in result(self, timeout)
    430                 raise CancelledError()
    431             elif self._state == FINISHED:
--> 432                 return self.__get_result()
    433             else:
    434                 raise TimeoutError()

~/anaconda3/envs/fastai/lib/python3.6/concurrent/futures/_base.py in __get_result(self)
    382     def __get_result(self):
    383         if self._exception:
--> 384             raise self._exception
    385         else:
    386             return self._result

~/anaconda3/envs/fastai/lib/python3.6/concurrent/futures/thread.py in run(self)
     54 
     55         try:
---> 56             result = self.fn(*self.args, **self.kwargs)
     57         except BaseException as exc:
     58             self.future.set_exception(exc)

~/fastai/courses/dl2/fastai/dataloader.py in get_batch(self, indices)
     73 
     74     def get_batch(self, indices):
---> 75         res = self.np_collate([self.dataset[i] for i in indices])
     76         if self.transpose:   res[0] = res[0].T
     77         if self.transpose_y: res[1] = res[1].T

~/fastai/courses/dl2/fastai/dataloader.py in <listcomp>(.0)
     73 
     74     def get_batch(self, indices):
---> 75         res = self.np_collate([self.dataset[i] for i in indices])
     76         if self.transpose:   res[0] = res[0].T
     77         if self.transpose_y: res[1] = res[1].T

~/fastai/courses/dl2/fastai/dataset.py in __getitem__(self, idx)
    201             xs,ys = zip(*[self.get1item(i) for i in range(*idx.indices(self.n))])
    202             return np.stack(xs),ys
--> 203         return self.get1item(idx)
    204 
    205     def __len__(self): return self.n

~/fastai/courses/dl2/fastai/dataset.py in get1item(self, idx)
    195     def get1item(self, idx):
    196         x,y = self.get_x(idx),self.get_y(idx)
--> 197         return self.get(self.transform, x, y)
    198 
    199     def __getitem__(self, idx):

~/fastai/courses/dl2/fastai/dataset.py in get(self, tfm, x, y)
    206 
    207     def get(self, tfm, x, y):
--> 208         return (x,y) if tfm is None else tfm(x,y)
    209 
    210     @abstractmethod

~/fastai/courses/dl2/fastai/transforms.py in __call__(self, im, y)
    646         self.tfms.append(ChannelOrder(tfm_y))
    647 
--> 648     def __call__(self, im, y=None): return compose(im, y, self.tfms)
    649     def __repr__(self): return str(self.tfms)
    650 

~/fastai/courses/dl2/fastai/transforms.py in compose(im, y, fns)
    621     for fn in fns:
    622         #pdb.set_trace()
--> 623         im, y =fn(im, y)
    624     return im if y is None else (im, y)
    625 

~/fastai/courses/dl2/fastai/transforms.py in __call__(self, x, y)
    233         x,y = ((self.transform(x),y) if self.tfm_y==TfmType.NO
    234                 else self.transform(x,y) if self.tfm_y in (TfmType.PIXEL, TfmType.CLASS)
--> 235                 else self.transform_coord(x,y))
    236         return x, y
    237 

~/fastai/courses/dl2/fastai/transforms.py in transform_coord(self, x, ys)
    264     def transform_coord(self, x, ys):
    265         yp = partition(ys, 4)
--> 266         y2 = [self.map_y(y,x) for y in yp]
    267         x = self.do_transform(x, False)
    268         return x, np.concatenate(y2)

~/fastai/courses/dl2/fastai/transforms.py in <listcomp>(.0)
    264     def transform_coord(self, x, ys):
    265         yp = partition(ys, 4)
--> 266         y2 = [self.map_y(y,x) for y in yp]
    267         x = self.do_transform(x, False)
    268         return x, np.concatenate(y2)

~/fastai/courses/dl2/fastai/transforms.py in map_y(self, y0, x)
    258 
    259     def map_y(self, y0, x):
--> 260         y = CoordTransform.make_square(y0, x)
    261         y_tr = self.do_transform(y, True)
    262         return to_bb(y_tr)

~/fastai/courses/dl2/fastai/transforms.py in make_square(y, x)
    254         y1 = np.zeros((r, c))
    255         y = y.astype(np.int)
--> 256         y1[y[0]:y[2], y[1]:y[3]] = 1.
    257         return y1
    258 

IndexError: index 2 is out of bounds for axis 0 with size 2
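
If I’m reading the traceback right, the COORD transform partitions the target into groups of 4 values (it expects bounding boxes, and make_square indexes y[0]..y[3]), but my 14 joint coordinates don’t divide evenly into groups of 4, so the last group only has 2 values. Here is a quick sketch of that chunking in plain Python, mimicking what I understand the partition helper to do (this is my reading of the library, so treat it as an assumption):

# Sketch: chunk a 14-value target into groups of 4, the way I understand
# transform_coord treats the target (as if each group were a bounding box).
# The last chunk ends up with only 2 values, which would explain
# "index 2 is out of bounds for axis 0 with size 2".
ys = list(range(14))  # stand-in for 7 (y, x) joint coordinates
chunks = [ys[i:i + 4] for i in range(0, len(ys), 4)]
print([len(c) for c in chunks])  # prints [4, 4, 4, 2]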

And here is a subset of the CSV:

['fileName,joints\n',
 '12-oclock-high-special-edition-00004221.jpg,415.0625 225.9918213 368.6132813 244.13876340000002 261.578125 223.9754944 161.94791669999998 254.22039790000002 251.4804688 123.1591797 376.6914063 80.81632233 423.140625 175.5836639\n',
 '12-oclock-high-special-edition-00004281.jpg,372.421875 301.51019289999994 333.6796875 303.657135 223.9101563 258.5714417 153.6002604 243.5428569 234.671875 129.7550964 387.4882813 88.96327209 406.859375 192.0163269\n',
 '12-oclock-high-special-edition-00006331.jpg,278.125 351.9591675 249.125 328.39999389999997 189.3125 324.7755127 141.58333330000002 303.63265989999996 189.3125 235.97550959999998 270.875 225.1020355 307.125 266.7836914\n',
 '12-oclock-high-special-edition-00006361.jpg,322.453125 367.4693909 294.671875 331.836731 213.3125 327.87756349999995 155.10416669999998 297.5238139 197.4375 195.2449036 276.8125 175.4489746 338.328125 165.5510254\n']

Any help would be appreciated!

Also, I was wondering whether some of the images might not be RGB (3-channel). I checked all of them with scipy and they are all 3-channel, so I think we can safely rule that out.
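
The check I ran was along these lines (a sketch; I’m showing it with PIL here rather than the exact scipy call, but the idea is the same, and imagePath is the image folder from the setup above):

# Sketch of the channel check (shown with PIL instead of the exact scipy call;
# same idea). imagePath is the image folder used in the setup above.
import os
import numpy as np
from PIL import Image

for fname in os.listdir(imagePath):
    arr = np.array(Image.open(os.path.join(imagePath, fname)))
    if arr.ndim != 3 or arr.shape[2] != 3:
        print(fname, arr.shape)  # flag anything that isn't 3-channel RGB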

Hello,
I also have a similar problem when I use “md.aug_dl” to check whether data augmentation is working properly on my dataset.
The following code replaces the dataset in “trn_dl” and “val_dl”, but there is no change in “aug_dl”, so I suspect this is causing the problem.
But I do not know how to solve it.

trn_ds2 = ConcatLblDataset(md.trn_ds, trn_mcs)
val_ds2 = ConcatLblDataset(md.val_ds, val_mcs)
md.trn_dl.dataset = trn_ds2
md.val_dl.dataset = val_ds2
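
I was thinking of wrapping the dataset behind “aug_dl” the same way, something like the line below, but I’m not sure whether that is correct (just my guess, not something from the lesson notebook):

# Guess: also wrap the dataset behind aug_dl (which, as I understand it, is the
# validation images with the training-time augmentations applied).
md.aug_dl.dataset = ConcatLblDataset(md.aug_dl.dataset, val_mcs)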