Mysterious Fastai ImageList Error

jfang · September 8, 2019, 8:13pm

Hi! I am struggling with a mysterious valueError in the dataloader that I am struggling to find the root of, after performing transforms to the data. The data that I’m working with is medical signal data. I am trying to convert the medical signals into Gramian Angular Fields after applying stretch and exponential amplify transforms to them. Here is what I’ve done with the transforms:

def stretch(x):
    l = int(187 * (1 + (random.random()-0.5)/3))
    y = resample(x, l)
    if l < 187:
        y_ = np.zeros(shape=(187, ))
        y_[:l] = y
    else:
        y_ = y[:187]
    return y_

def amp(x):
    factor = (0.8+random.random()/2.5)
    return x**factor

class GAF():
    def __init__(self, inp, opt): 
        self.inp = inp
        self.opt = opt
        self.out = self.forward(inp, opt)
        
    def forward(self, i, option): 
        imax, imin = i.max(), i.min()
        i_rescaled = ((i-imax)+(i-imin))/(imax-imin)
        i2 = np.arccos(i_rescaled)
        if option == 'summation':
            field = np.cos(i2[None, :] + i2[:, None])
        if option == 'difference':
            field = np.sin(-i2[None, :] + i2[:, None])
        return field

def proc_tfm_vec(img_vector, augmentations = True, opt = 'difference'):
    if(not augmentations):
        return GAF(img_vector, opt).out
    
    # random stretch and modified amplify
    stretch_tfm = random.randint(0,3)
    amp_tfm = random.randint(0,3)
    if amp_tfm > 0:
        img_vector = amp(img_vector)    
    if stretch_tfm > 0:
        img_vector = stretch(img_vector)
    
    # GAF 
    img = GAF(img_vector, 'difference').out
    return img

Now here is what the data looks like:

Now when I try to do the following:

class PixelList(ImageList):
    def open(self, index):
        regex = re.compile(r'\d+')
        fn = re.findall(regex, index)
        df_fn = self.inner_df[self.inner_df.index.values == int(fn[0])]
        img_pixel = df_fn.drop(labels=['label','index'],axis=1).values
        img_pixel = proc_tfm_vec(img_pixel, augmentations = True)
        return vision.Image(pil2tensor(img_pixel, np.float32))

src = (PixelList.from_df(data,'.',cols='index')
      .split_by_rand_pct(0.1)
      .label_from_df(cols='label'))

I would get a valueError like this:

You can deactivate this warning by passing `no_check=True`.
/home/jfang/anaconda3/envs/fastai/lib/python3.6/site-packages/fastai/basic_data.py:259: UserWarning: There seems to be something wrong with your dataset, for example, in the first batch can't access these elements in self.train_ds: 55054,22102,79667,19871,7882...
  warn(warn_msg)
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-34-37d50dc7cb0f> in <module>
----> 1 data = (src.databunch(bs=48)
      2        .normalize())

~/anaconda3/envs/fastai/lib/python3.6/site-packages/fastai/vision/data.py in normalize(self, stats, do_x, do_y)
    175         "Add normalize transform using `stats` (defaults to `DataBunch.batch_stats`)"
    176         if getattr(self,'norm',False): raise Exception('Can not call normalize twice')
--> 177         if stats is None: self.stats = self.batch_stats()
    178         else:             self.stats = stats
    179         self.norm,self.denorm = normalize_funcs(*self.stats, do_x=do_x, do_y=do_y)

~/anaconda3/envs/fastai/lib/python3.6/site-packages/fastai/vision/data.py in batch_stats(self, funcs, ds_type)
    169         "Grab a batch of data and call reduction function `func` per channel"
    170         funcs = ifnone(funcs, [torch.mean,torch.std])
--> 171         x = self.one_batch(ds_type=ds_type, denorm=False)[0].cpu()
    172         return [func(channel_view(x), 1) for func in funcs]
    173 

~/anaconda3/envs/fastai/lib/python3.6/site-packages/fastai/basic_data.py in one_batch(self, ds_type, detach, denorm, cpu)
    166         w = self.num_workers
    167         self.num_workers = 0
--> 168         try:     x,y = next(iter(dl))
    169         finally: self.num_workers = w
    170         if detach: x,y = to_detach(x,cpu=cpu),to_detach(y,cpu=cpu)

~/anaconda3/envs/fastai/lib/python3.6/site-packages/fastai/basic_data.py in __iter__(self)
     73     def __iter__(self):
     74         "Process and returns items from `DataLoader`."
---> 75         for b in self.dl: yield self.proc_batch(b)
     76 
     77     @classmethod

~/anaconda3/envs/fastai/lib/python3.6/site-packages/torch/utils/data/dataloader.py in __next__(self)
    817             else:
    818                 del self.task_info[idx]
--> 819                 return self._process_data(data)
    820 
    821     next = __next__  # Python 2 compatibility

~/anaconda3/envs/fastai/lib/python3.6/site-packages/torch/utils/data/dataloader.py in _process_data(self, data)
    844         self._try_put_index()
    845         if isinstance(data, ExceptionWrapper):
--> 846             data.reraise()
    847         return data
    848 

~/anaconda3/envs/fastai/lib/python3.6/site-packages/torch/_utils.py in reraise(self)
    367             # (https://bugs.python.org/issue2651), so we work around it.
    368             msg = KeyErrorMessage(msg)
--> 369         raise self.exc_type(msg)

ValueError: Caught ValueError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/home/jfang/anaconda3/envs/fastai/lib/python3.6/site-packages/torch/utils/data/_utils/worker.py", line 178, in _worker_loop
    data = fetcher.fetch(index)
  File "/home/jfang/anaconda3/envs/fastai/lib/python3.6/site-packages/torch/utils/data/_utils/fetch.py", line 44, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/home/jfang/anaconda3/envs/fastai/lib/python3.6/site-packages/torch/utils/data/_utils/fetch.py", line 44, in <listcomp>
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/home/jfang/anaconda3/envs/fastai/lib/python3.6/site-packages/fastai/data_block.py", line 648, in __getitem__
    if self.item is None: x,y = self.x[idxs],self.y[idxs]
  File "/home/jfang/anaconda3/envs/fastai/lib/python3.6/site-packages/fastai/data_block.py", line 118, in __getitem__
    if isinstance(idxs, Integral): return self.get(idxs)
  File "/home/jfang/anaconda3/envs/fastai/lib/python3.6/site-packages/fastai/vision/data.py", line 271, in get
    res = self.open(fn)
  File "<ipython-input-32-7cd21bc4dcb1>", line 9, in open
    img_pixel = proc_tfm_vec(img_pixel, augmentations = True)
  File "<ipython-input-21-41cdb9c4256f>", line 11, in proc_tfm_vec
    img_vector = stretch(img_vector)
  File "<ipython-input-10-9f55a73e1acb>", line 6, in stretch
    y_[:l] = y
ValueError: could not broadcast input array from shape (179,187) into shape (179)

If anyone can find a solution or tell me what’s going on with this problem, I will very much appreciate it.

jfang · September 8, 2019, 8:59pm

Ah. Apparently I’ve gotten to the root of the problem - there was a dimension error. The input dimension was (1, 187) while the function proc_tfm_vec expects (187,).

MLee27 · September 13, 2019, 3:24am

Hi, what did you do to resolve this error? I’m facing same error here

jfang · September 13, 2019, 4:09am

I’m sorry, I don’t think I’m able to help you. My error occured when I was trying to create a data iterator to convert batches of medical signal into images using signal transforms for data augmentation before converting them into Gramian Angular Fields.

Good luck.