Dataloader crashing when instantiating transforms ("missing split_idx")

Hi there,
I’m having some trouble understanding what my issue is. “before_call” in the SpaceTfm class is missing “split_idx”.

Code:

#Load dataset
dataset = torch.load("drive/MyDrive/PopulationData/dataset.t")
xtuple, ytuple = zip(*dataset)
xs = torch.stack(xtuple)
ys = list(ytuple)

#Apply data augmentations and set up fastAI dataloaders
def get_items(dataset): return xs
def get_y(dataset): return ys

#List of transformations to apply.

transforms = [
    DeterministicDihedral(),
    Rotate(max_deg=25),
    SpaceTfm(
        [Brightness(),
         Contrast(),
         Saturation(),
         Hue()
        ],
    LightingTfm)
]

comp = setup_aug_tfms(transforms)

#Use FastAI's DataBlock API to define the format of the inputs and outputs of the model, split the dataset into a training and a validation set, and apply the augmentations listed above

population = DataBlock(
    blocks=(ImageBlock, RegressionBlock(1)), 
    get_items=get_items,
    get_y=get_y,
    splitter=RandomSplitter(valid_pct=0.2, seed=42), 
    batch_tfms=comp
    )

#Store the data as a DataLoaders object 

dls = population.dataloaders(None, bs=64, shuffle=True)
dls.show_batch()

Error log:

Could not do one pass in your dataloader, there is something wrong in it. Please see the stack trace below:
---------------------------------------------------------------------------

TypeError                                 Traceback (most recent call last)

<ipython-input-32-0c7d7f44637b> in <cell line: 36>()
     34 
     35 #Store the data as a DataLoaders object which is an iterator that provides the data one batch at a time. The data is now ready to be input into the model.
---> 36 dls = population.dataloaders(None, bs=64, shuffle=True)
     37 dls.show_batch()

8 frames

/usr/local/lib/python3.9/dist-packages/fastai/data/block.py in dataloaders(self, source, path, verbose, **kwargs)
    155         dsets = self.datasets(source, verbose=verbose)
    156         kwargs = {**self.dls_kwargs, **kwargs, 'verbose': verbose}
--> 157         return dsets.dataloaders(path=path, after_item=self.item_tfms, after_batch=self.batch_tfms, **kwargs)
    158 
    159     _docs = dict(new="Create a new `DataBlock` with other `item_tfms` and `batch_tfms`",

/usr/local/lib/python3.9/dist-packages/fastai/data/core.py in dataloaders(self, bs, shuffle_train, shuffle, val_shuffle, n, path, dl_type, dl_kwargs, device, drop_last, val_bs, **kwargs)
    335         dl = dl_type(self.subset(0), **merge(kwargs,def_kwargs, dl_kwargs[0]))
    336         def_kwargs = {'bs':bs if val_bs is None else val_bs,'shuffle':val_shuffle,'n':None,'drop_last':False}
--> 337         dls = [dl] + [dl.new(self.subset(i), **merge(kwargs,def_kwargs,val_kwargs,dl_kwargs[i]))
    338                       for i in range(1, self.n_subsets)]
    339         return self._dbunch_type(*dls, path=path, device=device)

/usr/local/lib/python3.9/dist-packages/fastai/data/core.py in <listcomp>(.0)
    335         dl = dl_type(self.subset(0), **merge(kwargs,def_kwargs, dl_kwargs[0]))
    336         def_kwargs = {'bs':bs if val_bs is None else val_bs,'shuffle':val_shuffle,'n':None,'drop_last':False}
--> 337         dls = [dl] + [dl.new(self.subset(i), **merge(kwargs,def_kwargs,val_kwargs,dl_kwargs[i]))
    338                       for i in range(1, self.n_subsets)]
    339         return self._dbunch_type(*dls, path=path, device=device)

/usr/local/lib/python3.9/dist-packages/fastai/data/core.py in new(self, dataset, cls, **kwargs)
     95         if not hasattr(self, '_n_inp') or not hasattr(self, '_types'):
     96             try:
---> 97                 self._one_pass()
     98                 res._n_inp,res._types = self._n_inp,self._types
     99             except Exception as e:

/usr/local/lib/python3.9/dist-packages/fastai/data/core.py in _one_pass(self)
     78         b = self.do_batch([self.do_item(None)])
     79         if self.device is not None: b = to_device(b, self.device)
---> 80         its = self.after_batch(b)
     81         self._n_inp = 1 if not isinstance(its, (list,tuple)) or len(its)==1 else len(its)-1
     82         self._types = explode_types(its)

/usr/local/lib/python3.9/dist-packages/fastcore/transform.py in __call__(self, o)
    206         self.fs = self.fs.sorted(key='order')
    207 
--> 208     def __call__(self, o): return compose_tfms(o, tfms=self.fs, split_idx=self.split_idx)
    209     def __repr__(self): return f"Pipeline: {' -> '.join([f.name for f in self.fs if f.name != 'noop'])}"
    210     def __getitem__(self,i): return self.fs[i]

/usr/local/lib/python3.9/dist-packages/fastcore/transform.py in compose_tfms(x, tfms, is_enc, reverse, **kwargs)
    156     for f in tfms:
    157         if not is_enc: f = f.decode
--> 158         x = f(x, **kwargs)
    159     return x
    160 

/usr/local/lib/python3.9/dist-packages/fastai/vision/augment.py in __call__(self, b, split_idx, **kwargs)
     46         **kwargs
     47     ):
---> 48         self.before_call(b, split_idx=split_idx)
     49         return super().__call__(b, split_idx=split_idx, **kwargs) if self.do else b
     50 

/usr/local/lib/python3.9/dist-packages/fastai/vision/augment.py in before_call(self, b, split_idx)
    932         self.do = True
    933         while isinstance(b, tuple): b = b[0]
--> 934         for t in self.fs: t.before_call(b)
    935 
    936     def compose(self, 

TypeError: before_call() missing 1 required positional argument: 'split_idx'

No Google results for that error message.

On the face of it, it appears to be a problem with my instantiation of SpaceTfm in my aug transforms or with the DataBlock’s splitter but I suspect it might be something about how I’m feeding my data to DataBlock.

The “source” argument in my dataloaders call is None because the dataset has been prepared in advance, so I just fetch the x and y values directly from the xs and ys variables defined above. I assume that’s the most appropriate approach in that situation.

It’s my first time trying to prepare my own data for a neural network so it might of course be something silly…

Thanks in advance for any suggestions!