Error using custom sampler


(Ashwin) #1

Hi,

I’m trying to use ImbalancedDatasetSampler (https://github.com/ufoym/imbalanced-dataset-sampler). I added this class to PyTorch’s sampler.py file and changed the DataLoader to use this sampler instead of the random sampler.

This works well when I have separate train and valid folders. That is, the following data loader call doesn’t throw any error:

data = ImageDataBunch.from_folder(path=r'path/to/train_valid/folder', ds_tfms=tfms, bs=bs).normalize(([0.5, 0.5,0.5], [1.0, 1.0, 1.0]))

However,

data = ImageDataBunch.from_folder(path=r'path/to/images', train='.', valid_pct=0.3, 
                                  ds_tfms=tfms, bs=bs).normalize(([0.5, 0.5,0.5], [1.0, 1.0, 1.0]))

throws an error.

I don’t get this error if I switch to using the random sampler.

======
ERROR

IndexError Traceback (most recent call last)
in
8 [crop(size=256)])
9 data = ImageDataBunch.from_folder(path=r'D:/CAD_stuff_/imgs_cat_all_mixed_as_12_cg_scale_0.1_mad_0_drz_mixed_train_only/patches/', train='.', valid_pct=0.3,
---> 10 ds_tfms=tfms, bs=bs, num_workers=0).normalize(([0.5, 0.5,0.5], [1.0, 1.0, 1.0]))

~\AppData\Local\Continuum\anaconda3\envs\fastai_v1\lib\site-packages\fastai\vision\data.py in from_folder(cls, path, train, valid, valid_pct, classes, **kwargs)
109 else: src = il.random_split_by_pct(valid_pct)
110 src = src.label_from_folder(classes=classes)
--> 111 return cls.create_from_ll(src, **kwargs)
112
113 @classmethod

~\AppData\Local\Continuum\anaconda3\envs\fastai_v1\lib\site-packages\fastai\vision\data.py in create_from_ll(cls, lls, bs, val_bs, ds_tfms, num_workers, dl_tfms, device, test, collate_fn, size, no_check, resize_method, mult, padding_mode, mode, tfm_y)
98 if test is not None: lls.add_test_folder(test)
99 return lls.databunch(bs=bs, val_bs=val_bs, dl_tfms=dl_tfms, num_workers=num_workers, collate_fn=collate_fn,
--> 100 device=device, no_check=no_check)
101
102 @classmethod

~\AppData\Local\Continuum\anaconda3\envs\fastai_v1\lib\site-packages\fastai\data_block.py in databunch(self, path, bs, val_bs, num_workers, dl_tfms, device, collate_fn, no_check, **kwargs)
516 path = Path(ifnone(path, self.path))
517 data = self.x._bunch.create(self.train, self.valid, test_ds=self.test, path=path, bs=bs, val_bs=val_bs,
--> 518 num_workers=num_workers, device=device, collate_fn=collate_fn, no_check=no_check, **kwargs)
519 if getattr(self, 'normalize', False):#In case a normalization was serialized
520 norm = self.normalize

~\AppData\Local\Continuum\anaconda3\envs\fastai_v1\lib\site-packages\fastai\basic_data.py in create(cls, train_ds, valid_ds, test_ds, path, bs, val_bs, num_workers, dl_tfms, device, collate_fn, no_check, **dl_kwargs)
118 dls = [DataLoader(d, b, shuffle=s, drop_last=s, num_workers=num_workers, **dl_kwargs) for d,b,s in
119 zip(datasets, (bs,val_bs,val_bs,val_bs), (True,False,False,False)) if d is not None]
--> 120 return cls(*dls, path=path, device=device, dl_tfms=dl_tfms, collate_fn=collate_fn, no_check=no_check)
121
122 def __getattr__(self,k:int)->Any: return getattr(self.train_dl, k)

~\AppData\Local\Continuum\anaconda3\envs\fastai_v1\lib\site-packages\fastai\basic_data.py in __init__(self, train_dl, valid_dl, fix_dl, test_dl, device, dl_tfms, path, collate_fn, no_check)
98 self.single_dl = _create_dl(DataLoader(valid_dl.dataset, batch_size=1, num_workers=0))
99 self.path = Path(path)
--> 100 if not no_check: self.sanity_check()
101
102 def __repr__(self)->str:

~\AppData\Local\Continuum\anaconda3\envs\fastai_v1\lib\site-packages\fastai\basic_data.py in sanity_check(self)
240 print(final_message)
241 return
--> 242 idx = next(iter(self.train_dl.batch_sampler))
243 samples,fails = [],[]
244 for i in idx:

~\AppData\Local\Continuum\anaconda3\envs\fastai_v1\lib\site-packages\torch\utils\data\sampler.py in __iter__(self)
226 def __iter__(self):
227 batch = []
--> 228 for idx in self.sampler:
229 batch.append(idx)
230 if len(batch) == self.batch_size:

~\AppData\Local\Continuum\anaconda3\envs\fastai_v1\lib\site-packages\torch\utils\data\sampler.py in <genexpr>(.0)
80
81 def __iter__(self):
---> 82 return (self.indices[i] for i in torch.multinomial(
83 self.weights, self.num_samples, replacement=True))
84
IndexError: list index out of range

Could someone help me understand what could be going wrong?

Thank you