Hello,
I am trying to use a weighted dataloader for an imbalanced dataset with 8 classes.
My datablock (which works fine with previous dataloader/models):
test_size=0.3
splitter = TrainTestSplitter(test_size=test_size, random_state=42, stratify=train_set.iloc[:,1])
snd_blk = DataBlock(blocks=(ImageBlock, CategoryBlock),
splitter=splitter,
get_x=ColReader(0, pref=Exp_),
get_y=ColReader(1),
item_tfms=Resize(854),
batch_tfms=aug_transforms())
and my Dataloader:
dls = snd_blk.dataloaders(train_set, num_workers=2, dl_type=WeightedDL, wgts=wgts, bs=16)
my train_set (which also worked fine previously) is a dataframe with len 5700, and my wgts has the same length (5700).
When I try:
dls.show_batch()
or
lr_min,lr_steep=learn.lr_find()
I get the following error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-113-d18828793d27> in <module>
----> 1 lr_min,lr_steep=learn.lr_find()
2 print(f"Minimum/10: {lr_min:.2e}, steepest point: {lr_steep:.2e}")
~/miniconda3/envs/fastai_V2/lib/python3.8/site-packages/fastai/callback/schedule.py in lr_find(self, start_lr, end_lr, num_it, stop_div, show_plot, suggestions)
220 n_epoch = num_it//len(self.dls.train) + 1
221 cb=LRFinder(start_lr=start_lr, end_lr=end_lr, num_it=num_it, stop_div=stop_div)
--> 222 with self.no_logging(): self.fit(n_epoch, cbs=cb)
223 if show_plot: self.recorder.plot_lr_find()
224 if suggestions:
~/miniconda3/envs/fastai_V2/lib/python3.8/site-packages/fastai/learner.py in fit(self, n_epoch, lr, wd, cbs, reset_opt)
210 self.opt.set_hypers(lr=self.lr if lr is None else lr)
211 self.n_epoch = n_epoch
--> 212 self._with_events(self._do_fit, 'fit', CancelFitException, self._end_cleanup)
213
214 def _end_cleanup(self): self.dl,self.xb,self.yb,self.pred,self.loss = None,(None,),(None,),None,None
~/miniconda3/envs/fastai_V2/lib/python3.8/site-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
158
159 def _with_events(self, f, event_type, ex, final=noop):
--> 160 try: self(f'before_{event_type}'); f()
161 except ex: self(f'after_cancel_{event_type}')
162 self(f'after_{event_type}'); final()
~/miniconda3/envs/fastai_V2/lib/python3.8/site-packages/fastai/learner.py in _do_fit(self)
201 for epoch in range(self.n_epoch):
202 self.epoch=epoch
--> 203 self._with_events(self._do_epoch, 'epoch', CancelEpochException)
204
205 def fit(self, n_epoch, lr=None, wd=None, cbs=None, reset_opt=False):
~/miniconda3/envs/fastai_V2/lib/python3.8/site-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
158
159 def _with_events(self, f, event_type, ex, final=noop):
--> 160 try: self(f'before_{event_type}'); f()
161 except ex: self(f'after_cancel_{event_type}')
162 self(f'after_{event_type}'); final()
~/miniconda3/envs/fastai_V2/lib/python3.8/site-packages/fastai/learner.py in _do_epoch(self)
195
196 def _do_epoch(self):
--> 197 self._do_epoch_train()
198 self._do_epoch_validate()
199
~/miniconda3/envs/fastai_V2/lib/python3.8/site-packages/fastai/learner.py in _do_epoch_train(self)
187 def _do_epoch_train(self):
188 self.dl = self.dls.train
--> 189 self._with_events(self.all_batches, 'train', CancelTrainException)
190
191 def _do_epoch_validate(self, ds_idx=1, dl=None):
~/miniconda3/envs/fastai_V2/lib/python3.8/site-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
158
159 def _with_events(self, f, event_type, ex, final=noop):
--> 160 try: self(f'before_{event_type}'); f()
161 except ex: self(f'after_cancel_{event_type}')
162 self(f'after_{event_type}'); final()
~/miniconda3/envs/fastai_V2/lib/python3.8/site-packages/fastai/learner.py in all_batches(self)
164 def all_batches(self):
165 self.n_iter = len(self.dl)
--> 166 for o in enumerate(self.dl): self.one_batch(*o)
167
168 def _do_one_batch(self):
~/miniconda3/envs/fastai_V2/lib/python3.8/site-packages/fastai/data/load.py in __iter__(self)
106 self.randomize()
107 self.before_iter()
--> 108 self.__idxs=self.get_idxs() # called in context of main process (not workers/subprocesses)
109 for b in _loaders[self.fake_l.num_workers==0](self.fake_l):
110 # fix issue 2899. If the process start method isn't fork, the data will be copied to cuda in learner one_batch.
~/miniconda3/envs/fastai_V2/lib/python3.8/site-packages/fastai/callback/data.py in get_idxs(self)
31 if self.n==0: return []
32 if not self.shuffle: return super().get_idxs()
---> 33 return list(np.random.choice(self.n, self.n, p=self.wgts))
34
35 # Cell
mtrand.pyx in numpy.random.mtrand.RandomState.choice()
ValueError: 'a' and 'p' must have same size
Any ideas as to where I am going wrong?
Cheers