Not sure if this is the right place to post this question, but it would be very nice if somebody has seen this before or knows a way to solve this.
The main issue shows up when comparing two (example) transformations: the first one works (OK), the second one fails (NOK).
OK => tfms = get_transforms(do_flip=False, p_affine=0, p_lighting=0.75)
NOK => tfms = get_transforms(do_flip=False, max_rotate=25, p_affine=1, p_lighting=0.75)
I first found the error after trying to run “learn.lr_find()”
I tried all kinds of things, but after debugging I think that enumerating data.train_dl is what throws these errors:
for x in enumerate(data.train_dl):
print(x)
But even after a close look into the fastai library, PyTorch and even the C code that eventually raises the error, I was not able to find out why and where this goes wrong. As said, it would be very nice if somebody knows a solution.
More details of what I am doing:
The issue occurs in a model similar to the head-pose example shown in lesson 3, where the middle of the face is detected. I use my own pictures of meters and would like to recognize the top corner of a label with the identifying number. Because the test set is rather small, I would like to use a lot of augmentation.
After the error messages below I have pasted two screenshots: one of the OK case and one of the not-OK case.
Code
#OK tfms = get_transforms(do_flip=False, p_affine=0, p_lighting=0.75)
#Beneath is the tfms giving errors
tfms = get_transforms(do_flip=False, max_rotate=25, p_affine=1, p_lighting=0.75)
data = (ImageItemList.from_folder(path)
.random_split_by_pct()
.label_from_func(get_ctr, label_cls=PointsItemList)
.transform(tfms, tfm_y=True, padding_mode='border')
.databunch(bs=12)
.normalize(imagenet_stats)
)
learn = create_cnn(data, models.resnet34)
learn.lr_find()
learn.recorder.plot()
Full error message
RuntimeError Traceback (most recent call last)
<ipython-input-20-5aa600e31cfe> in <module>()
----> 1 learn.lr_find()
2 learn.recorder.plot()
~/anaconda3/lib/python3.6/site-packages/fastai/train.py in lr_find(learn, start_lr, end_lr, num_it, stop_div, **kwargs)
26 cb = LRFinder(learn, start_lr, end_lr, num_it, stop_div)
27 a = int(np.ceil(num_it/len(learn.data.train_dl)))
---> 28 learn.fit(a, start_lr, callbacks=[cb], **kwargs)
29
30 def to_fp16(learn:Learner, loss_scale:float=512., flat_master:bool=False)->Learner:
~/anaconda3/lib/python3.6/site-packages/fastai/basic_train.py in fit(self, epochs, lr, wd, callbacks)
160 callbacks = [cb(self) for cb in self.callback_fns] + listify(callbacks)
161 fit(epochs, self.model, self.loss_func, opt=self.opt, data=self.data, metrics=self.metrics,
--> 162 callbacks=self.callbacks+callbacks)
163
164 def create_opt(self, lr:Floats, wd:Floats=0.)->None:
~/anaconda3/lib/python3.6/site-packages/fastai/basic_train.py in fit(epochs, model, loss_func, opt, data, callbacks, metrics)
92 except Exception as e:
93 exception = e
---> 94 raise e
95 finally: cb_handler.on_train_end(exception)
96
~/anaconda3/lib/python3.6/site-packages/fastai/basic_train.py in fit(epochs, model, loss_func, opt, data, callbacks, metrics)
80 cb_handler.on_epoch_begin()
81
---> 82 for xb,yb in progress_bar(data.train_dl, parent=pbar):
83 xb, yb = cb_handler.on_batch_begin(xb, yb)
84 loss = loss_batch(model, xb, yb, loss_func, opt, cb_handler)
~/anaconda3/lib/python3.6/site-packages/fastprogress/fastprogress.py in __iter__(self)
63 self.update(0)
64 try:
---> 65 for i,o in enumerate(self._gen):
66 yield o
67 if self.auto_update: self.update(i+1)
~/anaconda3/lib/python3.6/site-packages/fastai/basic_data.py in __iter__(self)
45 def __iter__(self):
46 "Process and returns items from `DataLoader`."
---> 47 for b in self.dl:
48 y = b[1][0] if is_listy(b[1]) else b[1]
49 if not self.skip_size1 or y.size(0) != 1: yield self.proc_batch(b)
~/anaconda3/lib/python3.6/site-packages/torch/utils/data/dataloader.py in __next__(self)
635 self.reorder_dict[idx] = batch
636 continue
--> 637 return self._process_next_batch(batch)
638
639 next = __next__ # Python 2 compatibility
~/anaconda3/lib/python3.6/site-packages/torch/utils/data/dataloader.py in _process_next_batch(self, batch)
656 self._put_indices()
657 if isinstance(batch, ExceptionWrapper):
--> 658 raise batch.exc_type(batch.exc_msg)
659 return batch
660
RuntimeError: Traceback (most recent call last):
File "/home/paperspace/anaconda3/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 138, in _worker_loop
samples = collate_fn([dataset[i] for i in batch_indices])
File "/home/paperspace/anaconda3/lib/python3.6/site-packages/fastai/torch_core.py", line 94, in data_collate
return torch.utils.data.dataloader.default_collate(to_data(batch))
File "/home/paperspace/anaconda3/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 232, in default_collate
return [default_collate(samples) for samples in transposed]
File "/home/paperspace/anaconda3/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 232, in <listcomp>
return [default_collate(samples) for samples in transposed]
File "/home/paperspace/anaconda3/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 209, in default_collate
return torch.stack(batch, 0, out=out)
RuntimeError: invalid argument 0: Sizes of tensors must match except in dimension 0. Got 1 and 0 in dimension 1 at /opt/conda/conda-bld/pytorch-nightly_1542272393570/work/aten/src/TH/generic/THTensorMoreMath.cpp:1319