I’m trying to train a GLOC detector (pilots passing out under g-force) and I’m putting together an ensemble of models as a kind of brute-force approach. I’m having trouble getting the VGG16 model to work. I progressively resize my training images from 100 to 200 to a full size of 400. If my batch size is too small, I get a ValueError (bottom of post).
ValueError: Expected more than 1 value per channel when training, got input size [1, 1024]
At size=200, that threshold is around a batch size of 14. The issue in practice is that my workstation doesn’t have enough memory (3 GB) to train 400x400 images on a VGG16 model at a batch size greater than 14.
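Looking at the [1, 1024] input size in the error, my guess is that a batch containing a single image is reaching a BatchNorm1d(1024) layer, which can’t compute batch statistics from one sample in training mode. That would also tie the error to batch size: whether the final partial batch holds exactly one image depends on the dataset size modulo the batch size. A minimal repro of that failure mode in plain PyTorch (names and sizes are mine, chosen to match the error):

```python
import torch
import torch.nn as nn

bn = nn.BatchNorm1d(1024)
bn.train()  # the check only fires in training mode; bn.eval() would pass

ok = torch.randn(2, 1024)   # batch of 2: per-channel stats are computable
print(bn(ok).shape)         # torch.Size([2, 1024])

bad = torch.randn(1, 1024)  # batch of 1: nothing to average over
bn(bad)  # ValueError: Expected more than 1 value per channel when training...
```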
I’m thinking of just resizing images to the VGG-standard 224x224 for the ensemble…
But I’m trying to find out why this error is happening. As I understand it, the fastai implementation of VGG16BN is a modified version that uses only the convolutional layers, so it should be input-size agnostic… I’ll have to look into the code.
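To illustrate what I mean by input-size agnostic, here’s a rough sketch using torchvision’s VGG16-BN on a recent PyTorch, not the exact fastai code: the conv-only backbone always emits 512 channels, and an adaptive pooling head collapses whatever spatial size remains, so only the channel count is fixed. (Incidentally, the 1024 in the error is consistent with concatenating average- and max-pooling over those 512 channels, 2 x 512 = 1024, before a BatchNorm1d, which I believe is what the fastai head does.)

```python
import torch
import torch.nn as nn
from torchvision import models

# Conv layers only: no fixed-size fully connected layers anywhere.
backbone = models.vgg16_bn(pretrained=False).features

# Adaptive pooling collapses any spatial size the convs produce,
# so the classifier head always sees 512 features per image.
head = nn.Sequential(nn.AdaptiveAvgPool2d(1), nn.Flatten())

for sz in (100, 200, 224, 400):
    x = torch.randn(2, 3, sz, sz)
    print(sz, head(backbone(x)).shape)  # torch.Size([2, 512]) every time
```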
While I’m at it, I noticed another strange thing: my test dataset, learner.data.test_ds.fnames, is always missing one file from my test folder, and I’m pretty sure it’s the same one each time. I haven’t investigated why yet. It hasn’t screwed up any Kaggle submissions, so at least it isn’t causing problems there.
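When I get to it, a quick diff like this should identify the culprit (test_dir is a placeholder for wherever my test folder actually lives):

```python
import os

test_dir = 'data/test'  # placeholder: actual test folder path
on_disk = set(os.listdir(test_dir))
picked_up = {os.path.basename(f) for f in learner.data.test_ds.fnames}

print(on_disk - picked_up)  # file(s) present on disk but missing from fnames
```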
The error report:

```
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<timed eval> in <module>()
~/Aersu/GLOC/fastai_lin/learner.py in fit(self, lrs, n_cycle, wds, **kwargs)
211 self.sched = None
212 layer_opt = self.get_layer_opt(lrs, wds)
--> 213 self.fit_gen(self.model, self.data, layer_opt, n_cycle, **kwargs)
214
215 def lr_find(self, start_lr=1e-5, end_lr=10, wds=None):
~/Aersu/GLOC/fastai_lin/learner.py in fit_gen(self, model, data, layer_opt, n_cycle, cycle_len, cycle_mult, cycle_save_name, metrics, callbacks, use_wd_sched, norm_wds, wds_sched_mult, **kwargs)
158 n_epoch = sum_geom(cycle_len if cycle_len else 1, cycle_mult, n_cycle)
159 fit(model, data, n_epoch, layer_opt.opt, self.crit,
--> 160 metrics=metrics, callbacks=callbacks, reg_fn=self.reg_fn, clip=self.clip, **kwargs)
161
162 def get_layer_groups(self): return self.models.get_layer_groups()
~/Aersu/GLOC/fastai_lin/model.py in fit(model, data, epochs, opt, crit, metrics, callbacks, **kwargs)
84 batch_num += 1
85 for cb in callbacks: cb.on_batch_begin()
---> 86 loss = stepper.step(V(x),V(y))
87 avg_loss = avg_loss * avg_mom + loss * (1-avg_mom)
88 debias_loss = avg_loss / (1 - avg_mom**batch_num)
~/Aersu/GLOC/fastai_lin/model.py in step(self, xs, y)
38 def step(self, xs, y):
39 xtra = []
---> 40 output = self.m(*xs)
41 if isinstance(output,(tuple,list)): output,*xtra = output
42 self.opt.zero_grad()
~/miniconda3/envs/fastai/lib/python3.6/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
323 for hook in self._forward_pre_hooks.values():
324 hook(self, input)
--> 325 result = self.forward(*input, **kwargs)
326 for hook in self._forward_hooks.values():
327 hook_result = hook(self, input, result)
~/miniconda3/envs/fastai/lib/python3.6/site-packages/torch/nn/modules/container.py in forward(self, input)
65 def forward(self, input):
66 for module in self._modules.values():
---> 67 input = module(input)
68 return input
69
~/miniconda3/envs/fastai/lib/python3.6/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
323 for hook in self._forward_pre_hooks.values():
324 hook(self, input)
--> 325 result = self.forward(*input, **kwargs)
326 for hook in self._forward_hooks.values():
327 hook_result = hook(self, input, result)
~/miniconda3/envs/fastai/lib/python3.6/site-packages/torch/nn/modules/batchnorm.py in forward(self, input)
35 return F.batch_norm(
36 input, self.running_mean, self.running_var, self.weight, self.bias,
---> 37 self.training, self.momentum, self.eps)
38
39 def __repr__(self):
~/miniconda3/envs/fastai/lib/python3.6/site-packages/torch/nn/functional.py in batch_norm(input, running_mean, running_var, weight, bias, training, momentum, eps)
1009 size = list(input.size())
1010 if reduce(mul, size[2:], size[0]) == 1:
-> 1011 raise ValueError('Expected more than 1 value per channel when training, got input size {}'.format(size))
1012 f = torch._C._functions.BatchNorm(running_mean, running_var, training, momentum, eps, torch.backends.cudnn.enabled)
1013 return f(input, weight, bias)
ValueError: Expected more than 1 value per channel when training, got input size [1, 1024]
```