Examples / cifar failing

I don’t know why it isn’t showing the GPUs, but they work on the other examples.

platform   : Linux-4.15.0-36-generic-x86_64-with-debian-buster-sid
distro     : Ubuntu 18.04 Bionic Beaver
python     : 3.7.0
fastai     : 1.0.6.dev0
torch      : 1.0.0.dev20181011
nvidia dr. : 390.77
torch cuda : Not available
torch cuda : 9.2.148
nvcc  cuda : Unknown
torch gpus 

learn = Learner(data, wrn_22(), metrics=accuracy).to_fp16()
learn.fit_one_cycle(30, 3e-3, wd=0.4, div_factor=10, pct_start=0.5)


RuntimeError Traceback (most recent call last)
in <module>()
1 learn = Learner(data, wrn_22(), metrics=accuracy).to_fp16()
----> 2 learn.fit_one_cycle(30, 3e-3, wd=0.4, div_factor=10, pct_start=0.5)

~/fastai/fastai/train.py in fit_one_cycle(learn, cyc_len, max_lr, moms, div_factor, pct_start, wd, **kwargs)
16 cbs = [OneCycleScheduler(learn, max_lr, moms=moms, div_factor=div_factor,
17 pct_start=pct_start, **kwargs)]
---> 18 learn.fit(cyc_len, max_lr, wd=wd, callbacks=cbs)
19
20 def lr_find(learn:Learner, start_lr:Floats=1e-5, end_lr:Floats=10, num_it:int=100, **kwargs:Any):

~/fastai/fastai/basic_train.py in fit(self, epochs, lr, wd, callbacks)
134 callbacks = [cb(self) for cb in self.callback_fns] + listify(callbacks)
135 fit(epochs, self.model, self.loss_fn, opt=self.opt, data=self.data, metrics=self.metrics,
---> 136 callbacks=self.callbacks+callbacks)
137
138 def create_opt(self, lr:Floats, wd:Floats=0.)->None:

~/fastai/fastai/basic_train.py in fit(epochs, model, loss_fn, opt, data, callbacks, metrics)
87 except Exception as e:
88 exception = e
---> 89 raise e
90 finally: cb_handler.on_train_end(exception)
91

~/fastai/fastai/basic_train.py in fit(epochs, model, loss_fn, opt, data, callbacks, metrics)
77 for xb,yb in progress_bar(data.train_dl, parent=pbar):
78 xb, yb = cb_handler.on_batch_begin(xb, yb)
---> 79 loss = loss_batch(model, xb, yb, loss_fn, opt, cb_handler)
80 if cb_handler.on_batch_end(loss): break
81

~/fastai/fastai/basic_train.py in loss_batch(model, xb, yb, loss_fn, opt, cb_handler, metrics)
16 if not is_listy(xb): xb = [xb]
17 if not is_listy(yb): yb = [yb]
---> 18 out = model(*xb)
19 out = cb_handler.on_loss_begin(out)
20 if not loss_fn: return out.detach(),yb[0].detach()

~/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
475 result = self._slow_forward(*input, **kwargs)
476 else:
---> 477 result = self.forward(*input, **kwargs)
478 for hook in self._forward_hooks.values():
479 hook_result = hook(self, input, result)

~/fastai/fastai/vision/models/wrn.py in forward(self, x)
49 self.features = nn.Sequential(*layers)
50
---> 51 def forward(self, x): return self.features(x)
52
53

~/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
475 result = self._slow_forward(*input, **kwargs)
476 else:
---> 477 result = self.forward(*input, **kwargs)
478 for hook in self._forward_hooks.values():
479 hook_result = hook(self, input, result)

~/anaconda3/lib/python3.7/site-packages/torch/nn/modules/container.py in forward(self, input)
90 def forward(self, input):
91 for module in self._modules.values():
---> 92 input = module(input)
93 return input
94

~/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
475 result = self._slow_forward(*input, **kwargs)
476 else:
---> 477 result = self.forward(*input, **kwargs)
478 for hook in self._forward_hooks.values():
479 hook_result = hook(self, input, result)

~/anaconda3/lib/python3.7/site-packages/torch/nn/modules/conv.py in forward(self, input)
311 def forward(self, input):
312 return F.conv2d(input, self.weight, self.bias, self.stride,
---> 313 self.padding, self.dilation, self.groups)
314
315

RuntimeError: thnn_conv2d_forward is not implemented for type torch.HalfTensor

Remove the to_fp16. Since your GPU isn’t working, it’s using CPU, which doesn’t support fp16.
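
For example (a minimal sketch based on the snippet above; mixed precision can be re-enabled once torch.cuda.is_available() returns True):

learn = Learner(data, wrn_22(), metrics=accuracy)  # no .to_fp16() while running on the CPU
learn.fit_one_cycle(30, 3e-3, wd=0.4, div_factor=10, pct_start=0.5)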

I was running nvidia-smi -l and I clearly saw GPU utilization go to 100%, but memory use was only about 200 MB.

Why is it not picking up the GPUs?
I have everything installed without any errors.
Is this because of the PyTorch nightly build?

You might have a GPU, but you should check whether torch can actually use it with this command:
python -c 'import torch; print(torch.cuda.is_available())'
This should return True.
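
If you want more detail than just True/False, a short sketch using the standard torch.cuda helpers shows what torch can actually see:

import torch

print(torch.__version__)                  # installed build, e.g. 1.0.0.dev20181011
print(torch.version.cuda)                 # CUDA version torch was built against (None for a CPU-only build)
print(torch.cuda.is_available())          # must be True before .to_fp16() / GPU training will work
if torch.cuda.is_available():
    print(torch.cuda.device_count())      # number of GPUs torch can see
    print(torch.cuda.get_device_name(0))  # name of the first GPU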

Edit: Jeremy is right; your torch is not using the CUDA drivers (from the screenshot you provided):

torch cuda : Not available

Maybe you have to reinstall from scratch…
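
For instance (a sketch assuming a conda environment and the package names used by the 1.0 nightlies on the pytorch channel; adjust cuda92 to match your driver and toolkit):

conda remove pytorch-nightly                      # drop the current CPU-only nightly
conda install pytorch-nightly cuda92 -c pytorch   # nightly build plus a CUDA 9.2 runtime (matches the 9.2.148 shown above)
python -c 'import torch; print(torch.cuda.is_available())'  # should now print True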