Examples / cifar failing

I don’t know why it isn’t showing the GPUs, but they work on the other examples.

platform   : Linux-4.15.0-36-generic-x86_64-with-debian-buster-sid
distro     : Ubuntu 18.04 Bionic Beaver
python     : 3.7.0
fastai     : 1.0.6.dev0
torch      : 1.0.0.dev20181011
nvidia dr. : 390.77
torch cuda : Not available
torch cuda : 9.2.148
nvcc  cuda : Unknown
torch gpus 

learn = Learner(data, wrn_22(), metrics=accuracy).to_fp16()
learn.fit_one_cycle(30, 3e-3, wd=0.4, div_factor=10, pct_start=0.5)


RuntimeError Traceback (most recent call last)
in <module>()
1 learn = Learner(data, wrn_22(), metrics=accuracy).to_fp16()
----> 2 learn.fit_one_cycle(30, 3e-3, wd=0.4, div_factor=10, pct_start=0.5)

~/fastai/fastai/train.py in fit_one_cycle(learn, cyc_len, max_lr, moms, div_factor, pct_start, wd, **kwargs)
16 cbs = [OneCycleScheduler(learn, max_lr, moms=moms, div_factor=div_factor,
17 pct_start=pct_start, **kwargs)]
---> 18 learn.fit(cyc_len, max_lr, wd=wd, callbacks=cbs)
19
20 def lr_find(learn:Learner, start_lr:Floats=1e-5, end_lr:Floats=10, num_it:int=100, **kwargs:Any):

~/fastai/fastai/basic_train.py in fit(self, epochs, lr, wd, callbacks)
134 callbacks = [cb(self) for cb in self.callback_fns] + listify(callbacks)
135 fit(epochs, self.model, self.loss_fn, opt=self.opt, data=self.data, metrics=self.metrics,
---> 136 callbacks=self.callbacks+callbacks)
137
138 def create_opt(self, lr:Floats, wd:Floats=0.)->None:

~/fastai/fastai/basic_train.py in fit(epochs, model, loss_fn, opt, data, callbacks, metrics)
87 except Exception as e:
88 exception = e
---> 89 raise e
90 finally: cb_handler.on_train_end(exception)
91

~/fastai/fastai/basic_train.py in fit(epochs, model, loss_fn, opt, data, callbacks, metrics)
77 for xb,yb in progress_bar(data.train_dl, parent=pbar):
78 xb, yb = cb_handler.on_batch_begin(xb, yb)
---> 79 loss = loss_batch(model, xb, yb, loss_fn, opt, cb_handler)
80 if cb_handler.on_batch_end(loss): break
81

~/fastai/fastai/basic_train.py in loss_batch(model, xb, yb, loss_fn, opt, cb_handler, metrics)
16 if not is_listy(xb): xb = [xb]
17 if not is_listy(yb): yb = [yb]
---> 18 out = model(*xb)
19 out = cb_handler.on_loss_begin(out)
20 if not loss_fn: return out.detach(),yb[0].detach()

~/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
475 result = self._slow_forward(*input, **kwargs)
476 else:
---> 477 result = self.forward(*input, **kwargs)
478 for hook in self._forward_hooks.values():
479 hook_result = hook(self, input, result)

~/fastai/fastai/vision/models/wrn.py in forward(self, x)
49 self.features = nn.Sequential(*layers)
50
---> 51 def forward(self, x): return self.features(x)
52
53

~/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
475 result = self._slow_forward(*input, **kwargs)
476 else:
---> 477 result = self.forward(*input, **kwargs)
478 for hook in self._forward_hooks.values():
479 hook_result = hook(self, input, result)

~/anaconda3/lib/python3.7/site-packages/torch/nn/modules/container.py in forward(self, input)
90 def forward(self, input):
91 for module in self._modules.values():
---> 92 input = module(input)
93 return input
94

~/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
475 result = self._slow_forward(*input, **kwargs)
476 else:
---> 477 result = self.forward(*input, **kwargs)
478 for hook in self._forward_hooks.values():
479 hook_result = hook(self, input, result)

~/anaconda3/lib/python3.7/site-packages/torch/nn/modules/conv.py in forward(self, input)
311 def forward(self, input):
312 return F.conv2d(input, self.weight, self.bias, self.stride,
---> 313 self.padding, self.dilation, self.groups)
314
315

RuntimeError: thnn_conv2d_forward is not implemented for type torch.HalfTensor

Remove the to_fp16. Since your GPU isn’t working, it’s using CPU, which doesn’t support fp16.
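
For example (a minimal sketch based on the snippet above; mixed precision can be re-enabled once torch.cuda.is_available() returns True):

learn = Learner(data, wrn_22(), metrics=accuracy)  # no .to_fp16() while running on the CPU
learn.fit_one_cycle(30, 3e-3, wd=0.4, div_factor=10, pct_start=0.5)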

I was running nvidia-smi -l and I clearly saw GPU utilization go to 100%, but memory use was only about 200 MB.

Why is it not picking up the GPUs?
I have everything installed without any errors.
Is this because of the PyTorch nightly build?

You might have a GPU, but you should check whether torch can actually use it with this command:
python -c 'import torch; print(torch.cuda.is_available())'
This should return True.
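
If you want more detail than just True/False, a short sketch using the standard torch.cuda helpers shows what torch can actually see:

import torch

print(torch.__version__)                  # installed build, e.g. 1.0.0.dev20181011
print(torch.version.cuda)                 # CUDA version torch was built against (None for a CPU-only build)
print(torch.cuda.is_available())          # must be True before .to_fp16() / GPU training will work
if torch.cuda.is_available():
    print(torch.cuda.device_count())      # number of GPUs torch can see
    print(torch.cuda.get_device_name(0))  # name of the first GPU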

Edit: Jeremy is right; your torch is not using the CUDA drivers (from the screenshot you provided):

torch cuda : Not available

Maybe you have to reinstall from scratch…
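
For instance (a sketch assuming a conda environment and the package names used by the 1.0 nightlies on the pytorch channel; adjust cuda92 to match your driver and toolkit):

conda remove pytorch-nightly                      # drop the current CPU-only nightly
conda install pytorch-nightly cuda92 -c pytorch   # nightly build plus a CUDA 9.2 runtime (matches the 9.2.148 shown above)
python -c 'import torch; print(torch.cuda.is_available())'  # should now print True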