I’m having issues when loading the WT103 language model, specifically when I run learn.lr_find()
I get a CUDA error: out of memory
. Is it possible to use a smaller language model? I believe this one has a vocabulary of a few million tokens!
RuntimeError Traceback (most recent call last)
/usr/local/lib/python3.6/dist-packages/fastai/basic_train.py in fit(epochs, model, loss_func, opt, data, callbacks, metrics)
93 exception = e
---> 94 raise e
95 finally: cb_handler.on_train_end(exception)
/usr/local/lib/python3.6/dist-packages/fastai/basic_train.py in fit(epochs, model, loss_func, opt, data, callbacks, metrics)
83 xb, yb = cb_handler.on_batch_begin(xb, yb)
---> 84 loss = loss_batch(model, xb, yb, loss_func, opt, cb_handler)
85 if cb_handler.on_batch_end(loss): break
/usr/local/lib/python3.6/dist-packages/fastai/basic_train.py in loss_batch(model, xb, yb, loss_func, opt, cb_handler)
17 if not is_listy(yb): yb = [yb]
---> 18 out = model(*xb)
19 out = cb_handler.on_loss_begin(out)
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
478 else:
--> 479 result = self.forward(*input, **kwargs)
480 for hook in self._forward_hooks.values():
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/container.py in forward(self, input)
91 for module in self._modules.values():
---> 92 input = module(input)
93 return input
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
478 else:
--> 479 result = self.forward(*input, **kwargs)
480 for hook in self._forward_hooks.values():
/usr/local/lib/python3.6/dist-packages/fastai/text/models.py in forward(self, input)
148 output = self.output_dp(outputs[-1])
--> 149 decoded = self.decoder(output.view(output.size(0)*output.size(1), output.size(2)))
150 return decoded, raw_outputs, outputs
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
478 else:
--> 479 result = self.forward(*input, **kwargs)
480 for hook in self._forward_hooks.values():
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/linear.py in forward(self, input)
62 def forward(self, input):
---> 63 return F.linear(input, self.weight, self.bias)
64
/usr/local/lib/python3.6/dist-packages/torch/nn/functional.py in linear(input, weight, bias)
1160 # fused op is marginally faster
-> 1161 return torch.addmm(bias, input, weight.t())
1162
RuntimeError: CUDA error: out of memory