I'm trying to deploy a model onto a Flask web server and I'm running into this CUDA error:

RuntimeError: CUDA error: no kernel image is available for execution on the device

I think this means my new RTX 3000 series GPU isn't supported by my current PyTorch build. However, since I eventually want to deploy this on a CPU-only server, I'd rather not use the GPU at all.
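For what it's worth, here is a quick check I could run to confirm that suspicion (these are just the diagnostic calls I know of, so treat it as a sketch):

import torch

print(torch.version.cuda)                   # CUDA version this torch build was compiled against
print(torch.cuda.get_device_capability(0))  # compute capability of the GPU, e.g. (8, 6) for RTX 30xx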
I've tried setting cpu=True in load_learner and calling default_device(None); neither had any effect. Setting os.environ["CUDA_VISIBLE_DEVICES"] = "" also did nothing.
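In case ordering matters, here's a minimal sketch of that attempt (I'm assuming the variable has to be set before torch first initializes CUDA, which is why it sits above the import):

import os
# hide all GPUs from CUDA before torch is imported
os.environ["CUDA_VISIBLE_DEVICES"] = ""

import torch
print(torch.cuda.is_available())  # should now report False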
How do I force CPU usage everywhere?
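Is the right approach just to move everything over by hand? Something like the sketch below, where map_location and the explicit .cpu() call are my guesses rather than what my code currently does:

learn = load_learner(LEARNER, cpu=True)
learn.model = learn.model.cpu()  # force the model parameters onto the CPU
# remap any GPU-saved tensors while loading the dataloaders
learn.dls = torch.load(DLS, map_location=torch.device('cpu'))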
Flask server.py
import torch
from fastai.learner import load_learner
from fastai.torch_core import default_device
from flask import Flask

app = Flask(__name__)

# Seems to do nothing...
default_device(None)

# Redefined here so the pickled learner can resolve its custom tokenizer on load
class CodeTokenizer():
    def __init__(self, split_char=' ', **kwargs): self.split_char = split_char
    def __call__(self, items): return (t.split(self.split_char) for t in items)

LEARNER = './models/lang-model-v0-0-1.pkl'
# cpu=True also seems to do nothing
learn = load_learner(LEARNER, cpu=True)

DLS = './models/dls-v0-0-1'
learn.dls = torch.load(DLS)

@app.route('/')
def hello_world():
    pred = learn.predict('def check_pydep(importname, module):', n_words=100)
    return pred

if __name__ == '__main__':
    app.run(debug=True)
Notebook
from fastai.text.all import *
path = Path('data/python'); path.ls()
files = get_files(path); files
bs = 90
sl = 40
cut = int(len(files)*0.8)
splits = [list(range(cut)), list(range(cut,len(files)))]
class CodeTokenizer():
    def __init__(self, split_char=' ', **kwargs): self.split_char = split_char
    def __call__(self, items): return (t.split(self.split_char) for t in items)
rules = [replace_rep, replace_wrep, spec_add_spaces, replace_all_caps, replace_maj, lowercase]
tfms = [Tokenizer(tok=CodeTokenizer()), Numericalize]
dsets = Datasets(files, [tfms], splits=splits, dl_type=LMDataLoader)
dls = dsets.dataloaders(bs=bs, seq_len=sl)
dls.show_batch(max_n=2)
# Use get_language_model so that there is a decode here
model = get_language_model(AWD_LSTM, len(dls.vocab))
# Create a LMLearner, a subclass of Learner with text callbacks and a useful predict method
learn = LMLearner(dls, model, loss_func=CrossEntropyLossFlat(), metrics=[accuracy])
learn.fit_one_cycle(5, 5e-3, wd=0.1)
learn.predict('def check_pydep(importname, module):', n_words=500)
# Save/load model to avoid retraining when kernel restarts
# learn.save('python-v1-0-0')
# learn.load('python-v1-0-0')
# Export architecture and parameters for deployment
learn.export('models/lang-model-v0-0-1.pkl')
# Export dls for deployment
torch.save(learn.dls, 'models/dls-v0-0-1')