Deploying Deep learning models to production

Is there a course/good resource on how to deploy deep learning models (how to serve it to production)?

I know how to create a flask REST API but I don’t know anything about Docker, Kubernetes, Microservices, etc.

2 Likes

Since you already know how to use Flask and make REST APIs, you could, for example, run your Flask service in Gunicorn.

You don't have to use Docker.

1 Like

We’ve deployed ULMFiT in production using endpoints through Amazon’s SageMaker. It does require that you have some familiarity with Docker to get it set up, so it may not be the easiest option for you.

2 Likes

Hey Matthew,

I was able to deploy ULMFiT on Amazon's SageMaker using a Docker container with fastai v1.0.59 and pytorch v1.1.0. While I am able to deploy locally and to an ml.m4.xlarge instance through a SageMaker notebook and make predictions using learner.predict(), I can't seem to get learner.get_preds() to work. I get the following error:

> algo-1-d31c1_1  | 2019-12-03 00:09:30,780 serve_ULMFiT INFO     Calling model
> [2019-12-03 00:09:31 +0000] [98] [INFO] Parent changed, shutting down: <Worker 42>
> algo-1-d31c1_1  | [2019-12-03 00:09:31 +0000] [98] [INFO] Worker exiting (pid: 42)
> algo-1-d31c1_1  | [2019-12-03 00:09:31 +0000] [99] [INFO] Parent changed, shutting down: <Worker 42>
> algo-1-d31c1_1  | [2019-12-03 00:09:31 +0000] [99] [INFO] Worker exiting (pid: 42)
> [2019-12-03 00:09:31 +0000] [96] [INFO] Parent changed, shutting down: <Worker 42>
> algo-1-d31c1_1  | [2019-12-03 00:09:31 +0000] [96] [INFO] Worker exiting (pid: 42)
> algo-1-d31c1_1  | 2019-12-03 00:09:31,551 serve_ULMFiT ERROR    Exception on /invocations [POST]
> algo-1-d31c1_1  | Traceback (most recent call last):
> algo-1-d31c1_1  |   File "/usr/local/lib/python3.6/dist-packages/sagemaker_containers/_functions.py", line 85, in wrapper
> algo-1-d31c1_1  |     return fn(*args, **kwargs)
> algo-1-d31c1_1  |   File "/opt/ml/code/serve_ULMFiT.py", line 43, in predict_fn
> algo-1-d31c1_1  |     preds_all = model.get_preds(DatasetType.Test, ordered = True)
> algo-1-d31c1_1  |   File "/usr/local/lib/python3.6/dist-packages/fastai/text/learner.py", line 88, in get_preds
> algo-1-d31c1_1  |     preds = super().get_preds(ds_type=ds_type, activ=activ, with_loss=with_loss, n_batch=n_batch, pbar=pbar)
> algo-1-d31c1_1  |   File "/usr/local/lib/python3.6/dist-packages/fastai/basic_train.py", line 339, in get_preds
> algo-1-d31c1_1  |     activ=activ, loss_func=lf, n_batch=n_batch, pbar=pbar)
> algo-1-d31c1_1  |   File "/usr/local/lib/python3.6/dist-packages/fastai/basic_train.py", line 44, in get_preds
> algo-1-d31c1_1  |     zip(*validate(model, dl, cb_handler=cb_handler, pbar=pbar, average=False, n_batch=n_batch))]
> algo-1-d31c1_1  |   File "/usr/local/lib/python3.6/dist-packages/fastai/basic_train.py", line 57, in validate
> algo-1-d31c1_1  |     for xb,yb in progress_bar(dl, parent=pbar, leave=(pbar is not None)):
> algo-1-d31c1_1  |   File "/usr/local/lib/python3.6/dist-packages/fastprogress/fastprogress.py", line 72, in __iter__
> algo-1-d31c1_1  |     for i,o in enumerate(self._gen):
> algo-1-d31c1_1  |   File "/usr/local/lib/python3.6/dist-packages/fastai/basic_data.py", line 75, in __iter__
> algo-1-d31c1_1  |     for b in self.dl: yield self.proc_batch(b)
> algo-1-d31c1_1  |   File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py", line 576, in __next__
> algo-1-d31c1_1  |     idx, batch = self._get_batch()
> algo-1-d31c1_1  |   File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py", line 553, in _get_batch
> algo-1-d31c1_1  |     success, data = self._try_get_batch()
> algo-1-d31c1_1  |   File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py", line 511, in _try_get_batch
> algo-1-d31c1_1  |     data = self.data_queue.get(timeout=timeout)
> algo-1-d31c1_1  |   File "/usr/lib/python3.6/multiprocessing/queues.py", line 113, in get
> algo-1-d31c1_1  |     return _ForkingPickler.loads(res)
> algo-1-d31c1_1  |   File "/usr/local/lib/python3.6/dist-packages/torch/multiprocessing/reductions.py", line 276, in rebuild_storage_fd
> algo-1-d31c1_1  |     fd = df.detach()
> algo-1-d31c1_1  |   File "/usr/lib/python3.6/multiprocessing/resource_sharer.py", line 57, in detach
> algo-1-d31c1_1  |     with _resource_sharer.get_connection(self._id) as conn:
> algo-1-d31c1_1  |   File "/usr/lib/python3.6/multiprocessing/resource_sharer.py", line 87, in get_connection
> algo-1-d31c1_1  |     c = Client(address, authkey=process.current_process().authkey)
> algo-1-d31c1_1  |   File "/usr/lib/python3.6/multiprocessing/connection.py", line 493, in Client
> algo-1-d31c1_1  |     answer_challenge(c, authkey)
> algo-1-d31c1_1  |   File "/usr/lib/python3.6/multiprocessing/connection.py", line 732, in answer_challenge
> algo-1-d31c1_1  |     message = connection.recv_bytes(256)         # reject large message
> algo-1-d31c1_1  |   File "/usr/lib/python3.6/multiprocessing/connection.py", line 216, in recv_bytes
> algo-1-d31c1_1  |     buf = self._recv_bytes(maxlength)
> algo-1-d31c1_1  |   File "/usr/lib/python3.6/multiprocessing/connection.py", line 407, in _recv_bytes
> algo-1-d31c1_1  |     buf = self._recv(4)
> algo-1-d31c1_1  |   File "/usr/lib/python3.6/multiprocessing/connection.py", line 379, in _recv
> algo-1-d31c1_1  |     chunk = read(handle, remaining)
> algo-1-d31c1_1  | BlockingIOError: [Errno 11] Resource temporarily unavailable
> algo-1-d31c1_1  | 
> algo-1-d31c1_1  | During handling of the above exception, another exception occurred:
> algo-1-d31c1_1  | 
> algo-1-d31c1_1  | Traceback (most recent call last):
> algo-1-d31c1_1  |   File "/usr/local/lib/python3.6/dist-packages/flask/app.py", line 2446, in wsgi_app
> algo-1-d31c1_1  |     response = self.full_dispatch_request()
> algo-1-d31c1_1  |   File "/usr/local/lib/python3.6/dist-packages/flask/app.py", line 1951, in full_dispatch_request
> algo-1-d31c1_1  |     rv = self.handle_user_exception(e)
> algo-1-d31c1_1  |   File "/usr/local/lib/python3.6/dist-packages/flask/app.py", line 1820, in handle_user_exception
> algo-1-d31c1_1  |     reraise(exc_type, exc_value, tb)
> algo-1-d31c1_1  |   File "/usr/local/lib/python3.6/dist-packages/flask/_compat.py", line 39, in reraise
> algo-1-d31c1_1  |     raise value
> algo-1-d31c1_1  |   File "/usr/local/lib/python3.6/dist-packages/flask/app.py", line 1949, in full_dispatch_request
> algo-1-d31c1_1  |     rv = self.dispatch_request()
> algo-1-d31c1_1  |   File "/usr/local/lib/python3.6/dist-packages/flask/app.py", line 1935, in dispatch_request
> algo-1-d31c1_1  |     return self.view_functions[rule.endpoint](**req.view_args)
> algo-1-d31c1_1  |   File "/usr/local/lib/python3.6/dist-packages/sagemaker_containers/_transformer.py", line 171, in transform
> algo-1-d31c1_1  |     result = self._transform_fn(self._model, request.content, request.content_type, request.accept)
> algo-1-d31c1_1  |   File "/usr/local/lib/python3.6/dist-packages/sagemaker_containers/_transformer.py", line 201, in _default_transform_fn
> algo-1-d31c1_1  |     prediction = self._predict_fn(data, model)
> algo-1-d31c1_1  |   File "/usr/local/lib/python3.6/dist-packages/sagemaker_containers/_functions.py", line 87, in wrapper
> algo-1-d31c1_1  |     six.reraise(error_class, error_class(e), sys.exc_info()[2])
> algo-1-d31c1_1  |   File "/usr/local/lib/python3.6/dist-packages/six.py", line 692, in reraise
> algo-1-d31c1_1  |     raise value.with_traceback(tb)
> algo-1-d31c1_1  |   File "/usr/local/lib/python3.6/dist-packages/sagemaker_containers/_functions.py", line 85, in wrapper
> algo-1-d31c1_1  |     return fn(*args, **kwargs)
> algo-1-d31c1_1  |   File "/opt/ml/code/serve_ULMFiT.py", line 43, in predict_fn
> algo-1-d31c1_1  |     preds_all = model.get_preds(DatasetType.Test, ordered = True)
> algo-1-d31c1_1  |   File "/usr/local/lib/python3.6/dist-packages/fastai/text/learner.py", line 88, in get_preds
> algo-1-d31c1_1  |     preds = super().get_preds(ds_type=ds_type, activ=activ, with_loss=with_loss, n_batch=n_batch, pbar=pbar)
> algo-1-d31c1_1  |   File "/usr/local/lib/python3.6/dist-packages/fastai/basic_train.py", line 339, in get_preds
> algo-1-d31c1_1  |     activ=activ, loss_func=lf, n_batch=n_batch, pbar=pbar)
> algo-1-d31c1_1  |   File "/usr/local/lib/python3.6/dist-packages/fastai/basic_train.py", line 44, in get_preds
> algo-1-d31c1_1  |     zip(*validate(model, dl, cb_handler=cb_handler, pbar=pbar, average=False, n_batch=n_batch))]
> algo-1-d31c1_1  |   File "/usr/local/lib/python3.6/dist-packages/fastai/basic_train.py", line 57, in validate
> algo-1-d31c1_1  |     for xb,yb in progress_bar(dl, parent=pbar, leave=(pbar is not None)):
> algo-1-d31c1_1  |   File "/usr/local/lib/python3.6/dist-packages/fastprogress/fastprogress.py", line 72, in __iter__
> algo-1-d31c1_1  |     for i,o in enumerate(self._gen):
> algo-1-d31c1_1  |   File "/usr/local/lib/python3.6/dist-packages/fastai/basic_data.py", line 75, in __iter__
> algo-1-d31c1_1  |     for b in self.dl: yield self.proc_batch(b)
> algo-1-d31c1_1  |   File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py", line 576, in __next__
> algo-1-d31c1_1  |     idx, batch = self._get_batch()
> algo-1-d31c1_1  |   File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py", line 553, in _get_batch
> algo-1-d31c1_1  |     success, data = self._try_get_batch()
> algo-1-d31c1_1  |   File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py", line 511, in _try_get_batch
> algo-1-d31c1_1  |     data = self.data_queue.get(timeout=timeout)
> algo-1-d31c1_1  |   File "/usr/lib/python3.6/multiprocessing/queues.py", line 113, in get
> algo-1-d31c1_1  |     return _ForkingPickler.loads(res)
> algo-1-d31c1_1  |   File "/usr/local/lib/python3.6/dist-packages/torch/multiprocessing/reductions.py", line 276, in rebuild_storage_fd
> algo-1-d31c1_1  |     fd = df.detach()
> algo-1-d31c1_1  |   File "/usr/lib/python3.6/multiprocessing/resource_sharer.py", line 57, in detach
> algo-1-d31c1_1  |     with _resource_sharer.get_connection(self._id) as conn:
> algo-1-d31c1_1  |   File "/usr/lib/python3.6/multiprocessing/resource_sharer.py", line 87, in get_connection
> algo-1-d31c1_1  |     c = Client(address, authkey=process.current_process().authkey)
> algo-1-d31c1_1  |   File "/usr/lib/python3.6/multiprocessing/connection.py", line 493, in Client
> algo-1-d31c1_1  |     answer_challenge(c, authkey)
> algo-1-d31c1_1  |   File "/usr/lib/python3.6/multiprocessing/connection.py", line 732, in answer_challenge
> algo-1-d31c1_1  |     message = connection.recv_bytes(256)         # reject large message
> algo-1-d31c1_1  |   File "/usr/lib/python3.6/multiprocessing/connection.py", line 216, in recv_bytes
> algo-1-d31c1_1  |     buf = self._recv_bytes(maxlength)
> algo-1-d31c1_1  |   File "/usr/lib/python3.6/multiprocessing/connection.py", line 407, in _recv_bytes
> algo-1-d31c1_1  |     buf = self._recv(4)
> algo-1-d31c1_1  |   File "/usr/lib/python3.6/multiprocessing/connection.py", line 379, in _recv
> algo-1-d31c1_1  |     chunk = read(handle, remaining)
> algo-1-d31c1_1  | sagemaker_containers._errors.ClientError: [Errno 11] Resource temporarily unavailable[2019-12-03 00:09:31 +0000] [97] [INFO] Parent changed, shutting down: <Worker 42>
> algo-1-d31c1_1  | [2019-12-03 00:09:31 +0000] [97] [INFO] Worker exiting (pid: 42)

Any ideas what my problem could be?