CPU Memory Error in superres inference

Hi,

I am trying to make a super-resolution app and managed to get something running (code on GitHub). It worked when I was sending small images, but now that I’m trying to send pictures from my smartphone, it runs out of memory:

  File "/usr/local/lib/python3.7/site-packages/uvicorn/protocols/http/httptools_impl.py", line 385, in run_asgi
    result = await app(self.scope, self.receive, self.send)
  File "/usr/local/lib/python3.7/site-packages/uvicorn/middleware/proxy_headers.py", line 45, in __call__
    return await self.app(scope, receive, send)
  File "/usr/local/lib/python3.7/site-packages/fastapi/applications.py", line 149, in __call__
    await super().__call__(scope, receive, send)
  File "/usr/local/lib/python3.7/site-packages/starlette/applications.py", line 102, in __call__
    await self.middleware_stack(scope, receive, send)
  File "/usr/local/lib/python3.7/site-packages/starlette/middleware/errors.py", line 181, in __call__
    raise exc from None
  File "/usr/local/lib/python3.7/site-packages/starlette/middleware/errors.py", line 159, in __call__
    await self.app(scope, receive, _send)
  File "/usr/local/lib/python3.7/site-packages/starlette/middleware/cors.py", line 76, in __call__
    await self.app(scope, receive, send)
  File "/usr/local/lib/python3.7/site-packages/starlette/exceptions.py", line 82, in __call__
    raise exc from None
  File "/usr/local/lib/python3.7/site-packages/starlette/exceptions.py", line 71, in __call__
    await self.app(scope, receive, sender)
  File "/usr/local/lib/python3.7/site-packages/starlette/routing.py", line 550, in __call__
    await route.handle(scope, receive, send)
  File "/usr/local/lib/python3.7/site-packages/starlette/routing.py", line 227, in handle
    await self.app(scope, receive, send)
  File "/usr/local/lib/python3.7/site-packages/starlette/routing.py", line 41, in app
    response = await func(request)
  File "/usr/local/lib/python3.7/site-packages/fastapi/routing.py", line 148, in app
    dependant=dependant, values=values, is_coroutine=is_coroutine
  File "/usr/local/lib/python3.7/site-packages/fastapi/routing.py", line 103, in run_endpoint_function
    return await run_in_threadpool(dependant.call, **values)
  File "/usr/local/lib/python3.7/site-packages/starlette/concurrency.py", line 34, in run_in_threadpool
    return await loop.run_in_executor(None, func, *args)
  File "/usr/local/lib/python3.7/concurrent/futures/thread.py", line 57, in run
    result = self.fn(*self.args, **self.kwargs)
  File "app/server.py", line 107, in img2img
    pred = learn.predict(img_bytes)
  File "/usr/local/lib/python3.7/site-packages/fastai2/learner.py", line 229, in predict
    inp,preds,_,dec_preds = self.get_preds(dl=dl, with_input=True, with_decoded=True)
  File "/usr/local/lib/python3.7/site-packages/fastai2/learner.py", line 217, in get_preds
    self._do_epoch_validate(dl=dl)
  File "/usr/local/lib/python3.7/site-packages/fastai2/learner.py", line 173, in _do_epoch_validate
    with torch.no_grad(): self.all_batches()
  File "/usr/local/lib/python3.7/site-packages/fastai2/learner.py", line 141, in all_batches
    for o in enumerate(self.dl): self.one_batch(*o)
  File "/usr/local/lib/python3.7/site-packages/fastai2/learner.py", line 147, in one_batch
    self.pred = self.model(*self.xb);                self('after_pred')
  File "/usr/local/lib/python3.7/site-packages/torch/nn/modules/module.py", line 532, in __call__
    result = self.forward(*input, **kwargs)
  File "/usr/local/lib/python3.7/site-packages/fastai2/layers.py", line 470, in forward
    nres = l(res)
  File "/usr/local/lib/python3.7/site-packages/torch/nn/modules/module.py", line 532, in __call__
    result = self.forward(*input, **kwargs)
  File "/usr/local/lib/python3.7/site-packages/fastai2/vision/models/unet.py", line 41, in forward
    return self.conv2(self.conv1(cat_x))
  File "/usr/local/lib/python3.7/site-packages/torch/nn/modules/module.py", line 532, in __call__
    result = self.forward(*input, **kwargs)
  File "/usr/local/lib/python3.7/site-packages/torch/nn/modules/container.py", line 100, in forward
    input = module(input)
  File "/usr/local/lib/python3.7/site-packages/torch/nn/modules/module.py", line 532, in __call__
    result = self.forward(*input, **kwargs)
  File "/usr/local/lib/python3.7/site-packages/torch/nn/modules/conv.py", line 345, in forward
    return self.conv2d_forward(input, self.weight)
  File "/usr/local/lib/python3.7/site-packages/torch/nn/modules/conv.py", line 342, in conv2d_forward
    self.padding, self.dilation, self.groups)
RuntimeError: [enforce fail at CPUAllocator.cpp:64] . DefaultCPUAllocator: can't allocate memory: you tried to allocate 780337152 bytes. Error code 12 (Cannot allocate memory)

I thought it was because I was running on a low-cost AWS EC2 instance, so I tried running on an m5.2xlarge (8 vCPU, 32 GB RAM) instance, but got the same error. Sometimes I also get a different error for some reason (but small images always work fine):

ERROR:    Exception in ASGI application
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/site-packages/uvicorn/protocols/http/httptools_impl.py", line 385, in run_asgi
    result = await app(self.scope, self.receive, self.send)
  File "/usr/local/lib/python3.7/site-packages/uvicorn/middleware/proxy_headers.py", line 45, in __call__
    return await self.app(scope, receive, send)
  File "/usr/local/lib/python3.7/site-packages/fastapi/applications.py", line 149, in __call__
    await super().__call__(scope, receive, send)
  File "/usr/local/lib/python3.7/site-packages/starlette/applications.py", line 102, in __call__
    await self.middleware_stack(scope, receive, send)
  File "/usr/local/lib/python3.7/site-packages/starlette/middleware/errors.py", line 181, in __call__
    raise exc from None
  File "/usr/local/lib/python3.7/site-packages/starlette/middleware/errors.py", line 159, in __call__
    await self.app(scope, receive, _send)
  File "/usr/local/lib/python3.7/site-packages/starlette/middleware/cors.py", line 76, in __call__
    await self.app(scope, receive, send)
  File "/usr/local/lib/python3.7/site-packages/starlette/exceptions.py", line 82, in __call__
    raise exc from None
  File "/usr/local/lib/python3.7/site-packages/starlette/exceptions.py", line 71, in __call__
    await self.app(scope, receive, sender)
  File "/usr/local/lib/python3.7/site-packages/starlette/routing.py", line 550, in __call__
    await route.handle(scope, receive, send)
  File "/usr/local/lib/python3.7/site-packages/starlette/routing.py", line 227, in handle
    await self.app(scope, receive, send)
  File "/usr/local/lib/python3.7/site-packages/starlette/routing.py", line 41, in app
    response = await func(request)
  File "/usr/local/lib/python3.7/site-packages/fastapi/routing.py", line 148, in app
    dependant=dependant, values=values, is_coroutine=is_coroutine
  File "/usr/local/lib/python3.7/site-packages/fastapi/routing.py", line 103, in run_endpoint_function
    return await run_in_threadpool(dependant.call, **values)
  File "/usr/local/lib/python3.7/site-packages/starlette/concurrency.py", line 34, in run_in_threadpool
    return await loop.run_in_executor(None, func, *args)
  File "/usr/local/lib/python3.7/concurrent/futures/thread.py", line 57, in run
    result = self.fn(*self.args, **self.kwargs)
  File "app/server.py", line 107, in img2img
    pred = learn.predict(img_bytes)
  File "/usr/local/lib/python3.7/site-packages/fastai2/learner.py", line 229, in predict
    inp,preds,_,dec_preds = self.get_preds(dl=dl, with_input=True, with_decoded=True)
  File "/usr/local/lib/python3.7/site-packages/fastai2/learner.py", line 217, in get_preds
    self._do_epoch_validate(dl=dl)
  File "/usr/local/lib/python3.7/site-packages/fastai2/learner.py", line 176, in _do_epoch_validate
    dl,*_ = change_attrs(dl, names, old, has);       self('after_validate')
  File "/usr/local/lib/python3.7/site-packages/fastai2/learner.py", line 122, in __call__
    def __call__(self, event_name): L(event_name).map(self._call_one)
  File "/usr/local/lib/python3.7/site-packages/fastcore/foundation.py", line 362, in map
    return self._new(map(g, self))
  File "/usr/local/lib/python3.7/site-packages/fastcore/foundation.py", line 315, in _new
    def _new(self, items, *args, **kwargs): return type(self)(items, *args, use_list=None, **kwargs)
  File "/usr/local/lib/python3.7/site-packages/fastcore/foundation.py", line 41, in __call__
    res = super().__call__(*((x,) + args), **kwargs)
  File "/usr/local/lib/python3.7/site-packages/fastcore/foundation.py", line 306, in __init__
    items = list(items) if use_list else _listify(items)
  File "/usr/local/lib/python3.7/site-packages/fastcore/foundation.py", line 242, in _listify
    if is_iter(o): return list(o)
  File "/usr/local/lib/python3.7/site-packages/fastcore/foundation.py", line 208, in __call__
    return self.fn(*fargs, **kwargs)
  File "/usr/local/lib/python3.7/site-packages/fastai2/learner.py", line 125, in _call_one
    [cb(event_name) for cb in sort_by_run(self.cbs)]
  File "/usr/local/lib/python3.7/site-packages/fastai2/learner.py", line 125, in <listcomp>
    [cb(event_name) for cb in sort_by_run(self.cbs)]
  File "/usr/local/lib/python3.7/site-packages/fastai2/callback/core.py", line 23, in __call__
    if self.run and _run: getattr(self, event_name, noop)()
  File "/usr/local/lib/python3.7/site-packages/fastai2/callback/core.py", line 95, in after_validate
    if not self.save_preds: self.preds   = detuplify(to_concat(self.preds, dim=self.concat_dim))
  File "/usr/local/lib/python3.7/site-packages/fastai2/torch_core.py", line 213, in to_concat
    if is_listy(xs[0]): return type(xs[0])([to_concat([x[i] for x in xs], dim=dim) for i in range_of(xs[0])])
IndexError: list index out of range

Does anyone have any idea why I am getting these errors?

Thanks!

Not sure, but maybe you ran a command with an index you haven’t yet reached. Remember, if you have 200 items, then the last valid index would be 199, because the first is 0. Hope I helped.