Datablock with numpy input

Hi guys
How do I create a DataBlock from a PyTorch dataset?
Example below
Thanks!

from torch.utils.data import Dataset
import numpy as np


class NumpyDataset(Dataset):
    """Map-style dataset over a NumPy array of samples with optional targets.

    Args:
        x: Array of samples, indexed along its first axis.
        y: Optional targets, indexed the same way as ``x``. Leave as ``None``
            for inference-only data.
    """

    def __init__(self, x, y=None):
        self.x = x
        self.y = y
        # Targets are only available when ``y`` was supplied.
        self.is_train = y is not None

    def __len__(self):
        """Return the number of samples (size of the first axis of ``x``)."""
        # ``shape[0]`` is already an int; calling ``len()`` on it raises TypeError.
        return self.x.shape[0]

    def __getitem__(self, idx):
        """Return ``(x[idx], y[idx])`` with targets, otherwise ``(x[idx],)``."""
        # Additional feature derivation
        if self.is_train:
            return self.x[idx], self.y[idx]
        # Always return a tuple, even without targets, so consumers that
        # unpack each item into inputs/targets get a consistent structure.
        return (self.x[idx],)

# Synthetic example data drawn from a normal distribution (mean 10, std 2):
# x has shape (100, 30, 5) and y has shape (100, 3).
x = np.random.normal(loc=10, scale=2, size=(100, 30, 5))
y = np.random.normal(loc=10, scale=2, size=(100, 3))

dataset = NumpyDataset(x, y)

# how to setup a DataBlock

You don’t. You can create a DataLoaders object from regular PyTorch datasets (though all the visualization methods like show_batch and show_results will fail).

Oh. That makes sense. Thanks!

And how about predicting after the model is trained?
E.g.

# Learner trained

dataset_test = NumpyDataset(x=x_test)

learn.predict(dataset_test)

Predict expects a single input. For multiple you should build a DataLoader and pass that into get_preds. Or pass an individual NumPy row/input into predict.

Hmm okay. Thanks.

I don't think that passing a DataLoader works? At least I could not get it to work.
It gave an error with

for met in mets: met.accumulate(self.learn)
test_eq(len(inp), len(targ))

which is weird since I don't supply y, I just want to predict :slight_smile:

I actually cannot pass the numpy array to predict either.

learn.get_preds(x_test.numpy())
learn.predict(x_test.numpy())

this gives me the error:

return self.valid.new(test_ds, **kwargs)
AttributeError: 'DataLoader' object has no attribute 'new'

In general, please provide the whole stack trace and not just the end of it, as it’s more helpful for people who try to understand your problem. Here, your last error suggests you are using PyTorch’s DataLoader rather than fastai’s.

Okay. Thanks.

If I create my own PyTorch Dataset, set it up with fastai’s DataLoader, and try to predict with it, I get the following:

Traceback (most recent call last):
  File "/Users/rickoclausen/Library/Caches/pypoetry/virtualenvs/gxf8OsH2-py3.7/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3331, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-23-ecabe716ca35>", line 1, in <module>
    learn.get_preds(dl_test)
  File "/Users/rickoclausen/Library/Caches/pypoetry/virtualenvs/gxf8OsH2-py3.7/lib/python3.7/site-packages/fastai2/learner.py", line 195, in get_preds
    if dl is None: dl = self.dls[ds_idx].new(shuffled=False, drop_last=False)
  File "/Users/rickoclausen/Library/Caches/pypoetry/virtualenvs/gxf8OsH2-py3.7/lib/python3.7/site-packages/fastai2/data/core.py", line 117, in __getitem__
    def __getitem__(self, i): return self.loaders[i]
TypeError: tuple indices must be integers or slices, not DataLoader

It works fine when I set up my own DataLoader for training :slight_smile:

You need to pass your dataloader as a keyword argument: dl=dl_test. The first argument is the dataset index.

learn.get_preds(dl=dl_test)

Traceback (most recent call last):
  File "/Users/rickoclausen/Library/Caches/pypoetry/virtualenvs/gxf8OsH2-py3.7/lib/python3.7/site-packages/fastai2/learner.py", line 133, in one_batch
    self.pred = self.model(*self.xb);                self('after_pred')
  File "/Users/rickoclausen/Library/Caches/pypoetry/virtualenvs/gxf8OsH2-py3.7/lib/python3.7/site-packages/torch/nn/modules/module.py", line 532, in __call__
    result = self.forward(*input, **kwargs)
TypeError: forward() takes 2 positional arguments but 512 were given
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
  File "/Users/rickoclausen/Library/Caches/pypoetry/virtualenvs/gxf8OsH2-py3.7/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3331, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-28-16483adb6db3>", line 1, in <module>
    learn.get_preds(dl=dl_test)
  File "/Users/rickoclausen/Library/Caches/pypoetry/virtualenvs/gxf8OsH2-py3.7/lib/python3.7/site-packages/fastai2/learner.py", line 203, in get_preds
    self._do_epoch_validate(dl=dl)
  File "/Users/rickoclausen/Library/Caches/pypoetry/virtualenvs/gxf8OsH2-py3.7/lib/python3.7/site-packages/fastai2/learner.py", line 159, in _do_epoch_validate
    with torch.no_grad(): self.all_batches()
  File "/Users/rickoclausen/Library/Caches/pypoetry/virtualenvs/gxf8OsH2-py3.7/lib/python3.7/site-packages/fastai2/learner.py", line 127, in all_batches
    for o in enumerate(self.dl): self.one_batch(*o)
  File "/Users/rickoclausen/Library/Caches/pypoetry/virtualenvs/gxf8OsH2-py3.7/lib/python3.7/site-packages/fastai2/learner.py", line 141, in one_batch
    finally:                                             self('after_batch')
  File "/Users/rickoclausen/Library/Caches/pypoetry/virtualenvs/gxf8OsH2-py3.7/lib/python3.7/site-packages/fastai2/learner.py", line 108, in __call__
    def __call__(self, event_name): L(event_name).map(self._call_one)
  File "/Users/rickoclausen/Library/Caches/pypoetry/virtualenvs/gxf8OsH2-py3.7/lib/python3.7/site-packages/fastcore/foundation.py", line 362, in map
    return self._new(map(g, self))
  File "/Users/rickoclausen/Library/Caches/pypoetry/virtualenvs/gxf8OsH2-py3.7/lib/python3.7/site-packages/fastcore/foundation.py", line 315, in _new
    def _new(self, items, *args, **kwargs): return type(self)(items, *args, use_list=None, **kwargs)
  File "/Users/rickoclausen/Library/Caches/pypoetry/virtualenvs/gxf8OsH2-py3.7/lib/python3.7/site-packages/fastcore/foundation.py", line 41, in __call__
    res = super().__call__(*((x,) + args), **kwargs)
  File "/Users/rickoclausen/Library/Caches/pypoetry/virtualenvs/gxf8OsH2-py3.7/lib/python3.7/site-packages/fastcore/foundation.py", line 306, in __init__
    items = list(items) if use_list else _listify(items)
  File "/Users/rickoclausen/Library/Caches/pypoetry/virtualenvs/gxf8OsH2-py3.7/lib/python3.7/site-packages/fastcore/foundation.py", line 242, in _listify
    if is_iter(o): return list(o)
  File "/Users/rickoclausen/Library/Caches/pypoetry/virtualenvs/gxf8OsH2-py3.7/lib/python3.7/site-packages/fastcore/foundation.py", line 208, in __call__
    return self.fn(*fargs, **kwargs)
  File "/Users/rickoclausen/Library/Caches/pypoetry/virtualenvs/gxf8OsH2-py3.7/lib/python3.7/site-packages/fastai2/learner.py", line 111, in _call_one
    [cb(event_name) for cb in sort_by_run(self.cbs)]
  File "/Users/rickoclausen/Library/Caches/pypoetry/virtualenvs/gxf8OsH2-py3.7/lib/python3.7/site-packages/fastai2/learner.py", line 111, in <listcomp>
    [cb(event_name) for cb in sort_by_run(self.cbs)]
  File "/Users/rickoclausen/Library/Caches/pypoetry/virtualenvs/gxf8OsH2-py3.7/lib/python3.7/site-packages/fastai2/callback/core.py", line 23, in __call__
    if self.run and _run: getattr(self, event_name, noop)()
  File "/Users/rickoclausen/Library/Caches/pypoetry/virtualenvs/gxf8OsH2-py3.7/lib/python3.7/site-packages/fastai2/learner.py", line 389, in after_batch
    for met in mets: met.accumulate(self.learn)
  File "/Users/rickoclausen/Library/Caches/pypoetry/virtualenvs/gxf8OsH2-py3.7/lib/python3.7/site-packages/fastai2/metrics.py", line 35, in accumulate
    if self.flatten: pred,targ = flatten_check(pred,targ)
  File "/Users/rickoclausen/Library/Caches/pypoetry/virtualenvs/gxf8OsH2-py3.7/lib/python3.7/site-packages/fastai2/torch_core.py", line 765, in flatten_check
    test_eq(len(inp), len(targ))
  File "/Users/rickoclausen/Library/Caches/pypoetry/virtualenvs/gxf8OsH2-py3.7/lib/python3.7/site-packages/fastcore/test.py", line 32, in test_eq
    test(a,b,equals, '==')
  File "/Users/rickoclausen/Library/Caches/pypoetry/virtualenvs/gxf8OsH2-py3.7/lib/python3.7/site-packages/fastcore/test.py", line 22, in test
    assert cmp(a,b),f"{cname}:\n{a}\n{b}"
AssertionError: ==:
7784
232

It looks like your dataloader does not return a tuple. Even if you have no targets, it should return a tuple of size 1.

1 Like