How to extract training/validation sets from dataloader

Hi,

I would like to extract the filelists for the training/validation sets from the dataloader defined here:

snd_blk = DataBlock(blocks=(ImageBlock, CategoryBlock),
                splitter=splitter,
               get_x=ColReader(0, pref=Exp_),
               get_y=ColReader(1),
               item_tfms=Resize(854))

where Exp_ = Path("…")

So to view the training and validation sets I tried:

path = get_files(Exp_)

then

dsets = snd_blk.datasets(path)

but I get the error:

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-30-69c2a82ebb82> in <module>
----> 1 dsets = snd_blk.datasets(path)

~/miniconda3/envs/Massage/lib/python3.8/site-packages/fastai/data/block.py in datasets(self, source, verbose)
    108         splits = (self.splitter or RandomSplitter())(items)
    109         pv(f"{len(splits)} datasets of sizes {','.join([str(len(s)) for s in splits])}", verbose)
--> 110         return Datasets(items, tfms=self._combine_type_tfms(), splits=splits, dl_type=self.dl_type, n_inp=self.n_inp, verbose=verbose)
    111 
    112     def dataloaders(self, source, path='.', verbose=False, **kwargs):

~/miniconda3/envs/Massage/lib/python3.8/site-packages/fastai/data/core.py in __init__(self, items, tfms, tls, n_inp, dl_type, **kwargs)
    327     def __init__(self, items=None, tfms=None, tls=None, n_inp=None, dl_type=None, **kwargs):
    328         super().__init__(dl_type=dl_type)
--> 329         self.tls = L(tls if tls else [TfmdLists(items, t, **kwargs) for t in L(ifnone(tfms,[None]))])
    330         self.n_inp = ifnone(n_inp, max(1, len(self.tls)-1))
    331 

~/miniconda3/envs/Massage/lib/python3.8/site-packages/fastai/data/core.py in <listcomp>(.0)
    327     def __init__(self, items=None, tfms=None, tls=None, n_inp=None, dl_type=None, **kwargs):
    328         super().__init__(dl_type=dl_type)
--> 329         self.tls = L(tls if tls else [TfmdLists(items, t, **kwargs) for t in L(ifnone(tfms,[None]))])
    330         self.n_inp = ifnone(n_inp, max(1, len(self.tls)-1))
    331 

~/miniconda3/envs/Massage/lib/python3.8/site-packages/fastcore/foundation.py in __call__(cls, x, *args, **kwargs)
     95     def __call__(cls, x=None, *args, **kwargs):
     96         if not args and not kwargs and x is not None and isinstance(x,cls): return x
---> 97         return super().__call__(x, *args, **kwargs)
     98 
     99 # Cell

~/miniconda3/envs/Massage/lib/python3.8/site-packages/fastai/data/core.py in __init__(self, items, tfms, use_list, do_setup, split_idx, train_setup, splits, types, verbose, dl_type)
    253         if do_setup:
    254             pv(f"Setting up {self.tfms}", verbose)
--> 255             self.setup(train_setup=train_setup)
    256 
    257     def _new(self, items, split_idx=None, **kwargs):

~/miniconda3/envs/Massage/lib/python3.8/site-packages/fastai/data/core.py in setup(self, train_setup)
    275             for f in self.tfms.fs:
    276                 self.types.append(getattr(f, 'input_types', type(x)))
--> 277                 x = f(x)
    278             self.types.append(type(x))
    279         types = L(t if is_listy(t) else [t] for t in self.types).concat().unique()

~/miniconda3/envs/Massage/lib/python3.8/site-packages/fastai/data/transforms.py in __call__(self, o, **kwargs)
    201 
    202     def __call__(self, o, **kwargs):
--> 203         if len(self.cols) == 1: return self._do_one(o, self.cols[0])
    204         return L(self._do_one(o, c) for c in self.cols)
    205 

~/miniconda3/envs/Massage/lib/python3.8/site-packages/fastai/data/transforms.py in _do_one(self, r, c)
    195 
    196     def _do_one(self, r, c):
--> 197         o = r[c] if isinstance(c, int) else r[c] if c=='name' or c=='cat' else getattr(r, c)
    198         if len(self.pref)==0 and len(self.suff)==0 and self.label_delim is None: return o
    199         if self.label_delim is None: return f'{self.pref}{o}{self.suff}'

TypeError: 'PosixPath' object is not subscriptable

Is there something I am doing wrong, or is there a better way to extract this information?

Cheers

Hi Robert!

I'm not sure why your approach isn’t working. What I usually do, once I have constructed the dataloaders, is use dls.train.items or dls.valid.items. These return either a list of paths showing what’s in each set (if the labels were fetched from the folder names) or a pandas dataframe with name/class columns (if the labels were fetched from a dataframe).

Hope that helps,

K.