Failed to find "re.compile('^(.*)_\\d+.jpg$')" in "xxx_1500.jpeg"

I saw that Jeremy also got this error: https://www.youtube.com/watch?v=44pe47sB4BI&t=3933s (1h16min). Is there a solution?

Sometimes it works, sometimes it does not.
I guess it depends on the preparation of the Transform and if we are using a Pipeline.

---------------------------------------------------------------------------
**AssertionError**                            Traceback (most recent call last)
<ipython-input-22-8517c825915f> in <module>
----> 1 dls = dblock.dataloaders(path)
      2 dls.show_batch()

/opt/conda/envs/fastai/lib/python3.7/site-packages/fastai2/data/block.py in dataloaders(self, source, path, verbose, **kwargs)
     96 
     97     def dataloaders(self, source, path='.', verbose=False, **kwargs):
---> 98         dsets = self.datasets(source)
     99         kwargs = {**self.dls_kwargs, **kwargs, 'verbose': verbose}
    100         return dsets.dataloaders(path=path, after_item=self.item_tfms, after_batch=self.batch_tfms, **kwargs)

/opt/conda/envs/fastai/lib/python3.7/site-packages/fastai2/data/block.py in datasets(self, source, verbose)
     93         splits = (self.splitter or RandomSplitter())(items)
     94         pv(f"{len(splits)} datasets of sizes {','.join([str(len(s)) for s in splits])}", verbose)
---> 95         return Datasets(items, tfms=self._combine_type_tfms(), splits=splits, dl_type=self.dl_type, n_inp=self.n_inp, verbose=verbose)
     96 
     97     def dataloaders(self, source, path='.', verbose=False, **kwargs):

/opt/conda/envs/fastai/lib/python3.7/site-packages/fastai2/data/core.py in __init__(self, items, tfms, tls, n_inp, dl_type, **kwargs)
    272     def __init__(self, items=None, tfms=None, tls=None, n_inp=None, dl_type=None, **kwargs):
    273         super().__init__(dl_type=dl_type)
--> 274         self.tls = L(tls if tls else [TfmdLists(items, t, **kwargs) for t in L(ifnone(tfms,[None]))])
    275         self.n_inp = (1 if len(self.tls)==1 else len(self.tls)-1) if n_inp is None else n_inp
    276 

/opt/conda/envs/fastai/lib/python3.7/site-packages/fastai2/data/core.py in <listcomp>(.0)
    272     def __init__(self, items=None, tfms=None, tls=None, n_inp=None, dl_type=None, **kwargs):
    273         super().__init__(dl_type=dl_type)
--> 274         self.tls = L(tls if tls else [TfmdLists(items, t, **kwargs) for t in L(ifnone(tfms,[None]))])
    275         self.n_inp = (1 if len(self.tls)==1 else len(self.tls)-1) if n_inp is None else n_inp
    276 

/opt/conda/envs/fastai/lib/python3.7/site-packages/fastcore/foundation.py in __call__(cls, x, *args, **kwargs)
     39             return x
     40 
---> 41         res = super().__call__(*((x,) + args), **kwargs)
     42         res._newchk = 0
     43         return res

/opt/conda/envs/fastai/lib/python3.7/site-packages/fastai2/data/core.py in __init__(self, items, tfms, use_list, do_setup, split_idx, train_setup, splits, types, verbose)
    212         if do_setup:
    213             pv(f"Setting up {self.tfms}", verbose)
--> 214             self.setup(train_setup=train_setup)
    215 
    216     def _new(self, items, **kwargs): return super()._new(items, tfms=self.tfms, do_setup=False, types=self.types, **kwargs)

/opt/conda/envs/fastai/lib/python3.7/site-packages/fastai2/data/core.py in setup(self, train_setup)
    226 
    227     def setup(self, train_setup=True):
--> 228         self.tfms.setup(self, train_setup)
    229         if len(self) != 0:
    230             x = super().__getitem__(0) if self.splits is None else super().__getitem__(self.splits[0])[0]

/opt/conda/envs/fastai/lib/python3.7/site-packages/fastcore/transform.py in setup(self, items, train_setup)
    177         tfms = self.fs[:]
    178         self.fs.clear()
--> 179         for t in tfms: self.add(t,items, train_setup)
    180 
    181     def add(self,t, items=None, train_setup=False):

/opt/conda/envs/fastai/lib/python3.7/site-packages/fastcore/transform.py in add(self, t, items, train_setup)
    180 
    181     def add(self,t, items=None, train_setup=False):
--> 182         t.setup(items, train_setup)
    183         self.fs.append(t)
    184 

/opt/conda/envs/fastai/lib/python3.7/site-packages/fastcore/transform.py in setup(self, items, train_setup)
     76     def setup(self, items=None, train_setup=False):
     77         train_setup = train_setup if self.train_setup is None else self.train_setup
---> 78         return self.setups(getattr(items, 'train', items) if train_setup else items)
     79 
     80     def _call(self, fn, x, split_idx=None, **kwargs):

/opt/conda/envs/fastai/lib/python3.7/site-packages/fastcore/dispatch.py in __call__(self, *args, **kwargs)
     96         if not f: return args[0]
     97         if self.inst is not None: f = MethodType(f, self.inst)
---> 98         return f(*args, **kwargs)
     99 
    100     def __get__(self, inst, owner):

/opt/conda/envs/fastai/lib/python3.7/site-packages/fastai2/data/transforms.py in setups(self, dsets)
    214 
    215     def setups(self, dsets):
--> 216         if self.vocab is None and dsets is not None: self.vocab = CategoryMap(dsets, add_na=self.add_na)
    217         self.c = len(self.vocab)
    218 

/opt/conda/envs/fastai/lib/python3.7/site-packages/fastai2/data/transforms.py in __init__(self, col, sort, add_na)
    199             if not hasattr(col,'unique'): col = L(col, use_list=True)
    200             # `o==o` is the generalized definition of non-NaN used by Pandas
--> 201             items = L(o for o in col.unique() if o==o)
    202             if sort: items = items.sorted()
    203         self.items = '#na#' + items if add_na else items

/opt/conda/envs/fastai/lib/python3.7/site-packages/fastcore/foundation.py in unique(self)
    379         return self._new(i for i,o in enumerate(self) if f(o))
    380 
--> 381     def unique(self): return L(dict.fromkeys(self).keys())
    382     def enumerate(self): return L(enumerate(self))
    383     def val2idx(self): return {v:k for k,v in self.enumerate()}

/opt/conda/envs/fastai/lib/python3.7/site-packages/fastai2/data/core.py in <genexpr>(.0)
    218     def _after_item(self, o): return self.tfms(o)
    219     def __repr__(self): return f"{self.__class__.__name__}: {self.items}\ntfms - {self.tfms.fs}"
--> 220     def __iter__(self): return (self[i] for i in range(len(self)))
    221     def show(self, o, **kwargs): return self.tfms.show(o, **kwargs)
    222     def decode(self, o, **kwargs): return self.tfms.decode(o, **kwargs)

/opt/conda/envs/fastai/lib/python3.7/site-packages/fastai2/data/core.py in __getitem__(self, idx)
    253         res = super().__getitem__(idx)
    254         if self._after_item is None: return res
--> 255         return self._after_item(res) if is_indexer(idx) else res.map(self._after_item)
    256 
    257 # Cell

/opt/conda/envs/fastai/lib/python3.7/site-packages/fastai2/data/core.py in _after_item(self, o)
    216     def _new(self, items, **kwargs): return super()._new(items, tfms=self.tfms, do_setup=False, types=self.types, **kwargs)
    217     def subset(self, i): return self._new(self._get(self.splits[i]), split_idx=i)
--> 218     def _after_item(self, o): return self.tfms(o)
    219     def __repr__(self): return f"{self.__class__.__name__}: {self.items}\ntfms - {self.tfms.fs}"
    220     def __iter__(self): return (self[i] for i in range(len(self)))

/opt/conda/envs/fastai/lib/python3.7/site-packages/fastcore/transform.py in __call__(self, o)
    183         self.fs.append(t)
    184 
--> 185     def __call__(self, o): return compose_tfms(o, tfms=self.fs, split_idx=self.split_idx)
    186     def __repr__(self): return f"Pipeline: {' -> '.join([f.name for f in self.fs if f.name != 'noop'])}"
    187     def __getitem__(self,i): return self.fs[i]

/opt/conda/envs/fastai/lib/python3.7/site-packages/fastcore/transform.py in compose_tfms(x, tfms, is_enc, reverse, **kwargs)
    136     for f in tfms:
    137         if not is_enc: f = f.decode
--> 138         x = f(x, **kwargs)
    139     return x
    140 

/opt/conda/envs/fastai/lib/python3.7/site-packages/fastcore/transform.py in __call__(self, x, **kwargs)
     70     @property
     71     def name(self): return getattr(self, '_name', _get_name(self))
---> 72     def __call__(self, x, **kwargs): return self._call('encodes', x, **kwargs)
     73     def decode  (self, x, **kwargs): return self._call('decodes', x, **kwargs)
     74     def __repr__(self): return f'{self.name}: {self.encodes} {self.decodes}'

/opt/conda/envs/fastai/lib/python3.7/site-packages/fastcore/transform.py in _call(self, fn, x, split_idx, **kwargs)
     80     def _call(self, fn, x, split_idx=None, **kwargs):
     81         if split_idx!=self.split_idx and self.split_idx is not None: return x
---> 82         return self._do_call(getattr(self, fn), x, **kwargs)
     83 
     84     def _do_call(self, f, x, **kwargs):

/opt/conda/envs/fastai/lib/python3.7/site-packages/fastcore/transform.py in _do_call(self, f, x, **kwargs)
     84     def _do_call(self, f, x, **kwargs):
     85         if not _is_tuple(x):
---> 86             return x if f is None else retain_type(f(x, **kwargs), x, f.returns_none(x))
     87         res = tuple(self._do_call(f, x_, **kwargs) for x_ in x)
     88         return retain_type(res, x)

/opt/conda/envs/fastai/lib/python3.7/site-packages/fastcore/dispatch.py in __call__(self, *args, **kwargs)
     96         if not f: return args[0]
     97         if self.inst is not None: f = MethodType(f, self.inst)
---> 98         return f(*args, **kwargs)
     99 
    100     def __get__(self, inst, owner):

/opt/conda/envs/fastai/lib/python3.7/site-packages/fastai2/data/transforms.py in __call__(self, o, **kwargs)
    170     def __call__(self, o, **kwargs):
    171         res = self.matcher(str(o))
--> 172         assert res,f'Failed to find "{self.pat}" in "{o}"'
    173         return res.group(1)
    174 

**AssertionError**: Failed to find "re.compile('^(.*)_\\d+.jpg$')" in "xxx_1500.jpeg"

I also tried changing the pattern to '([\\w\\s.-]).jpg':

Failed to find "re.compile('([\\w\\s.-]).jpg')" in "xxx_1500.jpeg"

as in the suggestion here Failed to find "re.compile('([^\\\\]+)_\\d+.jpg$')" in "/content/drive/

This just means that your regex is only looking for .jpg images and you have a .jpeg image in your folder.

If you are mixing multiple file extensions a possible solution would be to not match only .jpg and instead do something like [a-z]+ at the end. This would match any file extension.

Or you can make a list of the extensions you want to match and combine them with a regex alternation (`|`), e.g. `\.(jpg|jpeg|png)$`.

1 Like

Yes but I have only .jpg in the folder

this file should be in the folder somewhere :sweat_smile:

2 Likes

image
you are right… not sure why it is there. strange.
Thank you so much @lgvaz

1 Like

To account for any oddities just change the .jpg to a .* and you should be okay.

1 Like

now it works perfectly

1 Like

I would recommend changing it to [a-z]+ instead; .* can also match names that don’t even have an extension.

[a-z]+ is dangerous enough already :sweat_smile:

2 Likes

But if I match .jpeg and .jpg, and maybe other file extensions like .txt or .csv, will I have a problem when creating the DataLoader later?

What is going to grab the files is get_image_files, so you don’t have to worry about unrelated file types…

Now that I said this, you can go with the simpler @muellerzr solution .* :sweat_smile: I just now realised get_image_files is already filtering the extensions

2 Likes

Awesome guys. Thank you so much @muellerzr and @lgvaz Placing here the final version

dblock = DataBlock(blocks=(ImageBlock, CategoryBlock), 
                   get_items=get_image_files, 
                   get_y=[attrgetter("name"), RegexLabeller(pat=r'^(.*)_\d+\..*$')], 
                   splitter=RandomSplitter(valid_pct=0.2, seed=42),
                   item_tfms=Resize(128),
                   batch_tfms=aug_transforms(mult=1))

To be even cleaner, I’d put pat=r'^(.*)_\d+\..*$'. The added \. will match the dot before the extension (which can then be anything).

3 Likes