You can also try taking inspiration from fastai’s `get_files`; it may be faster:
def _get_files(parent, p, f, extensions):
p = Path(p)#.relative_to(parent)
if isinstance(extensions,str): extensions = [extensions]
low_extensions = [e.lower() for e in extensions] if extensions is not None else None
res = [p/o for o in f if not o.startswith('.')
and (extensions is None or f'.{o.split(".")[-1].lower()}' in low_extensions)]
return res
def get_files(path:PathOrStr, extensions:Collection[str]=None, recurse:bool=False,
              include:Optional[Collection[str]]=None, presort:bool=False)->FilePathList:
    """Return list of files in `path` that have a suffix in `extensions`; optionally `recurse`.

    Without `recurse`, only the regular files directly inside `path` are
    considered. With `recurse`, the tree is walked with `os.walk`; on the first
    directory visited, `include` (when given) restricts which sub-directories
    are descended into, and on every other directory the sub-directories whose
    name starts with '.' are skipped. File filtering is delegated to
    `_get_files`. When `presort` is true the result is sorted using
    `_path_to_same_str` as the key.
    """
    if not recurse:
        names = [entry.name for entry in os.scandir(path) if entry.is_file()]
        found = _get_files(path, path, names, extensions)
    else:
        found = []
        for step, (dirpath, dirnames, filenames) in enumerate(os.walk(path)):
            if step == 0 and include is not None:
                keep = [name for name in dirnames if name in include]
            else:
                # skip hidden dirs
                keep = [name for name in dirnames if not name.startswith('.')]
            # Assign in place so os.walk only descends into the kept directories.
            dirnames[:] = keep
            found.extend(_get_files(path, dirpath, filenames, extensions))

    if presort:
        found = sorted(found, key=_path_to_same_str)
    return found
But that looks a lot like what you’re already doing, so I’m not sure it will be of any use to you.