Augmented Dataset(s) creator

Hi, I wrote a piece of code which takes a vision dataset, applies the desired augmentations (using fastai v1), and saves the augmented images, mirroring the original tree structure.

from fastai import *
from fastai.vision import *
from fastai.imports import *

import matplotlib.image as IM 

# Example:
#
# TR = gettransfs()
#
# transfy(TR, '/data/apath', variants=5)
#
# ---
# It puts, by default, the new dataset in ../AUGM, where ./ is 
# the rootpath you specified (/data/apath in the example above).

def gettransfs(do_flip=False, max_rotate=90, **kwargs):
    """Build the transforms to hand to `transfy`.

    Thin wrapper around fastai's `get_transforms` that only changes two
    defaults: flipping is off and `max_rotate` is 90.

    Args:
        do_flip: forwarded to `get_transforms` as `do_flip`.
        max_rotate: forwarded to `get_transforms` as `max_rotate`.
        **kwargs: any other keyword argument accepted by `get_transforms`.

    Returns:
        Whatever `get_transforms` returns, unchanged. `transfy` applies
        element 0 of that result.
    """
    # Forward the caller's value; a literal 90 here would silently ignore
    # the `max_rotate` argument.
    return get_transforms(do_flip=do_flip, max_rotate=max_rotate, **kwargs)

def transfy(tfms, rootpath, variants=1, newdir='AUGM', tree_structure=True, include_source=True, **kwargs):
    """Write an augmented copy of an image dataset next to the original.

    `rootpath` is expected to contain one sub-folder per class. Every regular
    file found in those sub-folders is opened with `open_image`, transformed
    `variants` times with `tfms[0]`, and saved under `<rootpath>/../<newdir>`.

    Args:
        tfms: indexable collection of transform lists (e.g. the result of
            `gettransfs`); only `tfms[0]` is applied.
        rootpath: dataset root, absolute or relative to the current working
            directory. `str` or path-like.
        variants: number of augmented images produced per source image. They
            are named `<stem>_2<ext>`, `<stem>_3<ext>`, ... in the destination.
        newdir: name of the output folder, created as a sibling of `rootpath`.
        tree_structure: if True, mirror the class sub-folders inside the
            output folder; if False, write everything directly into it (files
            sharing a name across classes then overwrite one another).
        include_source: if True, also copy each original file to the
            destination under its original name.
        **kwargs: forwarded to `apply_tfms`. `size` defaults to 299 when not
            supplied.

    Returns:
        0 if `rootpath` does not exist, otherwise None.

    The process working directory is left untouched.
    """
    if not os.path.exists(rootpath):
        print('Path does not exist.')
        return 0
    print(f'Working in {rootpath}')

    # Resolve once against the caller's cwd. Using absolute paths everywhere
    # (instead of os.chdir) keeps relative roots working and avoids leaving
    # the process inside the last visited class folder.
    workdir = os.path.abspath(rootpath).replace('\\', '/')
    outroot = os.path.abspath(os.path.join(workdir, '..', newdir)).replace('\\', '/')
    os.makedirs(outroot, exist_ok=True)

    subfolders = [item for item in os.listdir(workdir)
                  if os.path.isdir(os.path.join(workdir, item))]
    print('Classes (subfolders) found: ')
    print(len(subfolders))

    # Keep 299 as the default output size but let the caller override it;
    # passing size= explicitly alongside **kwargs would raise a TypeError.
    kwargs.setdefault('size', 299)

    for folder in subfolders:
        currentd = os.path.join(workdir, folder).replace('\\', '/')
        print('Visiting '+currentd)

        if tree_structure:
            dest = os.path.join(outroot, folder).replace('\\', '/')
            os.makedirs(dest, exist_ok=True)
        else:
            dest = outroot

        for fname in os.listdir(currentd):
            src = currentd+'/'+fname
            # Nested directories cannot be opened as images; skip them.
            if not os.path.isfile(src):
                continue

            current_img = open_image(src)
            if include_source:
                shutil.copyfile(src, dest+'/'+fname)

            stem, ext = os.path.splitext(dest+'/'+fname)
            # Some fastai v1 releases expose apply_tfms only as a method of
            # the image object; prefer it and fall back to the function form.
            as_method = getattr(current_img, 'apply_tfms', None)
            for i in range(variants):
                if callable(as_method):
                    transformed = as_method(tfms[0], **kwargs)
                else:
                    transformed = apply_tfms(tfms[0], current_img, **kwargs)
                # Numbering starts at _2, as in the original naming scheme.
                IM.imsave(stem+'_'+str(i+2)+ext, image2np(transformed.data))
    print('Finished')

It is quite rough, but it works.
If you want to customize it for your specific needs, read its code and the documentation for get_transforms, apply_tfms(), and Transform in the fastai v1 docs.

2 Likes

That piece of code came in very useful and was quite easy to use. Thank you!

Maybe this is useful for somebody:
To make it work in my case, I had to call apply_tfms as a method of current_img. Also, I called the save method on currenttsfimg directly, without converting the image to a numpy array with image2np(); so instead of IM.imsave, I used the save method that fastai provides.

1 Like