QHAdam
Below is a rough version of QHAdam, which can then be used with the Lookahead wrapper to give RangerQH. The parameter update could definitely be refactored to be more efficient/elegant.
def opt_func(ps, lr=defaults.lr):
    "Build a RangerQH optimizer: QHAdam wrapped in `Lookahead`."
    inner = QHAdam(ps, lr=lr, wd=1e-2, mom=0.9, eps=1e-6)
    return Lookahead(inner)
def qhadam_step(p, lr, mom, sqr_mom, sqr_avg, nus, step, grad_avg, eps, **kwargs):
    "Perform one QHAdam update of parameter `p` in place and return it."
    nu_1, nu_2 = nus[0], nus[1]
    # Adam-style bias corrections for the running first/second moments.
    db1 = debias(mom, 1 - mom, step)
    db2 = debias(sqr_mom, 1 - sqr_mom, step)
    g = p.grad.data
    # Quasi-hyperbolic numerator: blend the raw gradient with the
    # debiased momentum term, weighted by nu_1.
    numerator = (1 - nu_1) * g + nu_1 * (grad_avg / db1)
    # Quasi-hyperbolic denominator: blend the squared gradient with the
    # debiased second moment, weighted by nu_2; eps guards the division.
    denominator = ((1 - nu_2) * g ** 2 + nu_2 * (sqr_avg / db2)).sqrt() + eps
    p.data = p.data - lr * (numerator / denominator)
    return p
def QHAdam(params, lr=1e-3, mom=0.9, sqr_mom=0.99, nus=(0.7, 1.0), eps=1e-8, wd=0., decouple_wd=False):
    "Build a QHAdam `Optimizer` over `params` (wrap in `Lookahead` for RangerQH)."
    # BUG FIX: the default was `nus=[(.7, 1.0)]` — a (mutable) list wrapping the
    # tuple, so `qhadam_step`'s `nus[0]` returned the whole tuple instead of
    # nu_1, breaking the update. The default is now the bare immutable tuple.
    # (Also removed an unused `from functools import partial` and a stray `> `
    # quote artifact that made the return statement a syntax error.)
    # Decoupled weight decay (AdamW-style) or classic L2 regularization.
    steppers = [weight_decay] if decouple_wd else [l2_reg]
    steppers.append(qhadam_step)
    # Running stats the stepper consumes: grad_avg, sqr_avg, step.
    stats = [average_grad, average_sqr_grad, step_stat]
    return Optimizer(params, steppers, stats=stats, lr=lr, nus=nus, mom=mom,
                     sqr_mom=sqr_mom, eps=eps, wd=wd)
Testing fastai Ranger v fastai2 Ranger
I tried removing all the transformations and shuffling in v1 and v2 for a fair comparison between the two, but I keep getting an error when trying to remove all of the transforms in item_img_tfms
in fastai2 — I guess I need to get more familiar with the new data API. For fastai2 I used AffineCoordTfm
to do the resizing, as I had seen Jeremy do it in the Kaggle RSNA competition. Happy to test both for Ranger if I can get fastai2 to give me images with no transforms.
fastai2 data (gives error)
# fastai2 datasource pipeline for Imagewoof, resized to 128 on the GPU.
# NOTE(review): the original post had mojibake quote characters (`ā`) around
# the string literals — restored to plain quotes so this parses.
src = Path('data/imagewoof/imagewoof')
items = get_image_files(src)
# Split train/valid by grandparent folder name ('val' = validation set).
split_idx = GrandparentSplitter(valid_name='val')(items)
# NOTE(review): `lbl_dict.getitem` was almost certainly `lbl_dict.__getitem__`
# with the dunder underscores stripped by forum markdown — confirm.
tfms = [[PILImage.create], [parent_label, lbl_dict.__getitem__, Categorize()]]
item_img_tfms = [ToTensor()]
dsrc = DataSource(items, tfms, splits=split_idx)
batch_tfms = [Cuda(), IntToFloatTensor(), Normalize(*imagenet_stats)]
# AffineCoordTfm(size=128) does the resize on-batch, after moving to GPU.
dbunch = dsrc.databunch(shuffle_train=False, after_batch=batch_tfms+[AffineCoordTfm(size=128)], bs=64)
fastai v1 data
# fastai v1 data pipeline for the same comparison: no augmentation
# (flip probability 0) and shuffling disabled on the training loader.
# NOTE(review): the original post had mojibake quotes (`ā`) around 'val' —
# restored so this parses. Assumes `src` is defined as in the fastai2 snippet.
n_gpus = num_distrib() or 1
nw = min(8, num_cpus() // n_gpus)  # cap workers at 8, divided across GPUs
img_ls = ImageList.from_folder(src).split_by_folder(valid='val').label_from_folder()
# flip_lr(p=0.0) is effectively a no-op transform; size=128 forces the resize.
img_ls = img_ls.transform(([flip_lr(p=0.0)], ), size=128)
data = img_ls.databunch(bs=64, num_workers=nw).normalize(imagenet_stats)
# Replace the train DataLoader with a non-shuffling copy for determinism.
data.train_dl = data.train_dl.new(shuffle=False)