Hi @Pomo, I saw your very helpful answer and implemented it, but I'm still not getting reproducible results. I'm not sure whether I'm setting num_workers in the right place. I'm loading the datasets already split, so that can't be causing the problem @blissweb mentioned. Here is how I'm creating the DataBunch, setting the seeds, and running the learner:
def random_seed(seed_value):
    """Seed every RNG involved in training for reproducible runs.

    Seeds Python's ``random``, NumPy, and PyTorch (CPU and, when available,
    all CUDA devices), and forces cuDNN into deterministic mode.

    Parameters
    ----------
    seed_value : int
        The seed applied to every random number generator.

    Returns
    -------
    None
    """
    # Imports kept function-local so the helper can be pasted into any script.
    import random
    import numpy as np
    import torch

    random.seed(seed_value)        # Python stdlib RNG
    np.random.seed(seed_value)     # NumPy (CPU) RNG
    torch.manual_seed(seed_value)  # PyTorch CPU RNG
    if torch.cuda.is_available():
        # manual_seed_all seeds every GPU (it subsumes the single-device
        # torch.cuda.manual_seed call the original snippet also made).
        torch.cuda.manual_seed_all(seed_value)
        # cuDNN autotuning picks non-deterministic kernels; disable it and
        # force deterministic algorithms — needed for run-to-run equality.
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
# Re-seed immediately before building the data pipeline so that any
# shuffling / weight initialization below is deterministic.
random_seed(0)
dep_var = 'NumberOfSales'  # regression target column
# Keep only the modeling columns; .copy() avoids mutating train_df via procs.
df = train_df[cat_vars + cont_vars + [dep_var]].copy()
# NOTE(review): fastai's `path` is normally a working *directory* (for saving
# models/exports), but this points at a .csv file — confirm this is intended.
path="c:/Benchmarking/testBench.csv"
# fastai v1 data-block pipeline: categorify/normalize via `procs`, fixed
# validation split by index, float (regression) labels, plus a test set.
# num_workers=0 keeps loading single-process for reproducibility.
data = (TabularList.from_df(df, cat_names=cat_vars, cont_names=cont_vars, procs=procs,)
.split_by_idx(valid_idx)
.label_from_df(cols=dep_var, label_cls=FloatList, log=False)
.add_test(TabularList.from_df(test_df, path=path, cat_names=cat_vars, cont_names=cont_vars))
.databunch(num_workers=0))
#x=best.x I'm using scikit opt to find the best parameters but then can't reproduce the results.
# Hyperparameters (from a scikit-optimize run):
# [layer1, layer2, layer3, learning_rate, weight_decay, div_factor]
x=[500, 500, 100, 0.0005, 0.4, 8]
print(x)
# Three hidden layers with per-layer dropout `ps` and embedding dropout;
# y_range clamps predictions, MAE reported as the metric.
learn3 = tabular_learner(data, layers=[x[0],x[1],x[2]], ps=[0.09,0.5,0.5], emb_drop=0.04,
y_range=y_range, metrics=mae)
# One 1-cycle epoch: lr=x[3], weight decay=x[4], div_factor=x[5].
learn3.fit_one_cycle(1, x[3], wd=x[4], div_factor=x[5])