Following up on my post 93 above, I tried running test_df predictions on the Rossmann notebook and added the modified notebooks here: https://github.com/adriangrepo/my_fastai_v3/tree/master/nbs/dl1
I am using 1.0.39.dev0
The tests I did and errors encountered are:
For all tests I used this code to define classes:
def unique_deps(x:Series)->"Tuple[List, OrderedDict]":
    """Return the distinct values of `x`, sorted, plus the de-duplicating dict.

    Args:
        x: iterable of hashable, mutually comparable values (here the
            dependent-variable column).

    Returns:
        A `(res, od)` pair: `res` is the list of unique values in ascending
        order; `od` is an `OrderedDict` whose keys are the same unique values
        in first-seen order (all values are `None`).
    """
    # Build the de-duplicating dict once and reuse it for both results.
    od = OrderedDict.fromkeys(x)
    res = sorted(od)
    return res, od
classes, od =unique_deps(df[dep_var].values)
#TEST 1
#DataBunch definition:
data = TabularDataBunch.from_df(path, df=df, dep_var=dep_var, valid_idx=valid_idx, procs=procs, cat_names=cat_vars, cont_names=cont_vars, classes=classes, test_df=test_df)
#error at:
learn.fit_one_cycle(3, 1e-3, wd=0.2)
#error:
/mnt/963GB/Data/Python/Courses/fastai/fastai/fastai/metrics.py in exp_rmspe(pred=tensor([[0.1735, 1.0371, 0.1735, …, 0.1735, 0…0.1494, 0.1494, 0.1494]],
device=‘cuda:1’), targ=tensor([ 4406, 5207, 7457, 13138, 3965, 4794… 4715, 6638, 10668, 12394], device=‘cuda:1’))
45 def exp_rmspe(pred:FloatTensor, targ:FloatTensor)->Rank0Tensor:
46 "Exp RMSE between `pred` and `targ`."
—> 47 assert pred.numel() == targ.numel(), “Expected same numbers of elements in pred & targ”
pred.numel =
targ.numel =
48 if len(pred.shape)==2: pred=pred.squeeze(1)
49 pred, targ = torch.exp(pred), torch.exp(targ)
AssertionError: Expected same numbers of elements in pred & targ
ipdb> pred.shape torch.Size([64, 21733]) ipdb> targ.shape torch.Size([64])
#TEST 2
#classes as defined above
#DataBunch definition:
data = (TabularList.from_df(df, path=path, cat_names=cat_vars, cont_names=cont_vars, procs=procs, test_df=test_df) .split_by_idx(valid_idx) .label_from_df(cols=dep_var, label_cls=FloatList, log=True, classes=classes) .databunch())
#error at
preds, y = learn.get_preds(DatasetType.Test)
#error
~/miniconda3/envs/fastai-py3.7/lib/python3.7/site-packages/fastprogress/fastprogress.py in __init__(self=, gen=None, total=None, display=True, leave=False, parent=None, auto_update=True)
143 class NBProgressBar(ProgressBar):
144 def __init__(self, gen, total=None, display=True, leave=True, parent=None, auto_update=True):
--> 145 self.progress = html_progress_bar(0, len(gen) if total is None else total, "")
self.progress = undefined
global html_progress_bar =
global len = undefined
gen = None
total = None
146 super().__init__(gen, total, display, leave, parent, auto_update)
147
TypeError: object of type ‘NoneType’ has no len()
ipdb> gen.shape *** AttributeError: ‘NoneType’ object has no attribute ‘shape’
NB predicting row by row on the test_df works fine
# Workaround: run inference on the test set one row at a time.
for idx, row in test_df.iterrows():
    pred = learn.predict(row)
#Test 3
data = (TabularList.from_df(df, path=path, cat_names=cat_vars, cont_names=cont_vars, procs=procs, test_df=test_df)
.split_by_idx(valid_idx)
.label_from_df(cols=dep_var, label_cls=FloatList, log=True)
.databunch())
#error at
preds, y = learn.get_preds(DatasetType.Test)
#error
~/miniconda3/envs/fastai-py3.7/lib/python3.7/site-packages/fastprogress/fastprogress.py in __init__(self, gen, total, display, leave, parent, auto_update)
143 class NBProgressBar(ProgressBar):
144 def __init__(self, gen, total=None, display=True, leave=True, parent=None, auto_update=True):
--> 145 self.progress = html_progress_bar(0, len(gen) if total is None else total, "")
146 super().__init__(gen, total, display, leave, parent, auto_update)
147
TypeError: object of type ‘NoneType’ has no len()
NB predicting row by row on the test_df works fine
# Workaround: run inference on the test set one row at a time.
for idx, row in test_df.iterrows():
    pred = learn.predict(row)
#TEST 4
data = TabularDataBunch.from_df(path, df=df, dep_var=dep_var, valid_idx=valid_idx, procs=procs, cat_names=cat_vars, cont_names=cont_vars)
#error at
data = TabularDataBunch.from_df(path, df=df, dep_var=dep_var, valid_idx=valid_idx, procs=procs, cat_names=cat_vars, cont_names=cont_vars)
#error
File “/mnt/963GB/Data/Python/Courses/fastai/fastai/fastai/data_block.py”, line 38, in
def process(self, ds:Collection): ds.items = array([self.process_one(item) for item in ds.items])
File “/mnt/963GB/Data/Python/Courses/fastai/fastai/fastai/data_block.py”, line 282, in process_one
raise Exception(“Your validation data contains a label that isn’t present in the training set, please fix your data.”)
Exception: Your validation data contains a label that isn’t present in the training set, please fix your data.
Interesting that using exactly the same validation indexes in TESTS 1,2,3 does not produce this error, and this error occurs every time I create a TabularDataBunch like this.
I then removed the columns from the df that had different values between the validation and training data, i.e.:
# For each column, record the values that appear in the validation rows
# (val_df) but not in df.
# NOTE(review): if df still includes the validation rows, this difference is
# always empty and nothing gets dropped — confirm df is training-only here.
col_diffs = {}
for col in df:
    diffs = set(val_df[col]).difference(set(df[col]))
    if diffs:
        col_diffs[col] = diffs

# `columns=` needs a flat list of labels; `[col_diffs.keys()]` would be a
# one-element list holding a dict_keys object. Materialise the labels once so
# both frames drop exactly the same columns.
drop_cols = list(col_diffs)
df.drop(columns=drop_cols, inplace=True)
test_df.drop(columns=drop_cols, inplace=True)
and still got the same error.
Then I did some testing on the inference tutorial: tutorial.inference.ipynb in docs_src.
Modified notebooks I used for testing are here: https://github.com/adriangrepo/my_fastai_v3/tree/master/docs_src
Key summary for these: using TabularList.from_df and passing in test_df=test_df , I kept getting errors on learn.get_preds(DatasetType.Test)
#error:
~/miniconda3/envs/fastai-py3.7/lib/python3.7/site-packages/fastprogress/fastprogress.py in __init__(self, gen, total, display, leave, parent, auto_update)
143 class NBProgressBar(ProgressBar):
144 def __init__(self, gen, total=None, display=True, leave=True, parent=None, auto_update=True):
--> 145 self.progress = html_progress_bar(0, len(gen) if total is None else total, "")
146 super().__init__(gen, total, display, leave, parent, auto_update)
147
TypeError: object of type ‘NoneType’ has no len()
But when using TabularDataBunch.from_df() I did manage to pass in test_df and get preds from learn.get_preds(DatasetType.Test), with some provisos, as noted in the notebook tutorial.inference_tabulardatabunch.ipynb.