Hi folks, has anyone figured out how to create a sample of data from a folder using the data block API?
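Not 100% sure this is what you're after, but I think you can grab a random subset at the ItemList stage with filter_by_rand before splitting/labelling. Rough sketch assuming fastai v1 — the dataset, the 10% fraction and the batch size are just placeholders:
from fastai.vision import *
path = untar_data(URLs.MNIST_SAMPLE)
data = (ImageList.from_folder(path)        # gather all images under the folder
        .filter_by_rand(0.1, seed=42)      # keep a random ~10% sample of the items
        .split_by_folder()                 # then split/label/bunch as usual
        .label_from_folder()
        .databunch(bs=64))
Since the filter happens before the split, the sample carries through to both train and valid; if you only want a smaller training set you could instead subsample the dataframe/file list before building the ItemList.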
Lesson 4 part 2 (after the streaming interruption): https://www.youtube.com/watch?v=GK1XhPM3K0g
Evening everyone. Anyone joining the call? I am on it.
@shub.chat 1 min
Here I change the dependent variable:
dep_var = 'education-num'
cat_names = ['workclass', '>=50k', 'marital-status', 'occupation', 'relationship', 'race']
cont_names = ['age','fnlwgt']
procs = [FillMissing, Categorify, Normalize]
data = (TabularList.from_df(df, path=path, cat_names=cat_names, cont_names=cont_names, procs=procs)
.split_by_idx(list(range(800,1000)))
.label_from_df(cols=dep_var)
.add_test(test, label=0)
.databunch())
After learn = tabular_learner(data, layers=[200,100], metrics=accuracy)
I change the loss function:
learn.loss_func = MSELossFlat()
And then the fit, which raises the error:
learn.fit(1, 1e-2)
RuntimeError                              Traceback (most recent call last)
<ipython-input> in <module>
----> 1 learn.fit(1, 1e-2)

/opt/anaconda3/lib/python3.6/site-packages/fastai/basic_train.py in fit(self, epochs, lr, wd, callbacks)
    160     callbacks = [cb(self) for cb in self.callback_fns] + listify(callbacks)
    161     fit(epochs, self.model, self.loss_func, opt=self.opt, data=self.data, metrics=self.metrics,
--> 162         callbacks=self.callbacks+callbacks)
    163
    164 def create_opt(self, lr:Floats, wd:Floats=0.)->None:

/opt/anaconda3/lib/python3.6/site-packages/fastai/basic_train.py in fit(epochs, model, loss_func, opt, data, callbacks, metrics)
     92     except Exception as e:
     93         exception = e
---> 94         raise e
     95     finally: cb_handler.on_train_end(exception)
     96

/opt/anaconda3/lib/python3.6/site-packages/fastai/basic_train.py in fit(epochs, model, loss_func, opt, data, callbacks, metrics)
     87         if hasattr(data,'valid_dl') and data.valid_dl is not None:
     88             val_loss = validate(model, data.valid_dl, loss_func=loss_func,
---> 89                                 cb_handler=cb_handler, pbar=pbar)
     90         else: val_loss=None
     91         if cb_handler.on_epoch_end(val_loss): break

/opt/anaconda3/lib/python3.6/site-packages/fastai/basic_train.py in validate(model, dl, loss_func, cb_handler, pbar, average, n_batch)
     52         if not is_listy(yb): yb = [yb]
     53         nums.append(yb[0].shape[0])
---> 54         if cb_handler and cb_handler.on_batch_end(val_losses[-1]): break
     55         if n_batch and (len(nums)>=n_batch): break
     56     nums = np.array(nums, dtype=np.float32)

/opt/anaconda3/lib/python3.6/site-packages/fastai/callback.py in on_batch_end(self, loss)
    236     "Handle end of processing one batch with `loss`."
    237     self.state_dict['last_loss'] = loss
--> 238     stop = np.any(self('batch_end', not self.state_dict['train']))
    239     if self.state_dict['train']:
    240         self.state_dict['iteration'] += 1

/opt/anaconda3/lib/python3.6/site-packages/fastai/callback.py in __call__(self, cb_name, call_mets, **kwargs)
    184 def __call__(self, cb_name, call_mets=True, **kwargs)->None:
    185     "Call through to all of the `CallbakHandler` functions."
--> 186     if call_mets: [getattr(met, f'on_{cb_name}')(**self.state_dict, **kwargs) for met in self.metrics]
    187     return [getattr(cb, f'on_{cb_name}')(**self.state_dict, **kwargs) for cb in self.callbacks]
    188

/opt/anaconda3/lib/python3.6/site-packages/fastai/callback.py in <listcomp>(.0)
    184 def __call__(self, cb_name, call_mets=True, **kwargs)->None:
    185     "Call through to all of the `CallbakHandler` functions."
--> 186     if call_mets: [getattr(met, f'on_{cb_name}')(**self.state_dict, **kwargs) for met in self.metrics]
    187     return [getattr(cb, f'on_{cb_name}')(**self.state_dict, **kwargs) for cb in self.callbacks]
    188

/opt/anaconda3/lib/python3.6/site-packages/fastai/callback.py in on_batch_end(self, last_output, last_target, train, **kwargs)
    269     if not is_listy(last_target): last_target=[last_target]
    270     self.count += last_target[0].size(0)
--> 271     self.val += last_target[0].size(0) * self.func(last_output, *last_target).detach().cpu()
    272
    273 def on_epoch_end(self, **kwargs):

/opt/anaconda3/lib/python3.6/site-packages/fastai/metrics.py in accuracy(input, targs)
     37     input = input.argmax(dim=-1).view(n,-1)
     38     targs = targs.view(n,-1)
---> 39     return (input==targs).float().mean()
     40
     41 def error_rate(input:Tensor, targs:Tensor)->Rank0Tensor:

RuntimeError: Expected object of scalar type Long but got scalar type Float for argument #2 'other'
Paper by Google AI on how batch size impacts performance:
https://arxiv.org/abs/1811.03600
I guess I get nan because education-num has some NA's. However, when I select age as the dep_var, I again get "Expected object of scalar type Long but got scalar type Float for argument #2 'other'", so to fix it I convert it to float:
df['age'] = 1.0*df['age']
and now it works fine.
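FWIW, rather than casting the column by hand, I think the tidier fix is to tell the data block that the label is continuous and to drop accuracy (it argmaxes the predictions and compares them with Long targets, which is why it chokes once floats are involved). Rough sketch, assuming fastai v1's FloatList label class and root_mean_squared_error metric:
dep_var = 'age'
cat_names = ['workclass', '>=50k', 'marital-status', 'occupation', 'relationship', 'race']
cont_names = ['fnlwgt']   # drop the target column from cont_names
procs = [FillMissing, Categorify, Normalize]
data = (TabularList.from_df(df, path=path, cat_names=cat_names, cont_names=cont_names, procs=procs)
        .split_by_idx(list(range(800,1000)))
        .label_from_df(cols=dep_var, label_cls=FloatList)   # regression labels, not categories
        .databunch())
learn = tabular_learner(data, layers=[200,100], metrics=root_mean_squared_error)
learn.loss_func = MSELossFlat()
learn.fit(1, 1e-2)
Note that FillMissing only touches cont_names, so any NA's in the label column (like education-num) would still need to be dropped or filled before labelling, otherwise you'll keep getting nan losses.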
I won't be able to attend the call today, sorry people.
I totally lost track of time on this. My apologies. Hopefully I will be able to participate next week.