Links
Background
I am currently trying to apply some sentiment analysis knowledge from the first part of the Deep Learning course. The data came as one big .csv file, which I reduced to 4 columns (id, Clothing ID, Review Text and Recommended IND (= the dependent variable)) and split into training and validation sets. I trained a model on the training set to predict words (as Jeremy did) and saved the encoder and vocabulary. Then I proceeded to sentiment analysis.
Data loading
I found out it’s possible to use TabularDataset in order to get splits from .csv files, so I used that.
# Build the train/validation datasets from the two CSV files under PATH;
# skip_header=True asks torchtext to skip each file's header row.
# NOTE(review): the TypeError reported below is raised by torchtext's
# sorted(self.dataset, key=self.sort_key) — presumably because no sort_key
# is defined for these datasets; confirm against the torchtext docs.
splits = data.TabularDataset.splits(path=PATH, format='csv', skip_header=True,
train='trn.csv', validation='val.csv',
# One (column name, field) pair per CSV column.
# NOTE(review): a None field presumably makes torchtext ignore that
# column — confirm against the TabularDataset documentation.
fields=[('id', None),
('Clothing ID', None),
('Review Text', TEXT),  # the review text, used as the model input
('Recommended IND', LABEL)])  # the dependent variable
With the splits created, I went on to create the TextData object and the RNN_Learner.
# Create the learner from md2 (its construction is not shown here —
# presumably the TextData object built from `splits`).
# NOTE(review): the positional 1500 is presumably the maximum sequence
# length — confirm against fastai's TextData.get_model signature.
m2 = md2.get_model(opt_fn, 1500, bptt, emb_sz=em_sz, n_hid=nh, n_layers=nl,
dropout=0.1, dropouti=0.4, wdrop=0.5, dropoute=0.05, dropouth=0.3)
# Attach seq2seq_reg with fixed alpha/beta as the learner's regularisation hook.
m2.reg_fn = partial(seq2seq_reg, alpha=2, beta=1)
# Load the encoder saved earlier under the name 'adam1_enc'.
m2.load_encoder(f'adam1_enc')
Training
Then I started training.
# Clip value forwarded to the training loop (fit_gen passes clip=self.clip);
# presumably a gradient-clipping threshold — confirm.
m2.clip=25.
# Five learning rates — presumably one per layer group, lowest for the
# earliest layers; verify against get_layer_opt.
lrs=np.array([1e-4,1e-4,1e-4,1e-3,1e-2])
# NOTE(review): presumably leaves only the last layer group trainable — confirm.
m2.freeze_to(-1)
# Train for one cycle at half the learning rates above, reporting accuracy.
m2.fit(lrs/2, 1, metrics=[accuracy])
However, at the end of the first epoch, when validation begins, I get this error:
TypeError: '<' not supported between instances of 'Example' and 'Example'
Full traceback:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-35-893af9e1e024> in <module>()
1 m2.freeze_to(-1)
----> 2 m2.fit(lrs/2, 1, metrics=[accuracy])
D:\Dropbox\Dropbox\Computer science\Applied machine learning\FastAI NLP\Clothing reviews\fastai\learner.py in fit(self, lrs, n_cycle, wds, **kwargs)
285 self.sched = None
286 layer_opt = self.get_layer_opt(lrs, wds)
--> 287 return self.fit_gen(self.model, self.data, layer_opt, n_cycle, **kwargs)
288
289 def warm_up(self, lr, wds=None):
D:\Dropbox\Dropbox\Computer science\Applied machine learning\FastAI NLP\Clothing reviews\fastai\learner.py in fit_gen(self, model, data, layer_opt, n_cycle, cycle_len, cycle_mult, cycle_save_name, best_save_name, use_clr, use_clr_beta, metrics, callbacks, use_wd_sched, norm_wds, wds_sched_mult, use_swa, swa_start, swa_eval_freq, **kwargs)
232 metrics=metrics, callbacks=callbacks, reg_fn=self.reg_fn, clip=self.clip, fp16=self.fp16,
233 swa_model=self.swa_model if use_swa else None, swa_start=swa_start,
--> 234 swa_eval_freq=swa_eval_freq, **kwargs)
235
236 def get_layer_groups(self): return self.models.get_layer_groups()
D:\Dropbox\Dropbox\Computer science\Applied machine learning\FastAI NLP\Clothing reviews\fastai\model.py in fit(model, data, n_epochs, opt, crit, metrics, callbacks, stepper, swa_model, swa_start, swa_eval_freq, **kwargs)
159
160 if not all_val:
--> 161 vals = validate(model_stepper, cur_data.val_dl, metrics, seq_first=seq_first)
162 stop=False
163 for cb in callbacks: stop = stop or cb.on_epoch_end(vals)
D:\Dropbox\Dropbox\Computer science\Applied machine learning\FastAI NLP\Clothing reviews\fastai\model.py in validate(stepper, dl, metrics, seq_first)
220 stepper.reset(False)
221 with no_grad_context():
--> 222 for (*x,y) in iter(dl):
223 preds, l = stepper.evaluate(VV(x), VV(y))
224 batch_cnts.append(batch_sz(x, seq_first=seq_first))
D:\Dropbox\Dropbox\Computer science\Applied machine learning\FastAI NLP\Clothing reviews\fastai\nlp.py in __iter__(self)
322 it = iter(self.src)
323 for i in range(len(self)):
--> 324 b = next(it)
325 yield getattr(b, self.x_fld).data, getattr(b, self.y_fld).data
326
D:\Programs\Anaconda3\envs\fastai\lib\site-packages\torchtext\data\iterator.py in __iter__(self)
133 def __iter__(self):
134 while True:
--> 135 self.init_epoch()
136 for idx, minibatch in enumerate(self.batches):
137 # fast-forward if loaded from state
D:\Programs\Anaconda3\envs\fastai\lib\site-packages\torchtext\data\iterator.py in init_epoch(self)
109 self._random_state_this_epoch = self.random_shuffler.random_state
110
--> 111 self.create_batches()
112
113 if self._restored_from_state:
D:\Programs\Anaconda3\envs\fastai\lib\site-packages\torchtext\data\iterator.py in create_batches(self)
233 def create_batches(self):
234 if self.sort:
--> 235 self.batches = batch(self.data(), self.batch_size,
236 self.batch_size_fn)
237 else:
D:\Programs\Anaconda3\envs\fastai\lib\site-packages\torchtext\data\iterator.py in data(self)
94 """Return the examples in the dataset in order, sorted, or shuffled."""
95 if self.sort:
---> 96 xs = sorted(self.dataset, key=self.sort_key)
97 elif self.shuffle:
98 xs = [self.dataset[i] for i in self.random_shuffler(range(len(self.dataset)))]
TypeError: '<' not supported between instances of 'Example' and 'Example'