Thank you for posting this! I’m trying to run your code, but I’m hitting tensor size errors when I get to the learning rate finder and training. I’m a little lost — it looks like my matrix sizes aren’t matching up somewhere in the forward step, but I’m not sure where that should take me. If anyone has any hints, I’d sure appreciate it.
Thank you again @KarlH for showing us your notebook!
<ipython-input-41-4a6b8ae95523> in <module>
----> 1 learn.lr_find(end_lr = 1000, stepper=TransformStepper)
2 learn.sched.plot()
~/fastai/courses/dl2/fastai/learner.py in lr_find(self, start_lr, end_lr, wds, linear, **kwargs)
343 layer_opt = self.get_layer_opt(start_lr, wds)
344 self.sched = LR_Finder(layer_opt, len(self.data.trn_dl), end_lr, linear=linear)
--> 345 self.fit_gen(self.model, self.data, layer_opt, 1, **kwargs)
346 self.load('tmp')
347
~/fastai/courses/dl2/fastai/learner.py in fit_gen(self, model, data, layer_opt, n_cycle, cycle_len, cycle_mult, cycle_save_name, best_save_name, use_clr, use_clr_beta, metrics, callbacks, use_wd_sched, norm_wds, wds_sched_mult, use_swa, swa_start, swa_eval_freq, **kwargs)
247 metrics=metrics, callbacks=callbacks, reg_fn=self.reg_fn, clip=self.clip, fp16=self.fp16,
248 swa_model=self.swa_model if use_swa else None, swa_start=swa_start,
--> 249 swa_eval_freq=swa_eval_freq, **kwargs)
250
251 def get_layer_groups(self): return self.models.get_layer_groups()
~/fastai/courses/dl2/fastai/model.py in fit(model, data, n_epochs, opt, crit, metrics, callbacks, stepper, swa_model, swa_start, swa_eval_freq, visualize, **kwargs)
139 batch_num += 1
140 for cb in callbacks: cb.on_batch_begin()
--> 141 loss = model_stepper.step(V(x),V(y), epoch)
142 avg_loss = avg_loss * avg_mom + loss * (1-avg_mom)
143 debias_loss = avg_loss / (1 - avg_mom**batch_num)
<ipython-input-35-25fa36db025a> in step(self, xs, y, epoch)
11 src_mask, trg_mask = create_masks(src, trg_input)
12
---> 13 output = self.m(src, trg_input, src_mask, trg_mask)
14
15
~/anaconda3/envs/fastai/lib/python3.6/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
489 result = self._slow_forward(*input, **kwargs)
490 else:
--> 491 result = self.forward(*input, **kwargs)
492 for hook in self._forward_hooks.values():
493 hook_result = hook(self, input, result)
<ipython-input-32-33f714b08be4> in forward(self, src, trg, src_mask, trg_mask)
6 self.out = nn.Linear(d_model, trg_vocab)
7 def forward(self, src, trg, src_mask, trg_mask):
----> 8 e_outputs = self.encoder(src, src_mask)
9 d_output = self.decoder(trg, e_outputs, src_mask, trg_mask)
10 output = self.out(d_output)
~/anaconda3/envs/fastai/lib/python3.6/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
489 result = self._slow_forward(*input, **kwargs)
490 else:
--> 491 result = self.forward(*input, **kwargs)
492 for hook in self._forward_hooks.values():
493 hook_result = hook(self, input, result)
<ipython-input-31-c1456d295578> in forward(self, src, mask)
9 def forward(self, src, mask):
10 x = self.embed(src)
---> 11 x = self.pe(x)
12 for i in range(N):
13 x = self.layers[i](x, mask)
~/anaconda3/envs/fastai/lib/python3.6/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
489 result = self._slow_forward(*input, **kwargs)
490 else:
--> 491 result = self.forward(*input, **kwargs)
492 for hook in self._forward_hooks.values():
493 hook_result = hook(self, input, result)
<ipython-input-25-05d4173edd48> in forward(self, x)
20 seq_len = x.size(1)
21 x = x + Variable(self.pe[:,:seq_len], \
---> 22 requires_grad=False).cuda()
23 return x
RuntimeError: The size of tensor a (152) must match the size of tensor b (80) at non-singleton dimension 1