In Chapter 12 we create an LSTMCell
from scratch as such:
class LSTMCell(Module):
def __init__(self, ni, nh):
self.ih = nn.Linear(ni,4*nh)
self.hh = nn.Linear(nh,4*nh)
def forward(self, input, state):
h,c = state
#One big multiplication for all the gates is better than 4 smaller ones
gates = (self.ih(input) + self.hh(h)).chunk(4, 1)
ingate,forgetgate,outgate = map(torch.sigmoid, gates[:3])
cellgate = gates[3].tanh()
c = (forgetgate*c) + (ingate*cellgate)
h = outgate * c.tanh()
return h, (h,c)
How do I create a single layer LSTM model to use this cell? This isn’t implemented in the notebook. I have tried it on my own but I’m getting an error RuntimeError: size mismatch, m1: [1024 x 64], m2: [2 x 256] at /pytorch/aten/src/TH/generic/THTensorMath.cpp:41
and I’m not sure what the mistake is. Here’s the code for the model and training:
class LMModel6(Module):
def __init__(self, vocab_sz, n_hidden, n_layers):
self.i_h = nn.Embedding(vocab_sz, n_hidden)
self.lstm = LSTMCell(n_layers, n_hidden)
self.h_o = nn.Linear(n_hidden, vocab_sz)
self.h = torch.zeros(n_layers, bs, n_hidden)
def forward(self, x):
h, res = self.lstm(self.i_h(x), self.h)
self.h = h.detach()
return self.h_o(res)
def reset(self):
for h in self.h: h.zero_()
learn = Learner(dls, LMModel6(len(vocab), 64, 2),
loss_func=CrossEntropyLossFlat(),
metrics=accuracy, cbs=ModelReseter)
learn.fit_one_cycle(15, 1e-2)
Any idea how to fix this?