This is my code of trying to replicate multi-layer RNN in chapter 12.
class LMModel5_2(Module):
def __init__(self, vocab_sz, n_hidden, n_layers):
self.n_layers = n_layers
self.n_hidden = n_hidden
self.i_h = nn.Embedding(vocab_sz, n_hidden)
self.h_h = L(nn.Linear(n_hidden, n_hidden) for i in range(n_layers))
self.h_o = nn.Linear(n_hidden, vocab_sz)
self.h = torch.zeros(n_layers, bs, n_hidden)
def forward(self, x):
outputs = [L() for i in range(self.n_layers)]
for i in range(sl):
self.h[0] = self.h[0] + self.i_h(x[:,0])
self.h[0] = F.relu(self.h_h[0](self.h[0]))
outputs[0] += self.h[0]
for layer in range(1, self.n_layers):
for i in range(sl):
self.h[layer] = self.h[layer] + outputs[layer-1][i]
self.h[layer] = F.relu(self.h_h[layer](self.h[layer]))
outputs[layer] += self.h[layer]
self.h = self.h.detach()
return self.h_o(torch.stack(outputs[self.n_layers-1], dim=1))
def reset(self):
self.h.zero_()
When I run this code, I get an error like this.
<ipython-input-169-d409acf1d12d> in forward(self, x)
15 for layer in range(1, self.n_layers):
16 for i in range(sl):
---> 17 self.h[layer] = self.h[layer] + outputs[layer-1][i]
18 self.h[layer] = F.relu(self.h_h[layer](self.h[layer]))
19 outputs[layer] += self.h[layer]
RuntimeError: Output 0 of UnbindBackward0 is a view and its base or another view of its base has been modified inplace. This view is the output of a function that returns multiple views. Such functions do not allow the output views to be modified inplace. You should replace the inplace operation by an out-of-place one.
I can’t seem to get what this error is trying to say. I’m assuming that since I’m adding to the hidden state a tensor that I got from ‘outputs’( a list and not a tensor ), it’s giving me an error.
I assumed that because, I have another version which I think I have succeeded
class LMModel5_1(Module):
def __init__(self, vocab_sz, n_hidden, n_layers):
self.i_h = nn.Embedding(vocab_sz, n_hidden)
self.h_h = L(nn.Linear(n_hidden, n_hidden) for i in range(n_layers))
self.h_o = nn.Linear(n_hidden, vocab_sz)
self.h = L(0 for j in range(n_layers))
self.layer_output = torch.zeros(n_layers+1, sl, bs, n_hidden)
def forward(self, x):
for i in range(sl):
self.layer_output[0,i] = self.i_h(x[:,i])
for layer in range(len(self.h_h)):
for i in range(sl):
self.h[layer] = self.h[layer] + self.layer_output[layer,i]
self.h[layer] = self.h_h[layer](self.h[layer])
self.h[layer] = F.relu(self.h[layer])
self.layer_output[layer+1,i] = self.h[layer]
for i in range(len(self.h)):
self.h[i] = self.h[i].detach()
self.layer_output = self.layer_output.detach()
return self.h_o(self.layer_output[len(self.h_h)]).view(bs, sl, -1)
def reset(self):
for i in range(len(self.h)):
self.h[i] = 0
I basically think this code is very similar to the first code, except for the fact that I stored the ouputs of the previous layers into a tensor. Is this the case?
And I’m confused with tensor.detach(). It says that this function clears gradient history. But if I replace the hidden state with the detach-ed version at the end, won’t there be nothing to gradient-descend? I remember that pytorch calculates gradients when calling backwards(). When is this method called in this code?
And also, can anybody advise me what improvements can be done to this code? It will be grateful…
Great thanks to everyone!