I’ve been trying to implement something similar to the approach in this blog post:
Looks like it works pretty well.
I’ve built a LM with the SNLI corpus.
The LM worked and I was able to generate new sentences that were reasonable.
I’ve been getting stuck when trying to make sentence vectors. The vectors that I’m getting have no predictive power.
I using pytorch with the fast.ai lib. Every time I try to modify the fast.ai lib I get horribly lost.
Here is my LM code, any ideas where I’m going wrong creating the sentence vector? I’m using forward to train the LM and sentence_vector to create the vectors.
#based on https://github.com/pytorch/examples/blob/master/word_language_model/model.py
class LSTMLM(nn.Module):
"""Container module with an encoder, a module, and a decoder."""
def __init__(self, ntoken, nhid, nlayers, dropout=0.5):
super(LSTMLM, self).__init__()
self.drop = nn.Dropout(dropout)
self.encoder = nn.Embedding(ntoken, nhid)
self.lstm = nn.LSTM(nhid, nhid, nlayers, dropout=dropout)
self.decoder = nn.Linear(nhid, ntoken)
# "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
# https://arxiv.org/abs/1608.05859
# and
# "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016)
# https://arxiv.org/abs/1611.01462
self.decoder.weight = self.encoder.weight
self.init_weights()
self.nhid = nhid
self.nlayers = nlayers
def init_weights(self):
initrange = 0.1
self.encoder.weight.data.uniform_(-initrange, initrange)
self.decoder.bias.data.zero_()
self.decoder.weight.data.uniform_(-initrange, initrange)
def sentence_vector(self, input, hidden):
emb = self.drop(self.encoder(input))
output, hidden = self.lstm(emb, hidden)
output = self.drop(output)
num_words = output.shape[0]
batch_size = output.shape[1]
#flip the outputs first 2 parts so the pooling will do the right thing
#we want the a single vector from the sentence, one per element in the batch
output = output.view(batch_size, num_words, hidden_size)
m = max_pool(output)[0][0]
a = avg_pool(output)[0][0]
l = output[0][-1]
#sentence_vec = torch.cat([m,a,l])
return l
return sentence_vec
def forward(self, input, hidden):
emb = self.drop(self.encoder(input))
output, hidden = self.lstm(emb, hidden)
output = self.drop(output)
decoded = self.decoder(output.view(output.size(0)*output.size(1), output.size(2)))
return decoded.view(output.size(0), output.size(1), decoded.size(1)), hidden
def init_hidden(self, bsz):
weight = next(self.parameters())
return (weight.new_zeros(self.nlayers, bsz, self.nhid),
weight.new_zeros(self.nlayers, bsz, self.nhid))
The fast.ai lib has this code for pooling, but I have no idea what the inputs are.
class PoolingLinearClassifier(nn.Module):
def __init__(self, layers, drops):
super().__init__()
self.layers = nn.ModuleList([
LinearBlock(layers[i], layers[i + 1], drops[i]) for i in range(len(layers) - 1)])
def pool(self, x, bs, is_max):
f = F.adaptive_max_pool1d if is_max else F.adaptive_avg_pool1d
return f(x.permute(1,2,0), (1,)).view(bs,-1)
def forward(self, input):
raw_outputs, outputs = input
output = outputs[-1]
sl,bs,_ = output.size()
avgpool = self.pool(output, bs, False)
mxpool = self.pool(output, bs, True)
x = torch.cat([output[-1], mxpool, avgpool], 1)
for l in self.layers:
l_x = l(x)
x = F.relu(l_x)
return l_x, raw_outputs, outputs