While the model itself is the same, I handled data and fit on my own instead of letting fastai do it, which is the cause of this. The model only returns an array zeros and the next character generated always is ‘\n’. What am I doing wrong here? Thanks in advance.
#Defining Modelclass
class Char3Model(nn.Module):
def __init__(self, vocab_size, n_fac):
super(Char3Model, self).__init__()
self.e = nn.Embedding(vocab_size, n_fac)
self.l1 = nn.Linear(n_fac, n_hidden)
self.l2 = nn.Linear(n_hidden, n_hidden)
self.l3 = nn.Linear(n_hidden, vocab_size)
def forward(self, c1, c2, c3):
in1 = F.relu(self.l1(self.e(c1)))
in2 = F.relu(self.l1(self.e(c2)))
in3 = F.relu(self.l1(self.e(c3)))
h = torch.zeros(in1.size())
h = F.tanh(self.l2(h+in1))
h = F.tanh(self.l2(h+in2))
h = F.tanh(self.l2(h+in3))
return F.log_softmax(self.l3(h))
def numToCharTensor(n):
out = torch.zeros(1, vocab_size)
out[0][n] = 1
return out.long()
#Declaring model, criterion and optimizer
model = Char3Model(vocab_size, n_fac)
criterion = nn.NLLLoss()
optimizer = optim.Adam(model.parameters(), lr=0.002)
#Training the model
tot_loss = 0
for i in range(len(x1[:400])):
optimizer.zero_grad()
t1 = numToCharTensor(x1[i])
t2 = numToCharTensor(x2[i])
t3 = numToCharTensor(x3[i])
output = model(t1, t2, t3)
loss = criterion(output, numToCharTensor(y[i]))
tot_loss += loss
loss.backward()
optimizer.step()
print(total_loss) //prints 0.
#Code to generate next character
def gen_next(sub_str):
char_tensors = [numToCharTensor(char_indices[c]) for c in sub_str]
print(chars[torch.argmax(model(*char_tensors)).numpy()])
print(gen_text('he ')) //prints a new line
Edit: Just realized that output gives us a (1, 86, 86) tensor while we are comparing it to a (1, 86) tensor in criterion. Now I wonder how I should be comparing them.
Edit: Fixed it. My model still doesn’t seem to be learning, as in, it generates different outputs for different inputs, but they all have the same argmax. I’ll have to look into it.