Hello all. I am training a Recurrent Encoder-Decoder network to generate animation curves from speech.
To start off with, I wrote a basic PyTorch training loop to overfit a subsample of my data. This went well.
I then tried to do the same with a fastai Learner, but the predicted curves come out overly smooth and the results are poor.
Can somebody help me understand what modifications I need to make to build a simple Learner that will train my model?
Model:
from torch import nn
import torch.nn.functional as F
import torch
input_size = 40
hidden_size = 256
output_size = 1
CUDA = torch.cuda.is_available()
class EncoderRNN(nn.Module):
    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.gru = nn.GRU(input_size, hidden_size, batch_first=True)

    def init_hidden(self, bs, device):
        # fresh zero state per batch, created on the same device as the input
        return torch.zeros(1, bs, self.hidden_size, device=device)

    def forward(self, input, hidden):
        # input arrives as [bs, features, timesteps]; the GRU with
        # batch_first=True expects [bs, timesteps, features]
        output = input.transpose(2, 1)
        output, hidden = self.gru(output, hidden)
        return output, hidden
class DecoderRNN(nn.Module):
    def __init__(self, hidden_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.gru = nn.GRU(hidden_size, hidden_size, batch_first=True)
        self.linear = nn.Linear(hidden_size, output_size)

    def forward(self, input, hidden):
        # input is the encoder output sequence: [bs, timesteps, hidden_size]
        output = F.relu(input)
        output, hidden = self.gru(output, hidden)
        output = self.linear(output)  # [bs, timesteps, output_size]
        return output, hidden
class EDRNN(nn.Module):
    def __init__(self, input_size, output_size):
        super().__init__()
        # architecture
        self.enc = EncoderRNN(input_size, hidden_size)
        self.dec = DecoderRNN(hidden_size, output_size)
        if CUDA:
            self.cuda()

    def forward(self, xb):
        # build the initial hidden state from the batch itself, so its batch
        # size and device always match the incoming tensor
        hidden = self.enc.init_hidden(xb.size(0), xb.device)
        enc_out, hidden = self.enc(xb, hidden)
        dec_out, hidden = self.dec(enc_out, hidden)
        return dec_out.squeeze(dim=2)  # [bs, timesteps]

    def decode(self, x):  # for inference
        pass
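For reference, a quick shape check with dummy tensors. The shapes here are my assumption from the transpose in EncoderRNN.forward: 40 speech features per frame in, one curve value per frame out.
# Hypothetical dummy batch, just to illustrate the expected tensor shapes
bs, timesteps = 4, 100
model = EDRNN(input_size, output_size)
x = torch.randn(bs, input_size, timesteps)
if CUDA:
    x = x.cuda()
print(model(x).shape)  # torch.Size([4, 100]) -> one value per timestep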
PyTorch loop:
model = EDRNN(input_size, output_size)
criterion = nn.L1Loss()
enc_optim = torch.optim.Adam(model.enc.parameters(), lr=0.001)
dec_optim = torch.optim.Adam(model.dec.parameters(), lr=0.001)

for epoch in range(1000):
    # assumes train_dl already yields tensors on the same device as the model
    for x, y in train_dl:
        model.zero_grad()  # clears gradients for both encoder and decoder
        output = model(x)
        loss = criterion(output, y)
        loss.backward()
        enc_optim.step()
        dec_optim.step()
    print(loss.item())  # loss on the last batch of the epoch
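For context, this is roughly the Learner setup I have been attempting. It is a minimal sketch assuming fastai v2, and that train_dl and valid_dl are plain PyTorch DataLoaders yielding (x, y) pairs (valid_dl is illustrative, not shown above):
from fastai.data.core import DataLoaders
from fastai.learner import Learner
import fastai.callback.schedule  # noqa: F401 -- registers Learner.fit_one_cycle

# Wrap the existing loaders; a single optimizer over all parameters replaces
# the separate encoder/decoder Adam optimizers from the raw loop.
dls = DataLoaders(train_dl, valid_dl)
model = EDRNN(input_size, output_size)
learn = Learner(dls, model, loss_func=nn.L1Loss())
learn.fit_one_cycle(10, lr_max=1e-3)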