Code for Attention
class SelfAttention(nn.Module):
    """Attention pooling over the time steps of the last layer's output.

    Every time-step vector is passed through the layers in ``self.layers``
    (currently a single ``LinearBlock`` mapping ``attention_size`` features to
    one value), the resulting per-step scores are softmax-normalised across
    the sequence, and the time-step vectors are summed using those weights.
    """

    def __init__(self, attention_size):
        """Create the scoring layers.

        Args:
            attention_size: number of features in each time-step vector that
                will be scored.
        """
        super().__init__()
        # NOTE(review): the third LinearBlock argument (0.) is presumably a
        # dropout probability -- confirm against LinearBlock's signature.
        self.layers = nn.ModuleList([
            LinearBlock(attention_size, 1, 0.)])
        self.softmax = nn.Softmax(dim=-1)

    @staticmethod
    def get_mask(attentions):
        """Return a multiplicative mask with the same shape as ``attentions``.

        The mask is all ones, i.e. no position is masked out; no
        sequence-length information is available to this method. It is built
        with ``torch.ones_like`` so it always shares the device and dtype of
        ``attentions`` rather than depending on a global device setting, and
        it does not require gradients.
        """
        return torch.ones_like(attentions)

    def forward(self, input):
        """Pool ``outputs[-1]`` into one vector per batch element.

        Args:
            input: a pair ``(raw_outputs, outputs)``. Only ``outputs[-1]`` is
                used for the computation; it is assumed to be laid out as
                (seq_len, batch, features) -- TODO confirm against the
                encoder that feeds this module.

        Returns:
            A tuple ``(representations, raw_outputs, outputs)`` where
            ``representations`` has shape (batch, features) and the other two
            items are passed through untouched.
        """
        raw_outputs, outputs = input
        # Bring the batch dimension to the front: (batch, seq_len, features).
        output = outputs[-1].permute(1, 0, 2)
        bs, ml, _ = output.shape

        # Score every time step independently.
        x = output.reshape(bs * ml, -1)
        for layer in self.layers:
            x = layer(x)
            x = F.relu(x)  # nn.Tanh()(x)
        x = x.view(bs, ml)

        # Normalise across the sequence, apply the mask, then renormalise so
        # the weights still sum to one after masking.
        scores = self.softmax(x)
        masked_scores = scores * self.get_mask(scores)
        scores = masked_scores.div(masked_scores.sum(-1, keepdim=True))

        # Broadcasting the (batch, seq_len, 1) weights over the feature axis.
        weighted = output * scores.unsqueeze(-1)
        # No squeeze(): the result must stay (batch, features) even when the
        # batch size (or the feature size) is 1.
        representations = weighted.sum(1)
        return representations, raw_outputs, outputs
def get_rnn_classifier(bptt, max_seq, n_class, n_tok, emb_sz, n_hid, n_layers, pad_token, layers, drops, bidir=False,
                       dropouth=0.3, dropouti=0.5, dropoute=0.1, wdrop=0.5, qrnn=False):
    """Build a ``SequentialRNN`` made of a ``MultiBatchRNN`` encoder followed by ``SelfAttention``.

    ``bptt``, ``max_seq``, ``n_tok``, ``emb_sz``, ``n_hid`` and ``n_layers`` are
    forwarded positionally to ``MultiBatchRNN``; ``pad_token``, ``bidir``,
    ``dropouth``, ``dropouti``, ``dropoute``, ``wdrop`` and ``qrnn`` are
    forwarded as keyword arguments. ``n_class``, ``layers`` and ``drops`` are
    accepted but not used by this function.

    Returns:
        The assembled ``SequentialRNN`` model.
    """
    encoder_options = dict(
        pad_token=pad_token,
        bidir=bidir,
        dropouth=dropouth,
        dropouti=dropouti,
        dropoute=dropoute,
        wdrop=wdrop,
        qrnn=qrnn,
    )
    # The encoder is created before the attention module, as in the original
    # statement order.
    encoder = MultiBatchRNN(bptt, max_seq, n_tok, emb_sz, n_hid, n_layers, **encoder_options)
    # NOTE(review): the attention size is fixed at 400 regardless of emb_sz /
    # n_hid -- presumably it must match the encoder's output width; confirm.
    attention = SelfAttention(400)
    return SequentialRNN(encoder, attention)
I have attached the error
I figured out that self.fit_gen(self.model, self.data, layer_opt, 1, **kwargs) is introducing these weights. I am not sure how to solve this; I have tried everything. It works perfectly with PoolingLayerClassifier. All I am doing is plugging in an attention layer. It's very frustrating. Can anyone help?
def lr_find(self, start_lr=1e-5, end_lr=10, wds=None, linear=False, **kwargs):
    """Run one ``fit_gen`` pass driven by an ``LR_Finder`` schedule, then restore the model.

    The current state is saved under the name ``'tmp'`` before the run and
    loaded back afterwards, so the pass does not leave its weight updates
    behind. The scheduler is kept on ``self.sched`` for later inspection.

    Args:
        start_lr: value handed to ``self.get_layer_opt`` to build the layer
            optimizer.
        end_lr: value handed to ``LR_Finder`` as the end of the range.
        wds: value handed to ``self.get_layer_opt`` together with
            ``start_lr`` (presumably weight decay -- confirm).
        linear: forwarded to ``LR_Finder`` as ``linear``.
        **kwargs: forwarded unchanged to ``self.fit_gen``.
    """
    self.save('tmp')
    try:
        layer_opt = self.get_layer_opt(start_lr, wds)
        self.sched = LR_Finder(layer_opt, len(self.data.trn_dl), end_lr, linear=linear)
        self.fit_gen(self.model, self.data, layer_opt, 1, **kwargs)
    finally:
        # Restore the saved state even if the pass fails or is interrupted,
        # so the model is never left with the weights from the sweep.
        self.load('tmp')