This is not related to the course or the fastai library (sorry if this is out of scope). I am trying to train a PyTorch GRU model on my own dataset, but I am running into an assertion error at the point of loss.backward(). Here is my code, followed by the error. Any help is appreciated.
import torch
import torch.nn as nn
from torch.autograd import Variable


class AttendResistance(nn.Module):
    def __init__(self, nb_classes, nb_tokens, embedding_matrix,
                 embed_dropout_rate=0, final_dropout_rate=0, return_attention=False):
        super(AttendResistance, self).__init__()
        embedding_dim = 20
        hidden_size = 32
        self.embed_dropout_rate = embed_dropout_rate
        self.final_dropout_rate = final_dropout_rate
        self.return_attention = return_attention
        self.hidden_size = hidden_size
        self.nb_classes = nb_classes
        # Embedding layer initialised from a precomputed embedding matrix
        self.embed = nn.Embedding(nb_tokens, embedding_dim)
        self.embed.weight = nn.Parameter(embedding_matrix)
        self.embed_dropout = nn.Dropout2d(embed_dropout_rate)
        self.gru = nn.GRU(embedding_dim, hidden_size, num_layers=1, batch_first=True,
                          dropout=0.5, bidirectional=False)
        self.final_drop = nn.Dropout(final_dropout_rate)
        self.linear = nn.Linear(hidden_size, nb_classes)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, input_seqs):
        print(input_seqs.size())
        x = self.embed(input_seqs)
        print(x.size())
        x = nn.Tanh()(x)
        print(x.size())
        x = self.embed_dropout(x)
        print(x.size())
        x, _ = self.gru(x)
        print(x.size())
        x = self.final_drop(x)
        print(x.size())
        # Classify from the GRU output at the last time step
        x = self.linear(x[:, -1, :].float())
        print(x.size())
        outputs = self.softmax(x)
        print(outputs.size())
        if self.return_attention:
            return outputs, att_weights
        else:
            return outputs


attn_res = AttendResistance(268, 20, embedding_matrix, 0.5, 0.3, True)
attn_res = attn_res.cuda()

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(attn_res.parameters())

num_epochs = 10
for epoch in range(num_epochs):
    for i, (prot_seqs, labels) in enumerate(train_loader):
        prot_seqs = Variable(prot_seqs.long()).cuda()
        labels = Variable(labels.long()).cuda()
        # print(prot_seqs)
        # print(labels)

        # Forward + Backward + Optimize
        optimizer.zero_grad()
        outputs, att_weights = attn_res(prot_seqs)
        print(outputs)
        loss = criterion(outputs, torch.max(labels, 1)[1])
        print(loss)
        loss.backward()
        optimizer.step()

        if (i+1) % 100 == 0:
            print('Epoch [%d/%d], Step [%d/%d], Loss: %.4f'
                  % (epoch+1, num_epochs, i+1, len(X_train)//batch_size, loss.data[0]))
And here is the error with the print output:
torch.Size([64, 1602])
torch.Size([64, 1602, 20])
torch.Size([64, 1602, 20])
torch.Size([64, 1602, 20])
torch.Size([64, 1602, 32])
torch.Size([64, 1602, 32])
torch.Size([64, 268])
torch.Size([64, 268])
Variable containing:
1.00000e-03 *
3.5743 3.7436 4.2370 ... 3.9607 4.2058 4.2674
3.5743 3.7436 4.2370 ... 3.9607 4.2058 4.2674
3.5743 3.7436 4.2370 ... 3.9607 4.2058 4.2674
... ⋱ ...
3.5743 3.7436 4.2370 ... 3.9607 4.2058 4.2674
3.5743 3.7436 4.2370 ... 3.9607 4.2058 4.2674
3.5743 3.7436 4.2370 ... 3.9607 4.2058 4.2674
[torch.cuda.FloatTensor of size 64x268 (GPU 0)]
Variable containing:
5.5909
[torch.cuda.FloatTensor of size 1 (GPU 0)]
---------------------------------------------------------------------------
AssertionError Traceback (most recent call last)
<ipython-input-89-a32cf2edb4cc> in <module>()
17 print (torch.sum(att_weights))
18 print (loss)
---> 19 loss.backward()
20 optimizer.step()
21
/home/nafizh/anaconda3/lib/python3.6/site-packages/torch/autograd/variable.py in backward(self, gradient, retain_graph, create_graph, retain_variables)
165 Variable.
166 """
--> 167 torch.autograd.backward(self, gradient, retain_graph, create_graph, retain_variables)
168
169 def register_hook(self, hook):
/home/nafizh/anaconda3/lib/python3.6/site-packages/torch/autograd/__init__.py in backward(variables, grad_variables, retain_graph, create_graph, retain_variables)
97
98 Variable._execution_engine.run_backward(
---> 99 variables, grad_variables, retain_graph)
100
101
/home/nafizh/anaconda3/lib/python3.6/site-packages/torch/autograd/function.py in _do_backward(self, gradients, retain_variables)
333 def _do_backward(self, gradients, retain_variables):
334 self.retain_variables = retain_variables
--> 335 result = super(NestedIOFunction, self)._do_backward(gradients, retain_variables)
336 if not retain_variables:
337 del self._nested_output
/home/nafizh/anaconda3/lib/python3.6/site-packages/torch/autograd/function.py in backward(self, *gradients)
341 def backward(self, *gradients):
342 nested_gradients = _unflatten(gradients, self._nested_output)
--> 343 result = self.backward_extended(*nested_gradients)
344 return tuple(_iter_None_tensors(result))
345
/home/nafizh/anaconda3/lib/python3.6/site-packages/torch/nn/_functions/rnn.py in backward_extended(self, grad_output, grad_hy)
333 output,
334 weight,
--> 335 grad_weight)
336 else:
337 grad_weight = [(None,) * len(layer_weight) for layer_weight in weight]
/home/nafizh/anaconda3/lib/python3.6/site-packages/torch/backends/cudnn/rnn.py in backward_weight(fn, input, hx, output, weight, grad_weight)
466
467 # copy the weights from the weight_buf into grad_weight
--> 468 grad_params = get_parameters(fn, handle, dw)
469 _copyParams(grad_params, grad_weight)
470 return grad_weight
/home/nafizh/anaconda3/lib/python3.6/site-packages/torch/backends/cudnn/rnn.py in get_parameters(fn, handle, weight_buf)
169 layer_params.append(param)
170 else:
--> 171 assert cur_offset == offset
172
173 cur_offset = offset + filter_dim_a[0]
AssertionError:
Because the error does not come with an explicit message, I don't know what I am doing wrong here. I understand that an assertion is failing, but I don't know what these cur_offset and offset variables are. I am running on:
PyTorch 0.3.0.post4
Cuda compilation tools, release 8.0, V8.0.61
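In case it helps narrow this down, here is a minimal standalone sketch I could run (random data with the same shapes as my batches: 64 sequences of length 1602 over 20 tokens) that exercises just the embedding + GRU forward/backward. My assumption is that if this also trips the assertion, the problem is in the cuDNN RNN backward path rather than in my data, and that rerunning it with torch.backends.cudnn.enabled = False would tell me whether cuDNN is the culprit. All names and sizes below are just for this test, not taken from my actual code.

import torch
import torch.nn as nn
from torch.autograd import Variable

# Set this to False on a second run to check whether the assertion
# only happens on the cuDNN code path (assumption, not a confirmed fix).
torch.backends.cudnn.enabled = True

embed = nn.Embedding(20, 20).cuda()      # same vocab / embedding size as above
gru = nn.GRU(20, 32, num_layers=1, batch_first=True).cuda()
linear = nn.Linear(32, 268).cuda()
criterion = nn.CrossEntropyLoss()

# Random batch with the same shape as my real input: 64 x 1602 token ids
seqs = Variable(torch.LongTensor(64, 1602).random_(0, 20)).cuda()
targets = Variable(torch.LongTensor(64).random_(0, 268)).cuda()

x = embed(seqs)
x, _ = gru(x)
out = linear(x[:, -1, :])
loss = criterion(out, targets)
loss.backward()                           # does the same assertion fire here?
print(loss.data[0])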