From what I can tell, to do text classification you call
`get_rnn_classifier`, which creates the RNN part of the model plus the classifier head. The classifier source code is as follows:
```python
# (imports added so the snippet stands alone)
import torch
import torch.nn as nn
import torch.nn.functional as F

class LinearBlock(nn.Module):
    """BatchNorm -> Dropout -> Linear, in that order."""
    def __init__(self, ni, nf, drop):
        super().__init__()
        self.lin = nn.Linear(ni, nf)
        self.drop = nn.Dropout(drop)
        self.bn = nn.BatchNorm1d(ni)

    def forward(self, x):
        return self.lin(self.drop(self.bn(x)))

class PoolingLinearClassifier(nn.Module):
    def __init__(self, layers, drops):
        super().__init__()
        self.layers = nn.ModuleList([
            LinearBlock(layers[i], layers[i + 1], drops[i])
            for i in range(len(layers) - 1)])

    def pool(self, x, bs, is_max):
        # Pool over the sequence dimension down to one value per feature
        f = F.adaptive_max_pool1d if is_max else F.adaptive_avg_pool1d
        return f(x.permute(1, 2, 0), (1,)).view(bs, -1)

    def forward(self, input):
        raw_outputs, outputs = input
        output = outputs[-1]          # hidden states of the last RNN layer
        sl, bs, _ = output.size()
        avgpool = self.pool(output, bs, False)
        mxpool = self.pool(output, bs, True)
        # "Concat pooling": last time step + max pool + avg pool
        x = torch.cat([output[-1], mxpool, avgpool], 1)
        for l in self.layers:
            l_x = l(x)
            x = F.relu(l_x)
        # Note: the returned l_x is the last block's output *before* the ReLU
        return l_x, raw_outputs, outputs
```
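To double-check my reading of the shapes, I pushed some fake encoder outputs through the head. The shapes here are my own assumption of what the RNN encoder emits (a list of `seq_len x batch x hidden` tensors), and the layer sizes are made up rather than fastai's defaults:

```python
import torch

seq_len, bs, n_hid, n_class = 20, 4, 50, 5
fake_layer_out = torch.randn(seq_len, bs, n_hid)  # one layer's hidden states
raw_outputs, outputs = [fake_layer_out], [fake_layer_out]

# Concat pooling triples the feature width ([last step, max, avg] -> 3*n_hid),
# so the first LinearBlock has to be sized 3 * n_hid.
head = PoolingLinearClassifier(layers=[3 * n_hid, n_class], drops=[0.1])
head.eval()  # disable dropout / use BN running stats for a deterministic pass

logits, raw, outs = head((raw_outputs, outputs))
print(logits.shape)  # torch.Size([4, 5]) -- raw scores, no softmax applied
```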
To me it looks like each `LinearBlock` applies BatchNorm and then dropout to its input, so the final linear layer, the one producing the class scores, gets BN and dropout right before it (the ReLU only sits between blocks, since the returned `l_x` is pre-ReLU). Isn't this unusual? Why would you do these ops right before producing the log probabilities?
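For comparison, the kind of head I would have expected looks more like this (purely my own sketch, not anything from the repo):

```python
import torch.nn as nn

# What I'd naively expect: regularization between hidden layers,
# but a bare Linear producing the final scores.
expected_head = nn.Sequential(
    nn.Linear(150, 50),
    nn.ReLU(),
    nn.Dropout(0.1),
    nn.Linear(50, 5),  # raw logits; no BN/dropout on this layer's input
)
```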
Apologies if this is answered somewhere in the video lectures; I was just poking through the source code trying to get an understanding of the model.