Hi, @Samuel. So, before you ask: I have already used the ULMFiT model to get h_c from the pretrained RNN. Here is the code I used to extract features for clustering:
# Hyperparameters: backprop-through-time length, embedding size,
# hidden size, and number of stacked RNN layers.
bptt,em_sz,nh,nl = 64,200,512,3
# Vocabulary size taken from the int-to-string mapping.
vs = len(itos)
# NOTE(review): this opt_fn is overwritten further down (betas=(0.7, 0.99))
# before it is ever used.
opt_fn = partial(optim.Adam, betas=(0.8, 0.99))
bs = 48
# Bare expression — only meaningful as a notebook cell output.
len(test_clas)
# 10393
# Presumably dummy targets: the dataset needs a y of matching length,
# but the values are never used for training here — TODO confirm.
y = np.zeros((len(test_clas), 3*em_sz))
test_ds = TextDataset(test_clas, y)
test_dl = DataLoader(test_ds, bs, transpose=True, num_workers=1, pad_idx=1)
# A small hack: we never train this model, so test_dl is simply passed
# as the trn_dl argument of ModelData.
md = ModelData(PATH, test_dl, None)
# Define a custom head that just returns h_c (the concat-pooled features)
class PoolingLinearClustering(nn.Module):
    """Classifier-style head that returns concat-pooled encoder features.

    Instead of producing class scores, ``forward`` returns h_c: the
    concatenation of the last hidden state, the max-pool over time, and
    the average-pool over time of the encoder output — a (bs, 3*emb)
    feature matrix suitable for clustering.
    """

    def __init__(self, layers, drops):
        super().__init__()
        # Kept for interface parity with PoolingLinearClassifier; note
        # these linear blocks are never applied in forward(), and with the
        # single-element `layers` used in this script the list is empty.
        self.layers = nn.ModuleList([
            LinearBlock(layers[i], layers[i + 1], drops[i])
            for i in range(len(layers) - 1)])

    def pool(self, x, bs, is_max):
        # x is (seq_len, batch, emb); pool over the time axis to (bs, emb).
        f = F.adaptive_max_pool1d if is_max else F.adaptive_avg_pool1d
        return f(x.permute(1, 2, 0), (1,)).view(bs, -1)

    def forward(self, input):
        raw_outputs, outputs = input
        output = outputs[-1]  # last RNN layer: (seq_len, batch, emb)
        sl, bs, _ = output.size()
        avgpool = self.pool(output, bs, False)
        mxpool = self.pool(output, bs, True)
        # h_c = [last time-step, max-pool, avg-pool] -> (bs, 3*emb)
        x = torch.cat([output[-1], mxpool, avgpool], 1)
        return x, raw_outputs, outputs
# Define a function that builds the RNN model with the clustering head defined above
def get_rnn_clustering(bptt, max_seq, n_tok, emb_sz, n_hid, n_layers, pad_token,
                       layers=None, drops=None, bidir=False,
                       dropouth=0.3, dropouti=0.5, dropoute=0.1, wdrop=0.5):
    """Build a SequentialRNN: MultiBatchRNN encoder + PoolingLinearClustering head.

    Parameters mirror fastai's get_rnn_classifier; `layers`/`drops` configure
    the (unused) linear blocks of the head, and the dropout* arguments are
    forwarded to the encoder.

    Returns the assembled SequentialRNN model.
    """
    # Fix the original mutable default (`layers=[em_sz*3]`), which also
    # silently captured the *global* em_sz at definition time; derive the
    # default from the emb_sz actually passed in (same value, [600], here).
    if layers is None:
        layers = [emb_sz * 3]
    rnn_enc = MultiBatchRNN(bptt, max_seq, n_tok, emb_sz, n_hid, n_layers,
                            pad_token=pad_token, bidir=bidir,
                            dropouth=dropouth, dropouti=dropouti,
                            dropoute=dropoute, wdrop=wdrop)
    return SequentialRNN(rnn_enc, PoolingLinearClustering(layers, drops))
# Dropout schedule (the usual ULMFiT classifier values, scaled down by half).
dps = np.array([0.4,0.5,0.05,0.3,0.4])*0.5
# Build the model: encoder + clustering head (max sequence length 20*70).
m = get_rnn_clustering(bptt, 20*70, vs, emb_sz=em_sz, n_hid=nh, n_layers=nl, pad_token=1,
layers=[em_sz*3],
drops=[dps[4], 0.1],
dropouti=dps[0], wdrop=dps[1], dropoute=dps[2], dropouth=dps[3])
# Define the learner as usual.
opt_fn = partial(optim.Adam, betas=(0.7, 0.99))
learn = RNN_Learner(md, TextModel(to_gpu(m)), opt_fn=opt_fn)
learn.reg_fn = partial(seq2seq_reg, alpha=2, beta=1)
learn.clip=25.
learn.metrics = [accuracy]
# Attach test_dl to the learner's data object:
learn.data.test_dl = test_dl
# Then use the ordinary predict on the test set.
predictions = learn.predict(is_test=True)
predictions.shape
# The shape equals (len(test_clas), 3*em_sz) — in my case (10393, 600).