Here is what I do to use embeddings from a neural net in a random forest:
def embed_features(learner, xs):
    """Replace each categorical column of *xs* with its learned embedding.

    For the i-th name in ``learner.dls.cat_names`` the column is looked up in
    ``learner.model.embeds[i]`` and swapped for ``embedding_dim`` float
    columns named ``{feature}_0 .. {feature}_{dim-1}``. Non-categorical
    columns keep their order and come first; the embedding columns follow in
    ``cat_names`` order.

    Args:
        learner: Object exposing ``dls.cat_names`` and ``model.embeds``
            (presumably a fastai tabular learner -- confirm against caller).
        xs: DataFrame whose categorical columns hold integer category codes.

    Returns:
        A new DataFrame; *xs* itself is not modified.
    """
    # Use the learner that was passed in, not a global that happens to exist.
    cat_names = list(learner.dls.cat_names)
    embedded = []
    # Inference only: no autograd graph is needed, and a tensor that requires
    # grad cannot be converted to NumPy directly.
    with torch.no_grad():
        for feature, emb in zip(cat_names, learner.model.embeds):
            codes = torch.as_tensor(
                xs[feature].to_numpy(),
                dtype=torch.int64,
                device=emb.weight.device,  # indices must live with the weights
            )
            embedded.append(
                pd.DataFrame(
                    emb(codes).cpu().numpy(),
                    index=xs.index,
                    columns=[f'{feature}_{j}' for j in range(emb.embedding_dim)],
                )
            )
    # drop() returns a new frame, so the caller's DataFrame stays untouched;
    # a single concat avoids re-joining the frame once per feature.
    return pd.concat([xs.drop(columns=cat_names), *embedded], axis=1)
I then used that function like this:
# Apply embed_features to the training and validation feature frames of the
# same learner, so both sets get identical embedding columns.
# NOTE(review): assumes `learn` is an already-trained learner whose
# `dls.train.xs` / `dls.valid.xs` are DataFrames -- confirm.
embeded_xs = embed_features(learn, learn.dls.train.xs)
xs_valid = embed_features(learn, learn.dls.valid.xs)
Hope this helps someone!