Thanks for the quick turnaround!!
In this code:
def intrinsic_attention(learn, text, class_id=None):
    "Calculate the intrinsic attention of the input w.r.t. an output `class_id`, or the classification given by the model if `None`."
    # Train mode so gradients flow, but dropout disabled so the forward
    # pass is deterministic.
    learn.model.train()
    _eval_dropouts(learn.model)
    learn.model.zero_grad()
    learn.model.reset()
    # Build a one-item test DataLoader from the Learner's own dls
    # (was bare `dls.test_dl`, which raised NameError: name 'dls' is not defined).
    dl = learn.dls.test_dl([text])
    # Tokenized input ids; every later line refers to this tensor as `batch`
    # (was bound to `ids`, which raised NameError: name 'batch' is not defined).
    batch = dl.one_batch()[0]
    # Embed the ids, detach from the encoder's graph, and track gradients on
    # the embeddings themselves -- these gradients are the intrinsic attention.
    emb = learn.model[0].module.encoder(batch).detach().requires_grad_(True)
    lstm = learn.model[0].module(emb, True)
    learn.model.eval()
    # Run the classifier head; the all-False mask marks no position as padding.
    cl = learn.model[1]((lstm, torch.zeros_like(batch).bool(),))[0].softmax(dim=-1)
    # Default to the model's own prediction when no target class is given.
    if class_id is None: class_id = cl.argmax()
    cl[0][class_id].backward()
    # Per-token attention = L1 norm of the embedding gradient over the
    # embedding dim, normalized to [0, 1]. NOTE(review): if the gradients are
    # NaN (e.g. under fp16 training) this normalization propagates the NaNs.
    attn = emb.grad.squeeze().abs().sum(dim=-1)
    attn /= attn.max()
    tok, _ = learn.dls.decode_batch((*tuplify(batch), *tuplify(cl)))[0]
    return tok, attn
I got this error:
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-62-139f078376c4> in <module>
----> 1 show_intrinsic_attention(learn,"Superman is the best superhero! No one will ever defeat him!")
<ipython-input-61-ec8033bf013f> in show_intrinsic_attention(learn, text, class_id, **kwargs)
55
56 def show_intrinsic_attention(learn, text:str, class_id:int=None, **kwargs)->None:
---> 57 text, attn = intrinsic_attention(learn, text, class_id)
58 show_piece_attn(text.split(), to_np(attn), **kwargs)
<ipython-input-61-ec8033bf013f> in intrinsic_attention(learn, text, class_id)
15 learn.model.zero_grad()
16 learn.model.reset()
---> 17 dl = dls.test_dl([text])
18 ids = dl.one_batch()[0]
19 emb = learn.model[0].module.encoder(batch).detach().requires_grad_(True)
NameError: name 'dls' is not defined
Which I fixed by changing this line:
dl = dls.test_dl([text])
to:
dl = learn.dls.test_dl([text])
Then I got:
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-64-139f078376c4> in <module>
----> 1 show_intrinsic_attention(learn,"Superman is the best superhero! No one will ever defeat him!")
<ipython-input-63-86ff1b978f06> in show_intrinsic_attention(learn, text, class_id, **kwargs)
55
56 def show_intrinsic_attention(learn, text:str, class_id:int=None, **kwargs)->None:
---> 57 text, attn = intrinsic_attention(learn, text, class_id)
58 show_piece_attn(text.split(), to_np(attn), **kwargs)
<ipython-input-63-86ff1b978f06> in intrinsic_attention(learn, text, class_id)
17 dl = learn.dls.test_dl([text])
18 ids = dl.one_batch()[0]
---> 19 emb = learn.model[0].module.encoder(batch).detach().requires_grad_(True)
20 lstm = learn.model[0].module(emb, True)
21 learn.model.eval()
NameError: name 'batch' is not defined
Which I fixed by changing line 18:
ids = dl.one_batch()[0]
to:
batch = dl.one_batch()[0]
Since “ids” wasn’t referenced anywhere else, while the following lines all use “batch”. Now, however, the output has every word highlighted the same, and the attention values come out as NaN.
Here’s how I created the learners for both (I only use one of the DataBlocks at a time):
#This is for a normal category prediction, where only one can be correct.
# Text comes from the 'names' column (reusing the language-model vocab from
# `dbunch`); the target is the 'number' column as a single category.
# 80/20 split, stratified on the label so class proportions are preserved.
imdb_clas = DataBlock(blocks=(TextBlock.from_df(['names'], vocab=dbunch.vocab), CategoryBlock),
get_x=attrgetter('text'),
get_y=attrgetter('number'),
splitter=TrainTestSplitter(test_size = 0.2, stratify=df_numbers['number'], random_state = 12))
#This is a regression. Use this to predict a floating point number.
# Same text input as above but the target is treated as a continuous value.
# NOTE(review): the splitter stratifies on `df_scores['number']` while `df=df_numbers`
# is also passed -- two different dataframes are referenced here; confirm which one
# this DataBlock is actually built from. Also confirm that `TrainTestSplitter`
# accepts a `df=` keyword at all -- it may be silently ignored or raise a TypeError.
# NOTE(review): stratifying a regression target only makes sense if 'number'
# takes a small set of discrete values -- verify.
imdb_clas = DataBlock(blocks=(TextBlock.from_df(['names'], vocab=dbunch.vocab), RegressionBlock),
get_x=attrgetter('text'),
get_y=attrgetter('number'),
splitter=TrainTestSplitter(test_size = 0.1, stratify=df_scores['number'], df=df_numbers, random_state = 24)
)
#For regressions
# Save the best model and stop after 3 epochs without improvement.
callbacks = [SaveModelCallback(),EarlyStoppingCallback(patience=3)]
# AWD_LSTM classifier head trained as a regressor: MSE loss with the output
# squashed into y_range=(-3,3).
# NOTE(review): `.to_fp16()` trains in half precision -- a likely source of the
# NaN attention values reported above, since fp16 gradients can overflow or
# underflow during the manual `.backward()` call; try `.to_fp32()` to confirm.
learn = text_classifier_learner(dbunch_class, AWD_LSTM, drop_mult=0.5, loss_func=MSELossFlat(), wd = 0.1, y_range=(-3,3), cbs=callbacks).to_fp16()
# Load the encoder fine-tuned on the language-model task.
learn = learn.load_encoder('finetuned6_208.pkl')
Thanks for the help!