If y’all will be working on an official implementation of this tomorrow, we’d also love to see this brought over from https://github.com/fastai/fastai/blob/master/fastai/text/interpret.py
def show_top_losses(self, k:int, max_len:int=70)->None:
"""
Create a tabulation showing the first `k` texts in top_losses along with their prediction, actual,loss, and probability of
actual class. `max_len` is the maximum number of tokens displayed.
"""
from IPython.display import display, HTML
items = []
tl_val,tl_idx = self.top_losses()
for i,idx in enumerate(tl_idx):
if k <= 0: break
k -= 1
tx,cl = self.data.dl(self.ds_type).dataset[idx]
cl = cl.data
classes = self.data.classes
txt = ' '.join(tx.text.split(' ')[:max_len]) if max_len is not None else tx.text
tmp = [txt, f'{classes[self.pred_class[idx]]}', f'{classes[cl]}', f'{self.losses[idx]:.2f}',
f'{self.preds[idx][cl]:.2f}']
items.append(tmp)
items = np.array(items)
names = ['Text', 'Prediction', 'Actual', 'Loss', 'Probability']
df = pd.DataFrame({n:items[:,i] for i,n in enumerate(names)}, columns=names)
with pd.option_context('display.max_colwidth', pd_max_colwidth()):
display(HTML(df.to_html(index=False)))
Originally mentioned here:
It would be really useful in fastai2.
If not, no problem. We all appreciate your work @sgugger and @muellerzr !