Hi guys,
I’m trying to implement a custom metric that returns the number of valid SMILES strings (it’s a chemical notation to describe a molecule in 1-line) during training of a LSTM model.
So far, I wrote a callback that samples the model each epoch and it works fine. I can print the sampled SMILES and see what is being generated. Here’s my callback and my test metric:
#export
class MolSampler_V2(Callback):
def __init__(self, text:str='', max_size:int=30, temperature:float=1.0, max_mols:int=5):
super().__init__()
self.text = text
self.max_size = max_size
self.temperature = temperature
self.max_mols = max_mols
def sampling(self):
act = getattr(learn.loss_func, 'activation', noop)
self.model.reset() # Reset the model
stop_index = self.dls.train.vocab.index(BOS) # Define the stop token
idxs = self.dls.test_dl([self.text]).items[0].to(self.dls.device)
nums = self.dls.train_ds.numericalize # Numericalize (used to decode)
accum_idxs = [] # Store predicted tokens
for _ in range(self.max_size):
with torch.no_grad(): preds=self.model(idxs[None])[0][-1]
res = act(preds)
if self.temperature != 1.: res.pow_(1 / self.temperature)
idx = torch.multinomial(res, 1).item()
if idx != stop_index:
accum_idxs.append(idx)
idxs = TensorText(idxs.new_tensor([idx]))
else:
break
decoded = ''.join([nums.vocab[o] for o in accum_idxs if nums.vocab[o] not in [BOS, PAD]]) # Decode predicted tokens
return decoded
def before_epoch(self):
self.learn.smiles = []
def after_epoch(self):
self.learn.smiles += [self.sampling() for _ in range(self.max_mols)]
def func(self):
#print(self.learn.smiles)
return len(self.learn.smiles)
class TestMetric(ValueMetric):
@property
def value(self): return self.func
What I want to do now is to create a metric that returns len(self.learn.smiles)
. However, when I create my learner I get an error: AttributeError: 'NoneType' object has no attribute 'smiles'
.
Anybody tried something similar? I think this might be easier than what I’m doing, but I really cant see it.