| from transformers import AutoModelForMaskedLM |
| import numpy as np |
| from tdc import Oracle, Evaluator |
|
|
class MolScoringFunctions:
    """Compute a matrix of molecular property scores using TDC oracles."""

    # SA transforms with an explicit implementation in `_transform_sa`.
    # Any other value is treated as 'inverse' (kept for backward compatibility).
    _SA_TRANSFORMS = ('inverse', 'linear')

    def __init__(self, score_func_names=None, device=None, sa_transform='inverse'):
        """
        Class for generating score vectors given generated sequence

        Args:
            score_func_names: list of scoring function names to be evaluated;
                the order of names is the order of the output columns.
                Names registered here are 'qed' and 'sa'. ``None`` is
                treated as an empty list (no scoring functions).
            device: not used by this class; accepted so existing callers
                that pass it keep working.
            sa_transform: how to transform SA scores to higher-is-better ~[0,1]:
                'inverse' (default): 1/(1+SA) — range ~0.09-0.5, weak gradient
                'linear': (10-SA)/9, clipped below at 0 — range ~0-1,
                    stronger gradient
                Any other value falls back to 'inverse'; a warning is emitted
                so that typos do not go unnoticed.
        """
        self.score_func_names = [] if score_func_names is None else score_func_names

        if sa_transform not in self._SA_TRANSFORMS:
            # Local import: keeps this block self-contained without touching
            # the file-level import section.
            import warnings

            warnings.warn(
                f"Unrecognized sa_transform {sa_transform!r}; falling back to "
                f"'inverse'. Expected one of {self._SA_TRANSFORMS}.",
                stacklevel=2,
            )
        self.sa_transform = sa_transform

        # Registry mapping a scoring-function name to the callable evaluating it.
        self.all_funcs = {
            'qed': Oracle('qed'),
            'sa': Oracle('sa'),
        }

    def _transform_sa(self, raw_sa):
        """Apply the configured SA transform to the raw SA oracle output."""
        sa = np.array(raw_sa)
        if self.sa_transform == 'linear':
            # Clip at zero so raw SA values above 10 cannot go negative.
            return np.maximum((10.0 - sa) / 9.0, 0.0)
        return 1.0 / (1.0 + sa)

    def forward(self, input_seqs):
        """
        Evaluate every configured scoring function on ``input_seqs``.

        Args:
            input_seqs: sequences handed unchanged to each scoring function
                (``__call__`` annotates this as a list).

        Returns:
            float32 numpy array: the per-function results stacked and then
            transposed, i.e. one column per entry of ``score_func_names``
            (one row per sequence, assuming each scoring function returns one
            score per input sequence). With no scoring functions configured
            the result is an empty 1-D array.

        Raises:
            KeyError: if ``score_func_names`` contains a name that is not in
                ``all_funcs``. Checked before any scoring function runs.
        """
        # Fail fast with a descriptive message instead of a bare KeyError
        # raised midway, after other oracles have already been evaluated.
        unknown = [name for name in self.score_func_names
                   if name not in self.all_funcs]
        if unknown:
            raise KeyError(
                f"Unknown scoring function(s) {unknown}; "
                f"available: {sorted(self.all_funcs)}"
            )

        columns = []
        for name in self.score_func_names:
            values = self.all_funcs[name](input_seqs)
            if name == 'sa':
                values = self._transform_sa(values)
            columns.append(values)

        # Stacked as (n_funcs, ...); transpose so functions become columns.
        return np.asarray(columns, dtype=np.float32).T

    def __call__(self, input_seqs: list):
        """Shorthand for :meth:`forward`."""
        return self.forward(input_seqs)
|
|
|
|
def unittest():
    """Smoke check: score one example SMILES with 'qed' and 'sa' and print the result."""
    example_smiles = ['CCOc1cc(ccc1NC(=O)N[C@@H]2CCCC[C@@H]2O)F']
    scorer = MolScoringFunctions(score_func_names=['qed', 'sa'])

    result = scorer(input_seqs=example_smiles)

    # Print the score array, then its length along the first axis.
    print(result)
    print(len(result))
|
|
# Script entry point: run the smoke check only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    unittest()