| | from tokenizer.my_tokenizers import SMILES_SPE_Tokenizer |
| | from transformers import AutoModelForMaskedLM |
| | import numpy as np |
| | from scoring.functions.binding import BindingAffinity |
| | from scoring.functions.permeability import Permeability |
| | from scoring.functions.solubility import Solubility |
| | from scoring.functions.hemolysis import Hemolysis |
| | from scoring.functions.nonfouling import Nonfouling |
| |
|
| | # Placeholder root of the TR2-D2 checkout (tokenizer vocab/splits and model
| | # checkpoints are resolved relative to this) — set to your environment's
| | # actual home directory before running.
| | base_path = '/path/to/your/home'
| |
|
| | class ScoringFunctions: |
| | def __init__(self, score_func_names=None, prot_seqs=None, device=None): |
| | """ |
| | Class for generating score vectors given generated sequence |
| | |
| | Args: |
| | score_func_names: list of scoring function names to be evaluated |
| | score_weights: weights to scale scores (default: 1) |
| | target_protein: sequence of target protein binder |
| | """ |
| | emb_model = AutoModelForMaskedLM.from_pretrained('aaronfeller/PeptideCLM-23M-all').roformer.to(device).eval() |
| | tokenizer = SMILES_SPE_Tokenizer(f'{base_path}/TR2-D2/tr2d2-pep/tokenizer/new_vocab.txt', |
| | f'{base_path}/TR2-D2/tr2d2-pep/tokenizer/new_splits.txt') |
| | prot_seqs = prot_seqs if prot_seqs is not None else [] |
| | |
| | if score_func_names is None: |
| | |
| | self.score_func_names = [] |
| | else: |
| | self.score_func_names = score_func_names |
| | |
| | |
| | |
| | |
| | self.target_protein = prot_seqs |
| | print(len(prot_seqs)) |
| | |
| | if ('binding_affinity1' in score_func_names) and (len(prot_seqs) == 1): |
| | binding_affinity1 = BindingAffinity(prot_seqs[0], tokenizer=tokenizer, base_path=base_path, device=device) |
| | binding_affinity2 = None |
| | elif ('binding_affinity1' in score_func_names) and ('binding_affinity2' in score_func_names) and (len(prot_seqs) == 2): |
| | binding_affinity1 = BindingAffinity(prot_seqs[0], tokenizer=tokenizer, base_path=base_path, device=device) |
| | binding_affinity2 = BindingAffinity(prot_seqs[1], tokenizer=tokenizer, base_path=base_path, device=device) |
| | else: |
| | print("here") |
| | binding_affinity1 = None |
| | binding_affinity2 = None |
| |
|
| | permeability = Permeability(tokenizer=tokenizer, base_path=base_path, device=device, emb_model=emb_model) |
| | sol = Solubility(tokenizer=tokenizer, base_path=base_path, device=device, emb_model=emb_model) |
| | nonfouling = Nonfouling(tokenizer=tokenizer, base_path=base_path, device=device, emb_model=emb_model) |
| | hemo = Hemolysis(tokenizer=tokenizer, base_path=base_path, device=device, emb_model=emb_model) |
| |
|
| | self.all_funcs = {'binding_affinity1': binding_affinity1, |
| | 'binding_affinity2': binding_affinity2, |
| | 'permeability': permeability, |
| | 'nonfouling': nonfouling, |
| | 'solubility': sol, |
| | 'hemolysis': hemo |
| | } |
| | |
| | def forward(self, input_seqs): |
| | scores = [] |
| | |
| | for i, score_func in enumerate(self.score_func_names): |
| | score = self.all_funcs[score_func](input_seqs = input_seqs) |
| | |
| | scores.append(score) |
| | |
| | |
| | scores = np.float32(scores).T |
| | |
| | return scores |
| | |
| | def __call__(self, input_seqs: list): |
| | return self.forward(input_seqs) |