File size: 4,322 Bytes
baeba65 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 |
from modrag_protein_functions import uniprot_node, listbioactives_node, getbioactives_node, predict_node, docking_node
from modrag_molecule_functions import smiles_node
def get_actives_for_protein(query_protein: str):
'''
Finds Bioactive molecules for a give protein. Uses Uniprot to find chembl IDs for the
protein, and then queries chembl for bioative molecules.
Args:
query_protein: The protein to search for.
Returns:
bioactives_for_protein_string: A string containing bioactive molecules.
bioacts_images: A list of images of the bioactives.
'''
bioactives_for_protein_string = ''
# find UP accession codes for protein
up_ac_list, ids_string, _ = uniprot_node([query_protein])
bioactives_for_protein_string += 'Found the following Uniprot ACs: \n'
for up_ac in up_ac_list[0]:
bioactives_for_protein_string += up_ac + ', \n'
# find chembl IDs for each accession code
bioacts, chembl_string, chembl_ids = listbioactives_node(up_ac_list[0])
bioactives_for_protein_string += 'Found the following chembl IDs: \n'
#check for chembl IDs with bioactives:
largest = 0
for bioact_num, chemblid in zip(bioacts[0], chembl_ids[0]):
bioactives_for_protein_string += f'{chemblid}: {bioact_num} bioactive molecules. \n'
if bioact_num > largest:
largest = bioact_num
largest_id = chemblid
if largest == 0:
return 'No bioactives found for protein', None
bioactives_for_protein_string += f'Chose the Chembl ID {largest_id} with {largest} bioactive molecules. \n'
# get list of bioactives for best chembl_id
bioacts, bioacts_string, bioacts_images = getbioactives_node([largest_id])
bioactives_for_protein_string += bioacts_string
return bioacts, bioactives_for_protein_string, bioacts_images
def get_predictions_for_protein(smiles_list: list[str], query_protein: str):
'''
Uses Uniprot to find chembl IDs for the protein, and then queries chembl for
bioactive molecules to train a model and predict the activity of the given smiles.
Args:
smiles_list: A list of SMILES strings.
query_protein: The protein to search for.
Returns:
bioactives_for_protein_string: A string containing bioactive molecules.
bioacts_images: A list of images of the bioactives.
'''
predictions_string = ''
# find UP accession codes for protein
up_ac_list, ids_string, _ = uniprot_node([query_protein])
predictions_string += 'Found the following Uniprot ACs: \n'
for up_ac in up_ac_list[0]:
predictions_string += up_ac + ', \n'
# find chembl IDs for each accession code
bioacts, chembl_string, chembl_ids = listbioactives_node(up_ac_list[0])
predictions_string += 'Found the following chembl IDs: \n'
#check for chembl IDs with bioactives:
largest = 0
for bioact_num, chemblid in zip(bioacts[0], chembl_ids[0]):
predictions_string += f'{chemblid}: {bioact_num} bioactive molecules. \n'
if bioact_num > largest:
largest = bioact_num
largest_id = chemblid
if largest == 0:
return [], 'No bioactives found for protein'
predictions_string += f'Chose the Chembl ID {largest_id} with {largest} bioactive molecules. \n'
# train the model on the chembl ID and then predict
preds, preds_string, _ = predict_node(smiles_list, largest_id)
predictions_string += preds_string
return preds, predictions_string, None
def dock_from_names(names_list: list[str], protein: str):
'''
Accepts names of molecules and docks them in a given protein.
Args:
names_list: A list of names of molecules.
protein: The protein to dock in.
Returns:
dock_from_names_string: A string containing the docking scores for the molecules.
'''
dock_from_names_string = ''
# get SMILES for names:
smiles_list, smiles_string, _ = smiles_node(names_list)
for smiles, names in zip(smiles_list, names_list):
dock_from_names_string += f'The SMILES for {names} is {smiles}. \n'
#send for docking
scores, scores_string, _ = docking_node(smiles_list, protein)
for score, name in zip(scores, names_list):
dock_from_names_string += f'The docking score for {name} is {score}. \n'
return scores, dock_from_names_string, None
|