File size: 4,322 Bytes
baeba65
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
from modrag_protein_functions import uniprot_node, listbioactives_node, getbioactives_node, predict_node, docking_node
from modrag_molecule_functions import smiles_node

def get_actives_for_protein(query_protein: str):
  '''
  Finds bioactive molecules for a given protein. Uses Uniprot to find accession
  codes for the protein, lists the chembl IDs found for those codes, and then
  fetches the bioactive molecules for the chembl ID that has the most of them.

  Args:
    query_protein: The protein to search for.

  Returns:
    A 3-tuple, on both the success and the failure path:

    bioacts: The first value returned by getbioactives_node for the chosen
      chembl ID, or an empty list when no chembl ID has any bioactives.
    bioactives_for_protein_string: A string listing the Uniprot ACs and chembl
      IDs that were found, the chembl ID that was chosen, and the text returned
      by getbioactives_node. When no chembl ID has any bioactives this is just
      'No bioactives found for protein'.
    bioacts_images: The third value returned by getbioactives_node (presumably
      images of the bioactives - confirm against that function), or None when
      no bioactives were found.
  '''

  bioactives_for_protein_string = ''

  # find UP accession codes for protein
  up_ac_list, _, _ = uniprot_node([query_protein])

  bioactives_for_protein_string += 'Found the following Uniprot ACs: \n'
  for up_ac in up_ac_list[0]:
    bioactives_for_protein_string += up_ac + ', \n'

  # find chembl IDs for each accession code
  bioacts, _, chembl_ids = listbioactives_node(up_ac_list[0])

  bioactives_for_protein_string += 'Found the following chembl IDs: \n'

  # pick the chembl ID with the most bioactives; ties keep the first one seen
  largest = 0
  largest_id = None
  for bioact_num, chemblid in zip(bioacts[0], chembl_ids[0]):
    bioactives_for_protein_string += f'{chemblid}: {bioact_num} bioactive molecules. \n'
    if bioact_num > largest:
      largest = bioact_num
      largest_id = chemblid
  if largest == 0:
    # keep the same 3-tuple shape as the success path so callers can always
    # unpack (bioacts, string, images)
    return [], 'No bioactives found for protein', None

  bioactives_for_protein_string += f'Chose the Chembl ID {largest_id} with {largest} bioactive molecules. \n'
  # get list of bioactives for best chembl_id
  bioacts, bioacts_string, bioacts_images = getbioactives_node([largest_id])

  bioactives_for_protein_string += bioacts_string

  return bioacts, bioactives_for_protein_string, bioacts_images

def get_predictions_for_protein(smiles_list: list[str], query_protein: str):
  '''
  Predicts the activity of the given SMILES against a protein. Uses Uniprot to
  find accession codes for the protein, lists the chembl IDs found for those
  codes, and then passes the chembl ID with the most bioactive molecules to
  predict_node together with the SMILES.

  Args:
    smiles_list: A list of SMILES strings.
    query_protein: The protein to search for.

  Returns:
    A 3-tuple, on both the success and the failure path:

    preds: The first value returned by predict_node, or an empty list when no
      chembl ID has any bioactives.
    predictions_string: A string listing the Uniprot ACs and chembl IDs that
      were found, the chembl ID that was chosen, and the text returned by
      predict_node. When no chembl ID has any bioactives this is just
      'No bioactives found for protein'.
    None: Always None; this function produces no images.
  '''

  predictions_string = ''

  # find UP accession codes for protein
  up_ac_list, _, _ = uniprot_node([query_protein])

  predictions_string += 'Found the following Uniprot ACs: \n'
  for up_ac in up_ac_list[0]:
    predictions_string += up_ac + ', \n'

  # find chembl IDs for each accession code
  bioacts, _, chembl_ids = listbioactives_node(up_ac_list[0])

  predictions_string += 'Found the following chembl IDs: \n'

  # pick the chembl ID with the most bioactives; ties keep the first one seen
  largest = 0
  largest_id = None
  for bioact_num, chemblid in zip(bioacts[0], chembl_ids[0]):
    predictions_string += f'{chemblid}: {bioact_num} bioactive molecules. \n'
    if bioact_num > largest:
      largest = bioact_num
      largest_id = chemblid
  if largest == 0:
    # keep the same 3-tuple shape as the success path so callers can always
    # unpack (preds, string, images)
    return [], 'No bioactives found for protein', None

  predictions_string += f'Chose the Chembl ID {largest_id} with {largest} bioactive molecules. \n'
  # train the model on the chembl ID and then predict
  preds, preds_string, _ = predict_node(smiles_list, largest_id)

  predictions_string += preds_string

  return preds, predictions_string, None

def dock_from_names(names_list: list[str], protein: str):
  '''
  Looks up a SMILES string for each molecule name and docks the resulting
  molecules in the given protein.

  Args:
    names_list: A list of names of molecules.
    protein: The protein to dock in.

  Returns:
    A 3-tuple:

    scores: The first value returned by docking_node.
    dock_from_names_string: A string with one line per molecule giving its
      SMILES, followed by one line per molecule giving its docking score.
    None: Always None.
  '''
  # translate the names into SMILES and record what was found
  resolved_smiles, _smiles_text, _ = smiles_node(names_list)
  report_lines = [
    f'The SMILES for {mol_name} is {mol_smiles}. \n'
    for mol_smiles, mol_name in zip(resolved_smiles, names_list)
  ]

  # dock the molecules and record a score line for each name
  scores, _scores_text, _ = docking_node(resolved_smiles, protein)
  report_lines.extend(
    f'The docking score for {mol_name} is {mol_score}. \n'
    for mol_score, mol_name in zip(scores, names_list)
  )

  return scores, ''.join(report_lines), None