Spaces:

cafierom
/

MoDrAg2

Sleeping

App Files Files Community

cafierom committed on Feb 9

Commit

706c748

verified ·

1 Parent(s): 14d79fa

Upload 4 files

Browse files

Files changed (4) hide show

app.py +1 -0
input_parsing.py +8 -2
intake_function.py +3 -2
modrag_molecule_functions.py +43 -1

app.py CHANGED Viewed

@@ -660,6 +660,7 @@ full_tool_descriptions = {
   'smiles_node' : 'Queries Pubchem for the smiles string of the molecule based on the name.',
   'name_node' : 'Queries Pubchem for the name of the molecule based on the smiles string.',
   'related_node' : 'Queries Pubchem for similar molecules based on the smiles string or name.',
   'substitution_node' : 'A simple substitution routine that looks for a substituent on a phenyl ring and\
 substitutes different fragments in that location. Returns a list of novel molecules and their\
 QED score (1 is most drug-like, 0 is least drug-like).',

   'smiles_node' : 'Queries Pubchem for the smiles string of the molecule based on the name.',
   'name_node' : 'Queries Pubchem for the name of the molecule based on the smiles string.',
   'related_node' : 'Queries Pubchem for similar molecules based on the smiles string or name.',
+  'structure_node' : 'Generates the 3D structure and chemical formula of the molecule based on the name or smiles string.',
   'substitution_node' : 'A simple substitution routine that looks for a substituent on a phenyl ring and\
 substitutes different fragments in that location. Returns a list of novel molecules and their\
 QED score (1 is most drug-like, 0 is least drug-like).',

input_parsing.py CHANGED Viewed

@@ -3,7 +3,7 @@ from gliner import GLiNER
 import re
 import numpy as np
 from rdkit import Chem
-from modrag_molecule_functions import name_node, smiles_node, related_node
 from modrag_task_graphs import get_actives_for_protein, get_predictions_for_protein, dock_from_names
 from modrag_protein_functions import uniprot_node, listbioactives_node, getbioactives_node, predict_node, gpt_node, pdb_node, find_node, docking_node, target_node
 from modrag_property_functions import substitution_node, lipinski_node, pharmfeature_node
@@ -38,7 +38,7 @@ Find the similarity in the pharmacophores between ibuprofen and aspirin.',
     'name_node': 'Find the name of this molecule c1cc(O)ccc1',
     'smiles_node': 'Finds SMILES strings for cyclohexane and aspirin',
     'related_node': 'Find molecules similar to c1cc(O)ccc1',
     # modrag_task_graphs.py
     'get_actives_for_protein': 'Find the bioactive molecules for the protein DNA gyrase.',
     'get_predictions_for_protein': 'Predict the IC50 value for c1cc(O)ccc1 in the protein DNA gyrase.',
@@ -251,6 +251,9 @@ def define_tool_hash(tool: str, proteins_list, names_list, diseases_list, smiles
   elif tool == 'related_node':
     tool_function_hash = {
         'related_node': [related_node, [smiles_list]]}
   elif tool == 'get_predictions_for_protein':
     tool_function_hash = {
         'get_predictions_for_protein': [get_predictions_for_protein, [smiles_list, proteins_list[0]]]}
@@ -325,6 +328,9 @@ def define_tool_reqs(tool: str, proteins_list, names_list, diseases_list, smiles
   elif tool == 'related_node':
     tool_function_reqs = {
         'related_node': [[smiles_list], ['SMILES strings']]}
   elif tool == 'get_predictions_for_protein':
     tool_function_reqs = {
         'get_predictions_for_protein': [[smiles_list, proteins_list], ['SMILES strings', 'protein names']]}

 import re
 import numpy as np
 from rdkit import Chem
+from modrag_molecule_functions import name_node, smiles_node, related_node, structure_node
 from modrag_task_graphs import get_actives_for_protein, get_predictions_for_protein, dock_from_names
 from modrag_protein_functions import uniprot_node, listbioactives_node, getbioactives_node, predict_node, gpt_node, pdb_node, find_node, docking_node, target_node
 from modrag_property_functions import substitution_node, lipinski_node, pharmfeature_node
     'name_node': 'Find the name of this molecule c1cc(O)ccc1',
     'smiles_node': 'Finds SMILES strings for cyclohexane and aspirin',
     'related_node': 'Find molecules similar to c1cc(O)ccc1',
+    'structure_node': 'Find the structure of the molecule with SMILES string c1cc(O)ccc1, or the name Aspirin.',
     # modrag_task_graphs.py
     'get_actives_for_protein': 'Find the bioactive molecules for the protein DNA gyrase.',
     'get_predictions_for_protein': 'Predict the IC50 value for c1cc(O)ccc1 in the protein DNA gyrase.',
   elif tool == 'related_node':
     tool_function_hash = {
         'related_node': [related_node, [smiles_list]]}
+  elif tool == 'structure_node':
+    tool_function_hash = {
+        'structure_node': [structure_node, [smiles_list]]}
   elif tool == 'get_predictions_for_protein':
     tool_function_hash = {
         'get_predictions_for_protein': [get_predictions_for_protein, [smiles_list, proteins_list[0]]]}
   elif tool == 'related_node':
     tool_function_reqs = {
         'related_node': [[smiles_list], ['SMILES strings']]}
+  elif tool == 'structure_node':
+    tool_function_reqs = {
+        'structure_node': [[smiles_list], ['SMILES strings']]}
   elif tool == 'get_predictions_for_protein':
     tool_function_reqs = {
         'get_predictions_for_protein': [[smiles_list, proteins_list], ['SMILES strings', 'protein names']]}

intake_function.py CHANGED Viewed

@@ -637,6 +637,7 @@ full_tool_descriptions = {
   'smiles_node' : 'Queries Pubchem for the smiles string of the molecule based on the name.',
   'name_node' : 'Queries Pubchem for the name of the molecule based on the smiles string.',
   'related_node' : 'Queries Pubchem for similar molecules based on the smiles string or name.',
   'substitution_node' : 'A simple substitution routine that looks for a substituent on a phenyl ring and\
 substitutes different fragments in that location. Returns a list of novel molecules and their\
 QED score (1 is most drug-like, 0 is least drug-like).',
@@ -717,14 +718,14 @@ def websearch_node(query: str, embed_model, proxy_flag: bool = True) -> (list[st
       max_hits = len(scores)
     top_hits = []
     hits_idx = 0
-    while hits_idx < 10:
       current_hit_idx = np.argmax(scores[0])
       current_score = scores[0][current_hit_idx].item()
       top_hits.append((titles[current_hit_idx], links[current_hit_idx], current_score))
       scores[0][current_hit_idx] = -1
       hits_idx += 1
-    search_string = f'The top 10 hits for your query are:\n'
     i = 0
     for title, link, score in top_hits:
       search_string += f'{i}. {title}\nLink: {link}\nScore: {score:.3f}\n\n'

   'smiles_node' : 'Queries Pubchem for the smiles string of the molecule based on the name.',
   'name_node' : 'Queries Pubchem for the name of the molecule based on the smiles string.',
   'related_node' : 'Queries Pubchem for similar molecules based on the smiles string or name.',
+  'structure_node' : 'Generates the 3D structure and chemical formula of the molecule based on the name or smiles string.',
   'substitution_node' : 'A simple substitution routine that looks for a substituent on a phenyl ring and\
 substitutes different fragments in that location. Returns a list of novel molecules and their\
 QED score (1 is most drug-like, 0 is least drug-like).',
       max_hits = len(scores)
     top_hits = []
     hits_idx = 0
+    while hits_idx < max_hits:
       current_hit_idx = np.argmax(scores[0])
       current_score = scores[0][current_hit_idx].item()
       top_hits.append((titles[current_hit_idx], links[current_hit_idx], current_score))
       scores[0][current_hit_idx] = -1
       hits_idx += 1
+    search_string = f'The top {max_hits} hits for your query are:\n'
     i = 0
     for title, link, score in top_hits:
       search_string += f'{i}. {title}\nLink: {link}\nScore: {score:.3f}\n\n'

modrag_molecule_functions.py CHANGED Viewed

@@ -10,6 +10,7 @@ import os, re
 from rdkit import RDConfig
 import pubchempy as pcp
 from PIL import Image
 def name_node(smiles_list: list[str]) -> (list[str], str):
   '''
@@ -118,4 +119,45 @@ def related_node(smiles_list: list[str]) -> (list[list[str]], str, list):
         total_similar_list.append([])
         all_images.append(None)
-  return total_similar_list, related_string, all_images

 from rdkit import RDConfig
 import pubchempy as pcp
 from PIL import Image
+from collections import Counter
 def name_node(smiles_list: list[str]) -> (list[str], str):
   '''
         total_similar_list.append([])
         all_images.append(None)
+  return total_similar_list, related_string, all_images
+def structure_node(smiles_list: list[str]) -> (list[str], str, list):
+  '''
+    Generates the 3D structure of the molecule based on the smiles string.
+      Args:
+        smiles: the input smiles string
+      Returns:
+        all_structures: a list of strings of the 3D structure of the molecule
+        output_string: a string of the chemical formulae.
+        all_images: a list of images of the 3D structure of the molecule
+  '''
+  print("structure tool")
+  all_mols = []
+  all_structures = []
+  output_string = ''
+  for smile in smiles_list:
+    mol = Chem.MolFromSmiles(smile)
+    molH = Chem.AddHs(mol)
+    AllChem.EmbedMolecule(molH)
+    AllChem.MMFFOptimizeMolecule(molH)
+    structure_string = ""
+    all_symbols = []
+    for atom in molH.GetAtoms():
+      symbol = atom.GetSymbol()
+      all_symbols.append(symbol)
+      pos = molH.GetConformer().GetAtomPosition(atom.GetIdx())
+      structure_string += f'{symbol}  {pos[0]}  {pos[1]}  {pos[2]}\n'
+    atom_freqs = Counter(all_symbols)
+    formula = ''.join([f'{atom}{count}' for atom, count in atom_freqs.items()])
+    output_string += f'For {smile}: Formula is: {formula}\n'
+    all_structures.append(structure_string)
+    all_mols.append(molH)
+  img = Draw.MolsToGridImage(all_mols, molsPerRow=3, subImgSize=(250, 250))
+  return all_structures, output_string, [img]