Spaces:

cafierom
/

MoDrAg2-OpenAI

Sleeping

File size: 6,084 Bytes

import matplotlib.pyplot as plt

from rdkit import Chem
from rdkit.Chem import AllChem, QED
from rdkit.Chem import Draw
from rdkit.Chem.Draw import MolsToGridImage
from rdkit import rdBase
from rdkit.Chem import rdMolAlign
import os, re
from rdkit import RDConfig
import pubchempy as pcp
from PIL import Image
from collections import Counter
from langchain_core.tools import tool

@tool
def name_node(smiles_list: list[str]) -> tuple[list[str], str, None]:
  '''
    Queries Pubchem for the name of each molecule based on its smiles string.

      Args:

        smiles_list: the list of input smiles strings

      Returns:

        names_list: the IUPAC names of the molecules that were found; a
          smiles string whose lookup fails contributes no entry, so this
          list can be shorter than smiles_list

        name_string: a string of the tool results: the IUPAC name and up to
          five synonyms per molecule, or "<smiles>: Fail" when the lookup
          failed

        None: always None (the image-producing tools in this file return an
          image in this position)

  '''
  print("name tool")
  print('===================================================')

  names = []
  report_lines = []
  for smiles in smiles_list:
    # Stage 1: resolve the compound. Any failure here (network error, no
    # match, empty result) is reported as "Fail" and the molecule is skipped.
    try:
      compound = pcp.get_compounds(smiles, "smiles")[0]
      name = compound.iupac_name
    except Exception as err:
      print(f'{smiles}: name lookup failed: {err}')
      report_lines.append(f'{smiles}: Fail\n')
      continue

    names.append(name)
    report_lines.append(f'{smiles}: IUPAC molecule name: {name}\n')
    print(smiles, name)

    # Stage 2: synonyms are optional extras. A failure here must not mark an
    # already-resolved molecule as "Fail", so it is handled separately.
    try:
      synonyms = pcp.get_synonyms(compound.cid)[0]['Synonym'][:5]
    except Exception as err:
      print(f'{smiles}: synonym lookup failed: {err}')
      synonyms = []

    report_lines.extend(
      f'{smiles}: alternative or common name: {alt_name}\n'
      for alt_name in synonyms
    )

  return names, ''.join(report_lines), None

@tool
def smiles_node(names_list: list[str]) -> tuple[list[str], str, None]:
  '''
    Queries Pubchem for the smiles string of each molecule based on its name.

      Args:

        names_list: the list of molecule names

      Returns:

        smiles_list: the smiles strings of the molecules that were found; a
          name whose lookup fails contributes no entry, so this list can be
          shorter than names_list

        smiles_string: a string of the tool results: one line per name with
          either the smiles string or "<name>: Fail"

        None: always None (the image-producing tools in this file return an
          image in this position)

  '''
  print("smiles tool")
  print('===================================================')

  smiles_list = []
  report_lines = []
  for name in names_list:
    # Best-effort per molecule: a network error or an empty result list for
    # one name is reported as "Fail" and must not abort the whole batch.
    try:
      smiles = pcp.get_compounds(name, "name")[0].smiles
    except Exception as err:
      print(f'{name}: smiles lookup failed: {err}')
      report_lines.append(f'{name}: Fail\n')
      continue

    smiles_list.append(smiles)
    report_lines.append(
      f'{name}: The SMILES string for the molecule is: {smiles}\n'
    )

  return smiles_list, ''.join(report_lines), None

@tool
def related_node(smiles_list: list[str]) -> tuple[list[list[str]], str, object]:
  '''
    Queries Pubchem for molecules similar to each input smiles string.

      Args:

        smiles_list: the list of input smiles strings

      Returns:

        total_similar_list: a list with exactly one entry per input smiles
          string; each entry is the list of smiles strings of the similar
          molecules, or an empty list when the lookup failed

        related_string: a string of the tool results (name, smiles string,
          molecular weight and LogP of every similar molecule, or
          "<smiles>: Fail")

        img: the grid image of the similar molecules for the last input that
          could be drawn, reloaded from current_image.png; None when nothing
          could be drawn

  '''
  print("related tool")
  print('===================================================')

  total_similar_list = []
  related_string = ''
  last_grid = None  # most recent successfully drawn grid image

  for smiles in smiles_list:
    # Stage 1: similarity search. The text section is collected locally and
    # only committed on success, so a failure leaves exactly one "Fail" line
    # and exactly one (empty) entry in total_similar_list.
    try:
      res = pcp.get_compounds(smiles, "smiles", searchtype="similarity", listkey_count=50)
      print('got related molecules with smiles')

      sub_smiles = []
      section = [f'The following molecules are similar to {smiles}: \n']
      for position, compound in enumerate(res):
        if position == 0:
          print(compound.iupac_name)
        sub_smiles.append(compound.smiles)
        section.append(f'Name: {compound.iupac_name}\n')
        section.append(f'SMILES: {compound.smiles}\n')
        section.append(f'Molecular Weight: {compound.molecular_weight}\n')
        section.append(f'LogP: {compound.xlogp}\n')
        section.append('===================\n')
    except Exception as err:
      print(f'{smiles}: similarity search failed: {err}')
      related_string += f'{smiles}: Fail\n'
      total_similar_list.append([])
      continue

    total_similar_list.append(sub_smiles)
    related_string += ''.join(section)

    if not sub_smiles:
      continue  # nothing to draw

    # Stage 2: drawing. A rendering problem must not discard the search
    # results recorded above or add a second list entry for this input.
    try:
      sub_mols = [Chem.MolFromSmiles(entry) for entry in sub_smiles]
      legend = [str(entry) for entry in sub_smiles]
      last_grid = Draw.MolsToGridImage(sub_mols, legends=legend, molsPerRow=4, subImgSize=(250, 250))
    except Exception as err:
      print(f'{smiles}: could not draw similar molecules: {err}')

  if last_grid is None:
    # Empty input, or nothing could be drawn: there is no image to save.
    return total_similar_list, related_string, None

  try:
    last_grid.save('current_image.png')
  except AttributeError:
    # The drawing result has no save(); presumably a notebook-style image
    # object that exposes its raw PNG bytes as .data, so write those instead.
    with open('current_image.png', 'wb') as f:
      f.write(last_grid.data)
  img = Image.open('current_image.png')

  return total_similar_list, related_string, img

@tool
def structure_node(smiles_list: list[str]) -> tuple[list[str], str, object]:
  '''
    Generates the 3D structure of each molecule based on its smiles string.

      Args:

        smiles_list: the list of input smiles strings

      Returns:

        all_structures: a list with one entry per input smiles string; each
          entry is a string with one "symbol  x  y  z" line per atom
          (hydrogens included), or an empty string when the molecule could
          not be parsed or embedded

        output_string: a string with the chemical formula of each molecule,
          or "<smiles>: Fail" for molecules that could not be processed

        img: a grid image of the successfully built molecules, reloaded from
          current_image.png; None when no molecule could be built

  '''
  print("structure tool")

  all_mols = []
  all_structures = []
  output_string = ''

  for smile in smiles_list:
    # MolFromSmiles returns None for a string it cannot parse; report that
    # molecule as failed instead of letting AddHs abort the whole batch.
    mol = Chem.MolFromSmiles(smile)
    if mol is None:
      print(f'{smile}: could not parse smiles string')
      output_string += f'{smile}: Fail\n'
      all_structures.append('')
      continue

    molH = Chem.AddHs(mol)
    # EmbedMolecule returns the new conformer id, or -1 when no 3D
    # coordinates could be generated (GetConformer would then raise).
    if AllChem.EmbedMolecule(molH) < 0:
      print(f'{smile}: could not generate 3D coordinates')
      output_string += f'{smile}: Fail\n'
      all_structures.append('')
      continue
    AllChem.MMFFOptimizeMolecule(molH)

    conformer = molH.GetConformer()
    coordinate_lines = []
    all_symbols = []
    for atom in molH.GetAtoms():
      symbol = atom.GetSymbol()
      all_symbols.append(symbol)
      pos = conformer.GetAtomPosition(atom.GetIdx())
      coordinate_lines.append(f'{symbol}  {pos[0]}  {pos[1]}  {pos[2]}\n')

    # Element counts in order of first appearance, e.g. "C2O1H6".
    atom_freqs = Counter(all_symbols)
    formula = ''.join(f'{atom}{count}' for atom, count in atom_freqs.items())

    output_string += f'For {smile}: Formula is: {formula}\n'
    all_structures.append(''.join(coordinate_lines))
    all_mols.append(molH)

  if not all_mols:
    # Empty input, or every molecule failed: there is nothing to draw.
    return all_structures, output_string, None

  img = Draw.MolsToGridImage(all_mols, molsPerRow=3, subImgSize=(250, 250))

  # Save the image as current_image.png, then hand back the reloaded file.
  try:
    img.save('current_image.png')
  except AttributeError:
    # The drawing result has no save(); presumably a notebook-style image
    # object that exposes its raw PNG bytes as .data, so write those instead.
    with open('current_image.png', 'wb') as f:
      f.write(img.data)
  img = Image.open('current_image.png')

  return all_structures, output_string, img