File size: 6,084 Bytes
426afd8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7863544
 
 
 
 
 
426afd8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68fe4a8
 
 
 
 
 
426afd8
68fe4a8
426afd8
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
import matplotlib.pyplot as plt

from rdkit import Chem
from rdkit.Chem import AllChem, QED
from rdkit.Chem import Draw
from rdkit.Chem.Draw import MolsToGridImage
from rdkit import rdBase
from rdkit.Chem import rdMolAlign
import os, re
from rdkit import RDConfig
import pubchempy as pcp
from PIL import Image
from collections import Counter
from langchain_core.tools import tool

@tool
def name_node(smiles_list: list[str]) -> tuple[list[str], str, None]:
  '''
    Queries Pubchem for the name of the molecule based on the smiles string.

      Args:
        smiles_list: the list of input smiles strings

      Returns:
        names_list: the list of IUPAC names of the molecules that were found
          (smiles strings whose lookup failed are skipped)
        name_string: a string of the tool results: the IUPAC name and up to
          five alternative names per molecule, or a "Fail" line when the
          molecule could not be looked up
        None: always None (this tool produces no image)
  '''
  print("name tool")
  print('===================================================')

  names = []
  report_lines = []
  for smiles in smiles_list:
    # Step 1: resolve the compound. An empty result list raises IndexError,
    # which is reported as a failed lookup like any PubChem/network error.
    try:
      res = pcp.get_compounds(smiles, "smiles")
      name = res[0].iupac_name
    except Exception as err:
      print(f'name lookup failed for {smiles}: {err}')
      report_lines.append(f'{smiles}: Fail\n')
      continue

    names.append(name)
    report_lines.append(f'{smiles}: IUPAC molecule name: {name}\n')
    print(smiles, name)

    # Step 2: synonyms are optional extras. A failure here must not mark a
    # molecule whose name was already found as "Fail".
    try:
      syn_list = pcp.get_synonyms(res[0].cid)
      for alt_name in syn_list[0]['Synonym'][:5]:
        report_lines.append(f'{smiles}: alternative or common name: {alt_name}\n')
    except Exception as err:
      print(f'synonym lookup failed for {smiles}: {err}')

  return names, ''.join(report_lines), None

@tool
def smiles_node(names_list: list[str]) -> tuple[list[str], str, None]:
  '''
    Queries Pubchem for the smiles string of the molecule based on the name.

      Args:
        names_list: the list of molecule names

      Returns:
        smiles_list: the list of smiles strings of the molecules that were
          found (names whose lookup failed are skipped)
        smiles_string: a string of the tool results, one line per name:
          either the smiles string or "Fail"
        None: always None (this tool produces no image)
  '''
  print("smiles tool")
  print('===================================================')

  smiles_list = []
  report_lines = []
  for name in names_list:
    # An empty result list raises IndexError, which is reported as a failed
    # lookup just like a PubChem or network error.
    try:
      res = pcp.get_compounds(name, "name")
      smiles = res[0].smiles
    except Exception as err:
      print(f'smiles lookup failed for {name}: {err}')
      report_lines.append(f'{name}: Fail\n')
      continue

    smiles_list.append(smiles)
    report_lines.append(f'{name}: The SMILES string for the molecule is: {smiles}\n')

  return smiles_list, ''.join(report_lines), None

@tool
def related_node(smiles_list: list[str]) -> tuple[list[list[str]], str, object]:
  '''
    Queries Pubchem for molecules similar to each input smiles string.

      Args:
        smiles_list: the list of input smiles strings

      Returns:
        total_similar_list: a list of lists of smiles strings of similar
          molecules, one inner list per input (empty when the query failed)
        related_string: a string of the tool results (name, smiles, molecular
          weight and LogP of every similar molecule, or "Fail" per input)
        img: a grid image of the similar molecules for the last input that
          could be drawn, reloaded from current_image.png; None when no
          image could be produced
  '''
  print("related tool")
  print('===================================================')

  total_similar_list = []
  report_lines = []
  last_img = None
  for smiles in smiles_list:
    # Step 1: query PubChem and build this input's report. The lines are
    # staged in entry_lines so a mid-way failure leaves no partial entry.
    try:
      res = pcp.get_compounds(smiles, "smiles", searchtype="similarity", listkey_count=50)
      print('got related molecules with smiles')
      if res:
        print(res[0].iupac_name)

      sub_smiles = []
      entry_lines = [f'The following molecules are similar to {smiles}: \n']
      for compound in res:
        sub_smiles.append(compound.smiles)
        entry_lines.append(f'Name: {compound.iupac_name}\n')
        entry_lines.append(f'SMILES: {compound.smiles}\n')
        entry_lines.append(f'Molecular Weight: {compound.molecular_weight}\n')
        entry_lines.append(f'LogP: {compound.xlogp}\n')
        entry_lines.append('===================\n')
    except Exception as err:
      print(f'similarity search failed for {smiles}: {err}')
      report_lines.append(f'{smiles}: Fail\n')
      total_similar_list.append([])
      continue

    # Exactly one entry per input keeps total_similar_list aligned with
    # smiles_list.
    total_similar_list.append(sub_smiles)
    report_lines.extend(entry_lines)

    # Step 2: drawing is best effort; a failure here keeps the query results
    # and simply leaves the previous image (if any) in place.
    try:
      sub_mols = [Chem.MolFromSmiles(smile) for smile in sub_smiles]
      legend = [str(smile) for smile in sub_smiles]
      last_img = Draw.MolsToGridImage(sub_mols, legends=legend, molsPerRow=4, subImgSize=(250, 250))
    except Exception as err:
      print(f'could not draw similar molecules for {smiles}: {err}')

  related_string = ''.join(report_lines)

  # Nothing was drawn (empty input or every query/drawing failed): there is
  # no image to save or return.
  if last_img is None:
    return total_similar_list, related_string, None

  try:
    last_img.save('current_image.png')
  except AttributeError:
    # No .save() method: presumably an IPython display image (e.g. RDKit in a
    # notebook), which carries the raw image bytes in .data -- TODO confirm.
    with open('current_image.png', 'wb') as f:
      f.write(last_img.data)
  img = Image.open('current_image.png')

  return total_similar_list, related_string, img

@tool
def structure_node(smiles_list: list[str]) -> tuple[list[str], str, object]:
  '''
    Generates the 3D structure of each molecule based on its smiles string.

      Args:
        smiles_list: the list of input smiles strings

      Returns:
        all_structures: a list of strings of the 3D structures, one per
          molecule that could be built; each line is
          "symbol  x  y  z" for one atom (hydrogens included)
        output_string: a string of the chemical formulae, or a "Fail" line
          for each smiles string that could not be parsed or embedded
        img: a grid image of the built molecules, reloaded from
          current_image.png; None when no molecule could be built
  '''
  print("structure tool")

  all_mols = []
  all_structures = []
  report_lines = []

  for smile in smiles_list:
    # MolFromSmiles returns None (it does not raise) for an invalid smiles.
    mol = Chem.MolFromSmiles(smile)
    if mol is None:
      print(f'could not parse smiles: {smile}')
      report_lines.append(f'{smile}: Fail\n')
      continue

    molH = Chem.AddHs(mol)
    # EmbedMolecule returns -1 when no 3D conformer could be generated; in
    # that case there are no coordinates to read below.
    if AllChem.EmbedMolecule(molH) < 0:
      print(f'could not embed molecule: {smile}')
      report_lines.append(f'{smile}: Fail\n')
      continue
    AllChem.MMFFOptimizeMolecule(molH)

    conformer = molH.GetConformer()
    symbols = []
    coord_lines = []
    for atom in molH.GetAtoms():
      symbol = atom.GetSymbol()
      pos = conformer.GetAtomPosition(atom.GetIdx())
      symbols.append(symbol)
      coord_lines.append(f'{symbol}  {pos.x}  {pos.y}  {pos.z}\n')

    # Element counts in order of first appearance, e.g. "C2O1H6".
    formula = ''.join(f'{atom}{count}' for atom, count in Counter(symbols).items())

    report_lines.append(f'For {smile}: Formula is: {formula}\n')
    all_structures.append(''.join(coord_lines))
    all_mols.append(molH)

  output_string = ''.join(report_lines)

  # Nothing could be built (empty input or every molecule failed): there is
  # no image to draw, save or return.
  if not all_mols:
    return all_structures, output_string, None

  img = Draw.MolsToGridImage(all_mols, molsPerRow=3, subImgSize=(250, 250))

  # Save the image as current_image.png.
  try:
    img.save('current_image.png')
  except AttributeError:
    # No .save() method: presumably an IPython display image (e.g. RDKit in a
    # notebook), which carries the raw image bytes in .data -- TODO confirm.
    with open('current_image.png', 'wb') as f:
      f.write(img.data)
  img = Image.open('current_image.png')

  return all_structures, output_string, img