# Spaces: Sleeping
# File size: 6,084 Bytes
import matplotlib.pyplot as plt
from rdkit import Chem
from rdkit.Chem import AllChem, QED
from rdkit.Chem import Draw
from rdkit.Chem.Draw import MolsToGridImage
from rdkit import rdBase
from rdkit.Chem import rdMolAlign
import os, re
from rdkit import RDConfig
import pubchempy as pcp
from PIL import Image
from collections import Counter
from langchain_core.tools import tool
@tool
def name_node(smiles_list: list[str]) -> tuple[list[str], str, None]:
    '''
    Queries Pubchem for the name of the molecule based on the smiles string.
      Args:
        smiles_list: the list of input smiles strings
      Returns:
        names_list: the list of names of the molecules, one entry per input
                    smiles string ("unknown" when the lookup fails)
        name_string: a string of the tool results
        image: always None, this tool produces no image
    '''
    # NOTE(review): the docstring above is presumably what the `tool`
    # decorator exposes as the tool description, so keep it short and
    # accurate -- confirm against the langchain_core version in use.
    print("name tool")
    print('===================================================')
    names = []
    report_parts = []

    for smiles in smiles_list:
        # Stage 1: resolve the compound. A failure here (network error, no
        # match -> IndexError on res[0], ...) must still yield one entry in
        # `names` so the output stays aligned with `smiles_list`.
        try:
            res = pcp.get_compounds(smiles, "smiles")
            name = res[0].iupac_name
        except Exception:
            names.append("unknown")
            report_parts.append(f'{smiles}: Fail\n')
            continue

        names.append(name)
        report_parts.append(f'{smiles}: IUPAC molecule name: {name}\n')
        print(smiles, name)

        # Stage 2: synonyms are supplementary. If this lookup fails the
        # IUPAC name recorded above is kept; the failure is still reported
        # with the same "Fail" line the tool has always emitted.
        try:
            syn_list = pcp.get_synonyms(res[0].cid)
            for alt_name in syn_list[0]['Synonym'][:5]:
                report_parts.append(
                    f'{smiles}: alternative or common name: {alt_name}\n'
                )
        except Exception:
            report_parts.append(f'{smiles}: Fail\n')

    name_string = ''.join(report_parts)
    return names, name_string, None
@tool
def smiles_node(names_list: list[str]) -> tuple[list[str], str, None]:
    '''
    Queries Pubchem for the smiles string of the molecule based on the name.
      Args:
        names_list: the list of molecule names
      Returns:
        smiles_list: the list of smiles strings of the molecules, one entry
                     per input name ("unknown" when the lookup fails)
        smiles_string: a string of the tool results
        image: always None, this tool produces no image
    '''
    # NOTE(review): the docstring above is presumably what the `tool`
    # decorator exposes as the tool description -- confirm against the
    # langchain_core version in use before rewording it.
    print("smiles tool")
    print('===================================================')
    smiles_list = []
    report_parts = []

    for name in names_list:
        try:
            res = pcp.get_compounds(name, "name")
            # res[0] raises IndexError when PubChem returns no match; that
            # is handled as an ordinary lookup failure below.
            smiles = res[0].smiles
        except Exception:
            # Keep the output aligned with the input: every name gets a
            # slot, and failed lookups are marked explicitly.
            smiles_list.append("unknown")
            report_parts.append(f'{name}: Fail\n')
            continue

        smiles_list.append(smiles)
        report_parts.append(
            f'{name}: The SMILES string for the molecule is: {smiles}\n'
        )

    smiles_string = ''.join(report_parts)
    return smiles_list, smiles_string, None
@tool
def related_node(smiles_list: list[str]) -> tuple[list[list[str]], str, object]:
    '''
    Queries Pubchem for similar molecules based on the smiles string.
      Args:
        smiles_list: the list of input smiles strings
      Returns:
        total_similar_list: a list of lists of similar molecules, one inner
                            list per input smiles string (empty on failure)
        related_string: a string of the tool results
        image: an image grid of the similar molecules for the last input
               that could be drawn, or None if nothing could be drawn
    '''
    # NOTE(review): the docstring above is presumably what the `tool`
    # decorator exposes as the tool description -- confirm against the
    # langchain_core version in use before rewording it.
    print("related tool")
    print('===================================================')
    total_similar_list = []
    report_parts = []
    last_grid = None  # most recent successfully drawn grid image, if any

    for smiles in smiles_list:
        # Stage 1: fetch and describe the similar compounds. The text and
        # the SMILES list are only committed once the whole stage succeeds,
        # so a failure yields exactly one "Fail" line and one empty list.
        try:
            res = pcp.get_compounds(
                smiles, "smiles", searchtype="similarity", listkey_count=50
            )
            print('got related molecules with smiles')
            if res:
                print(res[0].iupac_name)
            sub_smiles = [compound.smiles for compound in res]
            section = [f'The following molecules are similar to {smiles}: \n']
            for compound in res:
                section.append(f'Name: {compound.iupac_name}\n')
                section.append(f'SMILES: {compound.smiles}\n')
                section.append(f'Molecular Weight: {compound.molecular_weight}\n')
                section.append(f'LogP: {compound.xlogp}\n')
                section.append('===================\n')
        except Exception:
            report_parts.append(f'{smiles}: Fail\n')
            total_similar_list.append([])
            continue

        report_parts.extend(section)
        total_similar_list.append(sub_smiles)

        # Stage 2: drawing is best-effort. A rendering problem must not
        # discard the search results gathered above.
        if not sub_smiles:
            continue
        try:
            sub_mols = [Chem.MolFromSmiles(smile) for smile in sub_smiles]
            legend = [str(smile) for smile in sub_smiles]
            last_grid = Draw.MolsToGridImage(
                sub_mols, legends=legend, molsPerRow=4, subImgSize=(250, 250)
            )
        except Exception:
            print(f'could not draw similar molecules for {smiles}')

    related_string = ''.join(report_parts)

    # Nothing was drawn (empty input or every lookup failed): there is no
    # image to save, so report that with None instead of crashing.
    if last_grid is None:
        return total_similar_list, related_string, None

    try:
        last_grid.save('current_image.png')
    except AttributeError:
        # Fallback for image objects that have no .save() but expose the
        # encoded bytes via .data.
        with open('current_image.png', 'wb') as f:
            f.write(last_grid.data)
    img = Image.open('current_image.png')
    return total_similar_list, related_string, img
@tool
def structure_node(smiles_list: list[str]) -> tuple[list[str], str, object]:
    '''
    Generates the 3D structure of the molecule based on the smiles string.
      Args:
        smiles_list: the list of input smiles strings
      Returns:
        all_structures: a list of strings of the 3D structure of the molecule,
                        one entry per input smiles string (empty on failure)
        output_string: a string of the chemical formulae.
        image: an image grid of the molecules that could be built, or None
               if none could be built
    '''
    # NOTE(review): the docstring above is presumably what the `tool`
    # decorator exposes as the tool description -- confirm against the
    # langchain_core version in use before rewording it.
    print("structure tool")
    all_mols = []
    all_structures = []
    report_parts = []

    for smile in smiles_list:
        # MolFromSmiles signals an unparsable SMILES by returning None.
        mol = Chem.MolFromSmiles(smile)
        if mol is None:
            all_structures.append('')
            report_parts.append(f'{smile}: Fail\n')
            continue

        molH = Chem.AddHs(mol)
        # EmbedMolecule returns -1 when no 3D coordinates could be
        # generated; without a conformer there is nothing to report.
        if AllChem.EmbedMolecule(molH) < 0:
            all_structures.append('')
            report_parts.append(f'{smile}: Fail\n')
            continue
        AllChem.MMFFOptimizeMolecule(molH)

        conformer = molH.GetConformer()  # fetched once, not once per atom
        all_symbols = []
        coordinate_lines = []
        for atom in molH.GetAtoms():
            symbol = atom.GetSymbol()
            all_symbols.append(symbol)
            pos = conformer.GetAtomPosition(atom.GetIdx())
            coordinate_lines.append(f'{symbol} {pos[0]} {pos[1]} {pos[2]}\n')

        # Formula lists each element with its count, in order of first
        # appearance among the atoms.
        atom_freqs = Counter(all_symbols)
        formula = ''.join(f'{atom}{count}' for atom, count in atom_freqs.items())
        report_parts.append(f'For {smile}: Formula is: {formula}\n')
        all_structures.append(''.join(coordinate_lines))
        all_mols.append(molH)

    output_string = ''.join(report_parts)

    # Empty input or nothing could be built: no image to draw or save.
    if not all_mols:
        return all_structures, output_string, None

    grid = Draw.MolsToGridImage(all_mols, molsPerRow=3, subImgSize=(250, 250))
    # Save the image as current_image.png.
    try:
        grid.save('current_image.png')
    except AttributeError:
        # Fallback for image objects that have no .save() but expose the
        # encoded bytes via .data.
        with open('current_image.png', 'wb') as f:
            f.write(grid.data)
    img = Image.open('current_image.png')
    return all_structures, output_string, img