Spaces:
Sleeping
Sleeping
| import matplotlib.pyplot as plt | |
| from rdkit import Chem | |
| from rdkit.Chem import AllChem, QED | |
| from rdkit.Chem import Draw | |
| from rdkit.Chem.Draw import MolsToGridImage | |
| from rdkit import rdBase | |
| from rdkit.Chem import rdMolAlign | |
| import os, re | |
| from rdkit import RDConfig | |
| import pubchempy as pcp | |
| from PIL import Image | |
| from collections import Counter | |
| from langchain_core.tools import tool | |
def name_node(smiles_list: list[str]) -> tuple[list[str], str, None]:
    '''
    Queries Pubchem for the name of each molecule based on its smiles string.

    Args:
        smiles_list: the list of input smiles strings
    Returns:
        names_list: the IUPAC names found. A name is appended as soon as the
            compound lookup succeeds; a smiles string whose lookup fails adds
            no entry, so this list can be shorter than smiles_list.
        name_string: a string of the tool results. Per molecule it holds the
            IUPAC name line, up to five alternative-name lines, and a
            "<smiles>: Fail" line if any step of the lookup raised.
        None: the third element is always None.
    '''
    print("name tool")
    print('===================================================')
    names = []
    name_string = ''
    for smiles in smiles_list:
        try:
            res = pcp.get_compounds(smiles, "smiles")
            # An empty result list raises IndexError here and is reported
            # as a failure below.
            name = res[0].iupac_name
            names.append(name)
            name_string += f'{smiles}: IUPAC molecule name: {name}\n'
            print(smiles, name)
            syn_list = pcp.get_synonyms(res[0].cid)
            # Only the first five synonyms are reported.
            for alt_name in syn_list[0]['Synonym'][:5]:
                name_string += f'{smiles}: alternative or common name: {alt_name}\n'
        except Exception:
            # Best effort: one failed lookup must not abort the whole batch.
            # `Exception` (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate.
            name_string += f'{smiles}: Fail\n'
    return names, name_string, None
def smiles_node(names_list: list[str]) -> tuple[list[str], str, None]:
    '''
    Queries Pubchem for the smiles string of each molecule based on its name.

    Args:
        names_list: the list of molecule names
    Returns:
        smiles_list: the smiles strings found. A name whose lookup fails adds
            no entry, so this list can be shorter than names_list.
        smiles_string: a string of the tool results, one line per input name
            (either the smiles string or "<name>: Fail").
        None: the third element is always None.
    '''
    print("smiles tool")
    print('===================================================')
    smiles_list = []
    smiles_string = ''
    for name in names_list:
        try:
            res = pcp.get_compounds(name, "name")
            # An empty result list raises IndexError here and is reported
            # as a failure below.
            smiles = res[0].smiles
            smiles_list.append(smiles)
            smiles_string += f'{name}: The SMILES string for the molecule is: {smiles}\n'
        except Exception:
            # Best effort: one failed lookup must not abort the whole batch.
            # `Exception` (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate.
            smiles_string += f'{name}: Fail\n'
    return smiles_list, smiles_string, None
def related_node(smiles_list: list[str]) -> "tuple[list[list[str]], str, Image.Image | None]":
    '''
    Queries Pubchem for molecules similar to each input smiles string.

    Args:
        smiles_list: the list of input smiles strings
    Returns:
        total_similar_list: a list with exactly one entry per input smiles
            string: the list of similar smiles strings, or an empty list if
            the query or the drawing failed.
        related_string: a string of the tool results (name, smiles, molecular
            weight and LogP for every similar molecule, or "<smiles>: Fail").
        img: the grid image for the LAST input that succeeded, also written
            to 'current_image.png'; None if no input produced an image.
    '''
    print("related tool")
    print('===================================================')
    total_similar_list = []
    related_string = ''
    # Stays None when the input is empty or every query fails, so the save
    # step below never touches an unbound name.
    img = None
    for smiles in smiles_list:
        try:
            res = pcp.get_compounds(smiles, "smiles", searchtype="similarity", listkey_count=50)
            related_string += f'The following molecules are similar to {smiles}: \n'
            print('got related molecules with smiles')
            sub_smiles = []
            for index, compound in enumerate(res):
                if index == 0:
                    # Only the first hit is echoed to stdout.
                    print(compound.iupac_name)
                sub_smiles.append(compound.smiles)
                related_string += f'Name: {compound.iupac_name}\n'
                related_string += f'SMILES: {compound.smiles}\n'
                related_string += f'Molecular Weight: {compound.molecular_weight}\n'
                related_string += f'LogP: {compound.xlogp}\n'
                related_string += '===================\n'
            sub_mols = [Chem.MolFromSmiles(smile) for smile in sub_smiles]
            legend = [str(smile) for smile in sub_smiles]
            grid = Draw.MolsToGridImage(sub_mols, legends=legend, molsPerRow=4, subImgSize=(250, 250))
        except Exception:
            # Best effort: a failed query or drawing is reported and the
            # batch continues. Results are only recorded in the `else`
            # branch, so a late failure cannot add two entries for one input.
            related_string += f'{smiles}: Fail\n'
            total_similar_list.append([])
        else:
            total_similar_list.append(sub_smiles)
            img = grid

    if img is None:
        return total_similar_list, related_string, None

    try:
        img.save('current_image.png')
    except Exception:
        # Fallback for image objects without a working .save(); presumably a
        # notebook-style image exposing raw PNG bytes via `.data` - TODO
        # confirm which object types MolsToGridImage returns in deployment.
        pic = img.data
        with open('current_image.png', 'wb') as f:
            f.write(pic)
        img = Image.open('current_image.png')
    return total_similar_list, related_string, img
def structure_node(smiles_list: list[str]) -> "tuple[list[str], str, Image.Image | None]":
    '''
    Generates a 3D structure for each molecule based on its smiles string.

    Each molecule is parsed, hydrogens are added, a conformer is embedded and
    then optimized with MMFF. A smiles string that cannot be processed is
    reported as "<smiles>: Fail" and skipped instead of aborting the batch.

    Args:
        smiles_list: the list of input smiles strings
    Returns:
        all_structures: one string per successfully processed molecule, with
            one "<symbol> <x> <y> <z>" line per atom.
        output_string: a string of the chemical formulae. Element counts are
            listed in order of first appearance among the atoms; failed
            inputs are listed as "<smiles>: Fail".
        img: a single grid image of all processed molecules, also written to
            'current_image.png'; None if no molecule could be processed.
    '''
    print("structure tool")
    all_mols = []
    all_structures = []
    output_string = ''
    for smile in smiles_list:
        try:
            mol = Chem.MolFromSmiles(smile)
            if mol is None:
                # MolFromSmiles signals a parse failure by returning None.
                raise ValueError(f'could not parse SMILES: {smile}')
            molH = Chem.AddHs(mol)
            # EmbedMolecule returns the new conformer id, or -1 on failure.
            if AllChem.EmbedMolecule(molH) < 0:
                raise ValueError(f'could not embed a 3D conformer: {smile}')
            AllChem.MMFFOptimizeMolecule(molH)

            # Fetch the conformer once rather than once per atom.
            conformer = molH.GetConformer()
            all_symbols = []
            coordinate_lines = []
            for atom in molH.GetAtoms():
                symbol = atom.GetSymbol()
                all_symbols.append(symbol)
                pos = conformer.GetAtomPosition(atom.GetIdx())
                coordinate_lines.append(f'{symbol} {pos[0]} {pos[1]} {pos[2]}\n')
        except Exception:
            # Same failure-line format as the other tools in this file.
            output_string += f'{smile}: Fail\n'
            continue

        atom_freqs = Counter(all_symbols)
        formula = ''.join(f'{atom}{count}' for atom, count in atom_freqs.items())
        output_string += f'For {smile}: Formula is: {formula}\n'
        all_structures.append(''.join(coordinate_lines))
        all_mols.append(molH)

    if not all_mols:
        # Nothing to draw: skip the grid image instead of drawing an empty one.
        return all_structures, output_string, None

    img = Draw.MolsToGridImage(all_mols, molsPerRow=3, subImgSize=(250, 250))
    # Save the image as current_image.png.
    try:
        img.save('current_image.png')
    except Exception:
        # Fallback for image objects without a working .save(); presumably a
        # notebook-style image exposing raw PNG bytes via `.data` - TODO
        # confirm which object types MolsToGridImage returns in deployment.
        pic = img.data
        with open('current_image.png', 'wb') as f:
            f.write(pic)
        img = Image.open('current_image.png')
    return all_structures, output_string, img