import os
import re
from collections import Counter

import matplotlib.pyplot as plt
import pubchempy as pcp
from PIL import Image
from rdkit import Chem
from rdkit import RDConfig
from rdkit import rdBase
from rdkit.Chem import AllChem, QED
from rdkit.Chem import Draw
from rdkit.Chem import rdMolAlign
from rdkit.Chem.Draw import MolsToGridImage


def name_node(smiles_list: list[str]) -> tuple[list[str], str, None]:
    '''
    Queries Pubchem for the name of each molecule based on its smiles string.

    Lookups are best-effort: a molecule whose lookup raises is reported as
    "<smiles>: Fail" in the result string and the loop moves on to the next one.

      Args:
        smiles_list: the list of input smiles strings
      Returns:
        names_list: the IUPAC names of the molecules that were looked up.
                    Molecules whose lookup failed before a name was obtained
                    are omitted, so this list can be shorter than smiles_list.
        name_string: a string of the tool results
        None: always None (third slot of the returned tuple)
    '''
    print("name tool")
    print('===================================================')

    names = []
    name_string = ''
    for smiles in smiles_list:
        try:
            res = pcp.get_compounds(smiles, "smiles")
            name = res[0].iupac_name
            names.append(name)
            name_string += f'{smiles}: IUPAC molecule name: {name}\n'
            print(smiles, name)
            # Only the first five synonyms are reported to keep the output short.
            syn_list = pcp.get_synonyms(res[0].cid)
            for alt_name in syn_list[0]['Synonym'][:5]:
                name_string += f'{smiles}: alternative or common name: {alt_name}\n'
        except Exception:
            # Exception (not a bare except) so Ctrl-C / SystemExit still propagate.
            # A failure while fetching synonyms also lands here, after the IUPAC
            # name has already been recorded above.
            name_string += f'{smiles}: Fail\n'

    return names, name_string, None


def smiles_node(names_list: list[str]) -> tuple[list[str], str, None]:
    '''
    Queries Pubchem for the smiles string of each molecule based on its name.

    Lookups are best-effort: a name whose lookup raises is reported as
    "<name>: Fail" in the result string and the loop moves on to the next one.

      Args:
        names_list: the list of molecule names
      Returns:
        smiles_list: the smiles strings of the molecules that were found.
                     Failed lookups are omitted, so this list can be shorter
                     than names_list.
        smiles_string: a string of the tool results
        None: always None (third slot of the returned tuple)
    '''
    print("smiles tool")
    print('===================================================')

    smiles_list = []
    smiles_string = ''
    for name in names_list:
        try:
            res = pcp.get_compounds(name, "name")
            smiles = res[0].smiles
            smiles_list.append(smiles)
            smiles_string += f'{name}: The SMILES string for the molecule is: {smiles}\n'
        except Exception:
            # Exception (not a bare except) so Ctrl-C / SystemExit still propagate.
            smiles_string += f'{name}: Fail\n'

    return smiles_list, smiles_string, None


def related_node(smiles_list: list[str]) -> tuple[list[list[str]], str, list]:
    '''
    Queries Pubchem for molecules similar to each input smiles string.

    Each input contributes exactly one entry to total_similar_list and one to
    all_images. If anything fails for an input (the query or the drawing),
    that input gets an empty list, a None image and a "<smiles>: Fail" line.

      Args:
        smiles_list: the list of input smiles strings
      Returns:
        total_similar_list: a list of lists of smiles strings of similar molecules
        related_string: a string of the tool results
        all_images: a list of grid images of the similar molecules (None on failure)
    '''
    print("related tool")
    print('===================================================')

    total_similar_list = []
    all_images = []
    related_string = ''
    for smiles in smiles_list:
        try:
            res = pcp.get_compounds(smiles, "smiles", searchtype="similarity", listkey_count=50)
            print('got related molecules with smiles')

            # Results are collected locally and only committed in the `else`
            # branch, so a late failure cannot leave a half-written report or
            # a second list entry for the same input.
            report_lines = [f'The following molecules are similar to {smiles}: \n']
            sub_smiles = []
            for index, compound in enumerate(res):
                if index == 0:
                    print(compound.iupac_name)
                sub_smiles.append(compound.smiles)
                report_lines.append(f'Name: {compound.iupac_name}\n')
                report_lines.append(f'SMILES: {compound.smiles}\n')
                report_lines.append(f'Molecular Weight: {compound.molecular_weight}\n')
                report_lines.append(f'LogP: {compound.xlogp}\n')
                report_lines.append('===================\n')

            sub_mols = [Chem.MolFromSmiles(smile) for smile in sub_smiles]
            legend = [str(smile) for smile in sub_smiles]
            img = Draw.MolsToGridImage(sub_mols, legends=legend, molsPerRow=4, subImgSize=(250, 250))
        except Exception:
            # Exception (not a bare except) so Ctrl-C / SystemExit still propagate.
            related_string += f'{smiles}: Fail\n'
            total_similar_list.append([])
            all_images.append(None)
        else:
            related_string += ''.join(report_lines)
            total_similar_list.append(sub_smiles)
            all_images.append(img)

    return total_similar_list, related_string, all_images


def _embed_with_hydrogens(smile: str):
    '''
    Builds a molecule with explicit hydrogens and one optimized 3D conformer.

      Args:
        smile: the input smiles string
      Returns:
        the RDKit molecule, or None if the smiles string could not be parsed
        or no 3D coordinates could be generated
    '''
    mol = Chem.MolFromSmiles(smile)
    if mol is None:
        # MolFromSmiles signals an unparsable string by returning None.
        return None
    molH = Chem.AddHs(mol)
    if AllChem.EmbedMolecule(molH) < 0:
        # EmbedMolecule returns -1 when embedding fails; there is then no
        # conformer to read coordinates from.
        return None
    AllChem.MMFFOptimizeMolecule(molH)
    return molH


def structure_node(smiles_list: list[str]) -> tuple[list[str], str, list]:
    '''
    Generates the 3D structure of each molecule based on its smiles string.

    A smiles string that cannot be parsed or embedded in 3D is reported as
    "<smiles>: Fail" in the output string and gets an empty structure string,
    so all_structures stays aligned with smiles_list.

      Args:
        smiles_list: the list of input smiles strings
      Returns:
        all_structures: a list of strings of the 3D structure of each molecule,
                        one "symbol x y z" line per atom ('' on failure)
        output_string: a string of the chemical formulae.
        all_images: a one-element list holding a grid image of the molecules
                    that were built, or [None] if none could be built
    '''
    print("structure tool")

    all_mols = []
    all_structures = []
    output_string = ''
    for smile in smiles_list:
        molH = _embed_with_hydrogens(smile)
        if molH is None:
            output_string += f'{smile}: Fail\n'
            all_structures.append('')
            continue

        conformer = molH.GetConformer()
        all_symbols = []
        coordinate_lines = []
        for atom in molH.GetAtoms():
            symbol = atom.GetSymbol()
            all_symbols.append(symbol)
            pos = conformer.GetAtomPosition(atom.GetIdx())
            coordinate_lines.append(f'{symbol} {pos[0]} {pos[1]} {pos[2]}\n')

        # The formula lists elements in order of first appearance in the atom
        # list, each followed by its count (hydrogens included).
        atom_freqs = Counter(all_symbols)
        formula = ''.join([f'{atom}{count}' for atom, count in atom_freqs.items()])
        output_string += f'For {smile}: Formula is: {formula}\n'

        all_structures.append(''.join(coordinate_lines))
        all_mols.append(molH)

    # Nothing to draw when the input was empty or every molecule failed.
    img = Draw.MolsToGridImage(all_mols, molsPerRow=3, subImgSize=(250, 250)) if all_mols else None
    return all_structures, output_string, [img]