# MoDrAg2 / modrag_molecule_functions.py
# cafierom's picture
# Upload 4 files
# 706c748 verified
import matplotlib.pyplot as plt
from rdkit import Chem
from rdkit.Chem import AllChem, QED
from rdkit.Chem import Draw
from rdkit.Chem.Draw import MolsToGridImage
from rdkit import rdBase
from rdkit.Chem import rdMolAlign
import os, re
from rdkit import RDConfig
import pubchempy as pcp
from PIL import Image
from collections import Counter
def name_node(smiles_list: list[str]) -> tuple[list[str], str, None]:
    '''
    Queries Pubchem for the name of each molecule based on its smiles string.

    Args:
        smiles_list: the list of input smiles strings
    Returns:
        names_list: the IUPAC names that were found. A molecule whose lookup
            fails before its name is read contributes no entry, so this list
            can be shorter than smiles_list.
        name_string: a string of the tool results. For each molecule it holds
            the IUPAC name line followed by up to five synonym lines, and a
            '<smiles>: Fail' line if any step of the lookup raised. When the
            synonym query fails after the name was found, the name line is
            kept and the Fail line is added after it.
        None: fixed third element; the image-producing tools in this file
            return their images in this position.
    '''
    print("name tool")
    print('===================================================')

    names = []
    report_lines = []
    for smiles in smiles_list:
        try:
            res = pcp.get_compounds(smiles, "smiles")
            # An empty result list raises IndexError here and is reported
            # as a failure by the handler below.
            name = res[0].iupac_name
            names.append(name)
            report_lines.append(f'{smiles}: IUPAC molecule name: {name}\n')
            print(smiles, name)

            syn_list = pcp.get_synonyms(res[0].cid)
            # Only the first five synonyms are reported.
            for alt_name in syn_list[0]['Synonym'][:5]:
                report_lines.append(
                    f'{smiles}: alternative or common name: {alt_name}\n'
                )
        except Exception:
            # Best-effort tool: a failed lookup is reported, not raised.
            # `Exception` (rather than a bare except) lets KeyboardInterrupt
            # and SystemExit propagate.
            report_lines.append(f'{smiles}: Fail\n')

    return names, ''.join(report_lines), None
def smiles_node(names_list: list[str]) -> tuple[list[str], str, None]:
    '''
    Queries Pubchem for the smiles string of each molecule based on its name.

    Args:
        names_list: the list of molecule names
    Returns:
        smiles_list: the smiles strings that were found. A name whose lookup
            fails contributes no entry, so this list can be shorter than
            names_list.
        smiles_string: a string of the tool results: one line per resolved
            name, and a '<name>: Fail' line for each lookup that raised.
        None: fixed third element; the image-producing tools in this file
            return their images in this position.
    '''
    print("smiles tool")
    print('===================================================')

    smiles_list = []
    report_lines = []
    for name in names_list:
        try:
            res = pcp.get_compounds(name, "name")
            # An empty result list raises IndexError here and is reported
            # as a failure by the handler below.
            smiles = res[0].smiles
            smiles_list.append(smiles)
            report_lines.append(
                f'{name}: The SMILES string for the molecule is: {smiles}\n'
            )
        except Exception:
            # Best-effort tool: a failed lookup is reported, not raised.
            # `Exception` (rather than a bare except) lets KeyboardInterrupt
            # and SystemExit propagate.
            report_lines.append(f'{name}: Fail\n')

    return smiles_list, ''.join(report_lines), None
def related_node(smiles_list: list[str]) -> tuple[list[list[str]], str, list]:
    '''
    Queries Pubchem for molecules similar to each input smiles string.

    Every input produces exactly one entry in each returned list, so the
    outputs stay positionally aligned with smiles_list. Results for an input
    are committed only after the search and the grid rendering both succeed;
    if either step raises, that input gets an empty list, a None image and a
    single '<smiles>: Fail' line.

    Args:
        smiles_list: the list of input smiles strings
    Returns:
        total_similar_list: a list of lists of smiles strings of the similar
            molecules ([] for a failed input)
        related_string: a string of the tool results (name, smiles, molecular
            weight and LogP for each similar molecule)
        all_images: a list of grid images of the similar molecules
            (None for a failed input)
    '''
    print("related tool")
    print('===================================================')

    total_similar_list = []
    all_images = []
    report_parts = []

    for smiles in smiles_list:
        try:
            res = pcp.get_compounds(
                smiles, "smiles", searchtype="similarity", listkey_count=50
            )
            print('got related molecules with smiles')

            # Collect this input's results locally so that a failure part-way
            # through cannot leave half-written entries in the outputs.
            entry_lines = [f'The following molecules are similar to {smiles}: \n']
            sub_smiles = []
            for idx, compound in enumerate(res):
                if idx == 0:
                    # Only the first hit is echoed to stdout.
                    print(compound.iupac_name)
                sub_smiles.append(compound.smiles)
                entry_lines.append(f'Name: {compound.iupac_name}\n')
                entry_lines.append(f'SMILES: {compound.smiles}\n')
                entry_lines.append(f'Molecular Weight: {compound.molecular_weight}\n')
                entry_lines.append(f'LogP: {compound.xlogp}\n')
                entry_lines.append('===================\n')

            sub_mols = [Chem.MolFromSmiles(smile) for smile in sub_smiles]
            legend = [str(smile) for smile in sub_smiles]
            img = Draw.MolsToGridImage(
                sub_mols, legends=legend, molsPerRow=4, subImgSize=(250, 250)
            )
        except Exception:
            # Best-effort tool: report the failure and keep the outputs
            # aligned. `Exception` (rather than a bare except) lets
            # KeyboardInterrupt and SystemExit propagate.
            report_parts.append(f'{smiles}: Fail\n')
            total_similar_list.append([])
            all_images.append(None)
            continue

        report_parts.extend(entry_lines)
        total_similar_list.append(sub_smiles)
        all_images.append(img)

    return total_similar_list, ''.join(report_parts), all_images
def _build_structure(smile: str):
    '''
    Builds a hydrogen-complete 3D structure for one smiles string.

    Args:
        smile: the input smiles string
    Returns:
        molH: the RDKit molecule with explicit hydrogens and one conformer
        structure_string: one 'symbol x y z' line per atom
        formula: each element symbol followed by its atom count, in order of
            first appearance among the atoms
    Raises:
        ValueError: if the smiles string cannot be parsed or no 3D
            conformer could be generated
    '''
    mol = Chem.MolFromSmiles(smile)
    if mol is None:
        # RDKit signals a parse failure by returning None, not by raising.
        raise ValueError(f'could not parse SMILES: {smile!r}')

    molH = Chem.AddHs(mol)
    if AllChem.EmbedMolecule(molH) < 0:
        # A negative conformer id means embedding failed; without this check
        # GetConformer() below would raise a less helpful error.
        raise ValueError(f'could not embed a 3D conformer for: {smile!r}')
    # The optimizer's return code is ignored, as before: if the refinement
    # does not succeed the embedded coordinates are still reported.
    AllChem.MMFFOptimizeMolecule(molH)

    conformer = molH.GetConformer()
    symbols = []
    coordinate_lines = []
    for atom in molH.GetAtoms():
        symbol = atom.GetSymbol()
        symbols.append(symbol)
        pos = conformer.GetAtomPosition(atom.GetIdx())
        coordinate_lines.append(f'{symbol} {pos[0]} {pos[1]} {pos[2]}\n')

    formula = ''.join(
        f'{element}{count}' for element, count in Counter(symbols).items()
    )
    return molH, ''.join(coordinate_lines), formula


def structure_node(smiles_list: list[str]) -> tuple[list[str], str, list]:
    '''
    Generates the 3D structure of each molecule based on its smiles string.

    A molecule that cannot be processed no longer aborts the whole batch: it
    is reported as '<smiles>: Fail' (the same wording the other tools in this
    file use) and gets an empty-string placeholder in all_structures, so that
    list stays aligned with smiles_list.

    Args:
        smiles_list: the list of input smiles strings
    Returns:
        all_structures: a list of strings of the 3D structure of each
            molecule, one 'symbol x y z' line per atom ('' for a failed input)
        output_string: a string of the chemical formulae, plus a Fail line
            for each input that could not be processed
        all_images: a one-element list holding a single grid image of all
            successfully built molecules, or None when there are none
    '''
    print("structure tool")

    all_mols = []
    all_structures = []
    report_lines = []

    for smile in smiles_list:
        try:
            molH, structure_string, formula = _build_structure(smile)
        except Exception:
            # Best-effort, matching the sibling tools: record the failure and
            # continue with the remaining molecules.
            all_structures.append('')
            report_lines.append(f'{smile}: Fail\n')
            continue

        report_lines.append(f'For {smile}: Formula is: {formula}\n')
        all_structures.append(structure_string)
        all_mols.append(molH)

    # With nothing to draw, return a None image (as related_node does for a
    # failed input) instead of requesting a grid with zero rows.
    img = None
    if all_mols:
        img = Draw.MolsToGridImage(all_mols, molsPerRow=3, subImgSize=(250, 250))

    return all_structures, ''.join(report_lines), [img]