Spaces:

cafierom
/

MoDrAg2-OpenAI

Sleeping

App Files Files Community

MoDrAg2-OpenAI / modrag_molecule_functions.py

cafierom

Upload modrag_molecule_functions.py

68fe4a8 verified about 1 month ago

raw

history blame contribute delete

6.08 kB

	import matplotlib.pyplot as plt

	from rdkit import Chem
	from rdkit.Chem import AllChem, QED
	from rdkit.Chem import Draw
	from rdkit.Chem.Draw import MolsToGridImage
	from rdkit import rdBase
	from rdkit.Chem import rdMolAlign
	import os, re
	from rdkit import RDConfig
	import pubchempy as pcp
	from PIL import Image
	from collections import Counter
	from langchain_core.tools import tool

	@tool
	def name_node(smiles_list: list[str]) -> tuple[list[str], str, None]:
	    """
	    Queries Pubchem for the name of the molecule based on the smiles string.
	      Args:
	        smiles_list: the list of input smiles strings
	      Returns:
	        names_list: the list of names of the molecules
	        name_string: a string of the tool results
	    """
	    # NOTE: under LangChain's @tool the docstring above doubles as the tool
	    # description, so keep it short and accurate.
	    #
	    # The returned tuple has a third element that is always None; the image
	    # producing tools in this file return an image in that slot.
	    # SMILES whose lookup fails are reported as "<smiles>: Fail" in the
	    # result string and are omitted from the names list, so that list may be
	    # shorter than smiles_list.
	    print("name tool")
	    print('===================================================')

	    names = []
	    report = []
	    for smiles in smiles_list:
	        # Best-effort boundary: PubChem lookups can fail in many ways
	        # (network error, no match -> IndexError on [0], ...). One bad entry
	        # must not abort the whole batch, but Ctrl-C / SystemExit still
	        # propagate because only Exception is caught.
	        try:
	            compound = pcp.get_compounds(smiles, "smiles")[0]
	            name = compound.iupac_name
	        except Exception as exc:
	            print(f'name lookup failed for {smiles}: {exc!r}')
	            report.append(f'{smiles}: Fail\n')
	            continue

	        names.append(name)
	        report.append(f'{smiles}: IUPAC molecule name: {name}\n')
	        print(smiles, name)

	        # Synonyms are optional extras: if this second request fails, keep
	        # the IUPAC name already reported instead of also emitting a
	        # contradictory "Fail" line for the same molecule.
	        try:
	            synonyms = pcp.get_synonyms(compound.cid)[0]['Synonym'][:5]
	        except Exception as exc:
	            print(f'synonym lookup failed for {smiles}: {exc!r}')
	            synonyms = []
	        report.extend(
	            f'{smiles}: alternative or common name: {alt_name}\n'
	            for alt_name in synonyms
	        )

	    return names, ''.join(report), None

	@tool
	def smiles_node(names_list: list[str]) -> tuple[list[str], str, None]:
	    """
	    Queries Pubchem for the smiles string of the molecule based on the name.
	      Args:
	        names_list: the list of molecule names
	      Returns:
	        smiles_list: the list of smiles strings of the molecules
	        smiles_string: a string of the tool results
	    """
	    # NOTE: under LangChain's @tool the docstring above doubles as the tool
	    # description, so keep it short and accurate.
	    #
	    # The returned tuple has a third element that is always None; the image
	    # producing tools in this file return an image in that slot.
	    # Names whose lookup fails are reported as "<name>: Fail" in the result
	    # string and are omitted from the SMILES list, so that list may be
	    # shorter than names_list.
	    print("smiles tool")
	    print('===================================================')

	    smiles_list = []
	    report = []
	    for name in names_list:
	        # Best-effort boundary: a network error or an empty result
	        # (IndexError on [0]) for one name must not abort the batch. Only
	        # Exception is caught so Ctrl-C / SystemExit still propagate.
	        try:
	            smiles = pcp.get_compounds(name, "name")[0].smiles
	        except Exception as exc:
	            print(f'smiles lookup failed for {name}: {exc!r}')
	            report.append(f'{name}: Fail\n')
	            continue

	        smiles_list.append(smiles)
	        report.append(
	            f'{name}: The SMILES string for the molecule is: {smiles}\n'
	        )

	    return smiles_list, ''.join(report), None

	@tool
	def related_node(smiles_list: list[str]) -> tuple[list[list[str]], str, object]:
	    """
	    Queries Pubchem for similar molecules based on the smiles string.
	      Args:
	        smiles_list: the list of input smiles strings
	      Returns:
	        total_similar_list: a list of lists of smiles strings of similar molecules
	        related_string: a string of the tool results
	        img: a grid image of the similar molecules, or None if none was drawn
	    """
	    # NOTE: under LangChain's @tool the docstring above doubles as the tool
	    # description, so keep it short and accurate.
	    #
	    # total_similar_list holds exactly one entry per input SMILES (an empty
	    # list when that input failed). Only ONE image is returned: the grid for
	    # the most recent input that was drawn successfully. That image is also
	    # written to 'current_image.png' in the working directory.
	    print("related tool")
	    print('===================================================')

	    total_similar_list = []
	    report = []
	    last_image = None
	    for smiles in smiles_list:
	        # Best-effort boundary: the similarity search or the drawing step can
	        # fail for one input without aborting the others. Only Exception is
	        # caught so Ctrl-C / SystemExit still propagate.
	        try:
	            res = pcp.get_compounds(
	                smiles, "smiles", searchtype="similarity", listkey_count=50
	            )
	            report.append(f'The following molecules are similar to {smiles}: \n')
	            print('got related molecules with smiles')
	            if res:
	                print(res[0].iupac_name)

	            sub_smiles = []
	            for compound in res:
	                sub_smiles.append(compound.smiles)
	                report.append(f'Name: {compound.iupac_name}\n')
	                report.append(f'SMILES: {compound.smiles}\n')
	                report.append(f'Molecular Weight: {compound.molecular_weight}\n')
	                report.append(f'LogP: {compound.xlogp}\n')
	                report.append('===================\n')

	            sub_mols = [Chem.MolFromSmiles(entry) for entry in sub_smiles]
	            legend = [str(entry) for entry in sub_smiles]
	            grid = Draw.MolsToGridImage(
	                sub_mols, legends=legend, molsPerRow=4, subImgSize=(250, 250)
	            )
	        except Exception as exc:
	            print(f'related lookup failed for {smiles}: {exc!r}')
	            report.append(f'{smiles}: Fail\n')
	            total_similar_list.append([])
	            continue

	        # Record results only after every step succeeded, so a drawing
	        # failure cannot leave two entries for the same input.
	        total_similar_list.append(sub_smiles)
	        last_image = grid

	    related_string = ''.join(report)

	    # Nothing was drawn (empty input or every lookup failed): there is no
	    # image to save or return.
	    if last_image is None:
	        return total_similar_list, related_string, None

	    try:
	        last_image.save('current_image.png')
	    except Exception:
	        # Fallback for image objects without a usable .save() that expose
	        # their encoded bytes as .data (presumably the notebook-style image
	        # RDKit can return - TODO confirm). Write the bytes and reload the
	        # file as a PIL image.
	        with open('current_image.png', 'wb') as f:
	            f.write(last_image.data)
	        last_image = Image.open('current_image.png')

	    return total_similar_list, related_string, last_image

	def _build_3d_mol(smile):
	    """Return an H-added, 3D-embedded RDKit molecule for *smile*, or raise ValueError."""
	    mol = Chem.MolFromSmiles(smile)
	    if mol is None:
	        raise ValueError(f'could not parse SMILES: {smile!r}')
	    mol_h = Chem.AddHs(mol)
	    # EmbedMolecule returns the new conformer id, or -1 when embedding fails;
	    # without a conformer there are no coordinates to report.
	    if AllChem.EmbedMolecule(mol_h) < 0:
	        raise ValueError(f'3D embedding failed for: {smile!r}')
	    # The optimizer's status code is ignored on purpose: the embedded
	    # geometry is still usable when the optimization does not converge.
	    AllChem.MMFFOptimizeMolecule(mol_h)
	    return mol_h

	@tool
	def structure_node(smiles_list: list[str]) -> tuple[list[str], str, object]:
	    """
	    Generates the 3D structure of the molecule based on the smiles string.
	      Args:
	        smiles_list: the list of input smiles strings
	      Returns:
	        all_structures: a list of strings of the 3D structure of the molecule
	        output_string: a string of the chemical formulae.
	        img: a grid image of the molecules, or None if none could be built
	    """
	    # NOTE: under LangChain's @tool the docstring above doubles as the tool
	    # description, so keep it short and accurate.
	    #
	    # Each structure string has one "<symbol> <x> <y> <z>" line per atom
	    # (hydrogens included). SMILES that cannot be parsed or embedded are
	    # reported as "<smiles>: Fail" in output_string and are omitted from
	    # all_structures and from the image. The image is also written to
	    # 'current_image.png' in the working directory.
	    print("structure tool")

	    all_mols = []
	    all_structures = []
	    report = []

	    for smile in smiles_list:
	        # Best-effort boundary, matching the other tools in this file: one
	        # bad SMILES must not abort the whole batch.
	        try:
	            mol_h = _build_3d_mol(smile)
	            conformer = mol_h.GetConformer()
	        except Exception as exc:
	            print(f'structure generation failed for {smile}: {exc!r}')
	            report.append(f'{smile}: Fail\n')
	            continue

	        coord_lines = []
	        all_symbols = []
	        for atom in mol_h.GetAtoms():
	            symbol = atom.GetSymbol()
	            all_symbols.append(symbol)
	            pos = conformer.GetAtomPosition(atom.GetIdx())
	            coord_lines.append(f'{symbol} {pos[0]} {pos[1]} {pos[2]}\n')

	        # Element counts are emitted in order of first appearance among the
	        # atoms, e.g. "C2O1H6".
	        atom_freqs = Counter(all_symbols)
	        formula = ''.join(f'{atom}{count}' for atom, count in atom_freqs.items())

	        report.append(f'For {smile}: Formula is: {formula}\n')
	        all_structures.append(''.join(coord_lines))
	        all_mols.append(mol_h)

	    output_string = ''.join(report)

	    # No molecule could be built (empty input or every entry failed): there
	    # is nothing to draw.
	    if not all_mols:
	        return all_structures, output_string, None

	    img = Draw.MolsToGridImage(all_mols, molsPerRow=3, subImgSize=(250, 250))

	    # Save the image as current_image.png
	    try:
	        img.save('current_image.png')
	    except Exception:
	        # Fallback for image objects without a usable .save() that expose
	        # their encoded bytes as .data (presumably the notebook-style image
	        # RDKit can return - TODO confirm). Write the bytes and reload the
	        # file as a PIL image.
	        with open('current_image.png', 'wb') as f:
	            f.write(img.data)
	        img = Image.open('current_image.png')

	    return all_structures, output_string, img