Spaces:

cafierom
/

MoleculeAgent

Sleeping

App Files Files Community

MoleculeAgent / chem_nodes.py

cafierom

Create chem_nodes.py

7c995c8 verified 4 months ago

raw

history blame contribute delete

4.8 kB

	import torch
	from typing import Annotated, TypedDict, Literal
	from langchain_community.tools import DuckDuckGoSearchRun
	from langchain_core.tools import tool
	from langgraph.prebuilt import ToolNode, tools_condition
	from langgraph.graph import StateGraph, START, END
	from langgraph.graph.message import add_messages
	from langchain_core.messages import SystemMessage, trim_messages, AIMessage, HumanMessage, ToolCall

	import re
	import matplotlib.pyplot as plt

	from rdkit import Chem
	from rdkit.Chem import AllChem, QED
	from rdkit.Chem import Draw
	from rdkit.Chem.Draw import MolsToGridImage
	from rdkit import rdBase
	from rdkit.Chem import rdMolAlign
	import os, re
	from rdkit import RDConfig
	import pubchempy as pcp
	import gradio as gr
	from PIL import Image

	class State(TypedDict):
	    '''
	    The state of the agent.

	    A TypedDict that is passed to, mutated by, and returned from each
	    node function in this file (name_node, smiles_node, similars_node).
	    '''
	    # Conversation messages; add_messages is attached as the reducer annotation.
	    messages: Annotated[list, add_messages]
	    # SMILES string of the molecule. Read by name_node and similars_node;
	    # written by smiles_node ("unknown" when its lookup fails).
	    query_smiles: str
	    # Not used by the nodes visible here - presumably the user's task text; TODO confirm.
	    query_task: str
	    # Molecule name. Read by smiles_node and similars_node; written by
	    # name_node ("unknown" when its lookup fails).
	    query_name: str
	    # Not used by the nodes visible here - presumably a reference molecule; TODO confirm.
	    query_reference: str
	    # Not used by the nodes visible here - presumably the tools selected to run; TODO confirm.
	    tool_choice: tuple
	    # Counter incremented by one each time a node finishes
	    # (presumably an index into tool_choice - verify against the router).
	    which_tool: int
	    # Accumulated text of tool results; every node appends its output here.
	    props_string: str
	    # Set by similars_node to the image file stem "Similars_image"
	    # (without the ".png" extension), or to None when that node fails.
	    similars_img: str
	    # Not used by the nodes visible here - presumably a loop-control flag; TODO confirm.
	    loop_again: str

	def name_node(state: State) -> State:
	    '''
	    Queries Pubchem for the name of the molecule based on the smiles string.

	    The IUPAC name lookup and the synonym lookup are handled separately, so
	    a missing or failing synonym list no longer discards an IUPAC name that
	    was already found.

	    Args:
	        state: the agent state. Reads "query_smiles", "props_string" and
	            "which_tool".
	    Returns:
	        The same state object, updated in place:
	        query_name: the IUPAC name, or "unknown" if the lookup failed
	        props_string: the previous value with this tool's results appended
	        which_tool: incremented by one
	    '''
	    print("name tool")
	    print('===================================================')
	    current_props_string = state["props_string"]

	    try:
	        compound = pcp.get_compounds(state["query_smiles"], "smiles")[0]
	        name = compound.iupac_name
	    except Exception as err:
	        # Best-effort tool: report the failure in the results text instead of
	        # raising, but let KeyboardInterrupt/SystemExit propagate.
	        print(f'name lookup failed: {err!r}')
	        name = "unknown"
	        name_string = 'Could not find name for molecule'
	    else:
	        name_string = f'IUPAC molecule name: {name}\n'
	        try:
	            syn_list = pcp.get_synonyms(compound.cid)
	            # Only the first five synonyms are reported.
	            synonyms = syn_list[0]['Synonym'][:5]
	            name_string += ''.join(
	                f'alternative or common name: {alt_name}\n' for alt_name in synonyms
	            )
	        except Exception as err:
	            # Synonyms are optional extras; keep the IUPAC name found above.
	            print(f'synonym lookup failed: {err!r}')

	    state["query_name"] = name
	    state["props_string"] = current_props_string + name_string
	    state["which_tool"] += 1
	    return state

	def smiles_node(state: State) -> State:
	    '''
	    Queries Pubchem for the smiles string of the molecule based on the name.

	    Args:
	        state: the agent state. Reads "query_name", "props_string" and
	            "which_tool".
	    Returns:
	        The same state object, updated in place:
	        query_smiles: the smiles string of the first match, or "unknown"
	            if the lookup failed
	        props_string: the previous value with this tool's results appended
	        which_tool: incremented by one
	    '''
	    print("smiles tool")
	    print('===================================================')
	    current_props_string = state["props_string"]

	    try:
	        matches = pcp.get_compounds(state["query_name"], "name")
	        # Only the first match is used.
	        smiles = matches[0].smiles
	    except Exception as err:
	        # Best-effort tool: report the failure in the results text instead of
	        # raising, but let KeyboardInterrupt/SystemExit propagate.
	        print(f'smiles lookup failed: {err!r}')
	        smiles = "unknown"
	        smiles_string = 'Could not find smiles for molecule'
	    else:
	        smiles_string = f'molecule SMILES: {smiles}\n'

	    state["query_smiles"] = smiles
	    state["props_string"] = current_props_string + smiles_string
	    state["which_tool"] += 1
	    return state

	def similars_node(state: State) -> State:
	    '''
	    Queries Pubchem for similar molecules based on the smiles string or name.

	    The smiles string is preferred; the name is used only when
	    state["query_smiles"] is None. A grid image of the hits is written to
	    "Similars_image.png" in the current working directory.

	    Args:
	        state: the agent state. Reads "query_smiles", "query_name",
	            "props_string" and "which_tool".
	    Returns:
	        The same state object, updated in place:
	        props_string: the previous value with this tool's results appended
	        similars_img: "Similars_image" (file stem without ".png"), or None
	            if the search or the drawing failed
	        which_tool: incremented by one
	        If both query_smiles and query_name are None, the state is returned
	        unchanged (which_tool is not advanced).
	    '''
	    print("similars tool")
	    print('===================================================')
	    current_props_string = state["props_string"]

	    try:
	        if state['query_smiles'] is not None:
	            res = pcp.get_compounds(
	                state["query_smiles"], "smiles", searchtype="similarity", listkey_count=20
	            )
	        elif state['query_name'] is not None:
	            res = pcp.get_compounds(
	                state["query_name"], "name", searchtype="similarity", listkey_count=20
	            )
	        else:
	            # NOTE(review): this early exit leaves which_tool untouched, unlike
	            # the failure path below - confirm the graph router expects that.
	            print('Not enough information to run similars tool')
	            return state

	        sub_smiles = []
	        parts = ['Found Similar compounds: \n']
	        for index, compound in enumerate(res):
	            if index == 0:
	                print(compound.iupac_name)
	            sub_smiles.append(compound.smiles)
	            parts.append(f'Name: {compound.iupac_name}\n')
	            parts.append(f'SMILES: {compound.smiles}\n')
	            parts.append(f'Molecular Weight: {compound.molecular_weight}\n')
	            parts.append(f'LogP: {compound.xlogp}\n')
	            parts.append('===================')
	        props_string = ''.join(parts)

	        sub_mols = [Chem.MolFromSmiles(smile) for smile in sub_smiles]
	        legend = [str(smile) for smile in sub_smiles]

	        img = Draw.MolsToGridImage(sub_mols, legends=legend, molsPerRow=4, subImgSize=(250, 250))

	        filename = "Similars_image"
	        # The object returned by MolsToGridImage depends on the environment:
	        # with RDKit's notebook integration active it is a display image that
	        # exposes PNG bytes as `.data` (what this code originally assumed);
	        # otherwise it is expected to be a PIL image or raw PNG bytes.
	        # Handle all three so the tool also works outside a notebook.
	        png_bytes = getattr(img, "data", img)
	        if isinstance(png_bytes, (bytes, bytearray)):
	            with open(filename + ".png", 'wb') as outf:
	                outf.write(png_bytes)
	        else:
	            img.save(filename + ".png")
	    except Exception as err:
	        # Best-effort tool: report the failure in the results text instead of
	        # raising, but let KeyboardInterrupt/SystemExit propagate.
	        print(f'similars lookup failed: {err!r}')
	        props_string = 'Could not find similar molecules'
	        filename = None

	    state["props_string"] = current_props_string + props_string
	    state['similars_img'] = filename
	    state["which_tool"] += 1
	    return state