# MoleculeAgent / chem_nodes.py
# cafierom's picture
# Create chem_nodes.py
# 7c995c8 verified
import torch
from typing import Annotated, TypedDict, Literal
from langchain_community.tools import DuckDuckGoSearchRun
from langchain_core.tools import tool
from langgraph.prebuilt import ToolNode, tools_condition
from langgraph.graph import StateGraph, START, END
from langgraph.graph.message import add_messages
from langchain_core.messages import SystemMessage, trim_messages, AIMessage, HumanMessage, ToolCall
import re
import matplotlib.pyplot as plt
from rdkit import Chem
from rdkit.Chem import AllChem, QED
from rdkit.Chem import Draw
from rdkit.Chem.Draw import MolsToGridImage
from rdkit import rdBase
from rdkit.Chem import rdMolAlign
import os, re
from rdkit import RDConfig
import pubchempy as pcp
import gradio as gr
from PIL import Image
class State(TypedDict):
    """
    Shared state dictionary handed from node to node in the agent graph.

    Each node receives this mapping, updates the keys it owns and
    returns it.
    """
    # Conversation history; annotated with the ``add_messages`` reducer.
    messages: Annotated[list, add_messages]

    # --- description of the current query ---------------------------------
    # SMILES string of the molecule (read by name_node / similars_node,
    # written by smiles_node).
    query_smiles: str
    # Not read or written by the nodes visible in this part of the file.
    query_task: str
    # Molecule name (written by name_node, read by smiles_node /
    # similars_node).
    query_name: str
    # Not read or written by the nodes visible in this part of the file.
    query_reference: str

    # --- tool bookkeeping -------------------------------------------------
    # Not read or written by the nodes visible in this part of the file.
    tool_choice: tuple
    # Counter that every tool node increments by one when it finishes.
    which_tool: int

    # --- accumulated results ----------------------------------------------
    # Running text of tool results; each tool node appends to it.
    props_string: str
    # Set by similars_node: image file name stem, or None when no image
    # was produced.
    similars_img: str
    # Not read or written by the nodes visible in this part of the file.
    loop_again: str
def name_node(state: State) -> State:
    """
    Query PubChem for the name of the molecule given by its SMILES string.

    Args:
        state: the agent state. Reads ``query_smiles`` and ``props_string``.
    Returns:
        The same state object, updated in place:
        query_name: the IUPAC name of the first PubChem match, or
            "unknown" when the lookup fails.
        props_string: the previous text plus the IUPAC name and up to five
            synonyms, or a "could not find" line on failure.
        which_tool: incremented by one.
    """
    print("name tool")
    print('===================================================')
    current_props_string = state["props_string"]

    try:
        compound = pcp.get_compounds(state["query_smiles"], "smiles")[0]
        name = compound.iupac_name
    except Exception as err:
        # Best effort: a failed lookup (network error, no match, missing
        # key) must not crash the graph, but it should not be invisible.
        print(f'name tool lookup failed: {err!r}')
        name = "unknown"
        # Trailing newline so that the next tool's text starts on its own
        # line in props_string.
        name_string = 'Could not find name for molecule\n'
    else:
        lines = [f'IUPAC molecule name: {name}\n']
        # Synonyms are a bonus: if this second request fails, keep the
        # IUPAC name that was already found instead of discarding it.
        try:
            synonyms = pcp.get_synonyms(compound.cid)[0]['Synonym'][:5]
        except Exception as err:
            print(f'name tool synonym lookup failed: {err!r}')
            synonyms = []
        lines.extend(
            f'alternative or common name: {alt_name}\n' for alt_name in synonyms
        )
        name_string = ''.join(lines)

    state["query_name"] = name
    state["props_string"] = current_props_string + name_string
    state["which_tool"] += 1
    return state
def smiles_node(state: State) -> State:
    """
    Query PubChem for the SMILES string of the molecule given by its name.

    Args:
        state: the agent state. Reads ``query_name`` and ``props_string``.
    Returns:
        The same state object, updated in place:
        query_smiles: the SMILES string of the first PubChem match, or
            "unknown" when the lookup fails.
        props_string: the previous text plus the SMILES line, or a
            "could not find" line on failure.
        which_tool: incremented by one.
    """
    print("smiles tool")
    print('===================================================')
    current_props_string = state["props_string"]

    try:
        compound = pcp.get_compounds(state["query_name"], "name")[0]
        smiles = compound.smiles
    except Exception as err:
        # Best effort: a failed lookup (network error, no match, missing
        # key or attribute) must not crash the graph, but the reason is
        # printed so it can be diagnosed.
        print(f'smiles tool lookup failed: {err!r}')
        smiles = "unknown"
        # Trailing newline so that the next tool's text starts on its own
        # line in props_string.
        smiles_string = 'Could not find smiles for molecule\n'
    else:
        smiles_string = f'molecule SMILES: {smiles}\n'

    state["query_smiles"] = smiles
    state["props_string"] = current_props_string + smiles_string
    state["which_tool"] += 1
    return state
def _save_grid_image(grid, path: str) -> None:
    """Write the object returned by ``Draw.MolsToGridImage`` to ``path`` as PNG."""
    # Depending on the environment the grid may be an object carrying the
    # PNG bytes in ``.data`` (what the original code relied on), raw PNG
    # bytes, or an image object with a ``save`` method.
    payload = getattr(grid, 'data', None)
    if not isinstance(payload, (bytes, bytearray)) and isinstance(grid, (bytes, bytearray)):
        payload = grid
    if isinstance(payload, (bytes, bytearray)):
        with open(path, 'wb') as outf:
            outf.write(payload)
    else:
        grid.save(path, format='PNG')


def similars_node(state: State) -> State:
    """
    Query PubChem for molecules similar to the query molecule.

    The SMILES string is used when ``query_smiles`` is not None, otherwise
    the name in ``query_name``. A grid image of the hits is written to
    "Similars_image.png" in the current working directory.

    Args:
        state: the agent state. Reads ``query_smiles``, ``query_name`` and
            ``props_string``.
    Returns:
        The same state object, updated in place:
        props_string: the previous text plus name, SMILES, molecular weight
            and LogP of every hit, or a "could not find" line.
        similars_img: "Similars_image" (file name without extension) when
            the image was written, otherwise None.
        which_tool: incremented by one on every path.
    """
    print("similars tool")
    print('===================================================')
    current_props_string = state["props_string"]

    not_found = 'Could not find similar molecules\n'
    props_string = not_found
    sub_smiles = []
    filename = None

    # Step 1: fetch the similar compounds and format the text summary.
    try:
        if state['query_smiles'] is not None:
            res = pcp.get_compounds(state["query_smiles"], "smiles",
                                    searchtype="similarity", listkey_count=20)
        elif state['query_name'] is not None:
            # NOTE(review): confirm that PubChem accepts the "name"
            # namespace for similarity searches.
            res = pcp.get_compounds(state["query_name"], "name",
                                    searchtype="similarity", listkey_count=20)
        else:
            # Fall through to the common tail so that which_tool is still
            # advanced and similars_img is still set, like on every other
            # path.
            print('Not enough information to run similars tool')
            res = []

        compounds = list(res)
        if compounds:
            print(compounds[0].iupac_name)
            sub_smiles = [compound.smiles for compound in compounds]
            props_string = 'Found Similar compounds: \n' + ''.join(
                f'Name: {compound.iupac_name}\n'
                f'SMILES: {compound.smiles}\n'
                f'Molecular Weight: {compound.molecular_weight}\n'
                f'LogP: {compound.xlogp}\n'
                '===================\n'
                for compound in compounds
            )
    except Exception as err:
        # Best effort: report the failure in the tool output instead of
        # crashing the graph, and print the reason for diagnosis.
        print(f'similars tool lookup failed: {err!r}')
        sub_smiles = []
        props_string = not_found

    # Step 2: draw the hits. Kept separate so that a drawing or file
    # problem does not throw away the text results gathered above.
    if sub_smiles:
        try:
            sub_mols = [Chem.MolFromSmiles(smile) for smile in sub_smiles]
            legend = [str(smile) for smile in sub_smiles]
            img = Draw.MolsToGridImage(sub_mols, legends=legend,
                                       molsPerRow=4, subImgSize=(250, 250))
            _save_grid_image(img, "Similars_image.png")
            filename = "Similars_image"
        except Exception as err:
            print(f'similars tool could not write image: {err!r}')
            filename = None

    state["props_string"] = current_props_string + props_string
    state['similars_img'] = filename
    state["which_tool"] += 1
    return state