Spaces:

cafierom
/

MoDrAg

Sleeping

App Files Files Community

MoDrAg / app.py

cafierom

Update app.py

9a8bb26 verified 5 months ago

raw

history blame contribute delete

36.1 kB

	import torch
	from typing import Annotated, TypedDict, Literal
	from langchain_community.tools import DuckDuckGoSearchRun
	from langchain_core.tools import tool
	from langgraph.prebuilt import ToolNode, tools_condition
	from langgraph.graph import StateGraph, START, END
	from langgraph.graph.message import add_messages
	from langchain_core.messages import SystemMessage, trim_messages, AIMessage, HumanMessage, ToolCall

	from langchain_huggingface.llms import HuggingFacePipeline
	from langchain_huggingface import ChatHuggingFace
	from langchain_core.prompts import PromptTemplate, ChatPromptTemplate
	from langchain_core.runnables import chain
	from uuid import uuid4
	import re
	import matplotlib.pyplot as plt
	import spaces
	from elevenlabs.client import ElevenLabs
	from elevenlabs import stream
	import base64

	from rdkit import Chem
	from rdkit.Chem import AllChem, QED
	from rdkit.Chem import Draw
	from rdkit.Chem.Draw import MolsToGridImage
	from rdkit import rdBase
	from rdkit.Chem import rdMolAlign
	import os
	from rdkit import RDConfig
	import pubchempy as pcp
	import gradio as gr
	from PIL import Image
	from gradio_client import Client
	from anthropic import Anthropic

	# Pick the accelerator when one is visible.
	# NOTE(review): `device` is not passed to the pipeline in the visible code.
	if torch.cuda.is_available():
	    device = "cuda"
	else:
	    device = "cpu"

	# Generation settings handed to the text-generation pipeline.
	_generation_kwargs = {"max_new_tokens": 1000, "temperature": 0.2}

	hf = HuggingFacePipeline.from_model_id(
	    model_id="microsoft/Phi-4-mini-instruct",
	    task="text-generation",
	    pipeline_kwargs=_generation_kwargs,
	)

	# Chat wrapper invoked by every LLM-calling node of the graph.
	chat_model = ChatHuggingFace(llm=hf)

	class State(TypedDict):
	'''
	The state of the agent: the values shared between the graph nodes.
	'''
	# conversation messages, merged with the add_messages reducer
	messages: Annotated[list, add_messages]
	# values extracted from the user input by first_node (set to None when absent)
	query_smiles: str
	query_task: str
	query_name: str
	query_reference: str
	query_protein: str
	query_up_id: str
	query_chembl: str
	query_pdb: str
	# two-slot tuple of agent names (or None) chosen by calling_node
	tool_choice: tuple
	# index into tool_choice; reset to 0 by calling_node, incremented by each tool node
	which_tool: int
	# text accumulated from the tool nodes, used as CONTEXT by parser_node
	props_string: str
	# image file name written by protein_node / dock_node (None after first_node)
	similars_img: str
	# set by parser_node: "loop_again", "finish_gracefully" or None
	loop_again: str

	def molecule_node(state: State) -> State:
	    '''
	    Calls the remote Molecule Agent (the "cafierom/MoleculeAgent" Space), which can
	    find names and SMILES strings of molecules and list similar molecules.

	    Reads from the state: query_task, query_smiles, query_name, props_string.
	    Writes to the state: props_string (agent text appended) and which_tool (+1).
	    If the agent returns an image it is saved as "Substitution_image.png".

	    A failed agent call contributes no text. A failure while handling the image
	    does NOT discard text that the agent already returned.
	    '''
	    print("molecule tool")
	    print('===================================================')
	    current_props_string = state["props_string"]
	    query_smiles = state["query_smiles"]
	    query_name = state["query_name"]
	    full_query_task = state["query_task"]
	    print(f"in mol node input: {query_smiles}, {query_name}, {full_query_task}")
	    print('===================================================')

	    client = Client("cafierom/MoleculeAgent")

	    new_text, img = '', None
	    try:
	        new_text, img = client.predict(full_query_task, query_smiles, query_name, api_name="/MoleculeAgent")
	    except Exception as err:
	        # best effort: a failed remote call simply adds nothing to the context
	        print(f"molecule agent call failed: {err}")
	        new_text, img = '', None

	    # the image is optional; keep the text even when it cannot be opened
	    if img:
	        try:
	            image = Image.open(img)
	            plt.imshow(image)
	            plt.axis('off')
	            plt.show()
	            image.save("Substitution_image.png")
	        except Exception as err:
	            print(f"molecule agent image could not be processed: {err}")

	    if new_text is None:
	        new_text = ''
	    current_props_string += str(new_text)

	    state["props_string"] = current_props_string
	    state["which_tool"] += 1

	    return state

	def property_node(state: State) -> State:
	    '''
	    Calls the remote Property Agent (the "cafierom/PropAgent" Space), which can
	    calculate Lipinski properties of molecules, find the similarity between two
	    pharmacophores, and generate analogues of molecules with their QED values.

	    Reads from the state: query_task, query_smiles, query_reference, props_string.
	    Writes to the state: props_string (agent text appended) and which_tool (+1).
	    If the agent returns an image it is saved as "Substitution_image.png".

	    A failed agent call contributes no text. A failure while handling the image
	    does NOT discard text that the agent already returned.
	    '''
	    print("property tool")
	    print('===================================================')
	    current_props_string = state["props_string"]
	    query_smiles = state["query_smiles"]
	    query_reference = state["query_reference"]
	    query_task = state["query_task"]
	    print(f"in prop node input: {query_smiles}, {query_reference}, {query_task}")
	    print('===================================================')

	    client = Client("cafierom/PropAgent")

	    new_text, img = '', None
	    try:
	        new_text, img = client.predict(query_task, query_smiles, query_reference, api_name="/PropAgent")
	    except Exception as err:
	        # best effort: a failed remote call simply adds nothing to the context
	        print(f"property agent call failed: {err}")
	        new_text, img = '', None

	    # the image is optional; keep the text even when it cannot be opened
	    if img:
	        try:
	            image = Image.open(img)
	            plt.imshow(image)
	            plt.axis('off')
	            plt.show()
	            image.save("Substitution_image.png")
	        except Exception as err:
	            print(f"property agent image could not be processed: {err}")

	    if new_text is None:
	        new_text = ''
	    current_props_string += str(new_text)

	    state["props_string"] = current_props_string
	    state["which_tool"] += 1
	    return state

	def protein_node(state: State) -> State:
	    '''
	    Calls the remote Protein Agent (the "cafierom/ProteinAgent" Space), which can
	    answer protein-centric questions regarding Uniprot, Chembl bioactivity, and
	    PDB structural data.

	    Reads from the state: query_task, query_protein, query_up_id, query_chembl,
	    query_pdb, query_smiles, props_string.
	    Writes to the state: props_string (agent text appended), similars_img and
	    which_tool (+1). If the agent returns an image it is saved as
	    "Substitution_image.png".

	    A failed agent call contributes no text. A failure while handling the image
	    does NOT discard text that the agent already returned.
	    '''
	    print("protein tool")
	    print('===================================================')
	    current_props_string = state["props_string"]
	    query_task = state["query_task"]
	    query_protein = state["query_protein"]
	    query_up_id = state["query_up_id"]
	    query_chembl = state["query_chembl"]
	    query_pdb = state["query_pdb"]
	    query_smiles = state["query_smiles"]
	    print(f"in protein node input: task={query_task}, protein={query_protein}, up_id={query_up_id}, chembl={query_chembl}, pdb={query_pdb}, smiles={query_smiles}")
	    print('===================================================')

	    client = Client("cafierom/ProteinAgent")

	    new_text, img = '', None
	    try:
	        new_text, img = client.predict(
	            query_task,
	            query_protein,
	            query_up_id,
	            query_chembl,
	            query_pdb,
	            query_smiles,
	            api_name="/ProteinAgent"
	        )
	    except Exception as err:
	        # best effort: a failed remote call simply adds nothing to the context
	        print(f"protein agent call failed: {err}")
	        new_text, img = '', None

	    # the image is optional; keep the text even when it cannot be opened
	    if img:
	        try:
	            image = Image.open(img)
	            plt.imshow(image)
	            plt.axis('off')
	            plt.show()
	            image.save("Substitution_image.png")
	        except Exception as err:
	            print(f"protein agent image could not be processed: {err}")

	    if new_text is None:
	        new_text = ''
	    current_props_string += str(new_text)

	    filename = "proteinagent_image.png"
	    state["similars_img"] = filename
	    state["props_string"] = current_props_string
	    state["which_tool"] += 1
	    return state

	def dock_node(state: State) -> State:
	    '''
	    Calls the remote Docking Agent (the "cafierom/DockAgent" Space) with the
	    task, the molecule SMILES and the protein.

	    Reads from the state: query_task, query_smiles, query_protein, props_string.
	    Writes to the state: props_string (agent text appended), similars_img and
	    which_tool (+1). The image returned by the agent is not used here.

	    A failed agent call contributes no text.
	    '''
	    print("docking tool")
	    print('===================================================')
	    current_props_string = state["props_string"]
	    query_task = state["query_task"]
	    query_smiles = state["query_smiles"]
	    query_protein = state["query_protein"]

	    print(f"in docking node input: task={query_task}, smiles = {query_smiles}, protein = {query_protein}.")
	    print('===================================================')

	    client = Client("cafierom/DockAgent")

	    new_text = ''
	    try:
	        new_text, _img = client.predict(
	            query_task,
	            query_smiles,
	            query_protein,
	            api_name="/DockAgent"
	        )
	    except Exception as err:
	        # best effort: a failed remote call simply adds nothing to the context
	        print(f"docking agent call failed: {err}")
	        new_text = ''

	    if new_text is None:
	        new_text = ''
	    current_props_string += str(new_text)

	    filename = "agent_image.png"
	    state["similars_img"] = filename
	    state["props_string"] = current_props_string
	    state["which_tool"] += 1
	    return state

	def get_smile(name: str):
	    '''
	    Look up the SMILES string of a molecule by name on PubChem.

	    Args:
	        name: the molecule name; None, an empty string or the placeholder
	              "none" (any case) is treated as "no name" and is not looked up.
	    Returns:
	        the SMILES string of the first PubChem hit, or None if there is no
	        usable name, no hit, or the lookup fails.
	    '''
	    if not isinstance(name, str) or name.strip().lower() in ('', 'none'):
	        print(f'could not get smiles for {name}')
	        return None

	    try:
	        res = pcp.get_compounds(name, "name")
	        smiles = res[0].smiles
	    except Exception:
	        # covers network errors as well as an empty result list
	        print(f'could not get smiles for {name}')
	        return None

	    print(f'got smiles: {smiles}')
	    return smiles

	def first_node(state: State) -> State:
	    '''
	    The first node of the agent. It sends the latest user message to the LLM and
	    asks it to extract the task and any identifiers the user supplied.

	    Input: state["messages"][-1].content, the user's free-text request.
	    Output: the state with query_task, query_smiles, query_name, query_protein,
	    query_reference, query_up_id, query_chembl and query_pdb filled in (None for
	    anything the LLM reports as NONE or omits), props_string reset to "",
	    similars_img and loop_again reset to None, and the LLM reply stored under
	    "messages".
	    '''
	    user_input = state["messages"][-1].content

	    # label used in the LLM reply -> state key it fills
	    field_for_label = {
	        'QUERY_TASK': 'query_task',
	        'QUERY_SMILES': 'query_smiles',
	        'QUERY_NAME': 'query_name',
	        'QUERY_PROTEIN': 'query_protein',
	        'QUERY_REFERENCE': 'query_reference',
	        'QUERY_UP_ID': 'query_up_id',
	        'QUERY_CHEMBL': 'query_chembl',
	        'QUERY_PDB': 'query_pdb',
	    }
	    extracted = dict.fromkeys(field_for_label.values())

	    prompt = f'Background information: \
	QUERY_TASK is the task the user is asking us to perform. It should have as much information as possible about the task. \
	QUERY_SMILES is the SMILES string for a molecule that the user provided. \
	QUERY_NAME is the name of a molecule that the user provided. It may be an IUPAC name or a common name, such as a drug name. \
	QUERY_PROTEIN is the protein that the user provided. \
	QUERY_REFERENCE is the SMILES string of a second molecule that the user provided to serve as a reference. \n \
	QUERY_UP_ID is a Uniprot ID the user provided. \
	QUERY_CHEMBL is a Chembl ID the user provided. \
	QUERY_PDB is a PDB ID the user provided. \n \
	Examine the USER INPUT below. It should always contain a QUERY_TASK. It should also contain one or more of the following: a QUERY_SMILES, QUERY_NAME, \
	a QUERY_PROTEIN, a QUERY_REFERENCE, a QUERY_UP_ID, a QUERY CHEMBL or a QUERY_PDB. Your task is to extract any of these that are present. \n \
	Report your results in the following format: # QUERY_TASK: the task # QUERY_SMILES: the smiles string # QUERY_NAME: the name # \
	QUERY_PROTEIN: the protein # QUERY_REFERENCE: the reference smiles string # QUERY_UP_ID: the uniprot id # QUERY_CHEMBL: the chembl id # QUERY_PDB: the pdb id. \
	If one of the requested items is not present in the USER INPUT, use NONE as the value. \n \
	The QUERY_NAME, QUERY_REFERENCE, QUERY_PROTEIN or QUERY_SMILES may appear in the QUERY_TASK as well. \n \
	USER INPUT: {user_input}.\n \
	'

	    res1 = chat_model.invoke(prompt)

	    # Use the message text rather than str(message): the repr turns newlines into
	    # a literal backslash-n, which previously had to be stripped letter by letter
	    # and ate real trailing 'n' characters ("aspirin" -> "aspiri").
	    reply_text = str(res1.content).split('<|assistant|>')[-1]

	    # '#' separates the reported fields but is also the SMILES triple bond, so
	    # 'C#' is hidden as 'C~' while splitting and restored afterwards.
	    chunks = reply_text.replace('C#', 'C~').split('#')[1:]

	    for chunk in chunks:
	        # split on the first ':' only, so values that contain ':' are kept whole
	        label, colon, raw_value = chunk.partition(':')
	        if not colon:
	            continue
	        label = label.strip().upper().replace(' ', '_')
	        key = next((field for name, field in field_for_label.items() if name in label), None)
	        if key is None:
	            continue
	        value = raw_value.replace('~', '#').strip()
	        if not value or value.lower() == 'none':
	            value = None
	        extracted[key] = value

	    state.update(extracted)
	    state['similars_img'] = None
	    state["props_string"] = ""
	    state["loop_again"] = None
	    state['messages'] = res1

	    return state

	def calling_node(state: State) -> State:
	    '''
	    Asks the LLM which one or two sub-agents can complete the task and stores
	    the choice in the state.

	    Input: the query_* values in the state.
	    Output: the state with
	      - tool_choice: a two-slot tuple of agent names, padded with None. Only the
	        names routed by the graph (MOLECULE_AGENT, PROPERTY_AGENT, PROTEIN_AGENT,
	        DOCK_AGENT) are accepted, anything else in the reply is ignored, and the
	        accepted names are packed to the front so a valid agent is never hidden
	        behind an empty first slot.
	      - which_tool: reset to 0.
	      - query_smiles: looked up from query_name when it is missing and one of the
	        chosen agents needs a SMILES string.
	      - messages: the LLM reply.
	    '''
	    known_agents = ('MOLECULE_AGENT', 'PROPERTY_AGENT', 'PROTEIN_AGENT', 'DOCK_AGENT')
	    tools_that_need_smiles = ('PROPERTY_AGENT', 'DOCK_AGENT', 'PROTEIN_AGENT')

	    query_task = state["query_task"]
	    query_smiles = state["query_smiles"]
	    query_name = state["query_name"]
	    query_protein = state["query_protein"]
	    query_reference = state["query_reference"]
	    query_up_id = state["query_up_id"]
	    query_chembl = state["query_chembl"]
	    query_pdb = state["query_pdb"]

	    prompt = f'Examine the QUERY_TASK below as well as the other information provided (SMILES, NAME, PROTEIN, PDB, CHEMBL, UP_ID, REFERENCE) \
	and determine if ONE or TWO of the AGENTS descibed below could complete the task. If the AGENTS can complete \
	the task, reply as follows. If only one agent is needed: # first_agent_name; if two agents are needed: \
	# first_agent_name, second_agent_name. Carefully consider of two agents are needed by the QUERY TASK. \
	If the AGENTS cannot complete the task, reply with "# None ". \n \
	Do not offer any additional information. \n \
	MOLECULE_AGENT: Can complete three different tasks: query Pubchem for a molecule name based on the SMILES string \
	or query Pubchem for a SMILES string based on the molecule name, or find molecules related or similar to the given molecule based on the SMILES string \
	or name. \n \
	PROPERTY_AGENT: Can calculate Lipinski properties of molecules, find the pharmacophore-similarity between two molecule (a molecule and a reference), \
	and generate analogues of molecules with their QED values. \n \
	PROTEIN_AGENT: Can call Uniprot to find uniprot ids for a protein, can call Chembl to find hits for a given uniprot id and report the \
	number of bioactive molecules in the hit, can call Chembl to find a list bioactive molecules for a given chembl id and their IC50 values, \
	can call PDB to find the number of chains in a protein, or the protein sequence and any small molecules in the protein structure, \
	predicts the IC50 value for the molecule indicated by the SMILES string provided using the LightGBM model, and can generate novel \
	molecules using a GPT.\n \
	DOCK_AGENT: Can dock a molecule in a protein using AutoDock Vina and return a docking score and the coordinates/XYZ positions of conformation of \
	the docked molecule. \n \
	QUERY_TASK: {query_task}.\n \
	QUERY_SMILES: {query_smiles}.\n \
	QUERY_NAME: {query_name}.\n \
	QUERY_PROTEIN: {query_protein}.\n \
	QUERY_REFERENCE: {query_reference}.\n \
	QUERY_UP_ID: {query_up_id}.\n \
	QUERY_CHEMBL: {query_chembl}.\n \
	QUERY_PDB: {query_pdb}.\n \
	'

	    res2 = chat_model.invoke(prompt)
	    state["messages"] = res2

	    # keep only the assistant part of the text (the prompt itself names every agent)
	    reply = str(res2.content).split('<|assistant|>')[-1]

	    agents_list = []
	    for candidate in reply.split(','):
	        # "# Protein Agent" / "**protein_agent**;" -> "PROTEIN_AGENT"
	        agent = re.sub(r'[^A-Za-z]+', '_', candidate).strip('_').upper()
	        if agent in known_agents:
	            agents_list.append(agent)

	    print('in calling node: ', agents_list)

	    chosen = agents_list[:2]
	    tool_choice = tuple(chosen + [None] * (2 - len(chosen)))

	    current_smiles = state["query_smiles"]
	    smiles_missing = (not current_smiles) or (str(current_smiles).strip().lower() == 'none')
	    # one PubChem lookup is enough, however many of the chosen tools need SMILES
	    if smiles_missing and any(tool in tools_that_need_smiles for tool in tool_choice):
	        state["query_smiles"] = get_smile(state["query_name"])

	    state["tool_choice"] = tool_choice
	    state["which_tool"] = 0
	    print(f"The chosen tools are: {tool_choice}")

	    return state

	def loop_node(state: State) -> State:
	    '''
	    Junction node placed after every tool node. It performs no work of its own
	    and hands the state back unchanged; the conditional edge attached to it in
	    the graph picks the next node.
	    Input: the state returned by a tool node.
	    Output: the same state.
	    '''
	    unchanged = state
	    return unchanged

	def parser_node(state: State) -> State:
	    '''
	    Receives the accumulated tool output, puts it into a prompt as CONTEXT, asks
	    the LLM to answer the original query, and then asks the LLM whether that
	    answer is good enough.

	    Input: props_string, query_task and tool_choice from the state.
	    Output: the state with loop_again set to
	      - "finish_gracefully" when no tool was chosen (no LLM call is made),
	      - "loop_again" when the check reply is LOOP (messages then holds the check reply),
	      - None otherwise (messages holds the trial answer). An unreadable check
	        reply counts as "proceed" so a stale "loop_again" can never keep the
	        graph cycling.
	    '''
	    props_string = state["props_string"]
	    query_task = state["query_task"]
	    tool_choice = state["tool_choice"]

	    if tool_choice is None or (isinstance(tool_choice, tuple) and tool_choice == (None, None)):
	        state["loop_again"] = "finish_gracefully"
	        return state

	    prompt = f'Using the CONTEXT below, answer the original QUERY_TASK. Include any useful context provided \
	in the CONTEXT. Remeber that any docking scores reported were calculatd with AutoDock Vina. Begin your answer with a "#" \n \
	QUERY_TASK: {query_task}.\n \
	CONTEXT: {props_string}.\n '

	    res = chat_model.invoke(prompt)
	    # [-1] works whether or not the text still carries the prompt and marker
	    trial_answer = str(res.content).split('<|assistant|>')[-1]
	    print('parser 1 ', trial_answer)
	    state["messages"] = res

	    check_prompt = f'Determine if the TRIAL ANSWER below answers the original \
	QUERY TASK. If it does, respond with "PROCEED #" . If the TRIAL ANSWER did not \
	answer the QUERY TASK, respond with "LOOP #" \n \
	Only loop again if the TRIAL ANSWER did not answer the QUERY TASK. \
	TRIAL ANSWER: {trial_answer}.\n \
	QUERY_TASK: {query_task}.\n'

	    check_res = chat_model.invoke(check_prompt)
	    print('parser, loop again? ', check_res)

	    verdict = str(check_res.content).split('<|assistant|>')[-1].split('#')[0]
	    verdict = verdict.strip().strip('"\'').strip().lower()

	    if verdict == "loop":
	        state["loop_again"] = "loop_again"
	        state["messages"] = check_res
	    else:
	        state["loop_again"] = None

	    return state

	def gracefulexit_node(state: State) -> State:
	    '''
	    Fallback used when the agent could not assign any tool to the task: asks the
	    LLM to summarise whatever is in props_string and returns that reply as the
	    only state update.
	    '''
	    context = state["props_string"]
	    summary = chat_model.invoke(
	        f'Summarize the information in the CONTEXT, including any useful chemical information. Start your answer with: \
	Here is what I found: \n \
	CONTEXT: {context}'
	    )
	    return {"messages": summary}

	def get_agent(state):
	    '''
	    Routing function for the conditional edges: returns the name of the agent
	    to run next, or None when no further agent should run.

	    Reads state["tool_choice"] (tuple of agent names / None) and
	    state["which_tool"] (index of the next slot). Any index outside the tuple,
	    including a negative one, yields None instead of raising.
	    '''
	    which_tool = state["which_tool"]
	    tool_choice = state["tool_choice"]

	    if tool_choice is None or tool_choice == (None, None):
	        return None
	    # only the first two slots are ever routed
	    if which_tool not in (0, 1) or which_tool >= len(tool_choice):
	        return None
	    return tool_choice[which_tool]

	def loop_or_not(state):
	    '''
	    Routing function used after the parser node. Returns True to loop again,
	    'lets_get_outta_here' to finish gracefully, and False otherwise, based on
	    state["loop_again"].
	    '''
	    decision = state['loop_again']
	    print(f"(line 482) Loop? {decision}")
	    if decision == "finish_gracefully":
	        return 'lets_get_outta_here'
	    if decision == "loop_again":
	        return True
	    return False

	def pretty_print(answer):
	    '''
	    Print the assistant part of the last message in answer['messages'], up to
	    the first '#', in lines of at most 100 characters.

	    The last message may be a message object (its .content is used) or a plain
	    string. Surrounding whitespace is removed; the text itself is left intact.
	    '''
	    last_message = answer['messages'][-1]
	    text = str(getattr(last_message, 'content', last_message))
	    final = text.split('<|assistant|>')[-1].split('#')[0].strip()
	    for i in range(0, len(final), 100):
	        print(final[i:i+100])

	def print_short(answer):
	    '''Print answer in consecutive slices of at most 100 items, one per line.'''
	    start = 0
	    total = len(answer)
	    while start < total:
	        print(answer[start:start + 100])
	        start += 100

	builder = StateGraph(State)

	# register the nodes in the same order as before
	for _node_name, _node_fn in (
	    ("first_node", first_node),
	    ("calling_node", calling_node),
	    ("molecule_node", molecule_node),
	    ("property_node", property_node),
	    ("protein_node", protein_node),
	    ("dock_node", dock_node),
	    ("loop_node", loop_node),
	    ("parser_node", parser_node),
	    ("gracefulexit_node", gracefulexit_node),
	):
	    builder.add_node(_node_name, _node_fn)

	builder.add_edge(START, "first_node")
	builder.add_edge("first_node", "calling_node")

	# value returned by get_agent -> node to run next (None means: go and parse)
	_agent_routes = {
	    "MOLECULE_AGENT": "molecule_node",
	    "PROPERTY_AGENT": "property_node",
	    "PROTEIN_AGENT": "protein_node",
	    "DOCK_AGENT": "dock_node",
	    None: "parser_node",
	}

	builder.add_conditional_edges("calling_node", get_agent, dict(_agent_routes))

	# every tool node reports back to the loop node
	for _tool_node in ("molecule_node", "property_node", "protein_node", "dock_node"):
	    builder.add_edge(_tool_node, "loop_node")

	builder.add_conditional_edges("loop_node", get_agent, dict(_agent_routes))

	# value returned by loop_or_not -> node to run next
	builder.add_conditional_edges(
	    "parser_node",
	    loop_or_not,
	    {
	        True: "calling_node",
	        'lets_get_outta_here': "gracefulexit_node",
	        False: END,
	    },
	)

	builder.add_edge("gracefulexit_node", END)

	graph = builder.compile()

	# Module-level chat transcript appended to by DDAgent and reset by clear_history.
	chat_history = list()
	# The Anthropic key is read from the "anthropic_key" environment variable.
	claude_key = os.environ.get("anthropic_key")
	anth_client = Anthropic(api_key=claude_key)

	@spaces.GPU
	def DDAgent(task, voice_flag):
	    '''
	    Gradio handler: runs the agent graph on one user request.

	    Args:
	        task: the text typed by the user.
	        voice_flag: 'On' to also render the final answer as speech.
	    Returns:
	        a 4-tuple (empty string to clear the textbox, the updated chat history,
	        an image to display, an HTML audio player or '').

	    The final answer is the assistant text of the last node that produced a
	    message: everything after the first '#', or the whole text when there is no
	    '#'. Later '#' characters are kept so SMILES triple bonds survive.
	    '''
	    # The tool nodes save their picture as 'Substitution_image.png'; the older
	    # name 'Similars_image.png' is still honoured.
	    image_files = ('Similars_image.png', 'Substitution_image.png')

	    chat_history.append(
	        {"role": "user", "content": task}
	    )

	    # remove pictures left over from a previous request
	    for stale in image_files:
	        if os.path.exists(stale):
	            os.remove(stale)

	    graph_input = {
	        "messages": [
	            HumanMessage(f'{task}')
	        ]
	    }

	    replies = []
	    for c in graph.stream(graph_input):
	        # each streamed chunk maps a node name to that node's state update
	        for node_name, update in c.items():
	            if not str(node_name).endswith('_node') or not isinstance(update, dict):
	                continue
	            messages = update.get('messages')
	            if not messages:
	                continue
	            last_message = messages[-1] if isinstance(messages, (list, tuple)) else messages
	            text = str(getattr(last_message, 'content', last_message))
	            reply = text.split('<|assistant|>')[-1]
	            if '#' in reply:
	                reply = reply.split('#', 1)[1]
	            reply = reply.strip()
	            print(reply)
	            print('===================================================')
	            replies.append(reply)

	    final_reply = replies[-1] if replies and replies[-1] else "No response generated."

	    # show the picture saved by a tool node, or a blank placeholder
	    img = None
	    for candidate in image_files:
	        if os.path.exists(candidate):
	            img = Image.open(candidate)
	            break
	    if img is None:
	        img = Image.new('RGB', (250, 250), color = (255, 255, 255))

	    chat_history.append(
	        {"role": "assistant", "content": final_reply}
	    )

	    if voice_flag == 'On':
	        audio_player = render_voice(final_reply)
	    else:
	        audio_player = ''

	    return "", chat_history, img, audio_player

	def clear_history():
	    '''Rebind the module-level chat_history to a new empty list.'''
	    global chat_history
	    chat_history = list()

	# The ElevenLabs key is read from the "eleven_key" environment variable
	# (a Colab userdata lookup was used here previously).
	eleven_key = os.environ.get("eleven_key")
	elevenlabs = ElevenLabs(api_key=eleven_key)

	def render_voice(text_in: str):
	    '''
	    Convert text_in to speech with the ElevenLabs client and return an HTML
	    <audio> tag (autoplay, with controls) embedding the MP3 as base64 data.
	    '''
	    speech_chunks = elevenlabs.text_to_speech.convert(
	        text = text_in,
	        voice_id = 'vxO9F6g9yqYJ4RsWvMbc',
	        model_id = 'eleven_multilingual_v2',
	        output_format='mp3_44100_128',
	        voice_settings={
	            "stability": 0.37,
	            "similarity_boost": 0.90,
	            "style": 0.0,
	            "speed": 0.95
	        }
	    )

	    # the converter yields byte chunks; join them before encoding
	    mp3_bytes = b"".join(speech_chunks)
	    audio = base64.b64encode(mp3_bytes).decode("utf-8")
	    return f'<audio src="data:audio/mpeg;base64,{audio}" controls autoplay></audio>'

	def voice_from_file(file_name):
	    '''
	    Read the audio file file_name and return an HTML <audio> tag (autoplay,
	    with controls) embedding its bytes as base64 data.
	    '''
	    with open(file_name, 'rb') as handle:
	        raw = handle.read()
	    audio = base64.b64encode(raw).decode("utf-8")
	    return f'<audio src="data:audio/mpeg;base64,{audio}" controls autoplay></audio>'

	def mol_accordions():
	    '''Return the audio player for 'mol.mp3' and a one-message chat with example Molecule Agent queries.'''
	    hint = 'Try queries like: Find the name of CCCF, find the smiles for paracetamol, or find molecules similar to paracetamol.'
	    chat = [{'role': 'assistant', 'content': hint}]
	    return voice_from_file('mol.mp3'), chat

	def prop_accordions():
	    '''Return the audio player for 'Props.mp3' and a one-message chat with example Property Agent queries.'''
	    hint = 'Try queries like: Find Lipinski properties for CCCF, find pharmacophore-similarity between \
	CCCF and CCCBr, or generate analogues of c1ccc(O)cc1.'
	    chat = [{'role': 'assistant', 'content': hint}]
	    return voice_from_file('Props.mp3'), chat

	def prot_accordions():
	    '''Return the audio player for 'protein.mp3' and a one-message chat with example Protein Agent queries.'''
	    hint = 'Try queries like: find UNIPROT IDs for the protein MAOB; find PDB IDs for MAOB; how many chains \
	are in the PDB structure 4A7G; find PDB IDs matching the protein MAOB; list the bioactive molecules for the CHEMBL \
	ID CHEMBL2039; dock the molecule CCCC(F) in the protein DRD2; predict the IC50 value for CCCC(F) based on the CHEMBL \
	ID CHEMBL2039; or generate novel molecules based on the CHEMBL ID CHEMBL2039.'
	    chat = [{'role': 'assistant', 'content': hint}]
	    return voice_from_file('protein.mp3'), chat

	def dock_accordions():
	    '''Return the audio player for 'Dock.mp3' and a one-message chat with an example Docking Agent query.'''
	    hint = 'Try queries like: dock CCC(F) in the protein MAOB'
	    chat = [{'role': 'assistant', 'content': hint}]
	    return voice_from_file('Dock.mp3'), chat

	def mol_workflow():
	    '''Return the audio player for 'mol_wf.mp3' and a one-message chat describing a sample molecule workflow.'''
	    hint = "Starting with a molecule, try finding it's Lipinski properties, or its pharmacophore similarity to a known active. \
	Find similar molecules or, if it has substituted rings, find analogues."
	    chat = [{'role': 'assistant', 'content': hint}]
	    return voice_from_file('mol_wf.mp3'), chat

	def prot_workflow():
	    '''Return the audio player for 'prot_wf.mp3' and a one-message chat describing a sample protein workflow.'''
	    hint = "Starting with a protein, try searching for Uniprot IDs, followed by Chembl IDs. Then you can look for bioactive molecules \
	for each Chembl ID. You can also search for crystal structures in the PDB. Generate novel bioactive molecules based on a protein Chembl ID."
	    chat = [{'role': 'assistant', 'content': hint}]
	    return voice_from_file('prot_wf.mp3'), chat

	def combo_workflow():
	    '''Return the audio player for 'combo_wf.mp3' and a one-message chat describing a sample combined workflow.'''
	    hint ="Starting with a protein and a molecule, try docking the molecule in the protein. If you have a Chembl ID, predict the IC50 \
	value of the molecule in the protein."
	    chat = [{'role': 'assistant', 'content': hint}]
	    return voice_from_file('combo_wf.mp3'), chat

	# Gradio user interface: a header, one accordion per sub-agent, three sample
	# workflow buttons, the chat window, the input box and the output widgets.
	# NOTE(review): indentation was lost in this copy, so which components sit
	# inside which Row/Accordion is presumed from the order of the statements.
	with gr.Blocks() as forest:
	top = gr.Markdown('''
	# MoDrAg - the Modular Drug Design Agent!
	- Here to perform all of your small molecule and protein based drug design tasks! Currently directing the sub-agents below. Click to see what each agent can do.
	''')

	# one collapsed accordion per sub-agent, each listing what that agent can do
	with gr.Row():
	with gr.Accordion("Molecule Agent - Click to open/close.", open=False) as mol:
	gr.Markdown('''
	- find the name of a molecule from the SMILES string.
	- find the SMILES string of a molecule from the name
	- find similar or related molecules with some basic properties from a name or SMILES.
	''')
	with gr.Accordion("Property Agent - Click to open/close.", open=False) as prop:
	gr.Markdown('''
	- calculate Lipinski properties from a SMILES string.
	- find the pharmacophore-similarity between two molecules (a molecule and a reference).
	- generate analogues of ring molecules and report their QED values.
	''')
	with gr.Accordion("Protein Agent - Click to open/close.", open=False)as prot:
	gr.Markdown('''
	- Find Uniprot IDs for a protein/gene name.
	- report the number of bioactive molecules for a protein, organized by Chembl ID.
	- report the SMILES and IC50 values of bioactive molecules for a particular Chembl ID.
	- find protein sequences, report number fo chains.
	- find small molecules present in a PDB structure.
	- find PDB IDs that match a protein.
	- predict the IC50 value of a small molecule based on a Chembl ID.
	- generate novel molecules based on a Chembl ID.
	''')
	with gr.Accordion("Docking Agent - Click to open/close.", open=False) as dock:
	gr.Markdown('''
	- Find the docking score and pose coordinates for a molecules defined by a SMILES string in on of the proteins below:
	- IGF1R,JAK2,KIT,LCK,MAPK14,MAPKAPK2,MET,PTK2,PTPN1,SRC,ABL1,AKT1,AKT2,CDK2,CSF1R,EGFR,KDR,MAPK1,FGFR1,ROCK1,MAP2K1,
	PLK1,HSD11B1,PARP1,PDE5A,PTGS2,ACHE,MAOB,CA2,GBA,HMGCR,NOS1,REN,DHFR,ESR1,ESR2,NR3C1,PGR,PPARA,PPARD,PPARG,AR,THRB,
	ADAM17,F10,F2,BACE1,CASP3,MMP13,DPP4,ADRB1,ADRB2,DRD2,DRD3,ADORA2A,CYP2C9,CYP3A4,HSP90AA1
	''')
	# buttons that play a canned description of a sample workflow
	with gr.Row():
	molecule_workflow = gr.Button(value = "Sample Molecule Workflow")
	protein_workflow = gr.Button(value = "Sample Protein Workflow")
	combined_workflow = gr.Button(value = "Sample Combined Workflow")

	# chat window using the "messages" (role/content dict) format
	chatbot = gr.Chatbot(type="messages", placeholder="## Hello, I'm MoDrAg! Let's design together!")

	task = gr.Textbox(label="Type your messages here and hit enter.", scale = 2)
	with gr.Row():
	chat_btn = gr.Button(value = "Send", scale = 2)
	voice_choice = gr.Radio(choices = ['On', 'Off'],label="Audio Voice Response?", interactive=True, value='Off', scale = 2)

	# the clear button is built for the textbox; clear_history is attached to it below
	clear = gr.ClearButton([task])
	pic = gr.Image(label="Molecules (if needed)")
	# HTML slot that receives the <audio> players
	talk_ele = gr.HTML()

	# event wiring: DDAgent returns (textbox value, chat history, image, audio html)
	chat_btn.click(DDAgent, inputs = [task, voice_choice], outputs = [task, chatbot, pic, talk_ele])
	task.submit(DDAgent, [task,voice_choice], [task, chatbot, pic, talk_ele])
	# the accordion and workflow handlers each return (audio html, chat messages)
	mol.expand(mol_accordions, outputs = [talk_ele, chatbot])
	prop.expand(prop_accordions, outputs = [talk_ele, chatbot])
	prot.expand(prot_accordions, outputs = [talk_ele, chatbot])
	dock.expand(dock_accordions, outputs = [talk_ele, chatbot])
	molecule_workflow.click(mol_workflow, outputs = [talk_ele, chatbot])
	protein_workflow.click(prot_workflow, outputs = [talk_ele, chatbot])
	combined_workflow.click(combo_workflow, outputs = [talk_ele, chatbot])
	# clear_history has no outputs: it only resets the module-level chat_history
	clear.click(clear_history)

	# Render function registered with the header Markdown as its input: it embeds
	# 'MoDrAg_hello.mp3' as a base64 autoplay <audio> tag in a new gr.HTML.
	# NOTE(review): the talk_ele assigned inside is a local name, not the
	# talk_ele component created above — confirm this is intended.
	@gr.render(inputs=top)
	def get_speech(args):
	audio_file = 'MoDrAg_hello.mp3'
	with open(audio_file, 'rb') as audio_bytes:
	audio = base64.b64encode(audio_bytes.read()).decode("utf-8")
	audio_player = f'<audio src="data:audio/mpeg;base64,{audio}" controls autoplay></audio>'
	talk_ele = gr.HTML(audio_player)


	# start the app with debug output off and the mcp_server option enabled
	forest.launch(debug=False, mcp_server=True)