Spaces:

cafierom
/

PropAgent

Sleeping

App Files Files Community

cafierom commited on Nov 23, 2025

Commit

bed1e14

verified ·

1 Parent(s): 5f27d36

Delete app.py

Browse files

Files changed (1) hide show

app.py +0 -514

app.py DELETED Viewed

@@ -1,514 +0,0 @@
-import torch
-from typing import Annotated, TypedDict, Literal
-from langchain_community.tools import DuckDuckGoSearchRun
-from langchain_core.tools import tool
-from langgraph.prebuilt import ToolNode, tools_condition
-from langgraph.graph import StateGraph, START, END
-from langgraph.graph.message import add_messages
-from langchain_core.messages import SystemMessage, trim_messages, AIMessage, HumanMessage, ToolCall
-from langchain_huggingface.llms import HuggingFacePipeline
-from langchain_huggingface import ChatHuggingFace
-from langchain_core.prompts import PromptTemplate, ChatPromptTemplate
-from langchain_core.runnables import chain
-from uuid import uuid4
-import re
-import matplotlib.pyplot as plt
-import PIL.Image as Image
-import gradio as gr
-import spaces
-from rdkit import Chem
-from rdkit.Chem import AllChem, QED
-from rdkit.Chem import Draw
-from rdkit import rdBase
-from rdkit.Chem import rdMolAlign
-import os
-from rdkit import RDConfig
-from rdkit.Chem.Features.ShowFeats import _featColors as featColors
-from rdkit.Chem.FeatMaps import FeatMaps
-fdef = AllChem.BuildFeatureFactory(os.path.join(RDConfig.RDDataDir,'BaseFeatures.fdef'))
-fmParams = {}
-for k in fdef.GetFeatureFamilies():
-    fparams = FeatMaps.FeatMapParams()
-    fmParams[k] = fparams
-device = "cuda" if torch.cuda.is_available() else "cpu"
-hf = HuggingFacePipeline.from_model_id(
-    #model_id= "swiss-ai/Apertus-8B-Instruct-2509",
-    model_id= "microsoft/Phi-4-mini-instruct",
-    task="text-generation",
-    pipeline_kwargs = {"max_new_tokens": 500, "temperature": 0.4})
-chat_model = ChatHuggingFace(llm=hf)
-class State(TypedDict):
-  '''
-    The state of the agent.
-  '''
-  messages: Annotated[list, add_messages]
-  query_smiles: str
-  query_task: str
-  query_path: str
-  query_reference: str
-  tool_choice: tuple
-  which_tool: int
-  props_string: str
-  #(Literal["lipinski_tool", "substitution_tool", "pharm_feature_tool"],
-  #                   Literal["lipinski_tool", "substitution_tool", "pharm_feature_tool"])
-def substitution_node(state: State) -> State:
-  '''
-    A simple substitution routine that looks for a substituent on a phenyl ring and
-    substitutes different fragments in that location. Returns a list of novel molecules and their
-    QED score (1 is most drug-like, 0 is least drug-like).
-      Args:
-        smiles: the input smiles string
-      Returns:
-        new_smiles_string: a string of novel molecules and their QED scores.
-  '''
-  print("substitution tool")
-  print('===================================================')
-  smiles = state["query_smiles"]
-  current_props_string = state["props_string"]
-  new_fragments = ["c(Cl)c", "c(F)c", "c(O)c", "c(C)c", "c(OC)c", "c([NH3+])c",
-                   "c(Br)c", "c(C(F)(F)(F))c"]
-  new_smiles = []
-  for fragment in new_fragments:
-    m = re.findall(r"c(\D\D*)c", smiles)
-    if len(m) != 0:
-      for group in m:
-        #print(group)
-        if fragment not in group:
-          new_smile = smiles.replace(group[1:], fragment)
-          new_smiles.append(new_smile)
-  qeds = []
-  for new_smile in new_smiles:
-    qeds.append(get_qed(new_smile))
-  original_qed = get_qed(smiles)
-  new_smiles_string = "Substitution or Analogue creation tool results: \n"
-  new_smiles_string += f"The original molecule SMILES was {smiles} with QED {original_qed}.\n"
-  new_smiles_string += "Novel Molecules or Analogues and QED values: \n"
-  for i in range(len(new_smiles)):
-    new_smiles_string += f"SMILES: {new_smiles[i]}, QED: {qeds[i]:.3f}\n"
-  new_mols = [Chem.MolFromSmiles(x) for x in new_smiles]
-  if len(new_smiles) > 0:
-    img = Draw.MolsToGridImage(new_mols, molsPerRow=3, subImgSize=(200,200), legends=[f"QED: {qeds[i]:.3f}" for i in range(len(new_smiles))])
-    img.save('Substitution_image.png')
-  else:
-    new_smiles_string += "No valid substitutions were found.\n"
-  print(new_smiles_string)
-  current_props_string += new_smiles_string
-  state["props_string"] = current_props_string
-  state["which_tool"] += 1
-  return state
-def get_qed(smiles):
-  '''
-    Helper function to compute QED for a given molecule.
-      Args:
-        smiles: the input smiles string
-      Returns:
-        qed: the QED score of the molecule.
-  '''
-  mol = Chem.MolFromSmiles(smiles)
-  qed = Chem.QED.default(mol)
-  return qed
-def lipinski_node(state: State) -> State:
-  '''
-    A tool to calculate QED and other lipinski properties of a molecule.
-      Args:
-        smiles: the input smiles string
-      Returns:
-        props_string: a string of the QED and other lipinski properties of the molecule,
-                      including Molecular Weight, LogP, HBA, HBD, Polar Surface Area,
-                      Rotatable Bonds, Aromatic Rings and Undesireable Moieties.
-  '''
-  print("lipinski tool")
-  print('===================================================')
-  smiles = state["query_smiles"]
-  current_props_string = state["props_string"]
-  mol = Chem.MolFromSmiles(smiles)
-  qed = Chem.QED.default(mol)
-  p = Chem.QED.properties(mol)
-  mw = p[0]
-  logP = p[1]
-  hba = p[2]
-  hbd = p[3]
-  psa = p[4]
-  rb = p[5]
-  ar = p[6]
-  um = p[7]
-  props_string = "Lipinski tool results: \n"
-  props_string += f'''QED and other lipinski properties of the molecule:
-    SMILES: {smiles},
-    QED: {qed:.3f},
-    Molecular Weight: {mw:.3f},
-    LogP: {logP:.3f},
-    Hydrogen bond acceptors: {hba},
-    Hydrogen bond donors: {hbd},
-    Polar Surface Area: {psa:.3f},
-    Rotatable Bonds: {rb},
-    Aromatic Rings: {ar},
-    Undesireable moieties: {um}
-  '''
-  current_props_string += props_string
-  state["props_string"] = current_props_string
-  state["which_tool"] += 1
-  return state
-def pharmfeature_node(state: State) -> State:
-  '''
-    A tool to compare the pharmacophore features of a query molecule against
-    a those of a reference molecule and report the pharmacophore features of both and the feature
-    score of the query molecule.
-      Args:
-        known_smiles: the reference smiles string
-        test_smiles: the query smiles string
-      Returns:
-        props_string: a string of the pharmacophore features of both molecules and the feature
-                      score of the query molecule.
-  '''
-  print("pharmfeature tool")
-  print('===================================================')
-  test_smiles = state["query_smiles"]
-  known_smiles = state["query_reference"]
-  current_props_string = state["props_string"]
-  smiles = [known_smiles, test_smiles]
-  mols = [Chem.MolFromSmiles(x) for x in smiles]
-  mols = [Chem.AddHs(m) for m in mols]
-  ps = AllChem.ETKDGv3()
-  for m in mols:
-      AllChem.EmbedMolecule(m,ps)
-  o3d = rdMolAlign.GetO3A(mols[1],mols[0])
-  o3d.Align()
-  keep = ('Donor', 'Acceptor', 'NegIonizable', 'PosIonizable', 'ZnBinder', 'Aromatic', 'LumpedHydrophobe')
-  feat_hash = {'Donor': 'Hydrogen bond donors', 'Acceptor': 'Hydrogen bond acceptors',
-               'NegIonizable': 'Negatively ionizable groups', 'PosIonizable': 'Positively ionizable groups',
-               'ZnBinder': 'Zinc Binders', 'Aromatic': 'Aromatic rings', 'LumpedHydrophobe': 'Hydrophobic/non-polar groups' }
-  feat_vectors = []
-  for m in mols:
-      rawFeats = fdef.GetFeaturesForMol(m)
-      feat_vectors.append([f for f in rawFeats if f.GetFamily() in keep])
-  feat_maps = [FeatMaps.FeatMap(feats = x,weights=[1]*len(x),params=fmParams) for x in feat_vectors]
-  test_score = feat_maps[0].ScoreFeats(feat_maps[1].GetFeatures())/(feat_maps[0].GetNumFeatures())
-  feats_known = {}
-  feats_test = {}
-  for feat in feat_vectors[0]:
-    if feat.GetFamily() not in feats_known.keys():
-      feats_known[feat.GetFamily()]  = 1
-    else:
-      feats_known[feat.GetFamily()] += 1
-  for feat in feat_vectors[1]:
-    if feat.GetFamily() not in feats_test.keys():
-      feats_test[feat.GetFamily()]  = 1
-    else:
-      feats_test[feat.GetFamily()] += 1
-  props_string = "PharmFeature tool results: \n"
-  props_string += f"The Pharmacophore Feature Overlap Score of the test molecule \
-versus the reference molecule is {test_score:.3f}. \n\n"
-  for feat in feats_known.keys():
-    props_string += f"There are {feats_known[feat]} {feat_hash[feat]} in the reference molecule. \n"
-  for feat in feats_test.keys():
-    props_string += f"There are {feats_test[feat]} {feat_hash[feat]} in the test molecule. \n"
-  current_props_string += props_string
-  state["props_string"] = current_props_string
-  state["which_tool"] += 1
-  return state
-def first_node(state: State) -> State:
-  '''
-    The first node of the agent. This node receives the input and asks the LLM
-    to determine which is the best tool to use to answer the QUERY TASK.
-      Input: the initial prompt from the user. should contain only one of more of the following:
-             smiles: the smiles string, task: the query task, path: the path to the file,
-             reference: the reference smiles
-             the value should be separated from the name by a ':' and each field should
-             be separated from the previous one by a ','.
-             All of these values are saved to the state
-      Output: the tool choice
-  '''
-  query_smiles = None
-  state["query_smiles"] = query_smiles
-  query_task = None
-  state["query_task"] = query_task
-  query_path = None
-  state["query_path"] = query_path
-  query_reference = None
-  state["query_reference"] = query_reference
-  props_string = ""
-  state["props_string"] = props_string
-  raw_input = state["messages"][-1].content
-  parts = raw_input.split(',')
-  for part in parts:
-    if 'smiles' in part:
-      query_smiles = part.split(':')[1]
-      if query_smiles.lower() == 'none':
-        query_smiles = None
-      state["query_smiles"] = query_smiles
-    if 'task' in part:
-      query_task = part.split(':')[1]
-      state["query_task"] = query_task
-    if 'path' in part:
-      query_path = part.split(':')[1]
-      if query_path.lower() == 'none':
-        query_path = None
-      state["query_path"] = query_path
-    if 'reference' in part:
-      query_reference = part.split(':')[1]
-      if query_reference.lower() == 'none':
-        query_reference = None
-      state["query_reference"] = query_reference
-  prompt = f'For the QUERY_TASK given below, determine if one or two of the tools descibed below \
-can complete the task. If so, reply with only the tool names followed by "#". If two tools \
-are required, reply with both tool names separated by a comma and followed by "#". \
-If the tools cannot complete the task, reply with "None #".\n \
-QUERY_TASK: {query_task}.\n \
-Tools: \n \
-lipinski_tool: this tool can calculate the following properties: Quantitative \
-Estimate of Drug-likeness (QED), Molecular weight, LogP (measures lipophilicity, higher is more lipophilic), \
-HBA, HBD, Polar Surface Area, Rotatable Bonds, Aromatic Rings and Undesireable Moieties. \n \
-substitution_tool: this tool can generate analogues of the molecule by substituting \
-different chemical groups on the original molecule. Returns a list of novel molecules and their \
-QED score (1 is most drug-like, 0 is least drug-like). \n \
-pharm_feature_tool: this tool can compare the pharmacophore features of a query molecule against \
-a those of a reference molecule and report the pharmacophore features of both and the feature \
-score of the query molecule. This score tells how the common features score against each other, but \
-does not inform about features unique to each molecule.'
-  res = chat_model.invoke(prompt)
-  tool_choices = str(res).split('<|assistant|>')[1].split('#')[0].strip()
-  tool_choices = tool_choices.split(',')
-  if len(tool_choices) == 1:
-    tool1 = tool_choices[0].strip()
-    if tool1.lower() == 'none':
-      tool_choice = (None, None)
-    else:
-      tool_choice = (tool1, None)
-  elif len(tool_choices) == 2:
-    tool1 = tool_choices[0].lower().strip()
-    tool2 = tool_choices[1].lower().strip()
-    if tool1.lower() == 'none' and tool2.lower() == 'none':
-      tool_choice = (None, None)
-    elif tool1.lower() == 'none' and tool2.lower() != 'none':
-      tool_choice = (None, tool2)
-    elif tool2.lower() == 'none' and tool1.lower() != 'none':
-      tool_choice = (tool1, None)
-    else:
-      tool_choice = (tool1, tool2)
-  else:
-    tool_choice = (None, None)
-  state["tool_choice"] = tool_choice
-  state["which_tool"] = 0
-  print(f"The chosen tools are: {tool_choice}")
-  return state
-def loop_node(state: State) -> State:
-  '''
-    This node accepts the tool returns and decides if it needs to call another
-    tool or go on to the parser node.
-      Input: the tool returns.
-      Output: the next node to call.
-  '''
-  return state
-def parser_node(state: State) -> State:
-  '''
-    This is the third node in the agent. It receives the output from the tool,
-    puts it into a prompt as CONTEXT, and asks the LLM to answer the original
-    query.
-      Input: the output from the tool.
-      Output: the answer to the original query.
-  '''
-  props_string = state["props_string"]
-  query_task = state["query_task"]
-  prompt = f'Using the CONTEXT below, answer the original query, which \
-was to answer the QUERY_TASK. End your answer with a "#" \
-QUERY_TASK: {query_task}.\n \
-CONTEXT: {props_string}.\n '
-  res = chat_model.invoke(prompt)
-  return {"messages": res}
-def reflect_node(state: State) -> State:
-  '''
-    This is the fourth node of the agent. It recieves the LLMs previous answer and
-    tries to improve it.
-      Input: the LLMs last answer.
-      Output: the improved answer.
-  '''
-  previous_answer = state["messages"][-1].content
-  props_string = state["props_string"]
-  prompt = f'Look at the PREVIOUS ANSWER below which you provided and the \
-TOOL RESULTS. Write an improved answer based on the PREVIOUS ANSWER and the \
-TOOL RESULTS by adding additional clarifying and enriching information. End \
-your new answer with a "#" \
-PREVIOUS ANSWER: {previous_answer}.\n \
-TOOL RESULTS: {props_string}. '
-  res = chat_model.invoke(prompt)
-  return {"messages": res}
-def get_chemtool(state):
-  '''
-  '''
-  which_tool = state["which_tool"]
-  tool_choice = state["tool_choice"]
-  if tool_choice is None or tool_choice == (None, None):
-    return None
-  if which_tool == 0 or which_tool == 1:
-    current_tool = tool_choice[which_tool]
-    if current_tool is None:
-      return None
-  elif which_tool > 1:
-    current_tool = None
-  return current_tool
-def pretty_print(answer):
-  final = str(answer['messages'][-1]).split('<|assistant|>')[-1].split('#')[0].strip("n").strip('\\').strip('n').strip('\\')
-  for i in range(0,len(final),100):
-    print(final[i:i+100])
-def print_short(answer):
-  for i in range(0,len(answer),100):
-    print(answer[i:i+100])
-builder = StateGraph(State)
-builder.add_node("first_node", first_node)
-builder.add_node("substitution_node", substitution_node)
-builder.add_node("lipinski_node", lipinski_node)
-builder.add_node("pharmfeature_node", pharmfeature_node)
-builder.add_node("loop_node", loop_node)
-builder.add_node("parser_node", parser_node)
-builder.add_node("reflect_node", reflect_node)
-builder.add_edge(START, "first_node")
-builder.add_conditional_edges("first_node", get_chemtool, {
-    "substitution_tool": "substitution_node",
-    "lipinski_tool": "lipinski_node",
-    "pharm_feature_tool": "pharmfeature_node",
-    None: "parser_node"})
-builder.add_edge("lipinski_node", "loop_node")
-builder.add_edge("substitution_node", "loop_node")
-builder.add_edge("pharmfeature_node", "loop_node")
-builder.add_conditional_edges("loop_node", get_chemtool, {
-    "substitution_tool": "substitution_node",
-    "lipinski_tool": "lipinski_node",
-    "pharm_feature_tool": "pharmfeature_node",
-    None: "parser_node"})
-builder.add_edge("parser_node", "reflect_node")
-builder.add_edge("reflect_node", END)
-graph = builder.compile()
-@spaces.GPU
-def PropAgent(task, smiles, reference):
-  #if Substitution_image.png exists, remove it
-  if os.path.exists('Substitution_image.png'):
-    os.remove('Substitution_image.png')
-  input = {
-    "messages": [
-        HumanMessage(f'query_smiles: {smiles}, query_task: {task}, query_reference: {reference}')
-    ]
-  }
-  #print(input)
-  replies = []
-  for c in graph.stream(input): #, stream_mode='updates'):
-    m = re.findall(r'[a-z]+\_node', str(c))
-    if len(m) != 0:
-      reply = c[str(m[0])]['messages']
-      if 'assistant' in str(reply):
-        reply = str(reply).split("<|assistant|>")[-1].split('#')[0].strip()
-        replies.append(reply)
-  #check if image exists
-  if os.path.exists('Substitution_image.png'):
-    img_loc = 'Substitution_image.png'
-    img = Image.open(img_loc)
-  #else create a dummy blank image
-  else:
-    img = Image.new('RGB', (250, 250), color = (255, 255, 255))
-  return replies[-1], img
-with gr.Blocks(fill_height=True) as forest:
-  gr.Markdown('''
-              # Properties Agent
-              - uses RDKit to calculate lipinski properties
-              - finds pharmacophore similarity between two molecules
-              - generated analogues of a molecule
-              ''')
-  name, smiles = None, None
-  with gr.Row():
-    with gr.Column():
-      smiles = gr.Textbox(label="Molecule SMILES of interest (optional): ", placeholder='none')
-      ref = gr.Textbox(label="Reference molecule SMILES of interest (optional): ", placeholder='none')
-      task = gr.Textbox(label="Task for Agent: ")
-      calc_btn = gr.Button(value = "Submit to Agent")
-    with gr.Column():
-      props = gr.Textbox(label="Agent results: ", lines=20 )
-      pic = gr.Image(label="Molecule")
-      calc_btn.click(PropAgent, inputs = [task, smiles, ref], outputs = [props, pic])
-      task.submit(PropAgent, inputs = [task, smiles, ref], outputs = [props, pic])
-forest.launch(debug=False, mcp_server=True)