Commit 2f9e3a2
Parent(s): f4d3755

test first question

Files changed:
- agents/search_agent.py +9 -13
- app.py +5 -1
- graphs/evaluation.py +109 -0
- graphs/question_map.py +11 -0
- graphs/search.py +0 -51
- models/models.py +2 -2
- tools/search.py +5 -4
agents/search_agent.py
CHANGED
@@ -1,4 +1,4 @@
-from graphs.search import build_workflow
+from graphs.evaluation import build_workflow
 from langchain_core.messages import HumanMessage, SystemMessage
 from langfuse.callback import CallbackHandler
 from dotenv import load_dotenv
@@ -11,21 +11,17 @@ class SearchAgent:
     def __call__(self, question: str) -> str:
         print(f"Agent received question (first 50 chars): {question[:50]}...")
         workflow = build_workflow()
-        messages = [SystemMessage("""You are a general AI assistant. I will ask you a question. Report your thoughts, and finish with only the answer. \n
-        YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
-        If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
-        If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
-        If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.""")]
-        messages = messages + [HumanMessage(content=question)]
-        messages = workflow.invoke({
-            "messages": messages
-        }, config={"callbacks": [langfuse_handler]})
+
+        messages = [HumanMessage(content=question)]
+
+        state = workflow.invoke({
+            "messages": messages,
+        }, config={"callbacks": [langfuse_handler]})
+        return state["answer"]
 
-
+if __name__ == "__main__":
     question = "How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia."
     agent = SearchAgent()
     submit_answer = agent(question)
 
-print(submit_answer)
+    print(submit_answer)
app.py
CHANGED
@@ -71,7 +71,11 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
             print(f"Skipping item with missing task_id or question: {item}")
             continue
         try:
-            submitted_answer = agent(question_text)
+            submitted_answer = ""
+            if question_text == "":
+                submitted_answer = agent(question_text)
+            else:
+                submitted_answer = "In progress"
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
         except Exception as e:
graphs/evaluation.py
ADDED
@@ -0,0 +1,109 @@
+from models.models import groq_model, anthropic_model
+from tools.search import arxiv_search, taivily_search, serper_search
+from langgraph.graph import StateGraph, START, END, MessagesState
+from langchain_core.messages import HumanMessage, SystemMessage
+from typing import List, TypedDict
+from langgraph.prebuilt import ToolNode
+
+
+tools = [
+    taivily_search,
+    serper_search,
+]
+
+class EvaluationState(TypedDict):
+    messages: List
+    tasks: str
+    current_task: str
+    question: str
+    answer: str
+    external_information: str
+    has_enough_information: bool
+
+bound_model_llama = groq_model.bind_tools(tools)
+bound_model_antrhropic = anthropic_model.bind_tools(tools)
+
+def call_node(state: EvaluationState):
+    question = state["messages"][-1].content
+    state["question"] = question
+    response = bound_model_llama.invoke(state["messages"])
+
+    state["messages"].append(response)
+    return state
+
+tool_node = ToolNode(tools)
+
+
+def parse_response(state: EvaluationState):
+    """
+    Parse the response from the model and return the final answer
+    """
+    prompt = f"""I will ask you a question. Report your thoughts, and finish with only YOUR FINAL ANSWER.
+    YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
+    If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
+    If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
+    If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
+    ---question---
+    {state["question"]}
+    ---relevant information---
+    {state["external_information"]}
+    ---answer---
+    """
+
+
+    response = groq_model.invoke(prompt)
+    state["messages"].append(response)
+    state["answer"] = response.content
+    return state
+
+def map_answer(state: EvaluationState):
+    """
+    Map the answer to the final answer
+    """
+    answer = anthropic_model.invoke("Map the answer, I want only the number, string or list. ANSWER:" + state["answer"])
+    print(answer.content)
+    return {
+        "answer": answer.content
+    }
+
+def map_tool_answer(state: EvaluationState):
+    """
+    Map the tool answer to the final answer
+    """
+    last_message = state["messages"][-1]
+    state["external_information"] = last_message.content
+
+    return state
+
+
+def build_workflow():
+    """
+    Build search workflow
+    """
+    workflow = StateGraph(EvaluationState)
+    workflow.add_node("agent", call_node)
+    workflow.add_node("action", tool_node)
+    workflow.add_node("map_tool_answer", map_tool_answer)
+    workflow.add_node("parse_response", parse_response)
+    workflow.add_node("map_answer", map_answer)
+    """ workflow.add_node("action", tool_node)
+    workflow.add_node("answer", parse_response) """
+
+    workflow.add_edge(START, "agent")
+    workflow.add_edge("agent", "action")
+    workflow.add_edge("action", "map_tool_answer")
+    workflow.add_edge("map_tool_answer", "parse_response")
+    workflow.add_edge("parse_response", "map_answer")
+    workflow.add_edge("map_answer", END)
+
+    return workflow.compile()
+
+""" if __name__ == "__main__":
+    question = "How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia."
+    # Build the graph
+    graph = build_workflow()
+    # Run the graph
+    messages = [HumanMessage(content=question)]
+    messages = graph.invoke({"messages": messages})
+    for m in messages["messages"]:
+        m.pretty_print() """
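A minimal way to exercise the new evaluation graph on its own, outside SearchAgent. This is a sketch, not part of the commit: it assumes the Groq, Anthropic, Tavily and Serper API keys are loaded via .env (the exact variable names depend on the local setup), and that the model actually emits a tool call, since the agent -> action edge is unconditional.

from dotenv import load_dotenv
from langchain_core.messages import HumanMessage
from graphs.evaluation import build_workflow

load_dotenv()  # API keys for Groq, Anthropic, Tavily and Serper

graph = build_workflow()
# The graph fills state["answer"] in the map_answer node; SearchAgent returns that value.
state = graph.invoke({"messages": [HumanMessage(content="What is the capital of France?")]})
print(state["answer"])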
graphs/question_map.py
ADDED
@@ -0,0 +1,11 @@
+from langchain_core.messages import HumanMessage, SystemMessage
+from models.models import groq_model
+
+def question_map(state: HumanMessage):
+    """
+    This funtion replace the initial question for a one with a detail expected answer.
+    """
+    question = """/ \n Question:""" + state["content"]
+
+
+
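question_map arrives in this commit as a stub: it builds a prompt string but never calls the model or returns anything. Purely as an illustration of where it seems to be heading, a hypothetical completion might look like the following; the prompt wording, the use of groq_model and the HumanMessage return type are all assumptions, not part of the commit.

from langchain_core.messages import HumanMessage
from models.models import groq_model

def question_map(state: HumanMessage):
    """Replace the initial question with one that spells out the expected answer format."""
    # Hypothetical prompt; the committed stub only concatenates the question text.
    prompt = "Rewrite the following question so the expected answer format is explicit.\nQuestion: " + state.content
    rewritten = groq_model.invoke(prompt)
    return HumanMessage(content=rewritten.content)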
graphs/search.py
DELETED
@@ -1,51 +0,0 @@
-from models.models import groq_model, anthropic_model
-from tools.search import arxiv_search, web_search, google_search
-from langgraph.graph import StateGraph, START, END, MessagesState
-from langgraph.prebuilt import ToolNode
-from langchain_core.messages import HumanMessage
-
-tools = [
-    arxiv_search,
-    web_search,
-    google_search,
-]
-
-tool_node = ToolNode(tools)
-#bound_model = groq_model.bind_tools(tools)
-bound_model = anthropic_model.bind_tools(tools)
-# Define the function that calls the model
-def call_model(state: MessagesState):
-    response = bound_model.invoke(state["messages"])
-    # We return a list, because this will get added to the existing list
-    return {"messages": response}
-
-def should_continue(state: MessagesState):
-    last_message = state["messages"][-1]
-
-    if not last_message.tool_calls:
-        return END
-
-    return "action"
-
-def build_workflow():
-    """
-    Build search workflow
-    """
-    workflow = StateGraph(MessagesState)
-    workflow.add_node("agent", call_model)
-    workflow.add_node("action", tool_node)
-
-    workflow.add_edge(START, "agent")
-    workflow.add_conditional_edges("agent", should_continue)
-    workflow.add_edge("action", "agent")
-    return workflow.compile()
-
-""" if __name__ == "__main__":
-    question = "How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia."
-    # Build the graph
-    graph = build_workflow()
-    # Run the graph
-    messages = [HumanMessage(content=question)]
-    messages = graph.invoke({"messages": messages})
-    for m in messages["messages"]:
-        m.pretty_print() """
models/models.py
CHANGED
@@ -5,11 +5,11 @@ from dotenv import load_dotenv
 load_dotenv()
 
 anthropic_model = ChatAnthropic(
-    model="claude-3-
+    model="claude-3-5-haiku-latest",
     temperature=0
 )
 
 groq_model = ChatGroq(
-    model="
+    model="qwen-qwq-32b",
     temperature=0
 )
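A quick sanity check for the swapped-in models, as a sketch only: it assumes ANTHROPIC_API_KEY and GROQ_API_KEY are present in the environment, which models.py already loads via load_dotenv() at import time.

from models.models import anthropic_model, groq_model

# Both calls should return a short AIMessage if the keys and model names are valid.
print(anthropic_model.invoke("Reply with the single word: ok").content)
print(groq_model.invoke("Reply with the single word: ok").content)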
tools/search.py
CHANGED
@@ -26,10 +26,11 @@ def wikipedia_search(query: str) -> str:
     return {"wiki_results": formatted_search_docs}
 
 @tool
-def web_search(query: str) -> str:
-    """
+def taivily_search(query: str) -> str:
+    """Tavily is a search engine optimized for LLMs, aimed at efficient, quick and persistent search results. Tavily take care of all the burden of searching, scraping, filtering and extracting the most relevant information from online sources.
     Args:
         query: The search query."""
+
     search_docs = TavilySearchResults(max_results=1).invoke(input=query)
 
     formatted_search_docs = "\n\n---\n\n".join(
@@ -58,12 +59,12 @@ def arxiv_search(query: str) -> str:
     return {"arxiv_results": formatted_search_docs}
 
 @tool
-def google_search(query: str) -> str:
+def serper_search(query: str) -> str:
     """
     Search Google for a query and return maximum 2 result.
     Args: query: The search query.
     """
-    search_docs = GoogleSerperAPIWrapper()
+    search_docs = GoogleSerperAPIWrapper(k=2)
     result = search_docs.run(query)
 
     return {"google_results": result}
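The renamed tools can be smoke-tested directly, since @tool-decorated functions become invokable LangChain tool objects. A sketch under the assumption that the Tavily and Serper keys are available in .env; the query strings are only examples.

from dotenv import load_dotenv
from tools.search import taivily_search, serper_search

load_dotenv()  # TAVILY_API_KEY and SERPER_API_KEY

# BaseTool.invoke() runs the tool with a single string argument.
print(taivily_search.invoke("Mercedes Sosa studio albums 2000 to 2009"))
print(serper_search.invoke("Mercedes Sosa discography"))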