Spaces:
Sleeping
Sleeping
New version using OpenAI
Browse files- app.py +35 -38
- gaia_graph.py +120 -0
- gaia_graph_legacy.py +188 -0
- langgraph_openai.py +151 -0
- qa_graph.py +0 -69
- requirements.txt +33 -9
- test_gaia.py +0 -8
- test_gaia_questions.py +22 -0
- test_openai_agent.py +141 -0
- tools/search_tool.py +3 -3
app.py
CHANGED
|
@@ -3,24 +3,24 @@ import gradio as gr
|
|
| 3 |
import requests
|
| 4 |
import pandas as pd
|
| 5 |
import asyncio
|
| 6 |
-
from
|
| 7 |
from typing import Optional
|
| 8 |
|
| 9 |
-
#
|
| 10 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
| 11 |
-
user_answers_cache = {} #
|
| 12 |
|
| 13 |
class GaiaAgent:
|
| 14 |
def __init__(self):
|
| 15 |
-
print("
|
| 16 |
|
| 17 |
def __call__(self, question: str) -> str:
|
| 18 |
-
print("
|
| 19 |
state = {"question": question, "answer": ""}
|
| 20 |
-
|
| 21 |
-
return
|
| 22 |
-
|
| 23 |
|
|
|
|
| 24 |
async def run_agent(profile: gr.OAuthProfile | None):
|
| 25 |
if not profile:
|
| 26 |
return "Please login to Hugging Face.", None
|
|
@@ -28,27 +28,25 @@ async def run_agent(profile: gr.OAuthProfile | None):
|
|
| 28 |
username = profile.username
|
| 29 |
agent = GaiaAgent()
|
| 30 |
|
| 31 |
-
|
| 32 |
-
questions_url = f"{api_url}/questions"
|
| 33 |
-
print(f"Fetching questions from: {questions_url}")
|
| 34 |
-
|
| 35 |
try:
|
| 36 |
-
response = requests.get(
|
| 37 |
response.raise_for_status()
|
| 38 |
questions_data = response.json()
|
| 39 |
except Exception as e:
|
| 40 |
return f"Error fetching questions: {e}", None
|
| 41 |
|
| 42 |
-
|
|
|
|
| 43 |
task_id = item.get("task_id")
|
| 44 |
-
|
| 45 |
try:
|
| 46 |
-
answer = await asyncio.to_thread(agent,
|
| 47 |
-
return {"task_id": task_id, "question":
|
| 48 |
except Exception as e:
|
| 49 |
-
return {"task_id": task_id, "question":
|
| 50 |
|
| 51 |
-
results = await asyncio.gather(*(
|
| 52 |
user_answers_cache[username] = results
|
| 53 |
|
| 54 |
df = pd.DataFrame(results)
|
|
@@ -61,50 +59,49 @@ def submit_answers(profile: gr.OAuthProfile | None):
|
|
| 61 |
|
| 62 |
username = profile.username.strip()
|
| 63 |
if username not in user_answers_cache:
|
| 64 |
-
return "No cached answers
|
| 65 |
|
| 66 |
answers_payload = [
|
| 67 |
{"task_id": item["task_id"], "submitted_answer": item["submitted_answer"]}
|
| 68 |
for item in user_answers_cache[username]
|
| 69 |
]
|
| 70 |
|
| 71 |
-
space_id = os.getenv("SPACE_ID")
|
| 72 |
agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else ""
|
| 73 |
submission_data = {"username": username, "agent_code": agent_code, "answers": answers_payload}
|
| 74 |
|
| 75 |
-
|
| 76 |
-
print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
|
| 77 |
-
|
| 78 |
try:
|
| 79 |
-
response = requests.post(
|
| 80 |
response.raise_for_status()
|
| 81 |
result = response.json()
|
| 82 |
final_status = (
|
| 83 |
-
f"Submission Successful!\n"
|
| 84 |
-
f"User: {result.get('username')}\n"
|
| 85 |
-
f"
|
| 86 |
f"({result.get('correct_count', '?')}/{result.get('total_attempted', '?')} correct)\n"
|
| 87 |
-
f"Message: {result.get('message', 'No message received.')}"
|
| 88 |
)
|
| 89 |
df = pd.DataFrame(user_answers_cache[username])
|
| 90 |
return final_status, df
|
| 91 |
except Exception as e:
|
| 92 |
-
return f"Submission failed: {e}", pd.DataFrame(user_answers_cache[username])
|
| 93 |
|
| 94 |
|
|
|
|
| 95 |
with gr.Blocks() as demo:
|
| 96 |
-
gr.Markdown("#
|
| 97 |
gr.LoginButton()
|
| 98 |
|
| 99 |
-
run_button = gr.Button("Run Agent on Questions")
|
| 100 |
-
submit_button = gr.Button("Submit Cached Answers")
|
| 101 |
|
| 102 |
-
|
| 103 |
-
|
| 104 |
|
| 105 |
-
run_button.click(run_agent, outputs=[
|
| 106 |
-
submit_button.click(submit_answers, outputs=[
|
| 107 |
|
| 108 |
if __name__ == "__main__":
|
| 109 |
-
print("Launching app...")
|
| 110 |
demo.launch(debug=True, share=False)
|
|
|
|
| 3 |
import requests
|
| 4 |
import pandas as pd
|
| 5 |
import asyncio
|
| 6 |
+
from gaia_graph import graph # Use your agent
|
| 7 |
from typing import Optional
|
| 8 |
|
| 9 |
+
# Constants
|
| 10 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
| 11 |
+
user_answers_cache = {} # session-based cache
|
| 12 |
|
| 13 |
class GaiaAgent:
    """Callable wrapper that answers a single question via the imported graph."""

    def __init__(self):
        print("GaiaAgent initialized.")

    def __call__(self, question: str) -> str:
        """Run the graph on *question* and return the ``answer`` field of its result."""
        print(f"Running agent on: {question}")
        state = {"question": question, "answer": ""}
        # The compiled graph is a runnable object, not a mapping: call its
        # ``invoke`` method instead of subscripting it with "invoke".
        result = graph.invoke(state)
        return result["answer"]
|
|
|
|
| 22 |
|
| 23 |
+
# Async runner
|
| 24 |
async def run_agent(profile: gr.OAuthProfile | None):
|
| 25 |
if not profile:
|
| 26 |
return "Please login to Hugging Face.", None
|
|
|
|
| 28 |
username = profile.username
|
| 29 |
agent = GaiaAgent()
|
| 30 |
|
| 31 |
+
# 1. Load questions
|
|
|
|
|
|
|
|
|
|
| 32 |
try:
|
| 33 |
+
response = requests.get(f"{DEFAULT_API_URL}/questions", timeout=10)
|
| 34 |
response.raise_for_status()
|
| 35 |
questions_data = response.json()
|
| 36 |
except Exception as e:
|
| 37 |
return f"Error fetching questions: {e}", None
|
| 38 |
|
| 39 |
+
# 2. Process questions
|
| 40 |
+
async def process(item):
|
| 41 |
task_id = item.get("task_id")
|
| 42 |
+
question = item.get("question")
|
| 43 |
try:
|
| 44 |
+
answer = await asyncio.to_thread(agent, question)
|
| 45 |
+
return {"task_id": task_id, "question": question, "submitted_answer": answer}
|
| 46 |
except Exception as e:
|
| 47 |
+
return {"task_id": task_id, "question": question, "submitted_answer": f"ERROR: {e}"}
|
| 48 |
|
| 49 |
+
results = await asyncio.gather(*(process(item) for item in questions_data))
|
| 50 |
user_answers_cache[username] = results
|
| 51 |
|
| 52 |
df = pd.DataFrame(results)
|
|
|
|
| 59 |
|
| 60 |
username = profile.username.strip()
|
| 61 |
if username not in user_answers_cache:
|
| 62 |
+
return "No cached answers. Please run the agent first.", None
|
| 63 |
|
| 64 |
answers_payload = [
|
| 65 |
{"task_id": item["task_id"], "submitted_answer": item["submitted_answer"]}
|
| 66 |
for item in user_answers_cache[username]
|
| 67 |
]
|
| 68 |
|
| 69 |
+
space_id = os.getenv("SPACE_ID", "")
|
| 70 |
agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else ""
|
| 71 |
submission_data = {"username": username, "agent_code": agent_code, "answers": answers_payload}
|
| 72 |
|
| 73 |
+
# 3. Submit to scoring API
|
|
|
|
|
|
|
| 74 |
try:
|
| 75 |
+
response = requests.post(f"{DEFAULT_API_URL}/submit", json=submission_data, timeout=60)
|
| 76 |
response.raise_for_status()
|
| 77 |
result = response.json()
|
| 78 |
final_status = (
|
| 79 |
+
f"β
Submission Successful!\n"
|
| 80 |
+
f"π€ User: {result.get('username')}\n"
|
| 81 |
+
f"π― Score: {result.get('score', 'N/A')}% "
|
| 82 |
f"({result.get('correct_count', '?')}/{result.get('total_attempted', '?')} correct)\n"
|
| 83 |
+
f"π© Message: {result.get('message', 'No message received.')}"
|
| 84 |
)
|
| 85 |
df = pd.DataFrame(user_answers_cache[username])
|
| 86 |
return final_status, df
|
| 87 |
except Exception as e:
|
| 88 |
+
return f"β Submission failed: {e}", pd.DataFrame(user_answers_cache[username])
|
| 89 |
|
| 90 |
|
| 91 |
+
# ────────── Gradio UI ──────────
|
| 92 |
with gr.Blocks() as demo:
|
| 93 |
+
gr.Markdown("# π§ GAIA Agent Evaluation")
|
| 94 |
gr.LoginButton()
|
| 95 |
|
| 96 |
+
run_button = gr.Button("βΆοΈ Run Agent on GAIA Questions")
|
| 97 |
+
submit_button = gr.Button("π€ Submit Cached Answers")
|
| 98 |
|
| 99 |
+
status = gr.Textbox(label="Status", lines=6, interactive=False)
|
| 100 |
+
results = gr.DataFrame(label="Answers", wrap=True)
|
| 101 |
|
| 102 |
+
run_button.click(run_agent, outputs=[status, results])
|
| 103 |
+
submit_button.click(submit_answers, outputs=[status, results])
|
| 104 |
|
| 105 |
if __name__ == "__main__":
|
| 106 |
+
print("Launching Gradio app...")
|
| 107 |
demo.launch(debug=True, share=False)
|
gaia_graph.py
ADDED
|
@@ -0,0 +1,120 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# gaia_graph.py
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
import ast
|
| 5 |
+
import operator
|
| 6 |
+
import yaml
|
| 7 |
+
from typing import TypedDict
|
| 8 |
+
|
| 9 |
+
from dotenv import load_dotenv
|
| 10 |
+
from langchain.tools import Tool
|
| 11 |
+
from langchain.agents import initialize_agent, AgentType
|
| 12 |
+
from langchain_openai import ChatOpenAI
|
| 13 |
+
from langgraph.graph import StateGraph, END
|
| 14 |
+
|
| 15 |
+
# ─── Load Environment Variables ──────────────────────────────────────────────
|
| 16 |
+
load_dotenv()
|
| 17 |
+
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
|
| 18 |
+
assert OPENAI_API_KEY, "OPENAI_API_KEY is not set"
|
| 19 |
+
|
| 20 |
+
# ─── Define Calculator Tool ──────────────────────────────────────────────────
|
| 21 |
+
def safe_eval(expr: str) -> str:
    """Evaluate a basic arithmetic expression without using ``eval``.

    Only numeric literals combined with ``+ - * / **`` and unary minus are
    accepted. Every failure (syntax error, unsupported construct, division
    by zero, oversized exponent) is returned as an ``"Error: ..."`` string
    rather than raised, so the caller always receives text.
    """
    ops = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.Pow: operator.pow,
        ast.USub: operator.neg,
    }
    # Keeps inputs such as "9 ** 9 ** 9" from tying up the process.
    max_exponent = 1000

    def _apply(op_node, *operands):
        func = ops.get(type(op_node))
        if func is None:
            raise TypeError(f"Unsupported operator: {type(op_node).__name__}")
        return func(*operands)

    def _eval(node):
        if isinstance(node, ast.Constant):
            value = node.value
            # ast.Constant also covers strings, bytes, None and booleans;
            # only real numbers belong in a calculator.
            if isinstance(value, bool) or not isinstance(value, (int, float, complex)):
                raise TypeError(f"Unsupported constant: {value!r}")
            return value
        if isinstance(node, ast.BinOp):
            left = _eval(node.left)
            right = _eval(node.right)
            if isinstance(node.op, ast.Pow) and abs(right) > max_exponent:
                raise ValueError(f"Exponent too large (limit {max_exponent})")
            return _apply(node.op, left, right)
        if isinstance(node, ast.UnaryOp):
            return _apply(node.op, _eval(node.operand))
        raise TypeError(f"Unsupported AST node: {node!r}")

    try:
        # Strip first: leading whitespace is otherwise parsed as an
        # unexpected indent.
        tree = ast.parse(expr.strip(), mode="eval")
        return str(_eval(tree.body))
    except Exception as e:
        return f"Error: {e}"
|
| 45 |
+
|
| 46 |
+
calculator_tool = Tool(
|
| 47 |
+
name="calculator",
|
| 48 |
+
func=safe_eval,
|
| 49 |
+
description="Evaluate basic math expressions. Input: a math string like '2 + 2'. Output: the result.",
|
| 50 |
+
)
|
| 51 |
+
|
| 52 |
+
# ─── Define Search Tool using Tavily ─────────────────────────────────────────
|
| 53 |
+
from tavily import TavilyClient
|
| 54 |
+
|
| 55 |
+
with open("config.yaml") as f:
|
| 56 |
+
cfg = yaml.safe_load(f)
|
| 57 |
+
|
| 58 |
+
TAVILY_API_KEY = cfg.get("tavily_api_key")
|
| 59 |
+
assert TAVILY_API_KEY, "TAVILY API key is missing in config.yaml"
|
| 60 |
+
|
| 61 |
+
tavily = TavilyClient(api_key=TAVILY_API_KEY)
|
| 62 |
+
|
| 63 |
+
def search_tool_fn(query: str) -> str:
    """Search the web through the module-level Tavily client.

    Returns up to three hits joined by ' | ' (each as "title: excerpt" when
    both are present), "No results found." when no hit carries any text, and
    a "Search error: ..." string if the client raises.
    """
    try:
        resp = tavily.search(query)
        snippets = []
        for hit in resp.get("results", [])[:3]:
            title = (hit.get("title") or "").strip()
            # Tavily returns the page excerpt under "content"; "snippet" is
            # kept only as a fallback. A bare title gives the agent almost
            # nothing to reason over, so the excerpt is always included.
            body = (hit.get("content") or hit.get("snippet") or "").strip()
            if title and body:
                snippets.append(f"{title}: {body}")
            elif title or body:
                snippets.append(title or body)
        if not snippets:
            return "No results found."
        return " | ".join(snippets)
    except Exception as e:
        return f"Search error: {e}"
|
| 72 |
+
|
| 73 |
+
search_tool = Tool(
|
| 74 |
+
name="search",
|
| 75 |
+
func=search_tool_fn,
|
| 76 |
+
description="Useful for answering factual questions using a search engine.",
|
| 77 |
+
)
|
| 78 |
+
|
| 79 |
+
# ─── Create LLM Agent ────────────────────────────────────────────────────────
|
| 80 |
+
llm = ChatOpenAI(
|
| 81 |
+
temperature=0.0,
|
| 82 |
+
model="gpt-4o",
|
| 83 |
+
openai_api_key=OPENAI_API_KEY
|
| 84 |
+
)
|
| 85 |
+
|
| 86 |
+
agent_executor = initialize_agent(
|
| 87 |
+
tools=[calculator_tool, search_tool],
|
| 88 |
+
llm=llm,
|
| 89 |
+
agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
|
| 90 |
+
verbose=False,
|
| 91 |
+
handle_parsing_errors=True,
|
| 92 |
+
)
|
| 93 |
+
|
| 94 |
+
# ─── Clean Output ────────────────────────────────────────────────────────────
|
| 95 |
+
def clean_answer(ans: str) -> str:
    """Reduce raw agent output to the bare answer text.

    Steps, in order: keep the last non-blank chunk around any ``` fences;
    then return what follows the last "Answer:" marker if present;
    otherwise what follows the last "β" marker if present; otherwise the
    stripped text.
    """
    if "```" in ans:
        pieces = ans.split("```")
        ans = ""
        # Walk backwards to the last chunk that actually holds text. Taking
        # the final chunk unconditionally turns a fully fenced answer
        # ("```\n42\n```") into an empty string.
        for idx in range(len(pieces) - 1, -1, -1):
            piece = pieces[idx]
            if not piece.strip():
                continue
            if idx % 2 == 1:
                # Odd chunks sit inside a fence; drop a leading info-string
                # line (blank or a single word such as "python").
                first_line, sep, rest = piece.partition("\n")
                if sep and rest.strip() and " " not in first_line.strip():
                    piece = rest
            ans = piece
            break
    if "Answer:" in ans:
        return ans.split("Answer:")[-1].strip()
    # NOTE(review): "β" looks like a mis-decoded arrow or dash character;
    # confirm the intended marker before relying on this branch.
    if "β" in ans:
        return ans.split("β")[-1].strip()
    return ans.strip()
|
| 103 |
+
|
| 104 |
+
# ─── Define State ────────────────────────────────────────────────────────────
|
| 105 |
+
class GaiaState(TypedDict):
|
| 106 |
+
question: str
|
| 107 |
+
answer: str
|
| 108 |
+
|
| 109 |
+
# ─── Define Node Function ────────────────────────────────────────────────────
|
| 110 |
+
def agent_node(state: GaiaState) -> GaiaState:
    """Answer ``state["question"]`` with the agent executor and return a new state.

    The returned dict carries the unchanged question plus the executor's
    reply after it has been passed through ``clean_answer``.
    """
    # Chain.run is deprecated; invoke takes the question under "input" and
    # returns a dict whose "output" entry is the agent's final text.
    result = agent_executor.invoke({"input": state["question"]})
    return {"question": state["question"], "answer": clean_answer(result["output"])}
|
| 113 |
+
|
| 114 |
+
# ─── Build LangGraph ─────────────────────────────────────────────────────────
|
| 115 |
+
builder = StateGraph(GaiaState)
|
| 116 |
+
builder.add_node("agent", agent_node)
|
| 117 |
+
builder.set_entry_point("agent")
|
| 118 |
+
builder.set_finish_point("agent")
|
| 119 |
+
|
| 120 |
+
graph = builder.compile()
|
gaia_graph_legacy.py
ADDED
|
@@ -0,0 +1,188 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# gaia_graph.py
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
import re
|
| 5 |
+
import yaml
|
| 6 |
+
from typing import TypedDict
|
| 7 |
+
|
| 8 |
+
from dotenv import load_dotenv
|
| 9 |
+
from transformers import pipeline
|
| 10 |
+
from langchain_huggingface import HuggingFacePipeline
|
| 11 |
+
from langchain_core.tools.structured import StructuredTool
|
| 12 |
+
from langgraph.graph import StateGraph, START, END
|
| 13 |
+
from langgraph.prebuilt.chat_agent_executor import create_react_agent
|
| 14 |
+
|
| 15 |
+
#
|
| 16 |
+
# βββ 1) LOAD ENVIRONMENT VARIABLES ββββββββββββββββββββββββββββββββββββββββββββββ
|
| 17 |
+
#
|
| 18 |
+
# Make sure you have a valid HF token in your shell or .env:
|
| 19 |
+
# export HUGGINGFACE_API_TOKEN="<your token>"
|
| 20 |
+
load_dotenv()
|
| 21 |
+
HF_TOKEN = os.getenv("HUGGINGFACE_API_TOKEN")
|
| 22 |
+
assert HF_TOKEN, "Please set HUGGINGFACE_API_TOKEN in your environment or .env."
|
| 23 |
+
|
| 24 |
+
#
|
| 25 |
+
# βββ 2) LOAD config.yaml βββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 26 |
+
#
|
| 27 |
+
# Expect config.yaml with:
|
| 28 |
+
# tavily_api_key: "<your Tavily key>"
|
| 29 |
+
# huggingface_api_token: "<your HF token>" (optional duplication)
|
| 30 |
+
with open("config.yaml", "r") as f:
|
| 31 |
+
cfg = yaml.safe_load(f)
|
| 32 |
+
|
| 33 |
+
TAVILY_API_KEY = cfg.get("tavily_api_key")
|
| 34 |
+
assert TAVILY_API_KEY, "Put your Tavily key under 'tavily_api_key' in config.yaml."
|
| 35 |
+
|
| 36 |
+
#
|
| 37 |
+
# βββ 3) DEFINE βTOOLβ WRAPPERS ββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 38 |
+
#
|
| 39 |
+
|
| 40 |
+
# 3a) Calculator (a βsafe evalβ of simple expressions)
|
| 41 |
+
def _safe_eval(expr: str) -> str:
|
| 42 |
+
import ast, operator
|
| 43 |
+
|
| 44 |
+
ops = {
|
| 45 |
+
ast.Add: operator.add,
|
| 46 |
+
ast.Sub: operator.sub,
|
| 47 |
+
ast.Mult: operator.mul,
|
| 48 |
+
ast.Div: operator.truediv,
|
| 49 |
+
ast.Pow: operator.pow,
|
| 50 |
+
ast.USub: operator.neg,
|
| 51 |
+
}
|
| 52 |
+
|
| 53 |
+
def _eval(node):
|
| 54 |
+
if isinstance(node, ast.Constant):
|
| 55 |
+
return node.n
|
| 56 |
+
elif isinstance(node, ast.BinOp):
|
| 57 |
+
return ops[type(node.op)](_eval(node.left), _eval(node.right))
|
| 58 |
+
elif isinstance(node, ast.UnaryOp):
|
| 59 |
+
return ops[type(node.op)](_eval(node.operand))
|
| 60 |
+
else:
|
| 61 |
+
raise TypeError(f"Unsupported AST node: {node}")
|
| 62 |
+
|
| 63 |
+
node = ast.parse(expr, mode="eval").body
|
| 64 |
+
return str(_eval(node))
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
def _calculator_tool(text: str) -> str:
|
| 68 |
+
try:
|
| 69 |
+
return _safe_eval(text)
|
| 70 |
+
except Exception as e:
|
| 71 |
+
return f"Error evaluating expression: {e}"
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
calculator_tool = StructuredTool.from_function(
|
| 75 |
+
func=_calculator_tool,
|
| 76 |
+
name="calculator",
|
| 77 |
+
description="Evaluate simple arithmetic expressions; return the numeric result as a string.",
|
| 78 |
+
)
|
| 79 |
+
|
| 80 |
+
# 3b) Tavilyβbased search
|
| 81 |
+
from tavily import TavilyClient
|
| 82 |
+
|
| 83 |
+
class _TavilySearch:
|
| 84 |
+
def __init__(self, api_key: str):
|
| 85 |
+
self.client = TavilyClient(api_key=api_key)
|
| 86 |
+
|
| 87 |
+
def __call__(self, query: str) -> str:
|
| 88 |
+
resp = self.client.search(query)
|
| 89 |
+
results = resp.get("results", [])
|
| 90 |
+
if not results:
|
| 91 |
+
return "No results found."
|
| 92 |
+
snippets = []
|
| 93 |
+
for r in results[:3]:
|
| 94 |
+
title = r.get("title")
|
| 95 |
+
snippet = r.get("snippet")
|
| 96 |
+
if title:
|
| 97 |
+
snippets.append(title)
|
| 98 |
+
elif snippet:
|
| 99 |
+
snippets.append(snippet)
|
| 100 |
+
return " | ".join(snippets)
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
_tavily_search = _TavilySearch(api_key=TAVILY_API_KEY)
|
| 104 |
+
|
| 105 |
+
# Note: pass the instanceβs __call__, not the instance itself.
|
| 106 |
+
search_tool = StructuredTool.from_function(
|
| 107 |
+
func=_tavily_search.__call__,
|
| 108 |
+
name="search",
|
| 109 |
+
description="Look up facts via Tavily; return up to three summaries joined by ' | '.",
|
| 110 |
+
)
|
| 111 |
+
|
| 112 |
+
TOOLS = [calculator_tool, search_tool]
|
| 113 |
+
|
| 114 |
+
|
| 115 |
+
#
|
| 116 |
+
# βββ 4) PRELOAD A FREE HF MODEL & WRAP IT AS HuggingFacePipeline βββββββββββββββββββ
|
| 117 |
+
#
|
| 118 |
+
# We choose βgoogle/flan-t5-smallβ (free, CPUβfriendly). Load as a text2text pipeline:
|
| 119 |
+
hf_gen = pipeline(
|
| 120 |
+
"text2text-generation",
|
| 121 |
+
model="google/flan-t5-small",
|
| 122 |
+
device=-1, # CPU only
|
| 123 |
+
max_new_tokens=128,
|
| 124 |
+
do_sample=False, # greedy
|
| 125 |
+
)
|
| 126 |
+
|
| 127 |
+
# Now wrap that pipeline into a HuggingFacePipeline LLM.
|
| 128 |
+
# (No API token needed here for a local βgoogle/flan-t5-smallβ)
|
| 129 |
+
llm = HuggingFacePipeline(pipeline=hf_gen)
|
| 130 |
+
|
| 131 |
+
|
| 132 |
+
#
|
| 133 |
+
# βββ 5) CREATE A LANGGRAPH ReAct AGENT βββββββββββββββββββββββββββββββββββββββββββββ
|
| 134 |
+
#
|
| 135 |
+
# This `create_react_agent` will add the Thought/Action/Observation framing
|
| 136 |
+
# so that the LLM can call βcalculatorβ or βsearchβ as needed,
|
| 137 |
+
# and then eventually emit βFinal Answer: β¦β.
|
| 138 |
+
#
|
| 139 |
+
react_agent = create_react_agent(
|
| 140 |
+
llm=llm,
|
| 141 |
+
tools=TOOLS,
|
| 142 |
+
max_iterations=3,
|
| 143 |
+
verbose=False,
|
| 144 |
+
)
|
| 145 |
+
|
| 146 |
+
|
| 147 |
+
#
|
| 148 |
+
# βββ 6) DEFINE STATE SCHEMA & SINGLE GRAPH NODE βββββββββββββββββββββββββββββββββ
|
| 149 |
+
#
|
| 150 |
+
class AgentState(TypedDict):
|
| 151 |
+
question: str
|
| 152 |
+
tool_output: str # (ignored by ReAct, but must exist)
|
| 153 |
+
final_answer: str
|
| 154 |
+
|
| 155 |
+
|
| 156 |
+
def AgentNode(state: AgentState) -> AgentState:
|
| 157 |
+
q = state["question"].strip()
|
| 158 |
+
# Invoke the internal ReAct loop:
|
| 159 |
+
answer = react_agent.invoke(q).strip()
|
| 160 |
+
state["final_answer"] = answer
|
| 161 |
+
return state
|
| 162 |
+
|
| 163 |
+
|
| 164 |
+
#
|
| 165 |
+
# βββ 7) WIRE UP THE LANGGRAPH βββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 166 |
+
#
|
| 167 |
+
builder = StateGraph(AgentState)
|
| 168 |
+
builder.set_entry_point("AgentNode")
|
| 169 |
+
builder.add_node("AgentNode", AgentNode)
|
| 170 |
+
builder.add_edge(START, "AgentNode")
|
| 171 |
+
builder.add_edge("AgentNode", END)
|
| 172 |
+
|
| 173 |
+
graph = builder.compile()
|
| 174 |
+
|
| 175 |
+
#
|
| 176 |
+
# βββ 8) SMOKE TESTS βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 177 |
+
#
|
| 178 |
+
if __name__ == "__main__":
|
| 179 |
+
print("Device set to use CPU\n")
|
| 180 |
+
tests = [
|
| 181 |
+
"How much is 2 + 2",
|
| 182 |
+
"What is the capital of France?",
|
| 183 |
+
"Which country had the fewest athletes at the 1928 Olympics? Give the IOC code."
|
| 184 |
+
]
|
| 185 |
+
for q in tests:
|
| 186 |
+
state = {"question": q, "tool_output": "", "final_answer": ""}
|
| 187 |
+
out = graph.invoke(state)
|
| 188 |
+
print(f"Q: {q!r}\nβ A: {out['final_answer']!r}\n")
|
langgraph_openai.py
ADDED
|
@@ -0,0 +1,151 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# langgraph_openai.py
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
import ast
|
| 5 |
+
import operator
|
| 6 |
+
import yaml
|
| 7 |
+
from typing import TypedDict
|
| 8 |
+
from dotenv import load_dotenv
|
| 9 |
+
|
| 10 |
+
from langchain_core.tools.structured import StructuredTool
|
| 11 |
+
from langchain_community.chat_models import ChatOpenAI
|
| 12 |
+
from langgraph.prebuilt.chat_agent_executor import create_react_agent
|
| 13 |
+
from langgraph.graph import StateGraph, START, END
|
| 14 |
+
from langsmith import traceable
|
| 15 |
+
from tavily import TavilyClient
|
| 16 |
+
|
| 17 |
+
# βββ 1) LOAD KEYS βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 18 |
+
load_dotenv()
|
| 19 |
+
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
|
| 20 |
+
assert OPENAI_API_KEY, "Set OPENAI_API_KEY in your .env"
|
| 21 |
+
|
| 22 |
+
with open("config.yaml") as f:
|
| 23 |
+
cfg = yaml.safe_load(f)
|
| 24 |
+
TAVILY_API_KEY = cfg.get("tavily_api_key")
|
| 25 |
+
assert TAVILY_API_KEY, "Put your Tavily key under 'tavily_api_key' in config.yaml"
|
| 26 |
+
|
| 27 |
+
# βββ 2) TOOLS βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 28 |
+
def _safe_eval(expr: str) -> str:
|
| 29 |
+
ops = {
|
| 30 |
+
ast.Add: operator.add,
|
| 31 |
+
ast.Sub: operator.sub,
|
| 32 |
+
ast.Mult: operator.mul,
|
| 33 |
+
ast.Div: operator.truediv,
|
| 34 |
+
ast.Pow: operator.pow,
|
| 35 |
+
ast.USub: operator.neg,
|
| 36 |
+
}
|
| 37 |
+
def _eval(node):
|
| 38 |
+
if isinstance(node, ast.Constant):
|
| 39 |
+
return node.value
|
| 40 |
+
if isinstance(node, ast.BinOp):
|
| 41 |
+
return ops[type(node.op)](_eval(node.left), _eval(node.right))
|
| 42 |
+
if isinstance(node, ast.UnaryOp):
|
| 43 |
+
return ops[type(node.op)](_eval(node.operand))
|
| 44 |
+
raise TypeError(f"Unsupported AST node: {node!r}")
|
| 45 |
+
node = ast.parse(expr, mode="eval").body
|
| 46 |
+
return str(_eval(node))
|
| 47 |
+
|
| 48 |
+
def calculator_fn(expr: str) -> str:
|
| 49 |
+
try:
|
| 50 |
+
return _safe_eval(expr)
|
| 51 |
+
except Exception as e:
|
| 52 |
+
return f"Error: {e}"
|
| 53 |
+
|
| 54 |
+
calculator_tool = StructuredTool.from_function(
|
| 55 |
+
func=calculator_fn,
|
| 56 |
+
name="calculator",
|
| 57 |
+
description="Useful for evaluating simple math expressions like '2 + 2'. Takes a single input 'expr'.",
|
| 58 |
+
)
|
| 59 |
+
|
| 60 |
+
tavily = TavilyClient(api_key=TAVILY_API_KEY)
|
| 61 |
+
|
| 62 |
+
def search_fn(query: str) -> str:
    """Search the web through the module-level Tavily client.

    Returns up to three hits joined by ' | ' (each as "title: excerpt" when
    both are present), "No results found." when no hit carries any text, and
    a "Search error: ..." string if the client raises.
    """
    try:
        response = tavily.search(query)
        snippets = []
        for hit in response.get("results", [])[:3]:
            title = (hit.get("title") or "").strip()
            # Tavily returns the page excerpt under "content"; "snippet" is
            # kept only as a fallback.
            body = (hit.get("content") or hit.get("snippet") or "").strip()
            if title and body:
                snippets.append(f"{title}: {body}")
            elif title or body:
                # Hits with no text are skipped so the joined output never
                # contains empty " |  | " segments.
                snippets.append(title or body)
        if not snippets:
            return "No results found."
        return " | ".join(snippets)
    except Exception as e:
        return f"Search error: {e}"
|
| 75 |
+
|
| 76 |
+
search_tool = StructuredTool.from_function(
|
| 77 |
+
func=search_fn,
|
| 78 |
+
name="search",
|
| 79 |
+
description="Useful for general purpose web search queries.",
|
| 80 |
+
)
|
| 81 |
+
|
| 82 |
+
TOOLS = [calculator_tool, search_tool]
|
| 83 |
+
|
| 84 |
+
# βββ 3) AGENT + ANALYZER ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 85 |
+
chat = ChatOpenAI(
|
| 86 |
+
model="gpt-4o",
|
| 87 |
+
temperature=0.0,
|
| 88 |
+
openai_api_key=OPENAI_API_KEY,
|
| 89 |
+
)
|
| 90 |
+
chat_with_tools = chat.bind_tools(TOOLS)
|
| 91 |
+
|
| 92 |
+
# create_react_agent accepts no ``max_steps`` or ``verbose`` keywords (passing
# them raises TypeError at import). To cap the number of steps, pass
# config={"recursion_limit": ...} when invoking the agent.
react_agent = create_react_agent(chat_with_tools, TOOLS)
|
| 93 |
+
|
| 94 |
+
analyzer = ChatOpenAI(
|
| 95 |
+
model="gpt-4o",
|
| 96 |
+
temperature=0.0,
|
| 97 |
+
openai_api_key=OPENAI_API_KEY,
|
| 98 |
+
)
|
| 99 |
+
|
| 100 |
+
def analyze_answer(text: str) -> str:
    """Ask the analyzer model to reduce *text* to its bare final answer.

    Returns the stripped ``content`` of the model's reply.
    """
    # Adjacent string literals concatenate with no whitespace, so the
    # separators are written out explicitly; otherwise the instructions run
    # straight into each other and into the text being analyzed.
    prompt = (
        "Extract the final concise answer from the following text. "
        "Respond with only the core answer, no explanation.\n\n"
        f"{text}"
    )
    result = analyzer.invoke(prompt)
    return result.content.strip()
|
| 108 |
+
|
| 109 |
+
# βββ 4) LANGGRAPH βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 110 |
+
class AgentState(TypedDict):
|
| 111 |
+
question: str
|
| 112 |
+
tool_output: str
|
| 113 |
+
final_answer: str
|
| 114 |
+
|
| 115 |
+
@traceable(name="AgentNode OpenAI")
def AgentNode(state: AgentState) -> AgentState:
    """Run the ReAct agent on the question and store its reply in ``tool_output``.

    Mutates and returns *state*; ``final_answer`` is left untouched.
    """
    q = state["question"].strip()
    # The prebuilt ReAct graph consumes and returns a {"messages": [...]}
    # state rather than a plain string; the last message is the final reply.
    result = react_agent.invoke({"messages": [("user", q)]})
    state["tool_output"] = str(result["messages"][-1].content).strip()
    return state
|
| 121 |
+
|
| 122 |
+
@traceable(name="AnalyzerNode")
|
| 123 |
+
def AnalyzerNode(state: AgentState) -> AgentState:
|
| 124 |
+
clean = analyze_answer(state["tool_output"])
|
| 125 |
+
state["final_answer"] = clean
|
| 126 |
+
return state
|
| 127 |
+
|
| 128 |
+
builder = StateGraph(AgentState)
|
| 129 |
+
builder.set_entry_point("AgentNode")
|
| 130 |
+
builder.add_node("AgentNode", AgentNode)
|
| 131 |
+
builder.add_node("AnalyzerNode", AnalyzerNode)
|
| 132 |
+
builder.add_edge(START, "AgentNode")
|
| 133 |
+
builder.add_edge("AgentNode", "AnalyzerNode")
|
| 134 |
+
builder.add_edge("AnalyzerNode", END)
|
| 135 |
+
# StateGraph exposes compile(), not build(); compile() returns the runnable graph.
graph = builder.compile()
|
| 136 |
+
|
| 137 |
+
# βββ 5) RUN TEST βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 138 |
+
@traceable(name="OpenAI LangGraph Final Test")
|
| 139 |
+
def run_tests():
|
| 140 |
+
questions = [
|
| 141 |
+
"How much is 2 + 2",
|
| 142 |
+
"What is the capital of France?",
|
| 143 |
+
"Which country had the fewest athletes at the 1928 Olympics? Give the IOC code."
|
| 144 |
+
]
|
| 145 |
+
for q in questions:
|
| 146 |
+
state = {"question": q, "tool_output": "", "final_answer": ""}
|
| 147 |
+
out = graph.invoke(state)
|
| 148 |
+
print(f"Q: {q}\nA: {out['final_answer']}\n")
|
| 149 |
+
|
| 150 |
+
if __name__ == "__main__":
|
| 151 |
+
run_tests()
|
qa_graph.py
DELETED
|
@@ -1,69 +0,0 @@
|
|
| 1 |
-
from typing import TypedDict
|
| 2 |
-
import re
|
| 3 |
-
from langgraph.graph import StateGraph, START, END
|
| 4 |
-
from tools.calculator_tool import calculator_tool
|
| 5 |
-
from tools.search_tool import search_tool
|
| 6 |
-
from transformers import pipeline
|
| 7 |
-
|
| 8 |
-
# Shape of the state
|
| 9 |
-
class QAState(TypedDict):
|
| 10 |
-
question: str # incoming question
|
| 11 |
-
answer: str # to store tool output or synthesized answer
|
| 12 |
-
|
| 13 |
-
# Use text2text-generation for llm model
|
| 14 |
-
synthesizer = pipeline(
|
| 15 |
-
"text2text-generation",
|
| 16 |
-
model="google/flan-t5-small",
|
| 17 |
-
device=-1, # CPU; change to 0 for GPU
|
| 18 |
-
max_new_tokens=100,
|
| 19 |
-
do_sample=True,
|
| 20 |
-
top_p=0.95,
|
| 21 |
-
temperature=0.7
|
| 22 |
-
)
|
| 23 |
-
|
| 24 |
-
# Tool agent: calculator for math, search for other
|
| 25 |
-
def QAAgent(state: QAState) -> QAState:
|
| 26 |
-
q = state["question"].strip()
|
| 27 |
-
if re.fullmatch(r"[0-9\s\+\-\*\/\.\(\)]+", q):
|
| 28 |
-
state["answer"] = calculator_tool.invoke(q)
|
| 29 |
-
else:
|
| 30 |
-
state["answer"] = search_tool.invoke(q)
|
| 31 |
-
return state
|
| 32 |
-
|
| 33 |
-
# Synthesis agent to generate final response
|
| 34 |
-
def SynthesisAgent(state: QAState) -> QAState:
|
| 35 |
-
question = state["question"]
|
| 36 |
-
tool_out = state["answer"]
|
| 37 |
-
prompt = (
|
| 38 |
-
f"Question: {question}\n"
|
| 39 |
-
f"Tool output: {tool_out}\n"
|
| 40 |
-
"Answer in a comma-separated list (no extra text):"
|
| 41 |
-
)
|
| 42 |
-
outputs = synthesizer(prompt)
|
| 43 |
-
completion = outputs[0]["generated_text"].strip()
|
| 44 |
-
state["answer"] = completion
|
| 45 |
-
return state
|
| 46 |
-
|
| 47 |
-
# Build the graph
|
| 48 |
-
builder = StateGraph(QAState)
|
| 49 |
-
builder.set_entry_point("QAAgent")
|
| 50 |
-
builder.add_node("QAAgent", QAAgent)
|
| 51 |
-
builder.add_node("SynthesisAgent", SynthesisAgent)
|
| 52 |
-
|
| 53 |
-
builder.add_edge(START, "QAAgent")
|
| 54 |
-
builder.add_edge("QAAgent", "SynthesisAgent")
|
| 55 |
-
builder.add_edge("SynthesisAgent", END)
|
| 56 |
-
|
| 57 |
-
graph = builder.compile()
|
| 58 |
-
|
| 59 |
-
# Local testing
|
| 60 |
-
if __name__ == "__main__":
|
| 61 |
-
# Math example
|
| 62 |
-
s1: QAState = {"question": "2 + 2", "answer": ""}
|
| 63 |
-
o1 = graph.invoke(s1)
|
| 64 |
-
print("Q:", s1["question"], "-> A:", o1["answer"])
|
| 65 |
-
|
| 66 |
-
# Search + synthesis example
|
| 67 |
-
s2: QAState = {"question": "What is the capital of France?", "answer": ""}
|
| 68 |
-
o2 = graph.invoke(s2)
|
| 69 |
-
print("Q:", s2["question"], "-> A:", o2["answer"])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
requirements.txt
CHANGED
|
@@ -1,14 +1,38 @@
|
|
| 1 |
-
gradio
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
requests
|
| 3 |
-
langgraph
|
| 4 |
-
openai
|
| 5 |
-
tavily-python
|
| 6 |
-
langchain
|
| 7 |
pandas
|
| 8 |
python-dotenv
|
| 9 |
-
huggingface_hub
|
| 10 |
-
transformers
|
| 11 |
-
langchain-huggingface
|
| 12 |
IPython
|
| 13 |
numpy<2.0
|
| 14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# gradio
|
| 2 |
+
# requests
|
| 3 |
+
# pandas
|
| 4 |
+
# python-dotenv
|
| 5 |
+
# IPython
|
| 6 |
+
# numpy<2.0
|
| 7 |
+
|
| 8 |
+
# huggingface_hub
|
| 9 |
+
# transformers
|
| 10 |
+
# langchain-huggingface
|
| 11 |
+
# langgraph
|
| 12 |
+
# langsmith
|
| 13 |
+
# langchain>=0.1.20
|
| 14 |
+
# langchain-community
|
| 15 |
+
# tavily-python
|
| 16 |
+
# huggingface_hub[hf_xet]
|
| 17 |
+
# pydantic
|
| 18 |
+
# PyYAML
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
gradio==5.30.0
|
| 22 |
requests
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
pandas
|
| 24 |
python-dotenv
|
|
|
|
|
|
|
|
|
|
| 25 |
IPython
|
| 26 |
numpy<2.0
|
| 27 |
+
huggingface_hub
|
| 28 |
+
transformers==4.51.3
|
| 29 |
+
langchain-huggingface==0.2.0
|
| 30 |
+
langgraph==0.4.5
|
| 31 |
+
langsmith==0.3.42
|
| 32 |
+
langchain>=0.1.20,<0.4.0
|
| 33 |
+
langchain-community==0.3.24
|
| 34 |
+
tavily-python==0.7.2
|
| 35 |
+
pydantic>=2.0
|
| 36 |
+
PyYAML
|
| 37 |
+
hf-xet~=1.1.1
|
| 38 |
+
langchain-openai
|
test_gaia.py
DELETED
|
@@ -1,8 +0,0 @@
|
|
| 1 |
-
from qa_graph import graph
|
| 2 |
-
import requests
|
| 3 |
-
import pandas as pd
|
| 4 |
-
|
| 5 |
-
# Download the benchmark questions and run the first five through the graph,
# printing each task id next to the produced answer.
QUESTIONS_URL = "https://agents-course-unit4-scoring.hf.space/questions"
QUESTIONS = requests.get(QUESTIONS_URL).json()
for item in QUESTIONS[:5]:
    result = graph.invoke({"question": item["question"], "answer": ""})
    print(item["task_id"], result["answer"])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
test_gaia_questions.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# test_gaia_questions.py
|
| 2 |
+
|
| 3 |
+
import requests
|
| 4 |
+
from gaia_graph import graph
|
| 5 |
+
|
| 6 |
+
def test_with_real_gaia_questions():
    """Run the first five benchmark questions through ``graph`` and print the answers.

    Fetches the question list from the scoring service, invokes ``graph`` on
    each of the first five items, and prints the task id, the question, and
    the ``answer`` field of the resulting state.

    Raises:
        requests.HTTPError: if the scoring service responds with an error status.
        requests.Timeout: if the service does not respond within 30 seconds.
    """
    # Fetch questions directly from the benchmark API.
    url = "https://agents-course-unit4-scoring.hf.space/questions"
    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.get(url, timeout=30)
    # Fail loudly on HTTP errors instead of parsing an error page as JSON.
    response.raise_for_status()
    questions = response.json()

    for q in questions[:5]:  # Limit to first 5 for testing
        question = q["question"]
        task_id = q["task_id"]
        state = {"question": question, "answer": ""}
        result = graph.invoke(state)
        print(f"[{task_id}] Q: {question}")
        print(f"→ {result['answer']}")
        print("-" * 60)


if __name__ == "__main__":
    test_with_real_gaia_questions()
|
test_openai_agent.py
ADDED
|
@@ -0,0 +1,141 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# test_openai_agent.py
|
| 2 |
+
|
| 3 |
+
import os, json
|
| 4 |
+
from typing import Dict
|
| 5 |
+
from dotenv import load_dotenv
|
| 6 |
+
from openai import OpenAI
|
| 7 |
+
from tavily import TavilyClient
|
| 8 |
+
|
| 9 |
+
load_dotenv()


def _require_env(name: str) -> str:
    """Return the value of environment variable *name*; raise if unset or empty."""
    value = os.getenv(name)
    if not value:
        # Explicit raise rather than ``assert``: asserts are stripped when
        # Python runs with -O, which would let a missing key slip through.
        raise RuntimeError(f"Set {name} in your .env")
    return value


# ─── 1) OpenAI client (v1) ──────────────────────────────────────────────────────
OPENAI_API_KEY = _require_env("OPENAI_API_KEY")
client = OpenAI(api_key=OPENAI_API_KEY)

# ─── 2) Tavily search client ────────────────────────────────────────────────────
TAVILY_API_KEY = _require_env("TAVILY_API_KEY")
tavily = TavilyClient(api_key=TAVILY_API_KEY)
|
| 20 |
+
|
| 21 |
+
# ─── 3) Tool implementations ────────────────────────────────────────────────────
|
| 22 |
+
def calculator(expr: str) -> str:
    """Evaluate an arithmetic expression and return the result as a string.

    SECURITY NOTE: this used to call ``eval(expr, {}, {})``. Empty globals do
    not remove ``__builtins__``, so model-supplied text could reach
    ``__import__`` and run arbitrary code. The expression is now parsed with
    ``ast`` and only numeric literals, the arithmetic operators
    ``+ - * / // % **``, unary ``+``/``-``, and positional calls to
    ``abs``, ``round``, ``min``, ``max`` and ``pow`` are evaluated.

    Args:
        expr: The expression text, e.g. ``"2 + 2"``.

    Returns:
        ``str(result)`` on success, otherwise ``"Error: <message>"``. The
        function never raises, so the caller can always forward the string.
    """
    import ast
    import operator

    binary_ops = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.FloorDiv: operator.floordiv,
        ast.Mod: operator.mod,
        ast.Pow: operator.pow,
    }
    unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}
    safe_calls = {"abs": abs, "round": round, "min": min, "max": max, "pow": pow}

    def evaluate(node):
        """Recursively evaluate a whitelisted AST node; reject everything else."""
        if isinstance(node, ast.Expression):
            return evaluate(node.body)
        if isinstance(node, ast.Constant) and isinstance(node.value, (int, float, complex)):
            return node.value
        if isinstance(node, ast.BinOp) and type(node.op) in binary_ops:
            return binary_ops[type(node.op)](evaluate(node.left), evaluate(node.right))
        if isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
            return unary_ops[type(node.op)](evaluate(node.operand))
        if (
            isinstance(node, ast.Call)
            and isinstance(node.func, ast.Name)
            and node.func.id in safe_calls
            and not node.keywords
        ):
            return safe_calls[node.func.id](*(evaluate(arg) for arg in node.args))
        raise ValueError(f"unsupported expression element: {type(node).__name__}")

    try:
        # strip(): unlike eval(), ast.parse rejects leading whitespace.
        return str(evaluate(ast.parse(expr.strip(), mode="eval")))
    except Exception as e:  # tool boundary: report every failure as text
        return f"Error: {e}"
|
| 27 |
+
|
| 28 |
+
def search(query: str) -> str:
    """Search the web via Tavily and return up to three results as one string.

    Each of the first three results is rendered as ``"<title>: <text>"`` (or
    whichever of the two is present) and the entries are joined with
    ``" | "``.

    Args:
        query: The search query.

    Returns:
        The joined summaries, ``"No results."`` when nothing usable came
        back, or ``"Search error: <message>"`` if the lookup failed. The
        function never raises, so the caller can always forward the string.
    """
    try:
        resp = tavily.search(query=query, search_depth="basic")
        summaries = []
        for r in (resp.get("results") or [])[:3]:
            title = (r.get("title") or "").strip()
            # Tavily returns the page extract under "content". The previous
            # code read "snippet" and preferred the title, so only titles were
            # ever returned. "snippet" stays as a fallback for compatibility.
            body = (r.get("content") or r.get("snippet") or "").strip()
            text = f"{title}: {body}" if title and body else title or body
            if text:
                summaries.append(text)
        return " | ".join(summaries) or "No results."
    except Exception as e:  # tool boundary: report every failure as text
        return f"Search error: {e}"
|
| 43 |
+
|
| 44 |
+
# ─── 4) Function-calling schemas ────────────────────────────────────────────────
|
| 45 |
+
def _single_string_param(param_name, param_description):
    """Build a JSON-schema object with exactly one required string property."""
    return {
        "type": "object",
        "properties": {
            param_name: {"type": "string", "description": param_description}
        },
        "required": [param_name],
    }


# Function-calling schemas advertised to the model, one entry per tool.
functions = [
    {
        "name": "calculator",
        "description": "Evaluate a math expression, return result as string",
        "parameters": _single_string_param("expr", "The expression to evaluate"),
    },
    {
        "name": "search",
        "description": "Look up facts on the web via Tavily; return up to three summaries.",
        "parameters": _single_string_param("query", "The search query"),
    },
]
|
| 69 |
+
tool_map = {"calculator": calculator, "search": search}
|
| 70 |
+
|
| 71 |
+
# ─── 5) A single ReAct loop using free-tier mini model ──────────────────────────
|
| 72 |
+
def _dispatch_tool_call(name: str, raw_arguments: str) -> str:
    """Run the named tool with JSON-encoded arguments; report failures as text."""
    tool = tool_map.get(name)
    if tool is None:
        return f"Error: unknown tool {name!r}"
    try:
        args = json.loads(raw_arguments or "{}")
    except json.JSONDecodeError as e:
        return f"Error: could not parse arguments for {name}: {e}"
    print(f"[Tool Call] {name}({args})")
    try:
        return tool(**args)
    except TypeError as e:
        # Wrong/missing keyword arguments, or arguments that are not an object.
        return f"Error: bad arguments for {name}: {e}"


def run_react(question: str, max_steps: int = 3) -> str:
    """Answer *question* with a short tool-calling loop.

    On each step the conversation is sent to the chat model together with the
    tool schemas in ``functions``. If the model requests a tool, the tool is
    run and both the request and its output are appended to the conversation
    so the next step can see them. Otherwise the model's text is returned.

    A bad tool request (unknown name, malformed or mismatched arguments) is
    reported back to the model as the tool output instead of raising, so the
    model can correct itself within the remaining steps.

    Args:
        question: The user question.
        max_steps: Maximum number of model calls before giving up.

    Returns:
        The model's final answer with surrounding whitespace removed (empty
        string if the model returned no text), or
        ``"[no final answer after max steps]"`` when every step was a tool call.
    """
    messages = [{"role": "user", "content": question}]
    for _ in range(max_steps):
        resp = client.chat.completions.create(
            model="gpt-4o-mini",  # the "mini" model
            messages=messages,
            functions=functions,
            function_call="auto",
        )
        msg = resp.choices[0].message

        call = msg.function_call
        if not call:
            # No tool requested, so this is the final answer. ``content`` may
            # be None, which previously crashed on ``.strip()``.
            return (msg.content or "").strip()

        out = _dispatch_tool_call(call.name, call.arguments)
        # Append both the tool call and the tool's response so the next
        # iteration lets the model see the tool output.
        messages.append({
            "role": "assistant",
            "content": None,
            "function_call": call.to_dict(),
        })
        messages.append({
            "role": "function",
            "name": call.name,
            "content": out,
        })

    return "[no final answer after max steps]"
|
| 107 |
+
|
| 108 |
+
def analyzer_agent(question: str, raw_answer: str) -> str:
    """Reduce a verbose answer to the short answer the question asks for.

    Sends the question and the raw answer to the chat model with an
    instruction to return only the short answer, using ``temperature=0``.

    Args:
        question: The original question.
        raw_answer: The unprocessed answer to condense.

    Returns:
        The model's reply with surrounding whitespace removed, or an empty
        string if the model returned no text.
    """
    prompt = (
        "You are a data extractor.\n"
        f"Q: {question.strip()}\n"
        f"Answer: {raw_answer.strip()}\n"
        "Return only the short answer (e.g., a number, place, or code)."
    )

    resp = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": prompt}],
        temperature=0,
    )
    # ``content`` is nullable in the API response; guard before stripping.
    return (resp.choices[0].message.content or "").strip()
|
| 122 |
+
|
| 123 |
+
def run_cleaned_answer(question: str) -> str:
    """Run the ReAct loop on *question*, then condense its output to a short answer."""
    return analyzer_agent(question, run_react(question))
|
| 126 |
+
|
| 127 |
+
|
| 128 |
+
# ─── 6) Smoke-test it ───────────────────────────────────────────────────────────
|
| 129 |
+
if __name__ == "__main__":
    # Smoke test: one arithmetic, one simple factual, and one multi-step question.
    tests = [
        "How much is 2 + 2",
        "What is the capital of France?",
        "Which country had the fewest athletes at the 1928 Olympics? Give the IOC code.",
    ]
    for q in tests:
        print(f"\n>>> {q}")
        try:
            result = run_cleaned_answer(q)
        except Exception as e:
            # Keep going so one failing question does not hide the others.
            result = f"[ERROR] {e}"
        # The arrow was mis-encoded in the source; restored to the intended character.
        print(f"→ {result}")
|
tools/search_tool.py
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
from tavily import TavilyClient
|
| 2 |
from langchain.tools import tool
|
| 3 |
-
|
| 4 |
-
import os
|
| 5 |
-
TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")
|
| 6 |
|
| 7 |
class SearchTool:
|
| 8 |
def __init__(self, api_key: str):
|
|
|
|
| 1 |
from tavily import TavilyClient
|
| 2 |
from langchain.tools import tool
|
| 3 |
+
from config import TAVILY_API_KEY
|
| 4 |
+
# import os
|
| 5 |
+
# TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")
|
| 6 |
|
| 7 |
class SearchTool:
|
| 8 |
def __init__(self, api_key: str):
|