D3MI4N commited on
Commit
7bccf8e
Β·
1 Parent(s): 0c06f61

New version using OpenAI

Browse files
app.py CHANGED
@@ -3,24 +3,24 @@ import gradio as gr
3
  import requests
4
  import pandas as pd
5
  import asyncio
6
- from qa_graph import graph
7
  from typing import Optional
8
 
9
- # --- Constants ---
10
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
11
- user_answers_cache = {} # Stores answers per session
12
 
13
  class GaiaAgent:
14
  def __init__(self):
15
- print("Graph-based agent initialized.")
16
 
17
  def __call__(self, question: str) -> str:
18
- print("Received question:", question)
19
  state = {"question": question, "answer": ""}
20
- out = graph.invoke(state)
21
- return out["answer"]
22
-
23
 
 
24
  async def run_agent(profile: gr.OAuthProfile | None):
25
  if not profile:
26
  return "Please login to Hugging Face.", None
@@ -28,27 +28,25 @@ async def run_agent(profile: gr.OAuthProfile | None):
28
  username = profile.username
29
  agent = GaiaAgent()
30
 
31
- api_url = DEFAULT_API_URL
32
- questions_url = f"{api_url}/questions"
33
- print(f"Fetching questions from: {questions_url}")
34
-
35
  try:
36
- response = requests.get(questions_url, timeout=15)
37
  response.raise_for_status()
38
  questions_data = response.json()
39
  except Exception as e:
40
  return f"Error fetching questions: {e}", None
41
 
42
- async def process_question(item):
 
43
  task_id = item.get("task_id")
44
- question_text = item.get("question")
45
  try:
46
- answer = await asyncio.to_thread(agent, question_text)
47
- return {"task_id": task_id, "question": question_text, "submitted_answer": answer}
48
  except Exception as e:
49
- return {"task_id": task_id, "question": question_text, "submitted_answer": f"ERROR: {e}"}
50
 
51
- results = await asyncio.gather(*(process_question(item) for item in questions_data))
52
  user_answers_cache[username] = results
53
 
54
  df = pd.DataFrame(results)
@@ -61,50 +59,49 @@ def submit_answers(profile: gr.OAuthProfile | None):
61
 
62
  username = profile.username.strip()
63
  if username not in user_answers_cache:
64
- return "No cached answers found. Please run the agent first.", None
65
 
66
  answers_payload = [
67
  {"task_id": item["task_id"], "submitted_answer": item["submitted_answer"]}
68
  for item in user_answers_cache[username]
69
  ]
70
 
71
- space_id = os.getenv("SPACE_ID")
72
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else ""
73
  submission_data = {"username": username, "agent_code": agent_code, "answers": answers_payload}
74
 
75
- submit_url = f"{DEFAULT_API_URL}/submit"
76
- print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
77
-
78
  try:
79
- response = requests.post(submit_url, json=submission_data, timeout=60)
80
  response.raise_for_status()
81
  result = response.json()
82
  final_status = (
83
- f"Submission Successful!\n"
84
- f"User: {result.get('username')}\n"
85
- f"Overall Score: {result.get('score', 'N/A')}% "
86
  f"({result.get('correct_count', '?')}/{result.get('total_attempted', '?')} correct)\n"
87
- f"Message: {result.get('message', 'No message received.')}"
88
  )
89
  df = pd.DataFrame(user_answers_cache[username])
90
  return final_status, df
91
  except Exception as e:
92
- return f"Submission failed: {e}", pd.DataFrame(user_answers_cache[username])
93
 
94
 
 
95
  with gr.Blocks() as demo:
96
- gr.Markdown("# Basic Agent Evaluation Runner")
97
  gr.LoginButton()
98
 
99
- run_button = gr.Button("Run Agent on Questions")
100
- submit_button = gr.Button("Submit Cached Answers")
101
 
102
- status_output = gr.Textbox(label="Status", lines=5, interactive=False)
103
- results_table = gr.DataFrame(label="Results", wrap=True)
104
 
105
- run_button.click(run_agent, outputs=[status_output, results_table])
106
- submit_button.click(submit_answers, outputs=[status_output, results_table])
107
 
108
  if __name__ == "__main__":
109
- print("Launching app...")
110
  demo.launch(debug=True, share=False)
 
3
  import requests
4
  import pandas as pd
5
  import asyncio
6
+ from gaia_graph import graph # Use your agent
7
  from typing import Optional
8
 
9
+ # Constants
10
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
11
+ user_answers_cache = {} # session-based cache
12
 
13
class GaiaAgent:
    """Callable wrapper that routes one question through the compiled LangGraph `graph`."""

    def __init__(self):
        print("GaiaAgent initialized.")

    def __call__(self, question: str) -> str:
        """Run the graph on `question` and return the extracted answer string."""
        print(f"Running agent on: {question}")
        state = {"question": question, "answer": ""}
        # FIX: a compiled LangGraph is not subscriptable — `graph["invoke"]`
        # raised TypeError at runtime. The run method is `graph.invoke(state)`.
        result = graph.invoke(state)
        return result["answer"]
 
22
 
23
+ # Async runner
24
  async def run_agent(profile: gr.OAuthProfile | None):
25
  if not profile:
26
  return "Please login to Hugging Face.", None
 
28
  username = profile.username
29
  agent = GaiaAgent()
30
 
31
+ # 1. Load questions
 
 
 
32
  try:
33
+ response = requests.get(f"{DEFAULT_API_URL}/questions", timeout=10)
34
  response.raise_for_status()
35
  questions_data = response.json()
36
  except Exception as e:
37
  return f"Error fetching questions: {e}", None
38
 
39
+ # 2. Process questions
40
+ async def process(item):
41
  task_id = item.get("task_id")
42
+ question = item.get("question")
43
  try:
44
+ answer = await asyncio.to_thread(agent, question)
45
+ return {"task_id": task_id, "question": question, "submitted_answer": answer}
46
  except Exception as e:
47
+ return {"task_id": task_id, "question": question, "submitted_answer": f"ERROR: {e}"}
48
 
49
+ results = await asyncio.gather(*(process(item) for item in questions_data))
50
  user_answers_cache[username] = results
51
 
52
  df = pd.DataFrame(results)
 
59
 
60
  username = profile.username.strip()
61
  if username not in user_answers_cache:
62
+ return "No cached answers. Please run the agent first.", None
63
 
64
  answers_payload = [
65
  {"task_id": item["task_id"], "submitted_answer": item["submitted_answer"]}
66
  for item in user_answers_cache[username]
67
  ]
68
 
69
+ space_id = os.getenv("SPACE_ID", "")
70
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else ""
71
  submission_data = {"username": username, "agent_code": agent_code, "answers": answers_payload}
72
 
73
+ # 3. Submit to scoring API
 
 
74
  try:
75
+ response = requests.post(f"{DEFAULT_API_URL}/submit", json=submission_data, timeout=60)
76
  response.raise_for_status()
77
  result = response.json()
78
  final_status = (
79
+ f"βœ… Submission Successful!\n"
80
+ f"πŸ‘€ User: {result.get('username')}\n"
81
+ f"🎯 Score: {result.get('score', 'N/A')}% "
82
  f"({result.get('correct_count', '?')}/{result.get('total_attempted', '?')} correct)\n"
83
+ f"πŸ“© Message: {result.get('message', 'No message received.')}"
84
  )
85
  df = pd.DataFrame(user_answers_cache[username])
86
  return final_status, df
87
  except Exception as e:
88
+ return f"❌ Submission failed: {e}", pd.DataFrame(user_answers_cache[username])
89
 
90
 
91
+ # ────────── Gradio UI ──────────
92
  with gr.Blocks() as demo:
93
+ gr.Markdown("# 🧠 GAIA Agent Evaluation")
94
  gr.LoginButton()
95
 
96
+ run_button = gr.Button("▢️ Run Agent on GAIA Questions")
97
+ submit_button = gr.Button("πŸ“€ Submit Cached Answers")
98
 
99
+ status = gr.Textbox(label="Status", lines=6, interactive=False)
100
+ results = gr.DataFrame(label="Answers", wrap=True)
101
 
102
+ run_button.click(run_agent, outputs=[status, results])
103
+ submit_button.click(submit_answers, outputs=[status, results])
104
 
105
  if __name__ == "__main__":
106
+ print("Launching Gradio app...")
107
  demo.launch(debug=True, share=False)
gaia_graph.py ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # gaia_graph.py
2
+
3
+ import os
4
+ import ast
5
+ import operator
6
+ import yaml
7
+ from typing import TypedDict
8
+
9
+ from dotenv import load_dotenv
10
+ from langchain.tools import Tool
11
+ from langchain.agents import initialize_agent, AgentType
12
+ from langchain_openai import ChatOpenAI
13
+ from langgraph.graph import StateGraph, END
14
+
15
+ # ─── Load Environment Variables ──────────────────────────────────────────────
16
+ load_dotenv()
17
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
18
+ assert OPENAI_API_KEY, "OPENAI_API_KEY is not set"
19
+
20
+ # ─── Define Calculator Tool ──────────────────────────────────────────────────
21
def safe_eval(expr: str) -> str:
    """Evaluate a basic arithmetic expression and return the result as a string.

    Only +, -, *, /, ** and unary minus are permitted; any other construct
    (names, calls, etc.) yields an "Error: ..." string instead of raising.
    """
    allowed = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.Pow: operator.pow,
        ast.USub: operator.neg,
    }

    def evaluate(tree):
        # Numeric literals arrive as ast.Constant nodes.
        if isinstance(tree, ast.Constant):
            return tree.value
        # Binary operators: recurse into both operands.
        if isinstance(tree, ast.BinOp):
            return allowed[type(tree.op)](evaluate(tree.left), evaluate(tree.right))
        # Unary operators (negation).
        if isinstance(tree, ast.UnaryOp):
            return allowed[type(tree.op)](evaluate(tree.operand))
        raise TypeError(f"Unsupported AST node: {tree!r}")

    try:
        return str(evaluate(ast.parse(expr, mode="eval").body))
    except Exception as e:
        return f"Error: {e}"
45
+
46
+ calculator_tool = Tool(
47
+ name="calculator",
48
+ func=safe_eval,
49
+ description="Evaluate basic math expressions. Input: a math string like '2 + 2'. Output: the result.",
50
+ )
51
+
52
+ # ─── Define Search Tool using Tavily ─────────────────────────────────────────
53
+ from tavily import TavilyClient
54
+
55
+ with open("config.yaml") as f:
56
+ cfg = yaml.safe_load(f)
57
+
58
+ TAVILY_API_KEY = cfg.get("tavily_api_key")
59
+ assert TAVILY_API_KEY, "TAVILY API key is missing in config.yaml"
60
+
61
+ tavily = TavilyClient(api_key=TAVILY_API_KEY)
62
+
63
def search_tool_fn(query: str) -> str:
    """Run a Tavily web search and return a one-line summary of the top hit.

    Returns a human-readable message when nothing is found or the search fails.
    """
    try:
        hits = tavily.search(query).get("results", [])
        if not hits:
            return "No results found."
        top = hits[0]
        return top.get("title") or top.get("snippet") or "No snippet."
    except Exception as e:
        return f"Search error: {e}"
72
+
73
+ search_tool = Tool(
74
+ name="search",
75
+ func=search_tool_fn,
76
+ description="Useful for answering factual questions using a search engine.",
77
+ )
78
+
79
+ # ─── Create LLM Agent ────────────────────────────────────────────────────────
80
+ llm = ChatOpenAI(
81
+ temperature=0.0,
82
+ model="gpt-4o",
83
+ openai_api_key=OPENAI_API_KEY
84
+ )
85
+
86
+ agent_executor = initialize_agent(
87
+ tools=[calculator_tool, search_tool],
88
+ llm=llm,
89
+ agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
90
+ verbose=False,
91
+ handle_parsing_errors=True,
92
+ )
93
+
94
+ # ─── Clean Output ────────────────────────────────────────────────────────────
95
def clean_answer(ans: str) -> str:
    """Strip agent scaffolding from a raw LLM answer.

    Keeps only the text after the last code fence, then after an
    "Answer:" or "β†’" marker when one is present.
    """
    if "```" in ans:
        ans = ans.split("```")[-1]
    for marker in ("Answer:", "β†’"):
        if marker in ans:
            return ans.rsplit(marker, 1)[-1].strip()
    return ans.strip()
103
+
104
+ # ─── Define State ────────────────────────────────────────────────────────────
105
class GaiaState(TypedDict):
    """State dict flowing through the single-node QA graph."""
    question: str  # incoming user question
    answer: str    # filled in by agent_node with the cleaned agent output
108
+
109
+ # ─── Define Node Function ────────────────────────────────────────────────────
110
def agent_node(state: GaiaState) -> GaiaState:
    """LangGraph node: run the ReAct executor on the question and clean its output."""
    question = state["question"]
    # NOTE(review): AgentExecutor.run() is deprecated in newer LangChain in
    # favour of .invoke(); kept as-is to preserve behaviour on the pinned version.
    raw_answer = agent_executor.run(question)
    return {"question": question, "answer": clean_answer(raw_answer)}
113
+
114
+ # ─── Build LangGraph ─────────────────────────────────────────────────────────
115
+ builder = StateGraph(GaiaState)
116
+ builder.add_node("agent", agent_node)
117
+ builder.set_entry_point("agent")
118
+ builder.set_finish_point("agent")
119
+
120
+ graph = builder.compile()
gaia_graph_legacy.py ADDED
@@ -0,0 +1,188 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # gaia_graph.py
2
+
3
+ import os
4
+ import re
5
+ import yaml
6
+ from typing import TypedDict
7
+
8
+ from dotenv import load_dotenv
9
+ from transformers import pipeline
10
+ from langchain_huggingface import HuggingFacePipeline
11
+ from langchain_core.tools.structured import StructuredTool
12
+ from langgraph.graph import StateGraph, START, END
13
+ from langgraph.prebuilt.chat_agent_executor import create_react_agent
14
+
15
+ #
16
+ # ─── 1) LOAD ENVIRONMENT VARIABLES ──────────────────────────────────────────────
17
+ #
18
+ # Make sure you have a valid HF token in your shell or .env:
19
+ # export HUGGINGFACE_API_TOKEN="<your token>"
20
+ load_dotenv()
21
+ HF_TOKEN = os.getenv("HUGGINGFACE_API_TOKEN")
22
+ assert HF_TOKEN, "Please set HUGGINGFACE_API_TOKEN in your environment or .env."
23
+
24
+ #
25
+ # ─── 2) LOAD config.yaml ─────────────────────────────────────────────────────────
26
+ #
27
+ # Expect config.yaml with:
28
+ # tavily_api_key: "<your Tavily key>"
29
+ # huggingface_api_token: "<your HF token>" (optional duplication)
30
+ with open("config.yaml", "r") as f:
31
+ cfg = yaml.safe_load(f)
32
+
33
+ TAVILY_API_KEY = cfg.get("tavily_api_key")
34
+ assert TAVILY_API_KEY, "Put your Tavily key under 'tavily_api_key' in config.yaml."
35
+
36
+ #
37
+ # ─── 3) DEFINE β€œTOOL” WRAPPERS ────────────────────────────────────────────────────
38
+ #
39
+
40
+ # 3a) Calculator (a β€œsafe eval” of simple expressions)
41
+ def _safe_eval(expr: str) -> str:
42
+ import ast, operator
43
+
44
+ ops = {
45
+ ast.Add: operator.add,
46
+ ast.Sub: operator.sub,
47
+ ast.Mult: operator.mul,
48
+ ast.Div: operator.truediv,
49
+ ast.Pow: operator.pow,
50
+ ast.USub: operator.neg,
51
+ }
52
+
53
+ def _eval(node):
54
+ if isinstance(node, ast.Constant):
55
+ return node.n
56
+ elif isinstance(node, ast.BinOp):
57
+ return ops[type(node.op)](_eval(node.left), _eval(node.right))
58
+ elif isinstance(node, ast.UnaryOp):
59
+ return ops[type(node.op)](_eval(node.operand))
60
+ else:
61
+ raise TypeError(f"Unsupported AST node: {node}")
62
+
63
+ node = ast.parse(expr, mode="eval").body
64
+ return str(_eval(node))
65
+
66
+
67
def _calculator_tool(text: str) -> str:
    """Tool entry point: evaluate *text* arithmetically, never raising to the caller."""
    try:
        result = _safe_eval(text)
    except Exception as e:
        return f"Error evaluating expression: {e}"
    return result
72
+
73
+
74
+ calculator_tool = StructuredTool.from_function(
75
+ func=_calculator_tool,
76
+ name="calculator",
77
+ description="Evaluate simple arithmetic expressions; return the numeric result as a string.",
78
+ )
79
+
80
+ # 3b) Tavily‐based search
81
+ from tavily import TavilyClient
82
+
83
class _TavilySearch:
    """Callable wrapper around TavilyClient returning up to three result titles/snippets."""

    def __init__(self, api_key: str):
        self.client = TavilyClient(api_key=api_key)

    def __call__(self, query: str) -> str:
        results = self.client.search(query).get("results", [])
        if not results:
            return "No results found."
        # Prefer each result's title, fall back to its snippet, drop entries
        # that have neither.
        parts = [r.get("title") or r.get("snippet") for r in results[:3]]
        return " | ".join(p for p in parts if p)
101
+
102
+
103
+ _tavily_search = _TavilySearch(api_key=TAVILY_API_KEY)
104
+
105
+ # Note: pass the instance’s __call__, not the instance itself.
106
+ search_tool = StructuredTool.from_function(
107
+ func=_tavily_search.__call__,
108
+ name="search",
109
+ description="Look up facts via Tavily; return up to three summaries joined by ' | '.",
110
+ )
111
+
112
+ TOOLS = [calculator_tool, search_tool]
113
+
114
+
115
+ #
116
+ # ─── 4) PRELOAD A FREE HF MODEL & WRAP IT AS HuggingFacePipeline ───────────────────
117
+ #
118
+ # We choose β€œgoogle/flan-t5-small” (free, CPU‐friendly). Load as a text2text pipeline:
119
+ hf_gen = pipeline(
120
+ "text2text-generation",
121
+ model="google/flan-t5-small",
122
+ device=-1, # CPU only
123
+ max_new_tokens=128,
124
+ do_sample=False, # greedy
125
+ )
126
+
127
+ # Now wrap that pipeline into a HuggingFacePipeline LLM.
128
+ # (No API token needed here for a local β€œgoogle/flan-t5-small”)
129
+ llm = HuggingFacePipeline(pipeline=hf_gen)
130
+
131
+
132
+ #
133
+ # ─── 5) CREATE A LANGGRAPH ReAct AGENT ─────────────────────────────────────────────
134
+ #
135
+ # This `create_react_agent` will add the Thought/Action/Observation framing
136
+ # so that the LLM can call β€œcalculator” or β€œsearch” as needed,
137
+ # and then eventually emit β€œFinal Answer: …”.
138
+ #
139
+ react_agent = create_react_agent(
140
+ llm=llm,
141
+ tools=TOOLS,
142
+ max_iterations=3,
143
+ verbose=False,
144
+ )
145
+
146
+
147
+ #
148
+ # ─── 6) DEFINE STATE SCHEMA & SINGLE GRAPH NODE ─────────────────────────────────
149
+ #
150
+ class AgentState(TypedDict):
151
+ question: str
152
+ tool_output: str # (ignored by ReAct, but must exist)
153
+ final_answer: str
154
+
155
+
156
def AgentNode(state: AgentState) -> AgentState:
    """Run the prebuilt ReAct agent on the question and record its final answer."""
    q = state["question"].strip()
    # FIX: langgraph's create_react_agent compiles to a graph that consumes
    # and produces a {"messages": [...]} dict; invoking it with a bare string
    # (and calling .strip() on the returned dict) fails at runtime. The
    # answer is the content of the last message.
    result = react_agent.invoke({"messages": [("user", q)]})
    state["final_answer"] = result["messages"][-1].content.strip()
    return state
162
+
163
+
164
+ #
165
+ # ─── 7) WIRE UP THE LANGGRAPH ─────────────────────────────────────────────────────
166
+ #
167
+ builder = StateGraph(AgentState)
168
+ builder.set_entry_point("AgentNode")
169
+ builder.add_node("AgentNode", AgentNode)
170
+ builder.add_edge(START, "AgentNode")
171
+ builder.add_edge("AgentNode", END)
172
+
173
+ graph = builder.compile()
174
+
175
+ #
176
+ # ─── 8) SMOKE TESTS ───────────────────────────────────────────────────────────────
177
+ #
178
+ if __name__ == "__main__":
179
+ print("Device set to use CPU\n")
180
+ tests = [
181
+ "How much is 2 + 2",
182
+ "What is the capital of France?",
183
+ "Which country had the fewest athletes at the 1928 Olympics? Give the IOC code."
184
+ ]
185
+ for q in tests:
186
+ state = {"question": q, "tool_output": "", "final_answer": ""}
187
+ out = graph.invoke(state)
188
+ print(f"Q: {q!r}\n→ A: {out['final_answer']!r}\n")
langgraph_openai.py ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # langgraph_openai.py
2
+
3
+ import os
4
+ import ast
5
+ import operator
6
+ import yaml
7
+ from typing import TypedDict
8
+ from dotenv import load_dotenv
9
+
10
+ from langchain_core.tools.structured import StructuredTool
11
+ from langchain_community.chat_models import ChatOpenAI
12
+ from langgraph.prebuilt.chat_agent_executor import create_react_agent
13
+ from langgraph.graph import StateGraph, START, END
14
+ from langsmith import traceable
15
+ from tavily import TavilyClient
16
+
17
+ # ─── 1) LOAD KEYS ───────────────────────────────────────────────────────────────
18
+ load_dotenv()
19
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
20
+ assert OPENAI_API_KEY, "Set OPENAI_API_KEY in your .env"
21
+
22
+ with open("config.yaml") as f:
23
+ cfg = yaml.safe_load(f)
24
+ TAVILY_API_KEY = cfg.get("tavily_api_key")
25
+ assert TAVILY_API_KEY, "Put your Tavily key under 'tavily_api_key' in config.yaml"
26
+
27
+ # ─── 2) TOOLS ───────────────────────────────────────────────────────────────────
28
+ def _safe_eval(expr: str) -> str:
29
+ ops = {
30
+ ast.Add: operator.add,
31
+ ast.Sub: operator.sub,
32
+ ast.Mult: operator.mul,
33
+ ast.Div: operator.truediv,
34
+ ast.Pow: operator.pow,
35
+ ast.USub: operator.neg,
36
+ }
37
+ def _eval(node):
38
+ if isinstance(node, ast.Constant):
39
+ return node.value
40
+ if isinstance(node, ast.BinOp):
41
+ return ops[type(node.op)](_eval(node.left), _eval(node.right))
42
+ if isinstance(node, ast.UnaryOp):
43
+ return ops[type(node.op)](_eval(node.operand))
44
+ raise TypeError(f"Unsupported AST node: {node!r}")
45
+ node = ast.parse(expr, mode="eval").body
46
+ return str(_eval(node))
47
+
48
def calculator_fn(expr: str) -> str:
    """Tool wrapper: evaluate *expr* safely, converting any failure to an 'Error: ...' string."""
    try:
        value = _safe_eval(expr)
    except Exception as e:
        return f"Error: {e}"
    return value
53
+
54
+ calculator_tool = StructuredTool.from_function(
55
+ func=calculator_fn,
56
+ name="calculator",
57
+ description="Useful for evaluating simple math expressions like '2 + 2'. Takes a single input 'expr'.",
58
+ )
59
+
60
+ tavily = TavilyClient(api_key=TAVILY_API_KEY)
61
+
62
def search_fn(query: str) -> str:
    """Query Tavily and join up to three result titles/snippets with ' | '."""
    try:
        hits = tavily.search(query).get("results", [])
        if not hits:
            return "No results found."
        summaries = [(h.get("title") or h.get("snippet") or "").strip() for h in hits[:3]]
        return " | ".join(summaries)
    except Exception as e:
        return f"Search error: {e}"
75
+
76
+ search_tool = StructuredTool.from_function(
77
+ func=search_fn,
78
+ name="search",
79
+ description="Useful for general purpose web search queries.",
80
+ )
81
+
82
+ TOOLS = [calculator_tool, search_tool]
83
+
84
+ # ─── 3) AGENT + ANALYZER ────────────────────────────────────────────────────────
85
+ chat = ChatOpenAI(
86
+ model="gpt-4o",
87
+ temperature=0.0,
88
+ openai_api_key=OPENAI_API_KEY,
89
+ )
90
+ chat_with_tools = chat.bind_tools(TOOLS)
91
+
92
+ react_agent = create_react_agent(chat_with_tools, TOOLS, max_steps=3, verbose=False)
93
+
94
+ analyzer = ChatOpenAI(
95
+ model="gpt-4o",
96
+ temperature=0.0,
97
+ openai_api_key=OPENAI_API_KEY,
98
+ )
99
+
100
def analyze_answer(text: str) -> str:
    """Ask the analyzer LLM to distill *text* down to its core answer."""
    # FIX: the original concatenated these fragments with no separators,
    # producing "...following text.Respond ... no explanation.<text>" — the
    # instructions and the payload ran together into one sentence. Join them
    # with explicit newlines so the model sees a well-formed prompt.
    prompt = (
        "Extract the final concise answer from the following text.\n"
        "Respond with only the core answer, no explanation.\n\n"
        f"{text}"
    )
    result = analyzer.invoke(prompt)
    return result.content.strip()
108
+
109
+ # ─── 4) LANGGRAPH ───────────────────────────────────────────────────────────────
110
+ class AgentState(TypedDict):
111
+ question: str
112
+ tool_output: str
113
+ final_answer: str
114
+
115
@traceable(name="AgentNode OpenAI")
def AgentNode(state: AgentState) -> AgentState:
    """Run the ReAct agent on the question; stash its raw answer in tool_output."""
    q = state["question"].strip()
    # FIX: a compiled create_react_agent graph consumes/produces a
    # {"messages": [...]} dict; invoking it with a bare string (and calling
    # .strip() on the returned dict) raises at runtime. The raw answer is
    # the content of the last returned message.
    result = react_agent.invoke({"messages": [("user", q)]})
    state["tool_output"] = result["messages"][-1].content.strip()
    return state
121
+
122
@traceable(name="AnalyzerNode")
def AnalyzerNode(state: AgentState) -> AgentState:
    """Second graph node: distill the raw tool output into final_answer."""
    state["final_answer"] = analyze_answer(state["tool_output"])
    return state
127
+
128
# Wire the two nodes into a linear graph: START -> AgentNode -> AnalyzerNode -> END.
# (The original also called set_entry_point("AgentNode"), which is redundant
# with the explicit START edge below.)
builder = StateGraph(AgentState)
builder.add_node("AgentNode", AgentNode)
builder.add_node("AnalyzerNode", AnalyzerNode)
builder.add_edge(START, "AgentNode")
builder.add_edge("AgentNode", "AnalyzerNode")
builder.add_edge("AnalyzerNode", END)
# FIX: StateGraph has no .build() method — compiling the graph is .compile()
# (as the sibling files already do); .build() raised AttributeError on import.
graph = builder.compile()
136
+
137
+ # ─── 5) RUN TEST ───────────────────────────────────────────────────────────────
138
+ @traceable(name="OpenAI LangGraph Final Test")
139
+ def run_tests():
140
+ questions = [
141
+ "How much is 2 + 2",
142
+ "What is the capital of France?",
143
+ "Which country had the fewest athletes at the 1928 Olympics? Give the IOC code."
144
+ ]
145
+ for q in questions:
146
+ state = {"question": q, "tool_output": "", "final_answer": ""}
147
+ out = graph.invoke(state)
148
+ print(f"Q: {q}\nA: {out['final_answer']}\n")
149
+
150
+ if __name__ == "__main__":
151
+ run_tests()
qa_graph.py DELETED
@@ -1,69 +0,0 @@
1
- from typing import TypedDict
2
- import re
3
- from langgraph.graph import StateGraph, START, END
4
- from tools.calculator_tool import calculator_tool
5
- from tools.search_tool import search_tool
6
- from transformers import pipeline
7
-
8
- # Shape of the state
9
- class QAState(TypedDict):
10
- question: str # incoming question
11
- answer: str # to store tool output or synthesized answer
12
-
13
- # Use text2text-generation for llm model
14
- synthesizer = pipeline(
15
- "text2text-generation",
16
- model="google/flan-t5-small",
17
- device=-1, # CPU; change to 0 for GPU
18
- max_new_tokens=100,
19
- do_sample=True,
20
- top_p=0.95,
21
- temperature=0.7
22
- )
23
-
24
- # Tool agent: calculator for math, search for other
25
- def QAAgent(state: QAState) -> QAState:
26
- q = state["question"].strip()
27
- if re.fullmatch(r"[0-9\s\+\-\*\/\.\(\)]+", q):
28
- state["answer"] = calculator_tool.invoke(q)
29
- else:
30
- state["answer"] = search_tool.invoke(q)
31
- return state
32
-
33
- # Synthesis agent to generate final response
34
- def SynthesisAgent(state: QAState) -> QAState:
35
- question = state["question"]
36
- tool_out = state["answer"]
37
- prompt = (
38
- f"Question: {question}\n"
39
- f"Tool output: {tool_out}\n"
40
- "Answer in a comma-separated list (no extra text):"
41
- )
42
- outputs = synthesizer(prompt)
43
- completion = outputs[0]["generated_text"].strip()
44
- state["answer"] = completion
45
- return state
46
-
47
- # Build the graph
48
- builder = StateGraph(QAState)
49
- builder.set_entry_point("QAAgent")
50
- builder.add_node("QAAgent", QAAgent)
51
- builder.add_node("SynthesisAgent", SynthesisAgent)
52
-
53
- builder.add_edge(START, "QAAgent")
54
- builder.add_edge("QAAgent", "SynthesisAgent")
55
- builder.add_edge("SynthesisAgent", END)
56
-
57
- graph = builder.compile()
58
-
59
- # Local testing
60
- if __name__ == "__main__":
61
- # Math example
62
- s1: QAState = {"question": "2 + 2", "answer": ""}
63
- o1 = graph.invoke(s1)
64
- print("Q:", s1["question"], "-> A:", o1["answer"])
65
-
66
- # Search + synthesis example
67
- s2: QAState = {"question": "What is the capital of France?", "answer": ""}
68
- o2 = graph.invoke(s2)
69
- print("Q:", s2["question"], "-> A:", o2["answer"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
requirements.txt CHANGED
@@ -1,14 +1,38 @@
1
- gradio
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  requests
3
- langgraph
4
- openai
5
- tavily-python
6
- langchain
7
  pandas
8
  python-dotenv
9
- huggingface_hub
10
- transformers
11
- langchain-huggingface
12
  IPython
13
  numpy<2.0
14
-
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # gradio
2
+ # requests
3
+ # pandas
4
+ # python-dotenv
5
+ # IPython
6
+ # numpy<2.0
7
+
8
+ # huggingface_hub
9
+ # transformers
10
+ # langchain-huggingface
11
+ # langgraph
12
+ # langsmith
13
+ # langchain>=0.1.20
14
+ # langchain-community
15
+ # tavily-python
16
+ # huggingface_hub[hf_xet]
17
+ # pydantic
18
+ # PyYAML
19
+
20
+
21
+ gradio==5.30.0
22
  requests
 
 
 
 
23
  pandas
24
  python-dotenv
 
 
 
25
  IPython
26
  numpy<2.0
27
+ huggingface_hub
28
+ transformers==4.51.3
29
+ langchain-huggingface==0.2.0
30
+ langgraph==0.4.5
31
+ langsmith==0.3.42
32
+ langchain>=0.1.20,<0.4.0
33
+ langchain-community==0.3.24
34
+ tavily-python==0.7.2
35
+ pydantic>=2.0
36
+ PyYAML
37
+ hf-xet~=1.1.1
38
+ langchain-openai
test_gaia.py DELETED
@@ -1,8 +0,0 @@
1
- from qa_graph import graph
2
- import requests
3
- import pandas as pd
4
-
5
- QUESTIONS = requests.get("https://agents-course-unit4-scoring.hf.space/questions").json()
6
- for q in QUESTIONS[:5]:
7
- out = graph.invoke({"question": q["question"], "answer": ""})
8
- print(q["task_id"], out["answer"])
 
 
 
 
 
 
 
 
 
test_gaia_questions.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # test_gaia_questions.py
2
+
3
+ import requests
4
+ from gaia_graph import graph
5
+
6
def test_with_real_gaia_questions():
    """Smoke-test the agent graph against the first five live GAIA questions."""
    # Fetch questions directly from the benchmark API.
    url = "https://agents-course-unit4-scoring.hf.space/questions"
    # Robustness fix: bound the request and fail loudly on HTTP errors
    # instead of hanging forever or crashing later on an unexpected payload.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    questions = response.json()

    for q in questions[:5]:  # limit to the first 5 for a quick check
        question = q["question"]
        task_id = q["task_id"]
        state = {"question": question, "answer": ""}
        result = graph.invoke(state)
        print(f"[{task_id}] Q: {question}")
        print(f"β†’ {result['answer']}")
        print("-" * 60)
20
+
21
+ if __name__ == "__main__":
22
+ test_with_real_gaia_questions()
test_openai_agent.py ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # test_openai_agent.py
2
+
3
+ import os, json
4
+ from typing import Dict
5
+ from dotenv import load_dotenv
6
+ from openai import OpenAI
7
+ from tavily import TavilyClient
8
+
9
+ load_dotenv()
10
+
11
+ # ─── 1) OpenAI client (v1) ─────────────────────────────────────────────────────────
12
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
13
+ assert OPENAI_API_KEY, "Set OPENAI_API_KEY in your .env"
14
+ client = OpenAI(api_key=OPENAI_API_KEY)
15
+
16
+ # ─── 2) Tavily search client ───────────────────────────────────────────────────────
17
+ TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")
18
+ assert TAVILY_API_KEY, "Set TAVILY_API_KEY in your .env"
19
+ tavily = TavilyClient(api_key=TAVILY_API_KEY)
20
+
21
+ # ─── 3) Tool implementations ───────────────────────────────────────────────────────
22
def calculator(expr: str) -> str:
    """Evaluate a basic arithmetic expression and return the result as a string.

    SECURITY FIX: the original used eval(); even with empty globals CPython
    re-inserts __builtins__, so model-supplied input could run arbitrary
    expressions. This AST walk permits only numeric literals and
    +, -, *, /, %, ** and unary +/- (matching _safe_eval in the sibling files).
    """
    import ast, operator

    ops = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.Mod: operator.mod,
        ast.Pow: operator.pow,
        ast.USub: operator.neg,
        ast.UAdd: operator.pos,
    }

    def _eval(node):
        if isinstance(node, ast.Constant) and isinstance(node.value, (int, float)):
            return node.value
        if isinstance(node, ast.BinOp) and type(node.op) in ops:
            return ops[type(node.op)](_eval(node.left), _eval(node.right))
        if isinstance(node, ast.UnaryOp) and type(node.op) in ops:
            return ops[type(node.op)](_eval(node.operand))
        raise ValueError(f"unsupported expression: {expr!r}")

    try:
        return str(_eval(ast.parse(expr, mode="eval").body))
    except Exception as e:
        return f"Error: {e}"
27
+
28
def search(query: str) -> str:
    """Search the web via Tavily; return up to three titles/snippets joined by ' | '."""
    try:
        payload = tavily.search(query=query, search_depth="basic")
        hits = payload.get("results", [])
        if not hits:
            return "No results."
        # Collect non-empty title-or-snippet text from the top three hits.
        texts = [(h.get("title") or h.get("snippet") or "").strip() for h in hits[:3]]
        joined = " | ".join(t for t in texts if t)
        return joined or "No results."
    except Exception as e:
        return f"Search error: {e}"
43
+
44
+ # ─── 4) Function‐calling schemas ──────────────────────────────────────────────────
45
+ functions = [
46
+ {
47
+ "name": "calculator",
48
+ "description": "Evaluate a math expression, return result as string",
49
+ "parameters": {
50
+ "type": "object",
51
+ "properties": {
52
+ "expr": {"type": "string", "description": "The expression to evaluate"}
53
+ },
54
+ "required": ["expr"],
55
+ },
56
+ },
57
+ {
58
+ "name": "search",
59
+ "description": "Look up facts on the web via Tavily; return up to three summaries.",
60
+ "parameters": {
61
+ "type": "object",
62
+ "properties": {
63
+ "query": {"type": "string", "description": "The search query"}
64
+ },
65
+ "required": ["query"],
66
+ },
67
+ },
68
+ ]
69
+ tool_map = {"calculator": calculator, "search": search}
70
+
71
+ # ─── 5) A single ReAct loop using free‑tier mini model ────────────────────────────
72
def run_react(question: str, max_steps: int = 3) -> str:
    """Drive a ReAct-style loop using OpenAI function calling.

    Lets the model call `calculator`/`search` up to *max_steps* times,
    feeding each tool result back into the conversation, and returns the
    model's final text answer (or a fallback marker if it never produces one).
    """
    messages = [{"role": "user", "content": question}]
    for _ in range(max_steps):
        resp = client.chat.completions.create(
            model="gpt-4o-mini",  # NOTE(review): picked for cost; "free tier" is not guaranteed
            messages=messages,
            functions=functions,
            function_call="auto",
        )
        msg = resp.choices[0].message

        # If the model decided to call a tool, execute it locally.
        if msg.function_call:
            name = msg.function_call.name
            args = json.loads(msg.function_call.arguments)
            print(f"[Tool Call] {name}({args})")
            out = tool_map[name](**args)
            # Append both the assistant's tool call and the tool's response,
            # so the next turn sees the full exchange.
            messages.append({
                "role": "assistant",
                "content": None,
                "function_call": msg.function_call.to_dict(),
            })
            messages.append({
                "role": "function",
                "name": name,
                "content": out,
            })
            # Loop continues so the model can read the tool output.
            continue

        # Otherwise this message is the final answer.
        return msg.content.strip()

    return "[no final answer after max steps]"
107
+
108
def analyzer_agent(question: str, raw_answer: str) -> str:
    """Second-pass LLM call that extracts just the short answer from a raw one."""
    prompt = "\n".join([
        "You are a data extractor.",
        f"Q: {question.strip()}",
        f"Answer: {raw_answer.strip()}",
        "Return only the short answer (e.g., a number, place, or code).",
    ])
    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": prompt}],
        temperature=0,
    )
    return completion.choices[0].message.content.strip()
122
+
123
def run_cleaned_answer(question: str) -> str:
    """Full pipeline: get a ReAct answer, then clean it with the analyzer agent."""
    raw_response = run_react(question)
    return analyzer_agent(question, raw_response)
126
+
127
+
128
+ # ─── 6) Smoke‑test it ──────────────────────────────────────────────────────────────
129
+ if __name__ == "__main__":
130
+ tests = [
131
+ "How much is 2 + 2",
132
+ "What is the capital of France?",
133
+ "Which country had the fewest athletes at the 1928 Olympics? Give the IOC code."
134
+ ]
135
+ for q in tests:
136
+ print(f"\n>>> {q}")
137
+ try:
138
+ result = run_cleaned_answer(q)
139
+ except Exception as e:
140
+ result = f"[ERROR] {e}"
141
+ print(f"β†’ {result}")
tools/search_tool.py CHANGED
@@ -1,8 +1,8 @@
1
  from tavily import TavilyClient
2
  from langchain.tools import tool
3
- # from config import TAVILY_API_KEY
4
- import os
5
- TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")
6
 
7
  class SearchTool:
8
  def __init__(self, api_key: str):
 
1
  from tavily import TavilyClient
2
  from langchain.tools import tool
3
+ from config import TAVILY_API_KEY
4
+ # import os
5
+ # TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")
6
 
7
  class SearchTool:
8
  def __init__(self, api_key: str):