D3MI4N committed on
Commit
0ee4998
·
1 Parent(s): 94958b6

try function calling version

Browse files
Files changed (4) hide show
  1. app.py +140 -89
  2. app_prior.py +116 -0
  3. test_gaia_questions.py +1 -1
  4. test_openai_agent.py +1 -0
app.py CHANGED
@@ -1,116 +1,167 @@
 
 
1
  import os
2
- import gradio as gr
3
  import requests
4
  import pandas as pd
5
  import asyncio
6
- from gaia_graph import graph # Use your agent
7
- from typing import Optional
8
 
9
- # Constants
10
- DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
11
- user_answers_cache = {} # session-based cache
 
12
 
13
- class GaiaAgent:
14
- def __init__(self):
15
- print("Graph-based agent initialized.")
16
 
17
- def __call__(self, question: str) -> str:
18
- print("Received question:", question)
19
- state = {"question": question, "answer": ""}
20
- try:
21
- result = graph.invoke(state)
22
- print("Result type:", type(result))
23
- print("Result value:", result)
24
- if isinstance(result, dict):
25
- return result.get("answer", "No answer generated.")
26
- else:
27
- return f"Unexpected output from graph: {result}"
28
- except Exception as e:
29
- return f"ERROR invoking graph: {e}"
30
-
31
-
32
- # Async runner
33
- async def run_agent(profile: gr.OAuthProfile | None):
34
- if not profile:
35
- return "Please login to Hugging Face.", None
36
 
37
- username = profile.username
38
- agent = GaiaAgent()
 
 
39
 
40
- # 1. Load questions
 
41
  try:
42
- response = requests.get(f"{DEFAULT_API_URL}/questions", timeout=10)
43
- response.raise_for_status()
44
- questions_data = response.json()
45
  except Exception as e:
46
- return f"Error fetching questions: {e}", None
47
 
48
- # 2. Process questions
49
- async def process(item):
50
- task_id = item.get("task_id")
51
- question = item.get("question")
52
- try:
53
- answer = await asyncio.to_thread(agent, question)
54
- return {"task_id": task_id, "question": question, "submitted_answer": answer}
55
- except Exception as e:
56
- return {"task_id": task_id, "question": question, "submitted_answer": f"ERROR: {e}"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
 
58
- results = await asyncio.gather(*(process(item) for item in questions_data))
59
- user_answers_cache[username] = results
 
 
 
 
 
60
 
61
- df = pd.DataFrame(results)
62
- return f"Answered {len(results)} questions. Ready to submit.", df
 
 
 
 
 
63
 
 
 
 
64
 
65
  def submit_answers(profile: gr.OAuthProfile | None):
66
  if not profile:
67
- return "Please login to Hugging Face.", None
68
-
69
- username = profile.username.strip()
70
- if username not in user_answers_cache:
71
- return "No cached answers. Please run the agent first.", None
72
-
73
- answers_payload = [
74
- {"task_id": item["task_id"], "submitted_answer": item["submitted_answer"]}
75
- for item in user_answers_cache[username]
76
  ]
77
-
78
  space_id = os.getenv("SPACE_ID", "")
79
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else ""
80
- submission_data = {"username": username, "agent_code": agent_code, "answers": answers_payload}
 
 
 
 
 
 
 
 
81
 
82
- # 3. Submit to scoring API
83
- try:
84
- response = requests.post(f"{DEFAULT_API_URL}/submit", json=submission_data, timeout=60)
85
- response.raise_for_status()
86
- result = response.json()
87
- final_status = (
88
- f"βœ… Submission Successful!\n"
89
- f"πŸ‘€ User: {result.get('username')}\n"
90
- f"🎯 Score: {result.get('score', 'N/A')}% "
91
- f"({result.get('correct_count', '?')}/{result.get('total_attempted', '?')} correct)\n"
92
- f"πŸ“© Message: {result.get('message', 'No message received.')}"
93
- )
94
- df = pd.DataFrame(user_answers_cache[username])
95
- return final_status, df
96
- except Exception as e:
97
- return f"❌ Submission failed: {e}", pd.DataFrame(user_answers_cache[username])
98
-
99
-
100
- # ────────── Gradio UI ──────────
101
  with gr.Blocks() as demo:
102
- gr.Markdown("# 🧠 GAIA Agent Evaluation")
103
  gr.LoginButton()
104
-
105
- run_button = gr.Button("▢️ Run Agent on GAIA Questions")
106
- submit_button = gr.Button("πŸ“€ Submit Cached Answers")
107
-
108
- status = gr.Textbox(label="Status", lines=6, interactive=False)
109
- results = gr.DataFrame(label="Answers", wrap=True)
110
-
111
- run_button.click(run_agent, outputs=[status, results])
112
- submit_button.click(submit_answers, outputs=[status, results])
113
 
114
  if __name__ == "__main__":
115
- print("Launching Gradio app...")
116
  demo.launch(debug=True, share=False)
 
1
+ # app.py
2
+
3
  import os
4
+ import json
5
  import requests
6
  import pandas as pd
7
  import asyncio
 
 
8
 
9
+ import gradio as gr
10
+ from openai import OpenAI
11
+ from tavily import TavilyClient
12
+ from dotenv import load_dotenv
13
 
14
+ load_dotenv()
 
 
15
 
16
+ # ─── 1) OpenAI client (v1 SDK) ───────────────────────────────────────────────────
17
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
18
+ assert OPENAI_API_KEY, "Set OPENAI_API_KEY in .env"
19
+ openai_client = OpenAI(api_key=OPENAI_API_KEY)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
+ # ─── 2) Tavily search client ─────────────────────────────────────────────────────
22
+ TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")
23
+ assert TAVILY_API_KEY, "Set TAVILY_API_KEY in .env"
24
+ tavily_client = TavilyClient(api_key=TAVILY_API_KEY)
25
 
26
+ # ─── 3) Define our tools & JSON schemas ──────────────────────────────────────────
27
def calculator(expr: str) -> str:
    """Safely evaluate an arithmetic expression and return the result as a string.

    Only numeric literals and the operators ``+ - * / // % **`` (plus unary
    ``+``/``-``) are accepted; anything else yields an ``"Error: ..."`` string.

    NOTE: this replaces ``eval(expr, {}, {})``, which was NOT safe — CPython
    injects ``__builtins__`` into an empty globals dict, so the model could
    have executed arbitrary code via this tool.
    """
    import ast
    import operator

    bin_ops = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.FloorDiv: operator.floordiv,
        ast.Mod: operator.mod,
        ast.Pow: operator.pow,
    }
    unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}

    def _eval(node):
        # Recursively evaluate only the whitelisted node types.
        if isinstance(node, ast.Expression):
            return _eval(node.body)
        if isinstance(node, ast.Constant) and isinstance(node.value, (int, float)):
            return node.value
        if isinstance(node, ast.BinOp) and type(node.op) in bin_ops:
            return bin_ops[type(node.op)](_eval(node.left), _eval(node.right))
        if isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
            return unary_ops[type(node.op)](_eval(node.operand))
        raise ValueError(f"unsupported expression element: {type(node).__name__}")

    try:
        return str(_eval(ast.parse(expr, mode="eval")))
    except Exception as e:
        return f"Error: {e}"
33
 
34
def search(query: str) -> str:
    """Web lookup via Tavily.

    Runs a basic-depth search and returns up to three result titles
    (falling back to snippets) joined by ' | '. Never raises: failures
    come back as a descriptive string so the tool loop keeps running.
    """
    try:
        response = tavily_client.search(query=query, search_depth="basic")
        hits = response.get("results", [])
        if not hits:
            return "No results found."
        summaries = [hit.get("title") or hit.get("snippet") or "" for hit in hits[:3]]
        return " | ".join(summaries)
    except Exception as e:
        return f"Search error: {e}"
47
+
48
# JSON schemas advertised to the model for OpenAI function calling.
# Each entry's "name" must match a key in tool_map below so the dispatch
# in the ReAct loop can resolve the model's requested call.
functions = [
    {
        "name": "calculator",
        "description": "Evaluate a math expression. Returns the result as a string.",
        "parameters": {
            "type": "object",
            "properties": {
                "expr": {"type": "string", "description": "Math expression to evaluate"}
            },
            "required": ["expr"],
        },
    },
    {
        "name": "search",
        "description": "Look up facts on the web via Tavily; return up to three summaries separated by ' | '.",
        "parameters": {
            "type": "object",
            "properties": {
                "query": {"type": "string", "description": "The search query"}
            },
            "required": ["query"],
        },
    },
]
# Dispatch table: function-call name -> local Python callable.
tool_map = {"calculator": calculator, "search": search}
73
+
74
# ─── 4) The ReAct loop ───────────────────────────────────────────────────────────
def run_react(question: str, max_steps: int = 10) -> str:
    """Drive a ReAct-style loop via OpenAI function calling.

    Repeatedly asks the model for a completion; whenever it requests a tool
    call, the tool is executed locally and both the call and its output are
    appended to the conversation. The loop ends when the model answers
    directly, or after ``max_steps`` rounds — previously this was an
    unbounded ``while True``, so a model that kept requesting tools would
    burn API calls forever.

    Args:
        question: The user question to answer.
        max_steps: Maximum model rounds before giving up (default 10).

    Returns:
        The model's final answer, stripped, or an error string if the
        step budget is exhausted.
    """
    messages = [{"role": "user", "content": question}]
    for _ in range(max_steps):
        resp = openai_client.chat.completions.create(
            model="gpt-4o-mini",
            # NOTE(review): `functions`/`function_call` is the legacy
            # function-calling API (superseded by `tools`/`tool_choice`);
            # kept as-is here since it still works with this SDK.
            messages=messages,
            functions=functions,
            function_call="auto",
        )
        msg = resp.choices[0].message

        if msg.function_call:
            # The model asked for a tool: run it and feed both the
            # assistant's call and the tool's result back into the loop.
            name = msg.function_call.name
            args = json.loads(msg.function_call.arguments)
            output = tool_map[name](**args)
            messages.append({
                "role": "assistant",
                "content": None,
                "function_call": msg.function_call.to_dict(),
            })
            messages.append({
                "role": "function",
                "name": name,
                "content": output,
            })
        else:
            # No tool requested: this is the final answer.
            return msg.content.strip()
    return "Error: exceeded maximum reasoning steps without a final answer."
105
+
106
# ─── 5) Gradio / GAIA integration ────────────────────────────────────────────────
# Scoring service endpoint for the HF Agents course (unit 4).
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# Per-username cache of answered questions; filled by run_agent and read by
# submit_answers. In-memory only — lost when the Space restarts.
_cache = {}

class GaiaAgent:
    """Thin callable adapter so the ReAct loop can be passed around as an agent."""

    def __call__(self, question: str) -> str:
        """Answer a single GAIA question via the function-calling ReAct loop."""
        return run_react(question)
113
 
114
async def run_agent(profile: gr.OAuthProfile | None):
    """Fetch the GAIA question set, answer each question concurrently, and
    cache the results for later submission.

    Args:
        profile: Hugging Face OAuth profile injected by Gradio, or None
            when the user is not logged in.

    Returns:
        A (status message, DataFrame-or-None) pair for the Gradio outputs.
    """
    if not profile:
        return "Please login.", None
    user = profile.username

    # Surface HTTP/network failures in the UI instead of letting the
    # exception crash the Gradio callback (the previous revision of this
    # function had this guard; it was lost in the rewrite).
    try:
        resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15)
        resp.raise_for_status()
        data = resp.json()
    except Exception as e:
        return f"Error fetching questions: {e}", None

    agent = GaiaAgent()

    async def proc(item):
        # The agent call is blocking, so run it in a worker thread; capture
        # per-question failures so one bad question doesn't abort the batch.
        try:
            ans = await asyncio.to_thread(agent, item["question"])
        except Exception as e:
            ans = f"ERROR: {e}"
        return {
            "task_id": item["task_id"],
            "question": item["question"],
            "submitted_answer": ans,
        }

    results = await asyncio.gather(*(proc(it) for it in data))
    _cache[user] = results
    return f"Answered {len(results)} questions.", pd.DataFrame(results)
133
 
134
def submit_answers(profile: gr.OAuthProfile | None):
    """Submit the cached answers for the logged-in user to the scoring API.

    Args:
        profile: Hugging Face OAuth profile injected by Gradio, or None
            when the user is not logged in.

    Returns:
        A (status message, DataFrame-or-None) pair for the Gradio outputs.
    """
    if not profile:
        return "Please login.", None
    user = profile.username
    if user not in _cache:
        return "Run agent first.", None
    payload = [
        {"task_id": r["task_id"], "submitted_answer": r["submitted_answer"]}
        for r in _cache[user]
    ]
    # Link to this Space's source tree; the scoring service records it.
    space_id = os.getenv("SPACE_ID", "")
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else ""
    body = {"username": user, "agent_code": agent_code, "answers": payload}
    # Report submission failures in the UI rather than raising out of the
    # Gradio callback — previously raise_for_status() propagated and the
    # user saw a generic error with their answers hidden.
    try:
        r = requests.post(f"{DEFAULT_API_URL}/submit", json=body, timeout=60)
        r.raise_for_status()
        res = r.json()
    except Exception as e:
        return f"Submission failed: {e}", pd.DataFrame(_cache[user])
    msg = (
        f"Score: {res.get('score')}% "
        f"({res.get('correct_count')}/{res.get('total_attempted')})"
    )
    return msg, pd.DataFrame(_cache[user])
155
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
156
# ─── 6) Gradio UI ────────────────────────────────────────────────────────────────
with gr.Blocks() as demo:
    gr.Markdown("# 🧠 GAIA Benchmark Runner")
    gr.LoginButton()  # HF OAuth login; populates the gr.OAuthProfile callback arg
    run_btn = gr.Button("Run agent on questions")
    sub_btn = gr.Button("Submit cached answers")
    out_txt = gr.Textbox(lines=3, interactive=False)  # status messages
    out_tbl = gr.DataFrame()                          # answers table
    # Both callbacks return (status, table); no inputs besides the OAuth profile.
    run_btn.click(run_agent, outputs=[out_txt, out_tbl])
    sub_btn.click(submit_answers, outputs=[out_txt, out_tbl])

if __name__ == "__main__":
    demo.launch(debug=True, share=False)
app_prior.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ import requests
4
+ import pandas as pd
5
+ import asyncio
6
+ from gaia_new import graph # Use your agent
7
+ from typing import Optional
8
+
9
+ # Constants
10
+ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
11
+ user_answers_cache = {} # session-based cache
12
+
13
class GaiaAgent:
    """Adapter that runs a single GAIA question through the LangGraph agent."""

    def __init__(self):
        print("Graph-based agent initialized.")

    def __call__(self, question: str) -> str:
        """Invoke the graph on one question and extract the answer string.

        Never raises: failures come back as descriptive strings so the
        batch runner can keep going after individual errors.
        """
        print("Received question:", question)
        state = {"question": question, "answer": ""}
        try:
            result = graph.invoke(state)
            print("Result type:", type(result))
            print("Result value:", result)
            # Graph output shape isn't guaranteed here — only trust dicts.
            if isinstance(result, dict):
                return result.get("answer", "No answer generated.")
            else:
                return f"Unexpected output from graph: {result}"
        except Exception as e:
            return f"ERROR invoking graph: {e}"
30
+
31
+
32
# Async runner
async def run_agent(profile: gr.OAuthProfile | None):
    """Fetch all GAIA questions, answer them concurrently, and cache results.

    Returns a (status message, DataFrame-or-None) pair for the Gradio outputs.
    """
    if not profile:
        return "Please login to Hugging Face.", None

    username = profile.username
    agent = GaiaAgent()

    # 1. Load questions — report fetch failures in the UI instead of raising.
    try:
        response = requests.get(f"{DEFAULT_API_URL}/questions", timeout=10)
        response.raise_for_status()
        questions_data = response.json()
    except Exception as e:
        return f"Error fetching questions: {e}", None

    # 2. Process questions
    async def process(item):
        task_id = item.get("task_id")
        question = item.get("question")
        try:
            # Agent call is blocking; run it in a worker thread so all
            # questions are answered concurrently.
            answer = await asyncio.to_thread(agent, question)
            return {"task_id": task_id, "question": question, "submitted_answer": answer}
        except Exception as e:
            # Capture per-question failures so one error doesn't abort the batch.
            return {"task_id": task_id, "question": question, "submitted_answer": f"ERROR: {e}"}

    results = await asyncio.gather(*(process(item) for item in questions_data))
    user_answers_cache[username] = results  # cached for submit_answers

    df = pd.DataFrame(results)
    return f"Answered {len(results)} questions. Ready to submit.", df
63
+
64
+
65
def submit_answers(profile: gr.OAuthProfile | None):
    """Post the cached answers to the scoring API and report the score.

    Returns a (status message, DataFrame-or-None) pair for the Gradio outputs.
    """
    if not profile:
        return "Please login to Hugging Face.", None

    username = profile.username.strip()
    if username not in user_answers_cache:
        return "No cached answers. Please run the agent first.", None

    answers_payload = [
        {"task_id": item["task_id"], "submitted_answer": item["submitted_answer"]}
        for item in user_answers_cache[username]
    ]

    # Link back to this Space's source tree, recorded by the scoring service.
    space_id = os.getenv("SPACE_ID", "")
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else ""
    submission_data = {"username": username, "agent_code": agent_code, "answers": answers_payload}

    # 3. Submit to scoring API — surface failures in the UI, still showing
    # the cached answers so the user doesn't lose them.
    try:
        response = requests.post(f"{DEFAULT_API_URL}/submit", json=submission_data, timeout=60)
        response.raise_for_status()
        result = response.json()
        final_status = (
            f"βœ… Submission Successful!\n"
            f"πŸ‘€ User: {result.get('username')}\n"
            f"🎯 Score: {result.get('score', 'N/A')}% "
            f"({result.get('correct_count', '?')}/{result.get('total_attempted', '?')} correct)\n"
            f"πŸ“© Message: {result.get('message', 'No message received.')}"
        )
        df = pd.DataFrame(user_answers_cache[username])
        return final_status, df
    except Exception as e:
        return f"❌ Submission failed: {e}", pd.DataFrame(user_answers_cache[username])
98
+
99
+
100
# ────────── Gradio UI ──────────
with gr.Blocks() as demo:
    gr.Markdown("# 🧠 GAIA Agent Evaluation")
    gr.LoginButton()  # HF OAuth login; supplies gr.OAuthProfile to callbacks

    run_button = gr.Button("▢️ Run Agent on GAIA Questions")
    submit_button = gr.Button("πŸ“€ Submit Cached Answers")

    status = gr.Textbox(label="Status", lines=6, interactive=False)  # status messages
    results = gr.DataFrame(label="Answers", wrap=True)               # answers table

    # Both callbacks return (status, table); no inputs besides the OAuth profile.
    run_button.click(run_agent, outputs=[status, results])
    submit_button.click(submit_answers, outputs=[status, results])

if __name__ == "__main__":
    print("Launching Gradio app...")
    demo.launch(debug=True, share=False)
test_gaia_questions.py CHANGED
@@ -1,7 +1,7 @@
1
  # test_gaia_questions.py
2
 
3
  import requests
4
- from gaia_graph import graph
5
 
6
  def test_with_real_gaia_questions():
7
  # Fetch questions directly from the benchmark API
 
1
  # test_gaia_questions.py
2
 
3
  import requests
4
+ from gaia_new import graph
5
 
6
  def test_with_real_gaia_questions():
7
  # Fetch questions directly from the benchmark API
test_openai_agent.py CHANGED
@@ -139,3 +139,4 @@ if __name__ == "__main__":
139
  except Exception as e:
140
  result = f"[ERROR] {e}"
141
  print(f"β†’ {result}")
 
 
139
  except Exception as e:
140
  result = f"[ERROR] {e}"
141
  print(f"β†’ {result}")
142
+