File size: 4,156 Bytes
10e9b7d
7835c92
eccf8e4
3c4371f
0c06f61
7835c92
10e9b7d
f5838b6
1d0ce3b
f5838b6
7835c92
 
 
5ada353
f5838b6
7835c92
 
 
3c4371f
7835c92
 
 
f5838b6
 
 
 
 
 
7835c92
 
 
 
 
 
 
 
0c06f61
7835c92
 
5ada353
7835c92
0ee4998
7835c92
 
 
0ee4998
7835c92
0ee4998
7835c92
 
 
 
 
 
 
 
 
0c06f61
7835c92
 
0c06f61
7835c92
 
0c06f61
 
f5838b6
0c06f61
 
7835c92
 
 
 
 
 
 
 
 
0c06f61
7835c92
7bccf8e
0c06f61
f5838b6
 
 
 
 
0c06f61
7835c92
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e80aab9
7835c92
7e4a06b
7835c92
 
 
 
 
 
 
 
 
e80aab9
 
7835c92
0c06f61
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import os
import gradio as gr
import requests
import pandas as pd
import asyncio
from typing import Optional

from langchain_core.messages import HumanMessage
from langgraph_new import graph  # Your graph agent

# Constants
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
user_answers_cache = {}  # session-based cache


class GaiaAgent:
    """Thin callable wrapper around the LangGraph ``graph`` agent.

    Instances are stateless; each call sends one question through the graph
    and returns the content of the final message.
    """

    def __init__(self):
        print("Graph-based agent initialized.")

    def __call__(self, question: str) -> str:
        """Answer one question; any failure is reported as an error string."""
        print("Received question:", question)
        try:
            # LangGraph expects the state dict with a "messages" list.
            output = graph.invoke({"messages": [HumanMessage(content=question)]})
            replies = output.get("messages", [])
            if not replies:
                return "No messages returned."
            # The last message in the state is the agent's final answer.
            return replies[-1].content.strip()
        except Exception as exc:
            return f"ERROR invoking graph: {exc}"


# Async runner
async def run_agent(profile: gr.OAuthProfile | None):
    """Fetch all GAIA questions, answer each with the graph agent, cache results.

    Args:
        profile: Logged-in Hugging Face OAuth profile, or None when logged out.

    Returns:
        A (status message, DataFrame or None) tuple for the Gradio outputs.
    """
    if not profile:
        return "Please login to Hugging Face.", None

    # Normalize the cache key the same way submit_answers() looks it up.
    username = profile.username.strip()
    agent = GaiaAgent()

    # 1. Load questions.
    # FIX: requests.get is a blocking call; run it in a worker thread so the
    # event loop is not stalled while waiting on the network.
    try:
        response = await asyncio.to_thread(
            requests.get, f"{DEFAULT_API_URL}/questions", timeout=10
        )
        response.raise_for_status()
        questions_data = response.json()
    except Exception as e:
        return f"Error fetching questions: {e}", None

    # Guard against an empty (or falsy) question list from the API.
    if not questions_data:
        return "Fetched an empty question list. Nothing to answer.", None

    # 2. Process questions concurrently; the agent itself is synchronous,
    # so each call is pushed onto a worker thread.
    async def process(item):
        task_id = item.get("task_id")
        question = item.get("question")
        try:
            answer = await asyncio.to_thread(agent, question)
            return {"task_id": task_id, "question": question, "submitted_answer": answer}
        except Exception as e:
            return {"task_id": task_id, "question": question, "submitted_answer": f"ERROR: {e}"}

    results = await asyncio.gather(*(process(item) for item in questions_data))
    user_answers_cache[username] = results

    df = pd.DataFrame(results)
    return f"Answered {len(results)} questions. Ready to submit.", df


# Submission
def submit_answers(profile: gr.OAuthProfile | None):
    """POST the cached answers for this user to the scoring API.

    Returns:
        A (status message, DataFrame or None) tuple for the Gradio outputs.
    """
    if not profile:
        return "Please login to Hugging Face.", None

    username = profile.username.strip()
    cached = user_answers_cache.get(username)
    if cached is None:
        return "No cached answers. Please run the agent first.", None

    # Only task_id + answer go to the API; the question text stays local.
    answers_payload = []
    for entry in cached:
        answers_payload.append(
            {"task_id": entry["task_id"], "submitted_answer": entry["submitted_answer"]}
        )

    space_id = os.getenv("SPACE_ID", "")
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else ""
    submission_data = {
        "username": username,
        "agent_code": agent_code,
        "answers": answers_payload,
    }

    try:
        response = requests.post(f"{DEFAULT_API_URL}/submit", json=submission_data, timeout=60)
        response.raise_for_status()
        result = response.json()
        final_status = (
            f"βœ… Submission Successful!\n"
            f"πŸ‘€ User: {result.get('username')}\n"
            f"🎯 Score: {result.get('score', 'N/A')}% "
            f"({result.get('correct_count', '?')}/{result.get('total_attempted', '?')} correct)\n"
            f"πŸ“© Message: {result.get('message', 'No message received.')}"
        )
        return final_status, pd.DataFrame(cached)
    except Exception as exc:
        # On any failure still show the cached answers so nothing is lost.
        return f"❌ Submission failed: {exc}", pd.DataFrame(cached)


# ────────── Gradio UI ──────────
# Top-level UI wiring: a login button plus two actions that share one status
# textbox and one results table.
with gr.Blocks() as demo:
    gr.Markdown("# 🧠 GAIA Agent Evaluation")
    gr.LoginButton()

    run_button = gr.Button("▢️ Run Agent on GAIA Questions")
    submit_button = gr.Button("πŸ“€ Submit Cached Answers")

    # Both callbacks write a status string and a DataFrame of answers.
    status = gr.Textbox(label="Status", lines=6, interactive=False)
    results = gr.DataFrame(label="Answers", wrap=True)

    # NOTE(review): no inputs= declared — presumably Gradio injects the
    # gr.OAuthProfile argument automatically from the login session; confirm
    # against the Gradio OAuth docs.
    run_button.click(run_agent, outputs=[status, results])
    submit_button.click(submit_answers, outputs=[status, results])

if __name__ == "__main__":
    # Script entry point: serve the app locally with debug output enabled
    # and no public share link.
    print("Launching Gradio app...")
    demo.launch(share=False, debug=True)