File size: 4,105 Bytes
c64543a
c0961ba
10e9b7d
bf4b516
10e9b7d
eccf8e4
3c4371f
475d553
4da7cc9
d10d5ac
4da7cc9
475d553
a68a46b
c0961ba
3db6293
5d7f198
475d553
31243f4
c0961ba
31243f4
 
c0961ba
 
dac9255
c0961ba
dac9255
c0961ba
e80aab9
c0961ba
 
 
 
 
3c4371f
c0961ba
eccf8e4
c0961ba
 
 
31243f4
475d553
 
31243f4
e80aab9
c0961ba
d10d5ac
7d65c66
c0961ba
 
d10d5ac
 
 
5f7f41e
 
d10d5ac
 
c0961ba
475d553
c0961ba
d10d5ac
475d553
 
 
d10d5ac
 
475d553
c0961ba
d83fbb8
5f7f41e
c0961ba
d10d5ac
 
31243f4
 
 
 
c0961ba
475d553
 
 
 
 
aff7c2b
c0961ba
 
 
 
475d553
c0961ba
 
 
 
e80aab9
c0961ba
7d65c66
c0961ba
475d553
c0961ba
 
 
475d553
 
c0961ba
 
 
 
 
 
 
 
475d553
dac9255
e80aab9
c0961ba
 
 
 
 
e80aab9
 
dac9255
dcc3160
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123

"""Enhanced Agent Evaluation Runner with simplified Agent integration"""
import os
import time
import gradio as gr
import requests
import pandas as pd
from dotenv import load_dotenv
from agent import Agent  
# Load variables from a local .env file *before* constructing the Agent, so
# that anything Agent() may read from the environment while it is being built
# (e.g. the GOOGLE_API_KEY mentioned in the UI instructions) is already set.
load_dotenv()

# Single module-level Agent instance; run_and_submit_all calls it once per
# question.
agent = Agent()

# Constants
# Base URL of the remote service; run_and_submit_all appends "/questions" and
# "/submit" to it to build the two endpoints it talks to.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch all questions, run the Agent on each, submit the answers, and report.

    Args:
        profile: OAuth profile of the logged-in Hugging Face user, or ``None``
            when nobody is logged in. Its ``username`` is sent with the
            submission.

    Returns:
        A ``(status, table)`` tuple. ``status`` is a human-readable message
        (success summary or error description). ``table`` is a
        ``pandas.DataFrame`` with one row per attempted question (task id,
        question, submitted answer), or ``None`` when the run stopped at the
        login check or while fetching the questions.
    """
    # Login check: the username is required for the submission payload.
    if not profile:
        return "Please Login to Hugging Face with the button.", None
    username = profile.username

    # Build the URLs used for fetching and submitting.
    space_id = os.getenv("SPACE_ID")
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "Unknown"
    questions_url = f"{DEFAULT_API_URL}/questions"
    submit_url = f"{DEFAULT_API_URL}/submit"

    # 1. Fetch the questions.
    # Broad catch is deliberate: this is the UI boundary, so any failure
    # (network, HTTP status, invalid JSON) is turned into a status message.
    try:
        resp = requests.get(questions_url, timeout=20)
        resp.raise_for_status()
        questions_data = resp.json()
    except Exception as e:
        return f"Error fetching questions: {e}", None

    if not questions_data:
        return "No questions received from server.", None
    # Guard against a JSON object/scalar (e.g. an error payload): iterating it
    # below would otherwise raise AttributeError outside of any try block.
    if not isinstance(questions_data, list):
        return (
            "Error fetching questions: unexpected response format "
            f"({type(questions_data).__name__}); expected a list.",
            None,
        )

    # 2. Run the Agent on every question and collect the answers.
    results_log = []
    answers_payload = []

    for item in questions_data:
        # Skip malformed entries instead of crashing on `.get`.
        if not isinstance(item, dict):
            continue
        task_id = item.get("task_id")
        question = item.get("question")
        if not task_id or question is None:
            continue

        try:
            # Call the agent exactly once per question, passing the task_id.
            answer = agent(question, task_id=task_id)
        except Exception as e:
            # Best effort: a failing question is recorded (and submitted) as
            # an error string so the remaining questions still run.
            answer = f"ERROR: {e}"
        else:
            # Small delay after a successful call to avoid exceeding rate limits.
            time.sleep(0.3)

        answers_payload.append({
            "task_id": task_id,
            "submitted_answer": answer,
        })
        results_log.append({
            "Task ID": task_id,
            "Question": question,
            "Submitted Answer": answer,
        })

    results_df = pd.DataFrame(results_log)
    if not answers_payload:
        return "Agent did not produce any answers to submit.", results_df

    # 3. Submit the answers.
    submission_data = {
        "username": username.strip(),
        "agent_code": agent_code,
        "answers": answers_payload,
    }
    try:
        post = requests.post(submit_url, json=submission_data, timeout=60)
        post.raise_for_status()
        data = post.json()
        status = (
            f"✅ Submission Successful!\n"
            f"User: {data.get('username')}\n"
            f"Score: {data.get('score','N/A')}% "
            f"({data.get('correct_count','?')}/{data.get('total_attempted','?')})\n"
            f"Message: {data.get('message','No additional message.')}"
        )
        return status, results_df
    except Exception as e:
        # UI boundary again: report the failure but still show what was answered.
        return f"❌ Submission Failed: {e}", results_df

# --- Gradio UI ---
# Components are created inside the Blocks context in the order written below;
# `demo` is the app object launched at the bottom of the file.
with gr.Blocks(title="Simplified GAIA Agent Evaluation") as demo:
    gr.Markdown("# Simplified GAIA Agent Evaluation Runner")
    gr.Markdown("""
    **Instructions:**
    1. Set your `GOOGLE_API_KEY` in the environment variables.
    2. Log in to your Hugging Face account using the button below.
    3. Click **Run Evaluation & Submit All Answers** to start.
    
    This runner uses:
    - A custom `agent.py` for answering GAIA questions.
    - Gradio for UI.
    - HTTP requests to fetch & submit answers.
    """)
    # Login button; run_and_submit_all refuses to run without a logged-in profile.
    gr.LoginButton()

    # Trigger button plus the two outputs that run_and_submit_all fills:
    # a status text box and a table of questions/answers.
    run_btn     = gr.Button("Run Evaluation & Submit All Answers", variant="primary")
    status_out  = gr.Textbox(label="Status / Results", lines=6, interactive=False)
    table_out   = gr.DataFrame(label="Questions and Answers", wrap=True)

    # No `inputs` are passed: the handler's `profile` argument is presumably
    # injected by Gradio from its gr.OAuthProfile annotation — confirm against
    # the Gradio OAuth documentation. The returned (status, table) tuple maps
    # onto the two outputs in order.
    run_btn.click(fn=run_and_submit_all, outputs=[status_out, table_out])

# Launch the Gradio app only when this file is executed directly as a script
# (not when it is imported as a module).
if __name__ == "__main__":
    demo.launch(debug=True, share=False)