Spaces:
Sleeping
Sleeping
File size: 4,105 Bytes
c64543a c0961ba 10e9b7d bf4b516 10e9b7d eccf8e4 3c4371f 475d553 4da7cc9 d10d5ac 4da7cc9 475d553 a68a46b c0961ba 3db6293 5d7f198 475d553 31243f4 c0961ba 31243f4 c0961ba dac9255 c0961ba dac9255 c0961ba e80aab9 c0961ba 3c4371f c0961ba eccf8e4 c0961ba 31243f4 475d553 31243f4 e80aab9 c0961ba d10d5ac 7d65c66 c0961ba d10d5ac 5f7f41e d10d5ac c0961ba 475d553 c0961ba d10d5ac 475d553 d10d5ac 475d553 c0961ba d83fbb8 5f7f41e c0961ba d10d5ac 31243f4 c0961ba 475d553 aff7c2b c0961ba 475d553 c0961ba e80aab9 c0961ba 7d65c66 c0961ba 475d553 c0961ba 475d553 c0961ba 475d553 dac9255 e80aab9 c0961ba e80aab9 dac9255 dcc3160 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 |
"""Enhanced Agent Evaluation Runner with simplified Agent integration"""
import os
import time
import gradio as gr
import requests
import pandas as pd
from dotenv import load_dotenv
from agent import Agent
# Load variables from a local .env file (if any) *before* building the Agent,
# in case the Agent constructor reads configuration such as API keys from the
# environment.
load_dotenv()

# Module-level Agent instance; it is called once per question.
agent = Agent()

# Constants
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch all questions, run the Agent on them, submit the answers, report.

    Args:
        profile: OAuth profile of the logged-in Hugging Face user, or ``None``
            when nobody is logged in.

    Returns:
        A ``(status, table)`` tuple. ``status`` is a human-readable message.
        ``table`` is a ``pandas.DataFrame`` with one row per processed question
        ("Task ID", "Question", "Submitted Answer"), or ``None`` when the run
        stopped before any question was processed.
    """
    # Login check.
    if not profile:
        return "Please Login to Hugging Face with the button.", None
    username = profile.username

    # Build the URLs used for fetching and submitting.
    space_id = os.getenv("SPACE_ID")
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "Unknown"
    questions_url = f"{DEFAULT_API_URL}/questions"
    submit_url = f"{DEFAULT_API_URL}/submit"

    # 1. Fetch the questions.
    try:
        resp = requests.get(questions_url, timeout=20)
        resp.raise_for_status()
        questions_data = resp.json()
    except Exception as e:
        return f"Error fetching questions: {e}", None
    if not questions_data:
        return "No questions received from server.", None
    if not isinstance(questions_data, list):
        # Iterating e.g. a dict would yield plain strings and crash on
        # ``item.get`` below, so reject unexpected payload shapes up front.
        return (
            "Error fetching questions: unexpected response format "
            f"({type(questions_data).__name__}, expected a list)"
        ), None

    # 2. Run the Agent on every question.
    results_log = []
    answers_payload = []
    for item in questions_data:
        if not isinstance(item, dict):
            continue  # Malformed entry; nothing to answer.
        task_id = item.get("task_id")
        question = item.get("question")
        if not task_id or question is None:
            continue
        try:
            # Call the Agent exactly once per question, passing the task_id.
            answer = agent(question, task_id=task_id)
        except Exception as e:
            # Best effort: record the failure as the answer and keep going so
            # one bad question does not abort the whole run.
            answer = f"ERROR: {e}"
        answers_payload.append({
            "task_id": task_id,
            "submitted_answer": answer,
        })
        results_log.append({
            "Task ID": task_id,
            "Question": question,
            "Submitted Answer": answer,
        })
        # Small delay between Agent calls to stay under QPS limits; applied
        # after failures as well so a failing call is not retried in a burst.
        time.sleep(0.3)

    if not answers_payload:
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    # 3. Submit the answers.
    submission_data = {
        "username": username.strip(),
        "agent_code": agent_code,
        "answers": answers_payload,
    }
    try:
        post = requests.post(submit_url, json=submission_data, timeout=60)
        post.raise_for_status()
        data = post.json()
        status = (
            f"✅ Submission Successful!\n"
            f"User: {data.get('username')}\n"
            f"Score: {data.get('score','N/A')}% "
            f"({data.get('correct_count','?')}/{data.get('total_attempted','?')})\n"
            f"Message: {data.get('message','No additional message.')}"
        )
        return status, pd.DataFrame(results_log)
    except Exception as e:
        return f"❌ Submission Failed: {e}", pd.DataFrame(results_log)
# --- Gradio UI ---
with gr.Blocks(title="Simplified GAIA Agent Evaluation") as demo:
    # Page heading followed by the usage instructions.
    gr.Markdown("# Simplified GAIA Agent Evaluation Runner")
    gr.Markdown(
        """
**Instructions:**
1. Set your `GOOGLE_API_KEY` in the environment variables.
2. Log in to your Hugging Face account using the button below.
3. Click **Run Evaluation & Submit All Answers** to start.
This runner uses:
- A custom `agent.py` for answering GAIA questions.
- Gradio for UI.
- HTTP requests to fetch & submit answers.
"""
    )

    # Login control, then the trigger button and the two output widgets.
    gr.LoginButton()
    run_btn = gr.Button(
        "Run Evaluation & Submit All Answers",
        variant="primary",
    )
    status_out = gr.Textbox(
        label="Status / Results",
        lines=6,
        interactive=False,
    )
    table_out = gr.DataFrame(
        label="Questions and Answers",
        wrap=True,
    )

    # The handler declares no explicit inputs; its two return values fill the
    # status box and the results table, in that order.
    run_btn.click(
        run_and_submit_all,
        inputs=None,
        outputs=[status_out, table_out],
    )

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch(share=False, debug=True)
|