# lethaq's picture
# Update app.py
# bc80784 verified
"""Enhanced Agent Evaluation Runner with simplified Agent integration"""
import os
import time
import gradio as gr
import requests
import pandas as pd
from dotenv import load_dotenv
from agent import Agent
# Load variables from a local .env file *before* constructing the agent, so
# that any configuration the Agent constructor reads from the environment is
# already in place.
# NOTE(review): presumably Agent needs GOOGLE_API_KEY (the UI instructions
# mention it) — confirm against agent.py. If agent.py reads the environment at
# import time, load_dotenv() would have to run before that import as well.
load_dotenv()

# Single shared agent instance used by the evaluation callback below.
agent = Agent()

# Constants
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch all questions, run the agent on each one, and submit the answers.

    Args:
        profile: OAuth profile of the logged-in Hugging Face user, or ``None``
            when nobody is logged in.

    Returns:
        A ``(status, table)`` tuple. ``status`` is a human-readable message
        describing the outcome. ``table`` is a ``pandas.DataFrame`` with one
        row per processed question (task id, question, submitted answer), or
        ``None`` when the run stopped before any question was processed.
    """
    # Login check: the submission is attributed to the profile's username.
    if not profile:
        return "Please Login to Hugging Face with the button.", None
    username = profile.username

    # Assemble the URLs used for this run.
    space_id = os.getenv("SPACE_ID")
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "Unknown"
    questions_url = f"{DEFAULT_API_URL}/questions"
    submit_url = f"{DEFAULT_API_URL}/submit"

    # 1. Fetch the questions.
    # Broad catch on purpose: this is a UI callback, so every failure is
    # turned into a status message instead of an unhandled exception.
    try:
        resp = requests.get(questions_url, timeout=20)
        resp.raise_for_status()
        questions_data = resp.json()
    except Exception as e:
        return f"Error fetching questions: {e}", None
    if not questions_data:
        return "No questions received from server.", None
    if not isinstance(questions_data, list):
        # A non-list payload (e.g. an error object) cannot be iterated as
        # question records; report it rather than crash in the loop below.
        return (
            "Error fetching questions: unexpected response format "
            f"({type(questions_data).__name__})",
            None,
        )

    # 2. Run the agent on every question.
    results_log = []
    answers_payload = []
    for item in questions_data:
        if not isinstance(item, dict):
            continue  # malformed entry: nothing to look up
        task_id = item.get("task_id")
        question = item.get("question")
        if not task_id or question is None:
            continue
        try:
            # Call the agent exactly once per question, passing the task id.
            answer = agent(question, task_id=task_id)
        except Exception as e:
            # Best effort: a failing question is recorded (and submitted) as
            # an error string so the remaining questions still run.
            answer = f"ERROR: {e}"
        answers_payload.append({
            "task_id": task_id,
            "submitted_answer": answer,
        })
        results_log.append({
            "Task ID": task_id,
            "Question": question,
            "Submitted Answer": answer,
        })
        # Small delay between agent calls to stay under rate limits; applied
        # after failures too, since a failed call still counts as a request.
        time.sleep(0.3)

    results_table = pd.DataFrame(results_log)
    if not answers_payload:
        return "Agent did not produce any answers to submit.", results_table

    # 3. Submit the answers.
    submission_data = {
        "username": username.strip(),
        "agent_code": agent_code,
        "answers": answers_payload,
    }
    try:
        post = requests.post(submit_url, json=submission_data, timeout=60)
        post.raise_for_status()
        data = post.json()
        # Built inside the try so an unexpected response shape is reported as
        # a failed submission rather than raising out of the callback.
        status = (
            f"✅ Submission Successful!\n"
            f"User: {data.get('username')}\n"
            f"Score: {data.get('score','N/A')}% "
            f"({data.get('correct_count','?')}/{data.get('total_attempted','?')})\n"
            f"Message: {data.get('message','No additional message.')}"
        )
        return status, results_table
    except Exception as e:
        return f"❌ Submission Failed: {e}", results_table
# --- Gradio UI ---
with gr.Blocks(title="Simplified GAIA Agent Evaluation") as demo:
    gr.Markdown("# Simplified GAIA Agent Evaluation Runner")
    # The literal is kept flush-left so the Markdown text carries no leading
    # indentation. The blank line before "This runner uses:" keeps that
    # sentence from being folded into list item 3 as a continuation line.
    gr.Markdown("""
**Instructions:**
1. Set your `GOOGLE_API_KEY` in the environment variables.
2. Log in to your Hugging Face account using the button below.
3. Click **Run Evaluation & Submit All Answers** to start.

This runner uses:
- A custom `agent.py` for answering GAIA questions.
- Gradio for UI.
- HTTP requests to fetch & submit answers.
""")
    gr.LoginButton()
    run_btn = gr.Button("Run Evaluation & Submit All Answers", variant="primary")
    status_out = gr.Textbox(label="Status / Results", lines=6, interactive=False)
    table_out = gr.DataFrame(label="Questions and Answers", wrap=True)
    # The handler is wired with no explicit inputs; its `profile` argument is
    # expected to be supplied by Gradio's OAuth login integration
    # (NOTE(review): confirm against the Gradio version in use).
    run_btn.click(fn=run_and_submit_all, outputs=[status_out, table_out])

if __name__ == "__main__":
    demo.launch(debug=True, share=False)