"""Enhanced Agent Evaluation Runner with simplified Agent integration""" import os import time import gradio as gr import requests import pandas as pd from dotenv import load_dotenv from agent import Agent agent = Agent() load_dotenv() # 常量 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space" def run_and_submit_all(profile: gr.OAuthProfile | None): """ Fetches all questions, runs the Agent on them, submits all answers, and displays the results. """ # 登录检查 if not profile: return "Please Login to Hugging Face with the button.", None username = profile.username # 初始化你的简易 Agent # 组装提交相关 URL space_id = os.getenv("SPACE_ID") agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "Unknown" questions_url = f"{DEFAULT_API_URL}/questions" submit_url = f"{DEFAULT_API_URL}/submit" # 1. 拉取题目 try: resp = requests.get(questions_url, timeout=20) resp.raise_for_status() questions_data = resp.json() if not questions_data: return "No questions received from server.", None except Exception as e: return f"Error fetching questions: {e}", None # 2. 遍历题目并调用 Agent 获取答案 results_log = [] answers_payload = [] for item in questions_data: task_id = item.get("task_id") question = item.get("question") if not task_id or question is None: continue try: # 只调用一次,带 task_id answer = agent(question, task_id=task_id) answers_payload.append({ "task_id": task_id, "submitted_answer": answer }) results_log.append({ "Task ID": task_id, "Question": question, "Submitted Answer": answer }) time.sleep(0.3) # 小延迟防止 QPS 超限 except Exception as e: err = f"ERROR: {e}" answers_payload.append({"task_id": task_id, "submitted_answer": err}) results_log.append({"Task ID": task_id, "Question": question, "Submitted Answer": err}) if not answers_payload: return "Agent did not produce any answers to submit.", pd.DataFrame(results_log) # 3. 提交答案 submission_data = { "username": username.strip(), "agent_code": agent_code, "answers": answers_payload } try: post = requests.post(submit_url, json=submission_data, timeout=60) post.raise_for_status() data = post.json() status = ( f"✅ Submission Successful!\n" f"User: {data.get('username')}\n" f"Score: {data.get('score','N/A')}% " f"({data.get('correct_count','?')}/{data.get('total_attempted','?')})\n" f"Message: {data.get('message','No additional message.')}" ) return status, pd.DataFrame(results_log) except Exception as e: return f"❌ Submission Failed: {e}", pd.DataFrame(results_log) # --- Gradio 界面 --- with gr.Blocks(title="Simplified GAIA Agent Evaluation") as demo: gr.Markdown("# Simplified GAIA Agent Evaluation Runner") gr.Markdown(""" **Instructions:** 1. Set your `GOOGLE_API_KEY` in the environment variables. 2. Log in to your Hugging Face account using the button below. 3. Click **Run Evaluation & Submit All Answers** to start. This runner uses: - A custom `agent.py` for answering GAIA questions. - Gradio for UI. - HTTP requests to fetch & submit answers. """) gr.LoginButton() run_btn = gr.Button("Run Evaluation & Submit All Answers", variant="primary") status_out = gr.Textbox(label="Status / Results", lines=6, interactive=False) table_out = gr.DataFrame(label="Questions and Answers", wrap=True) run_btn.click(fn=run_and_submit_all, outputs=[status_out, table_out]) if __name__ == "__main__": demo.launch(debug=True, share=False)