| import requests |
| import pandas as pd |
| import gradio as gr |
| from agent import HF_USERNAME, AGENT_CODE_URL, solve_question |
|
|
# Root URL of the scoring service. The helper functions in this module append
# the endpoint paths (/random-question, /questions, /files/<task_id>, /submit).
GAIA_API_URL: str = "https://agents-course-unit4-scoring.hf.space"
|
|
def get_random_question(*, timeout: float = 30.0):
    """Fetch one randomly selected question from the scoring API.

    Args:
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The decoded JSON body of ``GET {GAIA_API_URL}/random-question``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failure or timeout.
    """
    resp = requests.get(f"{GAIA_API_URL}/random-question", timeout=timeout)
    # Fail loudly on HTTP errors rather than returning an error payload that
    # callers would mistake for a question.
    resp.raise_for_status()
    return resp.json()
def get_questions(*, timeout: float = 30.0):
    """Fetch the full question set from the scoring API.

    Args:
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The decoded JSON body of ``GET {GAIA_API_URL}/questions``. The caller
        in this module iterates it and reads ``task_id`` and ``question``
        from each item.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failure or timeout.
    """
    resp = requests.get(f"{GAIA_API_URL}/questions", timeout=timeout)
    # Fail loudly on HTTP errors rather than returning an error payload that
    # callers would iterate as if it were the question list.
    resp.raise_for_status()
    return resp.json()
def get_task_file(task_id: str, *, timeout: float = 30.0) -> bytes | None:
    """Download the attachment associated with a task, if there is one.

    Args:
        task_id: Identifier of the task whose file should be fetched.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The raw response body when the server answers with HTTP 200,
        otherwise ``None``.

    Raises:
        requests.RequestException: On connection failure or timeout.
    """
    res = requests.get(f"{GAIA_API_URL}/files/{task_id}", timeout=timeout)
    # Any non-200 status is reported as "no file" rather than as an error.
    if res.status_code == 200:
        return res.content
    return None
def submit_answer(
    answers: list[dict],
    username: str,
    agent_code: str = AGENT_CODE_URL,
    *,
    timeout: float = 60.0,
) -> dict:
    """Submit a batch of answers to the scoring API.

    Args:
        answers: One dict per answered task; the caller in this module builds
            them with ``task_id`` and ``submitted_answer`` keys.
        username: Name under which the submission is recorded.
        agent_code: Link to the agent's source, sent as ``agent_code``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The decoded JSON body of the ``POST {GAIA_API_URL}/submit`` response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failure or timeout.
    """
    payload = {
        "username": username,
        "agent_code": agent_code,
        "answers": answers,
    }
    resp = requests.post(
        f"{GAIA_API_URL}/submit",
        json=payload,
        timeout=timeout,
    )
    # A rejected submission must not be reported as a scoring result.
    resp.raise_for_status()
    return resp.json()
def run_random():
    """Fetch one random question, run the agent on it and print the answer."""
    print("Fetching a random GAIA question…\n")
    # The fetched task payload is handed to the agent unchanged.
    result = solve_question(get_random_question())
    print(f"Answer : {result}")
|
|
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Run the agent on every question and submit the answers, streaming progress.

    This is a generator: each yielded ``(status_text, results)`` pair updates
    the UI, where ``results`` is ``None`` or a ``pandas.DataFrame`` holding
    the task id, question and answer for every task processed so far.

    NOTE(review): the click handler is wired without explicit inputs, so
    ``profile`` is presumably injected by Gradio based on this parameter's
    type annotation — keep the annotation intact.

    Args:
        profile: OAuth profile of the logged-in user; a falsy value is
            treated as "not logged in".

    Yields:
        ``(status_text, results)`` tuples as described above.
    """
    if not profile:
        yield "Please log in to Hugging Face first.", None
        return

    username = profile.username

    # A network or HTTP failure here should become a status message rather
    # than an unhandled exception inside the generator.
    try:
        questions = get_questions()
    except Exception as e:
        yield f"Error fetching questions: {e}", None
        return

    # Guard against an empty list or a non-list payload (e.g. an error object).
    if not isinstance(questions, list) or not questions:
        yield "No questions were returned by the API; nothing to submit.", None
        return

    total = len(questions)
    yield f" Fetched {total} questions. Starting agent...", None

    answers, log = [], []

    for i, item in enumerate(questions, start=1):
        try:
            task_id, question = item["task_id"], item["question"]
        except (KeyError, TypeError):
            # Malformed entry: it cannot be answered or submitted, so skip it.
            yield f" Progress: {i}/{total} — skipped malformed question", pd.DataFrame(log)
            continue

        # An agent failure is recorded as the answer so one bad task does not
        # abort the whole run.
        try:
            answer = solve_question(item)
        except Exception as e:
            answer = f"ERROR: {e}"

        answers.append({"task_id": task_id, "submitted_answer": answer})
        log.append({"Task ID": task_id, "Question": question, "Answer": answer})

        yield f" Progress: {i}/{total} — Last answer: {str(answer)[:80]}", pd.DataFrame(log)

    if not answers:
        yield "The agent produced no answers; nothing to submit.", pd.DataFrame(log)
        return

    # Keep the collected results visible even when the submission fails.
    try:
        resp = submit_answer(answers, username)
    except Exception as e:
        yield f"Submission failed: {e}", pd.DataFrame(log)
        return

    status = (f"Done! User: {resp.get('username')} | Score: {resp.get('score')}% | "
              f"Correct: {resp.get('correct_count')}/{resp.get('total_attempted')} | "
              f"{resp.get('message')}")
    yield status, pd.DataFrame(log)
|
|
|
|
|
|
# Build the UI. Components are created in display order, so the statement
# order below is significant.
with gr.Blocks() as demo:
    gr.Markdown("# GAIA Agent Evaluation Runner")
    gr.Markdown(
        "Log in with Hugging Face, then click the button to run and submit all answers."
    )
    gr.LoginButton()
    btn = gr.Button("Run Evaluation & Submit All Answers")
    status = gr.Textbox(interactive=False, lines=3, label="Status")
    table = gr.DataFrame(wrap=True, label="Results")
    # Each (status, results) pair yielded by the handler updates these two
    # output components.
    btn.click(outputs=[status, table], fn=run_and_submit_all)


# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|
|
|
|