"""Gradio app: run the agent on every scoring-API question and submit the answers."""

import os
import time

import gradio as gr
import pandas as pd
import requests

from agent import agent

DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
GAIA_DATA_URL = "https://huggingface.co/datasets/gaia-benchmark/GAIA/resolve/main/2023/validation"

# Payload and per-question results of the most recent run. They are stored
# before the submit call so they stay available even if submission fails.
LAST = {}

# Answer submitted when the agent yields an empty string or raises.
FALLBACK_ANSWER = "0"


def submit_with_retry(payload, *, attempts=4, backoff_seconds=1.0):
    """POST ``payload`` to the scoring API's ``/submit`` endpoint with retries.

    Server errors (5xx), HTTP 429, connection problems, timeouts and
    unparsable response bodies are retried with exponential backoff. Any other
    4xx response is treated as a rejection of the payload and is not retried,
    since resending the same body cannot succeed.

    Args:
        payload: JSON-serialisable body to submit.
        attempts: Maximum number of POST attempts.
        backoff_seconds: Delay before the second attempt; doubled after each
            further failure.

    Returns:
        ``(True, parsed_json)`` on success, otherwise ``(False, "Failed")``.
    """
    url = f"{DEFAULT_API_URL}/submit"
    for attempt in range(1, attempts + 1):
        try:
            response = requests.post(url, json=payload, timeout=120)
            code = response.status_code
            if code < 500 and code != 429:
                response.raise_for_status()
                return True, response.json()
            reason = f"HTTP {code}"
        except requests.HTTPError as err:
            # Must precede RequestException: a client error is final.
            print(f"Submission rejected: {err}")
            return False, "Failed"
        except (requests.RequestException, ValueError) as err:
            # ValueError covers a non-JSON body on older requests versions.
            reason = repr(err)
        print(f"Submit attempt {attempt}/{attempts} failed: {reason}")
        if attempt < attempts:
            time.sleep(backoff_seconds * 2 ** (attempt - 1))
    return False, "Failed"


def _fetch_questions():
    """Download and decode the question list from the scoring API."""
    response = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15)
    response.raise_for_status()
    return response.json()


def _resolve_file_url(file_name):
    """Return the URL for a question attachment, or ``None`` if there is none."""
    if not file_name:
        return None
    if file_name.startswith("http"):
        return file_name
    return f"{GAIA_DATA_URL}/{file_name}"


def run_and_submit_all(progress=gr.Progress()):
    """Answer every question with the agent, submit the answers, report the score.

    Args:
        progress: Gradio progress tracker. The ``gr.Progress()`` default is
            kept in the signature because it is wired as a click handler below.

    Returns:
        A ``(status_text, results_dataframe)`` pair matching the two output
        components of the UI. On an early failure the DataFrame is empty.
    """
    # NOTE(review): HF_TOKEN is only checked for presence here and never used
    # in this file; presumably the agent reads it itself - confirm.
    hf_token = os.getenv("HF_TOKEN")
    space_id = os.getenv("SPACE_ID")
    if not hf_token or not space_id:
        return "❌ Missing HF_TOKEN or SPACE_ID", pd.DataFrame()
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"

    try:
        questions = _fetch_questions()
    except (requests.RequestException, ValueError) as err:
        print(f"Fetching questions failed: {err!r}")
        return f"❌ Could not fetch questions: {err}", pd.DataFrame()
    if not isinstance(questions, list) or not questions:
        # Nothing to answer; do not submit an empty payload.
        return "❌ No questions received", pd.DataFrame()

    total = len(questions)
    results = []
    answers = []
    status_lines = []
    progress(0, desc="Starting...")
    status_lines.append("🚀 Starting evaluation\n")

    for idx, q in enumerate(questions, start=1):
        task_id = q["task_id"]
        question_text = q["question"]
        file_url = _resolve_file_url(q.get("file_name"))

        status_lines.append(f"📋 Processing {idx}/{total}: {question_text}")
        print(f"Processing {idx}/{total}: {question_text}")
        progress(idx / total, desc=f"{idx}/{total}")

        try:
            raw = agent(question_text, files=[file_url] if file_url else None)
        except Exception as err:  # boundary: one bad question must not lose the run
            print(f"Agent failed on task {task_id}: {err!r}")
            status_lines.append(f" ❌ Agent error: {err}")
            raw = ""
        ans = str(raw).strip() or FALLBACK_ANSWER

        results.append({"Task ID": task_id, "Question": question_text, "Answer": ans})
        answers.append({"task_id": task_id, "submitted_answer": ans})
        status_lines.append(f" ✅ Answer: {ans}\n")

    payload = {
        "username": os.getenv("HF_USERNAME", "BiGuan"),
        "agent_code": agent_code,
        "answers": answers,
    }
    LAST["payload"] = payload
    LAST["results"] = results

    ok, data = submit_with_retry(payload)
    if ok:
        final_status = (
            f"✅ Score: {data.get('score')}%\n"
            f"Correct: {data.get('correct_count')}/{data.get('total_attempted')}\n"
        )
    else:
        final_status = "❌ Submission failed"

    full_status = "\n".join(status_lines) + "\n" + final_status
    return full_status, pd.DataFrame(results)


with gr.Blocks() as demo:
    gr.Markdown("# GAIA Agent (90‑Point Attempt)")
    btn = gr.Button("Run Evaluation", variant="primary")
    status = gr.Textbox(label="Detailed Progress", lines=12)
    table = gr.DataFrame(label="Results (Task ID, Question, Answer)")
    btn.click(run_and_submit_all, outputs=[status, table], queue=True, show_progress="full")

demo.launch()