"""Gradio app that runs the agent over the scoring API's questions and submits the answers."""

import requests
import pandas as pd
import gradio as gr

from agent import HF_USERNAME, AGENT_CODE_URL, solve_question

GAIA_API_URL = "https://agents-course-unit4-scoring.hf.space"

# Without an explicit timeout `requests` waits indefinitely on a stalled
# server, which would freeze the UI generator below.
REQUEST_TIMEOUT = 30
# Submission gets a more generous limit than the plain GET endpoints.
SUBMIT_TIMEOUT = 120


def _get_json(path: str):
    """GET ``path`` below ``GAIA_API_URL`` and return the decoded JSON body.

    Raises:
        requests.RequestException: on connection problems, timeouts, or a
            4xx/5xx response.
        ValueError: if the response body is not valid JSON.
    """
    resp = requests.get(f"{GAIA_API_URL}{path}", timeout=REQUEST_TIMEOUT)
    # Fail loudly instead of handing an error payload back as if it were data.
    resp.raise_for_status()
    return resp.json()


def get_random_question():
    """Return the JSON payload of the ``/random-question`` endpoint."""
    return _get_json("/random-question")


def get_questions():
    """Return the JSON payload of the ``/questions`` endpoint."""
    return _get_json("/questions")


def get_task_file(task_id: str) -> bytes | None:
    """Download the file attached to ``task_id``.

    Returns:
        The raw response body when the server answers 200, otherwise ``None``.
    """
    url = f"{GAIA_API_URL}/files/{task_id}"
    res = requests.get(url, timeout=REQUEST_TIMEOUT)
    if res.status_code == 200:
        return res.content
    return None


def submit_answer(
    answers: list[dict],
    username: str,
    agent_code: str = AGENT_CODE_URL,
) -> dict:
    """POST the collected answers to the ``/submit`` endpoint.

    Args:
        answers: One dict per task; this module builds them with the keys
            ``task_id`` and ``submitted_answer``.
        username: Sent as the ``username`` field of the payload.
        agent_code: Sent as the ``agent_code`` field of the payload.

    Returns:
        The decoded JSON response of the scoring service.

    Raises:
        requests.RequestException: on connection problems, timeouts, or a
            4xx/5xx response.
        ValueError: if the response body is not valid JSON.
    """
    payload = {
        "username": username,
        "agent_code": agent_code,
        "answers": answers,
    }
    resp = requests.post(
        f"{GAIA_API_URL}/submit",
        json=payload,
        timeout=SUBMIT_TIMEOUT,
    )
    resp.raise_for_status()
    return resp.json()


def run_random():
    """Fetch one random question, solve it, and print the answer."""
    print("Fetching a random GAIA question…\n")
    task = get_random_question()
    answer = solve_question(task)
    print(f"Answer : {answer}")


def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Solve every question, stream progress to the UI, then submit the answers.

    This is a generator used as a Gradio event handler: each yielded
    ``(status_text, results)`` pair updates the status textbox and the results
    table, where ``results`` is a ``pandas.DataFrame`` or ``None``.

    Args:
        profile: The logged-in user's profile, or ``None`` when nobody is
            logged in. The annotation is kept exactly as written because the
            click handler below is wired without explicit inputs.
    """
    if not profile:
        yield "Please log in to Hugging Face first.", None
        return

    username = profile.username

    try:
        questions = get_questions()
    except (requests.RequestException, ValueError) as exc:
        # Report the failure in the UI rather than raising out of the handler.
        yield f"Could not fetch questions: {exc}", None
        return

    if not questions:
        # Nothing to solve, so there is nothing worth submitting either.
        yield "No questions were returned by the server.", None
        return

    total = len(questions)
    yield f" Fetched {total} questions. Starting agent...", None

    answers, log = [], []
    for i, item in enumerate(questions, start=1):
        task_id, question = item["task_id"], item["question"]
        try:
            answer = solve_question(item)
        except Exception as e:
            # Deliberate per-question boundary: one failing task must not
            # abort the whole run, so the error text becomes the answer.
            answer = f"ERROR: {e}"
        answers.append({"task_id": task_id, "submitted_answer": answer})
        log.append({"Task ID": task_id, "Question": question, "Answer": answer})
        yield (
            f" Progress: {i}/{total} — Last answer: {str(answer)[:80]}",
            pd.DataFrame(log),
        )

    results = pd.DataFrame(log)
    try:
        resp = submit_answer(answers, username)
    except (requests.RequestException, ValueError) as exc:
        # Keep the computed answers visible even though submission failed.
        yield f"Submission failed: {exc}", results
        return

    summary = (
        f"Done! \nUser: {resp.get('username')} | Score: {resp.get('score')}% | "
        f"Correct: {resp.get('correct_count')}/{resp.get('total_attempted')} | "
        f"{resp.get('message')}"
    )
    yield summary, results


with gr.Blocks() as demo:
    gr.Markdown("# GAIA Agent Evaluation Runner")
    gr.Markdown("Log in with Hugging Face, then click the button to run and submit all answers.")
    gr.LoginButton()
    btn = gr.Button("Run Evaluation & Submit All Answers")
    status = gr.Textbox(label="Status", lines=3, interactive=False)
    table = gr.DataFrame(label="Results", wrap=True)
    btn.click(fn=run_and_submit_all, outputs=[status, table])


if __name__ == "__main__":
    demo.launch()