"""Gradio front-end for running an agent on GAIA benchmark tasks.

Two tabs are exposed: one runs the agent on a single, manually entered
question; the other runs it over every task loaded from ``METADATA_PATH``
and can optionally POST the answers to the GAIA submission endpoint.
"""

import json
import os

import gradio as gr
import requests
from smolagents import Task

from agent import load_tasks, solve_task

# Load metadata
METADATA_PATH = "metadata.jsonl"
GAIA_API_URL = "https://huggingface.co/spaces/gaia-benchmark/api/submit"

# Upper bound (seconds) on the submission request so that a stalled endpoint
# cannot block the UI worker forever.
REQUEST_TIMEOUT_SECONDS = 60

# Optional auth for GAIA
HF_USER = os.getenv("HF_USERNAME")
HF_TOKEN = os.getenv("HF_TOKEN")


def run_and_submit_all(submit: bool = False) -> str:
    """Solve every task from ``METADATA_PATH`` and optionally submit the answers.

    Args:
        submit: When true, POST the collected answers to ``GAIA_API_URL``
            instead of returning them.

    Returns:
        With ``submit`` false, the answers as an indented JSON string.
        With ``submit`` true, a status message containing the body of the
        server's response.

    Raises:
        ValueError: If ``submit`` is true and ``HF_USERNAME`` or ``HF_TOKEN``
            is not set in the environment.
        requests.RequestException: If the submission request fails at the
            transport level (including hitting the timeout).
    """
    # Fail fast: check credentials before spending time solving every task.
    if submit and (not HF_USER or not HF_TOKEN):
        raise ValueError("Missing HF_USERNAME or HF_TOKEN in env vars.")

    # Each loaded task is expected to expose a ``task_id`` attribute.
    results = [
        {"question_id": task.task_id, "answer": solve_task(task)}
        for task in load_tasks(METADATA_PATH)
    ]

    if not submit:
        # NOTE(review): assumes solve_task returns a JSON-serialisable value.
        return json.dumps(results, indent=2)

    response = requests.post(
        GAIA_API_URL,
        headers={
            "Authorization": f"Bearer {HF_TOKEN}",
            "Content-Type": "application/json",
        },
        json={
            "username": HF_USER,
            "level": "level1",
            "answers": results,
        },
        timeout=REQUEST_TIMEOUT_SECONDS,
    )
    # Do not claim success when the server rejected the submission.
    if not response.ok:
        return f"Submission failed (HTTP {response.status_code}): {response.text}"
    return f"Submitted! Response: {response.text}"


def single_task_run(question_id: str, question_input: str):
    """Wrap one question in a ``Task`` and return what ``solve_task`` produces.

    Args:
        question_id: Identifier stored on the task as ``task_id``.
        question_input: Question text stored on the task as ``input``.

    Returns:
        The value returned by ``solve_task`` for the constructed task.
    """
    task = Task(task_id=question_id, input=question_input)
    return solve_task(task)


# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("# GAIA Benchmark Agent")

    with gr.Tab("Run Single Task"):
        with gr.Row():
            qid = gr.Textbox(label="Question ID", placeholder="e.g., q123")
            qtext = gr.Textbox(label="Question Text", lines=4)
        out = gr.Textbox(label="Generated Answer")
        btn = gr.Button("Run Agent")
        btn.click(fn=single_task_run, inputs=[qid, qtext], outputs=out)

    with gr.Tab(" Run All + Submit"):
        submit_toggle = gr.Checkbox(label="Submit to GAIA?", value=False)
        submit_btn = gr.Button("Run All Tasks")
        batch_out = gr.Textbox(label="Batch Output", lines=20)
        submit_btn.click(
            fn=run_and_submit_all,
            inputs=[submit_toggle],
            outputs=batch_out,
        )


if __name__ == "__main__":
    demo.launch()