Spaces:
Sleeping
Sleeping
| """ | |
| HF Agents Course — Unit 4 Final Project: GAIA Level-1 agent + submission UI. | |
| This Space exposes a Gradio UI that: | |
| 1. Authenticates the user via the gradio_oauth log-in. | |
| 2. Fetches the 20 GAIA-Level-1 evaluation questions from the official | |
| course scoring API. | |
| 3. Runs a smolagents CodeAgent on each question (with web search, | |
| webpage visiting, Python interpreter, and file download tools). | |
| 4. Submits the answers and prints the score returned by the API. | |
| Scoring API: https://agents-course-unit4-scoring.hf.space (see /docs). | |
| """ | |
| from __future__ import annotations | |
| import os | |
| from typing import Any | |
| import gradio as gr | |
| import requests | |
| from smolagents import ( | |
| CodeAgent, | |
| DuckDuckGoSearchTool, | |
| InferenceClientModel, | |
| VisitWebpageTool, | |
| tool, | |
| ) | |
| from smolagents.default_tools import FinalAnswerTool, PythonInterpreterTool | |
# Root of the course scoring service, plus the endpoints derived from it.
API_URL = "https://agents-course-unit4-scoring.hf.space"
QUESTIONS_URL = API_URL + "/questions"
SUBMIT_URL = API_URL + "/submit"
FILE_URL = API_URL + "/files"

# Modules the agent's generated code is allowed to import; build_agent() hands
# this list to CodeAgent as `additional_authorized_imports`. It is meant to be
# broad enough for typical GAIA Level-1 work (date arithmetic, simple table
# manipulation, JSON parsing, regex, ...) without granting network or
# filesystem access beyond what the registered tools already provide.
ALLOWED_IMPORTS = [
    "math",
    "datetime",
    "json",
    "re",
    "statistics",
    "itertools",
    "functools",
    "collections",
    "string",
    "decimal",
    "fractions",
    "calendar",
    "csv",
    "pandas",
    "numpy",
]
| # ----- Custom tools --------------------------------------------------------- | |
@tool
def download_task_file(task_id: str) -> str:
    """Download the auxiliary file associated with a GAIA task_id (if any).

    The official Unit 4 scoring API exposes /files/{task_id}. Some questions
    reference an attached image, spreadsheet, audio, PDF, etc. The bytes are
    saved to ./task_files/<task_id>.bin and the absolute path is returned so
    the agent can open / parse it with normal Python. Every failure is
    reported as a plain message string instead of an exception, so the agent
    can read it and decide what to do next.

    Args:
        task_id: The GAIA task identifier (as supplied in each question).

    Returns:
        The absolute path of the saved file, or a short message when the task
        has no attachment, the task_id is unusable, or the download failed.
    """
    # The identifier is produced by model-written code and is interpolated
    # into both a URL and a filesystem path, so refuse anything that carries
    # directory components instead of writing outside ./task_files.
    cleaned_id = task_id.strip()
    if not cleaned_id or os.path.basename(cleaned_id) != cleaned_id:
        return f"Download failed: invalid task_id {task_id!r}"

    try:
        response = requests.get(f"{FILE_URL}/{cleaned_id}", timeout=30)
        if response.status_code == 404:
            # The API answers 404 for tasks that simply have no attachment.
            return "No file attached to this task."
        response.raise_for_status()
    except requests.RequestException as exc:
        return f"Download failed: {exc}"

    path = os.path.abspath(os.path.join("task_files", f"{cleaned_id}.bin"))
    try:
        # Create the directory only once there is actually something to save.
        os.makedirs("task_files", exist_ok=True)
        with open(path, "wb") as fh:
            fh.write(response.content)
    except OSError as exc:
        return f"Download failed: {exc}"
    return path
| # ----- Agent factory -------------------------------------------------------- | |
# Behavioural guidance for the GAIA agent, consumed by build_agent(). Written
# one output line per entry; the trailing empty entry keeps the final newline.
SYSTEM_HINT = "\n".join(
    (
        "You are a careful, persistent GAIA benchmark agent. For each question:",
        " 1. Plan: identify exactly what fact / list / number is being asked.",
        " 2. Act: use the tools (web search, visit_webpage, python_interpreter,",
        " download_task_file) to gather and verify the answer.",
        " 3. Answer: call final_answer(...) with the SHORT, EXACT-MATCH answer",
        " - just the value, no preamble.",
        " - no 'FINAL ANSWER:' prefix.",
        " - numbers as digits, no units unless asked; lists",
        " comma-separated; dates as the question requests.",
        "",
    )
)
def build_agent() -> CodeAgent:
    """Create the smolagents CodeAgent used to answer the GAIA questions.

    The model id is read from the ``AGENT_MODEL_ID`` environment variable and
    falls back to ``Qwen/Qwen2.5-Coder-32B-Instruct``. The agent is given the
    web search, webpage visiting, Python interpreter, file download and
    final-answer tools, may import the modules in ``ALLOWED_IMPORTS``, and is
    limited to 12 steps per run.

    Returns:
        A configured ``CodeAgent`` whose system prompt ends with
        ``SYSTEM_HINT``.
    """
    model_id = os.environ.get("AGENT_MODEL_ID", "Qwen/Qwen2.5-Coder-32B-Instruct")
    model = InferenceClientModel(model_id=model_id, max_tokens=2048, temperature=0.0)
    agent = CodeAgent(
        model=model,
        tools=[
            DuckDuckGoSearchTool(),
            VisitWebpageTool(),
            PythonInterpreterTool(),
            download_task_file,
            FinalAnswerTool(),
        ],
        additional_authorized_imports=ALLOWED_IMPORTS,
        max_steps=12,
        verbosity_level=1,
        name="GAIAAgent",
        description=SYSTEM_HINT,
    )
    # `name` / `description` only describe this agent to a *manager* agent;
    # they are not part of the prompt the model itself receives. Append the
    # hint to the system prompt template so the answer-format rules actually
    # reach the model on every run.
    agent.prompt_templates["system_prompt"] = (
        agent.prompt_templates["system_prompt"] + "\n\n" + SYSTEM_HINT
    )
    return agent
| # ----- Runner --------------------------------------------------------------- | |
def run_one(agent: CodeAgent, q: dict[str, Any]) -> str:
    """Ask the agent a single question and return its answer as text.

    The prompt always carries the task id and the question. When the question
    record has a non-empty ``file_name``, a paragraph is added telling the
    agent to fetch the attachment with ``download_task_file``.

    Args:
        agent: Object whose ``run(prompt)`` method produces the answer.
        q: Question record with ``task_id`` and ``question`` keys and an
            optional ``file_name`` key.

    Returns:
        The agent's result converted to ``str`` with surrounding whitespace
        removed.
    """
    task_id, question = q["task_id"], q["question"]
    sections = [f"task_id: {task_id}\nQuestion: {question}"]

    attachment = q.get("file_name")
    if attachment is not None and attachment != "":
        sections.append(
            f"This task has an attached file named {attachment!r}. "
            f"Call download_task_file({task_id!r}) to fetch it, then open it "
            "with the appropriate Python library."
        )

    raw_answer = agent.run("\n\n".join(sections))
    return str(raw_answer).strip()
def run_and_submit(profile: gr.OAuthProfile | None) -> tuple[str, str]:
    """Answer every evaluation question with the agent and submit the results.

    Steps: fetch the question list from ``QUESTIONS_URL``, build one agent,
    run it on each question, POST the answers to ``SUBMIT_URL``, and format
    the returned score. Failures at any stage are reported in the returned
    summary text rather than raised, so the UI always has something to show.

    Args:
        profile: OAuth profile of the signed-in user, or ``None`` when nobody
            is signed in.

    Returns:
        A ``(summary, transcript)`` pair of Markdown strings. ``summary`` is
        the score line (or an error message); ``transcript`` has one bullet
        per answered question and is empty when nothing was answered.
    """
    if profile is None:
        return "❌ Not logged in. Click 'Sign in with Hugging Face' first.", ""

    username = profile.username
    space_id = os.environ.get("SPACE_ID")
    agent_code_url = (
        f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else ""
    )

    try:
        r = requests.get(QUESTIONS_URL, timeout=30)
        r.raise_for_status()
        questions = r.json()
    except Exception as exc:  # noqa: BLE001
        return f"Failed to fetch questions: {exc}", ""
    # Don't build an agent or submit an empty answer set for a useless payload.
    if not isinstance(questions, list) or not questions:
        return "Failed to fetch questions: the API returned no questions.", ""

    try:
        agent = build_agent()
    except Exception as exc:  # noqa: BLE001
        return f"Failed to build agent: {exc}", ""

    answers, transcript_rows = [], []
    for q in questions:
        # Skip malformed entries instead of letting a KeyError abort the run.
        if not isinstance(q, dict):
            continue
        task_id = q.get("task_id")
        question = q.get("question")
        if not task_id or question is None:
            continue

        try:
            answer = run_one(agent, q)
        except Exception as exc:  # noqa: BLE001
            # Still submit something for this task so the rest of the run counts.
            answer = f"AGENT_ERROR: {exc}"
        answers.append({"task_id": task_id, "submitted_answer": answer})

        question_text = str(question)
        # Only mark the preview with an ellipsis when it was actually cut.
        preview = (
            question_text
            if len(question_text) <= 120
            else question_text[:120] + "…"
        )
        transcript_rows.append(
            f"- **{task_id}** — {preview}\n → `{answer[:200]}`"
        )

    transcript_md = "\n".join(transcript_rows)
    if not answers:
        return "No answers were produced, so nothing was submitted.", transcript_md

    payload = {
        "username": username,
        "agent_code": agent_code_url,
        "answers": answers,
    }
    try:
        resp = requests.post(SUBMIT_URL, json=payload, timeout=120)
        resp.raise_for_status()
        result = resp.json()
    except Exception as exc:  # noqa: BLE001
        return f"Submit failed: {exc}", transcript_md

    summary = (
        f"### Score: **{result.get('score', '?')}** "
        f"({result.get('correct_count', '?')} / {result.get('total_attempted', '?')})\n\n"
        f"{result.get('message', '')}"
    )
    return summary, transcript_md
| # ----- Gradio UI ------------------------------------------------------------ | |
# Page layout: heading, short description, login button, the run button and
# two Markdown areas that receive the (summary, transcript) pair returned by
# run_and_submit. Components render in the order they are created here.
with gr.Blocks(title="GAIA Unit 4 Agent β VoicesColeby") as demo:
    gr.Markdown("# π¦ GAIA Unit 4 β Final Project Agent")
    gr.Markdown(
        "smolagents `CodeAgent` (Qwen2.5-Coder-32B via HF Inference Providers) with "
        "web_search, visit_webpage, python_interpreter, download_task_file, and "
        "final_answer. Click **Run + Submit** below to evaluate against the 20 "
        "GAIA-Level-1 questions and post the score to the Students leaderboard."
    )
    gr.LoginButton()

    run_btn = gr.Button("π Run + Submit", variant="primary")
    score_md = gr.Markdown(label="Score")
    transcript = gr.Markdown(label="Per-question answers")

    # No explicit inputs: the handler's only parameter is the OAuth profile.
    run_btn.click(
        fn=run_and_submit,
        inputs=None,
        outputs=[score_md, transcript],
    )


if __name__ == "__main__":
    demo.launch(debug=False)