import os import io import gradio as gr import requests import pandas as pd from smolagents import ( CodeAgent, DuckDuckGoSearchTool, LiteLLMModel, Tool, tool, ) # --- Constants --- DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space" # --- Custom Tool: Read task files from GAIA API --- class TaskFileReaderTool(Tool): name = "task_file_reader" description = ( "Downloads and reads a file attached to a GAIA task by its task_id. " "Use this when the question mentions an attached file, document, spreadsheet, or image." ) inputs = { "task_id": { "type": "string", "description": "The task_id to download the file for.", } } output_type = "string" def forward(self, task_id: str) -> str: try: r = requests.get(f"{DEFAULT_API_URL}/files/{task_id}", timeout=30) r.raise_for_status() ct = r.headers.get("Content-Type", "") if "text" in ct or "json" in ct or "csv" in ct: return r.text[:10000] elif "spreadsheet" in ct or "excel" in ct: df = pd.read_excel(io.BytesIO(r.content)) return df.to_string() else: try: return r.text[:10000] except Exception: return f"[Binary file, {len(r.content)} bytes, type: {ct}]" except Exception as e: return f"Error downloading file for task {task_id}: {e}" # --- Agent Definition --- class GAIAAgent: def __init__(self): api_key = os.getenv("ANTHROPIC_API_KEY") if not api_key: raise ValueError("Set ANTHROPIC_API_KEY env var") model = LiteLLMModel( model_id="anthropic/claude-sonnet-4-20250514", api_key=api_key, ) self.agent = CodeAgent( tools=[DuckDuckGoSearchTool(), TaskFileReaderTool()], model=model, max_steps=8, verbosity_level=1, additional_authorized_imports=[ "re", "json", "math", "collections", "itertools", "statistics", "unicodedata", ], ) print("GAIAAgent initialized with Claude Sonnet.") def __call__(self, question: str, task_id: str = None) -> str: prompt = ( f"Question: {question}\n\n" f"INSTRUCTIONS:\n" f"- If the question references an attached file, use task_file_reader with task_id='{task_id}'.\n" f"- Use web_search to find factual information when needed.\n" f"- Give ONLY the exact final answer. No explanation, no 'The answer is', no extra words.\n" f"- For numbers: just the number. For names: just the name. For lists: comma-separated.\n" ) try: result = self.agent.run(prompt) answer = str(result).strip() for prefix in ["The answer is ", "Answer: ", "FINAL ANSWER: ", "Final answer: "]: if answer.lower().startswith(prefix.lower()): answer = answer[len(prefix):].strip() return answer except Exception as e: print(f"Agent error: {e}") return "Unable to determine answer" def run_and_submit_all(profile: gr.OAuthProfile | None): space_id = os.getenv("SPACE_ID") if not profile: return "Please Login to Hugging Face with the button.", None username = profile.username agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" # 1. Init agent try: agent = GAIAAgent() except Exception as e: return f"Error initializing agent: {e}", None # 2. Fetch questions try: resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15) resp.raise_for_status() questions_data = resp.json() print(f"Fetched {len(questions_data)} questions.") except Exception as e: return f"Error fetching questions: {e}", None # 3. Run agent results_log = [] answers_payload = [] for i, item in enumerate(questions_data): task_id = item.get("task_id") question_text = item.get("question") if not task_id or question_text is None: continue print(f"\n--- Q{i+1}/{len(questions_data)} [{task_id}] ---") print(f"Q: {question_text[:120]}") try: answer = agent(question_text, task_id=task_id) print(f"A: {answer}") except Exception as e: answer = f"ERROR: {e}" print(f"Error: {e}") answers_payload.append({"task_id": task_id, "submitted_answer": answer}) results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": answer}) if not answers_payload: return "No answers produced.", pd.DataFrame(results_log) # 4. Submit submission = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload} try: resp = requests.post(f"{DEFAULT_API_URL}/submit", json=submission, timeout=120) resp.raise_for_status() data = resp.json() status = ( f"Submission Successful!\n" f"User: {data.get('username')}\n" f"Score: {data.get('score', 'N/A')}% " f"({data.get('correct_count', '?')}/{data.get('total_attempted', '?')} correct)\n" f"Message: {data.get('message', '')}" ) return status, pd.DataFrame(results_log) except Exception as e: return f"Submission Failed: {e}", pd.DataFrame(results_log) # --- Gradio UI --- with gr.Blocks() as demo: gr.Markdown("# GAIA Agent — smolagents + Claude Sonnet") gr.Markdown( "1. Log in with HuggingFace\n" "2. Click 'Run Evaluation & Submit'\n" "3. Wait for the agent to answer all 20 questions" ) gr.LoginButton() run_btn = gr.Button("Run Evaluation & Submit All Answers") status_box = gr.Textbox(label="Status", lines=5, interactive=False) results_tbl = gr.DataFrame(label="Results", wrap=True) run_btn.click(fn=run_and_submit_all, outputs=[status_box, results_tbl]) if __name__ == "__main__": demo.launch(debug=True, share=False)