nawter
code update
1fc7c71
import os
import io
import gradio as gr
import requests
import pandas as pd
from smolagents import (
CodeAgent,
DuckDuckGoSearchTool,
LiteLLMModel,
Tool,
tool,
)
# --- Constants ---
# Base URL of the scoring service. This file builds three endpoints from it:
# /files/<task_id> (task attachments), /questions (question list) and /submit (answers).
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# --- Custom Tool: Read task files from GAIA API ---
class TaskFileReaderTool(Tool):
    """Agent tool that downloads the file attached to a task and returns it as text.

    The file is fetched from ``{DEFAULT_API_URL}/files/{task_id}`` and rendered
    according to the response's Content-Type header:

    * text / JSON / CSV  -> the first 10000 characters of the decoded body
    * spreadsheet / Excel -> the first sheet, rendered with ``DataFrame.to_string()``
    * anything else       -> decoded text if the payload looks textual, otherwise
      a short ``[Binary file, ...]`` placeholder
    """

    name = "task_file_reader"
    description = (
        "Downloads and reads a file attached to a GAIA task by its task_id. "
        "Use this when the question mentions an attached file, document, spreadsheet, or image."
    )
    inputs = {
        "task_id": {
            "type": "string",
            "description": "The task_id to download the file for.",
        }
    }
    output_type = "string"

    def forward(self, task_id: str) -> str:
        """Fetch the attachment for ``task_id`` and return a textual rendering.

        Args:
            task_id: Identifier of the task whose attached file should be read.

        Returns:
            The file content as text (see the class docstring for the rules),
            or an ``Error downloading file ...`` message if anything fails.
            This method never raises: failures are reported in the returned
            string so the calling agent can read them.
        """
        try:
            r = requests.get(f"{DEFAULT_API_URL}/files/{task_id}", timeout=30)
            r.raise_for_status()
            ct = r.headers.get("Content-Type", "")
            # Media types are case-insensitive, so match on a lower-cased copy
            # while keeping the header verbatim for the placeholder message.
            kind = ct.lower()
            if "text" in kind or "json" in kind or "csv" in kind:
                return r.text[:10000]
            if "spreadsheet" in kind or "excel" in kind:
                df = pd.read_excel(io.BytesIO(r.content))
                return df.to_string()
            # Unknown content type. Response.text decodes with replacement
            # characters instead of raising, so wrapping it in try/except can
            # never detect a binary payload. Sniff for NUL bytes instead: they
            # show up in images, archives and office documents but not in
            # ordinary text files.
            if b"\x00" in r.content[:8192]:
                return f"[Binary file, {len(r.content)} bytes, type: {ct}]"
            return r.text[:10000]
        except Exception as e:
            # Tool boundary: hand the failure back to the agent as text.
            return f"Error downloading file for task {task_id}: {e}"
# --- Agent Definition ---
class GAIAAgent:
def __init__(self):
api_key = os.getenv("ANTHROPIC_API_KEY")
if not api_key:
raise ValueError("Set ANTHROPIC_API_KEY env var")
model = LiteLLMModel(
model_id="anthropic/claude-sonnet-4-20250514",
api_key=api_key,
)
self.agent = CodeAgent(
tools=[DuckDuckGoSearchTool(), TaskFileReaderTool()],
model=model,
max_steps=8,
verbosity_level=1,
additional_authorized_imports=[
"re", "json", "math", "collections",
"itertools", "statistics", "unicodedata",
],
)
print("GAIAAgent initialized with Claude Sonnet.")
def __call__(self, question: str, task_id: str = None) -> str:
prompt = (
f"Question: {question}\n\n"
f"INSTRUCTIONS:\n"
f"- If the question references an attached file, use task_file_reader with task_id='{task_id}'.\n"
f"- Use web_search to find factual information when needed.\n"
f"- Give ONLY the exact final answer. No explanation, no 'The answer is', no extra words.\n"
f"- For numbers: just the number. For names: just the name. For lists: comma-separated.\n"
)
try:
result = self.agent.run(prompt)
answer = str(result).strip()
for prefix in ["The answer is ", "Answer: ", "FINAL ANSWER: ", "Final answer: "]:
if answer.lower().startswith(prefix.lower()):
answer = answer[len(prefix):].strip()
return answer
except Exception as e:
print(f"Agent error: {e}")
return "Unable to determine answer"
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Run the agent over every fetched question and submit the answers.

    Args:
        profile: OAuth profile of the logged-in user, or None when nobody is
            logged in.

    Returns:
        A ``(status, table)`` pair. ``status`` is a human-readable message.
        ``table`` is a DataFrame with one row per attempted question (Task ID,
        Question, Submitted Answer), or None when the run stopped before any
        question was attempted (no login, agent init failure, fetch failure).
    """
    space_id = os.getenv("SPACE_ID")
    if not profile:
        return "Please Login to Hugging Face with the button.", None

    username = profile.username
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"

    # Step 1: build the agent.
    try:
        agent = GAIAAgent()
    except Exception as e:
        return f"Error initializing agent: {e}", None

    # Step 2: download the question list.
    try:
        questions_response = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15)
        questions_response.raise_for_status()
        questions_data = questions_response.json()
        print(f"Fetched {len(questions_data)} questions.")
    except Exception as e:
        return f"Error fetching questions: {e}", None

    # Step 3: answer each question, recording both the API payload entry and
    # a display row for the results table.
    results_log = []
    answers_payload = []
    for number, entry in enumerate(questions_data, start=1):
        task_id = entry.get("task_id")
        question_text = entry.get("question")
        if not task_id or question_text is None:
            # Entries without an id or a question cannot be answered.
            continue

        print(f"\n--- Q{number}/{len(questions_data)} [{task_id}] ---")
        print(f"Q: {question_text[:120]}")
        try:
            answer = agent(question_text, task_id=task_id)
            print(f"A: {answer}")
        except Exception as e:
            answer = f"ERROR: {e}"
            print(f"Error: {e}")

        answers_payload.append({"task_id": task_id, "submitted_answer": answer})
        results_log.append(
            {"Task ID": task_id, "Question": question_text, "Submitted Answer": answer}
        )

    results_df = pd.DataFrame(results_log)
    if not answers_payload:
        return "No answers produced.", results_df

    # Step 4: submit everything in one request and summarize the response.
    submission = {
        "username": username.strip(),
        "agent_code": agent_code,
        "answers": answers_payload,
    }
    try:
        submit_response = requests.post(
            f"{DEFAULT_API_URL}/submit", json=submission, timeout=120
        )
        submit_response.raise_for_status()
        data = submit_response.json()
        summary_lines = [
            "Submission Successful!",
            f"User: {data.get('username')}",
            f"Score: {data.get('score', 'N/A')}% "
            f"({data.get('correct_count', '?')}/{data.get('total_attempted', '?')} correct)",
            f"Message: {data.get('message', '')}",
        ]
        return "\n".join(summary_lines), results_df
    except Exception as e:
        return f"Submission Failed: {e}", results_df
# --- Gradio UI ---
# Page layout: title, usage instructions, login button, run button, and the
# two outputs (status text and results table) that run_and_submit_all fills.
with gr.Blocks() as demo:
    gr.Markdown("# GAIA Agent — smolagents + Claude Sonnet")
    gr.Markdown(
        "1. Log in with HuggingFace\n"
        # Step 2 quotes the run button's label exactly so users can find it.
        "2. Click 'Run Evaluation & Submit All Answers'\n"
        "3. Wait for the agent to answer all 20 questions"
    )
    gr.LoginButton()
    run_btn = gr.Button("Run Evaluation & Submit All Answers")
    status_box = gr.Textbox(label="Status", lines=5, interactive=False)
    results_tbl = gr.DataFrame(label="Results", wrap=True)
    # No `inputs` are wired: the handler's only parameter is annotated
    # gr.OAuthProfile | None. NOTE(review): this relies on Gradio supplying
    # the profile from that annotation — confirm against the Gradio OAuth docs.
    run_btn.click(fn=run_and_submit_all, outputs=[status_box, results_tbl])

if __name__ == "__main__":
    demo.launch(debug=True, share=False)