# gaia-agent / app.py
# Author: Hemil4 — commit 8bba5f5 ("model change")
import os
import re
import math
import json
import requests
import gradio as gr
import pandas as pd
from langchain_core.messages import HumanMessage, SystemMessage, ToolMessage
from langchain_core.tools import tool
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
from langgraph.graph import StateGraph, END
from langgraph.graph.message import add_messages
from typing import TypedDict, Annotated
# --- Constants ---
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"  # GAIA course scoring service (questions/files/submit)
MODEL_ID = "mistralai/Mistral-7B-Instruct-v0.3"  # HF Inference API chat model used by BasicAgent
# ── Agent state ───────────────────────────────────────────────────────────────
class AgentState(TypedDict):
    # Conversation transcript. The `add_messages` reducer makes LangGraph
    # append messages returned by each node instead of replacing the list.
    messages: Annotated[list, add_messages]
# ── Tools ─────────────────────────────────────────────────────────────────────
@tool
def web_search(query: str) -> str:
    """Search the web for current facts, people, events, or definitions."""
    # NOTE: the docstring above doubles as the tool description shown to the
    # LLM, so it is kept verbatim.
    try:
        from ddgs import DDGS  # imported lazily so a missing package only breaks this tool
        with DDGS() as search_client:
            hits = list(search_client.text(query, max_results=5))
        if not hits:
            return "No results found."
        formatted = [
            f"Title: {hit['title']}\nURL: {hit['href']}\nSnippet: {hit['body']}"
            for hit in hits
        ]
        return "\n\n".join(formatted)
    except Exception as e:
        # Errors are returned as text so the agent loop keeps running.
        return f"Search error: {e}"
@tool
def fetch_url(url: str) -> str:
    """Fetch and return the text content of a web page."""
    try:
        import urllib.request
        request = urllib.request.Request(url, headers={"User-Agent": "Mozilla/5.0"})
        with urllib.request.urlopen(request, timeout=10) as response:
            raw_html = response.read().decode("utf-8", errors="ignore")
        # Crude HTML-to-text: drop tags, then collapse runs of whitespace.
        text_only = re.sub(r"<[^>]+>", " ", raw_html)
        text_only = re.sub(r"\s{2,}", " ", text_only).strip()
        return text_only[:6000]  # cap size to keep the LLM context small
    except Exception as e:
        return f"Fetch error: {e}"
@tool
def python_repl(code: str) -> str:
    """Execute Python code. Use for arithmetic, date math, string processing, list work."""
    # SECURITY: exec of LLM-generated code — acceptable only in this sandboxed
    # benchmark Space; never expose to untrusted users.
    import io, contextlib, traceback, ast
    buf = io.StringIO()
    local_ns: dict = {}
    try:
        tree = ast.parse(code, mode="exec")
        # REPL-style: if the final statement is a bare expression, split it off
        # and evaluate it exactly once. The previous version exec'd the whole
        # source and then eval'd the last line AGAIN, which duplicated any side
        # effects of that line and let its print output escape the buffer.
        trailing = None
        if tree.body and isinstance(tree.body[-1], ast.Expr):
            trailing = ast.Expression(tree.body[-1].value)
            tree.body = tree.body[:-1]
        with contextlib.redirect_stdout(buf):
            exec(compile(tree, "<string>", "exec"), local_ns)
            if trailing is not None:
                val = eval(compile(trailing, "<string>", "eval"), local_ns)
                if val is not None:
                    buf.write(str(val))
        output = buf.getvalue()
        # Return a placeholder instead of an empty string so the LLM sees
        # that the call succeeded.
        return output.strip() if output.strip() else "(no output)"
    except Exception:
        # Full traceback helps the model fix its own code on the next turn.
        return traceback.format_exc()
@tool
def read_task_file(task_id: str) -> str:
    """Download the file attached to a GAIA task and return its text content."""
    file_url = f"{DEFAULT_API_URL}/files/{task_id}"
    try:
        response = requests.get(file_url, timeout=30)
        response.raise_for_status()
        mime = response.headers.get("content-type", "")
        payload = response.content
        # PDFs: try text extraction; on any failure fall through to raw decode.
        if "pdf" in mime or str(task_id).endswith(".pdf"):
            try:
                import pdfplumber, io
                with pdfplumber.open(io.BytesIO(payload)) as pdf:
                    extracted = (page.extract_text() or "" for page in pdf.pages)
                    return "\n".join(extracted)[:8000]
            except Exception:
                pass
        # Images cannot be decoded to text; report what was received.
        if "image" in mime:
            return f"[Image file received ({len(payload)} bytes). Analyse based on question context.]"
        # Default: treat as text and cap the size for the LLM context.
        return payload.decode("utf-8", errors="ignore")[:8000]
    except Exception as e:
        return f"File fetch error: {e}"
@tool
def calculator(expression: str) -> str:
    """Evaluate a mathematical expression. Supports +,-,*,/,**,sqrt,log, etc."""
    try:
        # Expose every public name from math and hide builtins so the eval'd
        # expression is restricted to arithmetic. (eval on model output is
        # still only acceptable inside this sandboxed Space.)
        env = {name: getattr(math, name) for name in dir(math) if not name.startswith("_")}
        env["__builtins__"] = {}
        return str(eval(expression, env))
    except Exception as e:
        return f"Calc error: {e}"
# Tools offered to the LLM, plus a name -> tool lookup used by tool_node.
TOOLS = [web_search, fetch_url, python_repl, read_task_file, calculator]
TOOL_MAP = {t.name: t for t in TOOLS}
# System prompt prepended to every model call (see agent_node). Tuned for
# GAIA's exact-match scoring: answer-only output, exact formatting.
SYSTEM_PROMPT = """You are a precise research assistant solving GAIA benchmark questions.
RULES:
1. Use your tools to look up facts β€” never guess or hallucinate.
2. Break multi-step questions into parts and solve each with tools.
3. When you have the final answer, respond with ONLY the answer β€” no explanation, no preamble.
4. Answers must be exact: correct spelling, correct format (numbers, lists, names).
5. For lists, separate items with commas.
6. For numbers, give only the number (with units only if the question asks).
7. If the question mentions an attached file, call read_task_file with the task_id."""
# ── LangGraph nodes ───────────────────────────────────────────────────────────
def agent_node(state: AgentState, llm_with_tools):
    """Invoke the tool-bound LLM on the system prompt plus the running transcript."""
    prompt = [SystemMessage(content=SYSTEM_PROMPT), *state["messages"]]
    return {"messages": [llm_with_tools.invoke(prompt)]}
def tool_node(state: AgentState):
    """Run every tool call requested by the most recent AI message.

    Returns one ToolMessage per call; errors and unknown tool names are
    reported as text so the agent loop never crashes mid-run.
    """
    ai_message = state["messages"][-1]
    tool_messages = []
    for call in ai_message.tool_calls:
        tool_fn = TOOL_MAP.get(call["name"])
        if tool_fn is None:
            output = f"Unknown tool: {call['name']}"
        else:
            try:
                output = tool_fn.invoke(call["args"])
            except Exception as e:
                output = f"Tool error: {e}"
        tool_messages.append(ToolMessage(content=str(output), tool_call_id=call["id"]))
    return {"messages": tool_messages}
def should_continue(state: AgentState) -> str:
last = state["messages"][-1]
if hasattr(last, "tool_calls") and last.tool_calls:
return "tools"
return END
# ── BasicAgent β€” drop-in replacement for the template ────────────────────────
class BasicAgent:
    """
    LangGraph ReAct agent served through the HuggingFace Inference API.

    The model is whatever MODEL_ID names (the docstring and startup print
    previously hard-coded "Qwen2.5-72B" while MODEL_ID pointed at
    Mistral-7B; both now track MODEL_ID). Implements
    __call__(question) -> answer so it fits the course template unchanged.
    """
    def __init__(self):
        # Token is optional locally; without it the endpoint fails at call time.
        hf_token = os.environ.get("HF_TOKEN", "")
        endpoint = HuggingFaceEndpoint(
            repo_id=MODEL_ID,
            task="text-generation",
            max_new_tokens=1024,
            temperature=0.1,  # near-deterministic for exact-match answers
            huggingfacehub_api_token=hf_token,
        )
        llm = ChatHuggingFace(llm=endpoint)
        llm_with_tools = llm.bind_tools(TOOLS)
        # ReAct loop: agent -> (tools -> agent)* -> END.
        graph = StateGraph(AgentState)
        graph.add_node("agent", lambda s: agent_node(s, llm_with_tools))
        graph.add_node("tools", tool_node)
        graph.set_entry_point("agent")
        graph.add_conditional_edges("agent", should_continue, {"tools": "tools", END: END})
        graph.add_edge("tools", "agent")
        self.app = graph.compile()
        print(f"BasicAgent (LangGraph + {MODEL_ID}) initialized.")
    def __call__(self, question: str, task_id: str = "") -> str:
        """Run the graph on one question and return the final answer text.

        Args:
            question: The GAIA question text.
            task_id: Optional task id; appended to the prompt so the model
                knows it can fetch an attached file via read_task_file.
        """
        print(f"Agent received question (first 50 chars): {question[:50]}...")
        user_content = question
        if task_id:
            user_content += f"\n\n[Task ID for attached file: {task_id}]"
        try:
            final_state = self.app.invoke(
                {"messages": [HumanMessage(content=user_content)]},
                config={"recursion_limit": 25},  # caps runaway tool loops
            )
            answer = final_state["messages"][-1].content.strip()
        except Exception as e:
            # Surface the failure as the answer so the run continues.
            answer = f"ERROR: {e}"
        print(f"Agent returning answer: {answer[:80]}")
        return answer
# ── run_and_submit_all β€” identical structure to template ──────────────────────
def run_and_submit_all(profile=None):
    """
    Fetch all GAIA questions, run BasicAgent on each, and submit the answers.

    Args:
        profile: Gradio OAuth profile (provides .username). When None — e.g.
            running locally without login — a hard-coded fallback username is
            used so the submission still goes through.

    Returns:
        A (status_message, results_dataframe) tuple matching the two Gradio
        output components; the dataframe may be None on early failures.
    """
    space_id = os.getenv("SPACE_ID")  # used to build the public code link
    if profile:
        username = profile.username
        print(f"User logged in: {username}")
    else:
        # No OAuth session: fall back to a fixed username.
        from types import SimpleNamespace
        profile = SimpleNamespace(username="Hemil4")
        username = "Hemil4"
    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"
    # 1. Instantiate agent
    try:
        agent = BasicAgent()
    except Exception as e:
        print(f"Error instantiating agent: {e}")
        return f"Error initializing agent: {e}", None
    # Link to this Space's source, sent along with the submission.
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    print(agent_code)
    # 2. Fetch questions
    print(f"Fetching questions from: {questions_url}")
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data:
            return "Fetched questions list is empty or invalid format.", None
        print(f"Fetched {len(questions_data)} questions.")
    except Exception as e:
        return f"Error fetching questions: {e}", None
    # 3. Run agent
    results_log = []       # rows for the UI table (includes failures)
    answers_payload = []   # successful answers only, for submission
    print(f"Running agent on {len(questions_data)} questions...")
    for item in questions_data:
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            print(f"Skipping item with missing task_id or question: {item}")
            continue
        try:
            submitted_answer = agent(question_text, task_id)
            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
        except Exception as e:
            # One bad question must not abort the whole run.
            print(f"Error running agent on task {task_id}: {e}")
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
    if not answers_payload:
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
    # 4. Submit
    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        final_status = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )
        print("Submission successful.")
        return final_status, pd.DataFrame(results_log)
    except requests.exceptions.HTTPError as e:
        # Prefer the server's structured error detail when available.
        error_detail = f"Server responded with status {e.response.status_code}."
        try:
            error_json = e.response.json()
            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
        except Exception:
            error_detail += f" Response: {e.response.text[:500]}"
        return f"Submission Failed: {error_detail}", pd.DataFrame(results_log)
    except Exception as e:
        return f"Submission Failed: {e}", pd.DataFrame(results_log)
# ── Gradio UI β€” identical to template ────────────────────────────────────────
# Build the Gradio UI: login button, run button, status box, results table.
with gr.Blocks() as demo:
    gr.Markdown("# Basic Agent Evaluation Runner")
    gr.Markdown(
        """
        **Instructions:**
        1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
        2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
        3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
        ---
        **Disclaimers:**
        Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
        This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution.
        """
    )
    # OAuth login; Gradio passes the resulting profile into run_and_submit_all.
    gr.LoginButton()
    run_button = gr.Button("Run Evaluation & Submit All Answers")
    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table]
    )
if __name__ == "__main__":
    # Startup diagnostics: report which Space environment variables are set
    # so local runs are easy to distinguish from deployed ones.
    print("\n" + "-" * 30 + " App Starting " + "-" * 30)
    detected_host = os.getenv("SPACE_HOST")
    detected_space = os.getenv("SPACE_ID")
    if not detected_host:
        print("ℹ️ SPACE_HOST not found (running locally?).")
    else:
        print(f"βœ… SPACE_HOST found: {detected_host}")
    if not detected_space:
        print("ℹ️ SPACE_ID not found (running locally?).")
    else:
        print(f"βœ… SPACE_ID found: {detected_space}")
        print(f"   Repo Tree URL: https://huggingface.co/spaces/{detected_space}/tree/main")
    print("-" * 60 + "\n")
    demo.launch(debug=True, share=False)