gaia_unit4_space / tools /wiki_tools.py
hawkdev's picture
Improve GAIA exact-match handling and Wikipedia wikitext tool
9428cf6
import json
from typing import Any
import requests
import wikipedia
# User-Agent header value sent with the MediaWiki API request made by
# wikipedia_wikitext().
UA = "GAIA-Agent/1.0 (educational; +https://huggingface.co)"
def wikipedia_wikitext(title: str) -> str:
    """Fetch raw wikitext for an English Wikipedia page (via MediaWiki API).

    Args:
        title: Page title to look up. Surrounding whitespace is stripped
            before the request is made.

    Returns:
        The page's wikitext, cut to 120,000 characters with a trailing
        "[truncated]" marker when it is longer. Failures are reported as a
        plain-text message instead of being raised:

        * "Error: empty title." for a missing, blank or non-string title;
        * "Wikipedia API error: ..." for transport, HTTP, JSON-decoding or
          API-level errors;
        * "No wikitext returned (check page title)." when the response
          carries no wikitext.
    """
    # Tool arguments may arrive as None or another non-string value; treat
    # those like a blank title instead of failing on ``.strip()``.
    page_title = title.strip() if isinstance(title, str) else ""
    if not page_title:
        return "Error: empty title."

    url = "https://en.wikipedia.org/w/api.php"
    params: dict[str, Any] = {
        "action": "parse",
        "page": page_title,
        "prop": "wikitext",
        "formatversion": "2",
        "format": "json",
    }
    try:
        r = requests.get(url, params=params, timeout=45, headers={"User-Agent": UA})
        r.raise_for_status()
        data = r.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError is the common base of the JSON decode errors that
        # ``r.json()`` can raise (json.JSONDecodeError and the simplejson
        # equivalent), so a malformed body never escapes as an exception.
        return f"Wikipedia API error: {e}"

    # A syntactically valid but non-object payload has no "error"/"parse"
    # keys to inspect; report it instead of failing on ``.get``.
    if not isinstance(data, dict):
        return "Wikipedia API error: unexpected response payload."

    err = data.get("error")
    if err:
        return f"Wikipedia API error: {err}"

    parsed = data.get("parse")
    wt = parsed.get("wikitext", "") if isinstance(parsed, dict) else ""
    if not wt:
        return "No wikitext returned (check page title)."

    # Cap the size of what is handed back to the caller.
    if len(wt) > 120_000:
        return wt[:120_000] + "\n[truncated]"
    return wt
def wikipedia_search(query: str, results: int = 5) -> str:
    """Look up English Wikipedia page titles matching ``query``.

    Args:
        query: Search text; a blank or whitespace-only value is rejected.
        results: Forwarded to ``wikipedia.search`` as its ``results``
            argument.

    Returns:
        One "- <title>" line per hit, joined with newlines. Otherwise a
        plain-text message: "Error: empty query.", "No titles found.", or
        "Wikipedia search error: ..." when the lookup raises.
    """
    stripped = query.strip()
    if not stripped:
        return "Error: empty query."

    wikipedia.set_lang("en")
    try:
        hits = wikipedia.search(query, results=results)
    except Exception as exc:
        # Tool boundary: any lookup failure is reported as text.
        return f"Wikipedia search error: {exc}"

    if hits:
        bullet_lines = [f"- {hit}" for hit in hits]
        return "\n".join(bullet_lines)
    return "No titles found."
def wikipedia_summary(title: str, sentences: int = 12) -> str:
    """Return the title, URL and summary of an English Wikipedia page.

    The page object and the summary text are requested separately from the
    ``wikipedia`` package, both with ``auto_suggest=True``.

    Args:
        title: Page title to look up; a blank or whitespace-only value is
            rejected.
        sentences: Forwarded to ``wikipedia.summary`` as its ``sentences``
            argument.

    Returns:
        "Title: ...\\nURL: ...\\n\\n<summary>", cut to 40,000 characters.
        Otherwise a plain-text message: "Error: empty title.", a
        "Disambiguation; try one of: ..." line listing up to eight options,
        or "Wikipedia error: ..." for any other failure.
    """
    if title.strip() == "":
        return "Error: empty title."

    wikipedia.set_lang("en")
    try:
        found = wikipedia.page(title, auto_suggest=True)
        extract = wikipedia.summary(title, sentences=sentences, auto_suggest=True)
        # Formatting stays inside the try so attribute-access failures are
        # reported as text as well.
        rendered = f"Title: {found.title}\nURL: {found.url}\n\n{extract}"
        return rendered[:40_000]
    except wikipedia.DisambiguationError as ambiguous:
        shortlist = ambiguous.options[:8]
        opts = ", ".join(shortlist)
        return f"Disambiguation; try one of: {opts}"
    except Exception as exc:
        return f"Wikipedia error: {exc}"