"""
Custom Tools for GAIA Benchmark Agent

Working tools:
1. wikipedia_search - Search Wikipedia for factual information
2. fetch_url_content - Fetch and extract text from web pages
"""

import os

import requests
from bs4 import BeautifulSoup
from smolagents import tool

# Upper bounds on the text handed back to the agent by each tool.
_WIKIPEDIA_MAX_CHARS = 8000
_WEBPAGE_MAX_CHARS = 5000


@tool
def wikipedia_search(query: str, lang: str = "en") -> str:
    """Searches Wikipedia and returns the text of the most relevant article.

    Args:
        query: The search query (e.g., "Mercedes Sosa discography")
        lang: Language code for Wikipedia (default: "en")

    Returns:
        The article title and its plain-text content (truncated to 8000
        characters), or an error message if not found.
    """
    try:
        # `lang` is interpolated into the host name, so only accept values
        # shaped like a Wikipedia subdomain ("en", "simple", "zh-min-nan").
        if not (lang.isascii() and lang.replace("-", "").isalnum()):
            return f"ERROR: Invalid Wikipedia language code: {lang!r}"

        search_url = f"https://{lang}.wikipedia.org/w/api.php"
        headers = {
            "User-Agent": "GAIABenchmarkAgent/1.0 (Educational project)"
        }

        # Step 1: find the best-matching page title for the query.
        search_params = {
            "action": "query",
            "list": "search",
            "srsearch": query,
            "format": "json",
            "srlimit": 1,
        }
        response = requests.get(
            search_url, params=search_params, headers=headers, timeout=10
        )
        response.raise_for_status()

        search_results = response.json().get("query", {}).get("search", [])
        if not search_results:
            return f"No Wikipedia article found for: {query}"

        page_title = search_results[0]["title"]

        # Step 2: fetch the plain-text extract of that page.
        # MediaWiki boolean parameters are true whenever they are present,
        # whatever their value, so "exintro" must be omitted (not sent as
        # False) to get the whole article instead of only the lead section.
        content_params = {
            "action": "query",
            "titles": page_title,
            "prop": "extracts",
            "explaintext": True,
            "format": "json",
            "exsectionformat": "plain",
        }
        response = requests.get(
            search_url, params=content_params, headers=headers, timeout=10
        )
        response.raise_for_status()

        pages = response.json().get("query", {}).get("pages", {})
        if not pages:
            return f"Could not retrieve content for: {page_title}"

        # A single title was requested, so the first entry is the page.
        page = next(iter(pages.values()))
        extract = page.get("extract", "")
        if not extract:
            return f"Wikipedia article '{page_title}' has no text content."

        if len(extract) > _WIKIPEDIA_MAX_CHARS:
            extract = extract[:_WIKIPEDIA_MAX_CHARS] + "\n\n[Content truncated...]"

        return f"Wikipedia: {page_title}\n\n{extract}"

    except requests.exceptions.RequestException as e:
        return f"ERROR: Failed to search Wikipedia - {e}"
    except Exception as e:
        # Tool boundary: report the failure as text instead of raising.
        return f"ERROR: Wikipedia search failed - {e}"


@tool
def fetch_url_content(url: str) -> str:
    """Fetches and extracts text content from a given URL.

    Args:
        url: The URL to fetch content from

    Returns:
        The extracted text content from the webpage, or an error message.
    """
    try:
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
        }
        response = requests.get(url, headers=headers, timeout=15)
        response.raise_for_status()

        soup = BeautifulSoup(response.content, "html.parser")

        # Remove non-content elements before extracting text.
        for element in soup(["script", "style", "nav", "header", "footer"]):
            element.decompose()

        text = soup.get_text()

        # Clean up whitespace: strip each line, break a line only where two
        # consecutive spaces separate fragments (splitting on single spaces
        # would put every word on its own line), and drop empty pieces.
        lines = (line.strip() for line in text.splitlines())
        chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
        text = "\n".join(chunk for chunk in chunks if chunk)

        if len(text) > _WEBPAGE_MAX_CHARS:
            text = text[:_WEBPAGE_MAX_CHARS] + "\n\n[Content truncated]"

        return f"Content from {url}:\n\n{text}"

    except requests.exceptions.RequestException as e:
        return f"ERROR: Failed to fetch URL - {e}"
    except Exception as e:
        # Tool boundary: report the failure as text instead of raising.
        return f"ERROR: {e}"


# Export tools for use in agent.py
custom_tools = [
    wikipedia_search,
    fetch_url_content,
]