"""Comprehensive, **key-free** toolset for the LangGraph agent.

Every tool is fully self-contained and safe to run inside a public Hugging
Face Space – no private API keys required.

Capabilities covered (GAIA L1):
    • general web search
    • Wikipedia summary
    • light image inspection
    • basic math
    • current time
    • YouTube video metadata
    • inspect Excel files
"""

from __future__ import annotations

import ast
import datetime
import io
import math
import pathlib
import re
import statistics
from typing import Any, Dict, List, Union
from urllib.parse import parse_qs, urlparse

import pandas as pd
import requests
from bs4 import BeautifulSoup, Tag
from langchain.tools import BaseTool, tool
from PIL import Image
from pytube import YouTube

# ---------------------------------------------------------------------------
# Helper utilities
# ---------------------------------------------------------------------------

_WHITESPACE_RE = re.compile(r"\s+")


def _html_text(soup: Tag) -> str:
    """Return the text content of *soup* with whitespace runs collapsed.

    Accepts any bs4 ``Tag`` (``BeautifulSoup`` documents included); child text
    nodes are joined with a single space before normalisation.
    """
    return _WHITESPACE_RE.sub(" ", soup.get_text(" ").strip())


# ---------------------------------------------------------------------------
# Time & math
# ---------------------------------------------------------------------------


@tool
def get_current_time() -> str:
    """Return the current UTC time (ISO-8601)."""
    # datetime.utcnow() is deprecated since Python 3.12.  Build an aware UTC
    # timestamp and drop tzinfo so the string keeps its historical shape
    # (no "+00:00" suffix).
    now = datetime.datetime.now(datetime.timezone.utc)
    return now.replace(tzinfo=None).isoformat()


# Everything public in ``math`` (pi, e, sqrt, log, ...) is exposed by name.
_MATH_NAMES: Dict[str, Any] = {
    name: value for name, value in vars(math).items() if not name.startswith("__")
}

# Typographic operators that otherwise fail with a SyntaxError.
_UNICODE_OPERATORS = str.maketrans(
    {
        "\u2212": "-",  # minus sign
        "\u2011": "-",  # non-breaking hyphen
        "\u2013": "-",  # en dash
        "\u00d7": "*",  # multiplication sign
        "\u00f7": "/",  # division sign
    }
)

# Syntax the calculator accepts.  Attribute access, subscripts, lambdas,
# comprehensions and string literals are deliberately absent: they are the
# building blocks of the usual escapes from a builtins-less ``eval``.
_ALLOWED_AST_NODES = (
    ast.Expression,
    ast.Constant,
    ast.Name,
    ast.Load,
    ast.BinOp,
    ast.UnaryOp,
    ast.BoolOp,
    ast.Compare,
    ast.IfExp,
    ast.Call,
    ast.keyword,
    ast.Tuple,
    ast.List,
    ast.operator,
    ast.unaryop,
    ast.boolop,
    ast.cmpop,
)


def _parse_arithmetic(expression: str) -> ast.Expression:
    """Parse *expression* and reject anything that is not plain arithmetic.

    Raises:
        SyntaxError: if the text is not a valid Python expression.
        ValueError: if it uses syntax or literals outside the whitelist.
    """
    # Leading whitespace is stripped because, unlike eval() on a string,
    # ast.parse() treats it as indentation.
    source = expression.translate(_UNICODE_OPERATORS).strip()
    tree = ast.parse(source, mode="eval")
    for node in ast.walk(tree):
        if not isinstance(node, _ALLOWED_AST_NODES):
            raise ValueError(f"unsupported syntax: {type(node).__name__}")
        if isinstance(node, ast.Constant) and not isinstance(
            node.value, (int, float, complex)
        ):
            raise ValueError(f"unsupported literal: {node.value!r}")
    return tree


@tool
def calculator(expression: str) -> str:
    """Evaluate an arithmetic **expression** (e.g. "2 + 2*3").

    Supported tokens: numbers, + - * / ** % ( ), comparisons, and
    constants/funcs from math.* (e.g. "sqrt(2) * pi").
    Returns the result or an error message.
    """
    try:
        tree = _parse_arithmetic(expression)
        code = compile(tree, "calculator-expression", "eval")
        # NOTE(security): eval() on agent-supplied text.  Emptying
        # __builtins__ alone is NOT a sandbox; the AST whitelist above is
        # what blocks attribute/subscript based escapes.  Expensive inputs
        # such as "9**9**9" are still not bounded here.
        result = eval(code, {"__builtins__": {}}, _MATH_NAMES)  # pylint: disable=eval-used
        return str(result)
    except Exception as exc:  # pylint: disable=broad-except
        return f"Error: {exc}"


# ---------------------------------------------------------------------------
# Web search & Wikipedia
# ---------------------------------------------------------------------------

_DDG_URL = "https://duckduckgo.com/html/"
_WIKI_API = "https://en.wikipedia.org/api/rest_v1/page/summary/{}"


def _resolve_ddg_url(href: str) -> str:
    """Return the destination behind a DuckDuckGo redirect link.

    Links of the form ``//duckduckgo.com/l/?uddg=<encoded target>`` are
    unwrapped to the decoded target; any other *href* is returned unchanged.
    """
    parsed = urlparse(href)
    if parsed.netloc.endswith("duckduckgo.com") and parsed.path.rstrip("/") == "/l":
        targets = parse_qs(parsed.query).get("uddg")
        if targets:
            return targets[0]
    return href


@tool
def web_search(query: str, max_results: int = 6) -> List[Dict[str, str]]:
    """Return *max_results* DuckDuckGo hits for **query**.

    Each hit is a dict with keys: title, url, snippet.
    A non-positive *max_results* yields an empty list.
    Network failures propagate as ``requests`` exceptions.
    """
    if max_results <= 0:
        return []

    response = requests.post(_DDG_URL, data={"q": query, "s": "0"}, timeout=10)
    soup = BeautifulSoup(response.text, "lxml")

    results = []
    for anchor in soup.select("a.result__a", limit=max_results):
        # The snippet lives in the enclosing ".result" container, which may
        # be missing if the markup differs from what is expected.
        container = anchor.find_parent(class_="result")
        snippet_tag = (
            container.select_one(".result__snippet") if container is not None else None
        )
        results.append(
            {
                "title": _html_text(anchor),
                "url": _resolve_ddg_url(anchor.get("href", "")),
                "snippet": _html_text(snippet_tag) if snippet_tag else "",
            }
        )
    return results


@tool
def wikipedia_summary(title: str) -> str:
    """Return the lead paragraph of a Wikipedia page by **title**.

    Returns the extract text, or a string starting with "Error:" when the
    page cannot be fetched.
    """
    # safe="" also escapes "/" so titles such as "AC/DC" stay one path segment.
    url = _WIKI_API.format(requests.utils.quote(title, safe=""))
    try:
        resp = requests.get(url, timeout=10)
    except requests.RequestException as exc:
        return f"Error: {exc}"
    if resp.status_code != 200:
        return f"Error: page '{title}' not found."
    try:
        data = resp.json()
    except ValueError:  # body was not valid JSON
        return "Error: invalid response from Wikipedia."
    return data.get("extract", "No extract available.")


# ---------------------------------------------------------------------------
# YouTube metadata
# ---------------------------------------------------------------------------


@tool
def youtube_info(url: str) -> Dict[str, Any]:
    """Fetch basic metadata (title, length, author, views) of a YouTube video.

    Returns a dict with keys title, author, length_sec, views, or
    ``{"error": message}`` on failure.
    """
    try:
        yt = YouTube(url)
        # The metadata properties are read inside the try block as well, so
        # a failure while fetching them is reported instead of propagating.
        return {
            "title": yt.title,
            "author": yt.author,
            "length_sec": yt.length,
            "views": yt.views,
        }
    except Exception as exc:  # pylint: disable=broad-except
        return {"error": str(exc)}


# ---------------------------------------------------------------------------
# Image inspection
# ---------------------------------------------------------------------------


@tool
def image_info(path: str) -> Dict[str, Any]:
    """Return basic stats for an image file at **path** (W×H, mode, format, mean pixel value per channel).

    Returns a dict with keys width, height, format, mode, mean_pixel, or
    ``{"error": message}`` when the file is missing or unreadable.
    """
    p = pathlib.Path(path)
    if not p.exists():
        return {"error": "file not found"}

    try:
        with Image.open(p) as im:
            # Read everything needed while the image is still open.
            width, height = im.size
            image_format = im.format
            mode = im.mode
            pixels = list(im.getdata())
    except Exception as exc:  # pylint: disable=broad-except
        return {"error": str(exc)}

    if not pixels:
        # No pixel data: nothing to average.
        means = []
    elif isinstance(pixels[0], (tuple, list)):
        # Multi-band pixels arrive as tuples; transpose into one sequence
        # per channel.
        means = [statistics.mean(band) for band in zip(*pixels)]
    else:
        means = [statistics.mean(pixels)]

    return {
        "width": width,
        "height": height,
        "format": image_format,
        "mode": mode,
        "mean_pixel": means,
    }


# ---------------------------------------------------------------------------
# Excel inspection
# ---------------------------------------------------------------------------


@tool
def excel_preview(path: str, sheet: Union[str, int] = 0, nrows: int = 5) -> str:
    """Return the first *nrows* rows of an Excel sheet as a markdown table.

    **sheet** is a sheet name or zero-based index.  Returns a string starting
    with "Error:" when the file is missing or cannot be rendered.
    """
    p = pathlib.Path(path)
    if not p.exists():
        return "Error: file not found."
    try:
        df = pd.read_excel(p, sheet_name=sheet, engine="openpyxl", nrows=nrows)
        # to_markdown() relies on the optional "tabulate" package, so it is
        # guarded together with the read.
        return df.to_markdown(index=False)
    except Exception as exc:  # pylint: disable=broad-except
        return f"Error: {exc}"


# ---------------------------------------------------------------------------
# Export list
# ---------------------------------------------------------------------------

TOOLS: List[BaseTool] = [
    get_current_time,
    calculator,
    web_search,
    wikipedia_summary,
    youtube_info,
    image_info,
    excel_preview,
]