| from __future__ import annotations |
|
|
| """Comprehensive, **key‑free** toolset for the LangGraph agent. |
| Every tool is fully self‑contained and safe to run inside a public |
| Hugging Face Space – no private API keys required. |
| Capabilities covered (GAIA L1): |
| • general web search • Wikipedia summary • light image inspection |
| • basic math • current time • YouTube video metadata |
| • inspect Excel files |
| """ |
|
|
| import datetime |
| import io |
| import math |
| import pathlib |
| import re |
| import statistics |
| from typing import List, Dict, Any |
|
|
| import pandas as pd |
| import requests |
| from bs4 import BeautifulSoup |
| from langchain.tools import tool, BaseTool |
| from PIL import Image |
| from pytube import YouTube |
|
|
|
|
| |
| |
| |
|
|
| def _html_text(soup: BeautifulSoup) -> str: |
| return re.sub(r"\s+", " ", soup.get_text(" ").strip()) |
|
|
|
|
| |
| |
| |
|
|
@tool
def get_current_time() -> str:
    """Return the current UTC time as a timezone-aware ISO-8601 string.

    Uses ``datetime.now(timezone.utc)`` rather than ``datetime.utcnow()``:
    the latter is deprecated since Python 3.12 and returns a *naive*
    timestamp that is easy to misinterpret. The returned string carries an
    explicit ``+00:00`` offset.
    """
    return datetime.datetime.now(datetime.timezone.utc).isoformat()
|
|
|
|
@tool
def calculator(expression: str) -> str:
    """Evaluate an arithmetic **expression** (e.g. "2 + 2*3").

    Supported tokens: numbers, + - * / ** % ( ), and constants/funcs from
    math.* (``pi``, ``sqrt``, ``sin``, ...).

    Returns the result as a string, or an ``Error: ...`` message on failure.
    """
    # SECURITY: an empty __builtins__ dict does NOT make eval safe — dunder
    # attribute access (e.g. "().__class__.__bases__...") can still escape
    # the sandbox. No legitimate math.* name contains an underscore, so
    # reject the character outright.
    if "_" in expression:
        return "Error: invalid character '_' in expression"

    # Expose only the public math namespace to the evaluated expression.
    allowed_names = {
        k: v for k, v in math.__dict__.items() if not k.startswith("__")
    }
    try:
        result = eval(expression, {"__builtins__": {}}, allowed_names)
        return str(result)
    except Exception as exc:
        return f"Error: {exc}"
|
|
|
|
| |
| |
| |
|
|
# Endpoint for DuckDuckGo's HTML (non-JS) results page — queried by POST.
_DDG_URL = "https://duckduckgo.com/html/"
# Wikipedia REST summary endpoint; "{}" is filled with the URL-quoted title.
_WIKI_API = "https://en.wikipedia.org/api/rest_v1/page/summary/{}"
|
|
|
|
@tool
def web_search(query: str, max_results: int = 6) -> List[Dict[str, str]]:
    """Return *max_results* DuckDuckGo hits for **query**.

    Each hit is a dict with keys: title, url, snippet. Returns an empty
    list when no results are found (or the results page changed layout).
    """
    params = {"q": query, "s": "0"}
    html = requests.post(_DDG_URL, data=params, timeout=10).text
    soup = BeautifulSoup(html, "lxml")

    results = []
    for a in soup.select("a.result__a", limit=max_results):
        title = _html_text(a)
        url = a["href"]
        # find_parent can return None if DDG's markup changes — guard it so
        # one malformed hit doesn't crash the whole search.
        parent = a.find_parent(class_="result")
        snippet_tag = parent.select_one(".result__snippet") if parent else None
        snippet = _html_text(snippet_tag) if snippet_tag else ""
        results.append({"title": title, "url": url, "snippet": snippet})
    return results
|
|
|
|
@tool
def wikipedia_summary(title: str) -> str:
    """Return the lead paragraph of a Wikipedia page by **title**."""
    endpoint = _WIKI_API.format(requests.utils.quote(title))
    response = requests.get(endpoint, timeout=10)
    if response.status_code == 200:
        payload = response.json()
        return payload.get("extract", "No extract available.")
    return f"Error: page '{title}' not found."
|
|
|
|
| |
| |
| |
|
|
@tool
def youtube_info(url: str) -> Dict[str, Any]:
    """Fetch basic metadata (title, length, author, views) of a YouTube video.

    Returns ``{"error": ...}`` on any failure instead of raising.
    """
    try:
        yt = YouTube(url)
        # pytube resolves metadata lazily: reading .title/.author/.length/
        # .views triggers the actual network fetch and can itself raise, so
        # the attribute reads must stay inside the try block.
        return {
            "title": yt.title,
            "author": yt.author,
            "length_sec": yt.length,
            "views": yt.views,
        }
    except Exception as exc:
        return {"error": str(exc)}
|
|
|
|
| |
| |
| |
|
|
@tool
def image_info(path: str) -> Dict[str, Any]:
    """Return basic stats for an image file at **path** (W×H, mode, format, mean
    pixel value per channel).

    Returns ``{"error": ...}`` when the file is missing or unreadable.
    """
    p = pathlib.Path(path)
    if not p.exists():
        return {"error": "file not found"}

    try:
        with Image.open(p) as im:
            # Capture metadata while the image is still open — reading these
            # attributes after the context manager closes the file relies on
            # PIL implementation details.
            info: Dict[str, Any] = {
                "width": im.width,
                "height": im.height,
                "format": im.format,
                "mode": im.mode,
            }
            pixels = list(im.getdata())
    except Exception as exc:
        return {"error": str(exc)}

    if not pixels:
        # Zero-sized image: avoid IndexError on pixels[0] and
        # StatisticsError from mean() of an empty sequence.
        info["mean_pixel"] = []
        return info

    if isinstance(pixels[0], (tuple, list)):
        # Multi-band image (RGB/RGBA/...): one mean per channel.
        channels = list(zip(*pixels))
        means = [statistics.mean(c) for c in channels]
    else:
        # Single-band image (e.g. grayscale "L").
        means = [statistics.mean(pixels)]

    info["mean_pixel"] = means
    return info
|
|
|
|
| |
| |
| |
|
|
@tool
def excel_preview(path: str, sheet: str | int = 0, nrows: int = 5) -> str:
    """Return the first *nrows* rows of an Excel sheet as a markdown table."""
    file_path = pathlib.Path(path)
    if not file_path.exists():
        return "Error: file not found."

    try:
        frame = pd.read_excel(
            file_path, sheet_name=sheet, engine="openpyxl", nrows=nrows
        )
    except Exception as exc:
        return f"Error: {exc}"

    return frame.to_markdown(index=False)
|
|
|
|
| |
| |
| |
|
|
# Registry of every tool in this module, in the order they are defined.
# Imported by the agent setup code to bind the toolset to the LLM.
TOOLS: List[BaseTool] = [
    get_current_time,
    calculator,
    web_search,
    wikipedia_summary,
    youtube_info,
    image_info,
    excel_preview,
]