"""Comprehensive, **key‑free** toolset for the LangGraph agent.

Every tool is fully self‑contained and safe to run inside a public
Hugging Face Space – no private API keys required.

Capabilities covered (GAIA L1):
• general web search • Wikipedia summary • light image inspection
• basic math • current time • YouTube video metadata
• inspect Excel files
"""
# The docstring must be the first statement to become the module ``__doc__``;
# ``from __future__`` imports are allowed to follow it.
from __future__ import annotations
import datetime
import io
import math
import pathlib
import re
import statistics
from typing import List, Dict, Any
import pandas as pd
import requests
from bs4 import BeautifulSoup
from langchain.tools import tool, BaseTool
from PIL import Image
from pytube import YouTube
# ---------------------------------------------------------------------------
# Helper utilities -----------------------------------------------------------
# ---------------------------------------------------------------------------
def _html_text(soup: BeautifulSoup) -> str:
    """Return the text of *soup* flattened onto a single line.

    Text nodes are joined with a space, the ends are trimmed, and every run
    of whitespace is squeezed into one space character.
    """
    raw_text = soup.get_text(" ")
    trimmed = raw_text.strip()
    return re.sub(r"\s+", " ", trimmed)
# ---------------------------------------------------------------------------
# Time & math ----------------------------------------------------------------
# ---------------------------------------------------------------------------
@tool
def get_current_time() -> str:
    """Return the current UTC time (ISO‑8601)."""
    # datetime.utcnow() is deprecated since Python 3.12.  Take an aware UTC
    # timestamp instead, then drop tzinfo so the string keeps the same
    # offset-less format callers have always received.
    now_utc = datetime.datetime.now(datetime.timezone.utc)
    return now_utc.replace(tzinfo=None).isoformat()
@tool
def calculator(expression: str) -> str:
    """Evaluate an arithmetic **expression** (e.g. "2 + 2*3").

    Supported tokens: numbers, + - * / ** % ( ), and constants/funcs from math.*
    Returns the result or an error message.
    """
    import ast  # local import: only this tool needs it

    # Every public name in ``math`` (pi, e, sqrt, sin, ...) is usable directly.
    allowed_names = {
        name: value
        for name, value in vars(math).items()
        if not name.startswith("__")
    }
    try:
        # eval() skips leading blanks by itself but ast.parse() does not.
        source = expression.strip()
        tree = ast.parse(source, mode="eval")
        # SECURITY: this runs eval() on model-supplied text.  Emptying
        # __builtins__ is not a sandbox on its own: attribute chains such as
        # ``().__class__.__base__.__subclasses__()`` still reach arbitrary
        # objects.  Arithmetic never needs attribute access, so refuse it.
        # NOTE(review): this is still not a full sandbox - e.g. ``9**9**9``
        # can exhaust CPU/memory; consider a dedicated expression evaluator.
        if any(isinstance(node, ast.Attribute) for node in ast.walk(tree)):
            return "Error: attribute access is not allowed"
        result = eval(source, {"__builtins__": {}}, allowed_names)
        return str(result)
    except Exception as exc:  # pylint: disable=broad-except
        # Tool contract: report failures as text instead of raising.
        return f"Error: {exc}"
# ---------------------------------------------------------------------------
# Web search & Wikipedia -----------------------------------------------------
# ---------------------------------------------------------------------------
# Endpoint that web_search() POSTs its query form to.
_DDG_URL = "https://duckduckgo.com/html/"
# URL template used by wikipedia_summary(); "{}" is filled with the
# percent-encoded page title.
_WIKI_API = "https://en.wikipedia.org/api/rest_v1/page/summary/{}"
@tool
def web_search(query: str, max_results: int = 6) -> List[Dict[str, str]]:
    """Return up to *max_results* DuckDuckGo hits for **query**.

    Each hit is a dict with keys: title, url, snippet.
    """
    from urllib.parse import parse_qs, urlparse  # local: only needed here

    # A zero limit means "no limit" to the selector engine, so a non-positive
    # request has to be answered here.
    if max_results <= 0:
        return []

    params = {"q": query, "s": "0"}
    html = requests.post(_DDG_URL, data=params, timeout=10).text
    soup = BeautifulSoup(html, "lxml")

    results = []
    for anchor in soup.select("a.result__a", limit=max_results):
        href = anchor.get("href", "")
        # Result links can be DuckDuckGo redirect wrappers that carry the real
        # destination in a ``uddg`` query parameter; hand back the destination
        # when it is there, otherwise the href unchanged.
        parsed = urlparse(href)
        target = parse_qs(parsed.query).get("uddg")
        if target and parsed.netloc.endswith("duckduckgo.com"):
            url = target[0]
        else:
            url = href

        # Not every anchor is guaranteed to sit inside a ``.result`` container;
        # fall back to an empty snippet instead of raising AttributeError.
        container = anchor.find_parent(class_="result")
        snippet_tag = (
            container.select_one(".result__snippet") if container else None
        )
        results.append(
            {
                "title": _html_text(anchor),
                "url": url,
                "snippet": _html_text(snippet_tag) if snippet_tag else "",
            }
        )
    return results
@tool
def wikipedia_summary(title: str) -> str:
    """Return the lead paragraph of a Wikipedia page by **title**."""
    # safe="" also percent-encodes "/": the title fills a single path segment,
    # so a raw slash (e.g. "AC/DC") would be read as a path separator.
    url = _WIKI_API.format(requests.utils.quote(title, safe=""))
    try:
        resp = requests.get(url, timeout=10)
    except requests.RequestException as exc:
        # Report network failures as text, like the other string tools do.
        return f"Error: {exc}"

    if resp.status_code == 404:
        return f"Error: page '{title}' not found."
    if resp.status_code != 200:
        # Rate limits and server errors are not "page not found".
        return (
            f"Error: Wikipedia request for '{title}' failed "
            f"(HTTP {resp.status_code})."
        )

    try:
        data = resp.json()
    except ValueError as exc:  # body was not valid JSON
        return f"Error: {exc}"
    return data.get("extract", "No extract available.")
# ---------------------------------------------------------------------------
# YouTube metadata -----------------------------------------------------------
# ---------------------------------------------------------------------------
@tool
def youtube_info(url: str) -> Dict[str, Any]:
    """Fetch basic metadata (title, length, author, views) of a YouTube video."""
    try:
        yt = YouTube(url)
        # The metadata attributes are read inside the ``try`` as well: the
        # failure can surface when they are accessed, not only when the
        # object is constructed.
        return {
            "title": yt.title,
            "author": yt.author,
            "length_sec": yt.length,
            "views": yt.views,
        }
    except Exception as exc:  # pylint: disable=broad-except
        # Tool contract: report failures as an ``error`` entry, never raise.
        return {"error": str(exc)}
# ---------------------------------------------------------------------------
# Image inspection -----------------------------------------------------------
# ---------------------------------------------------------------------------
@tool
def image_info(path: str) -> Dict[str, Any]:
    """Return basic stats for an image file at **path** (W×H, mode, format, mean
    pixel value per channel)."""
    p = pathlib.Path(path)
    if not p.exists():
        return {"error": "file not found"}
    try:
        with Image.open(p) as im:
            # Read everything we report while the image is still open rather
            # than touching the object after the context manager closed it.
            width, height = im.size
            img_format = im.format
            mode = im.mode
            pixels = list(im.getdata())
    except Exception as exc:  # pylint: disable=broad-except
        return {"error": str(exc)}

    if not pixels:
        # Zero-pixel image: there is nothing to average (and no pixels[0]).
        means = []
    elif isinstance(pixels[0], (tuple, list)):
        # Multi-band pixels arrive as tuples; transpose into one sequence per
        # band and average each band separately.
        means = [statistics.mean(band) for band in zip(*pixels)]
    else:
        # Single-band image: pixels are plain numbers.
        means = [statistics.mean(pixels)]

    return {
        "width": width,
        "height": height,
        "format": img_format,
        "mode": mode,
        "mean_pixel": means,
    }
# ---------------------------------------------------------------------------
# Excel inspection -----------------------------------------------------------
# ---------------------------------------------------------------------------
@tool
def excel_preview(path: str, sheet: str | int = 0, nrows: int = 5) -> str:
    """Return the first *nrows* rows of an Excel sheet as a markdown table."""
    p = pathlib.Path(path)
    if not p.exists():
        return "Error: file not found."
    try:
        df = pd.read_excel(p, sheet_name=sheet, engine="openpyxl", nrows=nrows)
    except Exception as exc:  # pylint: disable=broad-except
        return f"Error: {exc}"

    try:
        return df.to_markdown(index=False)
    except ImportError:
        # to_markdown() needs the optional ``tabulate`` package; fall back to
        # a plain-text table instead of failing the whole tool call.
        return df.to_string(index=False)
    except Exception as exc:  # pylint: disable=broad-except
        # Rendering problems are reported as text too, never raised.
        return f"Error: {exc}"
# ---------------------------------------------------------------------------
# Export list ----------------------------------------------------------------
# ---------------------------------------------------------------------------
# Every tool defined in this module, in the order it is handed to the agent.
TOOLS: List[BaseTool] = [
    get_current_time,
    calculator,
    web_search,
    wikipedia_summary,
    youtube_info,
    image_info,
    excel_preview,
]