from __future__ import annotations

import json
import os
from typing import Any, Dict, List, Tuple

import requests
from smolagents.tools import Tool


class BrightDataSearchTool(Tool):
    """Tool that runs a web search through the Bright Data request API.

    The engine is one of ``engines``; Google responses are parsed as JSON and
    reduced to a few keys, other engines' response bodies are returned as-is.
    """

    name = "brightdata_search_engine"
    description = """
    Search Google, Bing, or Yandex and get structured results.
    Returns search results with URLs, titles, and descriptions.
    Ideal for gathering current information and news.
    """
    output_type = "string"
    # Supported engine identifiers; anything else falls back to "google".
    engines: Tuple[str, ...] = ("google", "bing", "yandex")

    def __init__(self) -> None:
        """Declare the tool's input schema, then run the base initializer."""
        self.inputs = {
            "query": {
                "type": "string",
                "description": "The search query",
            },
            "engine": {
                "type": "string",
                "description": "Search engine to use",
                "enum": list(self.engines),
                "nullable": True,
                "default": "google",
            },
        }
        super().__init__()

    def forward(self, query: str, engine: str = "google") -> str:
        """Search using Bright Data's search API.

        Args:
            query: The search query.
            engine: Search engine to use (google, bing, or yandex). ``None``
                or an unrecognized value falls back to ``"google"``.

        Returns:
            For Google, a JSON string with the ``organic``, ``images``,
            ``related`` and ``ai_overview`` entries of the response. For other
            engines, the raw response text (requested with
            ``data_format="markdown"``). On an HTTP/transport failure or an
            undecodable Google response, a JSON string ``{"error": "..."}``.

        Raises:
            ValueError: If ``BRIGHT_DATA_API_TOKEN`` is not set.
        """
        api_token = os.getenv("BRIGHT_DATA_API_TOKEN")
        unlocker_zone = os.getenv("BRIGHT_DATA_UNLOCKER_ZONE", "web_unlocker1")

        if not api_token:
            raise ValueError("BRIGHT_DATA_API_TOKEN not found in environment variables")

        # The input schema marks ``engine`` as nullable, so a caller may pass
        # None; treat that the same as the declared default.
        normalized_engine = (engine or "google").strip().lower()
        if normalized_engine not in self.engines:
            normalized_engine = "google"

        search_url = self._build_search_urls(query)[normalized_engine]
        is_google = normalized_engine == "google"

        api_url = "https://api.brightdata.com/request"
        headers = {
            "Authorization": f"Bearer {api_token}",
            "Content-Type": "application/json",
        }
        payload = {
            "url": search_url,
            "zone": unlocker_zone,
            "format": "raw",
        }
        if not is_google:
            payload["data_format"] = "markdown"

        try:
            response = requests.post(api_url, json=payload, headers=headers, timeout=120)
            response.raise_for_status()
            if not is_google:
                return response.text
            return self._format_google_results(response.json())
        except (requests.exceptions.RequestException, ValueError) as exc:
            # ValueError covers a body that is not valid JSON on requests
            # versions whose decode error is not a RequestException subclass.
            return json.dumps({"error": str(exc)})

    def _build_search_urls(self, query: str) -> Dict[str, str]:
        """Return the search URL for every supported engine, keyed by engine name."""
        encoded_query = requests.utils.quote(query)
        return {
            "google": f"https://www.google.com/search?q={encoded_query}&brd_json=1",
            "bing": f"https://www.bing.com/search?q={encoded_query}",
            "yandex": f"https://yandex.com/search/?text={encoded_query}",
        }

    def _format_google_results(self, data: Dict[str, Any]) -> str:
        """Serialize the relevant parts of a parsed Google response as indented JSON.

        A payload that is not a JSON object yields a JSON ``{"error": ...}``
        string instead of raising.
        """
        if not isinstance(data, dict):
            return json.dumps({"error": "Unexpected response format from search API"})

        results: Dict[str, Any] = {
            "organic": data.get("organic", []),
            # ``or []`` guards against an explicit null for the "images" key.
            "images": self._extract_image_links(data.get("images") or []),
            "related": data.get("related", []),
            "ai_overview": data.get("ai_overview"),
        }
        return json.dumps(results, indent=2)

    def _extract_image_links(self, images: List[Dict[str, Any]]) -> List[str]:
        """Return the non-empty ``link`` values of the given image entries.

        Entries that are not dicts are skipped.
        """
        return [
            img["link"]
            for img in images
            if isinstance(img, dict) and img.get("link")
        ]