|
|
import os |
|
|
import requests |
|
|
from typing import Dict, Any, Optional, List |
|
|
from pydantic import Field |
|
|
from .search_base import SearchBase |
|
|
from .tool import Tool, Toolkit |
|
|
from evoagentx.core.logging import logger |
|
|
import dotenv |
|
|
|
|
|
dotenv.load_dotenv() |
|
|
|
|
|
class SearchSerpAPI(SearchBase):
    """
    SerpAPI search tool that provides access to multiple search engines including
    Google, Bing, Baidu, Yahoo, and DuckDuckGo through a unified interface.

    ``search`` always returns a dict with the keys ``results``, ``raw_data``,
    ``search_metadata`` and ``error``; failures are reported through ``error``
    instead of being raised.
    """

    api_key: Optional[str] = Field(default=None, description="SerpAPI authentication key")
    default_engine: Optional[str] = Field(default="google", description="Default search engine")
    default_location: Optional[str] = Field(default=None, description="Default geographic location")
    default_language: Optional[str] = Field(default="en", description="Default interface language")
    default_country: Optional[str] = Field(default="us", description="Default country code")
    enable_content_scraping: Optional[bool] = Field(default=True, description="Enable full content scraping")

    def __init__(
        self,
        name: str = "SearchSerpAPI",
        num_search_pages: Optional[int] = 5,
        max_content_words: Optional[int] = None,
        api_key: Optional[str] = None,
        default_engine: Optional[str] = "google",
        default_location: Optional[str] = None,
        default_language: Optional[str] = "en",
        default_country: Optional[str] = "us",
        enable_content_scraping: Optional[bool] = True,
        **kwargs
    ):
        """
        Initialize the SerpAPI Search tool.

        Args:
            name (str): Name of the tool
            num_search_pages (int): Number of search results to retrieve
            max_content_words (int): Maximum number of words to include in content
            api_key (str): SerpAPI authentication key (can also use SERPAPI_KEY env var)
            default_engine (str): Default search engine (google, bing, baidu, yahoo, duckduckgo)
            default_location (str): Default geographic location for searches
            default_language (str): Default interface language
            default_country (str): Default country code
            enable_content_scraping (bool): Whether to scrape full page content
            **kwargs: Additional keyword arguments for parent class initialization
        """
        super().__init__(
            name=name,
            num_search_pages=num_search_pages,
            max_content_words=max_content_words,
            api_key=api_key,
            default_engine=default_engine,
            default_location=default_location,
            default_language=default_language,
            default_country=default_country,
            enable_content_scraping=enable_content_scraping,
            **kwargs
        )

        # An explicitly passed key wins; otherwise fall back to the environment.
        self.api_key = api_key or os.getenv('SERPAPI_KEY', '')
        self.base_url = "https://serpapi.com/search.json"

        # Only warn here; search() reports the missing key through its "error" field.
        if not self.api_key:
            logger.warning("SerpAPI key not found. Set SERPAPI_KEY environment variable or pass api_key parameter.")

    def _redact_api_key(self, text: str) -> str:
        """Return ``text`` with any occurrence of the configured API key masked."""
        if self.api_key:
            return text.replace(self.api_key, "***")
        return text

    def _build_serpapi_params(self, query: str, engine: Optional[str] = None, location: Optional[str] = None,
                              language: Optional[str] = None, country: Optional[str] = None,
                              search_type: Optional[str] = None,
                              num_results: Optional[int] = None) -> Dict[str, Any]:
        """
        Build SerpAPI request parameters.

        Per-call arguments take precedence over the instance defaults. Optional
        parameters are only included when a value is available.

        Args:
            query (str): Search query
            engine (str): Search engine to use
            location (str): Geographic location
            language (str): Interface language
            country (str): Country code
            search_type (str): Type of search (web, images, news, shopping, maps)
            num_results (int): Number of results to retrieve

        Returns:
            Dict[str, Any]: SerpAPI request parameters
        """
        selected_engine = engine or self.default_engine

        # SerpAPI's Yahoo endpoint reads the query from "p"; the other engines
        # advertised by this tool use "q".
        query_key = "p" if selected_engine == "yahoo" else "q"

        params = {
            query_key: query,
            "api_key": self.api_key,
            "num": num_results or self.num_search_pages,
        }

        # Without an explicit "engine" SerpAPI falls back to its own default,
        # so the caller's engine choice must be sent with the request.
        if selected_engine:
            params["engine"] = selected_engine

        selected_location = location or self.default_location
        if selected_location:
            params["location"] = selected_location

        selected_language = language or self.default_language
        if selected_language:
            params["hl"] = selected_language

        selected_country = country or self.default_country
        if selected_country:
            params["gl"] = selected_country

        # "web" is the plain search and needs no "tbm" value. Unknown search
        # types are ignored rather than rejected.
        # NOTE(review): "tbm" looks Google-specific — confirm behaviour for other engines.
        if search_type and search_type != "web":
            search_type_map = {
                "images": "isch",
                "news": "nws",
                "shopping": "shop",
                "maps": "lcl"
            }
            if search_type in search_type_map:
                params["tbm"] = search_type_map[search_type]

        return params

    def _execute_serpapi_search(self, params: Dict[str, Any]) -> Dict[str, Any]:
        """
        Execute search using direct HTTP requests to SerpAPI.

        Args:
            params (Dict[str, Any]): Search parameters

        Returns:
            Dict[str, Any]: SerpAPI response data

        Raises:
            Exception: For transport/HTTP failures, undecodable responses, and
                errors reported by SerpAPI in the response body
        """
        try:
            response = requests.get(self.base_url, params=params, timeout=30)
            response.raise_for_status()
            data = response.json()
        except requests.exceptions.RequestException as e:
            # requests error messages can embed the full request URL, which
            # carries api_key as a query parameter; mask it before it is
            # logged or returned to callers.
            raise Exception(f"SerpAPI request failed: {self._redact_api_key(str(e))}") from e
        except ValueError as e:
            # Response body was not valid JSON.
            raise Exception(f"SerpAPI search failed: {str(e)}") from e

        # Raised outside the try block so the message is not wrapped a second time.
        if "error" in data:
            raise Exception(f"SerpAPI error: {data['error']}")

        return data

    def _build_knowledge_graph_result(self, knowledge_graph: Dict[str, Any],
                                      max_content_words: Optional[int] = None) -> Optional[Dict[str, Any]]:
        """Format a knowledge-graph panel as a result entry, or return None when it has no description."""
        description = knowledge_graph.get("description")
        if not description:
            return None

        title = knowledge_graph.get("title", "Unknown")
        content = f"**{title}**"

        if kg_type := knowledge_graph.get("type"):
            content += f" ({kg_type})"
        content += f"\n\n{description}"

        if kg_list := knowledge_graph.get("list", {}):
            content += "\n\n**Key Information:**"
            # Only the first five attributes are considered, and at most
            # three values are shown per attribute.
            for key, value in list(kg_list.items())[:5]:
                if isinstance(value, list) and value:
                    formatted_key = key.replace('_', ' ').title()
                    formatted_value = ', '.join(str(v) for v in value[:3])
                    content += f"\n• {formatted_key}: {formatted_value}"

        return {
            "title": f"Knowledge: {title}",
            "content": self._truncate_content(content, max_content_words or 200),
            "url": knowledge_graph.get("source", {}).get("link", ""),
            "type": "knowledge_graph",
            "priority": 1
        }

    def _build_organic_result(self, item: Dict[str, Any],
                              max_content_words: Optional[int] = None) -> Optional[Dict[str, Any]]:
        """Format one organic hit, optionally enriched with scraped page content; None when it has no text."""
        url = item.get("link", "")
        title = item.get("title", "No Title")
        snippet = item.get("snippet", "")
        position = item.get("position", 0)

        result = {
            "title": title,
            "content": self._truncate_content(snippet, max_content_words or 400),
            "url": url,
            "type": "organic",
            "priority": 2,
            "position": position,
            "site_content": None
        }

        if self.enable_content_scraping and url and url.startswith(('http://', 'https://')):
            try:
                scraped_title, scraped_content = self._scrape_page(url)
                if scraped_content and scraped_content.strip():
                    # The page's own title replaces the search-result title
                    # only when the scrape produced usable content.
                    if scraped_title and scraped_title.strip():
                        result["title"] = scraped_title
                    result["site_content"] = self._truncate_content(scraped_content, max_content_words or 400)
            except Exception as e:
                # Scraping is best-effort: keep the snippet-only result.
                logger.debug(f"Content scraping failed for {url}: {str(e)}")
                result["site_content"] = None

        # Drop hits that have neither a snippet nor scraped content.
        if snippet or result.get("site_content"):
            return result
        return None

    def _collect_raw_sections(self, serpapi_data: Dict[str, Any]) -> Dict[str, Any]:
        """Copy the first three entries of each supplementary section present in the response."""
        raw_data = {}
        raw_sections = [
            "local_results", "news_results", "shopping_results",
            "related_questions", "recipes_results", "images_results"
        ]

        for section in raw_sections:
            section_data = serpapi_data.get(section)
            if not section_data:
                continue

            if section == "local_results" and isinstance(section_data, dict):
                places = section_data.get("places", [])[:3]
                if places:
                    raw_data[section] = {"places": places}
            elif isinstance(section_data, list):
                raw_data[section] = section_data[:3]
            # Any other shape is skipped so one unexpected section cannot
            # fail the whole search.

        return raw_data

    def _process_serpapi_results(self, serpapi_data: Dict[str, Any],
                                 max_content_words: Optional[int] = None) -> Dict[str, Any]:
        """
        Process SerpAPI results into structured format with processed results + raw data.

        Args:
            serpapi_data (Dict[str, Any]): Raw SerpAPI response
            max_content_words (int): Maximum words per result content. When
                falsy, knowledge-graph content is capped at 200 words and
                organic/scraped content at 400 words.

        Returns:
            Dict[str, Any]: Structured response with processed results and raw data
        """
        processed_results = []

        if knowledge_graph := serpapi_data.get("knowledge_graph", {}):
            kg_result = self._build_knowledge_graph_result(knowledge_graph, max_content_words)
            if kg_result is not None:
                processed_results.append(kg_result)

        for item in serpapi_data.get("organic_results", []):
            organic_result = self._build_organic_result(item, max_content_words)
            if organic_result is not None:
                processed_results.append(organic_result)

        raw_data = self._collect_raw_sections(serpapi_data)

        search_metadata = {}
        if search_meta := serpapi_data.get("search_metadata", {}):
            search_metadata = {
                "query": search_meta.get("query", ""),
                "location": search_meta.get("location", ""),
                "total_results": search_meta.get("total_results", ""),
                "search_time": search_meta.get("total_time_taken", "")
            }

        # Knowledge graph (priority 1) first, then organic hits by rank.
        processed_results.sort(key=lambda x: (x.get("priority", 999), x.get("position", 0)))

        return {
            "results": processed_results,
            "raw_data": raw_data if raw_data else None,
            "search_metadata": search_metadata if search_metadata else None,
            "error": None
        }

    def _handle_api_errors(self, error: Exception) -> str:
        """
        Handle SerpAPI specific errors with appropriate messages.

        The exception text is matched case-insensitively against known
        keywords; unrecognised errors are passed through with a prefix.

        Args:
            error (Exception): The exception that occurred

        Returns:
            str: User-friendly error message
        """
        error_str = str(error).lower()

        if "api key" in error_str or "unauthorized" in error_str:
            return "Invalid or missing SerpAPI key. Please set SERPAPI_KEY environment variable."
        elif "rate limit" in error_str or "too many requests" in error_str:
            return "SerpAPI rate limit exceeded. Please try again later."
        elif "quota" in error_str or "credit" in error_str:
            return "SerpAPI quota exceeded. Please check your plan limits."
        elif "timeout" in error_str:
            return "SerpAPI request timeout. Please try again."
        else:
            return f"SerpAPI error: {str(error)}"

    def search(self, query: str, num_search_pages: Optional[int] = None, max_content_words: Optional[int] = None,
               engine: Optional[str] = None, location: Optional[str] = None, language: Optional[str] = None,
               country: Optional[str] = None, search_type: Optional[str] = None) -> Dict[str, Any]:
        """
        Search using SerpAPI with comprehensive parameter support.

        Args:
            query (str): The search query
            num_search_pages (int): Number of search results to retrieve
            max_content_words (int): Maximum number of words to include in content
            engine (str): Search engine (google, bing, baidu, yahoo, duckduckgo)
            location (str): Geographic location for localized results
            language (str): Interface language (e.g., 'en', 'es', 'fr')
            country (str): Country code for country-specific results (e.g., 'us', 'uk')
            search_type (str): Type of search (web, images, news, shopping, maps)

        Returns:
            Dict[str, Any]: Contains search results and optional error message.
                On failure ``results`` is empty and ``error`` holds the message;
                this method does not raise.
        """
        num_search_pages = num_search_pages or self.num_search_pages
        max_content_words = max_content_words or self.max_content_words

        if not self.api_key:
            error_msg = (
                "SerpAPI key is required. Please set SERPAPI_KEY environment variable "
                "or pass api_key parameter. Get your key from: https://serpapi.com/"
            )
            logger.error(error_msg)
            return {"results": [], "raw_data": None, "search_metadata": None, "error": error_msg}

        try:
            search_engine = engine or self.default_engine
            logger.info(f"Searching {search_engine} via SerpAPI: {query}, "
                        f"num_results={num_search_pages}, max_content_words={max_content_words}")

            params = self._build_serpapi_params(
                query=query,
                engine=search_engine,
                location=location,
                language=language,
                country=country,
                search_type=search_type,
                num_results=num_search_pages
            )

            serpapi_data = self._execute_serpapi_search(params)

            response_data = self._process_serpapi_results(serpapi_data, max_content_words)

            logger.info(f"Successfully retrieved {len(response_data['results'])} processed results")
            return response_data

        except Exception as e:
            # Boundary of the tool: convert any failure into the error field.
            error_msg = self._handle_api_errors(e)
            logger.error(f"SerpAPI search failed: {error_msg}")
            return {"results": [], "raw_data": None, "search_metadata": None, "error": error_msg}
|
|
|
|
|
|
|
|
class SerpAPITool(Tool):
    """
    Tool wrapper that exposes a ``SearchSerpAPI`` instance as a callable tool.

    ``inputs`` describes the arguments accepted by ``__call__``; only ``query``
    is required.
    """

    name: str = "serpapi_search"
    description: str = "Search multiple search engines using SerpAPI with comprehensive result processing and content scraping"
    inputs: Dict[str, Dict[str, str]] = {
        "query": {
            "type": "string",
            "description": "The search query to execute"
        },
        "num_search_pages": {
            "type": "integer",
            "description": "Number of search results to retrieve. Default: 5"
        },
        "max_content_words": {
            "type": "integer",
            "description": "Maximum number of words to include in content per result. None means no limit. Default: None"
        },
        "engine": {
            "type": "string",
            "description": "Search engine to use: google, bing, baidu, yahoo, duckduckgo. Default: google"
        },
        "location": {
            "type": "string",
            "description": "Geographic location for localized results (e.g., 'New York, NY', 'London, UK')"
        },
        "language": {
            "type": "string",
            "description": "Interface language code (e.g., 'en', 'es', 'fr', 'de'). Default: en"
        },
        "country": {
            "type": "string",
            "description": "Country code for country-specific results (e.g., 'us', 'uk', 'ca'). Default: us"
        },
        "search_type": {
            "type": "string",
            "description": "Type of search: web, images, news, shopping, maps. Default: web"
        }
    }
    required: Optional[List[str]] = ["query"]

    def __init__(self, search_serpapi: SearchSerpAPI = None):
        """
        Initialize the tool.

        Args:
            search_serpapi (SearchSerpAPI): Search backend that calls are delegated to
        """
        super().__init__()
        self.search_serpapi = search_serpapi

    def __call__(self, query: str, num_search_pages: Optional[int] = None, max_content_words: Optional[int] = None,
                 engine: Optional[str] = None, location: Optional[str] = None, language: Optional[str] = None,
                 country: Optional[str] = None, search_type: Optional[str] = None) -> Dict[str, Any]:
        """
        Execute SerpAPI search using the SearchSerpAPI instance.

        Args:
            query (str): The search query
            num_search_pages (int): Number of search results to retrieve
            max_content_words (int): Maximum number of words to include in content
            engine (str): Search engine to use
            location (str): Geographic location for localized results
            language (str): Interface language code
            country (str): Country code
            search_type (str): Type of search (web, images, news, shopping, maps)

        Returns:
            Dict[str, Any]: Dict with ``results``, ``raw_data``,
                ``search_metadata`` and ``error`` keys

        Raises:
            RuntimeError: If no SearchSerpAPI instance was provided
        """
        if not self.search_serpapi:
            raise RuntimeError("SerpAPI search instance not initialized")

        try:
            return self.search_serpapi.search(
                query=query,
                num_search_pages=num_search_pages,
                max_content_words=max_content_words,
                engine=engine,
                location=location,
                language=language,
                country=country,
                search_type=search_type
            )
        except Exception as e:
            # Same keys as the dicts returned by SearchSerpAPI.search, so
            # consumers can read "raw_data"/"search_metadata" unconditionally.
            return {
                "results": [],
                "raw_data": None,
                "search_metadata": None,
                "error": f"Error executing SerpAPI search: {str(e)}"
            }
|
|
|
|
|
|
|
|
class SerpAPIToolkit(Toolkit):
    """Toolkit that bundles one ``SerpAPITool`` with the ``SearchSerpAPI`` backend it uses."""

    def __init__(
        self,
        name: str = "SerpAPIToolkit",
        api_key: Optional[str] = None,
        num_search_pages: Optional[int] = 5,
        max_content_words: Optional[int] = None,
        default_engine: Optional[str] = "google",
        default_location: Optional[str] = None,
        default_language: Optional[str] = "en",
        default_country: Optional[str] = "us",
        enable_content_scraping: Optional[bool] = True,
        **kwargs
    ):
        """
        Create the search backend and register its tool with the toolkit.

        Args:
            name (str): Toolkit name
            api_key (str): SerpAPI authentication key
            num_search_pages (int): Default number of search results to retrieve
            max_content_words (int): Default maximum words per result content
            default_engine (str): Search engine used when a call names none
            default_location (str): Geographic location used when a call names none
            default_language (str): Interface language used when a call names none
            default_country (str): Country code used when a call names none
            enable_content_scraping (bool): Whether result pages are scraped for content
            **kwargs: Extra keyword arguments, forwarded to SearchSerpAPI
        """
        # One backend instance is shared by the tool and by the toolkit itself.
        backend = SearchSerpAPI(
            name="SearchSerpAPI",
            api_key=api_key,
            num_search_pages=num_search_pages,
            max_content_words=max_content_words,
            default_engine=default_engine,
            default_location=default_location,
            default_language=default_language,
            default_country=default_country,
            enable_content_scraping=enable_content_scraping,
            **kwargs
        )

        super().__init__(name=name, tools=[SerpAPITool(search_serpapi=backend)])

        # Assigned after the parent initializer has run, as in the original.
        self.search_serpapi = backend
|
|
|
|
|
|
|
|
|