# evoagentx/tools/search_serperapi.py — SerperAPI search tool
# (source: selfevolveagent upload, commit 5374a2d)
import os
import requests
from typing import Dict, Any, Optional, List
from pydantic import Field
from .search_base import SearchBase
from .tool import Tool, Toolkit
from evoagentx.core.logging import logger
import dotenv
dotenv.load_dotenv()
class SearchSerperAPI(SearchBase):
    """
    SerperAPI search tool that provides access to Google search results
    through a simple and efficient API interface.

    Every public entry point returns a dict with four keys: ``results``
    (processed entries), ``raw_data`` (selected raw sections or None),
    ``search_metadata`` (query/engine info or None) and ``error``
    (message string or None).
    """
    # Pydantic field declarations; defaults mirror __init__'s parameter defaults.
    api_key: Optional[str] = Field(default=None, description="SerperAPI authentication key")
    default_location: Optional[str] = Field(default=None, description="Default geographic location")
    default_language: Optional[str] = Field(default="en", description="Default interface language")
    default_country: Optional[str] = Field(default="us", description="Default country code")
    enable_content_scraping: Optional[bool] = Field(default=True, description="Enable full content scraping")
    def __init__(
        self,
        name: str = "SearchSerperAPI",
        num_search_pages: Optional[int] = 10,
        max_content_words: Optional[int] = None,
        api_key: Optional[str] = None,
        default_location: Optional[str] = None,
        default_language: Optional[str] = "en",
        default_country: Optional[str] = "us",
        enable_content_scraping: Optional[bool] = True,
        **kwargs
    ):
        """
        Initialize the SerperAPI Search tool.
        Args:
            name (str): Name of the tool
            num_search_pages (int): Number of search results to retrieve
            max_content_words (int): Maximum number of words to include in content
            api_key (str): SerperAPI authentication key (can also use SERPERAPI_KEY env var)
            default_location (str): Default geographic location for searches
            default_language (str): Default interface language
            default_country (str): Default country code
            enable_content_scraping (bool): Whether to scrape full page content
            **kwargs: Additional keyword arguments for parent class initialization
        """
        super().__init__(
            name=name,
            num_search_pages=num_search_pages,
            max_content_words=max_content_words,
            api_key=api_key,
            default_location=default_location,
            default_language=default_language,
            default_country=default_country,
            enable_content_scraping=enable_content_scraping,
            **kwargs
        )
        # Get API key from parameter or environment variable
        self.api_key = api_key or os.getenv('SERPERAPI_KEY', '')
        # Single endpoint; all searches are POSTed here.
        # NOTE(review): base_url is not a declared pydantic field — assumes
        # SearchBase's model config permits setting extra attributes; confirm.
        self.base_url = "https://google.serper.dev/search"
        if not self.api_key:
            logger.warning("SerperAPI key not found. Set SERPERAPI_KEY environment variable or pass api_key parameter.")
    def _build_serperapi_payload(self, query: str, location: Optional[str] = None,
                                 language: Optional[str] = None, country: Optional[str] = None,
                                 num_results: Optional[int] = None) -> Dict[str, Any]:
        """
        Build SerperAPI request payload.
        Args:
            query (str): Search query
            location (str): Geographic location
            language (str): Interface language
            country (str): Country code
            num_results (int): Number of results to retrieve
        Returns:
            Dict[str, Any]: SerperAPI request payload
        """
        payload = {
            "q": query
        }
        # Add optional parameters if provided
        # (a falsy num_results — None or 0 — means "use the API's default")
        if num_results:
            payload["num"] = num_results
        if location or self.default_location:
            payload["location"] = location or self.default_location
        if language or self.default_language:
            payload["hl"] = language or self.default_language
        if country or self.default_country:
            payload["gl"] = country or self.default_country
        return payload
    def _execute_serperapi_search(self, payload: Dict[str, Any]) -> Dict[str, Any]:
        """
        Execute search using direct HTTP POST requests to SerperAPI.
        Args:
            payload (Dict[str, Any]): Search payload
        Returns:
            Dict[str, Any]: SerperAPI response data
        Raises:
            Exception: For API errors
        """
        try:
            headers = {
                'X-API-KEY': self.api_key,
                'Content-Type': 'application/json'
            }
            # 30 s hard timeout so a stalled API call cannot hang the caller.
            response = requests.post(self.base_url, headers=headers, json=payload, timeout=30)
            response.raise_for_status()
            data = response.json()
            # Check for SerperAPI errors in response
            if "error" in data:
                raise Exception(f"SerperAPI error: {data['error']}")
            return data
        except requests.exceptions.RequestException as e:
            raise Exception(f"SerperAPI request failed: {str(e)}")
        # NOTE(review): this clause also re-catches the in-band error raised
        # above, so those surface as "SerperAPI search failed: SerperAPI
        # error: ..." — double-wrapped but still informative.
        except Exception as e:
            raise Exception(f"SerperAPI search failed: {str(e)}")
    def _process_serperapi_results(self, serperapi_data: Dict[str, Any], max_content_words: Optional[int] = None) -> Dict[str, Any]:
        """
        Process SerperAPI results into structured format with processed results + raw data.
        Args:
            serperapi_data (Dict[str, Any]): Raw SerperAPI response
            max_content_words (int): Maximum words per result content
        Returns:
            Dict[str, Any]: Structured response with processed results and raw data
        """
        processed_results = []
        # 1. Process Knowledge Graph (highest priority)
        if knowledge_graph := serperapi_data.get("knowledgeGraph", {}):
            if description := knowledge_graph.get("description"):
                title = knowledge_graph.get("title", "Unknown")
                content = f"**{title}**\n\n{description}"
                # Add attributes if available
                if attributes := knowledge_graph.get("attributes", {}):
                    content += "\n\n**Key Information:**"
                    for key, value in list(attributes.items())[:5]: # Limit to 5 attributes
                        formatted_key = key.replace('_', ' ').title()
                        content += f"\n• {formatted_key}: {value}"
                # NOTE(review): when max_content_words is None (documented
                # elsewhere as "no limit"), a 200-word cap is still applied
                # here — confirm this fallback is intended.
                processed_results.append({
                    "title": f"Knowledge: {title}",
                    "content": self._truncate_content(content, max_content_words or 200),
                    "url": knowledge_graph.get("descriptionLink", ""),
                    "type": "knowledge_graph",
                    "priority": 1
                })
        # 2. Process Organic Results with scraping
        for item in serperapi_data.get("organic", []):
            url = item.get("link", "")
            title = item.get("title", "No Title")
            snippet = item.get("snippet", "")
            position = item.get("position", 0)
            # Prepare the result dict
            # (same None -> 400-word cap caveat as the knowledge-graph branch)
            result = {
                "title": title,
                "content": self._truncate_content(snippet, max_content_words or 400),
                "url": url,
                "type": "organic",
                "priority": 2,
                "position": position
            }
            # Try to scrape full content if enabled and add as site_content
            # (_scrape_page is inherited from SearchBase; presumably returns
            # a (title, content) pair — verify against the base class)
            if self.enable_content_scraping and url and url.startswith(('http://', 'https://')):
                try:
                    scraped_title, scraped_content = self._scrape_page(url)
                    if scraped_content and scraped_content.strip():
                        # Update title if scraped title is better
                        if scraped_title and scraped_title.strip():
                            result["title"] = scraped_title
                        # Add scraped content as site_content
                        result["site_content"] = self._truncate_content(scraped_content, max_content_words or 400)
                    else:
                        result["site_content"] = None
                except Exception as e:
                    # Scraping is best-effort; failures only downgrade the result.
                    logger.debug(f"Content scraping failed for {url}: {str(e)}")
                    result["site_content"] = None
            else:
                result["site_content"] = None
            # Only include results that have either snippet or scraped content
            if snippet or result.get("site_content"):
                processed_results.append(result)
        # 3. Collect raw data sections for LLM processing
        raw_data = {}
        raw_sections = ["relatedSearches"] # SerperAPI specific sections
        for section in raw_sections:
            if section in serperapi_data and serperapi_data[section]:
                raw_data[section] = serperapi_data[section][:5] # Limit to 5 items
        # 4. Extract search metadata
        search_metadata = {}
        if search_params := serperapi_data.get("searchParameters", {}):
            search_metadata = {
                "query": search_params.get("q", ""),
                "engine": search_params.get("engine", ""),
                "type": search_params.get("type", ""),
                "credits": serperapi_data.get("credits", 0)
            }
        # Sort processed results by priority and position
        # (knowledge graph entries first, then organic by rank)
        processed_results.sort(key=lambda x: (x.get("priority", 999), x.get("position", 0)))
        return {
            "results": processed_results,
            "raw_data": raw_data if raw_data else None,
            "search_metadata": search_metadata if search_metadata else None,
            "error": None
        }
    def _handle_api_errors(self, error: Exception) -> str:
        """
        Handle SerperAPI specific errors with appropriate messages.
        Args:
            error (Exception): The exception that occurred
        Returns:
            str: User-friendly error message
        """
        # Classification is by substring match on the lowercased message.
        error_str = str(error).lower()
        if "api key" in error_str or "unauthorized" in error_str:
            return "Invalid or missing SerperAPI key. Please set SERPERAPI_KEY environment variable."
        elif "rate limit" in error_str or "too many requests" in error_str:
            return "SerperAPI rate limit exceeded. Please try again later."
        elif "quota" in error_str or "credit" in error_str:
            return "SerperAPI quota exceeded. Please check your plan limits."
        elif "timeout" in error_str:
            return "SerperAPI request timeout. Please try again."
        else:
            return f"SerperAPI error: {str(error)}"
    def search(self, query: str, num_search_pages: Optional[int] = None, max_content_words: Optional[int] = None,
               location: Optional[str] = None, language: Optional[str] = None, country: Optional[str] = None) -> Dict[str, Any]:
        """
        Search using SerperAPI with comprehensive parameter support.
        Args:
            query (str): The search query
            num_search_pages (int): Number of search results to retrieve
            max_content_words (int): Maximum number of words to include in content
            location (str): Geographic location for localized results
            language (str): Interface language (e.g., 'en', 'es', 'fr')
            country (str): Country code for country-specific results (e.g., 'us', 'uk')
        Returns:
            Dict[str, Any]: Contains search results and optional error message
        """
        # Use instance defaults if parameters not provided
        # (falsy values such as 0 also fall back to the instance default)
        num_search_pages = num_search_pages or self.num_search_pages
        max_content_words = max_content_words or self.max_content_words
        if not self.api_key:
            error_msg = (
                "SerperAPI key is required. Please set SERPERAPI_KEY environment variable "
                "or pass api_key parameter. Get your key from: https://serper.dev/"
            )
            logger.error(error_msg)
            return {"results": [], "raw_data": None, "search_metadata": None, "error": error_msg}
        try:
            logger.info(f"Searching SerperAPI: {query}, "
                        f"num_results={num_search_pages}, max_content_words={max_content_words}")
            # Build request payload
            payload = self._build_serperapi_payload(
                query=query,
                location=location,
                language=language,
                country=country,
                num_results=num_search_pages
            )
            # Execute search using direct HTTP request
            serperapi_data = self._execute_serperapi_search(payload)
            # Process results
            response_data = self._process_serperapi_results(serperapi_data, max_content_words)
            logger.info(f"Successfully retrieved {len(response_data['results'])} processed results")
            return response_data
        except Exception as e:
            # All failures are mapped to a friendly message; the return shape
            # matches the success path so callers can rely on all four keys.
            error_msg = self._handle_api_errors(e)
            logger.error(f"SerperAPI search failed: {error_msg}")
            return {"results": [], "raw_data": None, "search_metadata": None, "error": error_msg}
class SerperAPITool(Tool):
    """Tool wrapper exposing SerperAPI Google search to the agent framework.

    Delegates all work to a shared :class:`SearchSerperAPI` instance that
    must be injected at construction time.
    """
    name: str = "serperapi_search"
    description: str = "Search Google using SerperAPI with comprehensive result processing and content scraping"
    inputs: Dict[str, Dict[str, str]] = {
        "query": {
            "type": "string",
            "description": "The search query to execute"
        },
        "num_search_pages": {
            "type": "integer",
            "description": "Number of search results to retrieve. Default: 10"
        },
        "max_content_words": {
            "type": "integer",
            "description": "Maximum number of words to include in content per result. None means no limit. Default: None"
        },
        "location": {
            "type": "string",
            "description": "Geographic location for localized results (e.g., 'New York, NY', 'London, UK')"
        },
        "language": {
            "type": "string",
            "description": "Interface language code (e.g., 'en', 'es', 'fr', 'de'). Default: en"
        },
        "country": {
            "type": "string",
            "description": "Country code for country-specific results (e.g., 'us', 'uk', 'ca'). Default: us"
        }
    }
    required: Optional[List[str]] = ["query"]

    def __init__(self, search_serperapi: SearchSerperAPI = None):
        """
        Args:
            search_serperapi (SearchSerperAPI): Shared search instance used to
                execute queries. Must be provided before the tool is called.
        """
        super().__init__()
        self.search_serperapi = search_serperapi

    def __call__(self, query: str, num_search_pages: int = None, max_content_words: int = None,
                 location: str = None, language: str = None, country: str = None) -> Dict[str, Any]:
        """Execute SerperAPI search using the SearchSerperAPI instance.

        Args:
            query (str): The search query to execute
            num_search_pages (int): Number of results to retrieve (instance default if None)
            max_content_words (int): Word cap per result content (instance default if None)
            location (str): Geographic location for localized results
            language (str): Interface language code (e.g., 'en')
            country (str): Country code (e.g., 'us')

        Returns:
            Dict[str, Any]: Always contains the keys 'results', 'raw_data',
            'search_metadata' and 'error', matching the shape returned by
            SearchSerperAPI.search() on both success and failure.

        Raises:
            RuntimeError: If no SearchSerperAPI instance was injected.
        """
        if not self.search_serperapi:
            raise RuntimeError("SerperAPI search instance not initialized")
        try:
            return self.search_serperapi.search(
                query=query,
                num_search_pages=num_search_pages,
                max_content_words=max_content_words,
                location=location,
                language=language,
                country=country
            )
        except Exception as e:
            # Fix: mirror the four-key response shape used by
            # SearchSerperAPI.search() error paths — the previous two-key dict
            # ({"results", "error"}) broke callers indexing the other keys.
            return {
                "results": [],
                "raw_data": None,
                "search_metadata": None,
                "error": f"Error executing SerperAPI search: {str(e)}"
            }
class SerperAPIToolkit(Toolkit):
    """Toolkit bundling the SerperAPI search tool behind a shared backend."""

    def __init__(
        self,
        name: str = "SerperAPIToolkit",
        api_key: Optional[str] = None,
        num_search_pages: Optional[int] = 10,
        max_content_words: Optional[int] = None,
        default_location: Optional[str] = None,
        default_language: Optional[str] = "en",
        default_country: Optional[str] = "us",
        enable_content_scraping: Optional[bool] = True,
        **kwargs
    ):
        """
        Initialize SerperAPI Toolkit.

        Args:
            name (str): Name of the toolkit
            api_key (str): SerperAPI authentication key
            num_search_pages (int): Default number of search results to retrieve
            max_content_words (int): Default maximum words per result content
            default_location (str): Default geographic location
            default_language (str): Default interface language
            default_country (str): Default country code
            enable_content_scraping (bool): Whether to enable content scraping
            **kwargs: Additional keyword arguments forwarded to the search backend
        """
        # A single SearchSerperAPI backend is shared by every tool in this kit.
        shared_search = SearchSerperAPI(
            name="SearchSerperAPI",
            api_key=api_key,
            num_search_pages=num_search_pages,
            max_content_words=max_content_words,
            default_location=default_location,
            default_language=default_language,
            default_country=default_country,
            enable_content_scraping=enable_content_scraping,
            **kwargs
        )
        # Register the tool list with the parent toolkit.
        super().__init__(name=name, tools=[SerperAPITool(search_serperapi=shared_search)])
        # Expose the shared backend for direct access by callers.
        self.search_serperapi = shared_search