Spaces:
Running
Running
File size: 4,076 Bytes
d9162ac e3c2163 d9162ac e3c2163 d9162ac e3c2163 d9162ac d45d242 188495c f173aad 90aae85 74117ff |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 |
"""Serper web search tool using Serper API for Google searches."""
import structlog
from tenacity import retry, stop_after_attempt, wait_exponential
from src.tools.query_utils import preprocess_query
from src.tools.rate_limiter import get_serper_limiter
from src.tools.vendored.serper_client import SerperClient
from src.tools.vendored.web_search_core import scrape_urls
from src.utils.config import settings
from src.utils.exceptions import ConfigurationError, RateLimitError, SearchError
from src.utils.models import Citation, Evidence
# Module-level structlog logger; SerperWebSearchTool.search uses it to report
# empty result sets, completed searches, and unexpected failures.
logger = structlog.get_logger()
class SerperWebSearchTool:
    """Web search tool backed by the Serper API (Google search).

    A search obtains result snippets from ``SerperClient``, passes them to
    ``scrape_urls``, and wraps every scraped result in an ``Evidence`` object.
    """

    def __init__(self, api_key: str | None = None) -> None:
        """Create the Serper client and the rate limiter for one API key.

        Args:
            api_key: Serper API key. When omitted or empty, the value of
                ``settings.serper_api_key`` is used instead.

        Raises:
            ConfigurationError: If neither the argument nor the settings
                provide an API key.
        """
        resolved_key = api_key or settings.serper_api_key
        self.api_key = resolved_key
        if not resolved_key:
            raise ConfigurationError(
                "Serper API key required. "
                "Set SERPER_API_KEY environment variable or serper_api_key in settings."
            )

        self._client = SerperClient(api_key=resolved_key)
        self._limiter = get_serper_limiter(resolved_key)

    @property
    def name(self) -> str:
        """Identifier of this search tool (always ``"serper"``)."""
        return "serper"

    async def _rate_limit(self) -> None:
        """Wait on the Serper limiter before a request is issued."""
        await self._limiter.acquire()

    @staticmethod
    def _to_evidence(result) -> Evidence:
        """Wrap one scraped result in an ``Evidence`` object.

        Args:
            result: An item returned by ``scrape_urls``; it must expose
                ``title``, ``text`` and ``url`` attributes.

        Returns:
            Evidence whose content is ``result.text`` and whose citation
            carries the (possibly shortened) title and the URL.
        """
        # Titles are capped at 500 characters to match Citation model
        # validation; longer ones keep 497 characters plus "...".
        max_title_len, suffix = 500, "..."
        title = result.title
        if len(title) > max_title_len:
            title = title[: max_title_len - len(suffix)] + suffix

        content = result.text
        citation = Citation(
            title=title,
            url=result.url,
            source="web",  # "web" matches the SourceName literal; "serper" does not
            date="Unknown",
            authors=[],
        )
        return Evidence(content=content, citation=citation, relevance=0.0)

    @retry(
        stop=stop_after_attempt(3),
        wait=wait_exponential(multiplier=1, min=1, max=10),
        reraise=True,
    )
    async def search(self, query: str, max_results: int = 10) -> list[Evidence]:
        """Run a Serper web search and return the scraped pages as evidence.

        The call is wrapped by tenacity: at most three attempts, an
        exponential wait bounded between 1 and 10 seconds, and the last
        exception re-raised as-is. The rate limiter is acquired inside the
        decorated function, so every attempt waits on it again.

        Args:
            query: The search query string.
            max_results: Maximum number of results requested from the client.

        Returns:
            One ``Evidence`` per scraped result, or an empty list when the
            client returns no search results.

        Raises:
            SearchError: If the search fails; unexpected exceptions are
                logged and wrapped in this type.
            RateLimitError: Propagated unchanged when raised during the search.
        """
        await self._rate_limit()

        # Use the preprocessed query unless preprocessing leaves nothing.
        final_query = preprocess_query(query) or query

        try:
            # Snippet-level results first; full content comes from scraping.
            snippets = await self._client.search(
                final_query, filter_for_relevance=False, max_results=max_results
            )
            if not snippets:
                logger.info("No search results found", query=final_query)
                return []

            pages = await scrape_urls(snippets)
            evidence = [self._to_evidence(page) for page in pages]

            logger.info(
                "Serper search complete",
                query=final_query,
                results_found=len(evidence),
            )
            return evidence
        except (RateLimitError, SearchError):
            # Domain errors pass through untouched.
            raise
        except Exception as e:
            logger.error("Unexpected error in Serper search", error=str(e), query=final_query)
            raise SearchError(f"Serper search failed: {e}") from e
|