# ask-the-web-agent/src/tools/web_search.py
# debashis2007's picture
# Upload folder using huggingface_hub
# 75bea1c verified
"""Web search tool implementation."""
from __future__ import annotations

import asyncio
from dataclasses import dataclass, field
from typing import Any

import httpx

from src.tools.base import Tool, ToolParameter, ToolResult
from src.utils.config import settings
from src.utils.exceptions import SearchError, SearchNoResultsError
from src.utils.logging import get_logger
# Module-level logger, named after this module via the project's get_logger helper.
logger = get_logger(__name__)
@dataclass
class SearchResult:
    """One hit returned by a search provider.

    Holds the page title, its URL, a text snippet and a relevance score.
    The score defaults to 0.0 when none is supplied.
    """

    # Title of the result page.
    title: str
    # Address of the result page.
    url: str
    # Short text excerpt accompanying the result.
    snippet: str
    # Relevance score; stays at 0.0 unless the caller passes one.
    score: float = 0.0
@dataclass
class WebSearchTool(Tool):
    """Tool for searching the web.

    The backend is chosen by ``settings.search_provider``; ``"tavily"`` and
    ``"duckduckgo"`` are the two values handled here. ``execute`` always
    reports problems through ``ToolResult.fail`` rather than raising.
    """

    name: str = "web_search"
    description: str = (
        "Search the web for current information on any topic. "
        "Returns relevant web pages with titles, URLs, and snippets."
    )
    parameters: list[ToolParameter] = field(default_factory=lambda: [
        ToolParameter(
            name="query",
            type="string",
            description="The search query - be specific and include relevant keywords",
            required=True,
        ),
        ToolParameter(
            name="num_results",
            type="integer",
            description="Number of results to return (1-10)",
            required=False,
            default=5,
        ),
    ])

    @staticmethod
    def _resolve_num_results(raw: Any, fallback: int) -> int:
        """Turn a caller-supplied ``num_results`` into a usable integer.

        Args:
            raw: Value received for ``num_results`` (may be missing, a string,
                a float, or out of range).
            fallback: Value to use when ``raw`` is absent or not numeric.

        Returns:
            ``raw`` converted to ``int`` and clamped to the documented 1-10
            range, or ``fallback`` unchanged when ``raw`` cannot be used.
        """
        # bool is an int subclass; True/False is never a meaningful count.
        if raw is None or isinstance(raw, bool):
            return fallback
        try:
            value = int(raw)
        except (TypeError, ValueError, OverflowError):
            return fallback
        return max(1, min(value, 10))

    async def execute(self, **kwargs: Any) -> ToolResult:
        """Execute a web search.

        Args:
            query: Search query string. Surrounding whitespace is ignored.
            num_results: Number of results to return. Clamped to 1-10; when
                missing or not numeric, ``settings.max_search_results`` is used.

        Returns:
            ToolResult wrapping a dict with ``query``, ``num_results`` and
            ``results`` (each with title, url, snippet, score) on success, or
            a failure ToolResult describing what went wrong.
        """
        raw_query = kwargs.get("query")
        # Strip so a whitespace-only query is rejected instead of being sent out.
        query = "" if raw_query is None else str(raw_query).strip()
        if not query:
            return ToolResult.fail("Search query cannot be empty")

        num_results = self._resolve_num_results(
            kwargs.get("num_results"), settings.max_search_results
        )

        try:
            # Use configured search provider
            if settings.search_provider == "tavily":
                results = await self._search_tavily(query, num_results)
            elif settings.search_provider == "duckduckgo":
                results = await self._search_duckduckgo(query, num_results)
            else:
                return ToolResult.fail(f"Unsupported search provider: {settings.search_provider}")

            if not results:
                return ToolResult.fail(f"No results found for query: {query}")

            # Format results for return
            formatted_results = [
                {
                    "title": r.title,
                    "url": r.url,
                    "snippet": r.snippet,
                    "score": r.score,
                }
                for r in results
            ]
            return ToolResult.ok({
                "query": query,
                "num_results": len(formatted_results),
                "results": formatted_results,
            })
        except SearchNoResultsError as e:
            return ToolResult.fail(str(e))
        except Exception as e:
            # Tool boundary: callers get a failed ToolResult, never an exception.
            # logger.exception keeps the traceback that logger.error dropped.
            logger.exception(f"Search failed: {e}")
            return ToolResult.fail(f"Search failed: {e}")

    async def _search_tavily(self, query: str, num_results: int) -> list[SearchResult]:
        """Search using Tavily API.

        Args:
            query: Search query
            num_results: Number of results

        Returns:
            List of search results

        Raises:
            SearchError: If ``settings.tavily_api_key`` is not set.
            httpx.HTTPError: If the request fails or returns an error status.
        """
        if not settings.tavily_api_key:
            raise SearchError("Tavily API key not configured")

        async with httpx.AsyncClient() as client:
            response = await client.post(
                "https://api.tavily.com/search",
                json={
                    "api_key": settings.tavily_api_key,
                    "query": query,
                    "max_results": num_results,
                    "include_answer": False,
                    "include_raw_content": False,
                },
                timeout=30.0,
            )
            response.raise_for_status()
            data = response.json()

        # "or" fallbacks cover fields that are present but null in the payload.
        return [
            SearchResult(
                title=item.get("title", ""),
                url=item.get("url", ""),
                snippet=item.get("content", ""),
                score=item.get("score") or 0.0,
            )
            for item in data.get("results") or []
        ]

    async def _search_duckduckgo(self, query: str, num_results: int) -> list[SearchResult]:
        """Search using DuckDuckGo (no API key required).

        Args:
            query: Search query
            num_results: Number of results

        Returns:
            List of search results

        Raises:
            SearchError: If the ``duckduckgo-search`` package is not installed.
        """
        try:
            from duckduckgo_search import DDGS
        except ImportError as exc:
            raise SearchError(
                "duckduckgo-search package required. Install with: pip install duckduckgo-search"
            ) from exc

        def _blocking_search() -> list[SearchResult]:
            """Run the synchronous DDGS query and collect its hits."""
            with DDGS() as ddgs:
                return [
                    SearchResult(
                        title=hit.get("title", ""),
                        url=hit.get("href", ""),
                        snippet=hit.get("body", ""),
                        score=0.0,  # DuckDuckGo doesn't provide scores
                    )
                    for hit in ddgs.text(query, max_results=num_results) or []
                ]

        # DDGS is used synchronously here; run it in the default executor so the
        # network call does not stall the event loop.
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, _blocking_search)