DeepBoner / src /tools /websearch.py
VibecoderMcSwaggins's picture
fix(phase2): address CodeRabbit review feedback + add examples
32e3b61
raw
history blame
1.95 kB
"""Web search tool using DuckDuckGo."""
import asyncio
from typing import Any
from duckduckgo_search import DDGS
from src.utils.exceptions import SearchError
from src.utils.models import Citation, Evidence
class WebTool:
    """Search tool for general web search via DuckDuckGo.

    Each raw result is read through its ``body``, ``title`` and ``href``
    keys and converted into an ``Evidence`` object whose citation has
    ``source="web"``.
    """

    # Truncation rationale: LLM context limits + cost optimization
    # - Content: 1000 chars (~250 tokens) - web snippets are shorter than abstracts
    # - Title: 500 chars covers most web page titles
    MAX_CONTENT_CHARS = 1000
    MAX_TITLE_CHARS = 500

    def __init__(self) -> None:
        """Create the tool; it holds no state."""
        pass

    @property
    def name(self) -> str:
        """Return the identifier of this tool (``"web"``)."""
        return "web"

    async def search(self, query: str, max_results: int = 10) -> list[Evidence]:
        """Search DuckDuckGo and return evidence.

        Note: duckduckgo-search is synchronous, so the blocking call is
        handed to the event loop's default executor instead of being run
        on the loop itself.

        Args:
            query: Search string forwarded to DuckDuckGo.
            max_results: Maximum number of results requested.

        Returns:
            One ``Evidence`` per result returned by the search.

        Raises:
            SearchError: If anything fails during the search; the original
                exception is chained as ``__cause__``.
        """
        loop = asyncio.get_running_loop()
        try:
            # run_in_executor forwards positional arguments, so no lambda is needed.
            return await loop.run_in_executor(
                None, self._sync_search, query, max_results
            )
        except Exception as e:
            raise SearchError(f"Web search failed: {e}") from e

    @staticmethod
    def _text(value: object, default: str) -> str:
        """Return *value* as a string, or *default* when *value* is ``None``.

        ``dict.get(key, default)`` only applies its default when the key is
        absent; a key that is present with a ``None`` value would otherwise
        reach the slicing below and raise ``TypeError``.
        """
        if value is None:
            return default
        return str(value)

    def _sync_search(self, query: str, max_results: int) -> list[Evidence]:
        """Run the blocking DuckDuckGo query and build the evidence list.

        Args:
            query: Search string forwarded to ``DDGS.text``.
            max_results: Passed through as ``max_results`` to ``DDGS.text``.

        Returns:
            One ``Evidence`` per raw result, with content and title
            truncated to ``MAX_CONTENT_CHARS`` / ``MAX_TITLE_CHARS``.
        """
        with DDGS() as ddgs:
            results: list[dict[str, Any]] = list(
                ddgs.text(query, max_results=max_results)
            )

        return [
            Evidence(
                content=self._text(result.get("body"), "")[: self.MAX_CONTENT_CHARS],
                citation=Citation(
                    source="web",
                    title=self._text(result.get("title"), "Unknown")[
                        : self.MAX_TITLE_CHARS
                    ],
                    url=self._text(result.get("href"), ""),
                    date="Unknown",
                    authors=[],
                ),
            )
            for result in results
        ]