File size: 1,950 Bytes
499170b 32e3b61 499170b 32e3b61 499170b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 |
"""Web search tool using DuckDuckGo."""
import asyncio
from typing import Any
from duckduckgo_search import DDGS
from src.utils.exceptions import SearchError
from src.utils.models import Citation, Evidence
class WebTool:
    """Search tool for general web search via DuckDuckGo.

    Wraps the synchronous ``DDGS`` client behind an async ``search`` method
    and converts every returned hit into an ``Evidence`` record whose
    citation source is ``"web"``.
    """

    # Truncation rationale: LLM context limits + cost optimization
    # - Content: 1000 chars (~250 tokens) - web snippets are shorter than abstracts
    # - Title: 500 chars covers most web page titles
    MAX_CONTENT_CHARS = 1000
    MAX_TITLE_CHARS = 500

    def __init__(self) -> None:
        """Create the tool; no state is kept between searches."""

    @property
    def name(self) -> str:
        """Return the identifier of this tool, ``"web"``."""
        return "web"

    async def search(self, query: str, max_results: int = 10) -> list[Evidence]:
        """Search DuckDuckGo and return evidence.

        Note: duckduckgo-search is synchronous, so the blocking call is run
        in the running loop's default executor instead of on the event loop.

        Args:
            query: Search text handed to DuckDuckGo.
            max_results: Upper bound on results requested from DuckDuckGo.

        Returns:
            One ``Evidence`` per search hit.

        Raises:
            SearchError: If the underlying search or the conversion of its
                results raises any exception; the original exception is
                chained as the cause.
        """
        loop = asyncio.get_running_loop()
        try:
            # Passing the callable and its arguments directly avoids a
            # throwaway lambda; ``None`` selects the loop's default executor.
            return await loop.run_in_executor(
                None, self._sync_search, query, max_results
            )
        except Exception as e:
            raise SearchError(f"Web search failed: {e}") from e

    def _sync_search(self, query: str, max_results: int) -> list[Evidence]:
        """Synchronous search implementation.

        Runs the DuckDuckGo text search and maps each result dict (its
        ``body``, ``title`` and ``href`` keys) onto an ``Evidence``. Content
        and title are truncated to ``MAX_CONTENT_CHARS`` and
        ``MAX_TITLE_CHARS`` respectively.
        """
        with DDGS() as ddgs:
            results: list[dict[str, Any]] = list(ddgs.text(query, max_results=max_results))

        return [
            Evidence(
                content=self._field(result, "body", "")[: self.MAX_CONTENT_CHARS],
                citation=Citation(
                    source="web",
                    title=self._field(result, "title", "Unknown")[: self.MAX_TITLE_CHARS],
                    url=self._field(result, "href", ""),
                    date="Unknown",
                    authors=[],
                ),
            )
            for result in results
        ]

    @staticmethod
    def _field(result: dict[str, Any], key: str, default: str) -> Any:
        """Return ``result[key]``, or ``default`` when it is missing or ``None``.

        ``dict.get(key, default)`` only covers a missing key; a key that is
        present with a ``None`` value would otherwise break the slicing done
        by the caller.
        """
        value = result.get(key)
        return default if value is None else value
|