| """ |
| Web Forge — advanced web research and browsing capabilities. |
| |
| Multi-engine web search with result aggregation, content extraction, |
| and source verification. Falls back gracefully if engines are unavailable. |
| """ |
| import asyncio |
| import os |
| import urllib.parse |
| from typing import Optional |
| from schemas.agent import ToolOutput |
|
|
# Per-request timeout in seconds, read from ADAM_WEB_TIMEOUT. A malformed or
# empty value falls back to the default instead of raising at import time,
# which would otherwise make the whole module unusable.
try:
    _WEB_TIMEOUT = int(os.getenv("ADAM_WEB_TIMEOUT", "10"))
except ValueError:
    _WEB_TIMEOUT = 10

# Web access is enabled unless ADAM_ENABLE_WEB is set to anything other than
# "true" (compared case-insensitively, ignoring surrounding whitespace).
_ENABLE_WEB = os.getenv("ADAM_ENABLE_WEB", "true").strip().lower() == "true"
|
|
|
|
class WebForge:
    """
    Web research tool backed by DuckDuckGo.

    Provides:
    - Search via the DuckDuckGo Instant Answer API, with the DuckDuckGo
      Lite HTML endpoint as a fallback
    - Markdown-formatted result text
    - A running success rate over all attempted searches

    A failed request (missing aiohttp, network error, timeout, non-200
    status, undecodable body) is treated as "no results" for that engine
    rather than raised to the caller.
    """

    # Exact strings the formatters return when they found nothing. research()
    # compares against these so an empty result is never counted as a success.
    _NO_API_RESULTS = "No results found."
    _NO_LITE_RESULTS = "No results."

    def __init__(self):
        # Searches attempted vs. searches that produced at least one result.
        self._search_count = 0
        self._success_count = 0

    async def research(self, query: str, max_results: int = 5) -> str:
        """
        Perform web research on a query.

        Tries the Instant Answer API first, then the Lite HTML endpoint, and
        returns the first non-empty result as Markdown text.

        Args:
            query: Search text; only the first 200 characters are sent.
            max_results: Maximum number of entries listed per result section.

        Returns:
            Formatted results, or a short placeholder message when web access
            is disabled, the query is empty, or no engine produced results.
        """
        query = query or ""  # tolerate None instead of failing on the slice below
        if not _ENABLE_WEB or not query:
            return f"Research query: {query[:200]}\n(Web research disabled)"

        self._search_count += 1

        # Engines in priority order; the first one with real results wins.
        empty_markers = (self._NO_API_RESULTS, self._NO_LITE_RESULTS)
        for engine in (self._search_duckduckgo, self._search_duckduckgo_lite):
            result = await engine(query, max_results)
            if result and result not in empty_markers:
                self._success_count += 1
                return result

        return f"Searched for: {query[:200]}\n(No detailed results available)"

    async def _fetch(self, url: str, as_json: bool = False) -> Optional[object]:
        """
        GET ``url`` and return its decoded body, or None on any failure.

        Args:
            url: Fully built request URL.
            as_json: Decode the body as JSON when True, as text otherwise.

        Returns:
            The parsed JSON value or the response text; None when aiohttp is
            not installed, the status is not 200, or the request fails.
        """
        try:
            import aiohttp  # optional dependency; without it web search is unavailable
        except ImportError:
            return None

        try:
            async with aiohttp.ClientSession() as session:
                async with session.get(
                    url, timeout=aiohttp.ClientTimeout(total=_WEB_TIMEOUT)
                ) as resp:
                    if resp.status != 200:
                        return None
                    if as_json:
                        # The API may label its JSON with a non-JSON MIME type
                        # (e.g. application/x-javascript); content_type=None
                        # disables aiohttp's MIME check so decoding still works.
                        return await resp.json(content_type=None)
                    return await resp.text()
        except Exception as exc:
            # Best-effort boundary: a failing engine must not break research(),
            # but leave a trace so the failure can be diagnosed.
            import logging

            logging.getLogger(__name__).debug("Web request to %s failed: %s", url, exc)
            return None

    async def _search_duckduckgo(self, query: str, max_results: int = 5) -> Optional[str]:
        """
        Search using DuckDuckGo Instant Answer API.

        Returns the formatted results, or None when the request failed.
        """
        encoded = urllib.parse.quote(query[:200])
        url = f"https://api.duckduckgo.com/?q={encoded}&format=json&no_html=1&skip_disambig=1"
        data = await self._fetch(url, as_json=True)
        if data is None:
            return None
        return self._format_ddg_results(data, query, max_results)

    async def _search_duckduckgo_lite(self, query: str, max_results: int = 5) -> Optional[str]:
        """
        Search using DuckDuckGo Lite HTML API (fallback).

        Returns the formatted results, or None when the request failed.
        """
        encoded = urllib.parse.quote(query[:200])
        page = await self._fetch(f"https://lite.duckduckgo.com/lite/?q={encoded}")
        if page is None:
            return None
        return self._parse_lite_results(page, query, max_results)

    @staticmethod
    def _entry_lines(entries, limit: int, link: bool = False) -> list:
        """Turn API entries into at most ``limit`` bullet lines, skipping any without text."""
        lines = []
        if not isinstance(entries, list):
            return lines
        for entry in entries:
            if len(lines) >= limit:
                break
            if not isinstance(entry, dict):
                continue
            text = entry.get("Text")
            if not isinstance(text, str) or not text:
                continue
            snippet = text[:200]
            url = entry.get("FirstURL")
            if link and isinstance(url, str) and url:
                lines.append(f"- [{snippet}]({url})")
            else:
                lines.append(f"- {snippet}")
        return lines

    def _format_ddg_results(self, data: dict, query: str, max_results: int = 5) -> str:
        """
        Format DuckDuckGo API results into readable text.

        Args:
            data: Decoded JSON payload; anything that is not a dict yields
                the "no results" string.
            query: Original query, used for the heading.
            max_results: Maximum entries listed under Related and Results.

        Returns:
            Markdown text, or "No results found." when the payload has no
            summary and no entries with text.
        """
        if not isinstance(data, dict):
            return self._NO_API_RESULTS

        limit = max(0, max_results)
        parts = [f"# Search: {query}"]

        abstract = data.get("AbstractText") or ""
        if abstract:
            parts.append(f"\n**Summary**: {abstract}")
            source = data.get("AbstractSource") or ""
            url = data.get("AbstractURL") or ""
            if source and url:
                parts.append(f"*Source*: [{source}]({url})")

        # Build the lines first so each header shows the number of entries
        # actually listed and is omitted when there are none.
        related = self._entry_lines(data.get("RelatedTopics"), limit)
        if related:
            parts.append(f"\n**Related** ({len(related)}):")
            parts.extend(related)

        results = self._entry_lines(data.get("Results"), limit, link=True)
        if results:
            parts.append(f"\n**Results** ({len(results)}):")
            parts.extend(results)

        return "\n".join(parts) if len(parts) > 1 else self._NO_API_RESULTS

    @staticmethod
    def _resolve_href(href: str) -> str:
        """Make a result href absolute and unwrap a DuckDuckGo ``/l/?uddg=`` redirect if present."""
        if href.startswith("//"):
            href = "https:" + href  # protocol-relative link
        try:
            parsed = urllib.parse.urlsplit(href)
        except ValueError:
            return href
        host = (parsed.hostname or "").lower()
        is_ddg = host == "duckduckgo.com" or host.endswith(".duckduckgo.com")
        if is_ddg and parsed.path.rstrip("/") == "/l":
            target = urllib.parse.parse_qs(parsed.query).get("uddg")
            if target and target[0]:
                return target[0]
        return href

    def _parse_lite_results(self, html: str, query: str, max_results: int = 5) -> str:
        """
        Parse DuckDuckGo Lite HTML results.

        An anchor counts as a result when it carries the ``result-link``
        class itself or directly follows an element with that class.

        Args:
            html: Page markup; None or empty yields the "no results" string.
            query: Original query, used for the heading.
            max_results: Maximum number of links listed.

        Returns:
            Markdown list of links, or "No results." when none were found.
        """
        import re
        from html import unescape  # the ``html`` parameter shadows the module name

        anchor_re = r'''(class=["']result-link["'][^>]*>\s*)?<a\b([^>]*)>(.*?)</a>'''
        own_class_re = r'''\bclass\s*=\s*["']?[^"'>]*(?<![\w-])result-link(?![\w-])'''
        href_re = r'''\bhref\s*=\s*(["'])(.*?)\1'''

        limit = max(0, max_results)
        links = []
        for match in re.finditer(anchor_re, html or "", re.IGNORECASE | re.DOTALL):
            if len(links) >= limit:
                break
            wrapped, attrs, inner = match.groups()
            if not wrapped and not re.search(own_class_re, attrs, re.IGNORECASE):
                continue
            href_match = re.search(href_re, attrs, re.IGNORECASE | re.DOTALL)
            if not href_match:
                continue
            href = self._resolve_href(unescape(href_match.group(2)).strip())
            if not href:
                continue
            # Drop inline markup, decode entities, and collapse whitespace.
            title = " ".join(unescape(re.sub(r"<[^>]+>", "", inner)).split())
            links.append(f"- [{title or href}]({href})")

        if not links:
            return self._NO_LITE_RESULTS
        return "\n".join([f"# Search: {query}"] + links)

    @property
    def success_rate(self) -> float:
        """Fraction of attempted searches that returned results (1.0 before any search)."""
        if self._search_count == 0:
            return 1.0
        return self._success_count / self._search_count
|
|