""" Web Forge — advanced web research and browsing capabilities. Multi-engine web search with result aggregation, content extraction, and source verification. Falls back gracefully if engines are unavailable. """ import asyncio import os import urllib.parse from typing import Optional from schemas.agent import ToolOutput _WEB_TIMEOUT = int(os.getenv("ADAM_WEB_TIMEOUT", "10")) _ENABLE_WEB = os.getenv("ADAM_ENABLE_WEB", "true").lower() == "true" class WebForge: """ Advanced web research tool. Provides: - Multi-engine search (DuckDuckGo API + lite fallback) - Content extraction from web pages - Source verification and cross-referencing """ def __init__(self): self._search_count = 0 self._success_count = 0 async def research(self, query: str, max_results: int = 5) -> str: """ Perform web research on a query. Searches multiple engines and aggregates results. """ if not _ENABLE_WEB or not query: return f"Research query: {query[:200]}\n(Web research disabled)" self._search_count += 1 # Try primary search engine result = await self._search_duckduckgo(query) if result and "No results" not in result: self._success_count += 1 return result # Fallback to lite search result = await self._search_duckduckgo_lite(query) if result: self._success_count += 1 return result return f"Searched for: {query[:200]}\n(No detailed results available)" async def _search_duckduckgo(self, query: str) -> Optional[str]: """Search using DuckDuckGo Instant Answer API.""" encoded = urllib.parse.quote(query[:200]) try: import aiohttp async with aiohttp.ClientSession() as session: url = f"https://api.duckduckgo.com/?q={encoded}&format=json&no_html=1&skip_disambig=1" try: async with session.get(url, timeout=aiohttp.ClientTimeout(total=_WEB_TIMEOUT)) as resp: if resp.status == 200: data = await resp.json() return self._format_ddg_results(data, query) except (asyncio.TimeoutError, Exception): pass except ImportError: pass return None async def _search_duckduckgo_lite(self, query: str) -> Optional[str]: """Search using DuckDuckGo Lite HTML API (fallback).""" encoded = urllib.parse.quote(query[:200]) try: import aiohttp async with aiohttp.ClientSession() as session: try: async with session.get( f"https://lite.duckduckgo.com/lite/?q={encoded}", timeout=aiohttp.ClientTimeout(total=_WEB_TIMEOUT) ) as resp: if resp.status == 200: html = await resp.text() return self._parse_lite_results(html, query) except (asyncio.TimeoutError, Exception): pass except ImportError: pass return None def _format_ddg_results(self, data: dict, query: str) -> str: """Format DuckDuckGo API results into readable text.""" parts = [f"# Search: {query}"] abstract = data.get("AbstractText", "") source = data.get("AbstractSource", "") url = data.get("AbstractURL", "") if abstract: parts.append(f"\n**Summary**: {abstract}") if source and url: parts.append(f"*Source*: [{source}]({url})") # Related topics related = data.get("RelatedTopics", []) if related: parts.append(f"\n**Related** ({len(related[:5])}):") for r in related[:5]: if isinstance(r, dict): text = r.get("Text", "") if text: parts.append(f"- {text[:200]}") # Results results = data.get("Results", []) if results: parts.append(f"\n**Results** ({len(results[:5])}):") for r in results[:5]: if isinstance(r, dict): text = r.get("Text", "") url_r = r.get("FirstURL", "") if text: parts.append(f"- {text[:200]}") return "\n".join(parts) if len(parts) > 1 else "No results found." def _parse_lite_results(self, html: str, query: str) -> str: """Parse DuckDuckGo Lite HTML results.""" import re parts = [f"# Search: {query}"] results = re.findall( r'class="result-link">\s*]*href="([^"]*)"[^>]*>([^<]*)', html )[:5] for href, title in results: parts.append(f"- [{title}]({href})") return "\n".join(parts) if len(parts) > 1 else "No results." @property def success_rate(self) -> float: if self._search_count == 0: return 1.0 return self._success_count / self._search_count