# granite-code-3b/shared/agent/tools/web_forge.py
# AjinkyaPagare's picture
# ADAM v2.0: Advanced Agentic Mesh — DAG orchestrator, cognition, knowledge web, forge tools, runtime optimization
# Commit: ba2ada2
"""
Web Forge — advanced web research and browsing capabilities.
Multi-engine web search with result aggregation, content extraction,
and source verification. Falls back gracefully if engines are unavailable.
"""
import asyncio
import os
import urllib.parse
from typing import Optional
from schemas.agent import ToolOutput
# Timeout in seconds applied to each web request, read from ADAM_WEB_TIMEOUT.
# A malformed value falls back to the default instead of raising ValueError at
# import time, which would make the whole module unimportable.
_DEFAULT_WEB_TIMEOUT = 10
try:
    _WEB_TIMEOUT = int(os.getenv("ADAM_WEB_TIMEOUT", str(_DEFAULT_WEB_TIMEOUT)))
except ValueError:
    _WEB_TIMEOUT = _DEFAULT_WEB_TIMEOUT
# Web access is enabled only when ADAM_ENABLE_WEB is unset or equals "true"
# (case-insensitive, surrounding whitespace ignored).
_ENABLE_WEB = os.getenv("ADAM_ENABLE_WEB", "true").strip().lower() == "true"
class WebForge:
    """
    Best-effort web research tool backed by DuckDuckGo.

    Provides:
    - Search via the DuckDuckGo Instant Answer JSON API, with the DuckDuckGo
      Lite HTML page as a fallback when the API yields nothing usable
    - Markdown formatting of summaries, related topics and result links
    - Success-rate bookkeeping across ``research`` calls

    Network access needs the optional ``aiohttp`` package. When it is missing
    or a request fails, a search degrades to a placeholder message instead of
    raising.
    """

    # Sentinel texts the formatters return when they found nothing to report.
    # ``research`` compares against them exactly, so a genuine result that
    # merely contains the words "No results" is not mistaken for a miss.
    _NO_RESULTS_API = "No results found."
    _NO_RESULTS_LITE = "No results."

    def __init__(self):
        # Number of research() calls that got past the enabled/empty-query guard.
        self._search_count = 0
        # Number of those calls that returned real search results.
        self._success_count = 0

    async def research(self, query: str, max_results: int = 5) -> str:
        """
        Perform web research on a query.

        Tries the Instant Answer API first and falls back to the Lite HTML
        search when the API returns nothing usable.

        Args:
            query: Free-text search query; only its first 200 characters are
                sent to the search engine.
            max_results: Upper bound on the entries listed per report section.
                Negative values are treated as 0.

        Returns:
            Markdown-formatted findings, or a short placeholder message when
            web access is disabled, the query is empty, or nothing was found.
        """
        if not _ENABLE_WEB or not query:
            # ``query or ""`` keeps a ``None`` query from raising on the slice.
            shown = (query or "")[:200]
            return f"Research query: {shown}\n(Web research disabled)"

        self._search_count += 1
        limit = max(0, max_results)

        # Try primary search engine
        result = await self._search_duckduckgo(query, limit)
        if result and result != self._NO_RESULTS_API:
            self._success_count += 1
            return result

        # Fallback to lite search; its "nothing found" sentinel is a non-empty
        # string, so it has to be excluded explicitly to avoid counting a miss
        # as a success.
        result = await self._search_duckduckgo_lite(query, limit)
        if result and result != self._NO_RESULTS_LITE:
            self._success_count += 1
            return result

        return f"Searched for: {query[:200]}\n(No detailed results available)"

    async def _fetch(self, url: str, as_json: bool = False):
        """
        GET *url* and return its body, or ``None`` on any failure.

        Args:
            url: Fully built request URL.
            as_json: Decode the body as JSON when true, otherwise return text.

        Returns:
            The decoded JSON value or the text body for an HTTP 200 response;
            ``None`` when ``aiohttp`` is not installed, the status is not 200,
            or the request/decoding raises.
        """
        try:
            import aiohttp  # optional dependency, imported lazily
        except ImportError:
            return None
        import logging

        try:
            timeout = aiohttp.ClientTimeout(total=_WEB_TIMEOUT)
            async with aiohttp.ClientSession() as session:
                async with session.get(url, timeout=timeout) as resp:
                    if resp.status != 200:
                        return None
                    if as_json:
                        # content_type=None turns off aiohttp's strict
                        # Content-Type check, so JSON served under another
                        # MIME type still decodes.
                        # NOTE(review): the Instant Answer API is reported to
                        # reply with a JavaScript content type — confirm.
                        return await resp.json(content_type=None)
                    return await resp.text()
        except Exception as exc:
            # Deliberately best-effort: a failed lookup must not break the
            # caller, but leave a trace for debugging instead of hiding it.
            logging.getLogger(__name__).debug(
                "Web fetch failed for %s: %r", url, exc
            )
            return None

    async def _search_duckduckgo(self, query: str, max_results: int = 5) -> Optional[str]:
        """Search using DuckDuckGo Instant Answer API; ``None`` if the request failed."""
        encoded = urllib.parse.quote(query[:200])
        url = f"https://api.duckduckgo.com/?q={encoded}&format=json&no_html=1&skip_disambig=1"
        data = await self._fetch(url, as_json=True)
        if not isinstance(data, dict):
            # Covers both a failed fetch (None) and an unexpected JSON shape.
            return None
        return self._format_ddg_results(data, query, max_results)

    async def _search_duckduckgo_lite(self, query: str, max_results: int = 5) -> Optional[str]:
        """Search using DuckDuckGo Lite HTML API (fallback); ``None`` if the request failed."""
        encoded = urllib.parse.quote(query[:200])
        page = await self._fetch(f"https://lite.duckduckgo.com/lite/?q={encoded}")
        if page is None:
            return None
        return self._parse_lite_results(page, query, max_results)

    @staticmethod
    def _bullet_lines(entries, limit: int, link_key: Optional[str] = None) -> list:
        """
        Render API entries as at most *limit* Markdown bullet lines.

        Entries that are not dicts or carry no ``Text`` are skipped and do not
        count towards the limit. When *link_key* is given and the entry has a
        value under it, the bullet becomes a Markdown link.
        """
        lines = []
        if not isinstance(entries, list):
            return lines
        for entry in entries:
            if len(lines) >= limit:
                break
            if not isinstance(entry, dict):
                continue
            text = entry.get("Text") or ""
            if not isinstance(text, str) or not text:
                continue
            snippet = text[:200]
            link = entry.get(link_key) if link_key else None
            lines.append(f"- [{snippet}]({link})" if link else f"- {snippet}")
        return lines

    def _format_ddg_results(self, data: dict, query: str, max_results: int = 5) -> str:
        """
        Format DuckDuckGo API results into readable text.

        Args:
            data: Decoded JSON object returned by the Instant Answer API.
            query: Original query, echoed in the report heading.
            max_results: Upper bound on entries per section.

        Returns:
            A Markdown report, or ``"No results found."`` when the payload
            contains no summary, source, related topic or result.
        """
        limit = max(0, max_results)
        parts = [f"# Search: {query}"]

        abstract = data.get("AbstractText", "")
        source = data.get("AbstractSource", "")
        url = data.get("AbstractURL", "")
        if abstract:
            parts.append(f"\n**Summary**: {abstract}")
        if source and url:
            parts.append(f"*Source*: [{source}]({url})")

        # Related topics. Headers report the number of bullets actually
        # rendered, and a section with no usable entry is omitted entirely so
        # an empty header is never mistaken for a result.
        related = self._bullet_lines(data.get("RelatedTopics"), limit)
        if related:
            parts.append(f"\n**Related** ({len(related)}):")
            parts.extend(related)

        # Results, linked to their FirstURL when one is provided.
        results = self._bullet_lines(data.get("Results"), limit, link_key="FirstURL")
        if results:
            parts.append(f"\n**Results** ({len(results)}):")
            parts.extend(results)

        return "\n".join(parts) if len(parts) > 1 else self._NO_RESULTS_API

    @staticmethod
    def _attr_value(attrs: str, name: str) -> Optional[str]:
        """Return the quoted value of attribute *name* inside an HTML tag's attribute text."""
        import re

        match = re.search(
            rf'''(?<![\w-]){name}\s*=\s*(?:"([^"]*)"|'([^']*)')''', attrs
        )
        if match is None:
            return None
        return match.group(1) if match.group(1) is not None else match.group(2)

    def _parse_lite_results(self, html: str, query: str, max_results: int = 5) -> str:
        """
        Parse DuckDuckGo Lite HTML results.

        An anchor counts as a result when it directly follows an element
        opened with ``class="result-link">`` or when the anchor itself carries
        the ``result-link`` class.
        NOTE(review): live Lite pages appear to use the second form — confirm
        against current markup.

        Args:
            html: Raw HTML of the Lite results page.
            query: Original query, echoed in the report heading.
            max_results: Upper bound on listed links.

        Returns:
            A Markdown list of result links, or ``"No results."`` when no
            result anchor was found.
        """
        import re
        # Imported by name because the ``html`` parameter shadows the module.
        from html import unescape

        limit = max(0, max_results)
        parts = [f"# Search: {query}"]
        anchor_re = re.compile(
            r'(class="result-link">\s*)?<a\b([^>]*)>([^<]*)</a>'
        )
        for match in anchor_re.finditer(html or ""):
            if len(parts) > limit:  # header line + ``limit`` links collected
                break
            wrapped, attrs, raw_title = match.groups()
            classes = (self._attr_value(attrs, "class") or "").split()
            if not wrapped and "result-link" not in classes:
                continue
            # Attribute values and text are entity-encoded in HTML source.
            href = unescape(self._attr_value(attrs, "href") or "").strip()
            if not href:
                continue
            title = " ".join(unescape(raw_title).split())
            parts.append(f"- [{title or href}]({href})")
        return "\n".join(parts) if len(parts) > 1 else self._NO_RESULTS_LITE

    @property
    def success_rate(self) -> float:
        """Fraction of counted searches that produced results; 1.0 before any search."""
        if self._search_count == 0:
            return 1.0
        return self._success_count / self._search_count