Spaces:
Running
Running
| import asyncio | |
| import re | |
| import os | |
| from schemas.agent import ToolOutput | |
# Feature flag for the browser tool: on only when the AGENT_ENABLE_BROWSER
# environment variable equals "true" (compared case-insensitively); an unset
# variable is treated as "false".
BROWSER_ENABLED = os.environ.get("AGENT_ENABLE_BROWSER", "false").lower() == "true"
async def browse(url: str = "", query: str = "") -> ToolOutput:
    """Open a web page in headless Chromium and return its title and text.

    Args:
        url: Address to open. When empty, the first URL found in ``query``
            (via ``_extract_url``) is used instead.
        query: Free text that may contain a URL; consulted only when ``url``
            is empty.

    Returns:
        A ``ToolOutput`` with ``tool_name="browser"``. On success ``output``
        holds a ``# <title>`` heading, a blank line, and at most 3000
        characters of page text. Every failure (tool disabled, missing URL,
        Playwright not importable, navigation error) is reported through the
        returned object rather than raised. ``latency_ms`` is always 0; this
        function does not measure it.
    """
    if not BROWSER_ENABLED:
        return ToolOutput(tool_name="browser", output="Browser tool disabled (set AGENT_ENABLE_BROWSER=true)", latency_ms=0)

    # Strip so a whitespace-only value is reported as missing instead of being
    # turned into an unusable "https://   " address.
    target_url = (url or _extract_url(query) or "").strip()
    if not target_url:
        return ToolOutput(tool_name="browser", output="No URL provided", error="Missing URL", latency_ms=0)

    # Compare against the full scheme prefixes, case-insensitively: a bare
    # startswith("http") would also accept hosts such as "httpbin.org" and
    # would reject "HTTP://...".
    if not target_url.lower().startswith(("http://", "https://")):
        target_url = "https://" + target_url

    # NOTE(review): target_url may come from untrusted text and is fetched
    # as-is; if this runs next to internal services, consider an allow-list.

    # Imported lazily so the module still loads when Playwright is absent.
    # Only the import itself is guarded, so an unrelated ImportError raised
    # later is not misreported as "not installed".
    try:
        from playwright.async_api import async_playwright
    except ImportError:
        return ToolOutput(tool_name="browser", output="Playwright not installed. Run: playwright install chromium", error="Playwright not available", latency_ms=0)

    try:
        async with async_playwright() as p:
            browser = await p.chromium.launch(headless=True, args=[
                "--no-sandbox", "--disable-setuid-sandbox",
                "--disable-dev-shm-usage", "--disable-gpu",
            ])
            try:
                context = await browser.new_context(
                    user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
                    viewport={"width": 1280, "height": 720},
                )
                page = await context.new_page()
                await page.goto(target_url, timeout=15000, wait_until="domcontentloaded")
                # Short fixed pause after DOMContentLoaded before reading the page.
                await page.wait_for_timeout(1000)
                title = await page.title()
                # Prefer a main-content container; fall back to <body>. The
                # body can be null for non-HTML documents, so guard it.
                content = await page.evaluate("""
                    () => {
                        const main = document.querySelector('main, article, .content, #content');
                        const root = main || document.body;
                        const text = (root && root.innerText) || '';
                        return text.substring(0, 3000);
                    }
                """)
            finally:
                # Close the browser even when navigation or evaluation fails.
                await browser.close()

        # Real newlines between heading and body (not the literal "\n" text).
        output = f"# {title}\n\n{content[:3000]}"
        return ToolOutput(tool_name="browser", output=output, latency_ms=0)
    except Exception as e:
        # Tool boundary: surface any failure to the caller as an error result.
        return ToolOutput(tool_name="browser", output="", error=str(e), latency_ms=0)
| def _extract_url(text: str) -> str: | |
| urls = re.findall(r'https?://[^\\s<>\"]+|www\\.[^\\s<>\"]+', text) | |
| return urls[0] if urls else "" | |