Spaces:
Sleeping
Sleeping
| """ | |
| Browser automation via Playwright (runs inside E2B sandbox when available, | |
| otherwise locally). Provides retry-safe, structured browser actions. | |
| """ | |
| from __future__ import annotations | |
| import base64 | |
| import logging | |
| import os | |
| from dataclasses import dataclass, field | |
| from typing import Any, Dict, List, Optional | |
| logger = logging.getLogger("browser") | |
@dataclass
class BrowserResult:
    """Structured outcome of a single browser action.

    The class must be a dataclass: callers construct it with keyword
    arguments (``BrowserResult(ok=..., action=...)``) and ``meta`` relies on
    ``field(default_factory=dict)``, neither of which works on a plain class.
    """

    # True when the action completed without raising.
    ok: bool
    # Name of the action that produced this result (e.g. "navigate", "click").
    action: str
    # URL associated with the action, when the action reports one.
    url: str = ""
    # Textual payload of the action, when it has one.
    text: str = ""
    # Base64-encoded (ASCII) image data for screenshot results.
    screenshot_b64: str = ""
    # Error message when ``ok`` is False; empty otherwise.
    error: str = ""
    # Free-form extra details; a fresh dict per instance (never shared).
    meta: Dict[str, Any] = field(default_factory=dict)
class BrowserController:
    """Lightweight controller; lazy-initializes Playwright.

    A headless Chromium instance, context and page are created on the first
    action and reused afterwards. Public actions never raise: failures are
    reported through ``BrowserResult(ok=False, error=...)``.
    """

    def __init__(self) -> None:
        self._playwright = None
        self._browser = None
        self._context = None
        self._page = None
        # Tri-state cache: None = not probed yet, otherwise the probe result.
        self._available: Optional[bool] = None

    def available(self) -> bool:
        """Return whether the Playwright sync API can be imported.

        The import is attempted once; the outcome is cached on the instance.
        """
        if self._available is None:
            try:
                import playwright  # noqa: F401
                from playwright.sync_api import sync_playwright  # noqa: F401
                self._available = True
            except Exception as e:
                logger.warning("Playwright not installed: %s", e)
                self._available = False
        return self._available

    def _ensure(self):
        """Return the shared page, starting Playwright/Chromium if needed.

        Raises whatever Playwright raises during start-up. On failure every
        partially created resource is released so a later call starts clean.
        """
        if self._page is not None:
            return self._page
        from playwright.sync_api import sync_playwright
        try:
            self._playwright = sync_playwright().start()
            self._browser = self._playwright.chromium.launch(
                headless=True,
                args=["--no-sandbox", "--disable-dev-shm-usage"],
            )
            self._context = self._browser.new_context()
            self._page = self._context.new_page()
        except Exception:
            # Do not leak a half-started driver/browser; the next attempt
            # must begin from a fully reset state.
            self.close()
            raise
        return self._page

    def navigate(self, url: str, timeout_ms: int = 30000) -> BrowserResult:
        """Load ``url`` and return the final URL plus the page title as text."""
        # ``available`` is a method: it must be *called*. The bare attribute
        # is a bound method, which is always truthy, so the guard never fired.
        if not self.available():
            return BrowserResult(ok=False, action="navigate", url=url, error="playwright not available")
        try:
            page = self._ensure()
            page.goto(url, timeout=timeout_ms, wait_until="domcontentloaded")
            return BrowserResult(ok=True, action="navigate", url=page.url, text=page.title())
        except Exception as e:
            logger.exception("navigate failed")
            return BrowserResult(ok=False, action="navigate", url=url, error=str(e))

    def click(self, selector: str, timeout_ms: int = 10000) -> BrowserResult:
        """Click the element matching ``selector`` on the current page."""
        if not self.available():
            return BrowserResult(ok=False, action="click", error="playwright not available")
        try:
            page = self._ensure()
            page.click(selector, timeout=timeout_ms)
            return BrowserResult(ok=True, action="click", meta={"selector": selector})
        except Exception as e:
            logger.exception("click failed")
            return BrowserResult(ok=False, action="click", error=str(e))

    def type_text(self, selector: str, text: str, timeout_ms: int = 10000) -> BrowserResult:
        """Fill the element matching ``selector`` with ``text``."""
        if not self.available():
            return BrowserResult(ok=False, action="type", error="playwright not available")
        try:
            page = self._ensure()
            page.fill(selector, text, timeout=timeout_ms)
            return BrowserResult(ok=True, action="type", meta={"selector": selector})
        except Exception as e:
            logger.exception("type failed")
            return BrowserResult(ok=False, action="type", error=str(e))

    def screenshot(self) -> BrowserResult:
        """Capture the current viewport and return it base64-encoded."""
        if not self.available():
            return BrowserResult(ok=False, action="screenshot", error="playwright not available")
        try:
            page = self._ensure()
            png = page.screenshot(full_page=False)
            b64 = base64.b64encode(png).decode("ascii")
            return BrowserResult(ok=True, action="screenshot", url=page.url, screenshot_b64=b64)
        except Exception as e:
            logger.exception("screenshot failed")
            return BrowserResult(ok=False, action="screenshot", error=str(e))

    def scrape_text(self) -> BrowserResult:
        """Return ``document.body.innerText`` of the current page.

        The text is truncated to the first 20000 characters.
        """
        if not self.available():
            return BrowserResult(ok=False, action="scrape", error="playwright not available")
        try:
            page = self._ensure()
            content = page.evaluate("() => document.body ? document.body.innerText : ''")
            return BrowserResult(ok=True, action="scrape", url=page.url, text=(content or "")[:20000])
        except Exception as e:
            logger.exception("scrape failed")
            return BrowserResult(ok=False, action="scrape", error=str(e))

    def close(self):
        """Release context, browser and Playwright driver (best effort).

        Each step is attempted even if an earlier one fails; errors are logged
        at debug level instead of being raised. All handles are reset so the
        controller can lazily start again on the next action.
        """
        teardown = (
            ("context", self._context, "close"),
            ("browser", self._browser, "close"),
            ("playwright", self._playwright, "stop"),
        )
        for label, resource, method_name in teardown:
            try:
                if resource:
                    getattr(resource, method_name)()
            except Exception:
                logger.debug("error while closing %s", label, exc_info=True)
        self._context = self._browser = self._page = self._playwright = None
# Process-wide controller instance, created on first use by get_browser().
_browser: Optional[BrowserController] = None


def get_browser() -> BrowserController:
    """Return the shared ``BrowserController``, creating it on first call."""
    global _browser
    controller = _browser
    if controller is None:
        controller = _browser = BrowserController()
    return controller
def run_browser_action(action: Dict[str, Any]) -> BrowserResult:
    """Dispatch one action dict to the shared browser controller.

    ``action["action"]`` selects the operation: "navigate" (reads "url"),
    "click" (reads "selector"), "type" (reads "selector" and "text"),
    "screenshot" or "scrape". Missing keys default to the empty string.
    Any other operation name yields a failed ``BrowserResult``.
    """
    controller = get_browser()
    op = action.get("action", "")
    # Ordered (name, thunk) pairs; thunks defer argument lookup until matched.
    routes = (
        ("navigate", lambda: controller.navigate(action.get("url", ""))),
        ("click", lambda: controller.click(action.get("selector", ""))),
        (
            "type",
            lambda: controller.type_text(action.get("selector", ""), action.get("text", "")),
        ),
        ("screenshot", lambda: controller.screenshot()),
        ("scrape", lambda: controller.scrape_text()),
    )
    for name, invoke in routes:
        if op == name:
            return invoke()
    return BrowserResult(ok=False, action=op, error=f"unknown action: {op}")