# AI Developer Agent
# AI Developer Agent v1.0 backend
# 763ef0d
"""
Browser automation via Playwright (runs inside E2B sandbox when available,
otherwise locally). Provides retry-safe, structured browser actions.
"""
from __future__ import annotations
import base64
import logging
import os
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional
# Module-level logger, registered under the fixed name "browser".
logger = logging.getLogger("browser")
@dataclass
class BrowserResult:
    """Structured outcome of a single browser action.

    Only ``ok`` and ``action`` are required; every other field defaults to
    an empty value, so each action fills in just what it produces.
    """

    # --- always supplied -------------------------------------------------
    ok: bool  # True when the action completed, False otherwise
    action: str  # label of the action that produced this result

    # --- optional payload ------------------------------------------------
    url: str = ""  # URL associated with the result, if any
    text: str = ""  # textual payload (e.g. page title or scraped text)
    screenshot_b64: str = ""  # base64-encoded screenshot bytes
    error: str = ""  # error description when ``ok`` is False

    # Free-form extras; a fresh dict is created for every instance.
    meta: Dict[str, Any] = field(default_factory=dict)
class BrowserController:
    """Lazily-initialized wrapper around a single Playwright Chromium page.

    Playwright is imported only when first needed, so this module can be
    imported on hosts where Playwright is not installed. Every public action
    returns a ``BrowserResult`` instead of raising: failures are reported
    through ``ok=False`` and the ``error`` field.
    """

    # Error text returned by every action when Playwright cannot be imported.
    _UNAVAILABLE = "playwright not available"

    def __init__(self) -> None:
        # Playwright handles; all are created on first use by _ensure().
        self._playwright = None
        self._browser = None
        self._context = None
        self._page = None
        # Cached result of the import probe; None means "not checked yet".
        self._available: Optional[bool] = None

    @property
    def available(self) -> bool:
        """Return True if Playwright's sync API can be imported.

        The import is attempted once and the outcome is cached.
        """
        if self._available is None:
            try:
                import playwright  # noqa: F401
                from playwright.sync_api import sync_playwright  # noqa: F401
                self._available = True
            except Exception as e:
                logger.warning("Playwright not installed: %s", e)
                self._available = False
        return self._available

    def _ensure(self):
        """Return a usable page, launching headless Chromium if necessary.

        A cached page is reused only while it is still open. If the page
        was closed, or a previous start-up attempt failed part-way, the
        stale handles are torn down and a fresh browser is started.

        Raises:
            Exception: whatever Playwright raises while starting up; any
                handles created before the failure are released first.
        """
        page = self._page
        if page is not None and not page.is_closed():
            return page

        from playwright.sync_api import sync_playwright

        # Drop stale handles (closed page) before starting over.
        self.close()
        try:
            self._playwright = sync_playwright().start()
            self._browser = self._playwright.chromium.launch(
                headless=True,
                args=["--no-sandbox", "--disable-dev-shm-usage"],
            )
            self._context = self._browser.new_context()
            self._page = self._context.new_page()
        except Exception:
            # Do not leak a half-started driver/browser: the next call must
            # be able to start from a clean slate.
            self.close()
            raise
        return self._page

    def navigate(self, url: str, timeout_ms: int = 30000) -> BrowserResult:
        """Load ``url`` and wait for the ``domcontentloaded`` event.

        Args:
            url: address to open.
            timeout_ms: navigation timeout in milliseconds.

        Returns:
            On success, a result carrying the page's final URL and its
            title (in ``text``). On failure, a result carrying the
            requested ``url`` and the error message.
        """
        if not self.available:
            return BrowserResult(ok=False, action="navigate", url=url, error=self._UNAVAILABLE)
        try:
            page = self._ensure()
            page.goto(url, timeout=timeout_ms, wait_until="domcontentloaded")
            return BrowserResult(ok=True, action="navigate", url=page.url, text=page.title())
        except Exception as e:
            logger.exception("navigate failed")
            return BrowserResult(ok=False, action="navigate", url=url, error=str(e))

    def click(self, selector: str, timeout_ms: int = 10000) -> BrowserResult:
        """Click the element matching ``selector``.

        Args:
            selector: Playwright selector of the element to click.
            timeout_ms: timeout passed to the click call, in milliseconds.

        Returns:
            On success, a result whose ``meta`` holds the selector. On
            failure, a result with the error message.
        """
        if not self.available:
            return BrowserResult(ok=False, action="click", error=self._UNAVAILABLE)
        try:
            page = self._ensure()
            page.click(selector, timeout=timeout_ms)
            return BrowserResult(ok=True, action="click", meta={"selector": selector})
        except Exception as e:
            logger.exception("click failed")
            return BrowserResult(ok=False, action="click", error=str(e))

    def type_text(self, selector: str, text: str, timeout_ms: int = 10000) -> BrowserResult:
        """Fill the element matching ``selector`` with ``text``.

        The result's ``action`` label is ``"type"``; the value is set with
        Playwright's ``page.fill``.

        Args:
            selector: Playwright selector of the input element.
            text: value to put into the element.
            timeout_ms: timeout passed to the fill call, in milliseconds.

        Returns:
            On success, a result whose ``meta`` holds the selector. On
            failure, a result with the error message.
        """
        if not self.available:
            return BrowserResult(ok=False, action="type", error=self._UNAVAILABLE)
        try:
            page = self._ensure()
            page.fill(selector, text, timeout=timeout_ms)
            return BrowserResult(ok=True, action="type", meta={"selector": selector})
        except Exception as e:
            logger.exception("type failed")
            return BrowserResult(ok=False, action="type", error=str(e))

    def screenshot(self) -> BrowserResult:
        """Capture the current viewport (not the full page).

        Returns:
            On success, a result carrying the current URL and the image
            bytes base64-encoded as ASCII in ``screenshot_b64``. On failure,
            a result with the error message.
        """
        if not self.available:
            return BrowserResult(ok=False, action="screenshot", error=self._UNAVAILABLE)
        try:
            page = self._ensure()
            png = page.screenshot(full_page=False)
            b64 = base64.b64encode(png).decode("ascii")
            return BrowserResult(ok=True, action="screenshot", url=page.url, screenshot_b64=b64)
        except Exception as e:
            logger.exception("screenshot failed")
            return BrowserResult(ok=False, action="screenshot", error=str(e))

    def scrape_text(self) -> BrowserResult:
        """Return the ``innerText`` of the current page's ``<body>``.

        Returns:
            On success, a result carrying the current URL and the text,
            truncated to the first 20000 characters (empty when the page
            has no body). On failure, a result with the error message.
        """
        if not self.available:
            return BrowserResult(ok=False, action="scrape", error=self._UNAVAILABLE)
        try:
            page = self._ensure()
            content = page.evaluate("() => document.body ? document.body.innerText : ''")
            return BrowserResult(ok=True, action="scrape", url=page.url, text=(content or "")[:20000])
        except Exception as e:
            logger.exception("scrape failed")
            return BrowserResult(ok=False, action="scrape", error=str(e))

    def close(self):
        """Release the context, browser and Playwright driver, in that order.

        Teardown is best-effort: an error while releasing one handle is
        logged at debug level and does not prevent the others from being
        released. All handles are reset afterwards, so the controller can
        be used again and will start a fresh browser.
        """
        teardown = (
            ("context", self._context, "close"),
            ("browser", self._browser, "close"),
            ("playwright", self._playwright, "stop"),
        )
        for label, resource, method_name in teardown:
            if resource is None:
                continue
            try:
                getattr(resource, method_name)()
            except Exception:
                logger.debug("error releasing browser %s", label, exc_info=True)
        self._context = self._browser = self._page = self._playwright = None
# Process-wide controller instance; created on the first get_browser() call.
_browser: Optional[BrowserController] = None


def get_browser() -> BrowserController:
    """Return the shared ``BrowserController``, creating it on first use."""
    global _browser
    controller = _browser
    if controller is None:
        controller = BrowserController()
        _browser = controller
    return controller
def run_browser_action(action: Dict[str, Any]) -> BrowserResult:
    """Dispatch an action dict to the shared browser controller.

    ``action["action"]`` selects the operation: ``"navigate"`` (reads
    ``url``), ``"click"`` (reads ``selector``), ``"type"`` (reads
    ``selector`` and ``text``), ``"screenshot"`` or ``"scrape"``. Missing
    arguments default to the empty string. Any other operation name yields
    a failed result with an "unknown action" error.
    """
    controller = get_browser()
    requested = action.get("action", "")

    # Ordered (name, thunk) pairs; thunks defer argument lookup until the
    # matching operation is actually selected.
    routes = (
        ("navigate", lambda: controller.navigate(action.get("url", ""))),
        ("click", lambda: controller.click(action.get("selector", ""))),
        (
            "type",
            lambda: controller.type_text(
                action.get("selector", ""), action.get("text", "")
            ),
        ),
        ("screenshot", lambda: controller.screenshot()),
        ("scrape", lambda: controller.scrape_text()),
    )
    for name, invoke in routes:
        if requested == name:
            return invoke()

    return BrowserResult(ok=False, action=requested, error=f"unknown action: {requested}")