Spaces:
Paused
Paused
| """Playwright browser lifecycle management.""" | |
from __future__ import annotations

import asyncio
import json
import logging
import subprocess
import threading
import urllib.request
from collections import deque
from collections.abc import AsyncIterator
from contextlib import asynccontextmanager

from cloakbrowser import ensure_binary, get_default_stealth_args
from playwright.async_api import Browser, BrowserContext, Page, async_playwright

from veilrender.config import settings
# Module-level logger named after this module (``__name__``).
logger = logging.getLogger(__name__)
# TCP port passed to Chromium as ``--remote-debugging-port`` and used for every
# local CDP HTTP/WebSocket URL built in this module.
CDP_PORT = 9222
class BrowserManager:
    """Manages a shared Playwright browser instance.

    Launches Chromium directly (not via Playwright's launch()) so that
    ``--remote-debugging-port`` actually takes effect. Playwright then
    connects over CDP, and the CDP port is also available for external
    clients via the WebSocket proxy.

    All blocking work (HTTP probes of the CDP endpoint, waiting for the
    Chromium process to exit) is pushed to the default executor so the
    event loop stays responsive.
    """

    # Readiness polling: attempts x interval is the nominal startup budget.
    _CDP_READY_ATTEMPTS = 30
    _CDP_READY_INTERVAL = 0.5  # seconds slept after a failed probe
    _CDP_PROBE_TIMEOUT = 1  # seconds allowed for one startup probe
    # How much Chromium stderr is retained / quoted in startup errors.
    _STDERR_TAIL_LINES = 200
    _STDERR_SNIPPET_CHARS = 500

    def __init__(self) -> None:
        """Set up idle state; no process is launched until ``start()``."""
        self._playwright = None
        self._browser: Browser | None = None
        self._chrome_proc: subprocess.Popen[bytes] | None = None
        # Most recent stderr lines of the current Chromium process.
        self._stderr_tail: deque[bytes] = deque(maxlen=self._STDERR_TAIL_LINES)
        self._stderr_thread: threading.Thread | None = None
        self._semaphore = asyncio.Semaphore(settings.max_concurrent)
        self._lock = asyncio.Lock()
        # Pages currently handed out by get_page(); tracked explicitly so
        # active_pages() does not depend on Semaphore internals.
        self._active_pages = 0

    async def start(self) -> None:
        """Launch Chromium with CDP and connect Playwright to it.

        On any failure the partially started Chromium process and
        Playwright driver are torn down before the error propagates.

        Raises:
            RuntimeError: If Chromium exits, or its CDP endpoint does not
                answer, before the readiness budget is used up.
        """
        executable_path = ensure_binary()
        chrome_args = [
            executable_path,
            "--headless",
            f"--remote-debugging-port={CDP_PORT}",
            "--no-first-run",
            "--no-default-browser-check",
            "--disable-setuid-sandbox",
            "--disable-dev-shm-usage",
            "--disable-gpu",
            *get_default_stealth_args(),
            "about:blank",
        ]
        proc = subprocess.Popen(
            chrome_args,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.PIPE,
        )
        self._chrome_proc = proc
        # The pipe must be read continuously: an unread pipe eventually
        # fills the OS buffer and stalls the child on its next write.
        self._start_stderr_drain(proc)

        try:
            await self._wait_for_cdp(proc)
            self._playwright = await async_playwright().start()
            self._browser = await self._playwright.chromium.connect_over_cdp(
                f"http://127.0.0.1:{CDP_PORT}"
            )
        except BaseException:
            # Includes cancellation: never leave an orphaned Chromium or
            # Playwright driver behind a failed start.
            await self.stop()
            raise

        logger.info(
            "Browser started (CloakBrowser %s, CDP on :%d)", executable_path, CDP_PORT
        )

    def _start_stderr_drain(self, proc: subprocess.Popen[bytes]) -> None:
        """Read *proc*'s stderr on a daemon thread, keeping a bounded tail."""
        tail: deque[bytes] = deque(maxlen=self._STDERR_TAIL_LINES)
        self._stderr_tail = tail
        stream = proc.stderr
        if stream is None:
            self._stderr_thread = None
            return

        def drain() -> None:
            try:
                with stream:
                    for line in stream:
                        tail.append(line)
            except (OSError, ValueError):
                # Pipe was closed underneath us during shutdown; there is
                # nothing left to read.
                pass

        thread = threading.Thread(target=drain, name="chromium-stderr", daemon=True)
        thread.start()
        self._stderr_thread = thread

    async def _stderr_snippet(self) -> str:
        """Return the start of the retained Chromium stderr as text."""
        thread = self._stderr_thread
        if thread is not None:
            # Give the reader a moment to reach EOF if Chromium has exited;
            # done off-loop because Thread.join() blocks.
            await asyncio.get_running_loop().run_in_executor(None, thread.join, 0.5)
        raw = b"".join(list(self._stderr_tail))
        return raw.decode(errors="replace")[: self._STDERR_SNIPPET_CHARS]

    @staticmethod
    def _fetch_cdp_version(timeout: float) -> dict[str, object]:
        """Blocking GET of ``/json/version`` on the local CDP port."""
        url = f"http://127.0.0.1:{CDP_PORT}/json/version"
        with urllib.request.urlopen(url, timeout=timeout) as resp:
            return json.loads(resp.read())

    async def _wait_for_cdp(self, proc: subprocess.Popen[bytes]) -> None:
        """Poll the CDP endpoint until it answers.

        Raises:
            RuntimeError: If *proc* exits first or every probe fails.
        """
        loop = asyncio.get_running_loop()
        for _ in range(self._CDP_READY_ATTEMPTS):
            if proc.poll() is not None:
                # Chromium is gone; further probing cannot succeed.
                break
            try:
                await loop.run_in_executor(
                    None, self._fetch_cdp_version, self._CDP_PROBE_TIMEOUT
                )
            except Exception:
                # Any failure (refused, timeout, bad reply) means "not ready yet".
                await asyncio.sleep(self._CDP_READY_INTERVAL)
            else:
                return

        exit_code = proc.poll()
        stderr = await self._stderr_snippet()
        if exit_code is not None:
            raise RuntimeError(
                f"Chromium exited with code {exit_code} before CDP was ready. "
                f"stderr: {stderr}"
            )
        budget = self._CDP_READY_ATTEMPTS * self._CDP_READY_INTERVAL
        raise RuntimeError(
            f"Chromium CDP not ready after {budget:g}s. stderr: {stderr}"
        )

    async def get_cdp_url(self) -> str | None:
        """Return the internal CDP WebSocket URL.

        Restarts the browser first if it is not running. When the
        ``webSocketDebuggerUrl`` cannot be read from ``/json/version`` the
        bare ``ws://127.0.0.1:<port>`` address is returned instead; this
        implementation never returns ``None`` (the annotation is kept for
        interface compatibility).
        """
        await self._ensure_browser()
        try:
            data = await asyncio.get_running_loop().run_in_executor(
                None, self._fetch_cdp_version, 2
            )
            ws_url = data.get("webSocketDebuggerUrl")
            if ws_url:
                return ws_url
        except Exception:
            logger.debug("Failed to get CDP WebSocket URL", exc_info=True)
        return f"ws://127.0.0.1:{CDP_PORT}"

    async def stop(self) -> None:
        """Close the browser, Playwright, and Chromium process.

        Best-effort: a failure in one step is logged and does not prevent
        the remaining steps, so the Chromium process is always reaped.
        """
        browser, self._browser = self._browser, None
        if browser is not None:
            try:
                await browser.close()
            except Exception:
                logger.debug("Ignoring error while closing browser", exc_info=True)

        playwright, self._playwright = self._playwright, None
        if playwright is not None:
            try:
                await playwright.stop()
            except Exception:
                logger.warning("Error while stopping Playwright", exc_info=True)

        proc, self._chrome_proc = self._chrome_proc, None
        # The drain thread is a daemon and ends by itself at pipe EOF.
        self._stderr_thread = None
        if proc is not None:
            # Popen.wait() blocks, so run the terminate/kill sequence off-loop.
            await asyncio.get_running_loop().run_in_executor(
                None, self._terminate_process, proc
            )
        logger.info("Browser stopped")

    @staticmethod
    def _terminate_process(proc: subprocess.Popen[bytes], grace: float = 5) -> None:
        """Terminate *proc*, escalating to kill after *grace* seconds."""
        proc.terminate()
        try:
            proc.wait(timeout=grace)
        except subprocess.TimeoutExpired:
            proc.kill()
            # Reap the killed process so it does not linger as a zombie.
            proc.wait()

    async def _ensure_browser(self) -> Browser:
        """Return the connected browser, restarting it if it crashed.

        Raises:
            RuntimeError: If no browser is available after a restart.
        """
        async with self._lock:
            proc = self._chrome_proc
            chrome_dead = proc is None or proc.poll() is not None
            browser_dead = self._browser is None or not self._browser.is_connected()
            if chrome_dead or browser_dead:
                logger.warning("Browser not connected, restarting...")
                await self.stop()
                await self.start()
            browser = self._browser
        if browser is None:
            # Explicit check instead of ``assert`` so it survives ``python -O``.
            raise RuntimeError("Browser is not available after restart")
        return browser

    def active_pages(self) -> int:
        """Number of browser pages currently in use."""
        return self._active_pages

    def is_browser_alive(self) -> bool:
        """Whether the browser process is connected."""
        return self._browser is not None and self._browser.is_connected()

    @asynccontextmanager
    async def get_page(
        self,
        *,
        viewport_width: int | None = None,
        viewport_height: int | None = None,
    ) -> AsyncIterator[tuple[BrowserContext, Page]]:
        """Create an isolated browser context and page.

        Use as ``async with manager.get_page() as (context, page):``.
        Concurrency is capped by the semaphore sized from
        ``settings.max_concurrent``.

        Args:
            viewport_width: Viewport width; a falsy value selects
                ``settings.viewport_width``.
            viewport_height: Viewport height; a falsy value selects
                ``settings.viewport_height``.

        Yields:
            A (context, page) tuple. Both are closed automatically.
        """
        async with self._semaphore:
            self._active_pages += 1
            try:
                browser = await self._ensure_browser()
                context = await browser.new_context(
                    viewport={
                        "width": viewport_width or settings.viewport_width,
                        "height": viewport_height or settings.viewport_height,
                    },
                    user_agent=None,  # use Playwright default
                )
                try:
                    page = await context.new_page()
                    yield context, page
                finally:
                    # Closing the context also closes the pages it owns.
                    await context.close()
            finally:
                self._active_pages -= 1
# Module-level BrowserManager instance, created when this module is imported.
browser_manager = BrowserManager()