Peng Ding
add stats dashboard at GET / and restrict deploy-hf triggers
aefeb84
Raw
History Blame Contribute Delete
6.18 kB
"""Playwright browser lifecycle management."""
from __future__ import annotations
import asyncio
import logging
import subprocess
from contextlib import asynccontextmanager
from collections.abc import AsyncIterator
from cloakbrowser import ensure_binary, get_default_stealth_args
from playwright.async_api import Browser, BrowserContext, Page, async_playwright
from veilrender.config import settings
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# TCP port handed to Chromium via --remote-debugging-port; the CDP endpoint
# is then reached at 127.0.0.1 on this port.
CDP_PORT = 9222
class BrowserManager:
    """Manages a shared Playwright browser instance.

    Launches Chromium directly (not via Playwright's launch()) so that
    ``--remote-debugging-port`` actually takes effect. Playwright then
    connects over CDP, and the CDP port is also available for external
    clients via the WebSocket proxy.

    Blocking work (HTTP probes of the CDP endpoint, waiting for the
    Chromium process to exit) runs in the event loop's default executor so
    the loop is never stalled by it.
    """

    # Upper bound, in seconds, on how long start() waits for the CDP endpoint.
    _CDP_STARTUP_TIMEOUT = 15.0
    # Pause between two CDP readiness probes, in seconds.
    _CDP_POLL_INTERVAL = 0.5
    # Grace period, in seconds, between terminate() and kill() on shutdown.
    _CHROME_EXIT_TIMEOUT = 5.0
    # How many bytes of Chromium's stderr are read back for error messages.
    _STDERR_EXCERPT_BYTES = 4096

    def __init__(self) -> None:
        self._playwright = None
        self._browser: Browser | None = None
        self._chrome_proc: subprocess.Popen | None = None  # type: ignore[type-arg]
        # Temporary file receiving Chromium's stderr. A file (rather than a
        # pipe) is used because nothing drains a pipe while the browser runs.
        self._chrome_stderr = None
        self._semaphore = asyncio.Semaphore(settings.max_concurrent)
        self._lock = asyncio.Lock()
        # Number of get_page() callers currently holding a semaphore slot.
        self._active_pages = 0

    @staticmethod
    def _fetch_cdp_version(timeout: float) -> dict:
        """Blocking GET of the CDP ``/json/version`` document.

        Meant to be run in an executor. Raises whatever ``urlopen`` or
        ``json.loads`` raise when the endpoint is unreachable or malformed.
        """
        import json
        import urllib.request

        url = f"http://127.0.0.1:{CDP_PORT}/json/version"
        # The context manager closes the HTTP response deterministically.
        with urllib.request.urlopen(url, timeout=timeout) as resp:
            return json.loads(resp.read())

    @staticmethod
    def _reap_process(proc, grace: float) -> None:
        """Blocking terminate -> wait -> kill sequence for *proc*.

        Meant to be run in an executor so the waits do not block the loop.
        """
        proc.terminate()
        try:
            proc.wait(timeout=grace)
        except subprocess.TimeoutExpired:
            proc.kill()
            try:
                # Reap the killed process so it does not linger as a zombie.
                proc.wait(timeout=grace)
            except subprocess.TimeoutExpired:
                logger.warning("Chromium did not exit after kill()")

    async def _kill_chrome(self) -> str:
        """Terminate the Chromium process and release its stderr capture.

        Returns:
            The beginning of what Chromium wrote to stderr (decoded
            leniently), or an empty string when nothing was captured.
        """
        # Detach state first so a repeated call never handles it twice.
        proc, self._chrome_proc = self._chrome_proc, None
        stderr_log, self._chrome_stderr = self._chrome_stderr, None
        try:
            if proc is not None:
                loop = asyncio.get_running_loop()
                await loop.run_in_executor(
                    None, self._reap_process, proc, self._CHROME_EXIT_TIMEOUT
                )
            if stderr_log is None:
                return ""
            # Read only after the process is gone: the child shares this
            # file's offset, so seeking while it still writes would race.
            stderr_log.seek(0)
            raw = stderr_log.read(self._STDERR_EXCERPT_BYTES)
            return raw.decode(errors="replace")
        finally:
            if stderr_log is not None:
                stderr_log.close()

    async def start(self) -> None:
        """Launch Chromium with CDP and connect Playwright to it.

        Raises:
            RuntimeError: If Chromium exits, or its CDP endpoint does not
                answer, before the startup timeout elapses. The process is
                terminated before the error is raised.
        """
        import tempfile

        executable_path = ensure_binary()
        chrome_args = [
            executable_path,
            "--headless",
            f"--remote-debugging-port={CDP_PORT}",
            "--no-first-run",
            "--no-default-browser-check",
            "--disable-setuid-sandbox",
            "--disable-dev-shm-usage",
            "--disable-gpu",
            *get_default_stealth_args(),
            "about:blank",
        ]

        stderr_log = tempfile.TemporaryFile()
        try:
            self._chrome_proc = subprocess.Popen(
                chrome_args,
                stdout=subprocess.DEVNULL,
                stderr=stderr_log,
            )
        except Exception:
            stderr_log.close()
            raise
        self._chrome_stderr = stderr_log

        # Wait for CDP to be ready
        loop = asyncio.get_running_loop()
        deadline = loop.time() + self._CDP_STARTUP_TIMEOUT
        failure: str | None = None
        last_error: Exception | None = None
        while True:
            exit_code = self._chrome_proc.poll()
            if exit_code is not None:
                # No point in polling a port nobody will ever open.
                failure = (
                    f"Chromium exited with code {exit_code} before CDP was ready."
                )
                break
            try:
                await loop.run_in_executor(None, self._fetch_cdp_version, 1)
            except Exception as exc:  # any probe failure just means "retry"
                last_error = exc
            else:
                break
            if loop.time() >= deadline:
                failure = (
                    "Chromium CDP not ready after "
                    f"{self._CDP_STARTUP_TIMEOUT:g}s."
                )
                break
            await asyncio.sleep(self._CDP_POLL_INTERVAL)

        if failure is not None:
            stderr = await self._kill_chrome()
            raise RuntimeError(f"{failure} stderr: {stderr[:500]}") from last_error

        # Connect Playwright over CDP
        self._playwright = await async_playwright().start()
        self._browser = await self._playwright.chromium.connect_over_cdp(
            f"http://127.0.0.1:{CDP_PORT}"
        )
        logger.info(
            "Browser started (CloakBrowser %s, CDP on :%d)", executable_path, CDP_PORT
        )

    async def get_cdp_url(self) -> str | None:
        """Return the internal CDP WebSocket URL.

        Ensures the browser is running, then asks the CDP HTTP endpoint for
        its ``webSocketDebuggerUrl``. If that lookup fails or the field is
        missing, falls back to ``ws://127.0.0.1:<CDP_PORT>``.
        """
        await self._ensure_browser()
        loop = asyncio.get_running_loop()
        try:
            data = await loop.run_in_executor(None, self._fetch_cdp_version, 2)
            ws_url = data.get("webSocketDebuggerUrl")
            if ws_url:
                return ws_url
        except Exception:
            logger.debug("Failed to get CDP WebSocket URL", exc_info=True)
        return f"ws://127.0.0.1:{CDP_PORT}"

    async def stop(self) -> None:
        """Close the browser, Playwright, and Chromium process.

        Every step is best-effort: a failure in one step is logged and the
        remaining steps still run, so Chromium is always terminated.
        """
        browser, self._browser = self._browser, None
        if browser is not None:
            try:
                await browser.close()
            except Exception:
                logger.debug("Ignoring error while closing browser", exc_info=True)

        playwright, self._playwright = self._playwright, None
        if playwright is not None:
            try:
                await playwright.stop()
            except Exception:
                logger.warning("Error while stopping Playwright", exc_info=True)

        await self._kill_chrome()
        logger.info("Browser stopped")

    async def _ensure_browser(self) -> Browser:
        """Return the connected browser, restarting it first if it crashed.

        Raises:
            RuntimeError: If no browser is available after a restart.
        """
        async with self._lock:
            chrome_dead = (
                self._chrome_proc is None or self._chrome_proc.poll() is not None
            )
            browser_dead = self._browser is None or not self._browser.is_connected()
            if chrome_dead or browser_dead:
                logger.warning("Browser not connected, restarting...")
                await self.stop()
                await self.start()
            if self._browser is None:
                # Explicit check instead of ``assert`` so it survives ``-O``.
                raise RuntimeError("Browser is not available after restart")
            return self._browser

    @property
    def active_pages(self) -> int:
        """Number of browser pages currently in use."""
        return self._active_pages

    @property
    def is_browser_alive(self) -> bool:
        """Whether the browser process is connected."""
        return self._browser is not None and self._browser.is_connected()

    @asynccontextmanager
    async def get_page(
        self,
        *,
        viewport_width: int | None = None,
        viewport_height: int | None = None,
    ) -> AsyncIterator[tuple[BrowserContext, Page]]:
        """Create an isolated browser context and page.

        Waits for a free concurrency slot, makes sure the browser is running
        and opens a fresh context with the requested viewport (falling back
        to the configured default for each dimension that is not given).

        Yields:
            A (context, page) tuple. Both are closed automatically.
        """
        async with self._semaphore:
            self._active_pages += 1
            try:
                browser = await self._ensure_browser()
                context = await browser.new_context(
                    viewport={
                        "width": viewport_width or settings.viewport_width,
                        "height": viewport_height or settings.viewport_height,
                    },
                    user_agent=None,  # use Playwright default
                )
                try:
                    page = await context.new_page()
                    yield context, page
                finally:
                    # Closing the context also closes its pages.
                    await context.close()
            finally:
                self._active_pages -= 1
# Module-level BrowserManager instance, created when this module is imported.
browser_manager = BrowserManager()