""" Playwright browser management with stealth for Janus crawler. """ import asyncio import logging import random from typing import Optional logger = logging.getLogger(__name__) USER_AGENTS = [ "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Safari/605.1.15", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36", ] STEALTH_SCRIPTS = [ """ Object.defineProperty(navigator, 'webdriver', { get: () => undefined }); """, """ window.chrome = { runtime: {} }; """, """ Object.defineProperty(navigator, 'languages', { get: () => ['en-US', 'en'] }); """, """ Object.defineProperty(navigator, 'plugins', { get: () => [1, 2, 3, 4, 5] }); """, ] class BrowserManager: """Manages Playwright browser lifecycle with stealth.""" def __init__(self): self._browser = None self._context = None self._playwright = None async def __aenter__(self): await self.launch() return self async def __aexit__(self, exc_type, exc_val, exc_tb): await self.close() async def launch(self): """Launch browser with stealth configuration.""" try: from playwright.async_api import async_playwright except ImportError: logger.error( "Playwright not installed. Run: pip install playwright && playwright install" ) raise try: self._playwright = await async_playwright().start() self._browser = await self._playwright.chromium.launch( headless=True, args=[ "--disable-blink-features=AutomationControlled", "--no-sandbox", "--disable-dev-shm-usage", ], ) except Exception as e: logger.error( "Playwright browser launch failed. Install browser binaries with: playwright install chromium (%s)", e, ) if self._playwright: await self._playwright.stop() self._playwright = None raise ua = random.choice(USER_AGENTS) self._context = await self._browser.new_context( user_agent=ua, viewport={"width": 1920, "height": 1080}, locale="en-US", ) for script in STEALTH_SCRIPTS: await self._context.add_init_script(script) logger.debug("Browser launched with stealth") async def get_page(self): """Get a new page from the browser context.""" if not self._context: await self.launch() return await self._context.new_page() async def close(self): """Clean up browser resources.""" try: if self._context: await self._context.close() if self._browser: await self._browser.close() if self._playwright: await self._playwright.stop() logger.debug("Browser closed") except Exception as e: logger.warning(f"Error closing browser: {e}")