Spaces:
Running
Running
| """ | |
| Playwright browser management with stealth for Janus crawler. | |
| """ | |
| import asyncio | |
| import logging | |
| import random | |
| from typing import Optional | |
| logger = logging.getLogger(__name__) | |
# Pool of desktop User-Agent strings; BrowserManager.launch() picks one at
# random for each new browser context.
# NOTE(review): the third entry is a Safari UA, while the browser actually
# launched is Chromium and the stealth scripts define `window.chrome` --
# confirm that this mismatch is intended.
USER_AGENTS = [
    # Chrome 120 on macOS
    (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/120.0.0.0 Safari/537.36"
    ),
    # Chrome 120 on Windows 10 (64-bit)
    (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/120.0.0.0 Safari/537.36"
    ),
    # Safari 17.2 on macOS
    (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
        "AppleWebKit/605.1.15 (KHTML, like Gecko) "
        "Version/17.2 Safari/605.1.15"
    ),
    # Chrome 120 on Linux x86_64
    (
        "Mozilla/5.0 (X11; Linux x86_64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/120.0.0.0 Safari/537.36"
    ),
]
# JavaScript snippets registered as init scripts on the browser context by
# BrowserManager.launch(). Each one overrides a property that automation
# detectors commonly inspect.
STEALTH_SCRIPTS = [
    # Make `navigator.webdriver` read as undefined.
    """
    Object.defineProperty(navigator, 'webdriver', { get: () => undefined });
    """,
    # Provide a `window.chrome` object with an (empty) `runtime` member.
    """
    window.chrome = { runtime: {} };
    """,
    # Report an English language preference list.
    """
    Object.defineProperty(navigator, 'languages', { get: () => ['en-US', 'en'] });
    """,
    # Report a non-empty plugin list (five placeholder entries).
    """
    Object.defineProperty(navigator, 'plugins', { get: () => [1, 2, 3, 4, 5] });
    """,
]
class BrowserManager:
    """Manage the lifecycle of a headless Playwright Chromium browser with stealth.

    The manager owns three handles: the Playwright driver, the browser, and
    one browser context configured with a random user agent and the
    ``STEALTH_SCRIPTS`` init scripts. It can be used as an async context
    manager::

        async with BrowserManager() as manager:
            page = await manager.get_page()

    After ``close()`` all handles are reset, so a later ``get_page()``
    launches a fresh browser instead of touching a closed context.
    """

    def __init__(self) -> None:
        self._browser = None
        self._context = None
        self._playwright = None

    async def __aenter__(self) -> "BrowserManager":
        await self.launch()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb) -> None:
        await self.close()

    async def launch(self) -> None:
        """Start Playwright, launch headless Chromium, and create the stealth context.

        Raises:
            ImportError: if the ``playwright`` package is not installed.
            Exception: whatever Playwright raises when the driver, browser or
                context cannot be created. Every handle acquired so far is
                released before the error is re-raised.
        """
        try:
            # Imported lazily so the module can be imported without Playwright.
            from playwright.async_api import async_playwright
        except ImportError:
            logger.error(
                "Playwright not installed. Run: pip install playwright && playwright install"
            )
            raise

        try:
            self._playwright = await async_playwright().start()
            self._browser = await self._playwright.chromium.launch(
                headless=True,
                args=[
                    "--disable-blink-features=AutomationControlled",
                    "--no-sandbox",
                    "--disable-dev-shm-usage",
                ],
            )
        except Exception as e:
            logger.error(
                "Playwright browser launch failed. Install browser binaries with: playwright install chromium (%s)",
                e,
            )
            # close() stops the driver (if it started) and resets all handles;
            # it never raises, so the original error is the one re-raised.
            await self.close()
            raise

        try:
            self._context = await self._browser.new_context(
                user_agent=random.choice(USER_AGENTS),
                viewport={"width": 1920, "height": 1080},
                locale="en-US",
            )
            for script in STEALTH_SCRIPTS:
                await self._context.add_init_script(script)
        except Exception:
            # The browser and driver are already running at this point; release
            # them so a failed context setup does not leak a browser process.
            await self.close()
            raise

        logger.debug("Browser launched with stealth")

    async def get_page(self):
        """Return a new page from the managed context, launching first if needed."""
        if not self._context:
            await self.launch()
        return await self._context.new_page()

    async def close(self) -> None:
        """Release the context, browser and Playwright driver (best effort).

        Each handle is shut down independently, so a failure in one step is
        logged as a warning and does not prevent the remaining steps. The
        handles are cleared up front, which makes repeated calls a no-op and
        lets ``get_page()`` relaunch after a close. This method never raises
        for errors derived from ``Exception``.
        """
        context, browser, playwright = self._context, self._browser, self._playwright
        # Detach before awaiting anything so state is reset even if a step fails.
        self._context = self._browser = self._playwright = None

        all_clean = True
        # Order matters: context first, then browser, then the driver itself.
        for resource, shutdown in (
            (context, "close"),
            (browser, "close"),
            (playwright, "stop"),
        ):
            if resource is None:
                continue
            try:
                await getattr(resource, shutdown)()
            except Exception as e:
                all_clean = False
                logger.warning("Error closing browser: %s", e)

        if all_clean:
            logger.debug("Browser closed")