import logging
import shutil
import time
from pathlib import Path
from typing import Optional

from playwright.sync_api import sync_playwright, Browser, BrowserContext, Page

from config import HOME_URL

logger = logging.getLogger("bot")


class BrowserSession:
    """Thin wrapper around a single Playwright Chromium browser/context/page.

    The session persists its storage state (cookies, local storage) to
    ``<state_dir>/state.json`` and writes screenshots and HTTP error dumps
    to ``logs_dir``.

    It can be driven explicitly with ``start()`` / ``stop()`` or used as a
    context manager::

        with BrowserSession(headless=True) as session:
            session.goto(HOME_URL)
    """

    # Navigation is attempted this many times before goto() gives up.
    _MAX_NAV_ATTEMPTS = 2
    # File name of the "most recent screenshot" copy kept for the UI.
    _LATEST_SCREENSHOT = "screenshot_latest.png"

    def __init__(
        self,
        headless: bool = False,
        state_dir: str = ".browser_state",
        timeout: int = 30000,
        logs_dir: Optional[str] = None,
    ):
        """Store configuration; no browser is launched until ``start()``.

        Args:
            headless: Passed to ``chromium.launch``.
            state_dir: Directory holding the persisted ``state.json``.
            timeout: Default timeout (milliseconds) applied to the context
                and used for navigation when no override is given.
            logs_dir: Directory for screenshots and HTTP error dumps.
                Relative paths are resolved against the current working
                directory at construction time; defaults to ``<cwd>/logs``.
        """
        self.headless = headless
        self.state_path = Path(state_dir) / "state.json"
        self.timeout = timeout

        # Use absolute path for logs directory
        if logs_dir is None:
            self.logs_dir = Path.cwd() / "logs"
        else:
            logs_dir_path = Path(logs_dir)
            self.logs_dir = (
                logs_dir_path
                if logs_dir_path.is_absolute()
                else Path.cwd() / logs_dir_path
            )

        self._playwright = None
        self._browser: Optional[Browser] = None
        self._context: Optional[BrowserContext] = None
        self.page: Optional[Page] = None

    def __enter__(self) -> "BrowserSession":
        """Start the session and return it for use in a ``with`` block."""
        self.start()
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        """Stop the session; exceptions from the ``with`` body propagate."""
        self.stop()

    def _ensure_page(self) -> Optional[Page]:
        """Return a usable page, opening a new one if the current is gone.

        Returns:
            The live page, or ``None`` when no page can be created (for
            example because the session was never started or the context
            has been closed).
        """
        try:
            if self.page is None or self.page.is_closed():
                self.page = self._context.new_page()
        except Exception:
            # The liveness check itself failed; try once more to open a
            # fresh page before giving up.
            try:
                self.page = self._context.new_page()
            except Exception as e:
                logger.debug("Could not open a browser page: %s", e)
                return None
        return self.page

    def start(self):
        """Launch Chromium, create the context and open the first page.

        Previously saved storage state is loaded when ``state.json``
        exists. If any step after starting the Playwright driver fails,
        everything started so far is shut down again before the exception
        is re-raised, so a failed start does not leave a driver running.
        """
        # parents=True so nested state directories work as well.
        self.state_path.parent.mkdir(parents=True, exist_ok=True)
        self.logs_dir.mkdir(parents=True, exist_ok=True)

        self._playwright = sync_playwright().start()
        try:
            self._browser = self._playwright.chromium.launch(
                headless=self.headless,
                args=["--no-sandbox", "--disable-dev-shm-usage"],
            )

            storage_state = (
                str(self.state_path) if self.state_path.exists() else None
            )

            # Set some common headers to reduce bot-detection surface
            extra_headers = {
                "accept-language": "en-US,en;q=0.9",
                "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
                "referer": HOME_URL,
                "upgrade-insecure-requests": "1",
                "sec-ch-ua": '"Chromium";v="120", "Google Chrome";v="120", ";Not A Brand";v="99"',
                "sec-ch-ua-mobile": "?0",
                "sec-ch-ua-platform": '"Windows"',
            }

            self._context = self._browser.new_context(
                storage_state=storage_state,
                viewport={"width": 1280, "height": 900},
                user_agent=(
                    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                    "AppleWebKit/537.36 (KHTML, like Gecko) "
                    "Chrome/120.0.0.0 Safari/537.36"
                ),
                extra_http_headers=extra_headers,
            )
            self._context.set_default_timeout(self.timeout)
            self.page = self._context.new_page()
        except Exception:
            # Do not leak the driver/browser on a partial start.
            self.stop()
            raise

        logger.info("Browser session started")

    def save_state(self):
        """Persist the context's storage state to ``state.json``.

        Must be called after ``start()``; calling it on a session that was
        never started raises ``AttributeError`` because there is no
        context yet.
        """
        self._context.storage_state(path=str(self.state_path))
        logger.debug("Browser state saved")

    def stop(self):
        """Close the context, the browser and the Playwright driver.

        Each resource is shut down independently so that a failure while
        closing one of them does not prevent the others from being
        released. Errors are logged at debug level and otherwise ignored.
        """
        shutdown_steps = (
            (self._context, "close"),
            (self._browser, "close"),
            (self._playwright, "stop"),
        )
        for resource, method_name in shutdown_steps:
            if not resource:
                continue
            try:
                getattr(resource, method_name)()
            except Exception as e:
                logger.debug("Browser close error (ignored): %s", e)
        logger.info("Browser session closed")

    def _dump_http_error(self, page: Page, status: int) -> None:
        """Save a screenshot and the page HTML for an HTTP error response."""
        self.screenshot(f"http_{status}")
        try:
            html = page.content()
            # Keep the dump next to the screenshots instead of a
            # cwd-relative "logs" folder.
            self.logs_dir.mkdir(parents=True, exist_ok=True)
            (self.logs_dir / f"http_{status}.html").write_text(
                html, encoding="utf-8"
            )
        except Exception as e:
            # Diagnostics are best-effort; never let them mask the result.
            logger.debug("Could not save HTML for HTTP %s: %s", status, e)

    def goto(
        self,
        url: str,
        wait_until: str = "domcontentloaded",
        timeout_override: Optional[int] = None,
    ) -> bool:
        """Navigate the session page to ``url``.

        Navigation errors (exceptions) are retried once after a one second
        pause. An HTTP status of 400 or above is not retried: diagnostics
        are saved and ``False`` is returned immediately.

        Args:
            url: Address to open.
            wait_until: Playwright load state to wait for.
            timeout_override: Navigation timeout in milliseconds. ``None``
                (or any other falsy value, including 0) means "use the
                session default".

        Returns:
            ``True`` when the page loaded with a status below 400 (or
            without a response object), ``False`` otherwise.
        """
        nav_timeout = timeout_override if timeout_override else self.timeout
        max_attempts = self._MAX_NAV_ATTEMPTS

        for attempt in range(1, max_attempts + 1):
            try:
                page = self._ensure_page()
                if page is None:
                    raise RuntimeError("Browser page is not available")

                response = page.goto(
                    url, wait_until=wait_until, timeout=nav_timeout
                )
                if response is not None and response.status >= 400:
                    status = response.status
                    logger.error(
                        "Navigation returned HTTP %s for %s", status, url
                    )
                    self._dump_http_error(page, status)
                    return False

                # screenshot() handles and logs its own errors.
                self.screenshot("latest")
                return True
            except Exception as e:
                logger.error(
                    "Navigation failed [%s] attempt %s/%s: %s",
                    url, attempt, max_attempts, e,
                )
                self.screenshot("navigation_error")
                if attempt < max_attempts:
                    time.sleep(1)

        return False

    def cleanup_page(self):
        """
        Lightweight cleanup hook for long runs.

        The previous implementation cleared browser storage on every page,
        which can invalidate authenticated site state. Keep this method
        non-destructive so it cannot disrupt the session mid-run.
        """
        try:
            if self.page:
                logger.debug("Page cleanup skipped to preserve authenticated state")
        except Exception as e:
            logger.debug(f"Page cleanup warning (non-critical): {e}")

    def screenshot(self, name: str = "screenshot"):
        """Save a full-page screenshot as ``<logs_dir>/<name>.png``.

        The image is also copied to ``screenshot_latest.png`` so the UI
        always has a stable path to the most recent capture. This method
        never raises ``Exception``; failures are logged instead.

        Args:
            name: File name (without extension) for the screenshot.
        """
        try:
            path = self.logs_dir / f"{name}.png"
            self.page.screenshot(path=str(path), full_page=True)

            # Also write a consistent latest screenshot for the UI
            latest = self.logs_dir / self._LATEST_SCREENSHOT
            try:
                # Copying a file onto itself raises SameFileError, so skip
                # the copy when the capture already is the "latest" file.
                if path != latest:
                    shutil.copyfile(str(path), str(latest))
                logger.info(
                    "Screenshot saved: %s -> %s (latest exists: %s)",
                    path, latest, latest.exists(),
                )
            except Exception as e:
                logger.error("Screenshot copy failed: %s", e)

            logger.debug("Screenshot: %s", path)
        except Exception as e:
            logger.error("Screenshot failed: %s", e)