abdulsalam2121
Enhance error handling and logging during authentication and navigation processes
import logging
import shutil
import time
from pathlib import Path
from typing import Optional

from playwright.sync_api import sync_playwright, Browser, BrowserContext, Page

from config import HOME_URL

# Module-wide logger; it uses the fixed name "bot" rather than __name__.
logger = logging.getLogger("bot")
class BrowserSession:
    """Own one Playwright Chromium browser, browser context and page.

    Call :meth:`start` before using the session and :meth:`stop` when done.
    Browser storage state is written to ``<state_dir>/state.json`` by
    :meth:`save_state` and loaded again by the next :meth:`start` if the file
    exists. Screenshots and HTTP error dumps are written to ``logs_dir``.
    """

    def __init__(
        self,
        headless: bool = False,
        state_dir: str = ".browser_state",
        timeout: int = 30000,
        logs_dir: Optional[str] = None,
    ):
        """Store the configuration; no browser is launched until ``start()``.

        Args:
            headless: Launch Chromium without a visible window.
            state_dir: Directory holding the persisted ``state.json``.
            timeout: Default timeout in milliseconds, applied to the context
                and used by ``goto`` when no override is given.
            logs_dir: Directory for screenshots and HTML dumps. Relative
                paths (and the default ``"logs"``) are anchored at the
                current working directory as of construction time.
        """
        self.headless = headless
        self.state_path = Path(state_dir) / "state.json"
        self.timeout = timeout
        # Joining onto cwd leaves an already-absolute path unchanged, so this
        # always yields an absolute logs directory.
        self.logs_dir = Path.cwd() / (logs_dir if logs_dir is not None else "logs")
        self._playwright = None
        self._browser: Optional[Browser] = None
        self._context: Optional[BrowserContext] = None
        self.page: Optional[Page] = None

    def _ensure_page(self) -> "Optional[Page]":
        """Return a usable page, opening a new one if the current one is gone.

        Returns:
            The open page, or ``None`` when the session has no context (not
            started or already stopped) or a new page cannot be created.
        """
        if self._context is None:
            return None
        try:
            if self.page is not None and not self.page.is_closed():
                return self.page
        except Exception as e:
            # Even the liveness probe failed; fall through and replace the page.
            logger.debug("Page liveness check failed, opening a new page: %s", e)
        try:
            self.page = self._context.new_page()
        except Exception as e:
            logger.warning("Could not open a new browser page: %s", e)
            return None
        return self.page

    def start(self) -> None:
        """Launch Chromium, create the context and open the first page.

        Raises:
            Exception: Whatever Playwright raised while starting. Anything
                already started is shut down via ``stop()`` before re-raising.
        """
        # parents=True so nested state/log directories can be created.
        self.state_path.parent.mkdir(parents=True, exist_ok=True)
        self.logs_dir.mkdir(parents=True, exist_ok=True)

        # Set some common headers to reduce bot-detection surface
        extra_headers = {
            "accept-language": "en-US,en;q=0.9",
            "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
            "referer": HOME_URL,
            "upgrade-insecure-requests": "1",
            "sec-ch-ua": '"Chromium";v="120", "Google Chrome";v="120", ";Not A Brand";v="99"',
            "sec-ch-ua-mobile": "?0",
            "sec-ch-ua-platform": '"Windows"',
        }
        storage_state = str(self.state_path) if self.state_path.exists() else None

        try:
            self._playwright = sync_playwright().start()
            self._browser = self._playwright.chromium.launch(
                headless=self.headless,
                args=["--no-sandbox", "--disable-dev-shm-usage"],
            )
            self._context = self._browser.new_context(
                storage_state=storage_state,
                viewport={"width": 1280, "height": 900},
                user_agent=(
                    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                    "AppleWebKit/537.36 (KHTML, like Gecko) "
                    "Chrome/120.0.0.0 Safari/537.36"
                ),
                extra_http_headers=extra_headers,
            )
            self._context.set_default_timeout(self.timeout)
            self.page = self._context.new_page()
        except Exception:
            # Do not leave a half-started driver or browser process behind.
            logger.exception("Browser session failed to start")
            self.stop()
            raise
        logger.info("Browser session started")

    def save_state(self) -> None:
        """Write the context's storage state to ``self.state_path``."""
        self.state_path.parent.mkdir(parents=True, exist_ok=True)
        self._context.storage_state(path=str(self.state_path))
        logger.debug("Browser state saved")

    def stop(self) -> None:
        """Close the context, the browser and Playwright, ignoring errors.

        Each handle is closed independently, so a failure in one does not
        prevent the others from being released. All handles are cleared
        afterwards, which makes repeated calls harmless.
        """
        shutdown_steps = (
            ("context", self._context, "close"),
            ("browser", self._browser, "close"),
            ("playwright", self._playwright, "stop"),
        )
        for label, handle, method_name in shutdown_steps:
            if handle is None:
                continue
            try:
                getattr(handle, method_name)()
            except Exception as e:
                logger.debug("Browser close error (ignored) [%s]: %s", label, e)
        self.page = None
        self._context = None
        self._browser = None
        self._playwright = None
        logger.info("Browser session closed")

    def goto(
        self,
        url: str,
        wait_until: str = "domcontentloaded",
        timeout_override: Optional[int] = None,
        max_attempts: int = 2,
    ) -> bool:
        """Navigate to ``url``, retrying when the navigation itself fails.

        Args:
            url: Address to open.
            wait_until: Load state passed through to ``page.goto``.
            timeout_override: Navigation timeout in milliseconds. ``None``
                uses ``self.timeout``; an explicit ``0`` is passed through
                unchanged instead of being replaced by the default.
            max_attempts: Number of navigation attempts (at least one is
                always made). A one-second pause separates attempts.

        Returns:
            ``True`` if the page loaded with no response object or a status
            below 400. ``False`` on an HTTP status >= 400 (not retried) or
            when every attempt raised.
        """
        nav_timeout = self.timeout if timeout_override is None else timeout_override
        attempts = max(1, max_attempts)

        for attempt in range(1, attempts + 1):
            try:
                page = self._ensure_page()
                if page is None:
                    raise RuntimeError("Browser page is not available")
                response = page.goto(url, wait_until=wait_until, timeout=nav_timeout)
            except Exception as e:
                logger.error(
                    "Navigation failed [%s] attempt %d/%d: %s", url, attempt, attempts, e
                )
                self.screenshot("navigation_error")
                if attempt < attempts:
                    time.sleep(1)
                continue

            if response is not None and response.status >= 400:
                logger.error("Navigation returned HTTP %s for %s", response.status, url)
                self._dump_http_error(page, response.status)
                return False

            self.screenshot("latest")
            return True
        return False

    def _dump_http_error(self, page: "Page", status: int) -> None:
        """Save a screenshot and the page HTML for a failed HTTP response."""
        self.screenshot(f"http_{status}")
        try:
            html = page.content()
            self.logs_dir.mkdir(parents=True, exist_ok=True)
            (self.logs_dir / f"http_{status}.html").write_text(html, encoding="utf-8")
        except Exception as e:
            # Diagnostics are best-effort and must not mask the HTTP failure.
            logger.debug("Could not save HTML dump for HTTP %s: %s", status, e)

    def cleanup_page(self) -> None:
        """
        Lightweight cleanup hook for long runs.

        The previous implementation cleared browser storage on every page, which
        can invalidate authenticated site state. Keep this method non-destructive
        so it cannot disrupt the session mid-run.
        """
        if self.page is not None:
            logger.debug("Page cleanup skipped to preserve authenticated state")

    def screenshot(self, name: str = "screenshot") -> None:
        """Save a full-page screenshot as ``<logs_dir>/<name>.png``.

        The image is also copied to ``<logs_dir>/screenshot_latest.png`` so a
        UI can always read the most recent capture from a fixed path. Failures
        are logged and never raised.
        """
        if self.page is None:
            logger.error("Screenshot failed: browser page is not available")
            return

        path = self.logs_dir / f"{name}.png"
        try:
            self.logs_dir.mkdir(parents=True, exist_ok=True)
            self.page.screenshot(path=str(path), full_page=True)
        except Exception as e:
            logger.error("Screenshot failed: %s", e)
            return
        logger.debug("Screenshot: %s", path)

        # Also write a consistent latest screenshot for the UI
        latest = self.logs_dir / "screenshot_latest.png"
        if path == latest:
            # Copying a file onto itself raises shutil.SameFileError.
            return
        try:
            shutil.copyfile(str(path), str(latest))
        except OSError as e:
            logger.error("Screenshot copy failed: %s", e)
        else:
            logger.info(
                "Screenshot saved: %s -> %s (latest exists: %s)",
                path,
                latest,
                latest.exists(),
            )