"""
Patchright Scraper Module

This module provides a robust web scraping implementation using Patchright
(undetected Playwright fork). It supports advanced features like stealth mode,
human simulation, CAPTCHA handling, and Cloudflare bypassing.
"""

from typing import Any, Dict, List, Optional

from patchright.async_api import async_playwright, Browser, BrowserContext, Page
from .base_scraper import BaseScraper
import asyncio
import random
import logging
from urllib.parse import urlparse, parse_qs, urlencode, urlunparse
import platform
import subprocess
import os
import tempfile
# import aioconsole

from ..utils.error_handler import ErrorMessages


def _get_browser_channel():
    """Pick the browser channel to request for the current platform.

    On Linux (common in Docker images, and where Google Chrome may not be
    installed at all, e.g. ARM64 hosts) the "chrome" channel is used only when
    a Chrome binary exists at one of the known install paths; otherwise
    ``None`` is returned so the caller falls back to the default Chromium.
    On every other platform "chrome" is always returned.

    Returns:
        "chrome", or None when the caller should use the default Chromium.
    """
    if platform.system().lower() != "linux":
        return "chrome"  # Use Chrome for better stealth on macOS/Windows

    # Only claim the Chrome channel if a Chrome binary is actually present.
    chrome_paths = ("/usr/bin/google-chrome", "/opt/google/chrome/chrome")
    if any(os.path.exists(path) for path in chrome_paths):
        return "chrome"
    return None  # Fallback to chromium


class ScraperConfig:
    """Configuration class for the Patchright scraper."""

    def __init__(
        self,
        use_stealth: bool = True,
        simulate_human: bool = False,
        use_custom_headers: bool = False,  # Disabled by default - creates detection signatures
        hide_webdriver: bool = True,
        bypass_cloudflare: bool = True,
        headless: bool = True,
        debug: bool = False,
        timeout: int = 30000,
        wait_for: str = 'domcontentloaded',
        use_current_browser: bool = False,
        use_persistent_context: bool = False,  # Use persistent context for max stealth
        max_retries: int = 3,
        delay_after_load: int = 2,
        max_concurrent_pages: int = 5,
        locale: str | None = None,  # e.g., 'en-US' - matches browser locale
        timezone_id: str | None = None,  # e.g., 'America/New_York' - matches browser timezone
    ):
        self.use_stealth = use_stealth
        self.simulate_human = simulate_human
        self.use_custom_headers = use_custom_headers
        self.hide_webdriver = hide_webdriver
        self.bypass_cloudflare = bypass_cloudflare
        self.headless = headless
        self.debug = debug
        self.timeout = timeout
        self.wait_for = wait_for
        self.use_current_browser = use_current_browser
        self.use_persistent_context = use_persistent_context
        self.max_retries = max_retries
        self.delay_after_load = delay_after_load
        self.max_concurrent_pages = max_concurrent_pages
        self.locale = locale
        self.timezone_id = timezone_id


class PlaywrightScraper(BaseScraper):
    """
    Advanced web scraper implementation using Patchright (undetected Playwright fork).

    Features:
    - Undetected browser automation (bypasses Cloudflare, Akamai, etc.)
    - Browser instance reuse for better performance
    - Concurrent page scraping with configurable concurrency
    - Persistent context support for maximum stealth
    - CAPTCHA handling
    - Automatic stealth patches via Patchright
    """

    def __init__(self, config: ScraperConfig | None = None):
        config = config or ScraperConfig()
        self.logger = logging.getLogger(__name__)
        self.logger.setLevel(logging.DEBUG if config.debug else logging.INFO)
        self.config = config
        self.chrome_process = None
        self.temp_user_data_dir = None
        # Browser pooling
        self._playwright = None
        self._browser: Browser | None = None
        self._persistent_context: BrowserContext | None = None
        self._browser_lock = asyncio.Lock()

    async def _get_browser(self, proxy: str | None = None, handle_captcha: bool = False) -> Browser:
        """Get or create a pooled browser instance with thread-safe locking."""
        async with self._browser_lock:
            if self._browser is None or not self._browser.is_connected():
                if self._playwright is None:
                    self._playwright = await async_playwright().start()
                if self.config.use_current_browser:
                    self._browser = await self.launch_and_connect_to_chrome(self._playwright)
                elif self.config.use_persistent_context:
                    # Persistent context for maximum stealth - returns context, not browser
                    self._persistent_context = await self.launch_persistent_context(
                        self._playwright, proxy, handle_captcha
                    )
                    self._browser = self._persistent_context.browser
                else:
                    self._browser = await self.launch_browser(self._playwright, proxy, handle_captcha)
            return self._browser

    async def launch_persistent_context(
        self, playwright, proxy: Optional[str] = None, handle_captcha: bool = False
    ) -> BrowserContext:
        """
        Launch a persistent browser context for maximum stealth.

        Patchright recommends persistent context with real Chrome for best undetectability.
        The profile lives in a temporary directory that is created on first use
        and remembered on ``self.temp_user_data_dir``.

        Args:
            playwright: Patchright instance
            proxy: Optional proxy server
            handle_captcha: Whether CAPTCHA handling is enabled; when True the
                browser is launched with a visible window regardless of config

        Returns:
            BrowserContext: Persistent browser context

        Raises:
            Exception: If the context cannot be launched; the original error
                is attached as the cause.
        """
        if self.temp_user_data_dir is None:
            self.temp_user_data_dir = tempfile.mkdtemp(prefix="patchright_profile_")

        context_options = {
            'user_data_dir': self.temp_user_data_dir,
            'channel': _get_browser_channel(),
            'headless': self.config.headless and not handle_captcha,
            'no_viewport': True,  # Removes viewport fingerprint
            'proxy': {'server': proxy} if proxy else None,
            'args': ['--no-sandbox', '--disable-setuid-sandbox'],
            'ignore_https_errors': True,
            'locale': self.config.locale,
            'timezone_id': self.config.timezone_id,
        }
        # Unset options are dropped so the browser keeps its own defaults.
        launch_kwargs = {key: value for key, value in context_options.items() if value is not None}

        try:
            # Note: no init script is added here - Patchright handles stealth automatically
            return await playwright.chromium.launch_persistent_context(**launch_kwargs)
        except Exception as e:
            # Chain explicitly so the underlying launch failure stays visible.
            raise Exception(
                f"Failed to launch persistent context: {str(e)}\n\n"
                "Make sure Chrome is installed or run: patchright install chrome"
            ) from e

    async def fetch_content(
        self,
        url: str,
        proxy: str | None = None,
        pages: str | None = None,
        url_pattern: str | None = None,
        handle_captcha: bool = False
    ) -> list[str]:
        """
        Fetch content from a given URL using the pooled browser.

        In CAPTCHA mode a single page is opened, the manual CAPTCHA pause runs
        on ``url``, and any further requested pages are loaded sequentially in
        that same page. Otherwise the work is delegated to
        ``scrape_multiple_pages``.

        Exceptions raised while scraping are not propagated: they are logged
        and returned as a single-element list containing an "Error: ..." string.

        Args:
            url: The URL to fetch content from
            proxy: Proxy server to use for the request
            pages: Page numbers to scrape (e.g., "1-5" or "1,3,5")
            url_pattern: Pattern for constructing multi-page URLs
            handle_captcha: Whether to pause for CAPTCHA solving

        Returns:
            List of content strings from scraped pages
        """
        # NOTE: browser acquisition happens outside the try block, so launch
        # failures propagate to the caller instead of becoming an error string.
        browser = await self._get_browser(proxy, handle_captcha)
        context = None
        try:
            if handle_captcha:
                # For CAPTCHA mode: create context, handle CAPTCHA, then scrape pages
                context = await self.create_context(browser, proxy)
                page = await context.new_page()

                if self.config.use_stealth:
                    await self.apply_stealth_settings(page)
                await self.set_browser_features(page)

                await self.handle_captcha(page, url)

                # After CAPTCHA is solved, get content from the current page
                await asyncio.sleep(self.config.delay_after_load)
                first_page_content = await page.content()

                # Check if we need to scrape multiple pages
                if pages:
                    page_numbers = self.parse_page_numbers(pages)
                    if not url_pattern:
                        url_pattern = self.detect_url_pattern(url)

                    contents = [first_page_content]  # First page already scraped

                    # Scrape remaining pages (skip first one since we already have it).
                    # This treats `url` as the first requested page number.
                    for page_num in page_numbers[1:]:
                        page_url = self.apply_url_pattern(url, url_pattern, page_num) if url_pattern else url
                        self.logger.info(f"Scraping page {page_num}: {page_url}")
                        await page.goto(page_url, wait_until=self.config.wait_for, timeout=self.config.timeout)
                        await asyncio.sleep(self.config.delay_after_load)
                        content = await page.content()
                        contents.append(content)
                else:
                    contents = [first_page_content]
            else:
                # Normal mode: use scrape_multiple_pages
                contents = await self.scrape_multiple_pages(browser, url, pages, url_pattern, proxy)
        except Exception as e:
            # Any failure is converted into a one-element error result.
            import traceback
            error_details = f"{type(e).__name__}: {str(e)}"
            self.logger.error(f"Error during scraping: {error_details}")
            self.logger.error(traceback.format_exc())
            contents = [f"Error: {error_details}"]
        finally:
            # Only the CAPTCHA branch creates a context in this method.
            if context:
                await context.close()
            # Close browser after CAPTCHA mode to clean up the visible window
            if handle_captcha and self._browser and self._browser.is_connected():
                await self._browser.close()
                self._browser = None

        return contents

    async def handle_captcha(self, page: Page, url: str):
        """
        Open ``url`` and pause so the user can solve a CAPTCHA manually.

        The pause is a fixed-length sleep: no console input is read, so the
        on-screen prompt announces the automatic continuation instead of
        asking for a key press. Afterwards the method waits briefly for the
        page's 'load' state.

        Errors are logged and swallowed, except when the error message
        indicates the browser/page was closed; that error is re-raised.

        Args:
            page (Page): Playwright page object
            url (str): URL to navigate to for CAPTCHA solving
        """
        # NOTE: interactive "press Enter" input (aioconsole) is not wired up,
        # so the user gets a fixed time window instead.
        manual_solve_seconds = 60

        self.logger.info("Waiting for user to solve CAPTCHA...")
        try:
            await page.goto(url, wait_until=self.config.wait_for, timeout=self.config.timeout)

            print("Please solve the CAPTCHA in the browser window.")
            print(f"Scraping will continue automatically in {manual_solve_seconds} seconds...")
            await asyncio.sleep(manual_solve_seconds)

            # Use 'load' instead of 'networkidle' - modern sites never reach networkidle
            # due to constant analytics/tracking requests
            await page.wait_for_load_state('load', timeout=5000)
            self.logger.info("CAPTCHA handling completed.")
        except Exception as e:
            # Handle browser closure or timeout gracefully
            if "closed" in str(e).lower():
                self.logger.warning("Browser was closed during CAPTCHA handling")
                raise
            self.logger.warning(f"CAPTCHA wait completed with: {e}")

    async def launch_and_connect_to_chrome(self, playwright):
        """
        Launch a new Chrome instance with remote debugging enabled and connect to it.

        This method creates a temporary user data directory and launches Chrome
        with remote debugging on port 9222, then connects to it via Playwright.

        Args:
            playwright: Playwright instance

        Returns:
            Browser: Connected browser instance

        Raises:
            Exception: If unable to connect to Chrome after 30 seconds
        """
        if self.chrome_process is None:
            self.temp_user_data_dir = tempfile.mkdtemp(prefix="chrome_debug_profile_")
            chrome_executable = self.get_chrome_executable()
            command = [
                chrome_executable,
                f"--user-data-dir={self.temp_user_data_dir}",
                "--remote-debugging-port=9222",
                "--no-first-run",
                "--no-default-browser-check"
            ]
            self.chrome_process = subprocess.Popen(command)
            self.logger.info("Launched Chrome with remote debugging.")

        for _ in range(30):
            try:
                browser = await playwright.chromium.connect_over_cdp("http://localhost:9222")
                self.logger.info("Successfully connected to Chrome.")
                return browser
            except Exception as e:
                self.logger.debug(f"Connection attempt failed: {str(e)}")
                await asyncio.sleep(1)

        raise Exception("Failed to connect to Chrome after 30 seconds")

    def get_chrome_executable(self):
        """
        Get the path to Chrome executable based on the operating system.

        Returns:
            str: Path to Chrome executable

        Raises:
            NotImplementedError: If the operating system is not supported
        """
        system = platform.system()
        if system == "Darwin":  # macOS
            return "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"
        elif system == "Linux":
            return "google-chrome"
        elif system == "Windows":
            return "chrome"
        else:
            raise NotImplementedError(f"Unsupported operating system: {system}")

    async def close(self) -> None:
        """
        Clean up resources including browser, Chrome process, and temp directories.

        This should be called when done using the scraper, or use it as an async context manager.
        """
        import shutil

        # Close persistent context if used
        if self._persistent_context:
            await self._persistent_context.close()
            self._persistent_context = None
            self.logger.info("Persistent context closed.")

        # Close pooled browser
        if self._browser and self._browser.is_connected():
            await self._browser.close()
            self._browser = None
            self.logger.info("Browser closed.")

        # Stop playwright
        if self._playwright:
            await self._playwright.stop()
            self._playwright = None

        # Terminate Chrome process if we started one
        if self.chrome_process:
            self.chrome_process.terminate()
            self.chrome_process.wait()
            self.chrome_process = None
            self.logger.info("Chrome process terminated.")

        # Remove temp directory
        if self.temp_user_data_dir:
            shutil.rmtree(self.temp_user_data_dir, ignore_errors=True)
            self.logger.info(f"Temporary user data directory removed: {self.temp_user_data_dir}")
            self.temp_user_data_dir = None

    async def __aenter__(self):
        """Enter the async context manager and return this scraper.

        No resources are acquired here; the browser is launched lazily on use.
        """
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Exit the async context manager, releasing all resources via ``close()``.

        Returns False so that an exception raised inside the ``async with``
        body is not suppressed.
        """
        await self.close()
        return False

    async def connect_to_current_browser(self, playwright):
        """
        Start the system's Chrome with remote debugging and attach to it.

        Chrome is started through an OS-specific command with debugging port
        9222, then a CDP connection is attempted once per second for up to
        30 attempts.

        Args:
            playwright: Playwright instance

        Returns:
            Browser: Connected browser instance

        Raises:
            NotImplementedError: If the operating system is not supported
            Exception: If unable to connect to the browser after 30 seconds
        """
        # OS name -> (command, whether to run it through the shell)
        launch_specs = {
            "Darwin": (["open", "-a", "Google Chrome", "--args", "--remote-debugging-port=9222"], False),
            "Linux": (["google-chrome", "--remote-debugging-port=9222"], False),
            "Windows": (["start", "chrome", "--remote-debugging-port=9222"], True),
        }
        system = platform.system()
        if system not in launch_specs:
            raise NotImplementedError(f"Connecting to current browser is not implemented for {system}")

        command, via_shell = launch_specs[system]
        subprocess.Popen(command, shell=via_shell)

        self.logger.info("Waiting for browser to start...")
        attempts_left = 30
        while attempts_left > 0:
            attempts_left -= 1
            try:
                connected = await playwright.chromium.connect_over_cdp("http://localhost:9222")
            except Exception as exc:
                self.logger.debug(f"Connection attempt failed: {str(exc)}")
                await asyncio.sleep(1)
            else:
                self.logger.info("Successfully connected to the browser.")
                return connected

        raise Exception("Failed to connect to the current browser after 30 seconds")

    async def launch_browser(self, playwright, proxy: Optional[str] = None, handle_captcha: bool = False) -> Browser:
        """
        Launch a new browser instance with specified configuration.

        Args:
            playwright: Patchright instance
            proxy (Optional[str]): Proxy server to use
            handle_captcha (bool): Whether CAPTCHA handling is enabled; when
                True the browser is launched with a visible window

        Returns:
            Browser: Launched browser instance

        Raises:
            Exception: If the browser cannot be launched; the original error
                is attached as the cause.
        """
        try:
            channel = _get_browser_channel()
            launch_options = {
                'headless': self.config.headless and not handle_captcha,
                'args': [
                    '--no-sandbox',
                    '--disable-setuid-sandbox',
                    '--disable-infobars',
                    '--window-position=0,0',
                    # Spelling matters: a misspelled switch is silently ignored.
                    '--ignore-certificate-errors',
                    '--ignore-certificate-errors-spki-list',
                ],
                'proxy': {'server': proxy} if proxy else None
            }
            # No channel means "use the default Chromium build".
            if channel:
                launch_options['channel'] = channel
            return await playwright.chromium.launch(**launch_options)
        except EOFError as e:
            raise Exception(
                "Patchright browsers are not installed.\n\n"
                "Please run: patchright install chromium\n\n"
                "Or for better stealth: patchright install chrome"
            ) from e
        except Exception as e:
            raise Exception(f"Failed to launch browser: {str(e)}") from e

    async def create_context(self, browser: Browser, proxy: Optional[str] = None) -> BrowserContext:
        """
        Create a new browser context with specified settings.

        Note: Patchright recommends NOT setting custom user_agent or viewport
        as these create detection signatures. Let the browser use defaults.

        Args:
            browser (Browser): Browser instance
            proxy (Optional[str]): Proxy server to use

        Returns:
            BrowserContext: Created browser context
        """
        context_options = {
            'proxy': {'server': proxy} if proxy else None,
            'java_script_enabled': True,
            'ignore_https_errors': True,
            'locale': self.config.locale,
            'timezone_id': self.config.timezone_id,
        }
        # Don't set viewport or user_agent - Patchright handles stealth better without them
        context = await browser.new_context(**{k: v for k, v in context_options.items() if v is not None})

        # Note: Init script disabled - Patchright handles stealth automatically
        # If needed, uncomment:
        # if self.config.use_stealth:
        #     await self._add_stealth_init_script(context)

        return context

    async def _add_stealth_init_script(self, context: BrowserContext):
        """
        Add init script to context for additional stealth before page scripts run.

        This runs before any page JavaScript executes, patching detection vectors
        that Patchright might not cover. Uses the Playwright add_init_script API.

        Args:
            context: Browser context to add init script to
        """
        stealth_script = '''
            () => {
                // Patch chrome.runtime to avoid detection
                if (!window.chrome) {
                    window.chrome = {};
                }
                if (!window.chrome.runtime) {
                    window.chrome.runtime = {};
                }

                // Patch plugins array to look like real browser
                Object.defineProperty(navigator, 'plugins', {
                    get: () => {
                        const plugins = [
                            { name: 'Chrome PDF Plugin', filename: 'internal-pdf-viewer' },
                            { name: 'Chrome PDF Viewer', filename: 'mhjfbmdgcfjbbpaeojofohoefgiehjai' },
                            { name: 'Native Client', filename: 'internal-nacl-plugin' }
                        ];
                        plugins.item = (i) => plugins[i] || null;
                        plugins.namedItem = (name) => plugins.find(p => p.name === name) || null;
                        plugins.refresh = () => {};
                        return plugins;
                    }
                });

                // Patch languages to look normal
                Object.defineProperty(navigator, 'languages', {
                    get: () => ['en-US', 'en']
                });

                // Ensure webdriver is not set (Patchright handles this but extra safety)
                Object.defineProperty(navigator, 'webdriver', {
                    get: () => undefined
                });

                // Patch connection info
                if (navigator.connection) {
                    Object.defineProperty(navigator.connection, 'rtt', {
                        get: () => 50
                    });
                }
            }
        '''
        await context.add_init_script(stealth_script)

    async def apply_stealth_settings(self, page: Page):
        """
        Apply additional stealth settings to avoid bot detection.

        Note: Patchright already handles most stealth features automatically:
        - navigator.webdriver is already undefined
        - Automation flags are already removed
        - Runtime.enable leak is already patched

        This method only applies minimal additional patches that don't interfere.

        Args:
            page (Page): Patchright page object
        """
        # Patchright handles most stealth automatically via isolated contexts
        # Only apply minimal non-conflicting patches
        await page.evaluate('''
            () => {
                // Patch permissions query for notifications
                const originalQuery = window.navigator.permissions.query;
                if (originalQuery) {
                    window.navigator.permissions.query = (parameters) => (
                        parameters.name === 'notifications' ?
                            Promise.resolve({ state: Notification.permission }) :
                            originalQuery(parameters)
                    );
                }
            }
        ''')

    async def set_browser_features(self, page: Page):
        """
        Set browser features like custom headers if enabled in configuration.
        
        Args:
            page (Page): Playwright page object
        """
        if self.config.use_custom_headers:
            await page.set_extra_http_headers({
                'Accept-Language': 'en-US,en;q=0.9',
                'Accept-Encoding': 'gzip, deflate, br',
                'Referer': 'https://www.google.com/',
                'Sec-Fetch-Dest': 'document',
                'Sec-Fetch-Mode': 'navigate',
                'Sec-Fetch-Site': 'none',
                'Sec-Fetch-User': '?1',
                'Upgrade-Insecure-Requests': '1'
            })

    async def scrape_multiple_pages(
        self,
        browser: Browser,
        base_url: str,
        pages: str | None = None,
        url_pattern: str | None = None,
        proxy: str | None = None
    ) -> list[str]:
        """
        Scrape content from single or multiple pages with concurrent execution.

        Uses asyncio.gather with a semaphore for controlled concurrency.
        Supports both regular browser contexts and persistent contexts.

        Args:
            browser: Browser instance for creating contexts
            base_url: Base URL to scrape
            pages: Page numbers to scrape
            url_pattern: Pattern for constructing multi-page URLs
            proxy: Optional proxy for context creation

        Returns:
            List of content strings from scraped pages
        """
        if not url_pattern:
            url_pattern = self.detect_url_pattern(base_url)

        # Use persistent context if available, otherwise create new contexts
        use_persistent = self._persistent_context is not None

        if not url_pattern and not pages:
            # Single page scraping
            self.logger.info(f"Scraping single page: {base_url}")
            if use_persistent:
                page = await self._persistent_context.new_page()
                try:
                    if self.config.use_stealth:
                        await self.apply_stealth_settings(page)
                    await self.set_browser_features(page)
                    content = await self.navigate_and_get_content(page, base_url)
                    return [content]
                finally:
                    await page.close()
            else:
                context = await self.create_context(browser, proxy)
                try:
                    page = await context.new_page()
                    if self.config.use_stealth:
                        await self.apply_stealth_settings(page)
                    await self.set_browser_features(page)
                    content = await self.navigate_and_get_content(page, base_url)
                    return [content]
                finally:
                    await context.close()

        # Multiple page scraping with concurrency
        page_numbers = self.parse_page_numbers(pages) if pages else [1]
        urls = [
            self.apply_url_pattern(base_url, url_pattern, page_num) if url_pattern else base_url
            for page_num in page_numbers
        ]

        semaphore = asyncio.Semaphore(self.config.max_concurrent_pages)

        async def scrape_with_context(url: str, page_num: int) -> str:
            async with semaphore:
                self.logger.info(f"Scraping page {page_num}: {url}")
                if use_persistent:
                    page = await self._persistent_context.new_page()
                    try:
                        if self.config.use_stealth:
                            await self.apply_stealth_settings(page)
                        await self.set_browser_features(page)
                        content = await self.navigate_and_get_content(page, url)
                        await asyncio.sleep(random.uniform(0.5, 1.5))
                        return content
                    finally:
                        await page.close()
                else:
                    context = await self.create_context(browser, proxy)
                    try:
                        page = await context.new_page()
                        if self.config.use_stealth:
                            await self.apply_stealth_settings(page)
                        await self.set_browser_features(page)
                        content = await self.navigate_and_get_content(page, url)
                        await asyncio.sleep(random.uniform(0.5, 1.5))
                        return content
                    finally:
                        await context.close()

        # Execute concurrently and maintain order
        tasks = [
            scrape_with_context(url, page_num)
            for page_num, url in zip(page_numbers, urls)
        ]
        return await asyncio.gather(*tasks)

    async def navigate_and_get_content(self, page: Page, url: str) -> str:
        """
        Navigate to a URL and extract its content.

        Failures are not raised: they are logged and reported through the
        returned string, which then starts with "Error: ".

        Args:
            page (Page): Playwright page object
            url (str): URL to navigate to

        Returns:
            str: Page content or error message
        """
        # Navigation timeouts are raised as the library's own TimeoutError,
        # which is not asyncio.TimeoutError, so both are handled as timeouts.
        from patchright.async_api import TimeoutError as PatchrightTimeoutError

        try:
            self.logger.info(f"Navigating to {url}")
            await page.goto(url, wait_until=self.config.wait_for, timeout=self.config.timeout)
            self.logger.info(f"Successfully loaded {url}")

            # Give late-loading content a moment before reading the DOM.
            await asyncio.sleep(self.config.delay_after_load)

            self.logger.info("Extracting page content")
            content = await page.content()
            self.logger.info(f"Successfully extracted content (length: {len(content)})")
            return content
        except (PatchrightTimeoutError, asyncio.TimeoutError):
            self.logger.error(f"Timeout loading {url}")
            return f"Error: {ErrorMessages.TIMEOUT_ERROR}"
        except Exception as e:
            self.logger.error(f"Error navigating to {url}: {str(e)}")
            # Keep the returned details short; the full message is in the log.
            error_details = str(e) if len(str(e)) < 200 else str(e)[:200] + "..."
            return f"Error: {ErrorMessages.SCRAPING_FAILED}\n\nDetails: {error_details}"

    async def bypass_cloudflare(self, page: Page, url: str) -> str:
        """
        Attempt to bypass Cloudflare protection.
        
        This method reloads the page multiple times and simulates human behavior
        to try to bypass Cloudflare's bot detection.
        
        Args:
            page (Page): Playwright page object
            url (str): URL to bypass Cloudflare for
            
        Returns:
            str: Page content after bypass attempt
        """
        max_retries = 3
        for _ in range(max_retries):
            await page.reload(wait_until=self.config.wait_for, timeout=self.config.timeout)
            if self.config.simulate_human:
                await self.simulate_human_behavior(page)
            else:
                await asyncio.sleep(2)

            content = await page.content()
            if "Cloudflare" not in content or "ray ID" not in content.lower():
                self.logger.info("Successfully bypassed Cloudflare")
                return content

            self.logger.info("Cloudflare still detected, retrying...")

        self.logger.warning("Failed to bypass Cloudflare after multiple attempts")
        return content

    async def simulate_human_behavior(self, page: Page):
        """
        Perform a short sequence of human-looking interactions on the page.

        The sequence is: scroll down half a viewport, move the mouse to two
        random points, then hover (without clicking) over one randomly chosen
        link, button, input or select element if any exist. Short random
        pauses separate the steps.

        Args:
            page (Page): Playwright page object
        """
        # Step 1: scroll
        await page.evaluate('window.scrollBy(0, window.innerHeight / 2)')
        await asyncio.sleep(random.uniform(0.5, 1))

        # Step 2: two random mouse moves inside a 100..500 px square
        moves_left = 2
        while moves_left:
            moves_left -= 1
            target_x = random.randint(100, 500)
            target_y = random.randint(100, 500)
            await page.mouse.move(target_x, target_y)
            await asyncio.sleep(random.uniform(0.1, 0.3))

        # Step 3: hover over one interactive element, if the page has any
        candidates = await page.query_selector_all('a, button, input, select')
        if not candidates:
            return
        await random.choice(candidates).hover()
        await asyncio.sleep(random.uniform(0.3, 0.7))

    def detect_url_pattern(self, url: str) -> Optional[str]:
        """
        Detect URL pagination pattern from a given URL.
        
        This method analyzes the URL to identify common pagination patterns
        in query parameters or path segments.
        
        Args:
            url (str): URL to analyze for pagination patterns
            
        Returns:
            Optional[str]: Detected pattern or None if no pattern found
        """
        parsed_url = urlparse(url)
        query = parse_qs(parsed_url.query)

        for param, value in query.items():
            if value and value[0].isdigit():
                return f"{param}={{{param}}}"

        path_parts = parsed_url.path.split('/')
        for i, part in enumerate(path_parts):
            if part.isdigit():
                path_parts[i] = "{page}"
                return '/'.join(path_parts)

        return None

    def apply_url_pattern(self, base_url: str, pattern: str, page_num: int) -> str:
        """
        Apply a URL pattern to generate a paginated URL.
        
        Args:
            base_url (str): Base URL to apply pattern to
            pattern (str): Pattern to apply
            page_num (int): Page number to insert into pattern
            
        Returns:
            str: Generated URL with page number applied
        """
        parsed_url = urlparse(base_url)
        if '=' in pattern: 
            query = parse_qs(parsed_url.query)
            param, value = pattern.split('=')
            query[param] = [value.format(**{param: page_num})]
            return urlunparse(parsed_url._replace(query=urlencode(query, doseq=True)))
        elif '{page}' in pattern:
            return urlunparse(parsed_url._replace(path=pattern.format(page=page_num)))
        else:
            return base_url

    def parse_page_numbers(self, pages: Optional[str]) -> List[int]:
        """
        Parse page number specification into a list of integers.
        
        This method parses page specifications like "1-5" or "1,3,5" into
        a sorted list of unique page numbers.
        
        Args:
            pages (Optional[str]): Page specification string
            
        Returns:
            List[int]: Sorted list of unique page numbers
        """
        if not pages:
            return [1]

        page_numbers = []
        for part in pages.split(','):
            if '-' in part:
                start, end = map(int, part.split('-'))
                page_numbers.extend(range(start, end + 1))
            else:
                page_numbers.append(int(part))

        return sorted(set(page_numbers))

    async def extract(self, content: str) -> Dict[str, Any]:
        """
        Extract structured data from content.
        
        For the Playwright scraper, this method simply returns the raw content
        since Playwright is primarily used for fetching content rather than
        extracting structured data.
        
        Args:
            content (str): Raw content to extract data from
            
        Returns:
            Dict[str, Any]: Dictionary containing the raw content
        """
        return {"raw_content": content}