from __future__ import annotations

import asyncio
import logging
from typing import Any, Dict, List, Optional

logger = logging.getLogger(__name__)


class ScraperService:
    """
    Generic web scraping service using the Scrapling library.

    Supports three fetcher modes:
    - ``http``   — lightweight HTTP requests (AsyncFetcher)
    - ``dynamic`` — Playwright-based full browser (DynamicFetcher)
    - ``stealth`` — Undetectable headless browser (StealthyFetcher)

    Every method is a classmethod or staticmethod; the only state kept is
    the class-level ``FETCHERS_AVAILABLE`` list.
    """

    # Fetcher modes found importable by the most recent ``health()`` call.
    # Stays empty until ``health()`` has been awaited at least once.
    FETCHERS_AVAILABLE: List[str] = []

    @staticmethod
    async def _check_fetchers() -> List[str]:
        """Return the fetcher modes whose Scrapling class can be imported.

        Each fetcher is probed with its own import so that one missing
        fetcher does not hide the others. The result is ordered
        ``http``, ``stealth``, ``dynamic``.
        """
        available: List[str] = []
        try:
            from scrapling.fetchers import AsyncFetcher  # noqa: F401
            available.append("http")
        except ImportError:
            pass
        try:
            from scrapling.fetchers import StealthyFetcher  # noqa: F401
            available.append("stealth")
        except ImportError:
            pass
        try:
            from scrapling.fetchers import DynamicFetcher  # noqa: F401
            available.append("dynamic")
        except ImportError:
            pass
        return available

    @classmethod
    async def _fetch_page(
        cls,
        url: str,
        fetcher_type: str,
        proxy: Optional[str],
        network_idle: bool,
    ) -> Any:
        """Fetch ``url`` with the requested fetcher and return the page object.

        Args:
            url: Address to fetch.
            fetcher_type: ``"stealth"``, ``"dynamic"`` or ``"http"``. Any
                other value falls back to the HTTP fetcher (a warning is
                logged).
            proxy: Proxy passed through to the fetcher, or ``None``.
            network_idle: Forwarded to the browser-based fetchers only; the
                HTTP fetcher does not receive it.

        Returns:
            Whatever the selected Scrapling fetcher returns.

        Raises:
            ImportError: If the fetcher class needed for the selected mode
                cannot be imported.
        """
        # Only the fetcher that is actually used gets imported, so a missing
        # browser fetcher cannot break plain HTTP fetching (and vice versa).
        if fetcher_type == "stealth":
            from scrapling.fetchers import StealthyFetcher

            logger.info("fetcher=stealth url=%s", url)
            # The synchronous ``fetch`` is run in a worker thread so the
            # event loop is not blocked while the browser works.
            return await asyncio.to_thread(
                StealthyFetcher.fetch,
                url,
                headless=True,
                network_idle=network_idle,
                proxy=proxy,
            )

        if fetcher_type == "dynamic":
            from scrapling.fetchers import DynamicFetcher

            logger.info("fetcher=dynamic url=%s", url)
            return await asyncio.to_thread(
                DynamicFetcher.fetch,
                url,
                headless=True,
                network_idle=network_idle,
                proxy=proxy,
            )

        from scrapling.fetchers import AsyncFetcher

        if fetcher_type != "http":
            # Keep the historical fallback, but make it visible in the logs.
            logger.warning(
                "unknown fetcher_type=%r, falling back to http url=%s",
                fetcher_type,
                url,
            )
        logger.info("fetcher=http url=%s", url)
        return await AsyncFetcher.get(url, proxy=proxy)

    @staticmethod
    def _run_selector(page: Any, rule: Dict[str, Any]) -> Any:
        """Apply one extraction rule to ``page`` and return the raw matches.

        Rule keys read here:
            selector: Required selector string (``KeyError`` if missing).
            selector_type: ``"xpath"`` selects ``page.xpath``; anything else
                (default ``"css"``) selects ``page.css``.
            auto_save: Forwarded as the ``auto_save`` keyword (default False).
            auto_match: Forwarded as the ``adaptive`` keyword (default False).
        """
        kwargs: Dict[str, Any] = {
            "auto_save": rule.get("auto_save", False),
            "adaptive": rule.get("auto_match", False),
        }
        selector = rule["selector"]
        if rule.get("selector_type", "css") == "xpath":
            return page.xpath(selector, **kwargs)
        return page.css(selector, **kwargs)

    @staticmethod
    def _unpack(elements: Any, extract_all: bool) -> Any:
        """Reduce a selector result to plain value(s).

        With ``extract_all`` true, return ``elements.getall()`` when that
        method exists, otherwise a list of ``str(e)`` for each element.
        With ``extract_all`` false, return ``elements.get()`` when that
        method exists, otherwise ``str`` of the first element, or ``None``
        when ``elements`` is empty/falsy.
        """
        if extract_all:
            if hasattr(elements, "getall"):
                return elements.getall()
            return [str(e) for e in elements]
        if hasattr(elements, "get"):
            return elements.get()
        if elements:
            return str(elements[0])
        return None

    @classmethod
    async def extract(
        cls,
        url: str,
        fetcher_type: str,
        rules: List[Dict[str, Any]],
        proxy: Optional[str] = None,
        network_idle: bool = False,
    ) -> Dict[str, Any]:
        """Fetch ``url`` and evaluate every rule against the fetched page.

        Args:
            url: Address to fetch.
            fetcher_type: Fetcher mode, see ``_fetch_page``.
            rules: Extraction rules; each may carry ``field_name``,
                ``selector``, ``selector_type``, ``extract_all``,
                ``auto_save`` and ``auto_match``.
            proxy: Optional proxy forwarded to the fetcher.
            network_idle: Forwarded to the browser-based fetchers.

        Returns:
            ``{"error": ...}`` when the fetch itself fails. Otherwise a dict
            mapping each rule's ``field_name`` (``"unknown"`` when absent) to
            its extracted value; a rule that raises yields ``None`` for its
            field and a warning in the log.
        """
        try:
            page = await cls._fetch_page(url, fetcher_type, proxy, network_idle)
        except Exception as exc:
            # Boundary handler: report the failure to the caller as data
            # instead of propagating, and keep the traceback in the log.
            logger.exception("fetch_failed url=%s", url)
            return {"error": f"Failed to fetch {url}: {exc}"}

        result: Dict[str, Any] = {}
        for rule in rules:
            field = rule.get("field_name", "unknown")
            try:
                elements = cls._run_selector(page, rule)
                result[field] = cls._unpack(elements, rule.get("extract_all", False))
            except Exception as exc:
                # One bad rule must not discard the fields already extracted.
                logger.warning(
                    "parse_failed field=%s selector=%s error=%s",
                    field,
                    rule.get("selector"),
                    exc,
                )
                result[field] = None

        return result

    @classmethod
    async def health(cls) -> Dict[str, Any]:
        """Report which fetchers are importable and the Scrapling version.

        Also stores the detected fetcher list on ``FETCHERS_AVAILABLE``.

        Returns:
            A dict with ``available`` (list of fetcher modes) and ``version``
            (``scrapling.__version__``, ``"0.4+"`` when the module has no
            such attribute, or ``"not installed"`` when the import fails).
        """
        available = await cls._check_fetchers()
        cls.FETCHERS_AVAILABLE = available
        try:
            import scrapling
            version = getattr(scrapling, "__version__", "0.4+")
        except ImportError:
            version = "not installed"
        return {
            "available": available,
            "version": version,
        }