"""
Shared helpers + Parser registry used by every site module.

Each site module (sites/amazon.py, sites/myntra.py, etc.) exports a
`parse(soup) -> ProductData` function. The registry maps lowercased URL
substring keywords to those parsers.
"""
import re
from dataclasses import dataclass, field, asdict
from typing import Callable, Optional

from bs4 import BeautifulSoup


# ── Shared data structure every parser must return ──────────────────

@dataclass
class ProductData:
    """Normalized product record that every site parser must return.

    Every text field defaults to the empty string and ``reviews`` to a
    fresh empty list, so a parser only has to fill in what it found.

    Attributes:
        title: Product title.
        description: Product description.
        features: Bullet points / key features.
        specs: Key:value pairs joined with " | ".
        materials: Composition / fabric / ingredients.
        sizes: Available sizes (clothing).
        return_policy: Return/exchange policy text.
        rating_text: Rating summary, e.g.
            "4.3 out of 5 stars · 1,234 ratings".
        reviews: List of review dicts shaped like {title, text, rating}.
        source: Label for where the record came from; "generic" by default.
    """

    title: str = ""
    description: str = ""
    features: str = ""
    specs: str = ""
    materials: str = ""
    sizes: str = ""
    return_policy: str = ""
    rating_text: str = ""
    reviews: list = field(default_factory=list)
    source: str = "generic"

    def to_dict(self) -> dict:
        """Return the record as a plain dict keyed by field name."""
        as_mapping = asdict(self)
        return as_mapping


# ── Text helpers ────────────────────────────────────────────────────

MAX_FIELD_CHARS = 8000  # default cap on the length of a cleaned text field


def clean(text: str, limit: int = MAX_FIELD_CHARS) -> str:
    """Collapse whitespace in *text* and cap the result at *limit* characters.

    Every run of whitespace becomes a single space and the ends are
    stripped. Falsy input (None, '') yields ''. Non-string input is
    converted with str() before cleaning.

    Args:
        text: The raw text to normalize.
        limit: Maximum number of characters to keep.

    Returns:
        The normalized text, at most *limit* characters long, with no
        leading or trailing whitespace.
    """
    if not text:
        return ""
    collapsed = re.sub(r"\s+", " ", str(text)).strip()
    # The cut can land right after a word boundary and leave a dangling
    # space, so strip once more to keep the output fully normalized.
    return collapsed[:limit].rstrip()


def first_text(soup: BeautifulSoup, *selectors: str) -> str:
    """Return the text of the first selector that yields non-empty text.

    Selectors are tried in the order given. A selector is skipped when
    evaluating it raises, when it matches nothing, or when the matched
    element has no text. Returns '' if no selector produces text.
    """
    for css in selectors:
        try:
            node = soup.select_one(css)
        except Exception:
            # Best-effort lookup: a selector that cannot be evaluated is
            # skipped so the remaining fallbacks still get a chance.
            continue
        if not node:
            continue
        content = node.get_text(" ", strip=True)
        if content:
            return content
    return ""


def all_text_join(soup: BeautifulSoup, selector: str, sep: str = " · ",
                  limit_items: int = 30) -> str:
    """Join the text of all elements matching *selector*.

    Only the first *limit_items* matches are considered; matches with no
    text are then dropped, so the result may contain fewer items than
    the limit.

    Args:
        soup: Parsed document (or element) to search.
        selector: CSS selector for the elements to collect.
        sep: String placed between the collected texts.
        limit_items: Maximum number of matched elements to look at.

    Returns:
        The collected texts joined by *sep*, or '' when nothing matches
        or the selector cannot be evaluated.
    """
    try:
        matched = soup.select(selector)[:limit_items]
    except Exception:
        # Best-effort lookup: treat an unusable selector as "no matches".
        return ""
    # Extract each element's text once; with strip=True the text is empty
    # exactly when the element has no non-blank strings, so a single call
    # serves both the emptiness filter and the joined output.
    texts = (node.get_text(" ", strip=True) for node in matched)
    return sep.join(piece for piece in texts if piece)


def extract_kv_table(soup: BeautifulSoup, selector: str) -> str:
    """Flatten key/value spec tables into a 'key: value | key: value' string.

    For every ``tr`` under each element matching *selector*, the first
    th/td cell is taken as the key and the last as the value. Rows with
    fewer than two cells, an empty key or value, or identical key and
    value are ignored. Duplicate entries are emitted once, in first-seen
    order. Returns '' when the selector cannot be evaluated or no row
    qualifies.
    """
    try:
        matched_tables = soup.select(selector)
    except Exception:
        return ""

    # A dict keeps insertion order, so it doubles as an ordered set here.
    entries: dict[str, None] = {}
    for tbl in matched_tables:
        for tr in tbl.select("tr"):
            cols = tr.find_all(["th", "td"])
            if len(cols) < 2:
                continue
            key = cols[0].get_text(" ", strip=True)
            value = cols[-1].get_text(" ", strip=True)
            if not key or not value or key == value:
                continue
            entries.setdefault(f"{key}: {value}")
    return " | ".join(entries)


def parse_rating_number(text: str) -> Optional[float]:
    """Return the first decimal number found in *text*, or None.

    '4.5 out of 5' -> 4.5

    Only '.' is recognised as a decimal separator, so '1,234' yields 1.0.
    Non-string input (for example a numeric rating) is converted with
    str() before searching instead of raising TypeError.

    Args:
        text: Text that may contain a rating number.

    Returns:
        The first number as a float, or None when *text* is None or
        contains no digits.
    """
    if text is None:
        return None
    found = re.search(r"[0-9]+\.?[0-9]*", str(text))
    if found is None:
        return None
    # Anything this pattern matches ("4", "4.", "4.5") is a valid float
    # literal, so float() cannot raise ValueError here.
    return float(found.group())


# ── Registry ────────────────────────────────────────────────────────

# Signature shared by every site parser: takes the parsed page and returns
# a ProductData.
ParserFn = Callable[[BeautifulSoup], ProductData]
# Lowercased URL keyword -> parser. Filled by register(), read by
# find_parser() and get_registered_sites().
_REGISTRY: dict[str, ParserFn] = {}


def register(*url_keywords: str):
    """Build a decorator that files a parser under each given URL keyword.

    Keywords are lowercased before being stored. A keyword that is
    already present in the registry is overwritten. The decorated
    function itself is returned unchanged.
    """
    def decorator(parser: ParserFn) -> ParserFn:
        for keyword in (raw.lower() for raw in url_keywords):
            _REGISTRY[keyword] = parser
        return parser

    return decorator


def find_parser(url: str) -> Optional[ParserFn]:
    """Return the registered parser that best matches *url*, or None.

    A keyword matches when it occurs anywhere in the lowercased URL.
    When several keywords match, the longest one wins, so a more
    specific keyword is preferred no matter which parser was registered
    first. Keywords of equal length keep registration order.

    Args:
        url: The page URL; None or '' is treated as an empty string.

    Returns:
        The matching parser function, or None when no keyword matches.
    """
    url_lower = (url or "").lower()
    matching = [key for key in _REGISTRY if key in url_lower]
    if not matching:
        return None
    # max() returns the first maximal item, which keeps ties in
    # registration order (dicts iterate in insertion order).
    return _REGISTRY[max(matching, key=len)]


def get_registered_sites() -> list[str]:
    """Return every registered URL keyword as a new, sorted list."""
    site_keys = list(_REGISTRY)
    site_keys.sort()
    return site_keys