"""
Scrape product data from an Amalfa product page URL.
"""

import json
import re
from typing import Any
from urllib.parse import urlparse

import requests
from bs4 import BeautifulSoup


def _clean_text(s: str) -> str:
    if not s:
        return ""
    return " ".join(s.split()).strip()


def _extract_price_from_text(text: str) -> str:
    """Find first price like Rs 1,299 or ₹1299."""
    if not text:
        return ""
    m = re.search(r"(?:Rs\.?|₹)\s*([\d,]+(?:\.\d{2})?)", text, re.I)
    if m:
        return m.group(0).strip()
    m = re.search(r"[\d,]+(?:\.\d{2})?", text)
    if m:
        return m.group(0)
    return ""


# Upper bound on how many image URLs are kept from any single source.
_MAX_IMAGES = 9

# Ordered (keywords, category) rules applied to the lower-cased URL path.
# Matching is by substring, so "earring" must be tested before "ring".
_CATEGORY_RULES: tuple[tuple[tuple[str, ...], str], ...] = (
    (("earring",), "Earrings"),
    (("necklace", "pendant", "choker"), "Necklaces"),
    (("ring",), "Rings"),
    (("bracelet", "bangle"), "Bracelets"),
    (("anklet",), "Anklets"),
)


def _meta_content(soup: Any, **attrs: str) -> str:
    """Return the stripped ``content`` of the first ``<meta>`` matching *attrs*, or ""."""
    tag = soup.find("meta", attrs=attrs)
    content = tag.get("content") if tag is not None else None
    return content.strip() if isinstance(content, str) else ""


def _jsonld_product_nodes(payload: Any) -> list[dict[str, Any]]:
    """Return every dict in a decoded JSON-LD payload whose ``@type`` includes Product.

    Accepts a single object, a top-level array, or an object with ``@graph``.
    """
    if isinstance(payload, list):
        candidates = payload
    elif isinstance(payload, dict):
        graph = payload.get("@graph")
        candidates = [payload, *graph] if isinstance(graph, list) else [payload]
    else:
        return []

    nodes = []
    for node in candidates:
        if not isinstance(node, dict):
            continue
        node_type = node.get("@type")
        types = node_type if isinstance(node_type, list) else [node_type]
        if "Product" in types:
            nodes.append(node)
    return nodes


def _jsonld_price(offers: Any) -> str:
    """Return the price from a JSON-LD ``offers`` value (dict or list of dicts), or ""."""
    if isinstance(offers, list):
        offers = offers[0] if offers else None
    if not isinstance(offers, dict):
        return ""
    # "lowPrice" is what an aggregate offer carries instead of "price".
    for key in ("price", "lowPrice"):
        value = offers.get(key)
        # Skip null/empty values so that str(None) never becomes the price.
        if value not in (None, ""):
            return str(value)
    return ""


def _jsonld_image_urls(image: Any) -> list[str]:
    """Return up to ``_MAX_IMAGES`` URLs from a JSON-LD ``image`` value."""
    entries = image if isinstance(image, list) else [image]
    urls = []
    for entry in entries[:_MAX_IMAGES]:
        if isinstance(entry, dict):
            # Image given as an object rather than a plain URL string.
            entry = entry.get("url") or entry.get("contentUrl")
        if not entry:
            continue
        text = str(entry).strip()
        if text:
            urls.append(text)
    return urls


def _fill_from_jsonld(soup: Any, product: dict[str, Any]) -> None:
    """Populate name, description, price and images in *product* from JSON-LD scripts.

    Stops at the first point where both a name and a price are known.
    Scripts that are not valid JSON are skipped.
    """
    for script in soup.find_all("script", type="application/ld+json"):
        try:
            payload = json.loads(script.string or "{}")
        except (json.JSONDecodeError, TypeError):
            continue
        for node in _jsonld_product_nodes(payload):
            name = node.get("name")
            if isinstance(name, str) and name.strip():
                product["product_name"] = _clean_text(name)
            description = node.get("description")
            if isinstance(description, str) and description.strip():
                product["description"] = _clean_text(description)
            price = _jsonld_price(node.get("offers"))
            if price:
                product["price"] = price
            if node.get("image"):
                urls = _jsonld_image_urls(node["image"])
                if urls:
                    product["product_images"] = ", ".join(urls)
            if product["product_name"] and product["price"]:
                return


def _fetch_shopify_image_urls(product_json_url: str, headers: dict[str, str]) -> list[str]:
    """Best-effort fetch of image URLs from a ``/products/<handle>.json`` endpoint.

    Returns an empty list on any network error, non-OK status, invalid JSON,
    or unexpected payload shape; this lookup is optional and never raises.
    """
    try:
        r = requests.get(
            product_json_url,
            headers={**headers, "Accept": "application/json"},
            timeout=10,
        )
        if not r.ok:
            return []
        data = r.json()
    except (requests.RequestException, ValueError):
        # Deliberately swallowed: callers keep whatever images they already have.
        return []

    if not isinstance(data, dict):
        return []
    # The product object is either the root or wrapped as {"product": {...}}.
    wrapped = data.get("product")
    prod = wrapped if isinstance(wrapped, dict) else data
    images = prod.get("images")
    if not isinstance(images, list):
        return []

    urls: list[str] = []
    for img in images[:_MAX_IMAGES]:
        src = img.get("src") if isinstance(img, dict) else img
        if not isinstance(src, str):
            continue
        src = src.strip()
        if src.startswith("//"):
            # Protocol-relative URL; make it absolute.
            src = "https:" + src
        if src.startswith("http") and src not in urls:
            urls.append(src)
    return urls


def _category_from_path(path: str) -> str:
    """Map a URL path to a jewellery category by keyword, defaulting to "Jewellery"."""
    lowered = (path or "").lower()
    for keywords, category in _CATEGORY_RULES:
        if any(keyword in lowered for keyword in keywords):
            return category
    return "Jewellery"


def scrape_product(url: str) -> dict[str, Any]:
    """
    Fetch an Amalfa product page and extract product data from it.

    Sources are tried in order, each filling only what is still missing:
    JSON-LD Product data, Open Graph / meta tags, the first ``<h1>``,
    price-like elements (then the whole page text), and a description
    ``<div>``. When the path contains ``products/<handle>``, the
    ``/products/<handle>.json`` endpoint is also queried and, if it yields
    image URLs, they replace any images found earlier.

    Args:
        url: Absolute product page URL (scheme and host required).

    Returns:
        A dict with keys ``product_name``, ``description``, ``price``,
        ``product_images`` (comma-separated URLs) and ``category`` populated
        where possible. ``offers``, ``brand``, ``target_audience``,
        ``competitors`` and ``psychological_triggers`` are returned as empty
        strings and ``show_product`` as ``None``; this function does not
        fill them.

    Raises:
        ValueError: If *url* has no scheme or no host.
        requests.RequestException: If fetching the page fails or returns an
            error status.
    """
    parsed = urlparse(url)
    if not parsed.scheme or not parsed.netloc:
        raise ValueError(f"Invalid URL: {url}")

    headers = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Accept-Language": "en-IN,en;q=0.9",
    }
    resp = requests.get(url, headers=headers, timeout=15)
    resp.raise_for_status()
    soup = BeautifulSoup(resp.text, "html.parser")

    product: dict[str, Any] = {
        "product_name": "",
        "description": "",
        "price": "",
        "offers": "",
        "product_images": "",
        "brand": "",
        "category": "",
        "target_audience": "",
        "competitors": "",
        "psychological_triggers": "",
        "show_product": None,
    }

    _fill_from_jsonld(soup, product)

    # Meta-tag fallbacks.
    if not product["product_name"]:
        # Keep only the part before the first "|" of the og:title value.
        og_title = _meta_content(soup, property="og:title")
        product["product_name"] = _clean_text(og_title.split("|")[0])
    if not product["description"]:
        # Fall through to the plain description meta when og:description is
        # missing or present but empty.
        product["description"] = _clean_text(
            _meta_content(soup, property="og:description")
            or _meta_content(soup, name="description")
        )
    if not product["product_images"]:
        product["product_images"] = _meta_content(soup, property="og:image")

    if not product["product_name"]:
        h1 = soup.find("h1")
        if h1 is not None:
            product["product_name"] = _clean_text(h1.get_text())

    if not product["price"]:
        for sel in ("[class*='price']", ".product__price", "[data-product-price]", ".price-item"):
            el = soup.select_one(sel)
            if el is None:
                continue
            product["price"] = _extract_price_from_text(el.get_text())
            if product["price"]:
                break
        if not product["price"]:
            # Last resort: first price-looking text anywhere on the page.
            product["price"] = _extract_price_from_text(soup.get_text())

    if not product["description"]:
        desc_el = soup.find(
            "div",
            class_=re.compile(r"description|product-description|product__description", re.I),
        )
        if desc_el is not None:
            product["description"] = _clean_text(desc_el.get_text())

    # Shopify product JSON has the full images list (primary source for product images).
    # Locate "products" anywhere in the path so that prefixed URLs such as
    # /collections/<name>/products/<handle> are handled as well.
    path_parts = (parsed.path or "").strip("/").split("/")
    if "products" in path_parts:
        handle_index = path_parts.index("products") + 1
        if handle_index < len(path_parts) and path_parts[handle_index]:
            handle = path_parts[handle_index]
            product_json_url = f"{parsed.scheme}://{parsed.netloc}/products/{handle}.json"
            urls = _fetch_shopify_image_urls(product_json_url, headers)
            if urls:
                product["product_images"] = ", ".join(urls)

    product["category"] = product["category"] or _category_from_path(parsed.path)

    # Log scraped data for verification (especially product images)
    _images = [u.strip() for u in (product.get("product_images") or "").split(",") if u.strip()]
    print(
        "[scraper] product_name=%r category=%r | product_images count=%d | urls=%s"
        % (product.get("product_name"), product.get("category"), len(_images), _images)
    )

    return product