from __future__ import annotations

import re
import unicodedata
from functools import lru_cache
from typing import Iterable, List

import inflect

# Module-wide inflect engine; singularize() uses it to look up singular forms.
_p = inflect.engine()


def normalize_text(text: str) -> str:
    """Return a lowercase, ASCII-only, whitespace-collapsed form of *text*.

    Steps, in order:

    1. Treat falsy input (``None``, ``""``) as the empty string, lowercase
       and strip it.
    2. Fold diacritics so accented letters keep their base letter
       ("jalapeño" -> "jalapeno") instead of being blanked out and splitting
       the word in two.
    3. Map curly *single* quotes to a plain apostrophe ("baker’s" ->
       "baker's").
    4. Replace every run of characters other than ``a-z``, ``0-9``,
       whitespace, ``-`` and ``'`` with one space.  Curly double quotes fall
       into this bucket, exactly like straight double quotes.
    5. Collapse whitespace runs and strip the ends.
    """
    text = (text or "").lower().strip()

    # Canonical decomposition splits "ñ" into "n" + a combining tilde; dropping
    # the combining marks keeps the base letter.  NFD (not NFKD) is used so
    # compatibility characters such as "½" are left untouched here.
    decomposed = unicodedata.normalize("NFD", text)
    text = "".join(ch for ch in decomposed if not unicodedata.combining(ch))

    # Only single curly quotes stand in for an apostrophe; double curly quotes
    # are quotation marks and are removed by the next substitution.
    text = re.sub(r"[\u2018\u2019]", "'", text)
    text = re.sub(r"[^a-z0-9\s\-']+", " ", text)
    return re.sub(r"\s+", " ", text).strip()


@lru_cache(maxsize=4096)
def singularize(word: str) -> str:
    """Return the singular form of *word*, or the normalized word itself.

    The input is passed through ``normalize_text`` first, so the result is
    always in normalized form.  When inflect reports no singular form (it
    returns a falsy value), the normalized word is returned unchanged.
    Results are memoized for up to 4096 distinct inputs.
    """
    word = normalize_text(word)
    if not word:
        # Nothing to singularize.  Some inflect releases validate their
        # argument and raise on an empty string, so never hand one over.
        return word
    singular = _p.singular_noun(word)
    return singular if singular else word


def dedupe_preserve_order(items: Iterable[str]) -> List[str]:
    """Normalize each item and return the distinct non-empty results.

    Items are compared after ``normalize_text``; the first occurrence of each
    normalized value determines its position in the returned list, and values
    that normalize to the empty string are dropped.
    """
    # A dict keeps insertion order, so its keys double as an ordered set.
    ordered = {}
    for raw in items:
        key = normalize_text(raw)
        if key:
            ordered.setdefault(key, None)
    return list(ordered)


# Leading quantity.  Either a fraction ("1/2", "½") optionally preceded by a
# whole number ("1 1/2", "1½"), or a plain integer/decimal ("2", "1.5").  The
# fraction alternative is tried first: otherwise "1/2" would match as the
# integer "1" and leave "/2" behind.
_AMOUNT_RE = re.compile(
    r"^(?:"
    r"(?:\d+\s*)?(?:\d+/\d+|[\u00bc\u00bd\u00be\u2153\u2154\u215b\u215c\u215d\u215e])"
    r"|\d+(?:\.\d+)?"
    r")\s*"
)
# Leading unit or size word followed by whitespace.
_MEASURE_RE = re.compile(
    r"^(?:g|kg|mg|ml|l|oz|lb|lbs|cup|cups|tbsp|tablespoon|tsp|teaspoon|clove|cloves|slice|slices|piece|pieces|can|cans|bunch|handful|pinch|large|small|medium|whole)\s+"
)
# Leading whitespace, bullets and stray punctuation in front of a quantity.
# Letters, digits, "-", "'" and the fraction glyphs are left in place.
_LEADING_NOISE_RE = re.compile(
    r"^[^a-z0-9\-'\u00bc\u00bd\u00be\u2153\u2154\u215b\u215c\u215d\u215e]+"
)
# Parenthetical notes such as "(chopped)" or "(about 200 g)".
_PAREN_RE = re.compile(r"\(.*?\)")


def strip_amounts_and_preps(text: str) -> str:
    """Remove leading quantities and prep words from a recipe fragment.

    Returns the normalized remainder after dropping, in order: parenthetical
    notes, one leading amount, one leading unit/size word, and a leading
    "of".  Falsy input yields the empty string.

    Parentheticals and the amount are removed from the lowercased raw text
    *before* ``normalize_text`` runs.  Normalization replaces "/", "." and
    parentheses with spaces, so doing it first would turn "1/2 cup sugar"
    into "1 2 cup sugar" and "1.5 cups" into "1 5 cups", and would leave the
    contents of "(chopped)" in the result.
    """
    raw = (text or "").lower()
    raw = _PAREN_RE.sub(" ", raw)
    raw = _LEADING_NOISE_RE.sub("", raw)
    raw = _AMOUNT_RE.sub("", raw)

    # From here on the text is normalized, so single spaces separate words.
    cleaned = normalize_text(raw)
    cleaned = _MEASURE_RE.sub("", cleaned)
    cleaned = re.sub(r"^of\s+", "", cleaned)
    return cleaned.strip()


def tokenize_recipe_segments(text: str) -> List[str]:
    """Break a recipe string into cleaned, de-duplicated ingredient chunks.

    The text is split on commas, newlines and semicolons.  Only when that
    produces a single chunk is it split instead on the word "and"
    (case-insensitive, surrounded by whitespace).  Each chunk then goes
    through ``strip_amounts_and_preps``; results shorter than two characters
    are discarded and the rest are de-duplicated in first-seen order.
    """
    source = text or ""
    chunks = re.split(r",|\n|;", source)
    if len(chunks) == 1:
        # No primary separator present: fall back to splitting on "and".
        chunks = re.split(r"\s+and\s+", source, flags=re.IGNORECASE)

    stripped = (strip_amounts_and_preps(chunk) for chunk in chunks)
    return dedupe_preserve_order([piece for piece in stripped if len(piece) > 1])


def ingredient_variants(ingredient: str) -> List[str]:
    """List alternative spellings of *ingredient*, most specific first.

    Candidates are collected in this order and then normalized and
    de-duplicated by ``dedupe_preserve_order``:

    1. the normalized name, then its singular form if different;
    2. for a name ending in a known category word (" cheese", " oil", ...),
       the part before that word and its singular form;
    3. for multi-word names: the first word, the last word, the first two
       words, and everything after the first word.

    No length filtering is applied here; short single-word fragments are
    included and simply appear after the full name in the result.
    """
    name = normalize_text(ingredient)
    candidates = [name]

    singular_name = singularize(name)
    if singular_name != name:
        candidates.append(singular_name)

    category_tails = (
        " cheese",
        " oil",
        " milk",
        " cream",
        " butter",
        " powder",
        " sauce",
        " paste",
        " extract",
    )
    for tail in category_tails:
        # Require more than one character in front of the category word.
        if not name.endswith(tail) or len(name) <= len(tail) + 1:
            continue
        head = name[: -len(tail)].strip()
        candidates.append(head)
        head_singular = singularize(head)
        if head_singular != head:
            candidates.append(head_singular)

    tokens = name.split()
    if len(tokens) >= 2:
        candidates += [
            tokens[0],
            tokens[-1],
            " ".join(tokens[:2]),
            " ".join(tokens[1:]),
        ]

    return dedupe_preserve_order(candidates)


def as_aliases(aliases: str | float | None) -> List[str]:
    """Split a "|"-separated alias string into a clean list.

    Any non-string input (``None``, floats, ...) yields an empty list.  For
    strings, each "|"-delimited piece is stripped and the pieces are passed
    to ``dedupe_preserve_order``.
    """
    if isinstance(aliases, str):
        pieces = [piece.strip() for piece in aliases.split("|")]
        return dedupe_preserve_order(pieces)
    return []