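"""LLM inference for moderating wishes and transforming them into poetic sayings.

The chat model is selected by the MODEL_ID environment variable (default:
Qwen/Qwen2.5-1.5B-Instruct); Nemotron-style reasoning traces are stripped from
its output. The module also synthesizes whispered speech with Kokoro.
"""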

from __future__ import annotations

import os
import random
import re

try:
    import spaces  # Supplied by the ZeroGPU runtime; not installed for local dev.
except ImportError:
    class _SpacesShim:
        """Local stand-in for the ``spaces`` package where ``GPU`` does nothing."""

        @staticmethod
        def GPU(*args, **kwargs):
            """Act as a no-op decorator for both ``@GPU`` and ``@GPU(...)`` forms."""
            used_bare = bool(args) and callable(args[0])
            if used_bare:
                # ``@spaces.GPU`` without parentheses: hand the function back untouched.
                return args[0]
            # ``@spaces.GPU(duration=...)``: return an identity decorator.
            return lambda fn: fn

    spaces = _SpacesShim()

# Hugging Face model id passed to ``from_pretrained`` in ``_load_model``;
# override it with the MODEL_ID environment variable.
MODEL_ID = os.environ.get(
    "MODEL_ID", "Qwen/Qwen2.5-1.5B-Instruct"
)

# Local-dev fallback so the UI can be exercised without CUDA/FP8: when true,
# review_wish() and generate_poetic_saying() return canned results instead of
# loading the model.
# Enabled only when STUB_MODEL=1 AND SPACE_ID is unset. SPACE_ID is set on
# HF Spaces, so the stub is hard-disabled there and prod always uses the real model.
STUB_MODEL = os.environ.get("STUB_MODEL") == "1" and not os.environ.get("SPACE_ID")

# Canned sayings; generate_poetic_saying() picks one at random when STUB_MODEL
# is enabled, so no model call is needed.
STUB_SAYINGS = [
    "Even the smallest light remembers it once was a wish.",
    "Hope travels farther than any star — it crosses the dark and arrives whole.",
    "What you whisper to the night, the night whispers to the morning.",
    "Every constellation began as someone daring to wish out loud.",
    "The cosmos keeps your hope safe in a quiet pocket of starlight.",
]

# System message for generate_poetic_saying(): asks for a single one- or
# two-sentence saying with no preamble, labels, or quotation marks. The caller
# still strips surrounding quotes defensively.
SYSTEM_PROMPT = (
    "You are a wise cosmic oracle who speaks in brief, luminous poetry. "
    "When given someone's wish or hope, respond with a single poetic saying "
    "of one or two sentences. Be warm, timeless, and uplifting. "
    "Reply with only the saying — no preamble, labels, or quotation marks."
)

# Closed set of wish themes. _coerce_category() maps model output onto one of
# these and falls back to "SHAPE" when nothing matches. Keep this tuple in sync
# with the "Theme is one of" list in REVIEW_PROMPT below.
CATEGORIES = ("SHAPE", "BOON", "JOURNEY", "BOND", "TRIBUTE")

# System message for review_wish(): a single model call that both moderates the
# wish (POSITIVE / NEGATIVE / NONSENSE) and classifies its theme. The reply is a
# single pipe-separated line, which review_wish() parses:
#   POSITIVE | <THEME>
#   NEGATIVE | <THEME> | <rewrite>
#   NONSENSE
REVIEW_PROMPT = (
    "You are a thoughtful gatekeeper for a public 'wishes among the stars' constellation.\n"
    "Do two things at once: judge the wish and classify its theme.\n\n"
    "Judgement is one of:\n"
    "- POSITIVE: a sincere hope, dream, intention, or aspiration suitable to share publicly.\n"
    "- NEGATIVE: contains vulgarity, hatred, cruelty, wishes for harm, or anything unethical.\n"
    "- NONSENSE: random characters, gibberish, or not a coherent wish.\n\n"
    "Theme is one of:\n"
    "- SHAPE: self-change, personal growth, becoming someone different.\n"
    "- BOON: gifts, possessions, gear, money, material things.\n"
    "- JOURNEY: travel, going somewhere, exploration.\n"
    "- BOND: relationships, family, friends, love, connection with others.\n"
    "- TRIBUTE: giving to others, sacrifice, service, helping someone else.\n\n"
    "Respond on a SINGLE line in EXACTLY one of these formats, with no extra words:\n"
    "POSITIVE | <THEME>\n"
    "NEGATIVE | <THEME> | <a sincere, kind-hearted rewrite of the same underlying hope, one sentence>\n"
    "NONSENSE"
)

# Lazily-initialised module-level singletons. They stay None until first use:
# _load_model() fills _tokenizer and _model, _load_tts() fills _tts_pipeline.
_tokenizer = None
_model = None
_tts_pipeline = None


def _load_model() -> tuple:
    """Return the cached ``(tokenizer, model)`` pair, loading both on first use.

    The heavy ``torch`` / ``transformers`` imports are deferred to the first
    call. With CUDA available the model is loaded in bfloat16 with
    ``device_map="auto"``; otherwise it is loaded in float32 and placed on the
    CPU. Both objects are stored in module globals so later calls are cheap.
    """
    global _tokenizer, _model
    if _model is None or _tokenizer is None:
        import torch
        from transformers import AutoModelForCausalLM, AutoTokenizer

        on_gpu = torch.cuda.is_available()
        # NOTE: trust_remote_code=True executes code shipped with the model repo.
        _tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
        _model = AutoModelForCausalLM.from_pretrained(
            MODEL_ID,
            torch_dtype=torch.bfloat16 if on_gpu else torch.float32,
            trust_remote_code=True,
            device_map="auto" if on_gpu else None,
        )
        if not on_gpu:
            _model = _model.to("cpu")
    return _tokenizer, _model


def _strip_reasoning(text: str) -> str:
    """Remove model reasoning traces from *text* and trim surrounding whitespace.

    Handles both the "think" and "reasoning" tag families, case-insensitively
    and across newlines. Three shapes are removed for each family:

    * a well-formed ``open ... close`` block (possibly several of them);
    * an orphan closing tag, in which case everything up to and including the
      last such tag is dropped (the opening tag was part of the prompt);
    * an unclosed opening tag, in which case everything from that tag to the
      end is dropped (generation was cut off mid-trace).

    Text without any of these tags is returned unchanged apart from stripping.
    """
    flags = re.DOTALL | re.IGNORECASE
    # Tag names are assembled at runtime so the literal markers never appear
    # verbatim in this source file.
    for tag in ("think", "reasoning"):
        open_tag = re.escape("<" + tag + ">")
        close_tag = re.escape("</" + tag + ">")
        # 1) Well-formed blocks, non-greedy so separate blocks stay separate.
        text = re.sub(open_tag + r".*?" + close_tag, "", text, flags=flags)
        # 2) Orphan closing tag: the reasoning is whatever precedes it.
        text = re.sub(r"\A.*" + close_tag, "", text, flags=flags)
        # 3) Unclosed opening tag: the trace runs to the end of the text.
        text = re.sub(open_tag + r".*\Z", "", text, flags=flags)
    return text.strip()


def _coerce_category(raw: str) -> str:
    """Map free-form model output onto one of ``CATEGORIES``.

    Matching is case-insensitive. A value that starts with a category name
    wins outright. Otherwise the earliest whole-word mention of a category is
    used, so decorated answers such as ``<BOND>``, ``**BOND**`` or
    ``Theme: BOND`` are still recognised. Returns ``"SHAPE"`` when no
    category is found.
    """
    candidate = raw.strip().upper()
    for cat in CATEGORIES:
        if candidate.startswith(cat):
            return cat
    # Fallback for answers wrapped in brackets, markdown, or a label prefix.
    names = "|".join(re.escape(cat) for cat in CATEGORIES)
    found = re.search(r"\b(" + names + r")\b", candidate)
    return found.group(1) if found else "SHAPE"


def _parse_review_reply(text: str) -> dict:
    """Map the model's raw review reply onto a ``review_wish`` result dict.

    The verdict is read from the first pipe-separated field of the first line
    that mentions POSITIVE, NEGATIVE or NONSENSE. Reading only that field means
    a NEGATIVE rewrite that merely contains the word "nonsense" is not mistaken
    for a NONSENSE verdict.
    """
    keywords = ("POSITIVE", "NEGATIVE", "NONSENSE")
    # Tolerate preamble: take the first line that carries any verdict keyword.
    verdict_line = next(
        (ln for ln in text.splitlines() if any(k in ln.upper() for k in keywords)),
        "",
    )
    # maxsplit=2 keeps any "|" inside the rewrite sentence intact.
    parts = [p.strip() for p in verdict_line.split("|", 2)] if verdict_line else []
    verdict = parts[0].upper() if parts else ""
    category = _coerce_category(parts[1]) if len(parts) > 1 else "SHAPE"

    if "NONSENSE" in verdict:
        return {"kind": "nonsense", "rewrite": None, "category": None}

    if "NEGATIVE" in verdict:
        rewrite = parts[2].strip().strip('"').strip("'") if len(parts) > 2 else ""
        if rewrite:
            return {"kind": "rewrite", "rewrite": rewrite, "category": category}
        # NOTE(review): a NEGATIVE verdict without a usable rewrite is approved
        # as "ok" (fail-open) — confirm this is the intended policy.
        return {"kind": "ok", "rewrite": None, "category": category}

    if "POSITIVE" in verdict:
        return {"kind": "ok", "rewrite": None, "category": category}

    # No verdict in the leading field: stay lenient and honour NONSENSE
    # anywhere in the reply before defaulting to approval.
    if "NONSENSE" in text.upper():
        return {"kind": "nonsense", "rewrite": None, "category": None}
    return {"kind": "ok", "rewrite": None, "category": category}


@spaces.GPU(duration=60)
def review_wish(wish: str) -> dict:
    """Moderate a wish AND classify its theme in one model call.

    Args:
        wish: The user's wish. ``None``, empty, or whitespace-only input is
            treated as nonsense without calling the model.

    Returns:
        ``{"kind": "ok"|"rewrite"|"nonsense", "rewrite": str|None,
        "category": str|None}``. ``rewrite`` is set only for kind
        ``"rewrite"``; ``category`` is ``None`` only for kind ``"nonsense"``.
        With ``STUB_MODEL`` enabled every non-empty wish is ``"ok"`` / ``"SHAPE"``.
    """
    wish = (wish or "").strip()
    if not wish:
        return {"kind": "nonsense", "rewrite": None, "category": None}
    if STUB_MODEL:
        return {"kind": "ok", "rewrite": None, "category": "SHAPE"}

    import torch

    tokenizer, model = _load_model()
    messages = [
        {"role": "system", "content": REVIEW_PROMPT},
        {"role": "user", "content": f'Wish: "{wish}"'},
    ]
    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    ).to(model.device)

    with torch.no_grad():
        # Greedy decoding keeps the moderation verdict deterministic.
        outputs = model.generate(
            **inputs,
            max_new_tokens=100,
            do_sample=False,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Decode only the newly generated tokens, not the echoed prompt.
    prompt_len = inputs["input_ids"].shape[-1]
    text = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)
    text = _strip_reasoning(text).strip()
    print(f"[review_wish] wish={wish!r} raw={text!r}", flush=True)

    return _parse_review_reply(text)


def _load_tts():
    """Return the shared Kokoro ``KPipeline``, constructing it on first use.

    The ``kokoro`` import is deferred until the pipeline is actually needed,
    and the instance is cached in the module-level ``_tts_pipeline``.
    """
    global _tts_pipeline
    if _tts_pipeline is None:
        from kokoro import KPipeline

        # lang_code "a" selects American English.
        _tts_pipeline = KPipeline(lang_code="a")
    return _tts_pipeline


@spaces.GPU(duration=60)
def synthesize_whisper(text: str):
    """Render *text* as quiet, slowed-down speech using the Kokoro pipeline.

    Returns:
        ``(24000, waveform)`` where ``waveform`` is a float32 ``np.ndarray``
        scaled down in amplitude, or ``None`` when *text* is empty/``None`` or
        the pipeline yields no audio.
    """
    spoken = (text or "").strip()
    if not spoken:
        return None

    import numpy as np

    def to_float32(audio):
        # Objects exposing ``detach`` (e.g. torch tensors) are moved to CPU first.
        if hasattr(audio, "detach"):
            audio = audio.detach().cpu().numpy()
        return np.asarray(audio, dtype=np.float32)

    tts = _load_tts()
    # Voice 'af_nicole' at reduced speed; together with the gain below this
    # gives the hushed, whisper-like delivery.
    segments = [
        to_float32(audio)
        for _, _, audio in tts(spoken, voice="af_nicole", speed=0.85)
    ]
    if not segments:
        return None

    # 0.55 gain quiets the voice toward a whisper; 24000 is the sample rate reported to callers.
    return 24000, np.concatenate(segments) * 0.55


@spaces.GPU(duration=120)
def generate_poetic_saying(wish: str, max_new_tokens: int = 120) -> str:
    """Transform a user's wish into a short poetic, wise saying.

    Args:
        wish: The user's wish. ``None``, empty, or whitespace-only input
            yields a fixed fallback saying without calling the model.
        max_new_tokens: Upper bound on generated tokens.

    Returns:
        The saying with reasoning traces, surrounding whitespace and
        surrounding quotation marks (straight or typographic) removed. A fixed
        fallback saying is returned if nothing remains. With ``STUB_MODEL``
        enabled a random entry from ``STUB_SAYINGS`` is returned instead.
    """
    wish = (wish or "").strip()
    if not wish:
        return "Even silence holds a star waiting to be named."

    if STUB_MODEL:
        return random.choice(STUB_SAYINGS)

    import torch

    tokenizer, model = _load_model()

    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {
            "role": "user",
            "content": (
                f"A traveler shares this wish with the cosmos:\n\n\"{wish}\"\n\n"
                "Speak the star's wisdom."
            ),
        },
    ]

    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    ).to(model.device)

    with torch.no_grad():
        # Sampling (rather than greedy decoding) so repeated wishes vary.
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Decode only the newly generated tokens, not the echoed prompt.
    prompt_len = inputs["input_ids"].shape[-1]
    saying = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)

    # Models often wrap the saying in quotes despite the prompt; strip straight
    # and typographic quotes in any nesting order, plus whitespace inside them.
    quote_chars = "\"'\u201c\u201d\u2018\u2019"
    saying = _strip_reasoning(saying).strip().strip(quote_chars).strip()

    return saying or "Your hope already burns — a quiet star the universe remembers."