"""Prompt construction and structured-output parsing for TemperCheck.

The model is asked to return a single JSON object. Small VLMs drift from
requested formats, so `parse_verdict` is deliberately defensive: it extracts the
first JSON object it can find and clamps/falls back on every field.
"""

from __future__ import annotations

import json
import re
from dataclasses import dataclass, field
from typing import Any

# The whole app hangs off this contract. If you change the shape, update
# parse_verdict, the Gradio output rendering, and tests/test_parsing.py together.
#
# build_messages sends this text verbatim as the system turn. Two things in it
# are mirrored by code in this module and should be edited in step:
#   * the score guide's 20/40/60/80 cut points, which are the same thresholds
#     listed in SCORE_BANDS;
#   * the requested JSON keys (score, verdict, rationale, signals), which are
#     exactly the keys parse_verdict reads.
SYSTEM_PROMPT = """\
You are TemperCheck, a playful but sharp-eyed party-game judge. Given a photo or
screenshot of a social-media profile, you estimate — for entertainment only — how
short-tempered / "cranky to deal with" the person seems. This is a whimsical
novelty read, NOT a real personality assessment or a factual claim about anyone.

Judge mainly by what the person CHOSE TO PUT in their profile (bio text, display
name, handle, captions, stated attitude); treat facial expression and overall
vibe as a lighter, secondary signal. Read the profile text carefully and look for:

1. HOSTILITY & SARCASM — a biting, contemptuous, passive-aggressive, or sarcastic
   tone; insults; mocking others; "I don't care what you think" energy.
2. PRIDE IN CONFLICT — does the profile brag about upsetting people, starting
   fights, blocking/roasting/"destroying" others, or being "brutally honest"?
   Treating confrontation as a personality is a strong temper signal.
3. EXPLICIT WARNING SIGNS stated right in the profile — slurs, hateful or
   demeaning language, open anger, ALL-CAPS ranting, relentless negativity or
   doom, or harsh criticism aimed at other people.

The more of these appear, and the more prominent they are, the higher the score.
A warm, friendly, or neutral profile with none of them scores low. If there is
little readable text, lean on overall vibe, say so, and keep the score mid-range.

Score guide (0 = delightful, 100 = run):
  0-20   warm/friendly, no red flags
  21-40  mostly pleasant, minor edge
  41-60  mixed signals, or too little to tell
  61-80  clearly prickly: sarcasm, negativity, or pride-in-conflict present
  81-100 multiple strong warning signs (e.g. hostility + slurs/anger + conflict-pride)

Two extra content flags, independent of temper — ALWAYS add the tag when present:
- NSFW — inspect the ACTUAL IMAGE as carefully as the text, and flag adult
  content from EITHER source. Add the signal "NSFW" if the picture shows nudity,
  sexual or fetish content, underwear/lingerie as the focus, or other explicit or
  adult visual material, OR if the text is sexually suggestive ("18+", "spicy
  content", adult-site links). This visual check OVERRIDES the "text-first"
  guidance above: flag NSFW based on the image even when the bio text is perfectly
  innocuous. In your rationale, say whether the NSFW signal came from the image,
  the text, or both.
- If the profile mentions crypto (Bitcoin, Ethereum, NFTs, tokens, "$TICKER",
  "to the moon", web3, "diamond hands", trading shills), add the signal "crypto".
Put any NSFW/crypto tags FIRST in the signals list so they are never dropped.

Respond with ONLY a single JSON object, no prose before or after, of the form:
{
  "score": <integer 0-100, higher = shorter-tempered / crankier>,
  "verdict": "<3-6 word punchy label>",
  "rationale": "<1-2 sentences naming the specific signals you actually saw>",
  "signals": ["<short flag>", "<short flag>", "<short flag>"]
}
Each signal must be a concrete thing you observed, e.g. "sarcastic bio", "brags
about blocking people", "ALL-CAPS anger", "slur in bio", or "warm smile, kind bio".
Stay good-natured: you are reading the profile's stated content and vibe, never
mocking how someone looks."""

# Text half of the user turn; build_messages attaches it after the image part.
USER_INSTRUCTION = (
    "Read this profile — its bio/handle text first, then the overall vibe — "
    "and return the temper JSON. "
    "Be playful, not mean."
)

# 0-100 score bucket -> emoji label used by the UI.
# Entries are (inclusive upper bound, label) in ascending order. The bounds
# follow the score guide in SYSTEM_PROMPT (0-20, 21-40, 41-60, 61-80, 81-100);
# the final bound sits above the maximum score so the last band catches
# everything over 80.
SCORE_BANDS = [
    (20, "😇 Sunshine"),
    (40, "🙂 Easygoing"),
    (60, "😐 Depends on the day"),
    (80, "😤 Bit prickly"),
    (101, "🌋 Approach with coffee"),
]


@dataclass
class TemperVerdict:
    """One parsed temper reading for a profile.

    Attributes:
        score: Crankiness estimate, higher = shorter-tempered. `parse_verdict`
            clamps it to 0-100 before constructing the verdict.
        verdict: Short punchy label for the reading.
        rationale: Brief explanation of what the model saw.
        signals: Short flags the model reported; empty by default.
        raw: Original model text, for debugging / agent traces.
    """

    score: int
    verdict: str
    rationale: str
    signals: list[str] = field(default_factory=list)
    raw: str = ""  # original model text, for debugging / agent traces

    @property
    def band(self) -> str:
        """Return the emoji label of the band this score falls in.

        Upper bounds are inclusive so boundary scores land where the prompt's
        score guide puts them: 20 is still "0-20", 80 is still "61-80". A
        score above every bound falls back to the last band.
        """
        for upper_bound, label in SCORE_BANDS:
            if self.score <= upper_bound:
                return label
        return SCORE_BANDS[-1][1]


def build_messages(image_ref: Any) -> list[dict]:
    """Build the two-turn chat payload in the Gemma 4 multimodal format.

    The system turn carries SYSTEM_PROMPT as a single text part. The user turn
    carries the image part first and the USER_INSTRUCTION text part second.
    `image_ref` is passed through untouched as the image part's "image" value.
    """
    system_turn = {
        "role": "system",
        "content": [{"type": "text", "text": SYSTEM_PROMPT}],
    }
    # Order matters here: the image part precedes the instruction text.
    user_parts = [
        {"type": "image", "image": image_ref},
        {"type": "text", "text": USER_INSTRUCTION},
    ]
    user_turn = {"role": "user", "content": user_parts}
    return [system_turn, user_turn]


def _first_json_object(text: str) -> dict | None:
    """Find the first balanced {...} block and parse it, tolerating junk around it."""
    start = text.find("{")
    while start != -1:
        depth = 0
        for i in range(start, len(text)):
            if text[i] == "{":
                depth += 1
            elif text[i] == "}":
                depth -= 1
                if depth == 0:
                    try:
                        return json.loads(text[start : i + 1])
                    except json.JSONDecodeError:
                        break
        start = text.find("{", start + 1)
    return None


def _coerce_score(raw_score: Any) -> int:
    """Turn whatever the model put in "score" into an int clamped to 0-100.

    Numbers and numeric strings are rounded with `round()`. Anything else falls
    back to the first run of digits in its string form (so "85/100" gives 85),
    and to the mid-range default of 50 when nothing numeric can be recovered.
    """
    try:
        number = float(raw_score)
    except (TypeError, ValueError, OverflowError):
        # OverflowError: an integer too large to be represented as a float.
        number = None
    if number is not None and number == number:  # NaN != NaN, so NaN is skipped
        # Clamp before rounding: round() raises OverflowError on +/-infinity,
        # which json.loads yields for `Infinity` and for literals like 1e999.
        return int(round(max(0.0, min(100.0, number))))
    try:
        match = re.search(r"-?\d+", str(raw_score))
        score = int(match.group()) if match else 50
    except ValueError:
        # Digit runs beyond the interpreter's int<->str conversion limit.
        score = 50
    return max(0, min(100, score))


def _clean_text(value: Any, fallback: str) -> str:
    """Return `value` as stripped text, or `fallback` if it is null or blank."""
    if value is None:
        return fallback
    return str(value).strip() or fallback


def parse_verdict(text: str) -> TemperVerdict:
    """Parse model output into a TemperVerdict, never raising on bad output.

    Args:
        text: Raw text produced by the model. It is expected to contain one
            JSON object, possibly surrounded by other text.

    Returns:
        A TemperVerdict in which every field has a usable value: the score is
        an int in 0-100 (50 when unrecoverable), verdict and rationale fall
        back to placeholder strings when missing, null, or blank, and signals
        holds at most six non-empty strings. `raw` keeps `text` unchanged.
    """
    data = _first_json_object(text) or {}

    # signals: tolerate null, a bare scalar, or a list containing nulls/blanks.
    raw_signals = data.get("signals")
    if raw_signals is None:
        raw_signals = []
    elif not isinstance(raw_signals, list):
        raw_signals = [raw_signals]
    stripped = (str(item).strip() for item in raw_signals if item is not None)
    # Keep the first six; the prompt asks for important tags to be listed first.
    signals = [flag for flag in stripped if flag][:6]

    return TemperVerdict(
        score=_coerce_score(data.get("score", 50)),
        verdict=_clean_text(data.get("verdict"), "Inscrutable"),
        rationale=_clean_text(
            data.get("rationale"), "The model kept its cards close."
        ),
        signals=signals,
        raw=text,
    )