"""Case generation pipeline: two creative LLM calls + structural scaffold + solver gate.

On a solvability failure the pipeline bumps the seed and regenerates, up to a small
cap. Generation is decomposed so each model call stays small and reliable.
"""

from __future__ import annotations

from dataclasses import dataclass

from ..llm.backend import LLMBackend, LLMError
from ..llm.decoding import generate_model
from ..schemas.case import CaseFile, GenerationKnobs
from ..schemas.timeline import TimeWindow
from ..solver.checker import CheckReport, check
from .assemble import assemble_case
from .crime_profiles import kind_for_seed, profile_for
from .stages import MysteryOut, WorldCastOut, mystery_prompt, world_cast_prompt

# A fixed incident window keeps alibi reasoning simple; the model still invents the rest.
# Values are in minutes (presumably since midnight): 21 * 60 is 21:00, so the incident
# window is 21:00-22:00 and the time-of-death range 21:20-21:50 lies inside it.
MURDER_WINDOW = TimeWindow(start_min=21 * 60, end_min=22 * 60)
TIME_OF_DEATH = TimeWindow(start_min=21 * 60 + 20, end_min=21 * 60 + 50)

# Output caps sized to healthy generations (~600-1100 / ~200-400 tokens). The caps only
# bite on runaway outputs, where on 2 vCPU an uncapped 4096-token decode wastes minutes
# before the retry can even start.
# _WORLD_MAX_TOKENS bounds the world/cast call, _MYSTERY_MAX_TOKENS the mystery call.
_WORLD_MAX_TOKENS = 1800
_MYSTERY_MAX_TOKENS = 900


@dataclass(frozen=True)
class GenerationResult:
    """Immutable outcome of ``generate_case``: a case, its check report, and the attempt count."""
    # The assembled case from the attempt that was returned.
    case: CaseFile
    # Report from ``check(case)``; not necessarily ``ok`` when every attempt failed the check.
    report: CheckReport
    # 1-based number of the passing attempt, or ``max_attempts`` when no attempt passed.
    attempts: int


def _clamp(index: int, count: int) -> int:
    return max(0, min(index, count - 1))


def _ensure_female(world: WorldCastOut, seed: int) -> WorldCastOut:
    """Guarantee every case has at least one woman in the cast (variety), flipping one
    suspect deterministically if the model produced an all-male cast."""
    suspects = list(world.suspects)
    if any((s.gender or "").lower().startswith("f") for s in suspects):
        return world
    idx = seed % len(suspects)
    suspects[idx] = suspects[idx].model_copy(update={"gender": "female"})
    return world.model_copy(update={"suspects": suspects})


def generate_case(
    backend: LLMBackend,
    *,
    seed: int,
    knobs: GenerationKnobs | None = None,
    max_attempts: int = 2,
) -> GenerationResult:
    """Generate a case, retrying with a bumped seed until the solver check passes.

    Each attempt makes two model calls (world/cast, then the mystery written for the
    chosen culprit), assembles a ``CaseFile`` and runs ``check`` on it. The first case
    whose report is ``ok`` is returned immediately.

    Args:
        backend: LLM backend passed to ``generate_model`` for both calls.
        seed: Base seed. Attempt ``k`` (0-based) uses ``seed + k``; the case id is
            always derived from the base seed.
        knobs: Generation settings; defaults to ``GenerationKnobs()``.
        max_attempts: Upper bound on the number of attempts.

    Returns:
        The first case that passes the check with its 1-based attempt number, or,
        when none passes, the most recently assembled case with its failing report
        and ``attempts=max_attempts``.

    Raises:
        LLMError: If no attempt assembled a case (``max_attempts <= 0``, or every
            attempt produced a world without suspects or without locations).
        Exceptions raised by ``generate_model``, ``assemble_case`` or ``check`` are
        not caught here and propagate to the caller.
    """
    knobs = knobs or GenerationKnobs()
    # An explicit crime kind wins; otherwise the kind is derived from the base seed.
    profile = profile_for(knobs.crime_kind or kind_for_seed(seed))
    case: CaseFile | None = None
    report: CheckReport | None = None

    for attempt in range(max_attempts):
        attempt_seed = seed + attempt
        world = generate_model(
            backend,
            world_cast_prompt(profile, knobs.setting_hint, knobs.era_hint, knobs.tone_hint,
                              knobs.n_suspects, MURDER_WINDOW.start_min, MURDER_WINDOW.end_min),
            WorldCastOut, temperature=0.85, max_tokens=_WORLD_MAX_TOKENS, seed=attempt_seed,
        )
        n = len(world.suspects)
        n_loc = len(world.locations)
        if n == 0 or n_loc == 0:
            # A world with no suspects or no locations cannot be indexed (the modulo
            # operations below would raise ZeroDivisionError). Treat it as a failed
            # attempt and move on to the next seed instead of crashing.
            continue
        world = _ensure_female(world, attempt_seed)
        # NOTE(review): _ensure_female flips suspect ``attempt_seed % n`` and the culprit
        # is picked with the same expression, so in an all-male cast the flipped suspect
        # is always the culprit. Confirm this correlation is acceptable.
        culprit_idx = attempt_seed % n
        crime_idx = _clamp(world.found_at_index, n_loc)
        # The culprit's claimed location is the one after the crime scene, wrapping
        # around; with a single location it coincides with the crime scene.
        claimed_idx = (crime_idx + 1) % n_loc
        culprit = world.suspects[culprit_idx]

        mystery = generate_model(
            backend,
            mystery_prompt(profile, culprit.name, culprit.role, world.victim_name,
                           world.weapon_name, world.locations[crime_idx],
                           world.locations[claimed_idx],
                           MURDER_WINDOW.start_min, MURDER_WINDOW.end_min),
            MysteryOut, temperature=0.6, max_tokens=_MYSTERY_MAX_TOKENS, seed=attempt_seed,
        )

        case = assemble_case(
            case_id=f"gen-{seed:06d}", seed=attempt_seed, knobs=knobs, world=world, mystery=mystery,
            profile=profile, window=MURDER_WINDOW, tod=TIME_OF_DEATH,
            culprit_idx=culprit_idx, crime_idx=crime_idx, claimed_idx=claimed_idx,
        )
        report = check(case)
        if report.ok:
            return GenerationResult(case=case, report=report, attempts=attempt + 1)

    if case is None or report is None:
        raise LLMError("generate_case produced no case after all attempts")
    # No attempt passed the check: hand back the last assembled case with its report.
    return GenerationResult(case=case, report=report, attempts=max_attempts)