"""Deterministic scripted interrogation for golden / seed cases (no LLM). Reads the golden's per-suspect answer/delta tables directly, so the golden case plays instantly and identically every time - ideal for the demo, tests, and offline CI. The live in-process llama.cpp engine replaces this for generated cases (same wire result). The deltas never reach the client; only the suspect's spoken reply + the resulting server-side suspicion do. """ from __future__ import annotations RATTLED_DELTA = 14 CORNERED_DELTA = 24 def _suspect(golden: dict, sus_id: str) -> dict: for s in golden["suspects"]: if s["id"] == sus_id: return s raise KeyError(sus_id) def scripted_turn( golden: dict, sus_id: str, *, question_id: str | None = None, present_evidence_id: str | None = None, free_text: str | None = None, ) -> tuple[str, int]: """Return (spoken_reply, suspicion_delta) for one scripted turn.""" suspect = _suspect(golden, sus_id) default = suspect.get("default", "I've told you what I know, detective.") if present_evidence_id is not None: entry = (suspect.get("present") or {}).get(present_evidence_id) if entry: return entry["a"], int(entry["d"]) return default, 1 if question_id is not None: for q in suspect.get("questions", []): if q["id"] == question_id: return q["a"], int(q["d"]) return default, 2 # free text: a small model would answer live; the scripted engine deflects. return default, 2