case0 / src /case_zero /api /scripted.py
HusseinEid's picture
Case Zero - initial public release (fully local: Qwen2.5-1.5B via llama.cpp + Supertonic, custom pixel-noir SPA via gradio.Server)
414dc55
"""Deterministic scripted interrogation for golden / seed cases (no LLM).
Reads the golden's per-suspect answer/delta tables directly, so the golden case plays
instantly and identically every time - ideal for the demo, tests, and offline CI. The
live in-process llama.cpp engine replaces this for generated cases (same wire result).
The deltas never reach the client; only the suspect's spoken reply + the resulting
server-side suspicion do.
"""
from __future__ import annotations
# Named suspicion-delta magnitudes. Neither constant is referenced in the
# visible part of this module.
# NOTE(review): presumably these are the per-turn deltas at which a suspect
# counts as "rattled" / "cornered" and are consumed by callers — TODO confirm.
RATTLED_DELTA = 14
CORNERED_DELTA = 24
def _suspect(golden: dict, sus_id: str) -> dict:
    """Look up a suspect record in *golden* by its ``"id"``.

    Returns the first entry of ``golden["suspects"]`` whose ``"id"`` equals
    *sus_id*.

    Raises:
        KeyError: with *sus_id* as its argument when no entry matches.
    """
    found = next(
        (candidate for candidate in golden["suspects"] if candidate["id"] == sus_id),
        None,
    )
    if found is None:
        raise KeyError(sus_id)
    return found
def scripted_turn(
    golden: dict,
    sus_id: str,
    *,
    question_id: str | None = None,
    present_evidence_id: str | None = None,
    free_text: str | None = None,
) -> tuple[str, int]:
    """Return (spoken_reply, suspicion_delta) for one scripted turn.

    The turn kind is chosen by which keyword argument is set, checked in this
    order:

    1. ``present_evidence_id`` - looked up in the suspect's ``"present"``
       mapping. A missing or empty entry yields the default line with delta 1.
    2. ``question_id`` - matched against the ``"id"`` of each entry in the
       suspect's ``"questions"`` list. No match yields the default line with
       delta 2.
    3. Otherwise (free text, or nothing at all) - the default line with
       delta 2. ``free_text`` itself is never inspected.

    Matched entries supply the reply under ``"a"`` and the delta under
    ``"d"``; the delta is coerced with ``int()``.

    Raises:
        KeyError: if *sus_id* is not found by ``_suspect``, or a matched
            entry lacks ``"a"`` / ``"d"``.
    """
    suspect = _suspect(golden, sus_id)

    # Treat an explicit null "default" like a missing one so the reply is
    # always a string, as the return annotation promises.
    default = suspect.get("default")
    if default is None:
        default = "I've told you what I know, detective."

    if present_evidence_id is not None:
        entry = (suspect.get("present") or {}).get(present_evidence_id)
        if entry:
            return entry["a"], int(entry["d"])
        return default, 1

    if question_id is not None:
        # `or []` mirrors the `or {}` guard on "present": a null "questions"
        # value falls through to the default reply instead of raising.
        for question in suspect.get("questions") or []:
            if question["id"] == question_id:
                return question["a"], int(question["d"])
        return default, 2

    # free text: a small model would answer live; the scripted engine deflects.
    return default, 2