case0 / src /case_zero /engine /interrogation_loop.py
HusseinEid's picture
Case Zero - initial public release (fully local: Qwen2.5-1.5B via llama.cpp + Supertonic, custom pixel-noir SPA via gradio.Server)
414dc55
"""One interrogation turn: prompt -> streamed model call -> deterministic director.
Exactly one model call per turn. Spoken text streams out as it arrives; mechanics are
decided afterwards by the director against ground truth.
"""
from __future__ import annotations
import re
from collections.abc import Iterator
from dataclasses import dataclass
from ..llm.backend import LLMBackend
from ..llm.decoding import stream_turn
from ..projections.suspect_brief import SuspectBrief
from ..schemas.case import CaseFile
from ..schemas.enums import Relevance
from ..schemas.interrogation import InterrogationTurn
from ..suspects.memory import buffer_text, ledger_text
from ..suspects.persona import build_prompt
from ..suspects.scrub import scrub_spoken
from .director import Adjudication, adjudicate
from .game_state import GameState
from .relevance import assess_relevance
from .state_update import apply_turn
# Sampling temperature passed to stream_turn for every suspect reply.
_SUSPECT_TEMPERATURE: float = 0.8

# Canned, in-character deflections. They are a deterministic backstop for the two failure
# modes of the small model: an empty "spoken" field, or a line it has already used this
# session. This mirrors the confession backstop in scrub.py - the model still authors the
# dialogue, and a canned line only stands in when the model output is unusable, so the
# player never sees a blank or a duplicate reply.

# Pool used when the suspect is under pressure (BREAKING / DIRECT relevance).
_PRESSED: tuple[str, ...] = (
    "I... I don't know what you want me to say.",
    "Don't look at me like that - I had nothing to do with this.",
    "You're putting words in my mouth.",
    "That doesn't prove a thing, and you know it.",
    "You can stare all you like - it wasn't me.",
    "I'm done answering that. I want a lawyer.",
    "You're twisting this. That is not what happened.",
)

# Pool used for every other relevance level.
_CALM: tuple[str, ...] = (
    "I've already told you everything I know.",
    "There's nothing more to it than that.",
    "I don't see what that has to do with me.",
    "You're reaching, Detective.",
    "Ask me something that actually matters.",
    "I wasn't anywhere near it, if that's what you're getting at.",
    "What else do you want me to say?",
    "That's all there is to tell.",
)
def _norm(s: str) -> str:
    """Return *s* lower-cased with everything but a-z, 0-9 and spaces blanked out.

    Each disallowed character is replaced by a single space (runs are not collapsed), and
    the result is stripped of leading/trailing whitespace.
    """
    lowered = s.lower()
    blanked = re.sub(r"[^a-z0-9 ]", " ", lowered)
    return blanked.strip()
def _too_similar(a: str, b: str) -> bool:
    """Report whether two replies are effectively the same line.

    Both strings are normalised with ``_norm``. An empty normalised string never matches;
    identical normalised strings always match. Otherwise the shared-word count is divided
    by the size of the *smaller* word set, and a ratio of 0.7 or more counts as similar -
    so a short reply wholly contained in a longer one is treated as a repeat.
    """
    left, right = _norm(a), _norm(b)
    if not (left and right):
        return False
    if left == right:
        return True

    left_words = set(left.split())
    right_words = set(right.split())
    smaller = min(len(left_words), len(right_words))
    if smaller == 0:
        # Nothing to compare against (and avoids dividing by zero).
        return False

    shared = len(left_words & right_words)
    return shared / smaller >= 0.7
def _distinct_deflection(relevance: Relevance, recent: list[str], key: str) -> str:
    """Choose a canned deflection that does not near-repeat any recent answer.

    The pool is ``_PRESSED`` for BREAKING/DIRECT relevance and ``_CALM`` otherwise. The
    starting index is derived from the code points of *key*, so the same key always starts
    at the same line. Candidates are tried in order from that index, wrapping around; if
    every candidate resembles something in *recent*, the starting line is returned anyway.
    """
    if relevance in (Relevance.BREAKING, Relevance.DIRECT):
        pool = _PRESSED
    else:
        pool = _CALM

    start = sum(ord(ch) for ch in key) % len(pool)
    # Rotate so iteration begins at `start` and wraps through the whole pool once.
    rotated = pool[start:] + pool[:start]
    for candidate in rotated:
        if all(not _too_similar(candidate, prev) for prev in recent):
            return candidate
    return rotated[0]
@dataclass
class FinalTurn:
    """Bundle produced at the end of one interrogation turn."""

    # The suspect's turn as it is handed to the director and state update.
    turn: InterrogationTurn
    # Result returned by the director's adjudicate() for this turn.
    adjudication: Adjudication
    # Game state returned by apply_turn() for this turn.
    state: GameState
@dataclass
class InterrogationEvent:
    """One item yielded by interrogate(): a chunk of speech or the finished turn."""

    # Newly streamed spoken text; empty on the closing event.
    spoken_delta: str = ""
    # Set only on the closing event of a turn; None on streaming events.
    final: FinalTurn | None = None
def interrogate(
    backend: LLMBackend,
    case: CaseFile,
    brief: SuspectBrief,
    state: GameState,
    sus_id: str,
    question: str,
    presented_clue_id: str | None = None,
    seed: int | None = None,
) -> Iterator[InterrogationEvent]:
    """Run one interrogation turn, streaming speech and finishing with the adjudicated turn.

    Yields one ``InterrogationEvent(spoken_delta=...)`` for every non-empty delta produced
    by ``stream_turn`` (which is invoked once), then a single closing event whose ``final``
    carries the turn, the director's adjudication and the state returned by ``apply_turn``.

    The spoken line in the closing event may differ from what was streamed: it is passed
    through ``scrub_spoken`` and, if it ends up blank or too similar to one of the last four
    transcript answers, replaced by a canned deflection.
    """
    suspect = case.suspect(sus_id)
    suspect_state = state.state_for(sus_id)
    assessment = assess_relevance(case, suspect, presented_clue_id)
    relevance = assessment.relevance
    shown_clue = case.clue(presented_clue_id) if presented_clue_id else None

    ledger = ledger_text(case, suspect, suspect_state)
    buffer = buffer_text(suspect_state)
    prompt = build_prompt(
        case=case,
        brief=brief,
        ledger=ledger,
        buffer=buffer,
        question=question,
        clue=shown_clue,
        relevance=relevance,
    )

    # Forward speech as it arrives; remember the last final turn the stream reports.
    turn: InterrogationTurn | None = None
    stream = stream_turn(backend, prompt, seed=seed, temperature=_SUSPECT_TEMPERATURE)
    for chunk in stream:
        if chunk.spoken_delta:
            yield InterrogationEvent(spoken_delta=chunk.spoken_delta)
        if chunk.final is not None:
            turn = chunk.final
    if turn is None:
        # The stream never produced a final turn - fall back to the schema's default.
        turn = InterrogationTurn.safe_default()

    # Backstop 1: a suspect line must never confess - the win lives in the director, not
    # in the suspect's mouth.
    spoken = scrub_spoken(turn.spoken, breaking=relevance is Relevance.BREAKING)

    # Backstop 2: never surface a blank line or a near-repeat of a recent answer. The
    # replacement is deterministic and costs no extra model call.
    recent = [entry.answer for entry in suspect_state.transcript[-4:]]
    is_blank = not spoken or not spoken.strip()
    if is_blank or any(_too_similar(spoken, prev) for prev in recent):
        deflection_key = f"{sus_id}:{question}:{len(suspect_state.transcript)}"
        spoken = _distinct_deflection(relevance, recent, deflection_key)

    # Only rebuild the turn when a backstop actually changed the line.
    if spoken != turn.spoken:
        turn = turn.model_copy(update={"spoken": spoken})

    adjudication = adjudicate(case, suspect, suspect_state, turn, presented_clue_id)
    next_state = apply_turn(
        state, case, sus_id, question, turn, adjudication, presented_clue_id
    )
    closing = FinalTurn(turn=turn, adjudication=adjudication, state=next_state)
    yield InterrogationEvent(final=closing)