Spaces:

build-small-hackathon
/

case0

Running

case0 / src /case_zero /engine /interrogation_loop.py

Case Zero - initial public release (fully local: Qwen2.5-1.5B via llama.cpp + Supertonic, custom pixel-noir SPA via gradio.Server)

414dc55 3 days ago

raw

history blame contribute delete

5.37 kB

	"""One interrogation turn: prompt -> streamed model call -> deterministic director.

	Exactly one model call per turn. Spoken text streams out as it arrives; mechanics are
	decided afterwards by the director against ground truth.
	"""

	from __future__ import annotations

	import re
	from collections.abc import Iterator
	from dataclasses import dataclass

	from ..llm.backend import LLMBackend
	from ..llm.decoding import stream_turn
	from ..projections.suspect_brief import SuspectBrief
	from ..schemas.case import CaseFile
	from ..schemas.enums import Relevance
	from ..schemas.interrogation import InterrogationTurn
	from ..suspects.memory import buffer_text, ledger_text
	from ..suspects.persona import build_prompt
	from ..suspects.scrub import scrub_spoken
	from .director import Adjudication, adjudicate
	from .game_state import GameState
	from .relevance import assess_relevance
	from .state_update import apply_turn

	# Sampling temperature handed to ``stream_turn`` for the suspect's reply.
	_SUSPECT_TEMPERATURE = 0.8

	# Canned in-character deflections. They are a deterministic backstop for two failure modes of
	# the small model: an empty "spoken" field, or a line that parrots something it already said
	# this session. This mirrors the confession backstop in scrub.py - the model still authors the
	# dialogue, and a canned line only replaces output that is unusable, so the player never sees
	# a blank or duplicated reply.

	# Pool used when the relevance of the turn is BREAKING or DIRECT.
	_PRESSED = (
	    "I... I don't know what you want me to say.",
	    "Don't look at me like that - I had nothing to do with this.",
	    "You're putting words in my mouth.",
	    "That doesn't prove a thing, and you know it.",
	    "You can stare all you like - it wasn't me.",
	    "I'm done answering that. I want a lawyer.",
	    "You're twisting this. That is not what happened.",
	)

	# Pool used for every other relevance level.
	_CALM = (
	    "I've already told you everything I know.",
	    "There's nothing more to it than that.",
	    "I don't see what that has to do with me.",
	    "You're reaching, Detective.",
	    "Ask me something that actually matters.",
	    "I wasn't anywhere near it, if that's what you're getting at.",
	    "What else do you want me to say?",
	    "That's all there is to tell.",
	)


	def _norm(s: str) -> str:
	    """Return ``s`` lower-cased with only ASCII letters, digits and spaces kept.

	    Every other character is replaced by a single space (runs of spaces are
	    not collapsed), then leading and trailing whitespace is stripped.
	    """
	    lowered = s.lower()
	    blanked = re.sub(r"[^a-z0-9 ]", " ", lowered)
	    return blanked.strip()


	def _too_similar(a: str, b: str) -> bool:
	    """Report whether two replies are effectively the same line.

	    Both strings are normalised with ``_norm`` first. If either is empty after
	    normalisation the answer is ``False``; identical normalised strings give
	    ``True``. Otherwise the replies count as similar when the words they share
	    make up at least 70% of the smaller of the two word sets.
	    """
	    left, right = _norm(a), _norm(b)
	    if not (left and right):
	        return False
	    if left == right:
	        return True

	    left_words = set(left.split())
	    right_words = set(right.split())
	    if not (left_words and right_words):
	        return False

	    shared = left_words & right_words
	    smaller = min(len(left_words), len(right_words))
	    return len(shared) / smaller >= 0.7


	def _distinct_deflection(relevance: Relevance, recent: list[str], key: str) -> str:
	    """Choose a canned deflection that does not near-repeat any recent answer.

	    ``_PRESSED`` is used when ``relevance`` is BREAKING or DIRECT, ``_CALM``
	    otherwise. The starting index is the sum of the code points of ``key``
	    modulo the pool size, so the same key always starts at the same line.
	    Candidates are tried in pool order from that index, wrapping around; the
	    first one that ``_too_similar`` does not match against any entry of
	    ``recent`` is returned. If every candidate matches, the line at the
	    starting index is returned anyway.
	    """
	    if relevance in (Relevance.BREAKING, Relevance.DIRECT):
	        pool = _PRESSED
	    else:
	        pool = _CALM

	    start = sum(ord(ch) for ch in key) % len(pool)
	    # Same visiting order as stepping (start + i) % len(pool) for i = 0..len-1.
	    rotated = pool[start:] + pool[:start]
	    fresh = (
	        line
	        for line in rotated
	        if not any(_too_similar(line, prev) for prev in recent)
	    )
	    return next(fresh, pool[start])


	@dataclass
	class FinalTurn:
	    """Bundle of everything produced once an interrogation turn has finished."""

	    # The suspect's turn as finally recorded.
	    turn: InterrogationTurn
	    # The director's ruling on that turn.
	    adjudication: Adjudication
	    # The game state that goes with this turn.
	    state: GameState


	@dataclass
	class InterrogationEvent:
	    """One item of the stream produced by an interrogation turn.

	    An event carries either a fragment of spoken text or the finished turn;
	    both fields have defaults, so a producer sets only the one it needs.
	    """

	    # Fragment of the suspect's spoken text; empty when this event is the final one.
	    spoken_delta: str = ""
	    # The completed turn; ``None`` on events that only carry streamed text.
	    final: FinalTurn | None = None


	def interrogate(
	    backend: LLMBackend,
	    case: CaseFile,
	    brief: SuspectBrief,
	    state: GameState,
	    sus_id: str,
	    question: str,
	    presented_clue_id: str | None = None,
	    seed: int | None = None,
	) -> Iterator[InterrogationEvent]:
	    """Run one interrogation turn and stream its events.

	    The prompt is built from the case, the suspect brief, the suspect's ledger
	    and buffer text, the question, and the presented clue (if any). The model
	    is invoked once through ``stream_turn``.

	    Args:
	        backend: LLM backend passed through to ``stream_turn``.
	        case: Case file used to look up the suspect and the clue.
	        brief: Suspect brief passed to ``build_prompt``.
	        state: Current game state; the per-suspect state is read from it.
	        sus_id: Identifier of the suspect being questioned.
	        question: The player's question.
	        presented_clue_id: Identifier of the clue shown to the suspect, if any.
	        seed: Optional seed forwarded to ``stream_turn``.

	    Yields:
	        An ``InterrogationEvent`` with ``spoken_delta`` set for every non-empty
	        delta reported by ``stream_turn``, followed by exactly one event whose
	        ``final`` holds the finished ``FinalTurn``. Streamed deltas are emitted
	        before the backstops below run, so the final turn's spoken line can
	        differ from the concatenated deltas.
	    """
	    suspect = case.suspect(sus_id)
	    sstate = state.state_for(sus_id)
	    rel = assess_relevance(case, suspect, presented_clue_id)
	    clue = case.clue(presented_clue_id) if presented_clue_id else None

	    prompt = build_prompt(
	        case=case,
	        brief=brief,
	        ledger=ledger_text(case, suspect, sstate),
	        buffer=buffer_text(sstate),
	        question=question,
	        clue=clue,
	        relevance=rel.relevance,
	    )

	    turn: InterrogationTurn | None = None
	    for event in stream_turn(backend, prompt, seed=seed, temperature=_SUSPECT_TEMPERATURE):
	        if event.spoken_delta:
	            yield InterrogationEvent(spoken_delta=event.spoken_delta)
	        if event.final is not None:
	            turn = event.final
	    if turn is None:
	        # The stream ended without a final turn; fall back to the schema's default.
	        turn = InterrogationTurn.safe_default()

	    # Backstop 1: no suspect line is ever allowed to confess - the win lives in the
	    # director, not in the suspect's mouth. (Rebuilds the frozen turn with a clean line.)
	    clean = scrub_spoken(turn.spoken, breaking=rel.relevance is Relevance.BREAKING)

	    # Backstop 2: never show a blank line or a near-verbatim repeat of a recent answer - a 1.5B
	    # model does both. Swap in a distinct in-character deflection (deterministic, no extra call).
	    recent = [e.answer for e in sstate.transcript[-4:]]
	    if not clean or not clean.strip() or any(_too_similar(clean, prev) for prev in recent):
	        # The key mixes suspect, question and transcript length so the starting
	        # deflection is reproducible for this turn.
	        key = f"{sus_id}:{question}:{len(sstate.transcript)}"
	        clean = _distinct_deflection(rel.relevance, recent, key)

	    if clean != turn.spoken:
	        turn = turn.model_copy(update={"spoken": clean})

	    # Mechanics are decided only now, after the spoken line is settled.
	    adj = adjudicate(case, suspect, sstate, turn, presented_clue_id)
	    new_state = apply_turn(state, case, sus_id, question, turn, adj, presented_clue_id)
	    yield InterrogationEvent(final=FinalTurn(turn=turn, adjudication=adj, state=new_state))