case0 / src /case_zero /suspects /scrub.py
HusseinEid's picture
Case Zero - initial public release (fully local: Qwen2.5-1.5B via llama.cpp + Supertonic, custom pixel-noir SPA via gradio.Server)
414dc55
"""Deterministic guard against a suspect confessing in dialogue.
A small model, when shown the breaking clue, will sometimes simply admit guilt
("you caught me", "it was me", "I hid the body"). That collapses the mystery - the
player should WIN by reasoning from evidence and accusing, never because the suspect
narrated their own downfall. The win condition lives in the deterministic director;
no suspect line is ever allowed to confess.
This is a backstop layered under a prompt that already forbids confessions: any spoken
sentence that reads as a self-incriminating admission is replaced, in character, with
a rattled or flat deflection. Ordinary denials ("I didn't do it", "I never went near
the office") pass through untouched.
"""
from __future__ import annotations
import re
# Self-incriminating admissions. Each pattern is an ADMISSION, not a denial - the
# negation guard below lets "I didn't kill anyone" / "I never touched it" pass.
#
# The pattern is compiled with re.VERBOSE, so the layout whitespace between the
# alternatives is ignored; real whitespace is always spelled ``\s``.
# The contraction "I'm" accepts both the ASCII apostrophe and the typographic
# one (U+2019), because model output may use either.
_ADMIT = re.compile(
    r"""\b(
        i\s+(killed|murdered|stabbed|poisoned|strangled|shot|drowned|smothered)\b
      | i\s+did\s+it\b
      | i\s+planned\s+(it|the\s+\w+)
      | i\s+committed\b
      | i\s+confess\b
      | i\s+am\s+(the\s+)?(killer|guilty|to\s+blame|responsible)
      | i['\u2019]?m\s+(the\s+)?(killer|guilty|to\s+blame|responsible)
      | it\s+was\s+me\b
      | i\s+hid\s+the\s+(body|weapon|knife|gun)
      | caught\s+me\s+(red[\s-]?handed|trying|with|in\s+the|hiding|leaving|sneaking|taking)
      | my\s+secret\s+is\s+out
    )""",
    re.IGNORECASE | re.VERBOSE,
)
# If a negation sits in the same sentence, treat it as a denial, not an admission.
# Contractions accept either the ASCII apostrophe or the typographic one (U+2019).
# "wouldn't" / "couldn't" are matched with optional whitespace before "n't" (so the
# split form "would n't" still matches), and the expanded "would not" / "could not"
# are covered the same way "did not" / "was not" / "do not" are.
_NEG = re.compile(
    r"\b(didn['\u2019]?t|did\s+not|never|wasn['\u2019]?t|was\s+not|won['\u2019]?t"
    r"|would\s*n['\u2019]?t|would\s+not|can['\u2019]?t"
    r"|could\s*n['\u2019]?t|could\s+not|don['\u2019]?t|do\s+not"
    r"|no\s+one|nothing|not\s+me)\b",
    re.IGNORECASE,
)
# In-character replacements. Rattled (when the breaking clue is on the table) vs. flat.
# ``_replacement`` indexes into one of these pools, so the order of the entries
# determines which line a given sentence maps to.

# Pool used when ``breaking`` is true.
_DEFLECT_BREAK = (
    "That doesn't prove a thing, and you know it.",
    "You can look at me all you like - I had nothing to do with this.",
    "I'm done answering that. I want to speak to someone.",
    "You're twisting this. That is not what happened.",
    "I don't have to explain that to you.",
)
# Pool used when ``breaking`` is false.
_DEFLECT_CALM = (
    "I've already told you everything I know.",
    "I don't see what that has to do with me.",
    "You're reaching, Detective.",
    "Ask me something that actually matters.",
    "I wasn't anywhere near it.",
)
# Meta-narration the small model sometimes leaks by echoing the "spoken" field's intent
# ("What I'm saying out loud is that...", "I say:", "My answer is..."). Stripped from the
# START of a line so only the actual spoken words remain.
# The pattern is anchored with ``^`` and is NOT compiled verbose, so the literal space
# in " am" is significant. "I'm" accepts the ASCII or the typographic (U+2019) apostrophe.
_META = re.compile(
    r"^\s*(?:"
    r"what\s+i(?:['\u2019]?m| am)\s+saying(?:\s+out\s+loud)?\s+is(?:\s+that)?\s+"
    r"|what\s+i\s+(?:say|said|mean|meant)(?:\s+out\s+loud)?\s+is(?:\s+that)?\s+"
    r"|out\s+loud[,:]?\s+i\s+say(?:\s+that)?\s+"
    r"|i\s+(?:say|respond|reply|answer)(?:\s+out\s+loud)?[,:]\s+"
    r"|my\s+(?:spoken\s+)?(?:words|response|reply|answer)\s+(?:is|are)[,:]?\s+(?:that\s+)?"
    r"|spoken[,:]\s+"
    r")",
    re.IGNORECASE,
)
# Sentence boundary: a run of whitespace that directly follows ".", "!" or "?".
# The lookbehind is zero-width, so the punctuation stays attached to the sentence
# before the split and only the whitespace is consumed.
_SENTENCE_SPLIT = re.compile(r"(?<=[.!?])\s+")
def _strip_meta(sentence: str) -> str:
    """Drop a leading meta-narration prefix from ``sentence``.

    If ``_META`` matches at the start of the sentence, the matched prefix is
    removed and the first character of what remains is upper-cased. A sentence
    with no such prefix is returned as-is; if nothing remains after the prefix,
    the empty string is returned.
    """
    prefix = _META.match(sentence)
    if prefix is None:
        return sentence
    remainder = sentence[prefix.end():]
    if not remainder:
        return remainder
    # Re-capitalise: the spoken words now start where the prefix used to be.
    return remainder[0].upper() + remainder[1:]
def _is_confession(sentence: str) -> bool:
    """Return True when ``sentence`` matches ``_ADMIT`` and contains no ``_NEG`` hit.

    The negation check is sentence-wide: any negation word anywhere in the
    sentence makes it count as a denial.
    """
    if _ADMIT.search(sentence) is None:
        return False
    return _NEG.search(sentence) is None
def _replacement(sentence: str, *, breaking: bool) -> str:
    """Pick a deflection line for ``sentence``.

    The pool is ``_DEFLECT_BREAK`` when ``breaking`` is true, otherwise
    ``_DEFLECT_CALM``. The choice is deterministic: the sum of the sentence's
    code points, modulo the pool size, selects the entry, so the same sentence
    always yields the same line.
    """
    if breaking:
        candidates = _DEFLECT_BREAK
    else:
        candidates = _DEFLECT_CALM
    checksum = sum(ord(ch) for ch in sentence)
    return candidates[checksum % len(candidates)]
def scrub_spoken(text: str, *, breaking: bool = False) -> str:
    """Replace every confessing sentence in ``text`` with an in-character deflection.

    The text is stripped and split into sentences on whitespace that follows
    ".", "!" or "?". Each sentence first has any leading meta-narration removed;
    sentences that end up empty are dropped. A sentence judged to be a
    confession is swapped for a deflection (chosen from the rattled pool when
    ``breaking`` is true, the calm pool otherwise); all others are kept. The
    surviving sentences are re-joined with single spaces, so whitespace between
    sentences is normalised.

    Empty or whitespace-only input is returned exactly as given.
    """
    body = text.strip() if text else ""
    if not body:
        return text

    cleaned = (_strip_meta(raw) for raw in _SENTENCE_SPLIT.split(body))
    kept = [
        _replacement(line, breaking=breaking) if _is_confession(line) else line
        for line in cleaned
        if line.strip()
    ]
    return " ".join(kept).strip()