Spaces:

build-small-hackathon
/

case0

Running

case0 / src /case_zero /suspects /scrub.py

Case Zero - initial public release (fully local: Qwen2.5-1.5B via llama.cpp + Supertonic, custom pixel-noir SPA via gradio.Server)

414dc55 3 days ago

raw

history blame contribute delete

4.17 kB

	"""Deterministic guard against a suspect confessing in dialogue.

	A small model, when shown the breaking clue, will sometimes simply admit guilt
	("you caught me", "it was me", "I hid the body"). That collapses the mystery - the
	player should WIN by reasoning from evidence and accusing, never because the suspect
	narrated their own downfall. The win condition lives in the deterministic director;
	no suspect line is ever allowed to confess.

	This is a backstop layered under a prompt that already forbids confessions: any spoken
	sentence that reads as a self-incriminating admission is replaced, in character, with
	a rattled or flat deflection. Ordinary denials ("I didn't do it", "I never went near
	the office") pass through untouched.
	"""

	from __future__ import annotations

	import re

	# Self-incriminating admissions. Each pattern is an ADMISSION, not a denial - the
	# negation guard below lets "I didn't kill anyone" / "I never touched it" pass.
	_ADMIT = re.compile(
	r"""\b(
	i\s+(killed\|murdered\|stabbed\|poisoned\|strangled\|shot\|drowned\|smothered)\b
	\| i\s+did\s+it\b
	\| i\s+planned\s+(it\|the\s+\w+)
	\| i\s+committed\b
	\| i\s+confess\b
	\| i\s+am\s+(the\s+)?(killer\|guilty\|to\s+blame\|responsible)
	\| i'?m\s+(the\s+)?(killer\|guilty\|to\s+blame\|responsible)
	\| it\s+was\s+me\b
	\| i\s+hid\s+the\s+(body\|weapon\|knife\|gun)
	\| caught\s+me\s+(red[\s-]?handed\|trying\|with\|in\s+the\|hiding\|leaving\|sneaking\|taking)
	\| my\s+secret\s+is\s+out
	)""",
	re.IGNORECASE \| re.VERBOSE,
	)

	# If a negation sits in the same sentence, treat it as a denial, not an admission.
	_NEG = re.compile(
	r"\b(didn'?t\|did\s+not\|never\|wasn'?t\|was\s+not\|won'?t\|would\s+n'?t\|can'?t\|"
	r"could\s+n'?t\|don'?t\|do\s+not\|no\s+one\|nothing\|not\s+me)\b",
	re.IGNORECASE,
	)

	# In-character replacements. Rattled (when the breaking clue is on the table) vs. flat.
	_DEFLECT_BREAK = (
	"That doesn't prove a thing, and you know it.",
	"You can look at me all you like - I had nothing to do with this.",
	"I'm done answering that. I want to speak to someone.",
	"You're twisting this. That is not what happened.",
	"I don't have to explain that to you.",
	)
	_DEFLECT_CALM = (
	"I've already told you everything I know.",
	"I don't see what that has to do with me.",
	"You're reaching, Detective.",
	"Ask me something that actually matters.",
	"I wasn't anywhere near it.",
	)

	# Meta-narration the small model sometimes leaks by echoing the "spoken" field's intent
	# ("What I'm saying out loud is that...", "I say:", "My answer is..."). Stripped from the
	# START of a line so only the actual spoken words remain.
	_META = re.compile(
	r"^\s*(?:"
	r"what\s+i(?:'?m\| am)\s+saying(?:\s+out\s+loud)?\s+is(?:\s+that)?\s+"
	r"\|what\s+i\s+(?:say\|said\|mean\|meant)(?:\s+out\s+loud)?\s+is(?:\s+that)?\s+"
	r"\|out\s+loud[,:]?\s+i\s+say(?:\s+that)?\s+"
	r"\|i\s+(?:say\|respond\|reply\|answer)(?:\s+out\s+loud)?[,:]\s+"
	r"\|my\s+(?:spoken\s+)?(?:words\|response\|reply\|answer)\s+(?:is\|are)[,:]?\s+(?:that\s+)?"
	r"\|spoken[,:]\s+"
	r")",
	re.IGNORECASE,
	)

	# Splits on the run of whitespace that follows sentence-ending punctuation (. ! ?).
	# The lookbehind keeps the punctuation attached to the sentence it ends.
	_SENTENCE_SPLIT = re.compile(r"(?<=[.!?])\s+")


	def _strip_meta(sentence: str) -> str:
	    """Remove one leading meta-narration prefix from ``sentence``.

	    At most one ``_META`` match is removed. If something was removed and text
	    remains, the first remaining character is upper-cased so the remainder
	    still reads as the start of a sentence. A sentence without such a prefix
	    is returned unchanged.
	    """
	    new = _META.sub("", sentence, count=1)
	    # Re-capitalise only when a prefix was actually stripped and text is left;
	    # the ``and new`` guard avoids indexing into an empty string.
	    if new != sentence and new:
	        new = new[0].upper() + new[1:]
	    return new


	def _is_confession(sentence: str) -> bool:
	    """Return True when ``sentence`` reads as a self-incriminating admission.

	    A sentence counts as a confession only if it matches ``_ADMIT`` and contains
	    no ``_NEG`` negation anywhere in it; a negation in the same sentence is
	    taken to mean the speaker is denying rather than admitting.
	    """
	    return bool(_ADMIT.search(sentence)) and not _NEG.search(sentence)


	def _replacement(sentence: str, *, breaking: bool) -> str:
	    """Pick the deflection line that stands in for a confessing ``sentence``.

	    ``breaking`` selects the pool: ``_DEFLECT_BREAK`` when true, ``_DEFLECT_CALM``
	    otherwise. The entry is chosen by the sum of the sentence's code points
	    modulo the pool size, so the same sentence always maps to the same line.
	    """
	    pool = _DEFLECT_BREAK if breaking else _DEFLECT_CALM
	    return pool[sum(map(ord, sentence)) % len(pool)]


	def scrub_spoken(text: str, *, breaking: bool = False) -> str:
	    """Return ``text`` with every confessing sentence replaced by a deflection.

	    The text is stripped, split into sentences after ``.``, ``!`` or ``?``, and
	    each sentence has a leading meta-narration prefix removed. Sentences that
	    read as a confession are swapped for an in-character deflection (from the
	    rattled pool when ``breaking`` is true, the calm pool otherwise); all other
	    sentences keep their wording. Sentences that end up empty are dropped, and
	    the result is re-joined with single spaces, so whitespace between sentences
	    is normalised.

	    Empty or whitespace-only ``text`` is returned as-is.
	    """
	    if not text or not text.strip():
	        return text
	    out: list[str] = []
	    for sentence in _SENTENCE_SPLIT.split(text.strip()):
	        # Strip meta-narration first so the confession check sees only spoken words.
	        s = _strip_meta(sentence)
	        if not s.strip():
	            continue
	        out.append(_replacement(s, breaking=breaking) if _is_confession(s) else s)
	    return " ".join(out).strip()