Spaces:

build-small-hackathon
/

case0

Running

File size: 4,492 Bytes

"""Deterministic guard against a suspect confessing in dialogue.

A small model, when shown the breaking clue, will sometimes simply admit guilt
("you caught me", "it was me", "I hid the body"). That collapses the mystery - the
player should WIN by reasoning from evidence and accusing, never because the suspect
narrated their own downfall. The win condition lives in the deterministic director;
no suspect line is ever allowed to confess.

This is a backstop layered under a prompt that already forbids confessions: any spoken
sentence that reads as a self-incriminating admission is replaced, in character, with
a rattled or flat deflection. Ordinary denials ("I didn't do it", "I never went near
the office") pass through untouched.
"""

from __future__ import annotations

import re

# Self-incriminating admissions. Each pattern is an ADMISSION, not a denial - the
# negation guard below lets "I didn't kill anyone" / "I never touched it" pass.
# The "I'm ..." contraction accepts the straight apostrophe ('), the typographic one
# (U+2019), or none at all, so "I’m the killer" is caught just like "I'm the killer".
# The pattern is compiled with re.VERBOSE: whitespace inside it is layout only.
_ADMIT = re.compile(
    r"""\b(
        i\s+(killed|murdered|stabbed|poisoned|strangled|shot|drowned|smothered)\b
      | i\s+(stole|robbed|forged|defrauded|swindled|blackmailed|extorted|torched|abducted|kidnapped)\b
      | i\s+(set|started|lit)\s+the\s+fire\b
      | i\s+took\s+(it|him|her|them|the\s+\w+)\b
      | i\s+did\s+it\b
      | i\s+planned\s+(it|the\s+\w+)
      | i\s+committed\b
      | i\s+confess\b
      | i\s+am\s+(the\s+)?(killer|thief|arsonist|blackmailer|fraudster|abductor|guilty|to\s+blame|responsible)
      | i['\u2019]?m\s+(the\s+)?(killer|thief|arsonist|blackmailer|fraudster|abductor|guilty|to\s+blame|responsible)
      | it\s+was\s+me\b
      | i\s+hid\s+the\s+(body|weapon|knife|gun|jewels?|money|ledger|letters?)
      | caught\s+me\s+(red[\s-]?handed|trying|with|in\s+the|hiding|leaving|sneaking|taking)
      | my\s+secret\s+is\s+out
    )""",
    re.IGNORECASE | re.VERBOSE,
)

# If a negation sits in the same sentence, treat it as a denial, not an admission.
# "would"/"could" use \s* before "n't" so the ordinary attached contractions
# ("wouldn't", "couldn't") match as well as the split-token form ("would n't").
# Contractions accept the straight (') or typographic (U+2019) apostrophe, or none;
# the spelled-out forms ("would not", "could not", "cannot") are covered too.
_NEG = re.compile(
    r"\b(didn['\u2019]?t|did\s+not|never|wasn['\u2019]?t|was\s+not|won['\u2019]?t|"
    r"would\s*n['\u2019]?t|would\s+not|could\s*n['\u2019]?t|could\s+not|"
    r"can['\u2019]?t|can\s*not|don['\u2019]?t|do\s+not|no\s+one|nothing|not\s+me)\b",
    re.IGNORECASE,
)

# In-character replacements. Rattled (when the breaking clue is on the table) vs. flat.
# _replacement() picks one entry per confessing sentence by indexing the pool with a
# character-code checksum modulo the pool length, so adding, removing, or reordering
# entries changes which line a given sentence maps to.
# Pool used when scrub_spoken() is called with breaking=True.
_DEFLECT_BREAK = (
    "That doesn't prove a thing, and you know it.",
    "You can look at me all you like - I had nothing to do with this.",
    "I'm done answering that. I want to speak to someone.",
    "You're twisting this. That is not what happened.",
    "I don't have to explain that to you.",
)
# Pool used when breaking is False (the default of scrub_spoken()).
_DEFLECT_CALM = (
    "I've already told you everything I know.",
    "I don't see what that has to do with me.",
    "You're reaching, Detective.",
    "Ask me something that actually matters.",
    "I wasn't anywhere near it.",
)

# Meta-narration the small model sometimes leaks by echoing the "spoken" field's intent
# ("What I'm saying out loud is that...", "I say:", "My answer is..."). Stripped from the
# START of a line so only the actual spoken words remain.
# "I'm" accepts the straight (') or typographic (U+2019) apostrophe, and "I am" allows
# any run of whitespace between the words, like every other gap in the pattern.
_META = re.compile(
    r"^\s*(?:"
    r"what\s+i(?:['\u2019]?m|\s+am)\s+saying(?:\s+out\s+loud)?\s+is(?:\s+that)?\s+"
    r"|what\s+i\s+(?:say|said|mean|meant)(?:\s+out\s+loud)?\s+is(?:\s+that)?\s+"
    r"|out\s+loud[,:]?\s+i\s+say(?:\s+that)?\s+"
    r"|i\s+(?:say|respond|reply|answer)(?:\s+out\s+loud)?[,:]\s+"
    r"|my\s+(?:spoken\s+)?(?:words|response|reply|answer)\s+(?:is|are)[,:]?\s+(?:that\s+)?"
    r"|spoken[,:]\s+"
    r")",
    re.IGNORECASE,
)

# Sentence boundary: a run of whitespace that directly follows ".", "!" or "?". The
# lookbehind keeps the punctuation attached to the sentence it ends; the whitespace
# itself is consumed by the split.
_SENTENCE_SPLIT = re.compile(r"(?<=[.!?])\s+")


def _strip_meta(sentence: str) -> str:
    """Drop a leading meta-narration prefix (as matched by ``_META``) from ``sentence``.

    When a prefix was removed and text remains, the first remaining character is
    upper-cased so the line still opens like a sentence. A sentence without such a
    prefix is returned as-is.
    """
    remainder = _META.sub("", sentence, count=1)
    # Nothing was stripped, or nothing is left: there is no first letter to fix up.
    if remainder == sentence or not remainder:
        return remainder
    return remainder[0].upper() + remainder[1:]


def _is_confession(sentence: str) -> bool:
    """Return True when ``sentence`` matches ``_ADMIT`` and contains no ``_NEG`` match.

    NOTE(review): the negation check covers the whole sentence, so an admission that
    shares a sentence with any negation word (e.g. "I killed him and never looked
    back") is not flagged.
    """
    if _NEG.search(sentence):
        # Any negation in the sentence means it is read as a denial.
        return False
    return _ADMIT.search(sentence) is not None


def _replacement(sentence: str, *, breaking: bool) -> str:
    """Pick the deflection line that stands in for a confessing ``sentence``.

    The choice is deterministic: the sum of the sentence's character codes, modulo the
    pool size, indexes into ``_DEFLECT_BREAK`` (``breaking`` true) or ``_DEFLECT_CALM``.
    """
    if breaking:
        candidates = _DEFLECT_BREAK
    else:
        candidates = _DEFLECT_CALM
    checksum = sum(ord(ch) for ch in sentence)
    return candidates[checksum % len(candidates)]


def scrub_spoken(text: str, *, breaking: bool = False) -> str:
    """Replace every confessing sentence in ``text`` with an in-character deflection.

    The text is stripped and split into sentences; each sentence has a leading
    meta-narration prefix removed, and any sentence that then reads as a confession is
    swapped for a line from the rattled pool (``breaking`` true) or the flat pool.
    The surviving sentences are re-joined with single spaces, so whitespace between
    sentences is normalized. Empty or whitespace-only ``text`` is returned untouched.
    """
    if not text or not text.strip():
        return text
    cleaned = (_strip_meta(piece) for piece in _SENTENCE_SPLIT.split(text.strip()))
    kept = [
        _replacement(line, breaking=breaking) if _is_confession(line) else line
        for line in cleaned
        if line.strip()  # skip anything reduced to nothing by the meta strip
    ]
    return " ".join(kept).strip()