case0 / src /case_zero /projections /suspect_brief.py
HusseinEid's picture
feat: multi-crime cases, scene+exhibit pixel art, background AI generation
80cd1f2 verified
"""SuspectBrief - the only view of the case an actor LLM ever receives.
A brief contains a suspect's OWN knowledge slice and nothing else: no global
solution, no other suspect's secrets, no ``is_culprit`` flag. Cross-suspect leakage
is therefore impossible by construction - a jailbreak can only ever surface what is
already in this suspect's own slice, and the win condition is decided elsewhere.
"""
from __future__ import annotations
from pydantic import BaseModel, ConfigDict
from ..schemas.case import CaseFile
from ..schemas.suspect import Suspect
def _minute_to_clock(minute: int) -> str:
    """Format a minute count as a zero-padded ``HH:MM`` string.

    The hour part is not wrapped at 24, so 1500 renders as ``25:00``.
    """
    hours, remainder = divmod(minute, 60)
    return f"{hours:02d}:{remainder:02d}"
class LieBrief(BaseModel):
    """One prepared lie as it appears inside a ``SuspectBrief``.

    ``build_suspect_brief`` creates one instance per entry in the suspect's
    ``anchored_lies``, copying the four fields across unchanged. Instances are
    frozen, so fields cannot be reassigned after construction.
    """

    model_config = ConfigDict(frozen=True)

    # Identifier of the lie, copied from the source lie's ``lie_id``.
    lie_id: str
    # Subject the lie is about (presumably a short label - confirm against the
    # Suspect schema).
    topic: str
    # Presumably the false version the suspect asserts - confirm against the
    # Suspect schema.
    claimed: str
    # Presumably what the suspect retreats to if the claim is challenged -
    # confirm against the Suspect schema.
    fallback: str
class SuspectBrief(BaseModel):
    """A single suspect's own slice of the case, as built by ``build_suspect_brief``.

    The model carries identity/persona fields, personality scores, and four
    knowledge buckets (``i_know``, ``i_did``, ``i_must_conceal``,
    ``i_will_lie_about``). It has no field flagging the suspect as culprit.
    Instances are frozen, so fields cannot be reassigned after construction.
    """

    model_config = ConfigDict(frozen=True)

    # Identity and persona, copied from the Suspect record.
    sus_id: str
    name: str
    role: str
    persona_summary: str
    # Optional; defaults to the empty string when not supplied.
    demeanour: str = ""

    # Personality scores copied from ``suspect.personality``
    # (value range is not visible here - TODO confirm).
    composure: float
    aggression: float
    evasiveness: float
    tells: tuple[str, ...]
    # Filled by build_suspect_brief as the mean of evasiveness and composure,
    # rounded to 3 decimal places.
    deception_skill: float

    # Known fact statements, then whereabouts lines, then relationship lines.
    i_know: tuple[str, ...]
    # Non-empty activities from the suspect's true whereabouts; for the culprit
    # the method narrative is appended.
    i_did: tuple[str, ...]
    # The suspect's secrets; for the culprit the deed and the alibi lie are appended.
    i_must_conceal: tuple[str, ...]
    # One LieBrief per anchored lie on the suspect.
    i_will_lie_about: tuple[LieBrief, ...]
def _facts_known(case: CaseFile, suspect: Suspect) -> tuple[str, ...]:
    """Return the statements of the case facts listed in ``suspect.knows_facts``.

    Output follows the order of ``suspect.knows_facts``. Ids that match no fact
    in ``case.facts`` are skipped rather than raising.
    """
    statement_for = {fact.fact_id: fact.statement for fact in case.facts}
    resolvable_ids = (fact_id for fact_id in suspect.knows_facts if fact_id in statement_for)
    return tuple(statement_for[fact_id] for fact_id in resolvable_ids)
def _whereabouts(case: CaseFile, suspect: Suspect) -> tuple[str, ...]:
    """Render each segment of ``suspect.true_whereabouts`` as a second-person line.

    Each line has the form ``HH:MM-HH:MM: you were in <place> (<activity>).``.
    The place is the location's name when its id appears in
    ``case.setting.locations``, otherwise the raw id. An empty activity is
    shown as ``present``.
    """
    place_by_id = {loc.loc_id: loc.name for loc in case.setting.locations}

    def describe(segment) -> str:
        start = _minute_to_clock(segment.window.start_min)
        end = _minute_to_clock(segment.window.end_min)
        where = place_by_id.get(segment.loc_id, segment.loc_id)
        doing = segment.activity or "present"
        return f"{start}-{end}: you were in {where} ({doing})."

    return tuple(describe(segment) for segment in suspect.true_whereabouts)
def _relationships(case: CaseFile, suspect: Suspect) -> tuple[str, ...]:
    """List the publicly known relationships that originate from this suspect.

    A relationship is included only when it is flagged ``known_publicly``, its
    ``from_sus_id`` is this suspect, and its target is one of the case's
    suspects. Output keeps the order of ``case.relationships``.
    """
    name_of = {other.sus_id: other.name for other in case.suspects}
    return tuple(
        f"You are {rel.kind} toward {name_of[rel.to_sus_id]}."
        for rel in case.relationships
        if rel.known_publicly
        and rel.from_sus_id == suspect.sus_id
        and rel.to_sus_id in name_of
    )
def build_suspect_brief(case: CaseFile, suspect: Suspect) -> SuspectBrief:
    """Project a suspect's private knowledge into a ``SuspectBrief``.

    The culprit truthfully knows their own actions (so they can roleplay
    concealment); innocents know their innocent truth.

    Args:
        case: The case the suspect belongs to; supplies facts, locations,
            relationships and, for the culprit, the crime details.
        suspect: The suspect whose slice is being projected.

    Returns:
        A frozen ``SuspectBrief``. For a suspect with ``is_culprit`` set, the
        method narrative is appended to ``i_did`` and the deed plus the alibi
        lie are appended to ``i_must_conceal``. The flag itself is not copied
        into the brief.
    """
    i_know = _facts_known(case, suspect) + _whereabouts(case, suspect) + _relationships(case, suspect)
    i_did: list[str] = [seg.activity for seg in suspect.true_whereabouts if seg.activity]
    i_must_conceal: list[str] = list(suspect.secrets)

    if suspect.is_culprit:
        # Kept as a function-scope import (presumably to avoid an import cycle
        # with the generator package - confirm before hoisting).
        from ..generator.crime_profiles import profile_for

        locs = {loc.loc_id: loc.name for loc in case.setting.locations}
        deed = profile_for(case.crime_kind).brief_deed.format(
            victim=case.victim.name,
            instrument=case.weapon.name,
            room=locs.get(case.victim.found_at_loc_id, "the scene"),
        )
        # Show location names in the alibi text, not internal ids, matching how
        # the whereabouts lines and the deed's room are rendered. Unknown ids
        # fall back to the id itself.
        alibi = case.culprit.alibi_lie
        claimed_place = locs.get(alibi.claimed_loc_id, alibi.claimed_loc_id)
        actual_place = locs.get(alibi.actual_loc_id, alibi.actual_loc_id)

        i_did.append(case.culprit.method_narrative)
        i_must_conceal.append(f"{deed} You must never admit this; deflect and deny.")
        i_must_conceal.append(
            f"Your alibi is a lie: you claim {claimed_place} "
            f"but were actually at {actual_place}."
        )

    lies = tuple(
        LieBrief(lie_id=lie.lie_id, topic=lie.topic, claimed=lie.claimed, fallback=lie.fallback)
        for lie in suspect.anchored_lies
    )
    personality = suspect.personality
    return SuspectBrief(
        sus_id=suspect.sus_id,
        name=suspect.name,
        role=suspect.role,
        persona_summary=suspect.persona_summary,
        demeanour=suspect.demeanour,
        composure=personality.composure,
        aggression=personality.aggression,
        evasiveness=personality.evasiveness,
        tells=suspect.tells,
        # Equal-weight blend of evasiveness and composure, rounded to 3 places.
        deception_skill=round(0.5 * personality.evasiveness + 0.5 * personality.composure, 3),
        i_know=i_know,  # already a tuple (concatenation of three tuples)
        i_did=tuple(i_did),
        i_must_conceal=tuple(i_must_conceal),
        i_will_lie_about=lies,
    )