case0 / scripts /prebake_cases.py
HusseinEid's picture
feat: multi-crime cases, scene+exhibit pixel art, background AI generation
80cd1f2 verified
raw
history blame
7.18 kB
"""Pre-bake a pool of full, model-authored cases for instant New Case serving.
Generation on a 2-vCPU Space takes ~1-2 minutes, so the player would otherwise stare at a
loading screen. This script runs the SAME in-process llama.cpp generator offline, keeps only
solvable, well-formed, "exciting" cases (distinct human suspects, a real motive, no
detective/officer suspects, a gender mix), assigns each a stable Case ID, and writes the full
sealed CaseFile JSON to ``cases/prebaked/``. The Space ships these and serves one instantly on
New Case while still running every interrogation live (and generating fresh cases when the
hardware allows). The pre-baked cases are authored by the local model - no cloud, still
Off-the-Grid.
New cases are APPENDED after the existing pool (existing Case IDs keep working as share
links) and cycle through the crime kinds so the pool is not all murders.
Usage:
    python scripts/prebake_cases.py [target_count] [start_seed]

``target_count`` defaults to 8 and ``start_seed`` defaults to 51000.
"""
from __future__ import annotations
import re
import sys
from pathlib import Path
ROOT = Path(__file__).resolve().parent.parent
sys.path.insert(0, str(ROOT / "src"))
from case_zero.config import get_settings # noqa: E402
from case_zero.generator.pipeline import generate_case # noqa: E402
from case_zero.llm.backend import make_backend # noqa: E402
from case_zero.persistence.case_store import save_case # noqa: E402
from case_zero.persistence.paths import prebaked_cases_dir # noqa: E402
from case_zero.schemas.case import CaseFile, GenerationKnobs # noqa: E402
from case_zero.schemas.enums import CrimeKind # noqa: E402
# Rotation of crime kinds for new bakes; main() indexes it by the number of cases
# kept so far. The existing pool is homicide-heavy, so homicide does not appear
# until the seventh slot and every other kind gets a turn first.
_KIND_PLAN: tuple[CrimeKind, ...] = (
    CrimeKind.THEFT,
    CrimeKind.BLACKMAIL,
    CrimeKind.ARSON,
    CrimeKind.MISSING,
    CrimeKind.FRAUD,
    CrimeKind.THEFT,
    CrimeKind.HOMICIDE,
    CrimeKind.MISSING,
    CrimeKind.ARSON,
    CrimeKind.FRAUD,
    CrimeKind.HOMICIDE,
    CrimeKind.BLACKMAIL,
)
# Short connecting words that stay lowercase unless they open or close the title.
_SMALL = {"a", "an", "and", "at", "but", "by", "for", "in", "of", "on", "or", "the", "to"}


def _titlecase(raw: str) -> str:
    """Return ``raw`` in title case with single spaces between words.

    Every word is lowercased first and then capitalized, except for the small
    connecting words in ``_SMALL``, which stay lowercase when they are neither
    the first nor the last word. A falsy ``raw`` yields an empty string.
    """
    tokens = [tok.lower() for tok in (raw or "").split()]
    final = len(tokens) - 1
    return " ".join(
        tok if 0 < pos < final and tok in _SMALL else tok.capitalize()
        for pos, tok in enumerate(tokens)
    )
# Whole-word, case-insensitive match for law-enforcement style words; _is_exciting
# applies it to each suspect's role and name.
_BAD_ROLE = re.compile(
    r"\b(detective|officer|investigator|police|inspector|sergeant|constable|cop|agent)\b",
    flags=re.IGNORECASE,
)
# Stock filler names a small model tends to fall back on. They read as obviously
# fake and kill the mood, so cases that use them are rejected. Entries are
# lowercase and space-separated.
_PLACEHOLDER_NAMES = {
    "john doe",
    "jane doe",
    "john smith",
    "jane smith",
    "joe bloggs",
    "richard roe",
    "mary major",
    "john q public",
    "tom johnson",
    "tom smith",
    "jack smith",
    "jane roe",
    "john brown",
    "bob smith",
    "foo bar",
    "first last",
    "name surname",
}
# Words that signal the "name" is really a role description
# ("Rival Curator", "Business Partner") rather than a person's name.
_ROLE_AS_NAME = re.compile(
    r"\b(rival|partner|business|curator|servant|butler|maid|cousin|nephew|niece|heir|"
    r"the\s|guest|stranger|visitor|neighbou?r|colleague|assistant|clerk|owner|manager)\b",
    re.IGNORECASE,
)


def _name_malformed(n: str) -> bool:
    """Return True when ``n`` does not look like a plain personal name.

    A name is flagged when it contains a comma or a digit ("Lara White, 45"),
    carries a gender label ("John Smith, Male"), or contains a role word
    matched by ``_ROLE_AS_NAME`` ("Rival Curator").
    """
    if "," in n:
        return True
    if any(ch.isdigit() for ch in n):
        return True
    if re.search(r"\b(male|female)\b", n, re.I) is not None:
        return True
    return _ROLE_AS_NAME.search(n) is not None
def _name_prefix(n: str) -> str:
    """Return the first two words of ``n``, lowercased and joined by one space.

    Commas are treated as whitespace, so "John Doe, Male" gives "john doe".
    Names with fewer than two words are returned in full (lowercased).
    """
    tokens = n.lower().replace(",", " ").split()
    leading = tokens[:2]
    return " ".join(leading)
def _is_placeholder_name(name: str) -> bool:
    """Return True when ``name`` starts with one of the ``_PLACEHOLDER_NAMES``."""
    if _name_prefix(name) in _PLACEHOLDER_NAMES:
        return True
    # _name_prefix keeps only two words, so a three-word filler such as
    # "john q public" can never match it. Look at a three-word window as well,
    # and drop periods so "John Q. Public" is caught too.
    words = name.lower().replace(",", " ").replace(".", " ").split()
    return " ".join(words[:3]) in _PLACEHOLDER_NAMES


def _is_exciting(case: CaseFile) -> tuple[bool, str]:
    """Reject bland or malformed cases; keep ones that will read well to a judge.

    Returns ``(True, "ok")`` when the case passes every check, otherwise
    ``(False, reason)`` where ``reason`` names the first check that failed.
    Checks run in this order: title length, victim name, suspect names
    (unique, full, well-formed, not placeholders), unique suspect roles,
    no detective-like suspects, at least one female and one male suspect,
    and a non-empty culprit method narrative.
    """
    title = (case.title or "").strip()
    if len(title) < 5:
        return False, "weak title"

    vname = case.victim.name.strip()
    if not vname or " " not in vname or _name_malformed(vname):
        return False, f"victim needs a clean full name: '{vname}'"
    # Filler names are just as jarring on the victim as on a suspect.
    if _is_placeholder_name(vname):
        return False, f"placeholder victim name: '{vname}'"

    names = [s.name.strip() for s in case.suspects]
    low = [n.lower() for n in names]
    if len(set(low)) != len(names):
        return False, "duplicate suspect names"
    if any(len(n) < 3 or " " not in n for n in names):
        return False, "suspect needs a full name"
    if any(_name_malformed(n) for n in names):
        return False, f"malformed name (comma/digit/gender): {names}"
    if any(_is_placeholder_name(n) for n in names):
        return False, f"placeholder name: {names}"

    roles = [s.role.strip().lower() for s in case.suspects]
    if len(set(roles)) < len(roles):
        return False, "duplicate suspect roles"
    for s in case.suspects:
        if _BAD_ROLE.search(s.role) or _BAD_ROLE.search(s.name):
            return False, f"detective-like suspect: {s.name} ({s.role})"

    # Gender is matched on its first letter only; a missing value counts as neither.
    if not any((s.visual.gender or "").lower().startswith("f") for s in case.suspects):
        return False, "no female suspect"
    if not any((s.visual.gender or "").lower().startswith("m") for s in case.suspects):
        return False, "no male suspect"

    # A written method makes the mystery land. Only the method narrative is
    # checked here; nothing in this function verifies a motive.
    if not (case.culprit.method_narrative or "").strip():
        return False, "no method narrative"
    return True, "ok"
def _highest_case_number(out_dir: Path) -> int:
    """Return the largest ``<n>`` among ``CASE-<n>.json`` files in ``out_dir``, or 0."""
    highest = 0
    for path in out_dir.glob("CASE-*.json"):
        match = re.fullmatch(r"CASE-(\d+)", path.stem)
        if match:
            highest = max(highest, int(match.group(1)))
    return highest


def main() -> int:
    """Generate cases until ``target_count`` are kept, saving each to the pool.

    Command-line arguments (both optional, read from ``sys.argv``):
        target_count: number of new cases to keep (default 8).
        start_seed: first generation seed to try (default 51000).

    Each attempt uses the next seed. At most ``target_count * 4 + 8`` attempts
    are made. A generated case is kept only when its report is ok and it passes
    ``_is_exciting``; kept cases get the next free Case ID and a title-cased
    title before being saved.

    Returns:
        0 when at least one case was kept, 1 otherwise.
    """
    target = int(sys.argv[1]) if len(sys.argv) > 1 else 8
    start_seed = int(sys.argv[2]) if len(sys.argv) > 2 else 51000
    max_attempts = target * 4 + 8

    backend = make_backend(get_settings())
    out_dir = prebaked_cases_dir()
    out_dir.mkdir(parents=True, exist_ok=True)

    existing = sum(1 for _ in out_dir.glob("CASE-*.json"))
    # Continue numbering from the highest ID on disk rather than the file count.
    # With a gap in the pool (e.g. 0001, 0002, 0005) a count-based ID would reuse
    # an existing number and overwrite that case, breaking its share link.
    last_number = _highest_case_number(out_dir)
    print(f"pool has {existing} cases; appending {target} new ones across crime kinds")

    kept: list[CaseFile] = []
    seed = start_seed
    attempts = 0
    while len(kept) < target and attempts < max_attempts:
        attempts += 1
        this_seed = seed
        seed += 1
        # The kind follows the number of cases kept, so a rejected attempt
        # retries the same kind with the next seed.
        kind = _KIND_PLAN[len(kept) % len(_KIND_PLAN)]
        try:
            result = generate_case(backend, seed=this_seed,
                                   knobs=GenerationKnobs(crime_kind=kind))
        except Exception as exc:  # generation hiccup - skip this seed
            print(f"[seed {this_seed}] generation error: {exc}")
            continue
        if not result.report.ok:
            print(f"[seed {this_seed}] unsolvable, skipped")
            continue
        ok, why = _is_exciting(result.case)
        if not ok:
            print(f"[seed {this_seed}] rejected: {why} -- '{result.case.title}'")
            continue

        case_id = f"CASE-{last_number + len(kept) + 1:04d}"
        case = result.case.model_copy(update={"case_id": case_id,
                                              "title": _titlecase(result.case.title)})
        save_case(case, out_dir / f"{case_id}.json")
        kept.append(case)
        # _is_exciting tolerates a missing gender, so the summary must not
        # slice None; show "?" for suspects without one.
        cast = ", ".join(
            f"{s.name} ({(s.visual.gender or '?')[:1].upper()})" for s in case.suspects
        )
        print(f"[KEEP {case_id}] ({kind.value}) '{case.title}' - victim {case.victim.name} | {cast}")

    print(f"\nDONE: kept {len(kept)}/{target} in {attempts} attempts -> {out_dir}")
    return 0 if kept else 1


if __name__ == "__main__":
    raise SystemExit(main())