"""Pre-bake a pool of full, model-authored cases for instant New Case serving.
Generation on a 2-vCPU Space takes ~1-2 minutes, so the player would otherwise stare at a
loading screen. This script runs the SAME in-process llama.cpp generator offline, keeps only
solvable, well-formed, "exciting" cases (distinct human suspects, a real motive, no
detective/officer suspects, a gender mix), assigns each a stable Case ID, and writes the full
sealed CaseFile JSON to ``cases/prebaked/``. The Space ships these and serves one instantly on
New Case while still running every interrogation live (and generating fresh cases when the
hardware allows). The pre-baked cases are authored by the local model - no cloud, still
Off-the-Grid.
Usage:
    python scripts/prebake_cases.py [target_count] [start_seed]
"""
from __future__ import annotations
import re
import sys
from pathlib import Path
ROOT = Path(__file__).resolve().parent.parent
sys.path.insert(0, str(ROOT / "src"))
from case_zero.config import get_settings # noqa: E402
from case_zero.generator.pipeline import generate_case # noqa: E402
from case_zero.llm.backend import make_backend # noqa: E402
from case_zero.persistence.case_store import save_case # noqa: E402
from case_zero.persistence.paths import prebaked_cases_dir # noqa: E402
from case_zero.schemas.case import CaseFile # noqa: E402
# Whole-word, case-insensitive match for law-enforcement style job words. It is
# searched against both a suspect's role and name to weed out detective-like suspects.
_BAD_ROLE = re.compile(
    r"\b(detective|officer|investigator|police|inspector|sergeant|constable|cop|agent)\b",
    flags=re.I,
)
# Stock "example person" names that a small model tends to fall back on. They read
# as obviously fake and spoil the atmosphere, so cases using them are filtered out.
# All entries are lowercase with no punctuation.
_PLACEHOLDER_NAMES = {
    "john doe",
    "jane doe",
    "john smith",
    "jane smith",
    "joe bloggs",
    "richard roe",
    "mary major",
    "john q public",
    "tom johnson",
    "tom smith",
    "jack smith",
    "jane roe",
    "john brown",
    "bob smith",
    "foo bar",
    "first last",
    "name surname",
}
def _name_malformed(n: str) -> bool:
    """Return True when *n* has a label baked into it instead of being a plain name.

    The generator occasionally emits names such as "John Smith, Male" or
    "Lara White, 45". A name counts as malformed if it contains a comma, any
    digit, or the standalone word "male"/"female" (case-insensitive).
    """
    if "," in n:
        return True
    if any(ch.isdigit() for ch in n):
        return True
    # Word boundaries keep surnames like "Maleson" from tripping the check.
    return re.search(r"\b(male|female)\b", n, re.I) is not None
def _name_prefix(n: str) -> str:
    """Return the first two words of *n*, lowercased and joined by a single space.

    Commas are treated as whitespace before splitting. Names with fewer than two
    words yield whatever words exist (an empty string for blank input).
    """
    words = n.lower().replace(",", " ").split()
    return " ".join(words[:2])
def _is_exciting(case: CaseFile) -> tuple[bool, str]:
    """Reject bland or malformed cases; keep ones that will read well to a judge.

    Checks run in a fixed order and the first failure wins:

    1. the title has at least 5 characters after stripping;
    2. the victim has a clean full name (contains a space, not malformed);
    3. suspect names are distinct (case-insensitive), full, well-formed, and
       not placeholder names;
    4. suspect roles are distinct (case-insensitive);
    5. no suspect's role or name matches ``_BAD_ROLE``;
    6. at least one suspect's gender starts with "f" and one with "m";
    7. the culprit has a non-empty method narrative.

    Returns:
        ``(True, "ok")`` when every check passes, otherwise ``(False, reason)``
        where *reason* is a short explanation suitable for the console log.
    """
    title = (case.title or "").strip()
    if len(title) < 5:
        return False, "weak title"

    vname = case.victim.name.strip()
    if not vname or " " not in vname or _name_malformed(vname):
        return False, f"victim needs a clean full name: '{vname}'"

    names = [s.name.strip() for s in case.suspects]
    if len({n.lower() for n in names}) != len(names):
        return False, "duplicate suspect names"
    if any(len(n) < 3 or " " not in n for n in names):
        return False, "suspect needs a full name"
    if any(_name_malformed(n) for n in names):
        return False, f"malformed name (comma/digit/gender): {names}"
    for n in names:
        # The two-word prefix catches "John Doe Jr", but on its own it can never
        # equal an entry longer than two words such as "john q public". Also
        # compare the whole name, with periods dropped so "John Q. Public" matches.
        whole = " ".join(n.lower().replace(",", " ").replace(".", " ").split())
        if _name_prefix(n) in _PLACEHOLDER_NAMES or whole in _PLACEHOLDER_NAMES:
            return False, f"placeholder name: {names}"

    roles = [s.role.strip().lower() for s in case.suspects]
    if len(set(roles)) < len(roles):
        return False, "duplicate suspect roles"
    for s in case.suspects:
        if _BAD_ROLE.search(s.role) or _BAD_ROLE.search(s.name):
            return False, f"detective-like suspect: {s.name} ({s.role})"

    # Gender may be missing on a suspect, hence the ``or ""`` fallback.
    genders = [(s.visual.gender or "").lower() for s in case.suspects]
    if not any(g.startswith("f") for g in genders):
        return False, "no female suspect"
    if not any(g.startswith("m") for g in genders):
        return False, "no male suspect"

    # A culprit with a written method makes the mystery land.
    # NOTE(review): only the method narrative is validated here; the motive is
    # not checked in this function - confirm it is guaranteed elsewhere.
    if not (case.culprit.method_narrative or "").strip():
        return False, "no method narrative"
    return True, "ok"
def main() -> int:
    """Generate cases until enough keepers are saved or the attempt budget runs out.

    Command-line arguments (both optional, both integers):
        ``target_count`` - number of cases to keep (default 10).
        ``start_seed``   - first generation seed (default 42000); later attempts
                           use consecutive seeds.

    At most ``target * 4 + 8`` generations are attempted. A seed is skipped when
    generation raises, when the solvability report is not ok, or when
    ``_is_exciting`` rejects the case. Each kept case gets the ID ``CASE-NNNN``
    (numbered in keep order) and is written as ``<case_id>.json`` into the
    directory returned by ``prebaked_cases_dir()``.

    Returns:
        Process exit status: 0 if at least one case was kept, otherwise 1.
    """
    target = int(sys.argv[1]) if len(sys.argv) > 1 else 10
    start_seed = int(sys.argv[2]) if len(sys.argv) > 2 else 42000
    max_attempts = target * 4 + 8

    backend = make_backend(get_settings())
    out_dir = prebaked_cases_dir()
    out_dir.mkdir(parents=True, exist_ok=True)

    kept: list[CaseFile] = []
    attempts = 0
    # One seed per attempt, so the seed sequence is simply a range.
    for seed in range(start_seed, start_seed + max_attempts):
        if len(kept) >= target:
            break
        attempts += 1

        try:
            result = generate_case(backend, seed=seed)
        except Exception as exc:  # generation hiccup - skip this seed
            print(f"[seed {seed}] generation error: {exc}")
            continue

        if not result.report.ok:
            print(f"[seed {seed}] unsolvable, skipped")
            continue

        ok, why = _is_exciting(result.case)
        if not ok:
            print(f"[seed {seed}] rejected: {why} -- '{result.case.title}'")
            continue

        case_id = f"CASE-{len(kept) + 1:04d}"
        case = result.case.model_copy(update={"case_id": case_id})
        save_case(case, out_dir / f"{case_id}.json")
        kept.append(case)

        # _is_exciting only requires *some* male and *some* female suspect, so an
        # individual suspect may still have no gender; fall back to "?" instead
        # of crashing on None after the case has already been saved.
        cast = ", ".join(
            f"{s.name} ({(s.visual.gender or '?')[:1].upper()})" for s in case.suspects
        )
        print(f"[KEEP {case_id}] '{case.title}' - victim {case.victim.name} | {cast}")

    print(f"\nDONE: kept {len(kept)}/{target} in {attempts} attempts -> {out_dir}")
    return 0 if kept else 1
# Script entry point: run main() and use its return value as the exit status.
if __name__ == "__main__":
    sys.exit(main())
|