"""Pre-bake a pool of full, model-authored cases for instant New Case serving. Generation on a 2-vCPU Space takes ~1-2 minutes, so the player would otherwise stare at a loading screen. This script runs the SAME in-process llama.cpp generator offline, keeps only solvable, well-formed, "exciting" cases (distinct human suspects, a real motive, no detective/officer suspects, a gender mix), assigns each a stable Case ID, and writes the full sealed CaseFile JSON to ``cases/prebaked/``. The Space ships these and serves one instantly on New Case while still running every interrogation live (and generating fresh cases when the hardware allows). The pre-baked cases are authored by the local model - no cloud, still Off-the-Grid. python scripts/prebake_cases.py [target_count] [start_seed] """ from __future__ import annotations import re import sys from pathlib import Path ROOT = Path(__file__).resolve().parent.parent sys.path.insert(0, str(ROOT / "src")) from case_zero.config import get_settings # noqa: E402 from case_zero.generator.pipeline import generate_case # noqa: E402 from case_zero.llm.backend import make_backend # noqa: E402 from case_zero.persistence.case_store import save_case # noqa: E402 from case_zero.persistence.paths import prebaked_cases_dir # noqa: E402 from case_zero.schemas.case import CaseFile # noqa: E402 _BAD_ROLE = re.compile( r"\b(detective|officer|investigator|police|inspector|sergeant|constable|cop|agent)\b", re.IGNORECASE, ) # Filler names a small model reaches for - they read as obviously fake and kill the mood. _PLACEHOLDER_NAMES = { "john doe", "jane doe", "john smith", "jane smith", "joe bloggs", "richard roe", "mary major", "john q public", "tom johnson", "tom smith", "jack smith", "jane roe", "john brown", "bob smith", "foo bar", "first last", "name surname", } def _name_malformed(n: str) -> bool: # The model sometimes bakes a gender/age/label into the name: "John Smith, Male", # "Lara White, 45". Reject anything with a comma, digits, or a gender word. return bool("," in n or any(c.isdigit() for c in n) or re.search(r"\b(male|female)\b", n, re.I)) def _name_prefix(n: str) -> str: return " ".join(n.lower().replace(",", " ").split()[:2]) def _is_exciting(case: CaseFile) -> tuple[bool, str]: """Reject bland or malformed cases; keep ones that will read well to a judge.""" title = (case.title or "").strip() if len(title) < 5: return False, "weak title" vname = case.victim.name.strip() if not vname or " " not in vname or _name_malformed(vname): return False, f"victim needs a clean full name: '{vname}'" names = [s.name.strip() for s in case.suspects] low = [n.lower() for n in names] if len(set(low)) != len(names): return False, "duplicate suspect names" if any(len(n) < 3 or " " not in n for n in names): return False, "suspect needs a full name" if any(_name_malformed(n) for n in names): return False, f"malformed name (comma/digit/gender): {names}" if any(_name_prefix(n) in _PLACEHOLDER_NAMES for n in names): return False, f"placeholder name: {names}" roles = [s.role.strip().lower() for s in case.suspects] if len(set(roles)) < len(roles): return False, "duplicate suspect roles" for s in case.suspects: if _BAD_ROLE.search(s.role) or _BAD_ROLE.search(s.name): return False, f"detective-like suspect: {s.name} ({s.role})" if not any((s.visual.gender or "").lower().startswith("f") for s in case.suspects): return False, "no female suspect" if not any((s.visual.gender or "").lower().startswith("m") for s in case.suspects): return False, "no male suspect" # A real culprit with a written motive and method makes the mystery land. if not (case.culprit.method_narrative or "").strip(): return False, "no method narrative" return True, "ok" def main() -> int: target = int(sys.argv[1]) if len(sys.argv) > 1 else 10 start_seed = int(sys.argv[2]) if len(sys.argv) > 2 else 42000 max_attempts = target * 4 + 8 backend = make_backend(get_settings()) out_dir = prebaked_cases_dir() out_dir.mkdir(parents=True, exist_ok=True) kept: list[CaseFile] = [] seed = start_seed attempts = 0 while len(kept) < target and attempts < max_attempts: attempts += 1 try: result = generate_case(backend, seed=seed) except Exception as exc: # generation hiccup - skip this seed print(f"[seed {seed}] generation error: {exc}") seed += 1 continue seed += 1 if not result.report.ok: print(f"[seed {seed - 1}] unsolvable, skipped") continue ok, why = _is_exciting(result.case) if not ok: print(f"[seed {seed - 1}] rejected: {why} -- '{result.case.title}'") continue case_id = f"CASE-{len(kept) + 1:04d}" case = result.case.model_copy(update={"case_id": case_id}) save_case(case, out_dir / f"{case_id}.json") kept.append(case) cast = ", ".join(f"{s.name} ({s.visual.gender[:1].upper()})" for s in case.suspects) print(f"[KEEP {case_id}] '{case.title}' - victim {case.victim.name} | {cast}") print(f"\nDONE: kept {len(kept)}/{target} in {attempts} attempts -> {out_dir}") return 0 if kept else 1 if __name__ == "__main__": raise SystemExit(main())