Spaces:
Running
Running
Case Zero - initial public release (fully local: Qwen2.5-1.5B via llama.cpp + Supertonic, custom pixel-noir SPA via gradio.Server)
414dc55 | """Pre-bake a pool of full, model-authored cases for instant New Case serving. | |
| Generation on a 2-vCPU Space takes ~1-2 minutes, so the player would otherwise stare at a | |
| loading screen. This script runs the SAME in-process llama.cpp generator offline, keeps only | |
| solvable, well-formed, "exciting" cases (distinct human suspects, a real motive, no | |
| detective/officer suspects, a gender mix), assigns each a stable Case ID, and writes the full | |
| sealed CaseFile JSON to ``cases/prebaked/``. The Space ships these and serves one instantly on | |
| New Case while still running every interrogation live (and generating fresh cases when the | |
| hardware allows). The pre-baked cases are authored by the local model - no cloud, still | |
| Off-the-Grid. | |
| python scripts/prebake_cases.py [target_count] [start_seed] | |
| """ | |
| from __future__ import annotations | |
| import re | |
| import sys | |
| from pathlib import Path | |
| ROOT = Path(__file__).resolve().parent.parent | |
| sys.path.insert(0, str(ROOT / "src")) | |
| from case_zero.config import get_settings # noqa: E402 | |
| from case_zero.generator.pipeline import generate_case # noqa: E402 | |
| from case_zero.llm.backend import make_backend # noqa: E402 | |
| from case_zero.persistence.case_store import save_case # noqa: E402 | |
| from case_zero.persistence.paths import prebaked_cases_dir # noqa: E402 | |
| from case_zero.schemas.case import CaseFile # noqa: E402 | |
# Whole-word, case-insensitive match for law-enforcement / investigator wording.
# Applied to both a suspect's role and name when filtering generated cases.
_BAD_ROLE = re.compile(
    r"\b(detective|officer|investigator|police|inspector|sergeant|constable|cop|agent)\b",
    flags=re.IGNORECASE,
)

# Filler names a small model reaches for - they read as obviously fake and kill the mood.
# Entries are lowercase with single spaces between words.
_PLACEHOLDER_NAMES = {
    "john doe",
    "jane doe",
    "john smith",
    "jane smith",
    "joe bloggs",
    "richard roe",
    "mary major",
    "john q public",
    "tom johnson",
    "tom smith",
    "jack smith",
    "jane roe",
    "john brown",
    "bob smith",
    "foo bar",
    "first last",
    "name surname",
}
def _name_malformed(n: str) -> bool:
    """Return True when *n* carries more than a bare name.

    The model sometimes bakes a gender/age/label into the name
    ("John Smith, Male", "Lara White, 45"), so a comma, any digit, or the
    standalone word "male"/"female" (case-insensitive) marks it as malformed.
    """
    if "," in n:
        return True
    if any(ch.isdigit() for ch in n):
        return True
    # Word boundaries keep names such as "Malena" from tripping the gender check.
    return re.search(r"\b(male|female)\b", n, re.I) is not None
def _name_prefix(n: str) -> str:
    """Return the first two words of *n*, lowercased and joined by one space.

    Commas are treated as whitespace before splitting; a name with fewer than
    two words yields whatever words it has (possibly the empty string).
    """
    words = n.lower().replace(",", " ").split()
    return " ".join(words[:2])
def _is_exciting(case: CaseFile) -> tuple[bool, str]:
    """Reject bland or malformed cases; keep ones that will read well to a judge.

    Checks run in a fixed order and the first failure wins: title length,
    victim name, suspect names (unique, full, well-formed, not a placeholder),
    unique roles, no detective-like suspect, at least one female and one male
    suspect, and a non-empty method narrative.

    Args:
        case: The generated case to vet.

    Returns:
        ``(True, "ok")`` when every check passes, otherwise ``(False, reason)``
        where ``reason`` is a short human-readable explanation for the log.
    """
    title = (case.title or "").strip()
    if len(title) < 5:
        return False, "weak title"

    vname = case.victim.name.strip()
    if not vname or " " not in vname or _name_malformed(vname):
        return False, f"victim needs a clean full name: '{vname}'"

    names = [s.name.strip() for s in case.suspects]
    # Uniqueness is case-insensitive: "Ann Lee" and "ann lee" count as duplicates.
    if len({n.lower() for n in names}) != len(names):
        return False, "duplicate suspect names"
    if any(len(n) < 3 or " " not in n for n in names):
        return False, "suspect needs a full name"
    if any(_name_malformed(n) for n in names):
        return False, f"malformed name (comma/digit/gender): {names}"
    for n in names:
        # The two-word prefix catches "John Doe Jr"; the whole name (periods
        # dropped) is needed so three-word entries such as "john q public"
        # can match at all.
        whole = " ".join(n.lower().replace(".", " ").split())
        if _name_prefix(n) in _PLACEHOLDER_NAMES or whole in _PLACEHOLDER_NAMES:
            return False, f"placeholder name: {names}"

    roles = [s.role.strip().lower() for s in case.suspects]
    if len(set(roles)) < len(roles):
        return False, "duplicate suspect roles"
    for s in case.suspects:
        if _BAD_ROLE.search(s.role) or _BAD_ROLE.search(s.name):
            return False, f"detective-like suspect: {s.name} ({s.role})"

    # Gender may be missing on a suspect; treat that as "neither".
    genders = [(s.visual.gender or "").lower() for s in case.suspects]
    if not any(g.startswith("f") for g in genders):
        return False, "no female suspect"
    if not any(g.startswith("m") for g in genders):
        return False, "no male suspect"

    # A written method narrative makes the mystery land.
    # NOTE(review): the module docstring also promises "a real motive", but no
    # motive field is checked here - confirm against the CaseFile schema.
    if not (case.culprit.method_narrative or "").strip():
        return False, "no method narrative"
    return True, "ok"
def main() -> int:
    """Generate cases until enough are kept or the attempt budget runs out.

    Command line: ``[target_count] [start_seed]`` (defaults ``10`` and
    ``42000``). Seeds are tried consecutively from ``start_seed``, one per
    attempt, for at most ``target * 4 + 8`` attempts. A case is kept when
    generation succeeds, its report is ok, and ``_is_exciting`` accepts it;
    kept cases get the ID ``CASE-NNNN`` (1-based, in keep order) and are
    passed to ``save_case`` with the path ``<prebaked dir>/<case_id>.json``.

    Returns:
        ``0`` if at least one case was kept, otherwise ``1``.

    Raises:
        ValueError: If a command-line argument is not an integer.
    """
    target = int(sys.argv[1]) if len(sys.argv) > 1 else 10
    start_seed = int(sys.argv[2]) if len(sys.argv) > 2 else 42000
    max_attempts = target * 4 + 8

    backend = make_backend(get_settings())
    out_dir = prebaked_cases_dir()
    out_dir.mkdir(parents=True, exist_ok=True)

    kept: list[CaseFile] = []
    attempts = 0
    while len(kept) < target and attempts < max_attempts:
        # Every attempt consumes exactly one seed, whether or not it succeeds.
        seed = start_seed + attempts
        attempts += 1
        try:
            result = generate_case(backend, seed=seed)
        except Exception as exc:  # generation hiccup - skip this seed
            print(f"[seed {seed}] generation error: {exc}")
            continue
        if not result.report.ok:
            print(f"[seed {seed}] unsolvable, skipped")
            continue
        ok, why = _is_exciting(result.case)
        if not ok:
            print(f"[seed {seed}] rejected: {why} -- '{result.case.title}'")
            continue

        case_id = f"CASE-{len(kept) + 1:04d}"
        case = result.case.model_copy(update={"case_id": case_id})
        save_case(case, out_dir / f"{case_id}.json")
        kept.append(case)
        # _is_exciting only requires one female and one male suspect, so another
        # suspect may still have no gender; fall back to "?" instead of crashing
        # the whole run on a log line.
        cast = ", ".join(
            f"{s.name} ({(s.visual.gender or '?')[:1].upper()})" for s in case.suspects
        )
        print(f"[KEEP {case_id}] '{case.title}' - victim {case.victim.name} | {cast}")

    print(f"\nDONE: kept {len(kept)}/{target} in {attempts} attempts -> {out_dir}")
    return 0 if kept else 1
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    sys.exit(main())