File size: 5,478 Bytes
414dc55
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
"""Pre-bake a pool of full, model-authored cases for instant New Case serving.

Generation on a 2-vCPU Space takes ~1-2 minutes, so the player would otherwise stare at a
loading screen. This script runs the SAME in-process llama.cpp generator offline, keeps only
solvable, well-formed, "exciting" cases (distinct human suspects, a real motive, no
detective/officer suspects, a gender mix), assigns each a stable Case ID, and writes the full
sealed CaseFile JSON to ``cases/prebaked/``. The Space ships these and serves one instantly on
New Case while still running every interrogation live (and generating fresh cases when the
hardware allows). The pre-baked cases are authored by the local model - no cloud, still
Off-the-Grid.

    python scripts/prebake_cases.py [target_count] [start_seed]
"""

from __future__ import annotations

import re
import sys
from pathlib import Path

# Repository root: two directory levels above this file (the usage line in the module
# docstring places this script under scripts/). <root>/src is pushed to the FRONT of
# sys.path so the case_zero imports below can be resolved when the script is run directly
# - presumably that is where the package lives; this must execute before those imports.
ROOT = Path(__file__).resolve().parent.parent
sys.path.insert(0, str(ROOT / "src"))

from case_zero.config import get_settings  # noqa: E402
from case_zero.generator.pipeline import generate_case  # noqa: E402
from case_zero.llm.backend import make_backend  # noqa: E402
from case_zero.persistence.case_store import save_case  # noqa: E402
from case_zero.persistence.paths import prebaked_cases_dir  # noqa: E402
from case_zero.schemas.case import CaseFile  # noqa: E402

# Whole-word, case-insensitive vocabulary that marks a suspect as law enforcement.
_BAD_ROLE = re.compile(
    r"\b(detective|officer|investigator|police|inspector|sergeant|constable|cop|agent)\b",
    flags=re.I,
)

# Stock filler names a small model tends to fall back on. They look obviously fake and
# spoil the atmosphere, so any case using one is thrown away. Entries are lowercase.
_PLACEHOLDER_NAMES = {
    "john doe",
    "jane doe",
    "john smith",
    "jane smith",
    "joe bloggs",
    "richard roe",
    "mary major",
    "john q public",
    "tom johnson",
    "tom smith",
    "jack smith",
    "jane roe",
    "john brown",
    "bob smith",
    "foo bar",
    "first last",
    "name surname",
}
def _name_malformed(n: str) -> bool:
    """Return True when *n* carries junk the model sometimes bakes into a name.

    Typical offenders are "John Smith, Male" or "Lara White, 45": a name is treated as
    malformed if it contains a comma, any digit, or the standalone word "male"/"female"
    (case-insensitive).
    """
    if "," in n:
        return True
    if any(ch.isdigit() for ch in n):
        return True
    return re.search(r"\b(male|female)\b", n, re.I) is not None


def _name_prefix(n: str) -> str:
    """Return the first two words of *n*, lowercased and joined by a single space.

    Commas are treated as whitespace; fewer than two words yields whatever is there
    (possibly the empty string).
    """
    words = n.lower().replace(",", " ").split()
    return " ".join(words[:2])


def _is_placeholder(name: str) -> bool:
    """Return True when *name* is one of the stock filler names in ``_PLACEHOLDER_NAMES``."""
    # Compare the whole normalized name as well as its two-word prefix: the prefix alone
    # can never equal a three-word entry such as "john q public".
    whole = " ".join(name.lower().replace(",", " ").replace(".", " ").split())
    return whole in _PLACEHOLDER_NAMES or _name_prefix(name) in _PLACEHOLDER_NAMES


def _is_exciting(case: CaseFile) -> tuple[bool, str]:
    """Reject bland or malformed cases; keep ones that will read well to a judge.

    The checks run in a fixed order and the first failure wins: title length, victim
    name, suspect names (unique, full, well-formed, not placeholders), suspect roles
    (unique, not law-enforcement), gender mix, and a non-empty method narrative.

    Args:
        case: The generated case to vet.

    Returns:
        ``(True, "ok")`` when every check passes, otherwise ``(False, reason)`` where
        ``reason`` is a short human-readable description of the first failed check.
    """
    title = (case.title or "").strip()
    if len(title) < 5:
        return False, "weak title"

    # Victim: needs a first and last name, free of baked-in labels and filler names.
    vname = case.victim.name.strip()
    if not vname or " " not in vname or _name_malformed(vname):
        return False, f"victim needs a clean full name: '{vname}'"
    if _is_placeholder(vname):
        return False, f"placeholder victim name: '{vname}'"

    # Suspect names: distinct (case-insensitively), full, well-formed, not filler.
    names = [s.name.strip() for s in case.suspects]
    low = [n.lower() for n in names]
    if len(set(low)) != len(names):
        return False, "duplicate suspect names"
    if any(len(n) < 3 or " " not in n for n in names):
        return False, "suspect needs a full name"
    if any(_name_malformed(n) for n in names):
        return False, f"malformed name (comma/digit/gender): {names}"
    if any(_is_placeholder(n) for n in names):
        return False, f"placeholder name: {names}"

    # Suspect roles: each suspect gets a distinct role, none of them law enforcement.
    roles = [s.role.strip().lower() for s in case.suspects]
    if len(set(roles)) < len(roles):
        return False, "duplicate suspect roles"
    for s in case.suspects:
        if _BAD_ROLE.search(s.role) or _BAD_ROLE.search(s.name):
            return False, f"detective-like suspect: {s.name} ({s.role})"

    # Gender mix: at least one suspect whose gender starts with "f" and one with "m".
    # A missing gender is tolerated on individual suspects (treated as empty).
    if not any((s.visual.gender or "").lower().startswith("f") for s in case.suspects):
        return False, "no female suspect"
    if not any((s.visual.gender or "").lower().startswith("m") for s in case.suspects):
        return False, "no male suspect"

    # The culprit must come with a written method narrative so the mystery lands.
    if not (case.culprit.method_narrative or "").strip():
        return False, "no method narrative"
    return True, "ok"


def main() -> int:
    """Generate cases from consecutive seeds until enough of them pass every filter.

    Command-line arguments (both optional, positional):
        target_count: number of cases to keep (default 10).
        start_seed: seed for the first attempt (default 42000); every further attempt
            uses the next integer.

    The run stops once ``target_count`` cases are kept or after
    ``target_count * 4 + 8`` attempts. Each kept case is given the ID ``CASE-NNNN``
    (numbered in the order kept) and saved as ``<ID>.json`` in the pre-baked cases
    directory.

    Returns:
        Process exit status: 0 if at least one case was kept, 1 otherwise.
    """
    target = int(sys.argv[1]) if len(sys.argv) > 1 else 10
    start_seed = int(sys.argv[2]) if len(sys.argv) > 2 else 42000
    max_attempts = target * 4 + 8

    backend = make_backend(get_settings())
    out_dir = prebaked_cases_dir()
    out_dir.mkdir(parents=True, exist_ok=True)

    kept: list[CaseFile] = []
    attempts = 0
    while len(kept) < target and attempts < max_attempts:
        seed = start_seed + attempts  # one fresh seed per attempt, success or not
        attempts += 1
        try:
            result = generate_case(backend, seed=seed)
        except Exception as exc:  # generation hiccup - skip this seed
            print(f"[seed {seed}] generation error: {exc}")
            continue
        if not result.report.ok:
            print(f"[seed {seed}] unsolvable, skipped")
            continue
        ok, why = _is_exciting(result.case)
        if not ok:
            print(f"[seed {seed}] rejected: {why} -- '{result.case.title}'")
            continue

        # NOTE(review): IDs restart at CASE-0001 on every run, so a re-run presumably
        # overwrites same-numbered files already in out_dir - confirm that is intended.
        case_id = f"CASE-{len(kept) + 1:04d}"
        case = result.case.model_copy(update={"case_id": case_id})
        save_case(case, out_dir / f"{case_id}.json")
        kept.append(case)

        # A suspect's gender may be missing (the filter only requires one "f" and one
        # "m" suspect), so fall back to "?" instead of slicing None and crashing the
        # run after the case has already been written.
        cast = ", ".join(
            f"{s.name} ({(s.visual.gender or '?')[:1].upper()})" for s in case.suspects
        )
        print(f"[KEEP {case_id}] '{case.title}' - victim {case.victim.name} | {cast}")

    print(f"\nDONE: kept {len(kept)}/{target} in {attempts} attempts -> {out_dir}")
    return 0 if kept else 1


if __name__ == "__main__":
    # Script entry point: main()'s return value becomes the process exit status.
    sys.exit(main())