File size: 2,038 Bytes
414dc55
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
"""Re-filter, title-case, and renumber the pre-baked pool so only clean, exciting cases ship.

Run after ``prebake_cases.py``. Drops any case that fails the (hardened) quality filter,
tidies the title's capitalisation, and rewrites the survivors as a contiguous CASE-0001..N.
No model needed - this only reads and rewrites JSON.

    python scripts/curate_prebaked.py
"""

from __future__ import annotations

import sys
from pathlib import Path

# Repository root: two levels up from this file's resolved path.
ROOT = Path(__file__).resolve().parents[1]
# Prepend <root>/scripts and then <root>/src to the import path so the
# project-local imports that follow can be resolved when run as a script.
sys.path[:0] = [str(ROOT / "scripts"), str(ROOT / "src")]

from prebake_cases import _is_exciting  # noqa: E402

from case_zero.persistence.case_store import load_case, save_case  # noqa: E402
from case_zero.persistence.paths import prebaked_cases_dir  # noqa: E402

# Articles, conjunctions and short prepositions that stay lowercase inside a title.
_SMALL = frozenset(
    {"a", "an", "the", "of", "in", "at", "on", "and", "or", "to", "for", "with", "by", "from"}
)


def _titlecase(t: str) -> str:
    """Return *t* with title-style capitalisation and single-space separation.

    Words are split on any whitespace and re-joined with one space, so leading,
    trailing and repeated whitespace is collapsed.

    Rules applied per word:

    * A word listed in ``_SMALL`` (compared case-insensitively) is lowercased,
      unless it is the first word, the last word, or directly follows a word
      ending in ``":"`` (i.e. it opens a subtitle).
    * Every other word gets its first character upper-cased; the remaining
      characters are left untouched so acronyms such as ``DNA`` survive.

    An empty or whitespace-only string yields ``""``.
    """
    words = t.split()
    last = len(words) - 1
    out = []
    # True for the very first word and for the first word after a colon.
    opens_segment = True
    for i, w in enumerate(words):
        lw = w.lower()
        if lw in _SMALL and not opens_segment and i != last:
            out.append(lw)
        else:
            # str.split() never yields empty strings, so w[:1] is always one char.
            out.append(w[:1].upper() + w[1:])
        opens_segment = w.endswith(":")
    return " ".join(out)


def main() -> int:
    """Filter, retitle and renumber the pre-baked case files in place.

    Steps:

    1. Load every ``CASE-*.json`` in the pre-baked directory (sorted by path)
       and keep those for which ``_is_exciting`` reports success; each dropped
       case is printed as a ``DROP`` line with the reported reason.
    2. Give the survivors contiguous ids ``CASE-0001`` .. ``CASE-NNNN`` and
       title-cased titles, and write them into a temporary staging directory
       created inside the pool directory.
    3. Only after every survivor has been written successfully, delete the
       original files and move the staged files into place, printing one
       ``KEEP`` line per case.

    Staging first means that a failure while copying or saving a case leaves
    the original pool untouched instead of half-deleted.

    Returns:
        ``0`` always; any error propagates as an exception.
    """
    import tempfile  # function-scope: only this entry point needs it

    d = prebaked_cases_dir()
    files = sorted(d.glob("CASE-*.json"))
    survivors = []
    for p in files:
        case = load_case(p)
        ok, why = _is_exciting(case)
        if not ok:
            print(f"DROP {p.stem}: {why} -- '{case.title}'")
            continue
        survivors.append(case)

    # The staging directory lives inside ``d`` so the final moves stay on one
    # filesystem; its dotted name cannot match the ``CASE-*.json`` glob. The
    # context manager removes it (and anything left in it) on exit or error.
    with tempfile.TemporaryDirectory(dir=d, prefix=".curate-") as tmp:
        staging = Path(tmp)
        renumbered = []
        for i, case in enumerate(survivors, 1):
            cid = f"CASE-{i:04d}"
            case = case.model_copy(update={"case_id": cid, "title": _titlecase(case.title)})
            save_case(case, staging / f"{cid}.json")
            renumbered.append((cid, case))

        # Point of no return: everything that could fail while serialising has
        # already succeeded, so swapping the pool is now just unlink + rename.
        for p in files:
            p.unlink()
        for staged in sorted(staging.iterdir()):
            staged.replace(d / staged.name)

    for cid, case in renumbered:
        cast = ", ".join(f"{s.name} ({s.visual.gender[:1].upper()})" for s in case.suspects)
        print(f"KEEP {cid}: '{case.title}' - victim {case.victim.name} | {cast}")

    print(f"\nFINAL POOL: {len(survivors)} cases")
    return 0


# Script entry point: run the curation and use main()'s return value as the exit status.
if __name__ == "__main__":
    sys.exit(main())