case0 / scripts /curate_prebaked.py
HusseinEid's picture
Case Zero - initial public release (fully local: Qwen2.5-1.5B via llama.cpp + Supertonic, custom pixel-noir SPA via gradio.Server)
414dc55
raw
history blame
2.04 kB
"""Re-filter, title-case, and renumber the pre-baked pool so only clean, exciting cases ship.
Run after ``prebake_cases.py``. Drops any case that fails the (hardened) quality filter,
tidies the title's capitalisation, and rewrites the survivors as a contiguous CASE-0001..N.
No model needed - this only reads and rewrites JSON.
python scripts/curate_prebaked.py
"""
from __future__ import annotations
import sys
from pathlib import Path
ROOT = Path(__file__).resolve().parent.parent
sys.path.insert(0, str(ROOT / "src"))
sys.path.insert(0, str(ROOT / "scripts"))
from prebake_cases import _is_exciting # noqa: E402
from case_zero.persistence.case_store import load_case, save_case # noqa: E402
from case_zero.persistence.paths import prebaked_cases_dir # noqa: E402
_SMALL = {"a", "an", "the", "of", "in", "at", "on", "and", "or", "to", "for", "with", "by", "from"}
def _titlecase(t: str) -> str:
words = t.split()
out = []
for i, w in enumerate(words):
lw = w.lower()
if i != 0 and lw in _SMALL:
out.append(lw)
else:
out.append(w[:1].upper() + w[1:] if w else w)
return " ".join(out)
def main() -> int:
    """Filter, retitle and renumber the pre-baked case pool in place.

    Every ``CASE-*.json`` file in the pre-baked directory is loaded and run
    through ``_is_exciting``; rejected cases are reported with a ``DROP`` line.
    Survivors get a title-cased title and a contiguous ``CASE-0001..N`` id and
    are written back under the matching file name, each reported with a
    ``KEEP`` line.

    All in-memory work (copying, retitling, building the log line) is done
    before the directory is modified, and old files are only removed after
    the survivors have been written. A failure part-way through therefore
    cannot leave the pool empty.

    Returns:
        ``0`` as the process exit status.
    """
    pool_dir = prebaked_cases_dir()
    files = sorted(pool_dir.glob("CASE-*.json"))

    survivors = []
    for p in files:
        case = load_case(p)
        ok, why = _is_exciting(case)
        if not ok:
            print(f"DROP {p.stem}: {why} -- '{case.title}'")
            continue
        survivors.append(case)

    # Build every renumbered case and its log line up front: anything that
    # can raise on odd data does so before a single file has been touched.
    planned = []
    for i, case in enumerate(survivors, 1):
        cid = f"CASE-{i:04d}"
        case = case.model_copy(update={"case_id": cid, "title": _titlecase(case.title)})
        cast = ", ".join(f"{s.name} ({s.visual.gender[:1].upper()})" for s in case.suspects)
        summary = f"KEEP {cid}: '{case.title}' - victim {case.victim.name} | {cast}"
        planned.append((pool_dir / f"{cid}.json", case, summary))

    # Write survivors over the old files (all cases are already in memory).
    # NOTE(review): assumes save_case overwrites an existing file - confirm.
    written = set()
    for target, case, summary in planned:
        save_case(case, target)
        written.add(target)
        print(summary)

    # Only now drop the originals that were not reused as a target name.
    for p in files:
        if p not in written:
            p.unlink()

    print(f"\nFINAL POOL: {len(survivors)} cases")
    return 0
# Script entry point: run the curation and use main()'s return value as the
# process exit status.
if __name__ == "__main__":
    sys.exit(main())