her / tools /phase1_gate.py
geekwrestler's picture
Squash history (purge pre-scrub demo session blobs)
5f43c7d
#!/usr/bin/env python3
"""Phase 1 gate — falsification test for the jsonl-loader.
Loads the fixture via engine.loaders.jsonl_loader.load() and asserts the
CORRECTED regression oracle (CLAUDE.md / verified facts). NO model. Deterministic.
Oracle (corrected, START-anchored turn rule):
turns == 16 (human == 14, system == 2, system indices == [11, 14])
total tool calls == 239
session output == 1_555_504
session fresh input == 52_295
session cache_read == 213_300_315
session cache_create == 2_508_984
Never edit the oracle to match the parser — fix the parser. Run:
python3 tools/phase1_gate.py
"""
from __future__ import annotations
import sys
from pathlib import Path
# Put the repository root (two levels up from this file) on sys.path so that
# the `engine` package imports cleanly whatever the current directory is.
REPO = Path(__file__).resolve().parent.parent
if str(REPO) not in sys.path:
    sys.path.insert(0, str(REPO))

from engine.loaders.jsonl_loader import load  # noqa: E402

# Session transcript the gate is evaluated against.
FIXTURE = REPO.joinpath("fixtures", "sample-session.jsonl")
# --- the corrected oracle (ground truth) ---------------------------------- #
# Expected measurements for the fixture session. main() compares every entry
# against the value it measures from the loader output. Per the module
# docstring, these numbers are never edited to match the parser.
ORACLE = {
    # Turn counts
    "turns": 16,  # total number of turns
    "human": 14,  # turns whose origin is "human"
    "system": 2,  # turns whose origin is "system"
    "system_indices": [11, 14],  # `i` of each system turn, in turn order
    # Tool calls
    "tools": 239,  # tool calls summed over all turns
    # Token totals, each summed over all turns
    "out": 1_555_504,  # session output
    "in": 52_295,  # session fresh input
    "cacheRead": 213_300_315,  # session cache_read
    "cacheCreate": 2_508_984,  # session cache_create
}
def _line(label: str, got, want) -> bool:
    """Print one comparison row of the gate report and return the verdict.

    Args:
        label: Name of the measurement; left-aligned in a 22-character column.
        got: Observed value.
        want: Expected value.

    Returns:
        The result of ``got == want``.
    """
    ok = got == want
    flag = "OK" if ok else "DIFF"
    # Pad the tag to the width of "DIFF" so the label/got/want columns line up
    # between passing and failing rows.
    print(f" [{flag:<4}] {label:<22} got={got!r:>14} want={want!r}")
    return ok
def main() -> int:
    """Run the Phase 1 gate against the fixture and print a report.

    Loads ``FIXTURE`` with the jsonl-loader, measures turn counts, tool calls
    and token totals from the returned turns, prints one comparison row per
    ``ORACLE`` entry, and finishes with the overall verdict. All measurements
    are taken before the report header is printed.

    Returns:
        0 when every measurement equals its oracle value; 1 when any differs
        or when the fixture file does not exist.
    """
    if not FIXTURE.exists():
        print(f"FAIL — fixture missing: {FIXTURE}")
        return 1

    loaded = load(str(FIXTURE))
    turns = loaded["turns"]
    session = loaded["session"]

    # measured
    turn_count = len(turns)
    human_count = sum(1 for t in turns if t.origin == "human")
    system_turns = [t for t in turns if t.origin == "system"]
    system_indices = [t.i for t in system_turns]
    tool_count = sum(len(t.tools) for t in turns)
    fresh_input = sum(t.tokens.in_ for t in turns)
    output = sum(t.tokens.out for t in turns)
    cache_read = sum(t.tokens.cacheRead for t in turns)
    cache_create = sum(t.tokens.cacheCreate for t in turns)

    # (report label, measured value, ORACLE key) in report order.
    rows = (
        ("turns", turn_count, "turns"),
        ("human turns", human_count, "human"),
        ("system turns", len(system_turns), "system"),
        ("system indices", system_indices, "system_indices"),
        ("total tools", tool_count, "tools"),
        ("session output", output, "out"),
        ("session fresh input", fresh_input, "in"),
        ("session cache_read", cache_read, "cacheRead"),
        ("session cache_create", cache_create, "cacheCreate"),
    )

    print("Her · हेर — Phase 1 gate (jsonl-loader)")
    print("=" * 60)
    print(f"fixture : {FIXTURE}")
    print(f"session : cwd={session.get('cwd')!r}")
    print(f" sessionId={session.get('sessionId')!r}")
    print(f" gitBranch={session.get('gitBranch')!r} version={session.get('version')!r}")
    print("-" * 60)

    # Build the full list (no short-circuit) so every row is printed even
    # after the first mismatch.
    results = [_line(label, got, ORACLE[key]) for label, got, key in rows]

    print("-" * 60)
    passed = all(results)
    verdict = "PASS" if passed else "FAIL"
    print("GATE:", verdict)
    return 0 if passed else 1
# Script entry point: the gate's return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())