Spaces:
Running on Zero
Running on Zero
| #!/usr/bin/env python3 | |
| """Phase 1 gate — falsification test for the jsonl-loader. | |
| Loads the fixture via engine.loaders.jsonl_loader.load() and asserts the | |
| CORRECTED regression oracle (CLAUDE.md / verified facts). NO model. Deterministic. | |
| Oracle (corrected, START-anchored turn rule): | |
| turns == 16 (human == 14, system == 2, system indices == [11, 14]) | |
| total tool calls == 239 | |
| session output == 1_555_504 | |
| session fresh input == 52_295 | |
| session cache_read == 213_300_315 | |
| session cache_create == 2_508_984 | |
| Never edit the oracle to match the parser — fix the parser. Run: | |
| python3 tools/phase1_gate.py | |
| """ | |
| from __future__ import annotations | |
| import sys | |
| from pathlib import Path | |
# Make the repo root importable so `engine` resolves regardless of CWD.
# This script is run as tools/phase1_gate.py, i.e. it sits one directory below
# the repo root: the first `.parent` is the script's directory, the second is
# the repo root.
REPO = Path(__file__).resolve().parent.parent
if str(REPO) not in sys.path:
    # Prepend so this path is searched before the other sys.path entries.
    sys.path.insert(0, str(REPO))
# Must stay below the sys.path tweak above; that ordering is why E402
# (module-level import not at top of file) is suppressed.
from engine.loaders.jsonl_loader import load  # noqa: E402
# The JSONL fixture that main() loads and checks against the oracle.
FIXTURE = REPO / "fixtures" / "sample-session.jsonl"
# --- the corrected oracle (ground truth) ---------------------------------- #
# Expected values for the fixture; main() compares each entry against what the
# loader produced. Per the module docstring: never edit these to match the
# parser — fix the parser.
ORACLE = {
    "turns": 16,  # len(turns)
    "human": 14,  # turns whose origin == "human"
    "system": 2,  # turns whose origin == "system"
    "system_indices": [11, 14],  # the .i of each system turn, in turn-list order
    "tools": 239,  # sum of len(t.tools) over all turns
    "out": 1_555_504,  # sum of t.tokens.out
    "in": 52_295,  # sum of t.tokens.in_ (reported as "session fresh input")
    "cacheRead": 213_300_315,  # sum of t.tokens.cacheRead
    "cacheCreate": 2_508_984,  # sum of t.tokens.cacheCreate
}
def _line(label: str, got, want) -> bool:
    """Print one comparison row and report whether the values matched.

    Args:
        label: Name of the check; left-aligned in a 22-character column.
        got: Measured value; its repr is right-aligned in a 14-character column.
        want: Expected value; its repr is printed as-is.

    Returns:
        The result of ``got == want``.
    """
    matched = got == want
    if matched:
        status = "OK "
    else:
        status = "DIFF"
    row = f" [{status}] {label:<22} got={got!r:>14} want={want!r}"
    print(row)
    return matched
def main() -> int:
    """Run the Phase 1 gate and return a process exit code.

    Loads FIXTURE through the jsonl loader, measures turn counts, the total
    tool-call count and the per-session token totals, prints one comparison
    row per ORACLE entry, and finishes with a ``GATE: PASS`` / ``GATE: FAIL``
    line.

    Returns:
        0 when every measured value equals its oracle value; 1 when any row
        differs or the fixture file does not exist.
    """
    if not FIXTURE.exists():
        print(f"FAIL — fixture missing: {FIXTURE}")
        return 1

    loaded = load(str(FIXTURE))
    turns = loaded["turns"]
    session = loaded["session"]

    # Measure everything before printing anything, as the report header should
    # only appear once the loaded turns have been walked successfully.
    turn_count = len(turns)
    human_count = 0
    system_indices = []
    tool_count = 0
    for turn in turns:
        origin = turn.origin
        if origin == "human":
            human_count += 1
        elif origin == "system":
            system_indices.append(turn.i)
        tool_count += len(turn.tools)

    # One total per token counter, summed across all turns.
    token_totals = {
        field: sum(getattr(turn.tokens, field) for turn in turns)
        for field in ("in_", "out", "cacheRead", "cacheCreate")
    }

    print("Her · हेर — Phase 1 gate (jsonl-loader)")
    print("=" * 60)
    print(f"fixture : {FIXTURE}")
    print(f"session : cwd={session.get('cwd')!r}")
    print(f" sessionId={session.get('sessionId')!r}")
    print(f" gitBranch={session.get('gitBranch')!r} version={session.get('version')!r}")
    print("-" * 60)

    # (report label, measured value, ORACLE key) — printed in this order.
    rows = (
        ("turns", turn_count, "turns"),
        ("human turns", human_count, "human"),
        ("system turns", len(system_indices), "system"),
        ("system indices", system_indices, "system_indices"),
        ("total tools", tool_count, "tools"),
        ("session output", token_totals["out"], "out"),
        ("session fresh input", token_totals["in_"], "in"),
        ("session cache_read", token_totals["cacheRead"], "cacheRead"),
        ("session cache_create", token_totals["cacheCreate"], "cacheCreate"),
    )
    # Build the full list first so every row is printed even after a mismatch.
    results = [_line(label, measured, ORACLE[key]) for label, measured, key in rows]
    print("-" * 60)

    if all(results):
        print("GATE:", "PASS")
        return 0
    print("GATE:", "FAIL")
    return 1
# Script entry point: the gate's return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())