#!/usr/bin/env python3 """Phase 2 gate — falsification test for the provenance engine. Loads the fixture through the loader seam, runs the deterministic engine, and asserts the verified Phase-2 oracle. NO model. Never edit the oracle to match the engine — fix the engine. Run: python3 tools/phase2_gate.py Oracle (verified ground truth): heavy set == {9, 10, 13} heaviest turn == 9, with 71 tools real retry loops == 0 re-read flag == turn 12, basename 'apply.js', count 4 indirect/total ratio in [0.78, 0.86] (~82% agent-driven) Plus: print 5 indirect edges on turn 9, each with flowValue + sourceTool. """ from __future__ import annotations import sys from pathlib import Path REPO = Path(__file__).resolve().parent.parent if str(REPO) not in sys.path: sys.path.insert(0, str(REPO)) from engine.core.analyze import analyze_path # noqa: E402 from engine.core.heavy import heaviest_turn # noqa: E402 from engine.core.loops import detect_all_loops # noqa: E402 from engine.core.rereads import detect_all_rereads # noqa: E402 FIXTURE = REPO / "fixtures" / "sample-session.jsonl" RATIO_LO, RATIO_HI = 0.78, 0.86 def _line(label: str, got, want, ok: bool) -> bool: flag = "OK " if ok else "DIFF" print(f" [{flag}] {label:<26} got={got!r:>20} want={want!r}") return ok def main() -> int: if not FIXTURE.exists(): print(f"FAIL — fixture missing: {FIXTURE}") return 1 result = analyze_path(str(FIXTURE)) turns = result["turns"] session = result["session"] print("Her · हेर — Phase 2 gate (provenance engine)") print("=" * 64) print(f"fixture : {FIXTURE}") print(f"session : cwd={session.get('cwd')!r}") print("-" * 64) checks: list[bool] = [] # 1) heavy set == {9, 10, 13} heavy_set = sorted(t.i for t in turns if t.heavy) checks.append(_line("heavy set", heavy_set, [9, 10, 13], heavy_set == [9, 10, 13])) # 2) heaviest == turn 9 with 71 tools h = heaviest_turn(turns) checks.append(_line("heaviest turn index", h.i, 9, h.i == 9)) checks.append(_line("heaviest turn #tools", len(h.tools), 71, len(h.tools) == 71)) # 3) real loops == 0 (exact-cmd + >=1 error) loops_by_turn = detect_all_loops(turns) real_loops = sum(len(tl.loops) for tl in loops_by_turn.values()) checks.append(_line("real retry loops", real_loops, 0, real_loops == 0)) # 4) re-read flag == turn 12 / apply.js / count 4 rr_by_turn = detect_all_rereads(turns) rr_flag = sorted( (ti, rr.file, rr.count) for ti, rrs in rr_by_turn.items() for rr in rrs ) want_rr = [(12, "apply.js", 4)] checks.append(_line("re-read flag", rr_flag, want_rr, rr_flag == want_rr)) # 5) indirect/total ratio in [0.78, 0.86] total_tools = sum(len(t.tools) for t in turns) total_indirect = sum(t.indirect for t in turns) ratio = total_indirect / total_tools if total_tools else 0.0 ratio_ok = RATIO_LO <= ratio <= RATIO_HI checks.append( _line( "indirect/total ratio", f"{ratio:.4f} ({total_indirect}/{total_tools})", f"[{RATIO_LO}, {RATIO_HI}]", ratio_ok, ) ) # --- spotcheck: 5 indirect edges on turn 9, each with flowValue + sourceTool print("-" * 64) print("turn-9 indirect value-flow edges (spotcheck, 5 shown):") t9 = turns[9] t9_indirect = [tc for tc in t9.tools if tc.provenance == "indirect" and tc.flowValue] # prefer the most distinctive evidence for display: UUID > URL > filename > other import re as _re _UUID = _re.compile(r"^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$", _re.I) def _rank(tc): v = tc.flowValue or "" if _UUID.match(v): return 0 if v.startswith("http"): return 1 if _re.search(r"\.[a-z]{2,4}$", v): return 2 return 3 # dedup by (flowValue, sourceTool) so 5 DISTINCT edges are shown seen: set = set() deduped = [] for tc in sorted(t9_indirect, key=_rank): key = (tc.flowValue, tc.sourceTool) if key in seen: continue seen.add(key) deduped.append(tc) shown = deduped[:5] cwd = session.get("cwd") or "" for tc in shown: cites_cwd = bool(cwd) and cwd in (tc.flowValue or "") distinctive = bool(_UUID.match(tc.flowValue or "")) or ("/" in (tc.flowValue or "")) or len(tc.flowValue or "") >= 12 print( f" {tc.name:<8} flowValue={tc.flowValue!r:<46} " f"sourceTool={tc.sourceTool!r:<10} distinctive={distinctive} cites_cwd={cites_cwd}" ) checks.append( _line("turn-9 indirect edges (>=5)", len(t9_indirect), ">=5", len(t9_indirect) >= 5) ) print("-" * 64) ok = all(checks) print("GATE:", "PASS" if ok else "FAIL") return 0 if ok else 1 if __name__ == "__main__": sys.exit(main())