RFTSystems committed on
Commit
69ba747
·
verified ·
1 Parent(s): bc1bcc0

Create drp/diff.py

Browse files
Files changed (1) hide show
  1. drp/diff.py +143 -0
drp/diff.py ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import difflib
2
+ from typing import Any, Dict, List, Optional, Tuple
3
+
4
+ from .bundle import Bundle, load_bundle
5
+
6
+
7
+ def _normalize_for_compare(x: Any) -> Any:
8
+ # Avoid false diffs from ordering
9
+ if isinstance(x, dict):
10
+ return {k: _normalize_for_compare(x[k]) for k in sorted(x.keys())}
11
+ if isinstance(x, list):
12
+ return [_normalize_for_compare(v) for v in x]
13
+ return x
14
+
15
+
16
+ def _json_diff(a: Any, b: Any, path: str = "") -> List[Dict[str, Any]]:
17
+ """
18
+ Small recursive diff (no heavy deps).
19
+ Emits list of {path, a, b, kind}.
20
+ """
21
+ diffs: List[Dict[str, Any]] = []
22
+
23
+ if type(a) != type(b):
24
+ diffs.append({"path": path or "$", "kind": "type", "a": str(type(a)), "b": str(type(b))})
25
+ return diffs
26
+
27
+ if isinstance(a, dict):
28
+ akeys = set(a.keys())
29
+ bkeys = set(b.keys())
30
+ for k in sorted(akeys - bkeys):
31
+ diffs.append({"path": f"{path}.{k}" if path else k, "kind": "removed", "a": a[k], "b": None})
32
+ for k in sorted(bkeys - akeys):
33
+ diffs.append({"path": f"{path}.{k}" if path else k, "kind": "added", "a": None, "b": b[k]})
34
+ for k in sorted(akeys & bkeys):
35
+ diffs.extend(_json_diff(a[k], b[k], f"{path}.{k}" if path else k))
36
+ return diffs
37
+
38
+ if isinstance(a, list):
39
+ # list diff by index (simple)
40
+ n = max(len(a), len(b))
41
+ for i in range(n):
42
+ pa = a[i] if i < len(a) else None
43
+ pb = b[i] if i < len(b) else None
44
+ if i >= len(a):
45
+ diffs.append({"path": f"{path}[{i}]", "kind": "added", "a": None, "b": pb})
46
+ elif i >= len(b):
47
+ diffs.append({"path": f"{path}[{i}]", "kind": "removed", "a": pa, "b": None})
48
+ else:
49
+ diffs.extend(_json_diff(pa, pb, f"{path}[{i}]"))
50
+ return diffs
51
+
52
+ if a != b:
53
+ diffs.append({"path": path or "$", "kind": "value", "a": a, "b": b})
54
+ return diffs
55
+
56
+
57
+ def _classify_divergence(ev_a: Dict[str, Any], ev_b: Dict[str, Any]) -> str:
58
+ ka = ev_a.get("kind")
59
+ kb = ev_b.get("kind")
60
+ if ka != kb:
61
+ return "control-flow"
62
+ if ka in ("tool_call", "tool_result"):
63
+ return "tool"
64
+ if ka in ("memory_write", "memory_read"):
65
+ return "memory"
66
+ if ka in ("llm_sample", "llm_call"):
67
+ return "sampling"
68
+ if ka in ("guardrail",):
69
+ return "governance"
70
+ return "state"
71
+
72
+
73
+ def _text_delta(a: str, b: str) -> str:
74
+ a_lines = a.splitlines()
75
+ b_lines = b.splitlines()
76
+ diff = difflib.unified_diff(a_lines, b_lines, fromfile="A", tofile="B", lineterm="")
77
+ return "\n".join(diff)
78
+
79
+
80
def diff_bundles(zip_a: str, zip_b: str) -> Dict[str, Any]:
    """Compare two run bundles event by event and report where they diverge.

    Both bundles are loaded with ``load_bundle``. Events are paired by
    position and compared only on their ``"kind"``, ``"step"`` and
    ``"payload"`` fields.

    Args:
        zip_a: Location of the first bundle, passed to ``load_bundle``.
        zip_b: Location of the second bundle, passed to ``load_bundle``.

    Returns:
        A dict with three keys:

        * ``"summary"`` - run/framework/model ids read from each manifest,
          the two event counts, and ``"first_divergence_index"`` (``None``
          when the runs match; the shorter length when the only difference
          is the number of events).
        * ``"class_counts"`` - number of differing event pairs per
          divergence class, counted over *all* differing pairs.
        * ``"differences"`` - per-pair detail records, capped at 400.

        Events beyond the shorter run's length are not listed in
        ``"differences"``; they only affect ``"first_divergence_index"``.
    """
    A = load_bundle(zip_a)
    B = load_bundle(zip_b)

    ea = A.events
    eb = B.events
    n = min(len(ea), len(eb))

    first_div: Optional[int] = None
    per_event: List[Dict[str, Any]] = []

    compared_fields = ("kind", "step", "payload")

    # zip() stops at the shorter run, i.e. after exactly n pairs.
    for i, (ev_a, ev_b) in enumerate(zip(ea, eb)):
        na = _normalize_for_compare({k: ev_a.get(k) for k in compared_fields})
        nb = _normalize_for_compare({k: ev_b.get(k) for k in compared_fields})
        if na == nb:
            continue

        if first_div is None:
            first_div = i

        item: Dict[str, Any] = {
            "i": i,
            "step_a": ev_a.get("step"),
            "step_b": ev_b.get("step"),
            "kind_a": ev_a.get("kind"),
            "kind_b": ev_b.get("kind"),
            "class": _classify_divergence(ev_a, ev_b),
            "diffs": _json_diff(na, nb)[:200],  # cap
        }

        # Optional friendly text diff if payload has 'text'.
        # A payload may be present but not a dict (e.g. None or a list);
        # guard so that such events do not raise AttributeError.
        payload_a = ev_a.get("payload")
        payload_b = ev_b.get("payload")
        ta = payload_a.get("text") if isinstance(payload_a, dict) else None
        tb = payload_b.get("text") if isinstance(payload_b, dict) else None
        if isinstance(ta, str) and isinstance(tb, str) and ta != tb:
            item["text_unified_diff"] = _text_delta(ta, tb)[:20000]

        per_event.append(item)

    # Length mismatch: if every shared position matched, the divergence
    # starts at the first index that exists in only one of the runs.
    if len(ea) != len(eb) and first_div is None:
        first_div = n

    summary = {
        "run_a": A.manifest.get("run_id"),
        "run_b": B.manifest.get("run_id"),
        "framework_a": A.manifest.get("framework"),
        "framework_b": B.manifest.get("framework"),
        "model_a": A.manifest.get("model_id"),
        "model_b": B.manifest.get("model_id"),
        "events_a": len(ea),
        "events_b": len(eb),
        "first_divergence_index": first_div,
    }

    # Simple counts by class (taken before the UI cap below is applied).
    counts: Dict[str, int] = {}
    for item in per_event:
        counts[item["class"]] = counts.get(item["class"], 0) + 1

    return {
        "summary": summary,
        "class_counts": counts,
        "differences": per_event[:400],  # cap for UI
    }