File size: 4,731 Bytes
69ba747
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
import difflib
from typing import Any, Dict, List, Optional, Tuple

from .bundle import Bundle, load_bundle


def _normalize_for_compare(x: Any) -> Any:
    # Avoid false diffs from ordering
    if isinstance(x, dict):
        return {k: _normalize_for_compare(x[k]) for k in sorted(x.keys())}
    if isinstance(x, list):
        return [_normalize_for_compare(v) for v in x]
    return x


def _json_diff(a: Any, b: Any, path: str = "") -> List[Dict[str, Any]]:
    """
    Small recursive diff (no heavy deps).
    Emits list of {path, a, b, kind}.
    """
    diffs: List[Dict[str, Any]] = []

    if type(a) != type(b):
        diffs.append({"path": path or "$", "kind": "type", "a": str(type(a)), "b": str(type(b))})
        return diffs

    if isinstance(a, dict):
        akeys = set(a.keys())
        bkeys = set(b.keys())
        for k in sorted(akeys - bkeys):
            diffs.append({"path": f"{path}.{k}" if path else k, "kind": "removed", "a": a[k], "b": None})
        for k in sorted(bkeys - akeys):
            diffs.append({"path": f"{path}.{k}" if path else k, "kind": "added", "a": None, "b": b[k]})
        for k in sorted(akeys & bkeys):
            diffs.extend(_json_diff(a[k], b[k], f"{path}.{k}" if path else k))
        return diffs

    if isinstance(a, list):
        # list diff by index (simple)
        n = max(len(a), len(b))
        for i in range(n):
            pa = a[i] if i < len(a) else None
            pb = b[i] if i < len(b) else None
            if i >= len(a):
                diffs.append({"path": f"{path}[{i}]", "kind": "added", "a": None, "b": pb})
            elif i >= len(b):
                diffs.append({"path": f"{path}[{i}]", "kind": "removed", "a": pa, "b": None})
            else:
                diffs.extend(_json_diff(pa, pb, f"{path}[{i}]"))
        return diffs

    if a != b:
        diffs.append({"path": path or "$", "kind": "value", "a": a, "b": b})
    return diffs


def _classify_divergence(ev_a: Dict[str, Any], ev_b: Dict[str, Any]) -> str:
    ka = ev_a.get("kind")
    kb = ev_b.get("kind")
    if ka != kb:
        return "control-flow"
    if ka in ("tool_call", "tool_result"):
        return "tool"
    if ka in ("memory_write", "memory_read"):
        return "memory"
    if ka in ("llm_sample", "llm_call"):
        return "sampling"
    if ka in ("guardrail",):
        return "governance"
    return "state"


def _text_delta(a: str, b: str) -> str:
    a_lines = a.splitlines()
    b_lines = b.splitlines()
    diff = difflib.unified_diff(a_lines, b_lines, fromfile="A", tofile="B", lineterm="")
    return "\n".join(diff)


def _payload_text(event: Dict[str, Any]) -> Optional[str]:
    """Return ``event["payload"]["text"]`` if the payload is a dict holding a string there, else None."""
    payload = event.get("payload")
    # ``.get("payload", {})`` would still hand back None (or a list/str) when
    # the key is present with a non-dict value, so check the type explicitly.
    if not isinstance(payload, dict):
        return None
    text = payload.get("text")
    return text if isinstance(text, str) else None


def diff_bundles(zip_a: str, zip_b: str) -> Dict[str, Any]:
    """
    Load two bundles and compare their event streams position by position.

    Events are compared on their "kind", "step" and "payload" fields only.
    Only the overlapping prefix of the two streams is compared event by
    event; a length mismatch is reflected in ``first_divergence_index``
    (and the ``events_a`` / ``events_b`` counts), not as extra entries.

    Args:
        zip_a: Location of the first bundle, passed straight to ``load_bundle``.
        zip_b: Location of the second bundle, passed straight to ``load_bundle``.

    Returns:
        A dict with three keys:
          * "summary": run/framework/model ids read from each manifest, the
            event counts, and "first_divergence_index" (None when the
            streams are identical).
          * "class_counts": number of differing events per divergence class,
            counted over all differing events (not just the capped list).
          * "differences": per-event difference records, capped at 400.

    Note:
        Relies on the loaded bundles exposing ``.events`` (a sequence of
        dict-like events) and ``.manifest`` (a mapping) -- that is how they
        are used here; the Bundle type itself is defined elsewhere.
    """
    A = load_bundle(zip_a)
    B = load_bundle(zip_b)

    ea = A.events
    eb = B.events
    n = min(len(ea), len(eb))
    compared_keys = ("kind", "step", "payload")

    first_div: Optional[int] = None
    per_event: List[Dict[str, Any]] = []

    # zip stops at the shorter stream, i.e. after the first ``n`` events.
    for i, (ev_a, ev_b) in enumerate(zip(ea, eb)):
        na = _normalize_for_compare({k: ev_a.get(k) for k in compared_keys})
        nb = _normalize_for_compare({k: ev_b.get(k) for k in compared_keys})
        if na == nb:
            continue

        if first_div is None:
            first_div = i

        item = {
            "i": i,
            "step_a": ev_a.get("step"),
            "step_b": ev_b.get("step"),
            "kind_a": ev_a.get("kind"),
            "kind_b": ev_b.get("kind"),
            "class": _classify_divergence(ev_a, ev_b),
            "diffs": _json_diff(na, nb)[:200],  # cap
        }

        # Optional friendly text diff if both payloads carry a string 'text'.
        # Payloads that are None or not dicts are tolerated (no text diff).
        ta = _payload_text(ev_a)
        tb = _payload_text(ev_b)
        if ta is not None and tb is not None and ta != tb:
            item["text_unified_diff"] = _text_delta(ta, tb)[:20000]

        per_event.append(item)

    # Length mismatch: if the common prefix matched, the streams diverge at
    # the first index that exists in only one of them.
    if first_div is None and len(ea) != len(eb):
        first_div = n

    summary = {
        "run_a": A.manifest.get("run_id"),
        "run_b": B.manifest.get("run_id"),
        "framework_a": A.manifest.get("framework"),
        "framework_b": B.manifest.get("framework"),
        "model_a": A.manifest.get("model_id"),
        "model_b": B.manifest.get("model_id"),
        "events_a": len(ea),
        "events_b": len(eb),
        "first_divergence_index": first_div,
    }

    # Simple counts by class, taken before the UI cap is applied.
    counts: Dict[str, int] = {}
    for item in per_event:
        counts[item["class"]] = counts.get(item["class"], 0) + 1

    return {
        "summary": summary,
        "class_counts": counts,
        "differences": per_event[:400],  # cap for UI
    }