| """Render an eval trajectory (results/.../messages.json) as a readable timeline. |
| |
| Usage: |
| python -m synth.trace <path-to-messages.json | path-to-task-result-dir> |
| python -m synth.trace results/myrun/deepseek-v3-2-instruct__filesystem/run-1/<task> |
| python -m synth.trace results/myrun --list # list all trajectories under a dir |
| |
| Shows, in order: the user instruction, each assistant thought (💬), each MCP tool |
| call (🔧 name + args) and its result (📤), plus a summary footer from meta.json. |
| """ |
|
|
| import argparse |
| import json |
| import sys |
| from pathlib import Path |
|
|
|
|
def _text_of(msg) -> str:
    """Return the plain text carried by a message's ``content`` field.

    ``content`` may be a string or a list of parts. For a list, the ``text``
    values of the dict parts are concatenated in order; non-dict parts are
    skipped.

    Args:
        msg: A message dict (anything exposing ``.get("content")``).

    Returns:
        The message text, or ``""`` when the content is missing or empty.
        The result is always a ``str``.
    """
    content = msg.get("content")
    if isinstance(content, list):
        # A part whose "text" is missing or null contributes nothing; passing
        # None through to str.join would raise TypeError.
        return "".join(
            str(part.get("text") or "")
            for part in content
            if isinstance(part, dict)
        )
    if not content:
        return ""
    # Honour the ``-> str`` annotation for unexpected content types so callers
    # can safely use str methods (e.g. ``.strip()``) on the result.
    return content if isinstance(content, str) else str(content)
|
|
|
|
def _short(s, n) -> str:
    """Return ``str(s)`` truncated to at most ``n`` characters.

    When the text is longer than ``n``, the first ``n`` characters are kept
    and a marker stating how many characters were omitted is appended.

    Args:
        s: Any value; it is converted with ``str()`` first.
        n: Maximum number of characters to keep. Negative values are treated
            as 0.

    Returns:
        The original text if it fits, otherwise the truncated text plus the
        ``… (+K chars)`` marker.
    """
    text = str(s)
    # A negative limit would slice from the end of the string and make the
    # reported omitted-character count wrong, so clamp it.
    limit = max(n, 0)
    if len(text) <= limit:
        return text
    return f"{text[:limit]}… (+{len(text) - limit} chars)"
|
|
|
|
def _unwrap_output(raw):
    """Peel JSON envelopes off a tool output to reach the innermost text.

    Tool outputs may be JSON strings of the form
    ``{"content": [{"text": ...}, ...]}`` or ``{"text": ...}``, possibly
    nested inside each other. Up to four layers are removed; unwrapping stops
    at the first value that is not such an envelope.

    Args:
        raw: The raw ``output`` value of a ``function_call_output`` message.

    Returns:
        The innermost payload reached. ``raw`` is returned unchanged when it
        is not a recognised JSON envelope.
    """
    cur = raw
    for _ in range(4):  # bounded, so a self-describing payload cannot loop forever
        try:
            obj = json.loads(cur)
        except (TypeError, ValueError, RecursionError):
            # Not JSON text (or not a str/bytes at all): this is the payload.
            break
        if not isinstance(obj, dict):
            break
        content = obj.get("content")
        if isinstance(content, list) and content:
            first = content[0]
            # Only the first content item is inspected. Stop rather than crash
            # when it is not a dict or carries no text.
            if not isinstance(first, dict) or "text" not in first:
                break
            cur = first["text"]
        elif "text" in obj:
            cur = obj["text"]
        else:
            break
    return cur
|
|
|
|
def _resolve(path: Path) -> Path:
    """Turn the CLI path argument into the messages.json file to render.

    Args:
        path: Either a file path or a task result directory.

    Returns:
        ``path`` itself when it is not a directory, otherwise the
        ``messages.json`` file directly inside it.

    Raises:
        SystemExit: If ``path`` is a directory without a ``messages.json``.
    """
    if not path.is_dir():
        return path
    messages_file = path / "messages.json"
    if not messages_file.is_file():
        raise SystemExit(f"no messages.json in {path}")
    return messages_file
|
|
|
|
def render(messages_path: Path, width: int):
    """Print one trajectory as a readable timeline on stdout.

    The messages file is walked in order. User messages, non-empty assistant
    messages, tool calls and tool outputs are each printed with their index.
    If a ``meta.json`` sits next to the messages file, a summary footer
    (result, turns, tokens, tool calls, time, verification output) follows.

    Args:
        messages_path: Path to the ``messages.json`` file to render.
        width: Maximum characters shown per assistant message. The user
            instruction is shown with at least 600 characters.

    Raises:
        SystemExit: If the messages file does not contain a JSON list.
    """
    # NOTE(review): the glyphs below were restored from mis-decoded bytes in
    # the source; the arrow after "TOOL CALL" is a best guess - confirm.
    data = json.loads(messages_path.read_text(encoding="utf-8"))
    if not isinstance(data, list):
        raise SystemExit(f"expected a JSON list of messages in {messages_path}")

    rule = "=" * 72
    print(rule)
    print(f"Trajectory: {messages_path.parent.name} ({len(data)} messages)")
    print(rule)

    n_calls = 0
    for i, m in enumerate(data):
        if not isinstance(m, dict):
            # Skip malformed entries instead of crashing on ``.get``.
            continue
        t = m.get("type")
        if m.get("role") == "user" and t in (None, "message"):
            print(f"\n#{i} 👤 USER (task instruction)")
            print(_short(_text_of(m) or m.get("content", ""), max(width, 600)))
        elif t == "message":
            txt = _text_of(m).strip()
            if txt:
                print(f"\n#{i} 💬 ASSISTANT")
                print(_short(txt, width))
        elif t == "function_call":
            n_calls += 1
            raw_args = m.get("arguments")
            try:
                # Arguments are normally a JSON string; an already-decoded
                # mapping is accepted as well.
                parsed = json.loads(raw_args) if isinstance(raw_args, str) and raw_args else (raw_args or {})
                args = {k: _short(v, 70) for k, v in parsed.items()}
            except (TypeError, ValueError, AttributeError):
                # Unparseable or non-mapping arguments: show them verbatim.
                args = raw_args
            print(f"\n#{i} 🔧 TOOL CALL → {m.get('name')}")
            print(f" args: {args}")
        elif t == "function_call_output":
            out = _unwrap_output(m.get("output", ""))
            print(f"#{i} 📤 OUTPUT: {_short(out, 240)}")

    meta = messages_path.parent / "meta.json"
    if not meta.is_file():
        return
    md = json.loads(meta.read_text(encoding="utf-8"))
    if not isinstance(md, dict):
        return

    r = md.get("execution_result", {})
    ok = r.get("success") if isinstance(r, dict) else r
    # Keys that are present but null must not break the footer.
    tok = md.get("token_usage") or {}
    total_tokens = tok.get("total_tokens") if isinstance(tok, dict) else tok
    try:
        elapsed = float(md.get("agent_execution_time") or 0)
    except (TypeError, ValueError):
        elapsed = 0.0
    verdict = "✅ PASSED" if ok else "❌ FAILED"

    print("\n" + "-" * 72)
    print(
        f"result: {verdict} | "
        f"turns={md.get('turn_count')} | "
        f"tokens={total_tokens} | "
        f"tool_calls={n_calls} | "
        f"time={elapsed:.1f}s"
    )
    if isinstance(r, dict) and r.get("verification_output"):
        print("verify:", str(r["verification_output"]).strip().replace("\n", " | "))
|
|
|
|
def list_trajectories(root: Path):
    """Print every trajectory directory found under ``root``, one per line.

    A trajectory is any directory containing a ``messages.json``. Each line
    shows the directory relative to ``root``, prefixed with a pass/fail mark
    taken from ``execution_result`` in the sibling ``meta.json``. The mark is
    omitted when there is no ``meta.json`` and is ``?`` when it cannot be read
    or parsed.

    Args:
        root: Directory to search recursively.

    Raises:
        SystemExit: If no ``messages.json`` exists anywhere under ``root``.
    """
    found = sorted(root.rglob("messages.json"))
    if not found:
        raise SystemExit(f"no messages.json under {root}")
    for p in found:
        meta = p.parent / "meta.json"
        tag = ""
        if meta.is_file():
            try:
                md = json.loads(meta.read_text(encoding="utf-8"))
            except (OSError, ValueError):
                # One unreadable or corrupt meta.json must not abort the listing.
                tag = "? "
            else:
                r = md.get("execution_result", {}) if isinstance(md, dict) else {}
                ok = r.get("success") if isinstance(r, dict) else r
                tag = "✅ " if ok else "❌ "
        print(f" {tag} {p.parent.relative_to(root)}")
|
|
|
|
def main():
    """Command-line entry point.

    Parses the arguments, checks that the given path exists, then either
    lists all trajectories under it (``--list``) or renders a single one.

    Raises:
        SystemExit: If the path does not exist.
    """
    parser = argparse.ArgumentParser(description="Render an eval trajectory.")
    parser.add_argument("path", help="messages.json, a task result dir, or a results dir")
    parser.add_argument("--list", action="store_true", help="list all trajectories under path")
    parser.add_argument("--width", type=int, default=600, help="max chars per thought block")
    opts = parser.parse_args()

    target = Path(opts.path)
    if not target.exists():
        raise SystemExit(f"path not found: {target}")

    if not opts.list:
        render(_resolve(target), opts.width)
        return
    list_trajectories(target)
|
|
|
|
# Run the CLI only when this module is executed directly, not when imported.
if __name__ == "__main__":
    main()
|
|