mcpmark / synth /trace.py
haochengsama's picture
Add files using upload-large-folder tool
97cb846 verified
Raw
History Blame Contribute Delete
4.78 kB
"""Render an eval trajectory (results/.../messages.json) as a readable timeline.
Usage:
python -m synth.trace <path-to-messages.json | path-to-task-result-dir>
python -m synth.trace results/myrun/deepseek-v3-2-instruct__filesystem/run-1/<task>
python -m synth.trace results/myrun --list # list all trajectories under a dir
Shows, in order: the user instruction, each assistant thought (💬), each MCP tool
call (🔧 name + args) and its result (📤), plus a summary footer from meta.json.
"""
import argparse
import json
import sys
from pathlib import Path
def _text_of(msg) -> str:
    """Return the text carried by a message dict.

    When ``msg["content"]`` is a list, the ``"text"`` value of every dict
    entry is concatenated in order; non-dict entries are skipped and dict
    entries without a ``"text"`` key contribute nothing. Any other content
    value is returned unchanged, except that falsy values (missing key,
    ``None``, empty string) become ``""``.
    """
    content = msg.get("content")
    if not isinstance(content, list):
        return content or ""
    pieces = []
    for part in content:
        if isinstance(part, dict):
            pieces.append(part.get("text", ""))
    return "".join(pieces)
def _short(s, n) -> str:
    """Stringify *s* and cap it at *n* characters.

    Values whose string form fits within *n* characters come back whole.
    Longer ones are cut to their first *n* characters and suffixed with a
    marker stating how many characters were dropped.
    """
    text = str(s)
    overflow = len(text) - n
    if overflow <= 0:
        return text
    return f"{text[:n]}… (+{overflow} chars)"
def _unwrap_output(raw):
    """Peel JSON envelopes off an MCP tool output and return the inner payload.

    Tool outputs nest their payload as JSON, either as
    ``{"content": [{"text": ...}, ...]}`` (only the first content item is
    used) or as ``{"text": ...}``, and the extracted text may itself be another
    such JSON document. Up to four layers are removed.

    Args:
        raw: The recorded output, normally a JSON string. Anything that cannot
            be parsed as JSON (including ``None`` and plain text) is returned
            unchanged.

    Returns:
        The innermost ``text`` value that could be reached, or the last value
        that could not be peeled any further.
    """
    cur = raw
    for _ in range(4):  # outputs can be wrapped a couple of layers deep
        try:
            obj = json.loads(cur)
        except (TypeError, ValueError, RecursionError):
            # Not a string, not valid JSON, or absurdly nested: show as-is.
            break
        if not isinstance(obj, dict):
            break
        content = obj.get("content")
        if isinstance(content, list) and content:
            first = content[0]
            # A non-dict or text-less first item has nothing to peel; keep the
            # current layer rather than crashing or re-parsing it in a loop.
            if not isinstance(first, dict) or "text" not in first:
                break
            cur = first["text"]
        elif "text" in obj:
            cur = obj["text"]
        else:
            break
    return cur
def _resolve(path: Path) -> Path:
    """Map a CLI path onto the messages.json file to render.

    A path that is not a directory is handed back untouched (it is not
    checked further here). A directory must directly contain a
    ``messages.json`` file, which is returned.

    Raises:
        SystemExit: if *path* is a directory without a ``messages.json`` file.
    """
    if not path.is_dir():
        return path
    candidate = path / "messages.json"
    if not candidate.is_file():
        raise SystemExit(f"no messages.json in {path}")
    return candidate
def _format_call_args(raw):
    """Return tool-call arguments as a dict of shortened values, else *raw* itself."""
    try:
        parsed = json.loads(raw or "{}")
    except TypeError:
        # Not a JSON string; it may already be a decoded object.
        parsed = raw
    except (ValueError, RecursionError):
        return raw
    if isinstance(parsed, dict):
        return {k: _short(v, 70) for k, v in parsed.items()}
    # Anything that is not a JSON object is shown exactly as recorded.
    return raw


def _print_footer(meta_path: Path, n_calls: int) -> None:
    """Print the summary footer from *meta_path*; warn on stderr if it is unusable."""
    if not meta_path.is_file():
        return
    try:
        md = json.loads(meta_path.read_text(encoding="utf-8"))
    except (OSError, ValueError) as exc:
        print(f"warning: could not read {meta_path}: {exc}", file=sys.stderr)
        return
    if not isinstance(md, dict):
        print(f"warning: unexpected layout in {meta_path}", file=sys.stderr)
        return

    r = md.get("execution_result", {})
    # execution_result is either a dict with a "success" flag or a bare value.
    ok = r.get("success") if isinstance(r, dict) else r
    tok = md.get("token_usage")
    total_tokens = tok.get("total_tokens") if isinstance(tok, dict) else None
    try:
        # A null or missing time is reported as 0.0, matching the old default.
        elapsed = float(md.get("agent_execution_time") or 0)
    except (TypeError, ValueError):
        elapsed = 0.0

    print("\n" + "-" * 72)
    print(f"result: {'✅ PASSED' if ok else '✗ FAILED'} | "
          f"turns={md.get('turn_count')} | "
          f"tokens={total_tokens} | "
          f"tool_calls={n_calls} | "
          f"time={elapsed:.1f}s")
    if isinstance(r, dict) and r.get("verification_output"):
        print("verify:", str(r["verification_output"]).strip().replace("\n", " | "))


def render(messages_path: Path, width: int):
    """Print the trajectory stored in *messages_path* as a timeline on stdout.

    Each entry of the JSON document is printed according to its ``role`` and
    ``type`` fields:

    * ``role == "user"`` with type ``None`` or ``"message"``: the task
      instruction, truncated to ``max(width, 600)`` characters;
    * type ``"message"``: an assistant thought, truncated to *width* characters
      (entries with no text are skipped);
    * type ``"function_call"``: the tool name and its arguments, each argument
      value truncated to 70 characters;
    * type ``"function_call_output"``: the unwrapped tool output, truncated to
      240 characters.

    Entries that are not dicts, or that match none of the above, are skipped.
    If a ``meta.json`` file sits next to *messages_path*, a one-line summary
    (result, turns, tokens, tool calls, time) and the verification output are
    printed afterwards.

    Args:
        messages_path: Path to the messages JSON file.
        width: Maximum number of characters shown per assistant thought.
    """
    data = json.loads(messages_path.read_text(encoding="utf-8"))
    rule = "=" * 72
    print(rule)
    print(f"Trajectory: {messages_path.parent.name} ({len(data)} messages)")
    print(rule)

    n_calls = 0
    for i, m in enumerate(data):
        if not isinstance(m, dict):
            # Keep the original index for the remaining entries; just skip junk.
            continue
        t = m.get("type")
        if m.get("role") == "user" and t in (None, "message"):
            print(f"\n#{i} 👤 USER (task instruction)")
            # Fall back to the raw content when it has no text parts, but never
            # print the literal "None" for a null content field.
            print(_short(_text_of(m) or m.get("content") or "", max(width, 600)))
        elif t == "message":
            txt = _text_of(m).strip()
            if txt:
                print(f"\n#{i} 💬 ASSISTANT")
                print(_short(txt, width))
        elif t == "function_call":
            n_calls += 1
            print(f"\n#{i} 🔧 TOOL CALL → {m.get('name')}")
            print(f" args: {_format_call_args(m.get('arguments'))}")
        elif t == "function_call_output":
            out = _unwrap_output(m.get("output", ""))
            print(f"#{i} 📤 OUTPUT: {_short(out, 240)}")

    # footer from meta.json if present
    _print_footer(messages_path.parent / "meta.json", n_calls)
def list_trajectories(root: Path):
    """Print one line per trajectory found anywhere under *root*.

    A trajectory is any directory containing a ``messages.json`` file. Each
    line shows a status mark followed by the directory path relative to
    *root*. The mark comes from the sibling ``meta.json``:

    * ``✅`` when its ``execution_result`` (or that dict's ``success`` field)
      is truthy, ``✗`` otherwise;
    * ``?`` when ``meta.json`` exists but cannot be read, is not valid JSON, or
      is not a JSON object, so one bad file does not abort the whole listing;
    * no mark when there is no ``meta.json``.

    Raises:
        SystemExit: if no ``messages.json`` exists under *root*.
    """
    found = sorted(root.rglob("messages.json"))
    if not found:
        raise SystemExit(f"no messages.json under {root}")
    for p in found:
        meta = p.parent / "meta.json"
        tag = ""
        if meta.is_file():
            try:
                md = json.loads(meta.read_text(encoding="utf-8"))
            except (OSError, ValueError):
                md = None
            if isinstance(md, dict):
                r = md.get("execution_result", {})
                ok = r.get("success") if isinstance(r, dict) else r
                tag = "✅" if ok else "✗ "
            else:
                tag = "? "
        print(f" {tag} {p.parent.relative_to(root)}")
def main():
    """Command-line entry point.

    Parses ``path``, ``--list`` and ``--width`` from the command line. With
    ``--list`` every trajectory under ``path`` is listed; otherwise ``path`` is
    resolved to a messages.json file and rendered with the requested width.

    Raises:
        SystemExit: if ``path`` does not exist.
    """
    parser = argparse.ArgumentParser(description="Render an eval trajectory.")
    parser.add_argument("path", help="messages.json, a task result dir, or a results dir")
    parser.add_argument("--list", action="store_true", help="list all trajectories under path")
    parser.add_argument("--width", type=int, default=600, help="max chars per thought block")
    opts = parser.parse_args()

    target = Path(opts.path)
    if not target.exists():
        raise SystemExit(f"path not found: {target}")

    if opts.list:
        list_trajectories(target)
        return
    render(_resolve(target), opts.width)


if __name__ == "__main__":
    main()