| """Aggregate per-rank JSONLs into a summary table.""" |
| import argparse |
| import glob |
| import json |
| import os |
| from collections import defaultdict |
|
|
| import numpy as np |
|
|
|
|
def _load_rows(out_dir):
    """Read every JSON record from the ``rank_*.jsonl`` files in *out_dir*.

    Files are visited in sorted path order. Blank lines are skipped.

    Raises:
        json.JSONDecodeError: if a non-blank line is not valid JSON; the
            message is prefixed with the offending ``path:lineno``.
    """
    rows = []
    for path in sorted(glob.glob(os.path.join(out_dir, "rank_*.jsonl"))):
        with open(path, encoding="utf-8") as f:
            for lineno, raw in enumerate(f, 1):
                line = raw.strip()
                if not line:
                    # A blank line is not a record; json.loads would raise on it.
                    continue
                try:
                    rows.append(json.loads(line))
                except json.JSONDecodeError as err:
                    # Keep the exception type, but say which file/line is bad.
                    raise json.JSONDecodeError(
                        f"{path}:{lineno}: {err.msg}", err.doc, err.pos
                    ) from err
    return rows


def _mean_metrics(rows):
    """Return the mean of each metric over *rows* as plain floats.

    The keys of the result are the ones used in the ``per_generator``
    section of ``summary.json``.
    """
    return {
        "soft_F1": float(np.mean([r["soft_F1"] for r in rows])),
        "mean_F1_tIoU": float(np.mean([r["mean_F1_tIoU"] for r in rows])),
        "hungarian_IoU": float(np.mean([r["hungarian_iou"] for r in rows])),
        "parse_failure_rate": float(np.mean([r["parse_failed"] for r in rows])),
    }


def main(argv=None):
    """Aggregate ``rank_*.jsonl`` results and write ``summary.json``.

    Reads every record found under ``--out_dir``, prints overall mean/median
    metrics and a per-generator table of means, then writes the means to
    ``<out_dir>/summary.json``. If no records are found, prints a notice and
    returns without writing anything.

    Args:
        argv: Optional list of command-line arguments. When ``None``
            (the default) argparse falls back to ``sys.argv[1:]``.

    Raises:
        json.JSONDecodeError: if a non-blank input line is not valid JSON.
        KeyError: if a record lacks one of the fields read here
            (``soft_F1``, ``mean_F1_tIoU``, ``hungarian_iou``,
            ``parse_failed``, ``generator``).
    """
    p = argparse.ArgumentParser()
    p.add_argument("--out_dir", required=True)
    args = p.parse_args(argv)

    rows = _load_rows(args.out_dir)
    if not rows:
        print(f"NO RESULTS in {args.out_dir}")
        return

    print(f"=== {len(rows)} evaluated videos ===")
    sf = np.array([r["soft_F1"] for r in rows])
    mf = np.array([r["mean_F1_tIoU"] for r in rows])
    hg = np.array([r["hungarian_iou"] for r in rows])
    parse_fail = np.mean([r["parse_failed"] for r in rows])
    print(f" soft_F1 mean={sf.mean():.3f} median={np.median(sf):.3f}")
    print(f" mean_F1_tIoU mean={mf.mean():.3f} median={np.median(mf):.3f}")
    print(f" hungarian_IoU mean={hg.mean():.3f} median={np.median(hg):.3f}")
    print(f" parse_failure_rate: {parse_fail:.2%}")

    # Group records by generator, then compute each group's means exactly
    # once; both the printed table and summary.json reuse these values.
    by_gen = defaultdict(list)
    for r in rows:
        by_gen[r["generator"]].append(r)
    per_gen = {g: {"n": len(rs), **_mean_metrics(rs)} for g, rs in by_gen.items()}

    print(f"\n{'gen':<12} {'n':>4} {'soft_F1':>8} {'F1_tIoU':>9} {'hung_IoU':>9} {'parse_fail':>10}")
    for g in sorted(per_gen):
        st = per_gen[g]
        # Two-space indent + 10-wide name == the header's 12-wide 'gen' column.
        print(
            f"  {g:<10} {st['n']:>4} {st['soft_F1']:>8.3f} "
            f"{st['mean_F1_tIoU']:>9.3f} {st['hungarian_IoU']:>9.3f} "
            f"{st['parse_failure_rate']:>10.2%}"
        )

    summary = {
        "n": len(rows),
        "soft_F1_mean": float(sf.mean()),
        "mean_F1_tIoU_mean": float(mf.mean()),
        "hungarian_IoU_mean": float(hg.mean()),
        "parse_failure_rate": float(parse_fail),
        "per_generator": per_gen,
    }
    summary_path = os.path.join(args.out_dir, "summary.json")
    with open(summary_path, "w", encoding="utf-8") as f:
        json.dump(summary, f, indent=2)
    print(f"\nsaved {summary_path}")
|
|
|
|
# Script entry point: run the aggregation only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
|
|