File size: 2,638 Bytes
33569f9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
"""Aggregate per-rank JSONLs into a summary table."""
import argparse
import glob
import json
import os
from collections import defaultdict

import numpy as np


def _load_rows(out_dir):
    """Return every JSON record found in the ``rank_*.jsonl`` files of *out_dir*.

    Files are read in sorted path order. Blank or whitespace-only lines are
    skipped so that a stray empty line does not abort the aggregation.
    """
    rows = []
    for path in sorted(glob.glob(os.path.join(out_dir, "rank_*.jsonl"))):
        with open(path, encoding="utf-8") as f:
            rows.extend(json.loads(line) for line in f if line.strip())
    return rows


def _generator_stats(rows):
    """Return the count and mean metrics of *rows*, keyed as in summary.json."""
    return {
        "n": len(rows),
        "soft_F1": float(np.mean([r["soft_F1"] for r in rows])),
        "mean_F1_tIoU": float(np.mean([r["mean_F1_tIoU"] for r in rows])),
        "hungarian_IoU": float(np.mean([r["hungarian_iou"] for r in rows])),
        "parse_failure_rate": float(np.mean([r["parse_failed"] for r in rows])),
    }


def main():
    """Aggregate per-rank JSONL results into a printed table and summary.json.

    Reads ``--out_dir`` from the command line, loads every record from the
    ``rank_*.jsonl`` files in that directory, prints overall mean/median
    metrics and a per-generator table, and writes ``summary.json`` into the
    same directory. If no records are found, prints a notice and returns
    without writing anything.

    Each record must provide the keys ``soft_F1``, ``mean_F1_tIoU``,
    ``hungarian_iou``, ``parse_failed`` and ``generator``.

    Raises:
        KeyError: if a record lacks one of the required keys.
        json.JSONDecodeError: if a non-blank line is not valid JSON.
    """
    p = argparse.ArgumentParser()
    p.add_argument("--out_dir", required=True)
    args = p.parse_args()

    rows = _load_rows(args.out_dir)
    if not rows:
        print(f"NO RESULTS in {args.out_dir}")
        return

    print(f"=== {len(rows)} evaluated videos ===")
    sf = np.array([r["soft_F1"] for r in rows])
    mf = np.array([r["mean_F1_tIoU"] for r in rows])
    hg = np.array([r["hungarian_iou"] for r in rows])
    parse_fail = np.mean([r["parse_failed"] for r in rows])
    print(f"  soft_F1        mean={sf.mean():.3f}  median={np.median(sf):.3f}")
    print(f"  mean_F1_tIoU   mean={mf.mean():.3f}  median={np.median(mf):.3f}")
    print(f"  hungarian_IoU  mean={hg.mean():.3f}  median={np.median(hg):.3f}")
    print(f"  parse_failure_rate: {parse_fail:.2%}")

    # Per-generator
    by_gen = defaultdict(list)
    for r in rows:
        by_gen[r["generator"]].append(r)
    # Computed once, in sorted order, so the printed table and summary.json
    # always agree on both values and ordering.
    per_generator = {g: _generator_stats(by_gen[g]) for g in sorted(by_gen)}

    print(f"\n{'gen':<12} {'n':>4} {'soft_F1':>8} {'F1_tIoU':>9} {'hung_IoU':>9} {'parse_fail':>10}")
    for g, st in per_generator.items():
        print(
            f"  {g:<10} {st['n']:>4} {st['soft_F1']:>8.3f} {st['mean_F1_tIoU']:>9.3f} "
            f"{st['hungarian_IoU']:>9.3f} {st['parse_failure_rate']:>10.2%}"
        )

    summary = {
        "n": len(rows),
        "soft_F1_mean": float(sf.mean()),
        "mean_F1_tIoU_mean": float(mf.mean()),
        "hungarian_IoU_mean": float(hg.mean()),
        "parse_failure_rate": float(parse_fail),
        "per_generator": per_generator,
    }
    summary_path = os.path.join(args.out_dir, "summary.json")
    with open(summary_path, "w", encoding="utf-8") as f:
        json.dump(summary, f, indent=2)
    print(f"\nsaved {summary_path}")


# Run the aggregation only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()