#!/usr/bin/env python3
"""Aggregate T1 extended benchmark results.

Prints a Markdown-style table sorted by F1 desc.
"""
import os
import json
import glob
import numpy as np
from collections import defaultdict

# Expand a shell-style ${PULSE_ROOT} reference from the environment; Python
# does not do this on its own. If the variable is unset, expandvars leaves
# the text unchanged, so the previous literal value is preserved.
ROOT = os.path.expandvars('${PULSE_ROOT}/results/t1_extended')


def collect(pattern):
    """Load every JSON file matching *pattern* and group the records.

    The group key is the record's ``'method'`` value, falling back to the
    name of the directory containing the file. When the record carries a
    non-empty ``args.tag``, it is appended as ``"<key>_<tag>"`` so that
    ablations of the same method land in separate groups.

    Files that cannot be opened or parsed, or whose top-level JSON value is
    not an object, are reported on stdout and skipped.

    Args:
        pattern: glob pattern selecting the result files.

    Returns:
        A ``defaultdict(list)`` mapping group key to the list of parsed
        records, in sorted-path order.
    """
    by_key = defaultdict(list)
    for path in sorted(glob.glob(pattern)):
        try:
            # Context manager so the handle is closed even if parsing fails.
            with open(path, encoding='utf-8') as fh:
                record = json.load(fh)
        except (OSError, ValueError) as e:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            print(f" ERR reading {path}: {e}")
            continue
        if not isinstance(record, dict):
            print(f" ERR reading {path}: expected a JSON object, "
                  f"got {type(record).__name__}")
            continue

        key = record.get('method', os.path.basename(os.path.dirname(path)))
        # Distinguish ablations by tag. `or {}` tolerates "args": null.
        tag = (record.get('args') or {}).get('tag', '')
        if tag:
            key = f"{key}_{tag}"
        by_key[key].append(record)
    return by_key


def main():
    """Print one summary row per method group, best mean F1 first.

    Reads ``<ROOT>/*/results.json``. Each record must provide ``test_f1``,
    ``test_acc`` and ``modalities``; ``n_params`` defaults to 0. Modalities
    and parameter count are taken from the first record of each group.
    """
    groups = collect(f'{ROOT}/*/results.json')
    rows = []
    for key, rs in groups.items():
        f1s = [r['test_f1'] for r in rs]
        accs = [r['test_acc'] for r in rs]
        mods = ','.join(rs[0]['modalities'])
        rows.append({
            'method': key,
            'modalities': mods,
            'n_seeds': len(rs),
            'f1_mean': np.mean(f1s),
            'f1_std': np.std(f1s),
            'acc_mean': np.mean(accs),
            'acc_std': np.std(accs),
            'n_params': rs[0].get('n_params', 0),
        })
    rows.sort(key=lambda r: r['f1_mean'], reverse=True)

    print(f"\n{'Method':<28s} {'Modalities':<32s} N {'F1 mean±std':<14s} "
          f"{'Acc mean±std':<14s} Params")
    print('-' * 110)
    for r in rows:
        print(f"{r['method']:<28s} {r['modalities']:<32s} {r['n_seeds']} "
              f"{r['f1_mean']:.3f}±{r['f1_std']:.3f} "
              f"{r['acc_mean']:.3f}±{r['acc_std']:.3f} "
              f"{r['n_params']:,}")


if __name__ == '__main__':
    main()