| |
"""Aggregate T1 extended benchmark results.

Prints a fixed-width text table, one row per method, sorted by mean F1
in descending order.
"""
| import os |
| import json |
| import glob |
| import numpy as np |
| from collections import defaultdict |
|
|
# Directory that main() scans for ``*/results.json`` files.
# The path uses a shell-style ``${PULSE_ROOT}`` placeholder, which Python does
# not interpolate on its own. expandvars() substitutes it from the environment
# and leaves the text untouched when the variable is unset.
ROOT = os.path.expandvars('${PULSE_ROOT}/results/t1_extended')
|
|
|
|
def collect(pattern):
    """Group benchmark result records by method name (plus optional tag).

    Every file matching *pattern* is parsed as JSON. The grouping key is the
    record's ``'method'`` value, falling back to the name of the directory
    containing the file. When ``record['args']['tag']`` is non-empty it is
    appended, giving ``"<key>_<tag>"``.

    Reading is best-effort: a file that cannot be opened or parsed, or whose
    top-level JSON value is not an object, is reported on stdout and skipped.

    Args:
        pattern: glob pattern selecting the result files.

    Returns:
        A ``defaultdict(list)`` mapping each key to its parsed records, in
        sorted-path order.
    """
    by_key = defaultdict(list)
    for path in sorted(glob.glob(pattern)):
        try:
            # The context manager closes the handle even when parsing fails.
            with open(path, encoding='utf-8') as fh:
                record = json.load(fh)
        except Exception as e:  # deliberately broad: one bad file must not abort the run
            print(f" ERR reading {path}: {e}")
            continue
        if not isinstance(record, dict):
            print(f" ERR reading {path}: expected a JSON object, "
                  f"got {type(record).__name__}")
            continue

        key = record.get('method', os.path.basename(os.path.dirname(path)))

        # ``args`` may be absent or an explicit null; treat both as "no tag".
        tag = (record.get('args') or {}).get('tag', '')
        if tag:
            key = f"{key}_{tag}"
        by_key[key].append(record)
    return by_key
|
|
|
|
def main():
    """Print one summary row per method, highest mean F1 first.

    Loads every ``results.json`` one directory below ``ROOT`` through
    ``collect``. For each group the row shows the modalities and ``n_params``
    of the group's first usable record, the number of usable records, and the
    mean and ``np.std`` of ``test_f1`` and ``test_acc``.

    Records lacking ``test_f1`` or ``test_acc`` are reported on stdout and
    left out of the statistics, so one incomplete run does not abort the
    whole report. When nothing matches, a notice is printed instead of an
    empty table.
    """
    pattern = f'{ROOT}/*/results.json'
    groups = collect(pattern)
    if not groups:
        print(f"No results matched {pattern}")
        return

    rows = []
    for key, rs in groups.items():
        usable = [r for r in rs if 'test_f1' in r and 'test_acc' in r]
        if len(usable) < len(rs):
            print(f" ERR {key}: skipped {len(rs) - len(usable)} record(s) "
                  f"missing test_f1/test_acc")
        if not usable:
            continue

        f1s = [r['test_f1'] for r in usable]
        accs = [r['test_acc'] for r in usable]
        # Per-group metadata is taken from the first usable record only.
        first = usable[0]
        mods = ','.join(str(m) for m in first.get('modalities', ()))
        rows.append({
            'method': key,
            'modalities': mods,
            'n_seeds': len(usable),
            'f1_mean': np.mean(f1s),
            'f1_std': np.std(f1s),
            'acc_mean': np.mean(accs),
            'acc_std': np.std(accs),
            'n_params': first.get('n_params', 0),
        })
    rows.sort(key=lambda r: r['f1_mean'], reverse=True)

    print(f"\n{'Method':<28s} {'Modalities':<32s} N {'F1 mean±std':<14s} "
          f"{'Acc mean±std':<14s} Params")
    print('-' * 110)
    for r in rows:
        print(f"{r['method']:<28s} {r['modalities']:<32s} {r['n_seeds']} "
              f"{r['f1_mean']:.3f}±{r['f1_std']:.3f} "
              f"{r['acc_mean']:.3f}±{r['acc_std']:.3f} "
              f"{r['n_params']:,}")
|
|
|
|
# Run the report only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
|
|