#!/usr/bin/env bash
#
# Print the OOD summary used at the "2 PM UTC" decision point.
#
# Reads every experiments/runs/*/results/summary.json below the repository
# root (the parent of this script's directory), prints the runs as a table
# sorted by best OOD accuracy (highest first), then prints the replication
# decision for the "grokking" condition at n_train=300, seeds 42/123/456.
#
# Requires: python3 on PATH.

set -euo pipefail

# Horizontal rule that frames the report: 60 box-drawing characters.
RULE="$(printf '═%.0s' {1..60})"
readonly RULE

#######################################
# Print the horizontal rule followed by a newline.
# Globals:   RULE (read)
# Outputs:   one line on stdout
#######################################
print_rule() {
  printf '%s\n' "${RULE}"
}

#######################################
# Print the run table and the decision logic for one repository root.
# Arguments: $1 - directory that contains experiments/runs/
# Outputs:   report on stdout; one warning per skipped file on stderr
# Returns:   non-zero if the directory cannot be entered or python3 fails
#######################################
ood_report() {
  local root=$1
  (
    # Subshell so the caller's working directory is left untouched.
    cd -- "${root}" || exit 1
    # The report prints non-ASCII symbols; force UTF-8 so it does not depend
    # on the caller's locale.
    PYTHONIOENCODING=utf-8 python3 <<'PYSCRIPT'
import glob
import json
import sys
from collections import namedtuple

SUMMARY_GLOB = 'experiments/runs/*/results/summary.json'
RULE = "═" * 60

Run = namedtuple(
    "Run", "run_id condition n_train seed best_ood improvement grok_epoch")


def to_float(value, default=0.0):
    """Return value as a float, or default for null / non-numeric input."""
    try:
        return float(value)
    except (TypeError, ValueError):
        return default


def to_int(value, default=0):
    """Return value as an int, or default for null / non-numeric input."""
    try:
        return int(value)
    except (TypeError, ValueError, OverflowError):
        return default


def load_runs(pattern):
    """Load one Run per summary file matching pattern.

    Files that cannot be read or parsed are skipped with a warning on stderr
    so that a single bad summary does not hide the remaining runs.
    """
    runs = []
    for path in sorted(glob.glob(pattern)):
        try:
            with open(path, encoding="utf-8") as handle:
                summary = json.load(handle)
            if not isinstance(summary, dict):
                raise ValueError("top-level JSON value is not an object")
        except (OSError, ValueError) as exc:
            print(f"warning: skipping {path}: {exc}", file=sys.stderr)
            continue

        # Prefer best_ood; fall back to ood_test_acc when it is absent or null.
        best = summary.get('best_ood')
        if best is None:
            best = summary.get('ood_test_acc', 0)

        runs.append(Run(
            run_id=str(summary.get('run_id', '?'))[-35:],
            condition=str(summary.get('condition', '?')),
            n_train=to_int(summary.get('n_train', 0)),
            seed=to_int(summary.get('seed', 0)),
            best_ood=to_float(best),
            improvement=to_float(summary.get('ood_improvement', 0)),
            grok_epoch=to_int(summary.get('grokking_epoch', -1), -1),
        ))
    return runs


def print_table(runs):
    """Print the header line and one formatted line per run."""
    print(f"{'run':<35s} {'cond':<8s} {'n':>4s} {'s':>3s} {'best_ood':>8s} {'impr':>7s} {'grok_ep':<8s}")
    print("-" * 90)
    for run in runs:
        # Epochs that are not positive are shown as a dash.
        ep_str = str(run.grok_epoch) if run.grok_epoch > 0 else "—"
        print(f"{run.run_id:<35s} {run.condition:<8s} {run.n_train:>4d} {run.seed:>3d} {run.best_ood:>8.3f} {run.improvement:>+7.3f} {ep_str:<8s}")


def print_decision(runs):
    """Print the replication decision for the grokking runs at n_train=300."""
    print("")
    print(RULE)
    print(" Decision Logic:")
    print(RULE)
    print("")

    # runs is sorted best-first and later entries overwrite earlier ones, so
    # when several runs share (condition, n_train, seed) the lowest score wins.
    runs_dict = {(r.condition, r.n_train, r.seed): r.best_ood for r in runs}

    # A run with no summary counts as 0.
    s42_grok = runs_dict.get(('grokking', 300, 42), 0)
    s123_grok = runs_dict.get(('grokking', 300, 123), 0)
    s456_grok = runs_dict.get(('grokking', 300, 456), 0)

    print(f"s42 (grokking): best_ood = {s42_grok:.3f}")
    print(f"s123 (grokking): best_ood = {s123_grok:.3f}")
    print(f"s456 (grokking): best_ood = {s456_grok:.3f}")
    print("")

    if s123_grok > 0.68 and s456_grok > 0.68:
        print("✅ REPLICATION CONFIRMED: s123 and s456 both > 0.68")
        print("")
        print(" NEXT STEP:")
        print(" 1. Wire IRM fix to causalgrok_camelyon_v2.py (~15 min)")
        print(" 2. Fire ablation grid:")
        print(" python -m experiments.run_ablations --parallel --n_gpus 16")
        print("")
    else:
        print("⚠️ REPLICATION UNCERTAIN: s123 or s456 < 0.68")
        print("")
        print(" HOLD: Do not fire ablation grid yet")
        print(" NEXT: Characterize seed sensitivity")
        print(" - Check if seed is the main variable")
        print(" - Run 3 more seeds at n=300 to understand range")
        print("")


def main():
    runs = load_runs(SUMMARY_GLOB)
    runs.sort(key=lambda r: r.best_ood, reverse=True)
    print_table(runs)
    print_decision(runs)


main()
PYSCRIPT
  )
}

#######################################
# Entry point: print the banner, the report for the repository root, and the
# closing rule.
# Outputs:   full report on stdout
#######################################
main() {
  local root
  root="$(cd -- "$(dirname -- "$0")/.." && pwd)"

  print_rule
  echo " OOD Summary — Decision Point at 2 PM UTC"
  print_rule
  echo ""

  ood_report "${root}"

  print_rule
}

main "$@"