File size: 3,154 Bytes
42c0d23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
#!/usr/bin/env bash
# Decision checklist when returning at ~2 PM UTC
# Run this FIRST before anything else

# Strict mode: abort on any failing command, on use of an unset variable,
# and when any stage of a pipeline fails.
set -euo pipefail

# Work from the parent of this script's directory so that relative paths used
# later in the script resolve the same way regardless of the caller's cwd.
ROOT="$(cd "$(dirname "$0")/.." && pwd)"
cd "${ROOT}"

# Report banner. The separator between the two title parts is an em dash
# (it was previously emitted as mis-decoded UTF-8 bytes).
echo "════════════════════════════════════════════════════════════"
echo "  OOD Summary — Decision Point at 2 PM UTC"
echo "════════════════════════════════════════════════════════════"
echo ""

python3 << 'PYSCRIPT'
"""Print an OOD results table and the replication go/no-go decision.

Reads every ``experiments/runs/*/results/summary.json`` (relative to the
current working directory), prints one table row per readable summary sorted
by best OOD score (highest first), then reports whether the seed-123 and
seed-456 'grokking' runs at n_train=300 both exceed the replication threshold.
"""
import glob
import json
import sys

SUMMARY_GLOB = 'experiments/runs/*/results/summary.json'

# Both replication seeds must score strictly above this value.
REPLICATION_THRESHOLD = 0.68

# (condition, n_train) of the runs the decision is based on.
DECISION_CONDITION = 'grokking'
DECISION_N_TRAIN = 300

RULE = "════════════════════════════════════════════════════════════"


def first_present(summary, keys, default):
    """Return the first non-null value stored under *keys*, else *default*.

    A key that is present but holds JSON ``null`` is treated like a missing
    key, so callers never receive ``None`` unless *default* is ``None``.
    """
    for key in keys:
        value = summary.get(key)
        if value is not None:
            return value
    return default


def parse_summary(summary):
    """Convert one decoded summary.json object into a table row.

    Returns a tuple ``(run_id, condition, n_train, seed, best_ood,
    ood_improvement, grokking_epoch)`` with fixed types
    ``(str, str, int, int, float, float, int)``. ``run_id`` is truncated to
    its last 35 characters; ``best_ood`` falls back to ``ood_test_acc``.

    Raises:
        TypeError: if *summary* is not a JSON object (dict).
        ValueError, TypeError, OverflowError: if a field cannot be coerced
            to its numeric type.
    """
    if not isinstance(summary, dict):
        raise TypeError(
            f"expected a JSON object, got {type(summary).__name__}")
    return (
        str(first_present(summary, ('run_id',), '?'))[-35:],
        str(first_present(summary, ('condition',), '?')),
        int(first_present(summary, ('n_train',), 0)),
        int(first_present(summary, ('seed',), 0)),
        float(first_present(summary, ('best_ood', 'ood_test_acc'), 0)),
        float(first_present(summary, ('ood_improvement',), 0)),
        int(first_present(summary, ('grokking_epoch',), -1)),
    )


def load_rows(pattern=SUMMARY_GLOB):
    """Load all summaries matching *pattern*, sorted by best_ood descending.

    Unreadable or malformed files are skipped (best effort, as before), but
    each skip is now reported on stderr instead of being silently ignored.
    """
    rows = []
    for path in sorted(glob.glob(pattern)):
        try:
            with open(path, encoding='utf-8') as handle:
                rows.append(parse_summary(json.load(handle)))
        except (OSError, ValueError, TypeError, OverflowError) as err:
            # json.JSONDecodeError and UnicodeDecodeError are ValueErrors.
            print(f"warning: skipping {path}: {err}", file=sys.stderr)
    rows.sort(key=lambda row: row[4], reverse=True)
    return rows


def format_row(row):
    """Render one row tuple from parse_summary() as a fixed-width line."""
    run_id, cond, n_train, seed, best_ood, improvement, grok_epoch = row
    # Non-positive epochs (including the -1 default) are shown as a dash.
    ep_str = str(grok_epoch) if grok_epoch > 0 else "—"
    return (f"{run_id:<35s} {cond:<8s} {n_train:>4d} {seed:>3d}  "
            f"{best_ood:>8.3f}  {improvement:>+7.3f}  {ep_str:<8s}")


def index_scores(rows):
    """Map ``(condition, n_train, seed)`` to best_ood for every row.

    NOTE(review): when several runs share a key, the last row wins; with rows
    sorted best-first that is the LOWEST score. This matches the previous
    behaviour - confirm it is the intended (conservative) choice.
    """
    return {(row[1], row[2], row[3]): row[4] for row in rows}


def format_score(score):
    """Format a best_ood value, distinguishing a missing run from 0.000."""
    return "n/a (no summary found)" if score is None else f"{score:.3f}"


def replication_confirmed(s123, s456, threshold=REPLICATION_THRESHOLD):
    """Return True only if both scores exist and are strictly > threshold."""
    return all(score is not None and score > threshold
               for score in (s123, s456))


def main():
    """Print the results table followed by the decision section."""
    rows = load_rows()

    print(f"{'run':<35s} {'cond':<8s} {'n':>4s} {'s':>3s}  {'best_ood':>8s}  {'impr':>7s}  {'grok_ep':<8s}")
    print("-" * 90)
    for row in rows:
        print(format_row(row))

    print("")
    print(RULE)
    print("  Decision Logic:")
    print(RULE)
    print("")

    # Extract key runs (None when no summary exists for that seed).
    scores = index_scores(rows)
    s42_grok = scores.get((DECISION_CONDITION, DECISION_N_TRAIN, 42))
    s123_grok = scores.get((DECISION_CONDITION, DECISION_N_TRAIN, 123))
    s456_grok = scores.get((DECISION_CONDITION, DECISION_N_TRAIN, 456))

    print(f"s42 (grokking):   best_ood = {format_score(s42_grok)}")
    print(f"s123 (grokking):  best_ood = {format_score(s123_grok)}")
    print(f"s456 (grokking):  best_ood = {format_score(s456_grok)}")
    print("")

    if replication_confirmed(s123_grok, s456_grok):
        print(f"✅ REPLICATION CONFIRMED: s123 and s456 both > {REPLICATION_THRESHOLD}")
        print("")
        print("   NEXT STEP:")
        print("   1. Wire IRM fix to causalgrok_camelyon_v2.py (~15 min)")
        print("   2. Fire ablation grid:")
        print("      python -m experiments.run_ablations --parallel --n_gpus 16")
        print("")
    else:
        print(f"⚠️  REPLICATION UNCERTAIN: s123 or s456 < {REPLICATION_THRESHOLD}")
        print("")
        print("   HOLD: Do not fire ablation grid yet")
        print("   NEXT: Characterize seed sensitivity")
        print("   - Check if seed is the main variable")
        print("   - Run 3 more seeds at n=300 to understand range")
        print("")


# A script fed to python3 on stdin runs as __main__, so the report still
# executes when this code is embedded in the shell heredoc.
if __name__ == "__main__":
    main()
PYSCRIPT

# Closing rule that ends the report.
printf '%s\n' "════════════════════════════════════════════════════════════"