CausalGrok / code /scripts /decision_at_2pm.sh
nileshsarkar-ai's picture
Upload code/scripts
42c0d23 verified
#!/usr/bin/env bash
# Decision checklist when returning at ~2 PM UTC.
# Run this FIRST before anything else.
#
# Prints a table of every experiments/runs/*/results/summary.json found under
# the project root (the parent of this script's directory), sorted by best OOD
# accuracy, then prints a go / hold recommendation for the ablation grid based
# on the n_train=300 "grokking" runs for seeds 123 and 456.
#
# Requires: python3 on PATH.
set -euo pipefail

# Resolve paths absolutely so the script works from any working directory.
# CDPATH is cleared so `cd` cannot echo a directory into the substitution.
SCRIPT_DIR="$(CDPATH='' cd -- "$(dirname -- "$0")" && pwd)"
ROOT="$(CDPATH='' cd -- "${SCRIPT_DIR}/.." && pwd)"
cd -- "${ROOT}"

if ! command -v python3 >/dev/null 2>&1; then
  printf 'error: python3 is required but was not found on PATH\n' >&2
  exit 1
fi

echo "════════════════════════════════════════════════════════════"
echo " OOD Summary — Decision Point at 2 PM UTC"
echo "════════════════════════════════════════════════════════════"
echo ""

# PYTHONIOENCODING keeps the box-drawing / emoji output from raising
# UnicodeEncodeError when the locale is not UTF-8.
PYTHONIOENCODING=utf-8 python3 << 'PYSCRIPT'
import glob
import json
import sys

RULE = "════════════════════════════════════════════════════════════"
THRESHOLD = 0.68          # best_ood a seed must exceed to count as replicated
N_TRAIN = 300             # training-set size of the runs the decision is based on
SEEDS = (42, 123, 456)    # seeds reported individually
REPLICATION_SEEDS = (123, 456)


def as_number(value, default, cast=float):
    """Coerce a JSON value with `cast`; return `default` for null or unusable values."""
    try:
        return cast(value)
    except (TypeError, ValueError, OverflowError):
        return default


rows = []
for path in sorted(glob.glob('experiments/runs/*/results/summary.json')):
    # Best effort: one unreadable or malformed summary must not abort the
    # report, but it is reported on stderr instead of vanishing silently.
    try:
        with open(path, encoding='utf-8') as fh:
            s = json.load(fh)
    except (OSError, ValueError) as exc:  # JSON and decode errors are ValueErrors
        print(f"warning: skipping {path}: {exc}", file=sys.stderr)
        continue
    if not isinstance(s, dict):
        print(f"warning: skipping {path}: top-level JSON is not an object",
              file=sys.stderr)
        continue

    # Prefer best_ood; fall back to ood_test_acc when it is absent or null.
    best = s.get('best_ood')
    if best is None:
        best = s.get('ood_test_acc')

    rows.append((
        str(s.get('run_id', '?'))[-35:],   # keep only the tail so the column fits
        str(s.get('condition', '?')),
        as_number(s.get('n_train'), 0, int),
        as_number(s.get('seed'), 0, int),
        as_number(best, 0.0),
        as_number(s.get('ood_improvement'), 0.0),
        as_number(s.get('grokking_epoch'), -1, int),
    ))

rows.sort(key=lambda x: x[4], reverse=True)

print(f"{'run':<35s} {'cond':<8s} {'n':>4s} {'s':>3s} {'best_ood':>8s} {'impr':>7s} {'grok_ep':<8s}")
print("-" * 90)
for run_id, cond, n_train, seed, best_ood, impr, grok_ep in rows:
    # A non-positive epoch is shown as a dash.
    ep_str = str(grok_ep) if grok_ep > 0 else "—"
    print(f"{run_id:<35s} {cond:<8s} {n_train:>4d} {seed:>3d} {best_ood:>8.3f} {impr:>+7.3f} {ep_str:<8s}")

print("")
print(RULE)
print(" Decision Logic:")
print(RULE)
print("")

# Extract key runs.
# rows is sorted best-first, so when several runs share the same
# (condition, n_train, seed) the lowest-scoring one is the value kept here.
runs_dict = {(r[1], r[2], r[3]): r[4] for r in rows}
# None marks a seed with no summary, so it is not mistaken for a 0.000 score.
scores = {seed: runs_dict.get(('grokking', N_TRAIN, seed)) for seed in SEEDS}

for seed in SEEDS:
    value = scores[seed]
    shown = "missing" if value is None else f"{value:.3f}"
    print(f"s{seed} (grokking): best_ood = {shown}")
print("")

missing = [f"s{seed}" for seed in REPLICATION_SEEDS if scores[seed] is None]
replicated = all(
    scores[seed] is not None and scores[seed] > THRESHOLD
    for seed in REPLICATION_SEEDS
)

if replicated:
    print("✅ REPLICATION CONFIRMED: s123 and s456 both > 0.68")
    print("")
    print(" NEXT STEP:")
    print(" 1. Wire IRM fix to causalgrok_camelyon_v2.py (~15 min)")
    print(" 2. Fire ablation grid:")
    print(" python -m experiments.run_ablations --parallel --n_gpus 16")
    print("")
else:
    print("⚠️ REPLICATION UNCERTAIN: s123 or s456 < 0.68")
    if missing:
        print(f" (no summary.json found for: {', '.join(missing)})")
    print("")
    print(" HOLD: Do not fire ablation grid yet")
    print(" NEXT: Characterize seed sensitivity")
    print(" - Check if seed is the main variable")
    print(" - Run 3 more seeds at n=300 to understand range")
    print("")
PYSCRIPT
echo "════════════════════════════════════════════════════════════"