#!/usr/bin/env bash
# Launch the BLT pilot AND chain the pre-registered z-ablation eval on the
# final checkpoint. The eval (eval.py) is the *success criterion*; per the
# pre-registration in README.md, we look at Δ_random and Δ_zero BEFORE
# looking at raw GSM8K accuracy.
#
# The train -> eval chain runs detached under nohup; this script returns
# immediately after recording the wrapper PID in "$OUT/run.pid".
set -euo pipefail

# The python modules are invoked with `-m experiments....`, so they are
# resolved relative to this working directory.
cd /home/ubuntu

export TOKENIZERS_PARALLELISM=false
export TRANSFORMERS_NO_ADVISORY_WARNINGS=1
export HF_HUB_DISABLE_PROGRESS_BARS=1
export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True

OUT=/home/ubuntu/work/blt_pilot1
CFG=/home/ubuntu/experiments/blt_reasoner/configs/pilot_qwen15b_gsm8k.json
LOG="$OUT/run.log"
EVAL_LOG="$OUT/eval.log"

# Fail fast in the foreground instead of discovering a bad config path later
# in the background log.
if [[ ! -f "$CFG" ]]; then
  printf 'error: config not found: %s\n' "$CFG" >&2
  exit 1
fi

mkdir -p "$OUT"

# The wrapper script is single-quoted and receives its paths as positional
# arguments ($1..$4), so nothing is re-parsed by the inner shell and every
# path can be quoted properly. The wrapper's own stdout/stderr also go to
# $LOG (rather than nohup.out), and stdin is detached from the terminal.
nohup bash -c '
set -euo pipefail
cfg=$1 out=$2 log=$3 eval_log=$4

# 1) Train; only proceed to the eval if training exits 0. A failure is
#    recorded in the log so it is clear why the chain stopped.
rc=0
python3 -u -m experiments.blt_reasoner.train --config "$cfg" \
  >> "$log" 2>&1 || rc=$?
if (( rc != 0 )); then
  echo "[wrapper] training exited $rc; skipping z-ablation eval" >> "$log"
  exit "$rc"
fi
echo "[wrapper] training exited 0 — running z-ablation eval" >> "$log"

# 2) z-ablation eval on the final ckpt (pre-registered success criterion)
rc=0
python3 -u -m experiments.blt_reasoner.eval \
  --ckpt "$out/final" \
  --config "$cfg" \
  --n 200 \
  --out "$out/final/ablation_n200.json" \
  >> "$eval_log" 2>&1 || rc=$?
if (( rc != 0 )); then
  echo "[wrapper] eval exited $rc; see $eval_log" >> "$log"
  exit "$rc"
fi
echo "[wrapper] eval exited 0; ablation written to $out/final/ablation_n200.json" >> "$log"
' blt_pilot_wrapper "$CFG" "$OUT" "$LOG" "$EVAL_LOG" >> "$LOG" 2>&1 < /dev/null &

# nohup execs the command it is given, so $! identifies the wrapper shell.
PID=$!
echo "$PID" > "$OUT/run.pid"
echo "Launched BLT pilot+eval wrapper pid=$PID log=$LOG eval_log=$EVAL_LOG"