#!/usr/bin/env bash
# Launch the BLT pilot AND chain the pre-registered z-ablation eval on the
# final checkpoint. The eval (eval.py) is the *success criterion*; per the
# pre-registration in README.md, we look at Δ_random and Δ_zero BEFORE
# looking at raw GSM8K accuracy.
# Strict mode: abort on any failing command, unset variable, or failed
# pipeline stage.
set -o errexit -o nounset -o pipefail

# Work from the home directory; the train/eval steps later in this script are
# invoked as `python3 -m experiments.…`, which presumably relies on this cwd.
cd /home/ubuntu

# Environment for the Python processes started below. NOTE(review): exact
# semantics are defined by the respective libraries (tokenizers, transformers,
# huggingface_hub, PyTorch) — confirm against their docs before changing.
export \
  TOKENIZERS_PARALLELISM=false \
  TRANSFORMERS_NO_ADVISORY_WARNINGS=1 \
  HF_HUB_DISABLE_PROGRESS_BARS=1 \
  PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True

# Run configuration and output locations; both logs live inside the run's
# output directory.
CFG=/home/ubuntu/experiments/blt_reasoner/configs/pilot_qwen15b_gsm8k.json
OUT=/home/ubuntu/work/blt_pilot1
LOG="${OUT}/run.log"
EVAL_LOG="${OUT}/eval.log"

# Make sure the output directory exists before anything is written to it.
mkdir -p -- "${OUT}"
#######################################
# 1) Train, then 2) run the z-ablation eval on the final checkpoint
# (pre-registered success criterion). The eval starts only if training
# exits 0. Every outcome, success or failure, is recorded in the wrapper log
# so a failed run is distinguishable from one that is still in progress.
# Arguments:
#   $1 - path to the run config JSON
#   $2 - output directory (the final checkpoint is read from "$2/final")
#   $3 - log file for training output and [wrapper] status lines
#   $4 - log file for eval output
# Returns:
#   0 if both stages succeed, otherwise the exit status of the failing stage.
#######################################
run_pilot_and_eval() {
  local cfg=$1 out=$2 log=$3 eval_log=$4
  local rc=0

  python3 -u -m experiments.blt_reasoner.train --config "$cfg" \
    >>"$log" 2>&1 || rc=$?
  if ((rc != 0)); then
    echo "[wrapper] training exited $rc — skipping z-ablation eval" >>"$log"
    return "$rc"
  fi
  echo '[wrapper] training exited 0 — running z-ablation eval' >>"$log"

  python3 -u -m experiments.blt_reasoner.eval \
    --ckpt "$out/final" \
    --config "$cfg" \
    --n 200 \
    --out "$out/final/ablation_n200.json" \
    >>"$eval_log" 2>&1 || rc=$?
  if ((rc != 0)); then
    echo "[wrapper] eval exited $rc; see $eval_log" >>"$log"
    return "$rc"
  fi
  echo "[wrapper] eval exited 0; ablation written to $out/final/ablation_n200.json" >>"$log"
}
# Make the function visible to the child bash started under nohup.
export -f run_pilot_and_eval

# Launch detached. Paths are handed over as positional parameters instead of
# being spliced into the command string, so they are never re-parsed by the
# inner shell. nohup's own stdout/stderr go to the run log (no stray
# nohup.out) and stdin is detached from the terminal.
nohup bash -c 'run_pilot_and_eval "$@"' blt_pilot_wrapper \
  "$CFG" "$OUT" "$LOG" "$EVAL_LOG" >>"$LOG" 2>&1 </dev/null &
PID=$!

# Record the wrapper PID so the run can be monitored or stopped later.
echo "$PID" > "$OUT/run.pid"
echo "Launched BLT pilot+eval wrapper pid=$PID log=$LOG eval_log=$EVAL_LOG"