blt-reasoner-pilot1 / code /scripts /run_pilot_with_eval.sh
LauraGG's picture
BLT-Reasoner pilot 1: ckpts + code + logs + ablations
9477b5c verified
#!/usr/bin/env bash
# Launch the BLT pilot AND chain the pre-registered z-ablation eval on the
# final checkpoint. The eval (eval.py) is the *success criterion*; per the
# pre-registration in README.md, we look at Δ_random and Δ_zero BEFORE
# looking at raw GSM8K accuracy.
#
# Usage: run_pilot_with_eval.sh        (takes no arguments)
#
# Files written under $OUT:
#   run.log                      training output + [wrapper] status lines
#   eval.log                     output of the z-ablation eval
#   run.pid                      PID of the detached wrapper process
#   final/ablation_n200.json     path handed to the eval via --out
#
# The script returns as soon as the background wrapper has been started; it
# does not wait for training or eval to finish.
set -euo pipefail

# The train/eval modules are invoked with `python3 -m experiments...`, so the
# working directory has to be the one that contains the `experiments` package.
cd /home/ubuntu

# Runtime settings inherited by both the training and the eval process.
export TOKENIZERS_PARALLELISM=false
export TRANSFORMERS_NO_ADVISORY_WARNINGS=1
export HF_HUB_DISABLE_PROGRESS_BARS=1
export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True

OUT=/home/ubuntu/work/blt_pilot1
CFG=/home/ubuntu/experiments/blt_reasoner/configs/pilot_qwen15b_gsm8k.json
mkdir -p "$OUT"
LOG="$OUT/run.log"
EVAL_LOG="$OUT/eval.log"

# The inner script is a single-quoted literal: nothing is interpolated into
# it by this shell. Paths are passed as positional parameters and quoted at
# every use, so they survive spaces or shell metacharacters unchanged.
#
# The wrapper's own stdout/stderr are appended to $LOG (so shell-level
# diagnostics are not lost and nohup does not create a stray nohup.out), and
# stdin is detached from the launching terminal.
nohup bash -c '
  set -euo pipefail
  out=$1
  cfg=$2
  log=$3
  eval_log=$4

  # 1) Train; only proceed to the eval if the exit status is 0.
  rc=0
  python3 -u -m experiments.blt_reasoner.train --config "$cfg" \
    >>"$log" 2>&1 || rc=$?
  if (( rc != 0 )); then
    # Leave an explicit trace so a failed run is distinguishable from one
    # that is still in progress.
    echo "[wrapper] training exited $rc — skipping z-ablation eval" >>"$log"
    exit "$rc"
  fi
  echo "[wrapper] training exited 0 — running z-ablation eval" >>"$log"

  # 2) z-ablation eval on the final ckpt (pre-registered success criterion).
  rc=0
  python3 -u -m experiments.blt_reasoner.eval \
    --ckpt "$out/final" \
    --config "$cfg" \
    --n 200 \
    --out "$out/final/ablation_n200.json" \
    >>"$eval_log" 2>&1 || rc=$?
  if (( rc != 0 )); then
    echo "[wrapper] eval exited $rc; see $eval_log" >>"$log"
    exit "$rc"
  fi
  echo "[wrapper] eval exited 0; ablation written to $out/final/ablation_n200.json" >>"$log"
' blt_pilot_wrapper "$OUT" "$CFG" "$LOG" "$EVAL_LOG" \
  >>"$LOG" 2>&1 </dev/null &

# Record the wrapper PID so the run can be monitored or stopped later.
PID=$!
echo "$PID" > "$OUT/run.pid"
echo "Launched BLT pilot+eval wrapper pid=$PID log=$LOG eval_log=$EVAL_LOG"