BLT-Reasoner pilot 1: ckpts + code + logs + ablations

9477b5c verified 11 days ago

1.37 kB

	#!/usr/bin/env bash
	# Launch the BLT pilot AND chain the pre-registered z-ablation eval on the
	# final checkpoint. The eval (eval.py) is the success criterion; per the
	# pre-registration in README.md, we look at Δ_random and Δ_zero BEFORE
	# looking at raw GSM8K accuracy.
	#
	# Outputs (all under $OUT):
	#   run.log                     training output + [wrapper] status lines
	#   eval.log                    z-ablation eval output
	#   run.pid                     PID of the detached wrapper
	#   final/ablation_n200.json    path handed to the eval via --out
	set -euo pipefail

	# The python modules are addressed as experiments.blt_reasoner.*, so the
	# wrapper is started from the directory that contains experiments/.
	cd /home/ubuntu
	export TOKENIZERS_PARALLELISM=false
	export TRANSFORMERS_NO_ADVISORY_WARNINGS=1
	export HF_HUB_DISABLE_PROGRESS_BARS=1
	export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True

	OUT=/home/ubuntu/work/blt_pilot1
	CFG=/home/ubuntu/experiments/blt_reasoner/configs/pilot_qwen15b_gsm8k.json
	mkdir -p "$OUT"

	LOG="$OUT/run.log"
	EVAL_LOG="$OUT/eval.log"
	ABLATION_JSON="$OUT/final/ablation_n200.json"

	# The wrapper body is single-quoted so nothing is expanded by this launcher;
	# the paths are handed over as positional parameters and stay quoted inside.
	# Each stage records its exit status in run.log, so a failed run is
	# distinguishable from one that is still in progress.
	# stdin/stdout/stderr of the detached job are redirected explicitly so it
	# does not keep the launching session's descriptors open and nohup does not
	# fall back to writing nohup.out.
	nohup bash -c '
	cfg=$1
	out=$2
	log=$3
	eval_log=$4
	ablation=$5

	# 1) Train; only proceed to the eval if training exits 0.
	rc=0
	python3 -u -m experiments.blt_reasoner.train --config "$cfg" \
	  >>"$log" 2>&1 || rc=$?
	if [ "$rc" -ne 0 ]; then
	  echo "[wrapper] training exited $rc — skipping z-ablation eval" >>"$log"
	  exit "$rc"
	fi
	echo "[wrapper] training exited 0 — running z-ablation eval" >>"$log"

	# 2) z-ablation eval on the final ckpt (pre-registered success criterion).
	python3 -u -m experiments.blt_reasoner.eval \
	  --ckpt "$out/final" \
	  --config "$cfg" \
	  --n 200 \
	  --out "$ablation" \
	  >>"$eval_log" 2>&1 || rc=$?
	if [ "$rc" -ne 0 ]; then
	  echo "[wrapper] eval exited $rc; see $eval_log" >>"$log"
	  exit "$rc"
	fi
	echo "[wrapper] eval exited 0; ablation written to $ablation" >>"$log"
	' blt_pilot_wrapper "$CFG" "$OUT" "$LOG" "$EVAL_LOG" "$ABLATION_JSON" \
	  </dev/null >>"$LOG" 2>&1 &
	PID=$!
	echo "$PID" > "$OUT/run.pid"
	echo "Launched BLT pilot+eval wrapper pid=$PID log=$LOG eval_log=$EVAL_LOG"