#!/usr/bin/env bash # Chain: GRPO train → z-ablation eval on final → push to HF under grpo/ # # Run as: # BLT_HF_TOKEN= nohup bash queue_grpo.sh > /home/ubuntu/work/queue_grpo.log 2>&1 & set -uo pipefail REPO="LauraGG/blt-reasoner-pilot1" OUT="/home/ubuntu/work/blt_grpo1" CFG="/home/ubuntu/experiments/blt_reasoner/configs/grpo_from_step12000.json" LOG="/home/ubuntu/work/queue_grpo.log" log() { echo "[$(date +%T)] $*" | tee -a "$LOG"; } mkdir -p "$OUT" cd /home/ubuntu export TOKENIZERS_PARALLELISM=false TRANSFORMERS_NO_ADVISORY_WARNINGS=1 HF_HUB_DISABLE_PROGRESS_BARS=1 export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True log "==========================================" log "GRPO PHASE C: train from pilot ckpt-step12000" log "==========================================" python3 -u -m experiments.blt_reasoner.grpo_train --config "$CFG" \ > "$OUT/train.log" 2>&1 rc=$? log "GRPO train exit=$rc" if [ $rc -ne 0 ]; then log "GRPO train FAILED; pushing partial state and exiting" fi # Always try to push whatever we have log "running n=100 K=16 z-ablation on GRPO final ckpt" python3 -u -m experiments.blt_reasoner.eval \ --ckpt "$OUT/final" --config "$CFG" \ --n 100 --K 16 --max_new_tokens 192 --temperature 0.0 \ --out "$OUT/final/ablation_K16_n100.json" \ > "$OUT/eval.log" 2>&1 log "eval exit=$?" log "pushing to HF under grpo/" python3 - <