#!/usr/bin/env bash
# Chain: GRPO from Options 1+3 final ckpt → TF + AR ablations → push to HF under grpo_opt13/
#
# Stages, run in order from /home/ubuntu:
#   1. GRPO training                 -> $OUT/train.log
#   2. teacher-forced (TF) ablation  -> $OUT/tf_eval.log
#   3. autoregressive (AR) ablation  -> $OUT/ar_eval.log
#   4. upload step (see the NOTE at the bottom of this file)
#
# `-e` is not set: a failing stage does not abort the chain; its exit status
# is written to $LOG and the next stage still runs.
set -uo pipefail

# NOTE(review): REPO is not referenced in the visible part of this script;
# presumably it is consumed by the upload step — confirm before removing.
REPO="LauraGG/blt-reasoner-pilot1"
OUT="/home/ubuntu/work/blt_grpo_opt13"
CFG="/home/ubuntu/experiments/blt_reasoner/configs/grpo_from_opt13.json"
LOG="/home/ubuntu/work/queue_grpo_opt13.log"

#######################################
# Print a timestamped message to stdout and append it to the queue log.
# Globals:   LOG (read)
# Arguments: message words; joined with spaces into a single line
# Outputs:   "[HH:MM:SS] message" on stdout and appended to $LOG
#######################################
log() {
  printf '[%s] %s\n' "$(date +%T)" "$*" | tee -a "$LOG"
}

#######################################
# Print an error message to stderr and abort the script.
# Arguments: message words
# Returns:   never; exits with status 1
#######################################
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

#######################################
# Run one stage with stdout+stderr captured in a file, then log its status.
# Arguments: $1 - label used in the "<label> exit=<status>" log line
#            $2 - file that receives the command's stdout and stderr
#            $3.. - the command and its arguments
# Outputs:   one line via log()
# Returns:   the command's exit status (callers may ignore it)
#######################################
run_step() {
  local label=$1
  local logfile=$2
  shift 2
  local rc=0
  "$@" > "$logfile" 2>&1 || rc=$?
  log "$label exit=$rc"
  return "$rc"
}

# Every later redirection writes below $OUT, and the python modules are
# addressed relative to /home/ubuntu, so neither step may fail silently.
mkdir -p "$OUT" || die "cannot create output directory: $OUT"
cd /home/ubuntu || die "cannot cd to /home/ubuntu"

export TOKENIZERS_PARALLELISM=false TRANSFORMERS_NO_ADVISORY_WARNINGS=1 HF_HUB_DISABLE_PROGRESS_BARS=1
export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True

log "==========================================="
log "GRPO Phase C from BLT 7B Options 1+3 ckpt (51% AR baseline)"
log "==========================================="
run_step "grpo train" "$OUT/train.log" \
  python3 -u -m experiments.blt_reasoner.grpo_train --config "$CFG"

log "TF ablation on grpo final"
run_step "TF ablate" "$OUT/tf_eval.log" \
  python3 -u -m experiments.blt_reasoner.scripts.ablate_teacher_forced \
  --ckpt "$OUT/final" --config "$CFG" --n 200 --K 16 \
  --out "$OUT/final/ablation_teacher_forced.json"

log "AR ablation on grpo final"
run_step "AR ablate" "$OUT/ar_eval.log" \
  python3 -u -m experiments.blt_reasoner.eval \
  --ckpt "$OUT/final" --config "$CFG" --n 200 --K 16 \
  --max_new_tokens 192 --temperature 0.0 \
  --out "$OUT/final/ablation_n200_K16.json"

log "pushing grpo_opt13/ to HF"
# NOTE(review): the upload command is truncated in the reviewed source — only
# the fragment below is visible, with no heredoc delimiter or body. It is kept
# as a comment so the script still parses; restore the original heredoc here
# before relying on the push step.
# python3 - <