#!/usr/bin/env bash
# Extracted from revision bc7101b (original file size: 1,835 bytes).
# Chain: GRPO train → z-ablation eval on final → push to HF under grpo/
#
# Run as:
# BLT_HF_TOKEN=<hf_token> nohup bash queue_grpo.sh > /home/ubuntu/work/queue_grpo.log 2>&1 &
# Shell options: treat unset variables as errors (-u) and let a pipeline
# report a failure from any of its stages (pipefail). errexit (-e) is not
# enabled, so a failing command does not stop the script by itself.
set -u
set -o pipefail

# Model repository on the Hugging Face Hub that receives the upload.
REPO='LauraGG/blt-reasoner-pilot1'
# Working directory for this run: holds train.log, eval.log and final/, and is
# the folder that gets uploaded.
OUT='/home/ubuntu/work/blt_grpo1'
# JSON config handed to both the trainer and the evaluator via --config.
CFG='/home/ubuntu/experiments/blt_reasoner/configs/grpo_from_step12000.json'
# File that log() appends its timestamped messages to.
LOG='/home/ubuntu/work/queue_grpo.log'
#######################################
# Print a timestamped message and make sure it ends up in the queue log.
# Globals:   LOG (read) - path of the log file
# Arguments: $* - message text
# Outputs:   "[HH:MM:SS] message" on stdout; the same line is appended to
#            $LOG unless stdout already is that file
#######################################
log() {
  local line
  line="[$(date +%T)] $*"
  # When the script's stdout is already redirected to $LOG, an additional
  # `tee -a` would write every message to the file a second time.
  if [[ /dev/fd/1 -ef "$LOG" ]]; then
    printf '%s\n' "$line"
  else
    printf '%s\n' "$line" | tee -a "$LOG"
  fi
}
# --- Setup -------------------------------------------------------------------
# Create the output directory and switch to /home/ubuntu, the directory the
# `python3 -m experiments.blt_reasoner...` stages are launched from. Both steps
# are checked explicitly because the script does not run with `set -e`; an
# unnoticed failure here would only surface later as an unrelated error.
mkdir -p "$OUT" || { log "cannot create output dir $OUT"; exit 1; }
cd /home/ubuntu || { log "cannot cd to /home/ubuntu"; exit 1; }

# The upload stage at the end requires BLT_HF_TOKEN to start with "hf_"
# (after trimming whitespace). Warn now instead of only finding out once
# training has finished; this is a warning only, training still runs.
if [[ ! "${BLT_HF_TOKEN:-}" =~ ^[[:space:]]*hf_ ]]; then
  log "WARNING: BLT_HF_TOKEN is unset or does not start with hf_; the final push will fail"
fi

# Environment for the Python stages (Hugging Face tokenizers / transformers /
# hub switches and the PyTorch CUDA allocator configuration).
export TOKENIZERS_PARALLELISM=false
export TRANSFORMERS_NO_ADVISORY_WARNINGS=1
export HF_HUB_DISABLE_PROGRESS_BARS=1
export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True

log "=========================================="
log "GRPO PHASE C: train from pilot ckpt-step12000"
log "=========================================="
# --- Stage 1: GRPO training ----------------------------------------------------
# Run the trainer with $CFG; everything it prints (stdout and stderr) is
# captured in $OUT/train.log.
python3 -u -m experiments.blt_reasoner.grpo_train --config "$CFG" \
  > "$OUT/train.log" 2>&1
rc=$?
log "GRPO train exit=$rc"
if (( rc != 0 )); then
  # Deliberately no exit: the following stages still run so that whatever
  # state exists gets pushed. The message says so, instead of claiming the
  # script exits here.
  log "GRPO train FAILED; continuing so partial state still gets pushed"
fi
# --- Stage 2: z-ablation eval on the final checkpoint --------------------------
# There is no early exit after a failed training run, because the push that
# follows should always be attempted with whatever we have. The eval itself is
# only attempted when a final checkpoint exists; otherwise it is skipped with
# an explicit log line rather than left to fail on a missing path.
if [[ -e "$OUT/final" ]]; then
  log "running n=100 K=16 z-ablation on GRPO final ckpt"
  # The result JSON is written inside the checkpoint directory; all output of
  # the eval process goes to $OUT/eval.log.
  python3 -u -m experiments.blt_reasoner.eval \
    --ckpt "$OUT/final" --config "$CFG" \
    --n 100 --K 16 --max_new_tokens 192 --temperature 0.0 \
    --out "$OUT/final/ablation_K16_n100.json" \
    > "$OUT/eval.log" 2>&1
  log "eval exit=$?"
else
  log "no final checkpoint at $OUT/final; skipping eval"
fi
# --- Stage 3: upload $OUT to the Hub under grpo/ -------------------------------
log "pushing to HF under grpo/"
# The folder and repo id are handed to Python through the environment and the
# heredoc delimiter is quoted, so the shell never splices values into the
# Python source (a quote or backslash in a path can no longer break it).
GRPO_PUSH_DIR="$OUT" GRPO_PUSH_REPO="$REPO" python3 - <<'PY'
import os
import sys

from huggingface_hub import HfApi

token = os.environ.get("BLT_HF_TOKEN", "").strip()
# Explicit check instead of `assert`, which is removed under `python -O`.
if not token.startswith("hf_"):
    sys.exit("BLT_HF_TOKEN missing")

api = HfApi(token=token)
api.upload_folder(
    folder_path=os.environ["GRPO_PUSH_DIR"],
    path_in_repo="grpo",
    repo_id=os.environ["GRPO_PUSH_REPO"],
    repo_type="model",
    commit_message="GRPO Phase C: trained policy + final ablation",
    ignore_patterns=["*.tmp"],
)
print("[push] done")
PY
push_rc=$?
log "push exit=$push_rc"
if (( push_rc != 0 )); then
  # Do not report success when nothing was uploaded; hand the failure on to
  # whoever started the script.
  log "queue_grpo.sh FAILED: push to HF did not complete"
  exit "$push_rc"
fi
log "queue_grpo.sh DONE"
# End of queue_grpo.sh.