#!/usr/bin/env bash
#
# queue_grpo.sh - GRPO Phase C pipeline.
#
# Steps:
#   1. Run experiments.blt_reasoner.grpo_train with the config in $CFG
#      (output captured in $OUT/train.log).
#   2. If training succeeded, run experiments.blt_reasoner.eval on
#      $OUT/final (output captured in $OUT/eval.log).
#   3. Upload $OUT to the Hugging Face model repo $REPO under grpo/.
#      This happens even when training failed, so partial state is kept.
#
# Required environment:
#   BLT_HF_TOKEN  Hugging Face token (must start with "hf_"), used for the push.
#
# Exit status: 0 when every step succeeded; otherwise the exit code of the
# first failing step (train, then eval, then push).
#
# NOTE: `-e` is deliberately not set. Each step's exit code is captured and
# handled explicitly so that the push still runs after a failure.
set -uo pipefail

readonly REPO="LauraGG/blt-reasoner-pilot1"
readonly OUT="/home/ubuntu/work/blt_grpo1"
readonly CFG="/home/ubuntu/experiments/blt_reasoner/configs/grpo_from_step12000.json"
readonly LOG="/home/ubuntu/work/queue_grpo.log"

#######################################
# Print a timestamped message to stdout and append it to $LOG.
# Globals:   LOG (read)
# Arguments: message words, joined with spaces
# Outputs:   "[HH:MM:SS] message" on stdout and in $LOG
#######################################
log() { printf '[%s] %s\n' "$(date +%T)" "$*" | tee -a "$LOG"; }

#######################################
# Upload the whole $OUT directory to $REPO under grpo/.
# Globals:   OUT, REPO (read); BLT_HF_TOKEN (read by the Python snippet)
# Returns:   exit status of the Python uploader
#######################################
push_to_hf() {
  log "pushing to HF under grpo/"
  # The heredoc delimiter is quoted so the shell never rewrites the Python
  # source; paths and repo id travel through the environment instead.
  GRPO_OUT="$OUT" GRPO_REPO="$REPO" python3 - <<'PY'
import os
import sys

from huggingface_hub import HfApi

token = os.environ.get("BLT_HF_TOKEN", "").strip()
if not token.startswith("hf_"):
    # Explicit exit instead of `assert`, which is stripped under python -O.
    sys.exit("BLT_HF_TOKEN missing")

api = HfApi(token=token)
api.upload_folder(
    folder_path=os.environ["GRPO_OUT"],
    path_in_repo="grpo",
    repo_id=os.environ["GRPO_REPO"], repo_type="model",
    commit_message="GRPO Phase C: trained policy + final ablation",
    ignore_patterns=["*.tmp"],
)
print("[push] done")
PY
}

# $LOG lives in the parent of $OUT, so this must succeed before log() is usable.
mkdir -p "$OUT" || { printf 'cannot create %s\n' "$OUT" >&2; exit 1; }

# The python3 -m invocations below resolve the `experiments` package relative
# to the working directory, so a failed cd must abort the run.
cd /home/ubuntu || { log "cannot cd to /home/ubuntu"; exit 1; }

export TOKENIZERS_PARALLELISM=false TRANSFORMERS_NO_ADVISORY_WARNINGS=1 HF_HUB_DISABLE_PROGRESS_BARS=1
export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True

# --- 1. Training -------------------------------------------------------------
log "=========================================="
log "GRPO PHASE C: train from pilot ckpt-step12000"
log "=========================================="
python3 -u -m experiments.blt_reasoner.grpo_train --config "$CFG" \
  > "$OUT/train.log" 2>&1
train_rc=$?
log "GRPO train exit=$train_rc"

# --- 2. Evaluation (only when training produced a final checkpoint) ----------
eval_rc=0
if (( train_rc != 0 )); then
  # Skip eval: a failed train may not have produced a usable $OUT/final.
  log "GRPO train FAILED; pushing partial state and exiting"
else
  log "running n=100 K=16 z-ablation on GRPO final ckpt"
  python3 -u -m experiments.blt_reasoner.eval \
    --ckpt "$OUT/final" --config "$CFG" \
    --n 100 --K 16 --max_new_tokens 192 --temperature 0.0 \
    --out "$OUT/final/ablation_K16_n100.json" \
    > "$OUT/eval.log" 2>&1
  eval_rc=$?
  log "eval exit=$eval_rc"
fi

# --- 3. Push (always, so partial results survive a failure) ------------------
push_to_hf
push_rc=$?
if (( push_rc != 0 )); then
  log "HF push FAILED exit=$push_rc"
fi

# --- Final status: report the first failing step to the caller ---------------
for step_rc in "$train_rc" "$eval_rc" "$push_rc"; do
  if (( step_rc != 0 )); then
    log "queue_grpo.sh FINISHED WITH ERRORS (train=$train_rc eval=$eval_rc push=$push_rc)"
    exit "$step_rc"
  fi
done
log "queue_grpo.sh DONE"