#!/usr/bin/env bash
#
# Queue script: resume training from an existing checkpoint, run a
# teacher-forced and an autoregressive ablation on the resulting "final"
# checkpoint, then upload the output directory to the Hugging Face Hub.
#
# Required environment:
#   BLT_HF_TOKEN  Hugging Face token (must start with "hf_"); read by the
#                 upload step at the end of the script.

# NOTE(review): -e is not set, so a failing stage does not stop the queue;
# each stage's exit status is written to the log instead. Confirm that this
# keep-going behaviour is intended before adding -e.
set -uo pipefail

# Hub repository that receives the upload.
readonly REPO="LauraGG/blt-reasoner-pilot1"
# Output directory for this run (checkpoints, per-stage logs, ablation JSON).
readonly OUT="/home/ubuntu/work/blt_longer_sft"
# Experiment config passed to the training and evaluation modules.
readonly CFG="/home/ubuntu/experiments/blt_reasoner/configs/exp7b_longer_sft.json"
# Checkpoint the training stage resumes from.
readonly RESUME_FROM="/home/ubuntu/work/blt_grpo_opt13/final"
# Queue-level log file appended to by log().
readonly LOG="/home/ubuntu/work/queue_longer_sft.log"
|
|
#######################################
# Print a timestamped message and append the same line to the queue log.
# Globals:   LOG (read) - path of the log file appended to
# Arguments: $* - message words, joined with single spaces
# Outputs:   "[HH:MM:SS] message" on stdout and appended to $LOG
#######################################
log() {
  # printf with a fixed format keeps the message text literal, whatever
  # characters it contains.
  printf '[%s] %s\n' "$(date +%T)" "$*" | tee -a "$LOG"
}
|
|
# Prepare the output directory and working directory. Both are hard
# requirements: every stage redirects its output into $OUT, and the Python
# modules are invoked with -m from /home/ubuntu (presumably so the
# "experiments" package resolves from the current directory - confirm).
mkdir -p "$OUT" || {
  log "cannot create $OUT"
  exit 1
}
cd /home/ubuntu || {
  log "cannot cd to /home/ubuntu"
  exit 1
}

# Environment for the Python stages that follow.
export TOKENIZERS_PARALLELISM=false
export TRANSFORMERS_NO_ADVISORY_WARNINGS=1
export HF_HUB_DISABLE_PROGRESS_BARS=1
export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
|
|
# Stage 1: run the training module with $CFG, resuming from $RESUME_FROM.
# Its stdout and stderr go to $OUT/train.log; only the exit status is
# written to the queue log.
log "==========================================="
log "LONGER SFT from GRPO ckpt (52.5% baseline)"
log "==========================================="
train_rc=0
python3 -u -m experiments.blt_reasoner.train \
  --config "$CFG" \
  --resume_from "$RESUME_FROM" \
  >"$OUT/train.log" 2>&1 || train_rc=$?
log "train exit=$train_rc"
|
|
# Stage 2: run the teacher-forced ablation module on $OUT/final. The result
# JSON is written next to the checkpoint; stdout and stderr go to
# $OUT/tf_eval.log.
log "TF ablation on final"
tf_args=(
  --ckpt "$OUT/final"
  --config "$CFG"
  --n 200
  --K 16
  --out "$OUT/final/ablation_teacher_forced.json"
)
tf_rc=0
python3 -u -m experiments.blt_reasoner.scripts.ablate_teacher_forced \
  "${tf_args[@]}" >"$OUT/tf_eval.log" 2>&1 || tf_rc=$?
log "TF ablate exit=$tf_rc"
|
|
# Stage 3: run the eval module on $OUT/final with --temperature 0.0 and
# --max_new_tokens 192. The result JSON is written next to the checkpoint;
# stdout and stderr go to $OUT/ar_eval.log.
log "AR ablation on final"
ar_args=(
  --ckpt "$OUT/final"
  --config "$CFG"
  --n 200
  --K 16
  --max_new_tokens 192
  --temperature 0.0
  --out "$OUT/final/ablation_n200_K16.json"
)
ar_rc=0
python3 -u -m experiments.blt_reasoner.eval \
  "${ar_args[@]}" >"$OUT/ar_eval.log" 2>&1 || ar_rc=$?
log "AR ablate exit=$ar_rc"
|
|
# Stage 4: upload the whole $OUT directory (which also holds the per-stage
# logs written above) to the "longer_sft" folder of the $REPO model repo.
# The token is read from BLT_HF_TOKEN inside Python.
#
# The heredoc delimiter is quoted so the shell performs no expansion in the
# Python source; the directory and repo id are passed as arguments instead
# of being spliced into string literals.
log "pushing longer_sft/ to HF"
python3 - "$OUT" "$REPO" <<'PYEND'
import os
import sys

from huggingface_hub import HfApi

folder_path, repo_id = sys.argv[1], sys.argv[2]

token = os.environ.get("BLT_HF_TOKEN", "").strip()
if not token.startswith("hf_"):
    # Explicit exit rather than assert: asserts are removed under -O.
    sys.exit("BLT_HF_TOKEN missing")

api = HfApi(token=token)
api.upload_folder(folder_path=folder_path, path_in_repo="longer_sft",
                  repo_id=repo_id, repo_type="model",
                  commit_message="Longer SFT from GRPO ckpt")
print("[push] done")
PYEND
# Record the upload status like every other stage does.
log "push exit=$?"
log "queue_longer_sft.sh DONE"
|
|