#!/usr/bin/env bash
#
# queue_controls.sh (name taken from this script's own log messages)
#
# Waits for the pilot run's final ablation file to appear, then runs the
# control experiments one after another. For each control it trains, runs the
# z-ablation eval on the final checkpoint, and uploads the output directory to
# the Hugging Face repo named in REPO.
#
# Required environment:
#   BLT_HF_TOKEN  Hugging Face token; the upload step asserts that it starts
#                 with "hf_".

# -u: expanding an unset variable is an error.
# pipefail: a pipeline fails if any of its stages fails.
# -e is not enabled: the steps below read each command's exit status
# themselves and carry on after a failure, which -e would cut short.
set -uo pipefail

# Hugging Face repo id that receives the uploads.
readonly REPO="LauraGG/blt-reasoner-pilot1"
# File whose appearance marks the pilot run as finished.
readonly PILOT_FINAL_ABL="/home/ubuntu/work/blt_pilot1/final/ablation_n200.json"
# Log file that every log() line is appended to.
readonly LOG="/home/ubuntu/work/queue_controls.log"

#######################################
# Print a timestamped message and append the same line to the log file.
# Globals:   LOG (read) - file the line is appended to
# Arguments: $@ - message words, joined with single spaces
# Outputs:   "[HH:MM:SS] message" on stdout and appended to $LOG
# Returns:   status of the printf | tee pipeline
#######################################
log() {
  # printf with a fixed format keeps the message text literal.
  printf '[%s] %s\n' "$(date +%T)" "$*" | tee -a "$LOG"
}

# Work from /home/ubuntu. The python3 -m experiments.* invocations in this
# script presumably resolve the package relative to this directory (TODO
# confirm), so abort rather than run them from somewhere else.
cd /home/ubuntu || {
  log "FATAL: cannot cd to /home/ubuntu; aborting"
  exit 1
}

# Settings exported to the Python child processes (read by the tokenizers,
# transformers, huggingface_hub and PyTorch libraries respectively).
export TOKENIZERS_PARALLELISM=false
export TRANSFORMERS_NO_ADVISORY_WARNINGS=1
export HF_HUB_DISABLE_PROGRESS_BARS=1
export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True

# Block until the pilot run has written its final ablation file. The file is
# polled once a minute; after six hours without it the script gives up.
log "queue_controls.sh starting; waiting for $PILOT_FINAL_ABL"
DEADLINE=$(( $(date +%s) + 6 * 3600 ))
until [[ -f "$PILOT_FINAL_ABL" ]]; do
  if (( $(date +%s) > DEADLINE )); then
    log "FATAL: deadline exceeded waiting for pilot final ablation; aborting"
    exit 1
  fi
  sleep 60
done
log "pilot final ablation present; proceeding"
# Extra pause before the first control starts; presumably gives the pilot
# process time to finish writing and exit (TODO confirm).
sleep 30

#######################################
# Run one control experiment: train, evaluate the final checkpoint, then
# upload the whole output directory to the Hugging Face repo.
# Globals:
#   REPO (read)          - repo id passed to upload_folder
#   BLT_HF_TOKEN (read)  - read from the environment by the upload step
# Arguments:
#   $1 - control name; used in log lines and as controls/<name> in the repo
#   $2 - config file passed to both the train and the eval module
#   $3 - output directory; receives train.log and eval.log and is uploaded
# Outputs:
#   Progress lines via log(); train/eval output goes to the two log files.
# Returns:
#   0 when training and the upload succeed. If training fails, eval and push
#   are skipped and the train exit status is returned. If the upload fails,
#   its exit status is returned. An eval failure is logged but does not stop
#   the upload and does not affect the return status.
#######################################
run_control() {
  local name="$1"
  local cfg="$2"
  local out_dir="$3"
  local rc=0

  # The redirections below need the directory to exist.
  if ! mkdir -p -- "$out_dir"; then
    log "control [$name]: cannot create $out_dir; skipping"
    return 1
  fi

  log "=========================================="
  log "CONTROL [$name]: train cfg=$cfg out=$out_dir"
  log "=========================================="
  python3 -u -m experiments.blt_reasoner.train --config "$cfg" \
    > "$out_dir/train.log" 2>&1 || rc=$?
  log "control [$name]: train exit=$rc"
  if (( rc != 0 )); then
    log "control [$name]: train FAILED; skipping eval+push"
    return "$rc"
  fi

  log "control [$name]: running n=100 K=4 z-ablation on final ckpt"
  rc=0
  python3 -u -m experiments.blt_reasoner.eval \
    --ckpt "$out_dir/final" --config "$cfg" \
    --n 100 --K 4 --max_new_tokens 192 --temperature 0.0 \
    --out "$out_dir/final/ablation_K4_n100.json" \
    >> "$out_dir/eval.log" 2>&1 || rc=$?
  # Eval failure is recorded only; the checkpoint is still uploaded.
  log "control [$name]: eval exit=$rc"

  log "control [$name]: pushing to HF under controls/$name/"
  rc=0
  # The heredoc delimiter is quoted so the shell expands nothing inside the
  # Python source; the values travel as arguments instead, which keeps quotes
  # or braces in a name or path from corrupting the program text.
  python3 - "$out_dir" "$name" "$REPO" <<'PY' || rc=$?
import os
import sys

from huggingface_hub import HfApi

folder, name, repo = sys.argv[1:4]
token = os.environ.get("BLT_HF_TOKEN", "").strip()
assert token.startswith("hf_"), "BLT_HF_TOKEN missing"
api = HfApi(token=token)
api.upload_folder(
    folder_path=folder,
    path_in_repo=f"controls/{name}",
    repo_id=repo, repo_type="model",
    commit_message=f"Add control: {name} (final ckpt + n=100 ablation)",
)
print("[push] done")
PY
  if (( rc != 0 )); then
    log "control [$name]: push FAILED (exit=$rc)"
    return "$rc"
  fi
  log "control [$name]: push done"
}

# Run the controls in order. Each one gets its own output directory under
# /home/ubuntu/work and its own config file. A control that fails does not
# stop the next one from running.
for control in no_infonce no_bottleneck; do
  out_dir="/home/ubuntu/work/blt_control_${control}"
  if ! mkdir -p -- "$out_dir"; then
    log "control [$control]: cannot create $out_dir; skipping"
    continue
  fi
  run_control "$control" \
    "/home/ubuntu/experiments/blt_reasoner/configs/control_${control}.json" \
    "$out_dir"
done

log "queue_controls.sh DONE"