File size: 2,143 Bytes
bc7101b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
#!/usr/bin/env bash
# E6 -- no-block-aware GRPO.
#
# Sampling and scoring happen under block_y_to_x=False, the same setting as the
# eval distribution that achieves 77.5%. When training is over, both ablations
# (without-block and with-block) are run for the full comparison and the
# results are pushed to HF.
#
# Shell options: unset variables are errors (-u) and a pipeline reports the
# failure of any of its stages (pipefail). errexit (-e) is NOT enabled, so a
# failing command does not abort the script.
set -u
set -o pipefail

# Training/eval configuration file.
CFG="/home/ubuntu/experiments/blt_reasoner/configs/grpo_no_block.json"
# Directory that receives checkpoints, per-step logs and ablation results.
OUT="/home/ubuntu/work/blt_grpo_no_block"
# Timestamped progress log for this script.
LOG="/home/ubuntu/work/queue_grpo_no_block.log"
# Hugging Face model repo that the results are uploaded to.
REPO="LauraGG/blt-reasoner-pilot1"

# Print a timestamped message to stdout and append the same line to $LOG.
# Globals:   LOG (read) - path of the log file that is appended to
# Arguments: message words; joined into one string via "$*"
# Outputs:   "[HH:MM:SS] <message>" on stdout and in $LOG
log() {
  printf '[%s] %s\n' "$(date +%T)" "$*" \
    | tee -a "$LOG"
}

# --- Environment --------------------------------------------------------------
# Stop right away if the output directory or the working directory is unusable:
# every later step redirects into $OUT and invokes `python3 -m experiments...`
# from this directory (presumably the import root of the `experiments` package).
mkdir -p "$OUT" || { log "cannot create $OUT"; exit 1; }
cd /home/ubuntu || { log "cannot cd to /home/ubuntu"; exit 1; }
export TOKENIZERS_PARALLELISM=false TRANSFORMERS_NO_ADVISORY_WARNINGS=1 HF_HUB_DISABLE_PROGRESS_BARS=1
export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True

# --- Training -----------------------------------------------------------------
# Stages below are best-effort: a failing stage does not stop the following
# ones; its exit status is written to the log instead.
log "==========================================="
log "E6: No-block-aware GRPO from 77.5% no-block ckpt"
log "==========================================="
python3 -u -m experiments.blt_reasoner.grpo_train --config "$CFG" \
    > "$OUT/train.log" 2>&1
log "train exit=$?"

# --- Ablations ----------------------------------------------------------------
# Both evaluations share these arguments so that the two runs differ only in
# the --no_block_y_to_x flag and in their output file.
eval_args=(
    --ckpt "$OUT/final" --config "$CFG"
    --n 200 --K 16 --max_new_tokens 192 --temperature 0.0
)

log "AR ablation (NO BLOCK at eval) on final"
python3 -u -m experiments.blt_reasoner.eval \
    "${eval_args[@]}" \
    --no_block_y_to_x \
    --out "$OUT/final/ablation_no_block_n200.json" \
    > "$OUT/ar_eval_noblock.log" 2>&1
log "AR no-block ablate exit=$?"

log "AR ablation (WITH block at eval) on final — sanity check"
python3 -u -m experiments.blt_reasoner.eval \
    "${eval_args[@]}" \
    --out "$OUT/final/ablation_with_block_n200.json" \
    > "$OUT/ar_eval_block.log" 2>&1
log "AR with-block ablate exit=$?"

# --- Upload -------------------------------------------------------------------
# Requires BLT_HF_TOKEN (an "hf_..." token) in the environment.
# The folder path and repo id are handed to Python through the environment and
# the heredoc delimiter is quoted, so the shell never splices values into the
# Python source (no breakage on quotes/backslashes in the values).
log "pushing grpo_no_block/ to HF"
GRPO_OUT="$OUT" GRPO_REPO="$REPO" python3 - <<'PYEND'
import os
from huggingface_hub import HfApi
token = os.environ.get("BLT_HF_TOKEN", "").strip()
assert token.startswith("hf_"), "BLT_HF_TOKEN missing"
api = HfApi(token=token)
api.upload_folder(folder_path=os.environ["GRPO_OUT"], path_in_repo="grpo_no_block",
                   repo_id=os.environ["GRPO_REPO"], repo_type="model",
                   commit_message="E6: no-block-aware GRPO continuation")
print("[push] done")
PYEND
log "push exit=$?"
log "queue_grpo_no_block.sh DONE"