#!/usr/bin/env bash
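# Queued follow-up to the current 7B pilot. Waits (up to 12 h) for the pilot's
# final ablation file to appear, then:
#   1. runs the teacher-forced ablation on the pilot's final ckpt as a baseline;
#   2. trains BLT-Reasoner at 7B with block_z_to_x=True for 500 steps at K=8;
#   3. runs the teacher-forced 3-way ablation (which respects block_z_to_x via
#      the 4D mask) on the experiment's final ckpt;
#   4. pushes the results to HF under exp7b_block_z/ in the repo.
# Requires BLT_HF_TOKEN in the environment for the final push.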
# Abort on use of an unset variable and make a pipeline fail when any of its
# stages fails. NOTE: -e is not enabled, so a failing command does not stop
# the script on its own; steps that must succeed have to check explicitly.
set -uo pipefail

# Hugging Face model repo that receives the uploaded results.
readonly REPO="LauraGG/blt-reasoner-pilot1"
# Output directory of this experiment (train/eval logs; uploaded as a whole).
readonly OUT="/home/ubuntu/work/blt_exp7b_blockz"
# Config used for the experiment's training run and its ablation.
readonly CFG="/home/ubuntu/experiments/blt_reasoner/configs/exp7b_block_z_to_x.json"
# Sentinel file: the script does not start work until this file exists.
readonly PILOT_FINAL_ABL="/home/ubuntu/work/blt_pilot7b/final/ablation_n200_K16.json"
# File that log() appends every message to.
readonly LOG="/home/ubuntu/work/queue_block_z_exp.log"

# Print a timestamped ([HH:MM:SS]) message to stdout and append the same line
# to the file named by $LOG. All arguments are joined into a single message.
log() {
    printf '[%s] %s\n' "$(date +%T)" "$*" | tee -a "$LOG"
}

# Create the experiment output directory. Every later step redirects its
# output into $OUT, so there is no point continuing without it.
mkdir -p "$OUT" || { echo "FATAL: cannot create $OUT" >&2; exit 1; }

# The python3 -m experiments.* invocations below are run from /home/ubuntu
# (presumably so the `experiments` package resolves from the working
# directory); a silent cd failure would break all of them.
cd /home/ubuntu || { log "FATAL: cannot cd to /home/ubuntu"; exit 1; }

# Environment knobs for the tokenizers / transformers / huggingface_hub
# libraries and for the PyTorch CUDA allocator, inherited by every python3
# child process started below.
export TOKENIZERS_PARALLELISM=false TRANSFORMERS_NO_ADVISORY_WARNINGS=1 HF_HUB_DISABLE_PROGRESS_BARS=1
export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True

# Block until the pilot run's final ablation file exists, polling once a
# minute. Give up (exit 1) once 12 hours have passed since the script started.
log "queue_block_z_exp.sh starting; waiting for $PILOT_FINAL_ABL"
DEADLINE=$(( $(date +%s) + 12 * 3600 ))
until [[ -f "$PILOT_FINAL_ABL" ]]; do
    if (( $(date +%s) > DEADLINE )); then
        log "FATAL: deadline exceeded waiting for pilot; aborting"
        exit 1
    fi
    sleep 60
done
# Fixed pause before starting GPU work (per the log message: GPU drain).
log "pilot final ablation present; waiting 30s for GPU drain"
sleep 30

# Also run teacher-forced ablation on the CURRENT pilot's final ckpt as a baseline
# (so we have apples-to-apples teacher-forced metrics for comparison).
# The ablation's stdout/stderr go to $OUT/baseline_tf_eval.log. Its exit status
# is checked so that a failed run is reported as such and a result file left
# over from an earlier run is never summarized as if it were fresh. A failure
# here is logged but does not abort the script.
log "running teacher-forced ablation on PILOT final ckpt (block_z_to_x=False baseline)"
baseline_json="/home/ubuntu/work/blt_pilot7b/final/ablation_teacher_forced.json"
baseline_rc=0
python3 -u -m experiments.blt_reasoner.scripts.ablate_teacher_forced \
    --ckpt /home/ubuntu/work/blt_pilot7b/final \
    --config /home/ubuntu/experiments/blt_reasoner/configs/pilot7b_qwen_math_gsm8k.json \
    --n 200 --K 16 \
    --out "$baseline_json" \
    > "$OUT/baseline_tf_eval.log" 2>&1 || baseline_rc=$?
if (( baseline_rc != 0 )); then
    log "baseline tf-ablation FAILED (exit=$baseline_rc); see $OUT/baseline_tf_eval.log"
else
    log "baseline tf-ablation done; result:"
    # Best-effort summary appended to $LOG: per-condition token accuracy plus
    # the two deltas. A malformed or missing result file only yields a warning.
    python3 -c "
import json,sys
d=json.load(sys.stdin)
for c in ('normal','random','zero'):
    print(f\"  {c}: tok_acc={d['results'][c]['tok_acc']:.4f}\")
print(f\"  delta_random={d['delta_tokacc_normal_minus_random']:+.4f}\")
print(f\"  delta_zero  ={d['delta_tokacc_normal_minus_zero']:+.4f}\")
" < "$baseline_json" >> "$LOG" 2>&1 \
        || log "WARNING: could not summarize $baseline_json"
fi

# Main experiment: run the training entry point with $CFG, sending its
# stdout/stderr to $OUT/train.log. The exit status is only recorded in the
# log; the script carries on with the following steps either way.
banner="=========================================="
log "$banner"
log "EXPERIMENT: 7B SFT with block_z_to_x=True (500 K=8 steps)"
log "$banner"
train_rc=0
python3 -u -m experiments.blt_reasoner.train --config "$CFG" \
    > "$OUT/train.log" 2>&1 || train_rc=$?
log "exp train exit=$train_rc"

# Teacher-forced ablation on the experiment's final checkpoint. Output of the
# ablation goes to $OUT/exp_tf_eval.log. Its exit status is checked so that a
# failed run (e.g. no final checkpoint) is reported as such and a result file
# left over from an earlier run is never summarized as if it were fresh. A
# failure here is logged but does not abort the script.
log "running teacher-forced ablation on exp final ckpt"
exp_json="$OUT/final/ablation_teacher_forced.json"
exp_tf_rc=0
python3 -u -m experiments.blt_reasoner.scripts.ablate_teacher_forced \
    --ckpt "$OUT/final" --config "$CFG" \
    --n 200 --K 8 \
    --out "$exp_json" \
    > "$OUT/exp_tf_eval.log" 2>&1 || exp_tf_rc=$?
if (( exp_tf_rc != 0 )); then
    log "exp tf-ablation FAILED (exit=$exp_tf_rc); see $OUT/exp_tf_eval.log"
else
    log "exp tf-ablation done; result:"
    # Best-effort summary appended to $LOG: per-condition token accuracy plus
    # the two deltas. A malformed or missing result file only yields a warning.
    python3 -c "
import json,sys
d=json.load(sys.stdin)
for c in ('normal','random','zero'):
    print(f\"  {c}: tok_acc={d['results'][c]['tok_acc']:.4f}\")
print(f\"  delta_random={d['delta_tokacc_normal_minus_random']:+.4f}\")
print(f\"  delta_zero  ={d['delta_tokacc_normal_minus_zero']:+.4f}\")
" < "$exp_json" >> "$LOG" 2>&1 \
        || log "WARNING: could not summarize $exp_json"
fi

# Upload the whole $OUT directory to the HF repo under exp7b_block_z/, plus the
# pilot's teacher-forced ablation result for comparison.
# The heredoc is quoted so the Python source is passed verbatim; the folder and
# repo id reach Python through the environment instead of being spliced into
# the source text. The token is read from BLT_HF_TOKEN, which must already be
# exported. A failed push is logged and becomes the script's exit status
# instead of being reported as DONE.
log "pushing exp7b_block_z/ to HF"
push_rc=0
BLT_PUSH_FOLDER="$OUT" BLT_PUSH_REPO="$REPO" python3 - <<'PY' || push_rc=$?
import os
import sys
from huggingface_hub import HfApi

token = os.environ.get("BLT_HF_TOKEN", "").strip()
# Explicit check rather than `assert`, which is stripped under python -O.
if not token.startswith("hf_"):
    sys.exit("BLT_HF_TOKEN missing")
folder = os.environ["BLT_PUSH_FOLDER"]
repo = os.environ["BLT_PUSH_REPO"]
api = HfApi(token=token)
api.upload_folder(
    folder_path=folder,
    path_in_repo="exp7b_block_z",
    repo_id=repo, repo_type="model",
    commit_message="EXP: 7B with block_z_to_x=True (leak-closure principled test)",
)
# Also push the pilot's tf ablation alongside (for clean comparison)
api.upload_file(
    path_or_fileobj="/home/ubuntu/work/blt_pilot7b/final/ablation_teacher_forced.json",
    path_in_repo="pilot7b/final/ablation_teacher_forced.json",
    repo_id=repo, repo_type="model",
    commit_message="Add teacher-forced ablation on pilot7b final (baseline for block_z_to_x exp)",
)
print("[push] done")
PY
if (( push_rc != 0 )); then
    log "FATAL: HF push failed (exit=$push_rc)"
    exit "$push_rc"
fi
log "queue_block_z_exp.sh DONE"