JulianHJR
/

tempv2

Model card Files Files and versions

xet

Community

JulianHJR committed 22 days ago

Commit

c04eaa2

verified ·

1 Parent(s): 9fa8ff4

Upload runall.sh with huggingface_hub

Browse files

Files changed (1) hide show

runall.sh +213 -0

runall.sh ADDED Viewed

	@@ -0,0 +1,213 @@

+#!/bin/bash
+# ============================================================
+# Student Simulation v5 — 6-GPU pipeline (May 2026)
+# ============================================================
+#
+# WHAT v5 DOES DIFFERENTLY (vs v4):
+#   - DROPPED stage 9 (global α sweep). v4 small-batch proved it unreliable
+#     for 30B MoE multi-layer interaction.
+#   - NEW stage 5b: probe-based layer ranking. Pick top-K layers by
+#     linear-probe accuracy on the existing residuals. Replaces v4's
+#     "take back half" heuristic.
+#   - HARDENED stage 14:
+#       * residual_after_general < 0.3 → AUTO-SKIP (noise vector)
+#       * n_repeats=3 (averaged) → kill single-run noise
+#       * min_reduction_threshold=1.0 → no "noise victories"
+#       * active_threshold=1, side_effect_rate=0.25 (relaxed from v4)
+#       * SHARDED across 6 GPUs (each card calibrates ~3 of 16 layers)
+#   - NEW stage 16: cumulative top-k multi-layer sweep.
+#     k = 1, 3, 5, 8, 12, 16 layers steered together.
+#     Reports collapse rate at each k → finds the multi-layer cliff.
+#
+# 6-GPU LAYOUT
+#   Single-GPU phase (GPU 0):
+#     stages 1-8   data prep, expert select, residuals, directions
+#     stage 5b     probe ranking (uses residuals only, fast)
+#   Parallel phase (GPU 0-5, 6 cards in parallel):
+#     stage 14   per-layer calibration, sharded 0/6 ... 5/6
+#                merged into one calibration file
+#   Single-GPU again (GPU 0):
+#     stage 16   cumulative top-k multi-layer sweep
+#     stage 15   calibrated inference (baseline vs intervened)
+#     stage 13   final analysis + report
+# ============================================================
+#
+# QUICK START
+#   bash runall.sh                           # full pipeline
+#   STAGES=5b,14,16,15,13 bash runall.sh     # skip data prep
+#   STAGES=14,16,15,13 bash runall.sh        # skip probe (already ran)
+#
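+# ENV VARS
+#   STAGES         comma-separated list of stages to run, no spaces
+#                  (default: 1,2,3,4,5,6,7,8,5b,14,16,15,13)
+#   N_TRAIN        number of CoTs to generate (default 150)
+#   N_MATH_TEST    forwarded to stage 2 as --n_math_test (default 50)
+#   N_AIME         forwarded to stage 2 as --n_aime (default 30)
+#   N_GPQA         forwarded to stage 2 as --n_gpqa (default 20)
+#   N_CALIB        number of problems for stage 14 (default 10)
+#   N_K_TEST       number of problems for stage 16 (default 10)
+#   N_REPEATS      number of stage 14 repeats (default 3)
+#   PROBE_TOP_K    number of layers from stage 5b (default 16)
+#                  NOTE(review): this script never references PROBE_TOP_K;
+#                  presumably the stage 5b script reads it from the
+#                  environment - confirm.
+#   DATA_ROOT      root for logs/ and results/ (default $PROJECT_ROOT/data)
+#   SKIP_DOWNLOAD  set to any non-empty value to bypass stage 1
+#   OMP_NUM_THREADS, MKL_NUM_THREADS, NUMEXPR_NUM_THREADS
+#                  per-process CPU thread caps (default 8 each)
+# ============================================================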
+# Strict mode: abort on a failed command, an unset variable, or a failure in
+# any stage of a pipeline (every stage below is piped through tee, so
+# pipefail is what lets a failing python command stop the run).
+set -euo pipefail
+# Work from the directory containing this script so the relative paths used
+# below (scripts/..., configs.paths) resolve regardless of the caller's cwd.
+PROJECT_ROOT="$(cd "$(dirname "$0")" && pwd)"
+cd "$PROJECT_ROOT"
+export DATA_ROOT="${DATA_ROOT:-$PROJECT_ROOT/data}"
+# Prepend the project root to PYTHONPATH. The ':+' expansion adds
+# ":$PYTHONPATH" only when PYTHONPATH is already set and non-empty, so an
+# unset PYTHONPATH no longer produces a trailing ':' (an empty path entry).
+export PYTHONPATH="$PROJECT_ROOT${PYTHONPATH:+:$PYTHONPATH}"
+export TOKENIZERS_PARALLELISM=false
+# CPU thread caps. Without these, each of the 6 parallel shards spawns ~64
+# BLAS threads (PyTorch defaults to nproc), so 6 processes × 64 = 384 threads
+# fight for ~64 cores → cache thrash → generation appears to hang.
+# 8 threads × 6 procs = 48 threads, well within capacity.
+# Values already present in the environment take precedence over the default 8.
+export OMP_NUM_THREADS="${OMP_NUM_THREADS:-8}"
+export MKL_NUM_THREADS="${MKL_NUM_THREADS:-8}"
+export NUMEXPR_NUM_THREADS="${NUMEXPR_NUM_THREADS:-8}"
+# Problem-count knobs; each can be overridden from the environment.
+N_TRAIN="${N_TRAIN:-150}"
+N_MATH_TEST="${N_MATH_TEST:-50}"
+N_AIME="${N_AIME:-30}"
+N_GPQA="${N_GPQA:-20}"
+N_CALIB="${N_CALIB:-10}"
+N_K_TEST="${N_K_TEST:-10}"
+N_REPEATS="${N_REPEATS:-3}"
+# The log and results directories must exist before the first tee -a.
+mkdir -p "$DATA_ROOT/logs" "$DATA_ROOT/results"
+RUNALL_LOG="$DATA_ROOT/logs/runall.log"
+# Run banner: shown on the terminal and appended to the cumulative run log.
+echo "=========================================================" | tee -a "$RUNALL_LOG"
+echo "Student Simulation v5 (6-GPU) - $(date)" | tee -a "$RUNALL_LOG"
+echo "PROJECT_ROOT: $PROJECT_ROOT" | tee -a "$RUNALL_LOG"
+echo "N_CALIB:      $N_CALIB    N_REPEATS: $N_REPEATS" | tee -a "$RUNALL_LOG"
+echo "N_K_TEST:     $N_K_TEST" | tee -a "$RUNALL_LOG"
+echo "=========================================================" | tee -a "$RUNALL_LOG"
+# Record whatever the project's configs.paths module prints in the run log.
+python -m configs.paths 2>&1 | tee -a "$RUNALL_LOG"
+# Default: every stage, in execution order. Matching is on exact
+# comma-delimited tokens, so the list must not contain spaces.
+STAGES="${STAGES:-1,2,3,4,5,6,7,8,5b,14,16,15,13}"
+run_stage() {
+    local stage_num="$1"
+    local stage_name="$2"
+    shift 2
+    if [[ ",$STAGES," != *",$stage_num,"* ]]; then
+        echo "[skip] Stage $stage_num: $stage_name" | tee -a "$RUNALL_LOG"
+        return 0
+    fi
+    echo "" | tee -a "$RUNALL_LOG"
+    echo "==================== Stage $stage_num: $stage_name ====================" | tee -a "$RUNALL_LOG"
+    local t_start; t_start=$(date +%s)
+    "$@" 2>&1 | tee -a "$RUNALL_LOG"
+    local t_end; t_end=$(date +%s)
+    echo "Stage $stage_num took $((t_end - t_start))s" | tee -a "$RUNALL_LOG"
+}
+# Single-GPU stages
+export CUDA_VISIBLE_DEVICES=0
+if [[ -z "${SKIP_DOWNLOAD:-}" ]]; then
+    run_stage 1 "Download model" \
+        python scripts/01_download_model.py
+fi
+run_stage 2 "Generate CoTs" \
+    python scripts/02_generate_cots.py \
+        --n_train "$N_TRAIN" --n_math_test "$N_MATH_TEST" \
+        --n_aime "$N_AIME" --n_gpqa "$N_GPQA" --resume
+run_stage 3 "Label CoTs" \
+    python scripts/03_label_cots.py --resume
+run_stage 4 "Capture routing" \
+    python scripts/04_capture_routing.py --resume
+run_stage 5 "Select top experts" \
+    python scripts/05_select_top_experts.py --resume
+run_stage 6 "Interaction analysis" \
+    python scripts/06_interaction_analysis.py
+run_stage 7 "Capture residuals" \
+    python scripts/07_capture_residuals.py --resume
+run_stage 8 "Compute v4_clean directions" \
+    python scripts/08_compute_directions.py --resume
+run_stage 5b "Probe-based layer ranking" \
+    python scripts/05b_probe_ranking.py --dim monitoring
+# ============================================================
+# 6-GPU PARALLEL PHASE: stage 14 sharded
+# ============================================================
+# Stage 14 does not go through run_stage because it fans out one background
+# process per GPU; it therefore repeats the $STAGES membership test itself.
+if [[ ",$STAGES," == *",14,"* ]]; then
+    echo "" | tee -a "$RUNALL_LOG"
+    echo "==================== 6-GPU Stage 14 (sharded) ====================" | tee -a "$RUNALL_LOG"
+    t_start=$(date +%s)
+    PIDS=()
+    SHARD_FILES=()
+    for shard_id in 0 1 2 3 4 5; do
+        # Path handed to the merge step. Presumably this is where
+        # 14_calibrate_per_layer.py writes for --shard_id "shard<N>" - confirm.
+        out_path="$DATA_ROOT/results/per_layer_calibration_monitoring_shard${shard_id}.json"
+        SHARD_FILES+=("$out_path")
+        (
+          # Bind this shard to ONE physical GPU. The export happens inside
+          # the subshell, so it is scoped to this shard only and never
+          # changes the parent's CUDA_VISIBLE_DEVICES or a sibling shard's.
+          export CUDA_VISIBLE_DEVICES="$shard_id"
+          # '>' starts a fresh per-shard log; python output is appended to it.
+          echo "[shard $shard_id] CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES" \
+              > "$DATA_ROOT/logs/14_mon_shard${shard_id}.log"
+          python scripts/14_calibrate_per_layer.py \
+              --dim monitoring \
+              --n_test "$N_CALIB" \
+              --n_repeats "$N_REPEATS" \
+              --layer_shard "${shard_id}/6" \
+              --shard_id "shard${shard_id}" \
+              >> "$DATA_ROOT/logs/14_mon_shard${shard_id}.log" 2>&1
+        ) &
+        PIDS+=("$!")
+        echo "Spawned stage 14 shard $shard_id on GPU $shard_id (PID $!)" | tee -a "$RUNALL_LOG"
+    done
+    # Reap every shard individually. 'wait pid1 pid2 ...' reports only the
+    # exit status of the LAST pid, so a crash in any earlier shard would go
+    # unnoticed and the merge would run on missing or stale shard files.
+    # PIDS was filled in shard order, so its indices are the shard ids.
+    failed_shards=""
+    for shard_id in "${!PIDS[@]}"; do
+        shard_rc=0
+        # '|| shard_rc=$?' keeps set -e from aborting before all shards are reaped.
+        wait "${PIDS[$shard_id]}" || shard_rc=$?
+        if (( shard_rc != 0 )); then
+            failed_shards+=" $shard_id"
+            echo "[FAIL] stage 14 shard $shard_id (PID ${PIDS[$shard_id]}) exited with status $shard_rc; see $DATA_ROOT/logs/14_mon_shard${shard_id}.log" | tee -a "$RUNALL_LOG"
+        fi
+    done
+    if [[ -n "$failed_shards" ]]; then
+        echo "Stage 14 aborted before merge; failed shard(s):$failed_shards" | tee -a "$RUNALL_LOG"
+        exit 1
+    fi
+    echo "All 6 stage-14 shards finished" | tee -a "$RUNALL_LOG"
+    # Merge
+    python scripts/14_merge_shards.py \
+        --dim monitoring \
+        --shards "${SHARD_FILES[@]}" \
+        2>&1 | tee -a "$RUNALL_LOG"
+    t_end=$(date +%s)
+    echo "Stage 14 (parallel + merge) took $((t_end - t_start))s" | tee -a "$RUNALL_LOG"
+fi
+# ============================================================
+# Single-GPU final stages
+# ============================================================
+export CUDA_VISIBLE_DEVICES=0
+run_stage 16 "Cumulative top-k multi-layer sweep" \
+    python scripts/16_cumulative_topk.py \
+        --dim monitoring --n_test "$N_K_TEST"
+run_stage 15 "Calibrated inference (monitoring)" \
+    python scripts/15_infer_calibrated.py \
+        --dim monitoring --auto_problems \
+        --save_to "$DATA_ROOT/results/infer_calibrated_monitoring_v5.json"
+run_stage 13 "Final analysis + report" \
+    python scripts/13_analyze_and_report.py
+echo "" | tee -a "$RUNALL_LOG"
+echo "=========================================================" | tee -a "$RUNALL_LOG"
+echo "v5 pipeline complete - $(date)" | tee -a "$RUNALL_LOG"
+echo "=========================================================" | tee -a "$RUNALL_LOG"
+echo "KEY FILES TO READ FIRST:" | tee -a "$RUNALL_LOG"
+echo "  $DATA_ROOT/checkpoints/probe_layer_ranking_monitoring.json" | tee -a "$RUNALL_LOG"
+echo "  $DATA_ROOT/results/per_layer_calibration_monitoring.json    <- safe_layers" | tee -a "$RUNALL_LOG"
+echo "  $DATA_ROOT/results/stage16_cumulative_topk_summary.json     <- collapse cliff" | tee -a "$RUNALL_LOG"
+echo "  $DATA_ROOT/results/infer_calibrated_monitoring_v5.json      <- final output" | tee -a "$RUNALL_LOG"
+echo "  $DATA_ROOT/results/final_report.md" | tee -a "$RUNALL_LOG"
+echo "=========================================================" | tee -a "$RUNALL_LOG"