#!/bin/bash
#
# Student Simulation v5 pipeline driver (6-GPU variant).
#
# Runs the numbered stage scripts under scripts/ in a fixed order. Most stages
# run as a single process with CUDA_VISIBLE_DEVICES=0; stage 14 is started as
# six shard processes (one per GPU 0-5) whose outputs are then merged.
#
# Optional environment overrides:
#   DATA_ROOT      data directory (default: <script dir>/data)
#   STAGES         comma-separated ids of the stages to run
#   SKIP_DOWNLOAD  when non-empty, stage 1 is not invoked at all
#   N_TRAIN, N_MATH_TEST, N_AIME, N_GPQA, N_CALIB, N_K_TEST, N_REPEATS
#                  sample/repeat counts forwarded to the stage scripts
#   OMP_NUM_THREADS, MKL_NUM_THREADS, NUMEXPR_NUM_THREADS
#                  CPU thread counts (default: 8 each)

# Strict mode: abort on a failing command, on use of an unset variable, and
# when any process in a pipeline fails.
set -euo pipefail

# Run from the directory that contains this script so the relative
# scripts/... paths used by the stages resolve.
PROJECT_ROOT="$(cd "$(dirname -- "$0")" && pwd)"
cd "$PROJECT_ROOT"

export DATA_ROOT="${DATA_ROOT:-$PROJECT_ROOT/data}"
# Prepend the project root; the previous PYTHONPATH is appended only when it is
# non-empty so that no empty trailing path entry is produced.
export PYTHONPATH="$PROJECT_ROOT${PYTHONPATH:+:$PYTHONPATH}"
export TOKENIZERS_PARALLELISM=false

# CPU thread counts for the OpenMP / MKL / numexpr environment variables.
# A value already present in the environment takes precedence over the
# default of 8.
export OMP_NUM_THREADS="${OMP_NUM_THREADS:-8}"
export MKL_NUM_THREADS="${MKL_NUM_THREADS:-8}"
export NUMEXPR_NUM_THREADS="${NUMEXPR_NUM_THREADS:-8}"

# Sample and repeat counts, each overridable from the environment.
# N_TRAIN / N_MATH_TEST / N_AIME / N_GPQA are passed to stage 2,
# N_CALIB / N_REPEATS to stage 14, and N_K_TEST to stage 16.
N_TRAIN="${N_TRAIN:-150}"
N_MATH_TEST="${N_MATH_TEST:-50}"
N_AIME="${N_AIME:-30}"
N_GPQA="${N_GPQA:-20}"
N_CALIB="${N_CALIB:-10}"
N_K_TEST="${N_K_TEST:-10}"
N_REPEATS="${N_REPEATS:-3}"

# Create the output directories and pick the log file that every stage
# appends to.
mkdir -p "$DATA_ROOT/logs" "$DATA_ROOT/results"
RUNALL_LOG="$DATA_ROOT/logs/runall.log"

# Start-of-run banner, shown on the terminal and appended to the log.
{
  echo "========================================================="
  echo "Student Simulation v5 (6-GPU) - $(date)"
  echo "PROJECT_ROOT: $PROJECT_ROOT"
  echo "N_CALIB: $N_CALIB N_REPEATS: $N_REPEATS"
  echo "N_K_TEST: $N_K_TEST"
  echo "========================================================="
} | tee -a "$RUNALL_LOG"

# Run the configs.paths module and record its output in the log. With strict
# mode enabled, a failure here stops the run before any stage starts.
python -m configs.paths 2>&1 | tee -a "$RUNALL_LOG"

# Comma-separated ids of the stages to run; the default selects every stage.
STAGES="${STAGES:-1,2,3,4,5,6,7,8,5b,14,16,15,13}"

#######################################
# Run one pipeline stage if it is selected, teeing its output to the run log.
# Globals:
#   STAGES      (read) comma-separated ids of the stages to run
#   RUNALL_LOG  (read) log file that all output is appended to
# Arguments:
#   $1   - stage id (e.g. 5 or 5b); matched against whole list elements
#   $2   - human-readable stage name used in log lines
#   $3.. - command and arguments to execute for the stage
# Outputs:
#   Skip/header/timing lines plus the command's stdout and stderr, written to
#   stdout and appended to RUNALL_LOG.
# Returns:
#   0 when the stage is skipped or succeeds; otherwise the first non-zero exit
#   status of the command/tee pipeline, so a failure is reported to the caller
#   even where errexit or pipefail is not in effect.
#######################################
run_stage() {
  local stage_num="$1"
  local stage_name="$2"
  shift 2

  # Drop whitespace so "1, 2, 5b" selects the same stages as "1,2,5b".
  # The surrounding commas make "5" match only the element 5, never 5b or 15.
  local selected=",${STAGES//[[:space:]]/},"
  if [[ "$selected" != *",$stage_num,"* ]]; then
    echo "[skip] Stage $stage_num: $stage_name" | tee -a "$RUNALL_LOG"
    return 0
  fi

  echo "" | tee -a "$RUNALL_LOG"
  echo "==================== Stage $stage_num: $stage_name ====================" | tee -a "$RUNALL_LOG"

  local t_start t_end
  t_start=$(date +%s)

  # Running the pipeline as an `if` condition keeps errexit from aborting
  # before the per-process statuses are read. PIPESTATUS is copied in both
  # branches because, without pipefail, the pipeline reports success whenever
  # tee succeeds even if the stage command itself failed.
  local -a pipe_status
  if "$@" 2>&1 | tee -a "$RUNALL_LOG"; then
    pipe_status=("${PIPESTATUS[@]}")
  else
    pipe_status=("${PIPESTATUS[@]}")
  fi
  t_end=$(date +%s)

  # The stage command's status comes first, then tee's.
  local rc=0 status
  for status in "${pipe_status[@]}"; do
    if (( status != 0 )); then
      rc=$status
      break
    fi
  done

  if (( rc != 0 )); then
    echo "Stage $stage_num FAILED (exit $rc) after $((t_end - t_start))s" | tee -a "$RUNALL_LOG"
    return "$rc"
  fi
  echo "Stage $stage_num took $((t_end - t_start))s" | tee -a "$RUNALL_LOG"
}

# The stages below each run as a single process that sees only GPU 0.
export CUDA_VISIBLE_DEVICES=0

# Stage 1 is not even offered to run_stage when SKIP_DOWNLOAD is non-empty,
# so in that case no "[skip]" line is logged for it.
if [[ -z "${SKIP_DOWNLOAD:-}" ]]; then
  run_stage 1 "Download model" \
    python scripts/01_download_model.py
fi

run_stage 2 "Generate CoTs" \
  python scripts/02_generate_cots.py \
  --n_train "$N_TRAIN" --n_math_test "$N_MATH_TEST" \
  --n_aime "$N_AIME" --n_gpqa "$N_GPQA" --resume

run_stage 3 "Label CoTs" \
  python scripts/03_label_cots.py --resume

run_stage 4 "Capture routing" \
  python scripts/04_capture_routing.py --resume

run_stage 5 "Select top experts" \
  python scripts/05_select_top_experts.py --resume

run_stage 6 "Interaction analysis" \
  python scripts/06_interaction_analysis.py

run_stage 7 "Capture residuals" \
  python scripts/07_capture_residuals.py --resume

run_stage 8 "Compute v4_clean directions" \
  python scripts/08_compute_directions.py --resume

# Stage 5b is deliberately invoked after stage 8, matching its position in
# the default STAGES list.
run_stage 5b "Probe-based layer ranking" \
  python scripts/05b_probe_ranking.py --dim monitoring

#######################################
# Wait for each given background PID and record the ones that failed.
# `wait pid1 pid2 ...` reports only the exit status of the last PID, so each
# PID is waited on separately here.
# Globals:
#   FAILED_SHARDS (written) one "shard <index> (PID <pid>, exit <status>)"
#                 entry per failed process; <index> is the argument position,
#                 starting at 0
# Arguments:
#   PIDs of child processes of the current shell, in shard order
# Returns:
#   0 if every process exited with status 0, 1 otherwise
#######################################
wait_for_shards() {
  FAILED_SHARDS=()
  local pid rc index=0
  for pid in "$@"; do
    rc=0
    wait "$pid" || rc=$?
    if (( rc != 0 )); then
      FAILED_SHARDS+=("shard $index (PID $pid, exit $rc)")
    fi
    index=$((index + 1))
  done
  (( ${#FAILED_SHARDS[@]} == 0 ))
}

# Stage 14 runs as six parallel shard processes, one per GPU 0-5, followed by
# a merge step. Whitespace in STAGES is ignored when checking for stage 14.
if [[ ",${STAGES//[[:space:]]/}," == *",14,"* ]]; then
  echo "" | tee -a "$RUNALL_LOG"
  echo "==================== 6-GPU Stage 14 (sharded) ====================" | tee -a "$RUNALL_LOG"
  t_start=$(date +%s)

  PIDS=()
  SHARD_FILES=()
  for shard_id in 0 1 2 3 4 5; do
    # Path later handed to the merge step.
    # NOTE(review): assumes 14_calibrate_per_layer.py writes its result for
    # --shard_id "shardN" to exactly this path - confirm against that script.
    out_path="$DATA_ROOT/results/per_layer_calibration_monitoring_shard${shard_id}.json"
    SHARD_FILES+=("$out_path")
    (
      # Subshell: the GPU pinning applies to this shard only. The shard log is
      # truncated by the first write and then appended to by the script.
      export CUDA_VISIBLE_DEVICES="$shard_id"
      echo "[shard $shard_id] CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES" \
        > "$DATA_ROOT/logs/14_mon_shard${shard_id}.log"
      python scripts/14_calibrate_per_layer.py \
        --dim monitoring \
        --n_test "$N_CALIB" \
        --n_repeats "$N_REPEATS" \
        --layer_shard "${shard_id}/6" \
        --shard_id "shard${shard_id}" \
        >> "$DATA_ROOT/logs/14_mon_shard${shard_id}.log" 2>&1
    ) &
    PIDS+=("$!")
    echo "Spawned stage 14 shard $shard_id on GPU $shard_id (PID $!)" | tee -a "$RUNALL_LOG"
  done

  # Let every shard finish, then refuse to merge if any of them failed;
  # otherwise the merge would read missing or stale shard files.
  if ! wait_for_shards "${PIDS[@]}"; then
    for failure in "${FAILED_SHARDS[@]}"; do
      echo "ERROR: stage 14 $failure" | tee -a "$RUNALL_LOG"
    done
    echo "ERROR: stage 14 aborted before merge; see $DATA_ROOT/logs/14_mon_shard<N>.log" | tee -a "$RUNALL_LOG"
    exit 1
  fi
  echo "All 6 stage-14 shards finished" | tee -a "$RUNALL_LOG"

  # Combine the six per-shard result files.
  python scripts/14_merge_shards.py \
    --dim monitoring \
    --shards "${SHARD_FILES[@]}" \
    2>&1 | tee -a "$RUNALL_LOG"

  t_end=$(date +%s)
  echo "Stage 14 (parallel + merge) took $((t_end - t_start))s" | tee -a "$RUNALL_LOG"
fi

# The remaining stages again run as single processes that see only GPU 0.
export CUDA_VISIBLE_DEVICES=0

run_stage 16 "Cumulative top-k multi-layer sweep" \
  python scripts/16_cumulative_topk.py \
  --dim monitoring --n_test "$N_K_TEST"

run_stage 15 "Calibrated inference (monitoring)" \
  python scripts/15_infer_calibrated.py \
  --dim monitoring --auto_problems \
  --save_to "$DATA_ROOT/results/infer_calibrated_monitoring_v5.json"

run_stage 13 "Final analysis + report" \
  python scripts/13_analyze_and_report.py

# End-of-run banner listing the output files to look at first; shown on the
# terminal and appended to the log.
{
  echo ""
  echo "========================================================="
  echo "v5 pipeline complete - $(date)"
  echo "========================================================="
  echo "KEY FILES TO READ FIRST:"
  echo " $DATA_ROOT/checkpoints/probe_layer_ranking_monitoring.json"
  echo " $DATA_ROOT/results/per_layer_calibration_monitoring.json <- safe_layers"
  echo " $DATA_ROOT/results/stage16_cumulative_topk_summary.json <- collapse cliff"
  echo " $DATA_ROOT/results/infer_calibrated_monitoring_v5.json <- final output"
  echo " $DATA_ROOT/results/final_report.md"
  echo "========================================================="
} | tee -a "$RUNALL_LOG"
