#!/usr/bin/env bash
#
# Sweep launcher: starts eight background runs of the
# baseline_1p5b_pipeline_v4.sh pipeline, one per GPU id 0-7, each with its own
# set of environment overrides.
#
# Each run:
#   * gets its own output directory  $SWEEP_ROOT/<variant>/
#   * logs stdout+stderr to          $SWEEP_ROOT/<variant>/nohup.log
#   * is recorded as "<pid> <gpu> <variant>" in $SWEEP_ROOT/PIDS.txt
#
# The script returns as soon as all jobs have been started; it does not wait
# for them. All paths are hard-coded below; there are no command-line
# arguments.

# Abort on a failing command, on use of an unset variable, and on a failure in
# any stage of a pipeline.
set -euo pipefail

# Project root, the directory holding this sweep's runs, and the pipeline
# script that every run executes.
readonly ROOT=/home/ubuntu/curriculum_cot
readonly SWEEP_ROOT=$ROOT/_runs/baseline_1p5b_v4_20260523_184952
readonly PIPELINE=$ROOT/_runs/baseline_1p5b_pipeline_v4.sh

# Checkpoints produced by earlier runs in this sweep directory. They are used
# as initialisation points for the new runs and/or verified before launching.
readonly PIPE_V_S3SFT_LATEST=$SWEEP_ROOT/pipe_v_sft_extend/s3_sft/checkpoint-step-04000
readonly PIPE_M_S3SFT_LATEST=$SWEEP_ROOT/pipe_m_s3sft_from_b/s3_sft/checkpoint-step-02400
readonly PIPE_V_S3GRPO_BEST=$SWEEP_ROOT/pipe_v_sft_extend/s3_grpo/checkpoint-1000
readonly PIPE_M_S3GRPO_BEST=$SWEEP_ROOT/pipe_m_s3sft_from_b/s3_grpo/checkpoint-200
readonly PIPE_O_S3SFT_LATEST=$SWEEP_ROOT/pipe_o_s3sft_lr5e6/s3_sft/checkpoint-step-02400

# Stage-2 SFT checkpoint; handed to every run as S2_SFT_CKPT.
readonly CKPT_LR5E5=$ROOT/checkpoints/sudoku-9x9-20empty-baseline-1p5b-sweep/baseline_lr5e5_lowsft_v3/s2_sft_v3/checkpoint-step-03000

# Pre-flight: refuse to launch anything unless every required input exists.
# All problems are reported on stderr before exiting, so they can be fixed in
# one pass.
missing=0
for c in "$PIPE_V_S3SFT_LATEST" "$PIPE_M_S3SFT_LATEST" "$PIPE_V_S3GRPO_BEST" \
  "$PIPE_M_S3GRPO_BEST" "$PIPE_O_S3SFT_LATEST"; do
  if [[ ! -d "$c" ]]; then
    printf 'MISSING: %s\n' "$c" >&2
    missing=1
  fi
done

# The pipeline script itself: without it every job would die immediately in
# the background while this script still reported a successful launch.
if [[ ! -f "$PIPELINE" ]]; then
  printf 'MISSING: %s\n' "$PIPELINE" >&2
  missing=1
fi

# S2_SFT_CKPT is exported to every run. Whether the pipeline reads it when
# starting from an s3_* phase is not visible from this script, so only warn.
[[ -e "$CKPT_LR5E5" ]] \
  || printf 'WARNING: S2_SFT_CKPT not found: %s\n' "$CKPT_LR5E5" >&2

(( missing == 0 )) || exit 1

#######################################
# Start one pipeline run in the background, detached from this shell.
# Globals:
#   ROOT, SWEEP_ROOT, PIPELINE, CKPT_LR5E5 (read)
# Arguments:
#   $1   - GPU id, passed to the pipeline as GPU
#   $2   - variant name; also the run's output directory under SWEEP_ROOT
#   $3.. - extra NAME=VALUE environment overrides for the pipeline
# Outputs:
#   Appends "<pid> <gpu> <variant>" to $SWEEP_ROOT/PIDS.txt and prints a
#   one-line summary on stdout. The run's own stdout/stderr go to
#   $SWEEP_ROOT/<variant>/nohup.log, which is truncated on each launch.
# Returns:
#   0 once the job has been started; 2 on a usage error (nothing is started).
#######################################
launch() {
  if (( $# < 2 )); then
    printf 'usage: launch GPU VARIANT [NAME=VALUE ...]\n' >&2
    return 2
  fi
  local gpu=$1 variant=$2
  shift 2

  # env(1) treats the first argument without '=' as the command to run, so a
  # typo such as "GRPO_LR 2e-6" would silently replace the pipeline. Reject
  # anything that is not a NAME=VALUE assignment before starting the job.
  local -r assign_re='^[A-Za-z_][A-Za-z0-9_]*='
  local kv
  for kv in "$@"; do
    if [[ ! $kv =~ $assign_re ]]; then
      printf 'launch: %s: not a NAME=VALUE override: %s\n' "$variant" "$kv" >&2
      return 2
    fi
  done

  local out=$SWEEP_ROOT/$variant
  mkdir -p "$out"

  # Fixed settings first, then the caller's overrides, then the command.
  # stdin is /dev/null and all output goes to the log so the job holds no
  # reference to the launching terminal.
  nohup env ROOT="$ROOT" VARIANT="$variant" GPU="$gpu" S2_SFT_CKPT="$CKPT_LR5E5" \
    OUTPUT_ROOT="$out" USE_WANDB=0 WANDB_MODE=offline "$@" \
    bash "$PIPELINE" </dev/null >"$out/nohup.log" 2>&1 &
  local pid=$!

  echo "$pid $gpu $variant" >> "$SWEEP_ROOT/PIDS.txt"
  # Best effort: drop the job from this shell's job table; a failure here is
  # deliberately ignored because the job is already running.
  disown "$pid" 2>/dev/null || true
  printf 'GPU %s -> %s pid=%s\n' "$gpu" "$variant" "$pid"
}

# ---------------------------------------------------------------------------
# Sweep definition: eight runs, one per GPU id 0-7.
# Argument groups that repeat across runs are factored out so that each call
# shows only what distinguishes it.
# ---------------------------------------------------------------------------

# GRPO batch settings shared by every run.
GRPO_BATCH_ARGS=(GRPO_BS=32 GRPO_GA=1 GRPO_NG=8)
# Reward settings shared by every run except v6_b.
CARD_REWARD_ARGS=(PENALTY_MISSING=0.75 EXACT_MATCH_BONUS=2.0 CARD_MISMATCH_PEN=1.0)
# SFT batch settings shared by the runs that start at s3_sft (E, F, G).
SFT_BATCH_ARGS=(SFT_BS=16 SFT_GA=1)

# --- Runs starting at s3_grpo ----------------------------------------------

# A: from the pipe_v best GRPO checkpoint, shared reward settings.
launch 0 v6_a_grpo_v_card \
  START_PHASE=s3_grpo S3_GRPO_INIT="$PIPE_V_S3GRPO_BEST" \
  GRPO_LR=2e-6 "${GRPO_BATCH_ARGS[@]}" \
  "${CARD_REWARD_ARGS[@]}" \
  S3_GRPO_MAX_STEPS=2000

# B: same as A but with larger penalty/bonus values.
launch 1 v6_b_grpo_v_sharp \
  START_PHASE=s3_grpo S3_GRPO_INIT="$PIPE_V_S3GRPO_BEST" \
  GRPO_LR=2e-6 "${GRPO_BATCH_ARGS[@]}" \
  PENALTY_MISSING=1.0 EXACT_MATCH_BONUS=4.0 CARD_MISMATCH_PEN=3.0 \
  S3_GRPO_MAX_STEPS=2000

# C: from the pipe_v latest S3 SFT checkpoint, GRPO_LR=5e-6.
launch 2 v6_c_grpo_vsft_card \
  START_PHASE=s3_grpo S3_GRPO_INIT="$PIPE_V_S3SFT_LATEST" \
  GRPO_LR=5e-6 "${GRPO_BATCH_ARGS[@]}" \
  "${CARD_REWARD_ARGS[@]}" \
  S3_GRPO_MAX_STEPS=2000

# D: as C, but from the pipe_m latest S3 SFT checkpoint.
launch 3 v6_d_grpo_msft_card \
  START_PHASE=s3_grpo S3_GRPO_INIT="$PIPE_M_S3SFT_LATEST" \
  GRPO_LR=5e-6 "${GRPO_BATCH_ARGS[@]}" \
  "${CARD_REWARD_ARGS[@]}" \
  S3_GRPO_MAX_STEPS=2000

# --- Runs starting at s3_sft -----------------------------------------------
# These also carry GRPO settings; presumably the pipeline continues into
# s3_grpo after the SFT phase (confirm against the pipeline script).

# E: SFT from the pipe_v latest S3 SFT checkpoint with SFT_OVERSAMPLE=5.
launch 4 v6_e_sft_v_oversample5 \
  START_PHASE=s3_sft S3_SFT_INIT="$PIPE_V_S3SFT_LATEST" \
  SFT_LR_S3=2e-6 "${SFT_BATCH_ARGS[@]}" \
  SFT_OVERSAMPLE=5 \
  S3_SFT_MAX_STEPS=2500 \
  GRPO_LR=2e-6 "${GRPO_BATCH_ARGS[@]}" \
  "${CARD_REWARD_ARGS[@]}" \
  S3_GRPO_MAX_STEPS=1500

# F: same as E with SFT_OVERSAMPLE=8.
launch 5 v6_f_sft_v_oversample8 \
  START_PHASE=s3_sft S3_SFT_INIT="$PIPE_V_S3SFT_LATEST" \
  SFT_LR_S3=2e-6 "${SFT_BATCH_ARGS[@]}" \
  SFT_OVERSAMPLE=8 \
  S3_SFT_MAX_STEPS=2500 \
  GRPO_LR=2e-6 "${GRPO_BATCH_ARGS[@]}" \
  "${CARD_REWARD_ARGS[@]}" \
  S3_GRPO_MAX_STEPS=1500

# G: lower SFT learning rate, fewer SFT steps, and SFT_TGT_MIN=2.
# NOTE(review): the meaning of SFT_TGT_MIN is defined by the pipeline script
# and is not visible here.
launch 6 v6_g_sft_v_mv_only \
  START_PHASE=s3_sft S3_SFT_INIT="$PIPE_V_S3SFT_LATEST" \
  SFT_LR_S3=1e-6 "${SFT_BATCH_ARGS[@]}" \
  SFT_OVERSAMPLE=5 SFT_TGT_MIN=2 \
  S3_SFT_MAX_STEPS=2000 \
  GRPO_LR=2e-6 "${GRPO_BATCH_ARGS[@]}" \
  "${CARD_REWARD_ARGS[@]}" \
  S3_GRPO_MAX_STEPS=1500

# --- Longer GRPO run ---------------------------------------------------------

# H: same as A plus GRPO_BETA=0.01, run for 3000 steps instead of 2000.
launch 7 v6_h_grpo_v_card_long \
  START_PHASE=s3_grpo S3_GRPO_INIT="$PIPE_V_S3GRPO_BEST" \
  GRPO_LR=2e-6 "${GRPO_BATCH_ARGS[@]}" \
  GRPO_BETA=0.01 \
  "${CARD_REWARD_ARGS[@]}" \
  S3_GRPO_MAX_STEPS=3000

# Show what was just started: the last eight PIDS.txt entries, one per launch.
# The file is only ever appended to, so it may also hold older entries.
printf '\n=== launched ===\n'
tail -n 8 "$SWEEP_ROOT/PIDS.txt"