File size: 3,300 Bytes
76de008
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#!/usr/bin/env bash
#
# Launcher for run_small_latent_pipeline.py (small model, 20-empty data).
#
# Every setting below except PIPELINE can be overridden from the environment;
# the value after ":-" is used when the variable is unset or empty.
#
#   ROOT                      repository checkout all default paths hang off
#   PYTHON_BIN                interpreter (default: ${ROOT}/.venv/bin/python)
#   TRAIN_JSONL               training data file
#   CACHE_DIR                 cache directory (default: ${ROOT}/.hf_cache)
#   MODEL_NAME                model identifier
#   GPU_IDS, NUM_PROCESSES    device list and process count
#   MIN_STAGE, MAX_STAGE      stage range
#   RUN_TAG                   run label (default: current timestamp)
#   BASELINE_CHECKPOINT_ROOT, LATENT_CHECKPOINT_ROOT,
#   BASELINE_OUTPUT_ROOT, OUTPUT_ROOT
#                             checkpoint / output directories
set -euo pipefail

# Overridable like every other setting, so the script is not tied to one
# machine's home directory; the default is the original hard-coded path.
ROOT="${ROOT:-/home/ubuntu/curriculum-CoT}"
PYTHON_BIN="${PYTHON_BIN:-${ROOT}/.venv/bin/python}"
# Always derived from ROOT (not independently overridable).
PIPELINE="${ROOT}/sudoku/llm_policy_icon/small_model_20empty/run_small_latent_pipeline.py"
TRAIN_JSONL="${TRAIN_JSONL:-${ROOT}/sudoku/llm_policy_icon/data/sudoku_t3_20empty_value_qwen_text.jsonl}"
CACHE_DIR="${CACHE_DIR:-${ROOT}/.hf_cache}"
MODEL_NAME="${MODEL_NAME:-Qwen/Qwen2.5-0.5B-Instruct}"
GPU_IDS="${GPU_IDS:-0}"
NUM_PROCESSES="${NUM_PROCESSES:-1}"
MIN_STAGE="${MIN_STAGE:-1}"
MAX_STAGE="${MAX_STAGE:-4}"
# Timestamp default (YYYYmmdd_HHMMSS) gives each run its own output subdirectory.
RUN_TAG="${RUN_TAG:-$(date +%Y%m%d_%H%M%S)}"
BASELINE_CHECKPOINT_ROOT="${BASELINE_CHECKPOINT_ROOT:-${ROOT}/sudoku/llm_policy_icon/final_checkpoint/small_model_20empty/baseline}"
LATENT_CHECKPOINT_ROOT="${LATENT_CHECKPOINT_ROOT:-${ROOT}/sudoku/llm_policy_icon/final_checkpoint/small_model_20empty/latent}"
# Output roots default to <checkpoint root>/<RUN_TAG>/<pipeline name>.
BASELINE_OUTPUT_ROOT="${BASELINE_OUTPUT_ROOT:-${BASELINE_CHECKPOINT_ROOT}/${RUN_TAG}/baseline_pipeline_20empty_4stage_small}"
OUTPUT_ROOT="${OUTPUT_ROOT:-${LATENT_CHECKPOINT_ROOT}/${RUN_TAG}/latent_pipeline_20empty_4stage_small}"

# Build the pipeline invocation as an array so each argument stays one word
# even if a path contains spaces. Groups are appended in the original order.

# Interpreter and entry point; the same interpreter is also forwarded via
# --python_executable.
cmd=("${PYTHON_BIN}" "${PIPELINE}" --python_executable "${PYTHON_BIN}")

# Data, model, and output locations.
cmd+=(--train_jsonl "${TRAIN_JSONL}")
cmd+=(--cache_dir "${CACHE_DIR}")
cmd+=(--model_name "${MODEL_NAME}")
cmd+=(--checkpoint_root "${LATENT_CHECKPOINT_ROOT}")
cmd+=(--baseline_output_root "${BASELINE_OUTPUT_ROOT}")
cmd+=(--output_root "${OUTPUT_ROOT}")
cmd+=(--run_tag "${RUN_TAG}")

# Stage range and device/process layout; SFT and GRPO share NUM_PROCESSES.
cmd+=(--min_stage "${MIN_STAGE}" --max_stage "${MAX_STAGE}")
cmd+=(--distributed_gpu_ids "${GPU_IDS}")
cmd+=(--sft_num_processes "${NUM_PROCESSES}")
cmd+=(--grpo_num_processes "${NUM_PROCESSES}")

# Training hyperparameters, each overridable through its own env variable.
cmd+=(--total_empties_hint "${TOTAL_EMPTIES_HINT:-20}")
cmd+=(--sft_num_epochs "${SFT_NUM_EPOCHS:-1.0}")
cmd+=(--grpo_num_train_epochs "${GRPO_NUM_TRAIN_EPOCHS:-1.0}")
cmd+=(--sft_gradient_accumulation_steps "${SFT_GRADIENT_ACCUMULATION_STEPS:-8}")
cmd+=(--grpo_per_device_train_batch_size "${GRPO_PER_DEVICE_TRAIN_BATCH_SIZE:-4}")
cmd+=(--grpo_gradient_accumulation_steps "${GRPO_GRADIENT_ACCUMULATION_STEPS:-4}")
cmd+=(--grpo_num_generations "${GRPO_NUM_GENERATIONS:-2}")

# Gradient checkpointing flags are always passed for both phases.
cmd+=(--sft_enable_gradient_checkpointing --grpo_enable_gradient_checkpointing)

# Save/eval cadence, per-phase wall-clock limit, and W&B mode.
cmd+=(--sft_save_steps "${SFT_SAVE_STEPS:-100}")
cmd+=(--sft_eval_steps "${SFT_EVAL_STEPS:-100}")
cmd+=(--grpo_save_steps "${GRPO_SAVE_STEPS:-25}")
cmd+=(--grpo_eval_steps "${GRPO_EVAL_STEPS:-25}")
cmd+=(--phase_max_wall_clock_seconds "${PHASE_MAX_WALL_CLOCK_SECONDS:-21600}")
cmd+=(--wandb_mode "${WANDB_MODE:-offline}")

# Optional value flags: each "ENV_VAR=flag_name" pair below is forwarded as
# "--flag_name <value>" only when ENV_VAR is set to a non-empty value.
# Pairs are processed in the order listed.
for opt_spec in \
  BOOTSTRAP_ADAPTER_DIR=bootstrap_adapter_dir \
  STAGE1_INIT_ADAPTER_DIR=stage1_init_adapter_dir \
  LIMIT_TRAIN_ROWS=limit_train_rows \
  SFT_STAGE_MAX_STEPS=sft_stage_max_steps \
  GRPO_STAGE_MAX_STEPS=grpo_stage_max_steps; do
  opt_var="${opt_spec%%=*}"
  # Indirect lookup with an empty default, so an unset variable is safe
  # under `set -u`.
  opt_val="${!opt_var:-}"
  [[ -n "${opt_val}" ]] || continue
  cmd+=("--${opt_spec#*=}" "${opt_val}")
done
unset opt_spec opt_var opt_val

# W&B is handled separately: a non-empty entity also adds --use_wandb.
[[ -z "${WANDB_ENTITY:-}" ]] || cmd+=(--use_wandb --wandb_entity "${WANDB_ENTITY}")

# Print the effective run settings (one line each), then run the assembled
# command; under `set -e` its exit status becomes the script's.
printf '%s\n' \
  "Launching small latent pipeline on GPUs ${GPU_IDS}" \
  "Baseline root: ${BASELINE_OUTPUT_ROOT}" \
  "Latent output root: ${OUTPUT_ROOT}" \
  "Stages: ${MIN_STAGE} -> ${MAX_STAGE}, processes=${NUM_PROCESSES}"

"${cmd[@]}"