#!/usr/bin/env bash
#
# Launcher for run_small_latent_pipeline.py.
#
# Builds the pipeline's argument list from environment variables (every one
# has a default, so the script runs with no configuration) and then executes
# the pipeline with PYTHON_BIN.
#
# Overridable environment variables and the flag each one feeds:
#   PYTHON_BIN                 interpreter; also passed as --python_executable
#   TRAIN_JSONL                --train_jsonl
#   CACHE_DIR                  --cache_dir
#   MODEL_NAME                 --model_name
#   GPU_IDS                    --distributed_gpu_ids
#   NUM_PROCESSES              --sft_num_processes and --grpo_num_processes
#   MIN_STAGE / MAX_STAGE      --min_stage / --max_stage
#   RUN_TAG                    --run_tag (defaults to a timestamp)
#   BASELINE_CHECKPOINT_ROOT   only used to derive BASELINE_OUTPUT_ROOT
#   LATENT_CHECKPOINT_ROOT     --checkpoint_root; also derives OUTPUT_ROOT
#   BASELINE_OUTPUT_ROOT       --baseline_output_root
#   OUTPUT_ROOT                --output_root
#   TOTAL_EMPTIES_HINT, SFT_NUM_EPOCHS, GRPO_NUM_TRAIN_EPOCHS,
#   SFT_GRADIENT_ACCUMULATION_STEPS, GRPO_PER_DEVICE_TRAIN_BATCH_SIZE,
#   GRPO_GRADIENT_ACCUMULATION_STEPS, GRPO_NUM_GENERATIONS,
#   SFT_SAVE_STEPS, SFT_EVAL_STEPS, GRPO_SAVE_STEPS, GRPO_EVAL_STEPS,
#   PHASE_MAX_WALL_CLOCK_SECONDS, WANDB_MODE
#                              forwarded to the same-named lower-case flags
#
# Optional (flag is omitted entirely when the variable is unset or empty):
#   BOOTSTRAP_ADAPTER_DIR, STAGE1_INIT_ADAPTER_DIR, LIMIT_TRAIN_ROWS,
#   SFT_STAGE_MAX_STEPS, GRPO_STAGE_MAX_STEPS
#   WANDB_ENTITY               adds --use_wandb --wandb_entity <value>
set -euo pipefail

# Print an error message to stderr and abort.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# Append "<flag> <value>" to the global cmd array when value is non-empty.
# Written with `if` (not `[[ ]] &&`) so an empty value returns 0 under set -e.
append_if_set() {
  local flag=$1
  local value=$2
  if [[ -n "${value}" ]]; then
    cmd+=("${flag}" "${value}")
  fi
}

ROOT="/home/ubuntu/curriculum-CoT"
PYTHON_BIN="${PYTHON_BIN:-${ROOT}/.venv/bin/python}"
PIPELINE="${ROOT}/sudoku/llm_policy_icon/small_model_20empty/run_small_latent_pipeline.py"
TRAIN_JSONL="${TRAIN_JSONL:-${ROOT}/sudoku/llm_policy_icon/data/sudoku_t3_20empty_value_qwen_text.jsonl}"
CACHE_DIR="${CACHE_DIR:-${ROOT}/.hf_cache}"
MODEL_NAME="${MODEL_NAME:-Qwen/Qwen2.5-0.5B-Instruct}"
GPU_IDS="${GPU_IDS:-0}"
NUM_PROCESSES="${NUM_PROCESSES:-1}"
MIN_STAGE="${MIN_STAGE:-1}"
MAX_STAGE="${MAX_STAGE:-4}"
RUN_TAG="${RUN_TAG:-$(date +%Y%m%d_%H%M%S)}"
BASELINE_CHECKPOINT_ROOT="${BASELINE_CHECKPOINT_ROOT:-${ROOT}/sudoku/llm_policy_icon/final_checkpoint/small_model_20empty/baseline}"
LATENT_CHECKPOINT_ROOT="${LATENT_CHECKPOINT_ROOT:-${ROOT}/sudoku/llm_policy_icon/final_checkpoint/small_model_20empty/latent}"
# Output roots are namespaced by RUN_TAG unless the caller overrides them.
BASELINE_OUTPUT_ROOT="${BASELINE_OUTPUT_ROOT:-${BASELINE_CHECKPOINT_ROOT}/${RUN_TAG}/baseline_pipeline_20empty_4stage_small}"
OUTPUT_ROOT="${OUTPUT_ROOT:-${LATENT_CHECKPOINT_ROOT}/${RUN_TAG}/latent_pipeline_20empty_4stage_small}"

# Fail fast with a clear message instead of a late, opaque launch error.
# `command -v` accepts both an executable path and a bare name found on PATH.
command -v "${PYTHON_BIN}" >/dev/null 2>&1 \
  || die "Python interpreter not found or not executable: ${PYTHON_BIN}"
[[ -f "${PIPELINE}" ]] || die "Pipeline script not found: ${PIPELINE}"

# The command is kept as an array so values containing spaces survive intact.
cmd=(
  "${PYTHON_BIN}" "${PIPELINE}"
  --python_executable "${PYTHON_BIN}"
  --train_jsonl "${TRAIN_JSONL}"
  --cache_dir "${CACHE_DIR}"
  --model_name "${MODEL_NAME}"
  --checkpoint_root "${LATENT_CHECKPOINT_ROOT}"
  --baseline_output_root "${BASELINE_OUTPUT_ROOT}"
  --output_root "${OUTPUT_ROOT}"
  --run_tag "${RUN_TAG}"
  --min_stage "${MIN_STAGE}"
  --max_stage "${MAX_STAGE}"
  --distributed_gpu_ids "${GPU_IDS}"
  --sft_num_processes "${NUM_PROCESSES}"
  --grpo_num_processes "${NUM_PROCESSES}"
  --total_empties_hint "${TOTAL_EMPTIES_HINT:-20}"
  --sft_num_epochs "${SFT_NUM_EPOCHS:-1.0}"
  --grpo_num_train_epochs "${GRPO_NUM_TRAIN_EPOCHS:-1.0}"
  --sft_gradient_accumulation_steps "${SFT_GRADIENT_ACCUMULATION_STEPS:-8}"
  --grpo_per_device_train_batch_size "${GRPO_PER_DEVICE_TRAIN_BATCH_SIZE:-4}"
  --grpo_gradient_accumulation_steps "${GRPO_GRADIENT_ACCUMULATION_STEPS:-4}"
  --grpo_num_generations "${GRPO_NUM_GENERATIONS:-2}"
  --sft_enable_gradient_checkpointing
  --grpo_enable_gradient_checkpointing
  --sft_save_steps "${SFT_SAVE_STEPS:-100}"
  --sft_eval_steps "${SFT_EVAL_STEPS:-100}"
  --grpo_save_steps "${GRPO_SAVE_STEPS:-25}"
  --grpo_eval_steps "${GRPO_EVAL_STEPS:-25}"
  --phase_max_wall_clock_seconds "${PHASE_MAX_WALL_CLOCK_SECONDS:-21600}"
  --wandb_mode "${WANDB_MODE:-offline}"
)

# Optional flags: appended only when the corresponding variable is non-empty.
append_if_set --bootstrap_adapter_dir "${BOOTSTRAP_ADAPTER_DIR:-}"
append_if_set --stage1_init_adapter_dir "${STAGE1_INIT_ADAPTER_DIR:-}"
append_if_set --limit_train_rows "${LIMIT_TRAIN_ROWS:-}"
append_if_set --sft_stage_max_steps "${SFT_STAGE_MAX_STEPS:-}"
append_if_set --grpo_stage_max_steps "${GRPO_STAGE_MAX_STEPS:-}"

# Supplying an entity is what switches --use_wandb on.
if [[ -n "${WANDB_ENTITY:-}" ]]; then
  cmd+=(--use_wandb --wandb_entity "${WANDB_ENTITY}")
fi

printf 'Launching small latent pipeline on GPUs %s\n' "${GPU_IDS}"
printf 'Baseline root: %s\n' "${BASELINE_OUTPUT_ROOT}"
printf 'Latent output root: %s\n' "${OUTPUT_ROOT}"
printf 'Stages: %s -> %s, processes=%s\n' "${MIN_STAGE}" "${MAX_STAGE}" "${NUM_PROCESSES}"

# Log the fully resolved, shell-quoted command so the run can be reproduced.
printf 'Command:'
printf ' %q' "${cmd[@]}"
printf '\n'

"${cmd[@]}"