| |
|
|
| |
| |
|
|
| |
| WANDB_MODE=online |
| WANDB_ENTITY=training-dynamics |
| WANDB_PROJECT= |
| WANDB_RUN_NAME= |
| WANDB__SERVICE_WAIT=300 |
|
|
| |
| GPU_IDS=0,1,2,3,4,5,6,7 |
| NUM_PROCESSES=8 |
| NPROC_PER_JOB=2 |
| MASTER_PORT=29501 |
| CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 |
| PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True |
|
|
| |
| MODEL_NAME=Qwen/Qwen2.5-1.5B-Instruct |
| CACHE_DIR=.hf_cache |
| RUN_TAG= |
| CHECKPOINT_ROOT= |
| OUTPUT_ROOT= |
|
|
| |
| EMPTIES=20 |
| MODES_SPEC=recurrent_hidden |
| GPU_GROUPS_SPEC=0,1,2,3,4,5,6,7 |
| TRAIN_PUZZLES=10000 |
| EVAL_PUZZLES=100 |
| SOLVE_TARGET=0.95 |
| VALUE_TARGET=0 |
| MIN_STEPS_BEFORE_STOP=50 |
| BASELINE_WARM_MAX_STEPS=1000 |
| LATENT_SFT_MAX_STEPS=1000 |
| LATENT_GRPO_MAX_STEPS=500 |
| SFT_NUM_EPOCHS=64 |
| GRPO_NUM_TRAIN_EPOCHS=50 |
|
|
| |
| SFT_PER_DEVICE_BS=8 |
| SFT_GRAD_ACCUM=2 |
| BASELINE_PER_DEVICE_BS=16 |
| BASELINE_GRAD_ACCUM=2 |
| GRPO_PER_DEVICE_BS=4 |
| GRPO_GRAD_ACCUM=2 |
| LORA_R=32 |
| LORA_ALPHA=64 |
| LORA_DROPOUT=0.05 |
| GRPO_BETA=0.0 |
|
|
| |
| STAGE1_BASELINE_ADAPTER_DIR= |
| STAGE1_LATENT_SFT_ADAPTER_DIR= |
| STAGE1_LATENT_GRPO_ADAPTER_DIR= |
| STAGE2_BASELINE_WARM_ADAPTER_DIR= |
| STAGE2_LATENT_SFT_ADAPTER_DIR= |
| STAGE2_LATENT_GRPO_ADAPTER_DIR= |
| STAGE3_BASELINE_WARM_ADAPTER_DIR= |
| STAGE3_LATENT_SFT_ADAPTER_DIR= |
|
|
| |
| FIXED_SLOT_DEBUG_LIMIT=0 |
| FIXED_SLOT_DECODE_DEBUG_LIMIT=0 |
| LATENT_VOCAB_DEBUG_TOPK=1 |
| ATTN_DENSITY_DEBUG_LIMIT=0 |
| ATTN_DENSITY_THRESHOLD_MULT=1.0 |
|
|