# Copy to .env for local runs. Do not put real secrets in this example file.

# Hugging Face token, if needed for private/gated repos or higher rate limits.
# HF_TOKEN=hf_xxx

# Weights & Biases configuration.
WANDB_MODE=online
WANDB_ENTITY=training-dynamics
WANDB_PROJECT=
WANDB_RUN_NAME=
WANDB__SERVICE_WAIT=300

# GPU/distributed launch defaults.
# NOTE(review): GPU_IDS, CUDA_VISIBLE_DEVICES and GPU_GROUPS_SPEC (below) list
# the same eight device ids; presumably they must be edited together - confirm
# against the launch scripts.
GPU_IDS=0,1,2,3,4,5,6,7
NUM_PROCESSES=8
NPROC_PER_JOB=2
MASTER_PORT=29501
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True

# Model/cache/output defaults used by launch scripts.
MODEL_NAME=Qwen/Qwen2.5-1.5B-Instruct
CACHE_DIR=.hf_cache
RUN_TAG=
CHECKPOINT_ROOT=
OUTPUT_ROOT=

# Warm-baseline all-latent stage pipeline defaults.
EMPTIES=20
MODES_SPEC=recurrent_hidden
GPU_GROUPS_SPEC=0,1,2,3,4,5,6,7
TRAIN_PUZZLES=10000
EVAL_PUZZLES=100
SOLVE_TARGET=0.95
VALUE_TARGET=0
MIN_STEPS_BEFORE_STOP=50
BASELINE_WARM_MAX_STEPS=1000
LATENT_SFT_MAX_STEPS=1000
LATENT_GRPO_MAX_STEPS=500
SFT_NUM_EPOCHS=64
GRPO_NUM_TRAIN_EPOCHS=50

# SFT/GRPO batch and LoRA defaults.
SFT_PER_DEVICE_BS=8
SFT_GRAD_ACCUM=2
BASELINE_PER_DEVICE_BS=16
BASELINE_GRAD_ACCUM=2
GRPO_PER_DEVICE_BS=4
GRPO_GRAD_ACCUM=2
LORA_R=32
LORA_ALPHA=64
LORA_DROPOUT=0.05
GRPO_BETA=0.0

# Optional resume adapters.
STAGE1_BASELINE_ADAPTER_DIR=
STAGE1_LATENT_SFT_ADAPTER_DIR=
STAGE1_LATENT_GRPO_ADAPTER_DIR=
STAGE2_BASELINE_WARM_ADAPTER_DIR=
STAGE2_LATENT_SFT_ADAPTER_DIR=
STAGE2_LATENT_GRPO_ADAPTER_DIR=
STAGE3_BASELINE_WARM_ADAPTER_DIR=
STAGE3_LATENT_SFT_ADAPTER_DIR=

# Optional debug knobs.
FIXED_SLOT_DEBUG_LIMIT=0
FIXED_SLOT_DECODE_DEBUG_LIMIT=0
LATENT_VOCAB_DEBUG_TOPK=1
ATTN_DENSITY_DEBUG_LIMIT=0
ATTN_DENSITY_THRESHOLD_MULT=1.0