curriculum-cot-code / .env.example
Avra98's picture
Initial code dump (rebuttal-ready snapshot)
76de008 verified
# Copy to .env for local runs. Do not put real secrets in this example file.
# Hugging Face token, if needed for private/gated repos or higher rate limits.
# HF_TOKEN=hf_xxx
# Weights & Biases configuration.
# WANDB_MODE is set to "online" in this example.
WANDB_MODE=online
WANDB_ENTITY=training-dynamics
# Project and run name are intentionally left empty here; set them per run.
# NOTE(review): how the launch scripts treat an empty value is not visible in
# this file - confirm before relying on a fallback.
WANDB_PROJECT=
WANDB_RUN_NAME=
# The double underscore in WANDB__SERVICE_WAIT is deliberate, not a typo.
# Presumably a wandb service start-up timeout in seconds - confirm against wandb docs.
WANDB__SERVICE_WAIT=300
# GPU/distributed launch defaults.
# GPU_IDS and CUDA_VISIBLE_DEVICES carry the same eight device indices (0-7) in
# this example. NOTE(review): presumably they must be edited together - confirm
# which one the launch scripts actually read.
GPU_IDS=0,1,2,3,4,5,6,7
NUM_PROCESSES=8
# NOTE(review): NPROC_PER_JOB (2) is smaller than NUM_PROCESSES (8); presumably
# several jobs run side by side on separate GPU groups - confirm against the
# launch scripts.
NPROC_PER_JOB=2
MASTER_PORT=29501
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
# PyTorch CUDA allocator configuration string (option: expandable_segments).
PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
# Model/cache/output defaults used by launch scripts.
# MODEL_NAME is written in "organization/model" form.
MODEL_NAME=Qwen/Qwen2.5-1.5B-Instruct
# Relative path. NOTE(review): presumably resolved against the directory the
# launch script runs from - confirm.
CACHE_DIR=.hf_cache
# The three values below are left empty in this example; fill them in locally.
# NOTE(review): whether the scripts supply defaults for empty values is not
# visible here.
RUN_TAG=
CHECKPOINT_ROOT=
OUTPUT_ROOT=
# Warm-baseline all-latent stage pipeline defaults.
EMPTIES=20
MODES_SPEC=recurrent_hidden
# Same eight device indices as GPU_IDS / CUDA_VISIBLE_DEVICES in this file.
# NOTE(review): the syntax for separating multiple groups is not shown here -
# check the launch scripts before splitting GPUs into groups.
GPU_GROUPS_SPEC=0,1,2,3,4,5,6,7
# Puzzle counts for training and evaluation.
TRAIN_PUZZLES=10000
EVAL_PUZZLES=100
# NOTE(review): presumably early-stopping thresholds (solve rate as a fraction,
# with VALUE_TARGET=0 meaning "no value target") that only apply after
# MIN_STEPS_BEFORE_STOP steps - confirm against the training code.
SOLVE_TARGET=0.95
VALUE_TARGET=0
MIN_STEPS_BEFORE_STOP=50
# Per-phase step caps (baseline warm-up, latent SFT, latent GRPO).
BASELINE_WARM_MAX_STEPS=1000
LATENT_SFT_MAX_STEPS=1000
LATENT_GRPO_MAX_STEPS=500
# Epoch counts for SFT and GRPO.
# NOTE(review): how these interact with the *_MAX_STEPS caps above is not
# visible here - presumably whichever limit is hit first ends the phase.
SFT_NUM_EPOCHS=64
GRPO_NUM_TRAIN_EPOCHS=50
# SFT/GRPO batch and LoRA defaults.
# Each phase has a per-device batch size (*_PER_DEVICE_BS) and a gradient
# accumulation count (*_GRAD_ACCUM).
SFT_PER_DEVICE_BS=8
SFT_GRAD_ACCUM=2
BASELINE_PER_DEVICE_BS=16
BASELINE_GRAD_ACCUM=2
GRPO_PER_DEVICE_BS=4
GRPO_GRAD_ACCUM=2
# LoRA settings; LORA_ALPHA here is twice LORA_R.
LORA_R=32
LORA_ALPHA=64
LORA_DROPOUT=0.05
# NOTE(review): presumably the GRPO KL-penalty coefficient, with 0.0 disabling
# the penalty - confirm against the trainer configuration.
GRPO_BETA=0.0
# Optional resume adapters.
# All entries are empty in this example. NOTE(review): presumably an empty
# value means "train this phase from scratch" - confirm against the launch scripts.
# Stage 1 uses the name BASELINE, while stages 2 and 3 use BASELINE_WARM.
STAGE1_BASELINE_ADAPTER_DIR=
STAGE1_LATENT_SFT_ADAPTER_DIR=
STAGE1_LATENT_GRPO_ADAPTER_DIR=
STAGE2_BASELINE_WARM_ADAPTER_DIR=
STAGE2_LATENT_SFT_ADAPTER_DIR=
STAGE2_LATENT_GRPO_ADAPTER_DIR=
STAGE3_BASELINE_WARM_ADAPTER_DIR=
STAGE3_LATENT_SFT_ADAPTER_DIR=
# NOTE(review): there is no STAGE3_LATENT_GRPO_ADAPTER_DIR entry, unlike stages
# 1 and 2 - confirm whether that is intentional.
# Optional debug knobs.
# The three *_DEBUG_LIMIT values are 0 in this example.
# NOTE(review): presumably 0 turns the corresponding debug output off - confirm
# against the code that reads them.
FIXED_SLOT_DEBUG_LIMIT=0
FIXED_SLOT_DECODE_DEBUG_LIMIT=0
LATENT_VOCAB_DEBUG_TOPK=1
ATTN_DENSITY_DEBUG_LIMIT=0
ATTN_DENSITY_THRESHOLD_MULT=1.0