# Source: Hugging Face repo page (scrape residue, kept as comments so the file parses)
# Bailan-Alex's picture
# Upload folder using huggingface_hub
# 4f2b2f4 verified
# Training mode configuration
training_mode: 'llada'  # one of 'llada' or 'dream'; selects which model family the trainer targets
# Model and data path configuration
paths:
  # Hugging Face model id of the base instruct model
  model: 'GSAI-ML/LLaDA-8B-Instruct'
  # experiment / checkpoint directory name
  experiment: 'ckpt_llada_instruct'
  data:
    # Hugging Face dataset ids: "hard" / "easy" filtered splits of the bs17k collection
    bs: 'Lansechen/bs17k_collection_filtered_hard_maxlength600'
    bs_easy: 'Lansechen/bs17k_collection_filtered_easy_maxlength600'
denoiser:
  encoder:
    name: 'dream'
    # token id used as the mask token by the encoder
    mask_id: 151666
  decoder:
    wiinit: true  # NOTE(review): presumably a weight-init flag — confirm against the consumer
    name: 'eagle_rope'
    num_blocks: 1
    # anchored so train/data sections can alias the same sequence length
    seq_len: &seq_len 1024
    input_dim: 3584
    # anchored hidden size, aliased by the per-block config below
    hidden_dim: &dim 3584
    vocab_size: 152064
    block:
      seq_len: *seq_len
      hidden_dim: *dim
      num_heads: 32
train:
  # Resume paths: explicit null (not bare keys) per yamllint empty-values.
  # Will use paths.experiment path when these are null.
  decoder_resume_path: null
  head_resume_path: null
  skipped_keys: null
  global_step: null
  exp_name: &exp_name 'llada_ddt_maskteacher'
  wandb_proj: *exp_name  # W&B project reuses the experiment name
  output_dir: 'ddt_test'
  logging_dir: 'logs'
  mixed_precision: 'fp16'
  gradient_accumulation_steps: 5
  report_to: 'wandb'
  block_size: 16
  # 1.0e-5, not 1e-5: YAML 1.1 loaders (e.g. PyYAML) resolve '1e-5' as the
  # string "1e-5"; the dotted exponent form is a float on all loaders.
  lr: 1.0e-5
  num_iters: 50000
  # NOTE(review): eval_every > num_iters, so eval never fires mid-run — confirm intended
  eval_every: 100000
  save_every: 1000
  enable_shift: true
  share_steps: 2
  self_align: true
  feature_align: false
  self_step: true
# NOTE(review): placed at document root; could plausibly be train.data — confirm
# against the config loader before relying on this nesting.
data:
  name: 'bs17k'  # one of ['numinamath', 'bs17k']
  batch_size: 1
  max_length: *seq_len  # reuse denoiser.decoder.seq_len (1024)