---
# Training run configuration.
#
# NOTE(review): the original file had every key on one physical line, which
# is not parseable YAML (a plain scalar may not contain ": "). The block
# structure below was reconstructed from key order: keys after `train:` up
# to `eval:` are nested under `train`, keys after `eval:` up to `optimizer`
# are nested under `eval`. Confirm the nesting against the consumer's schema.

# Input checkpoint location and output directory for this run.
checkpoint_dir: /mnt/home/xiezhifei/projects/zh/checkpoint/stage1_paskale2e/qwen-2.5-3B
out_dir: /data/zihang/paskal/checkpoint/train___stage_c_benchmark_v2

# Hardware layout.
devices: 8
num_nodes: 1

resume: false

# Training-loop settings.
train:
  save_interval: 50
  log_interval: 3
  global_batch_size: 64
  micro_batch_size: 2
  lr_warmup_steps: 100
  epochs: 2
  max_seq_length: 4096
  min_lr: 6.0e-05

# Evaluation settings.
eval:
  interval: 25
  max_new_tokens: 2000
  max_iters: 500
  initial_validation: true
  final_validation: true
  evaluate_example: first

optimizer: AdamW
logger_name: tensorboard
seed: 1337