---
# litgpt continued-pretraining config: opencoder-1.5b on a weighted mix of
# pystack (0.8) and opcanneal (0.2), 50k steps.
model_name: opencoder-1.5b
out_dir: /home/aiops/zhuty/litgpt_out/pretrain/opencoder-1.5b-pystack80-opcanneal20-50ksteps
precision: bf16-mixed
initial_checkpoint_dir: checkpoints/tyzhu/opencoder-1.5b
resume: auto

# Data: weighted mix of text-file corpora; "path,weight" pairs joined by ";".
data:
  class_path: litgpt.data.WeightedTextFiles
  init_args:
    # quoted: values contain ",", ";" and ":" runs that are unsafe as plain scalars
    data_sources: '/home/aiops/zhuty/cont_data//pystack/train,0.8;/home/aiops/zhuty/cont_data//opcanneal/train,0.2'
    validation_corpora: 'pystack:/home/aiops/zhuty/cont_data//pystack/test;opcanneal:/home/aiops/zhuty/cont_data//opcanneal/test'
    seed: 42
    num_workers: 2
    add_eos: true

# Training schedule and batch geometry.
train:
  save_interval: 2500
  save_optimizer_state: true
  max_optimizer_state: 1
  log_interval: 1
  global_batch_size: 1024
  micro_batch_size: 8
  lr_warmup_fraction: 0.01
  max_steps: 50000
  max_seq_length: 1024
  max_norm: 1.0
  min_lr: 5.0e-06

# Periodic evaluation during training.
eval:
  interval: 1000
  max_iters: 100
  initial_validation: true
  final_validation: true
  evaluate_example: first
  num_generation_examples: 1
  calculate_exact_match: false

log:
  project: mathcont

optimizer:
  class_path: torch.optim.AdamW
  init_args:
    lr: 5.0e-05
    weight_decay: 0.1
    betas:
      - 0.9
      - 0.95

devices: auto
num_nodes: 1
tokenizer_dir: checkpoints/tyzhu/opencoder-1.5b
logger_name: wandb
seed: 42
compiler: torch
executors:
  - sdpa
  - torchcompile
  - torch
strategy: fsdp

# Diffusion-LM options (disabled for this run).
diffusion: false
mask_token_id: 811
sampling_eps: 0.001