---
# litgpt pretrain configuration: continued pretraining of OLMo-1B on the
# "finecode" dataset for 5k steps. Reconstructed into valid block YAML —
# the original file had its indentation collapsed onto a single line.

model_name: OLMo-1B-hf
out_dir: /home/aiops/zhuty/litgpt_out/pretrain/olmo-1b-finecode-5ksteps
precision: bf16-mixed
# Weights to continue training from (must already be downloaded/converted).
initial_checkpoint_dir: checkpoints/allenai/OLMo-1B-hf
# "auto": resume from the latest checkpoint in out_dir if one exists.
resume: auto

data:
  class_path: litgpt.data.TextFiles
  init_args:
    train_data_path: /home/aiops/zhuty/cont_data/finecode/train
    val_data_path: /home/aiops/zhuty/cont_data/finecode/test
    seed: 42
    num_workers: 8
    add_eos: true

train:
  # Checkpoint every 1000 steps; also persist optimizer state so resume works.
  save_interval: 1000
  save_optimizer_state: true
  # NOTE(review): "max_optimizer_state" is not a stock litgpt TrainArgs field —
  # presumably a fork-specific cap on retained optimizer checkpoints; verify.
  max_optimizer_state: 1
  log_interval: 1
  # Effective batch: global 1024 sequences, accumulated from micro-batches of 8.
  global_batch_size: 1024
  micro_batch_size: 8
  # Warm up LR over the first 1% of training (50 of 5000 steps).
  lr_warmup_fraction: 0.01
  max_steps: 5000
  max_seq_length: 1024
  # Gradient clipping by global norm.
  max_norm: 1.0
  # Cosine-decay floor for the learning rate.
  min_lr: 5.0e-06

eval:
  interval: 1000
  max_iters: 100
  initial_validation: true
  final_validation: true
  # NOTE(review): the four keys below are not in stock litgpt EvalArgs —
  # presumably fork-specific generation/EM evaluation options; confirm.
  evaluate_example: first
  num_generation_examples: 1
  calculate_exact_match: false

log:
  project: mathcont

optimizer:
  class_path: torch.optim.AdamW
  init_args:
    # Peak LR; decays toward train.min_lr after warmup.
    lr: 5.0e-05
    weight_decay: 0.1
    betas:
      - 0.9
      - 0.95

devices: auto
num_nodes: 1
tokenizer_dir: checkpoints/allenai/OLMo-1B-hf
logger_name: wandb
seed: 42

# NOTE(review): "compiler"/"executors" are not stock litgpt pretrain options —
# presumably fork-specific (Thunder/torch.compile executor selection); verify.
compiler: torch
executors:
  - sdpa
  - torchcompile
  - torch
strategy: fsdp

# NOTE(review): diffusion-LM options below also look fork-specific; confirm
# mask_token_id matches the tokenizer's intended mask token.
diffusion: false
mask_token_id: 811
sampling_eps: 0.001