checkpoint_dir: checkpoints/meta-llama/Meta-Llama-3-8B-Instruct
out_dir: /apdcephfs/share_300000800/user/wenlinyao/research2/AlphaCode/litgpt_trained_model/llama-3-8b-instruct_4k_alphaflow_v10
precision: bf16-true
devices: 8
resume: false
data:
  class_path: litgpt.data.JSON
  init_args:
    json_path: /apdcephfs/share_300000800/user/wenlinyao/research2/AlphaCode/litgpt_data/alphaflow_training_data_v10.json
    mask_prompt: false
    val_split_fraction: 0.005
    prompt_style: llama3
    ignore_index: -100
    seed: 42
    num_workers: 4
train:
  save_interval: 600
  log_interval: 1
  global_batch_size: 128
  micro_batch_size: 1
  lr_warmup_steps: 50
  epochs: 4
  max_seq_length: 4096
  min_lr: 2.0e-06
eval:
  interval: 200
  max_new_tokens: 2048
  max_iters: 100
  initial_validation: false
optimizer:
  class_path: torch.optim.AdamW
  init_args:
    lr: 2.0e-05
    weight_decay: 0.02
    betas:
      - 0.9
      - 0.95
logger_name: csv
seed: 11