{
  "use_lora": false,
  "lora_rank": 8,
  "lora_alpha": 16,
  "lora_dropout": 0.0,
  "from_checkpoint": "/root/autodl-tmp/checkpoint_20241130_005942_step_307000.pt",
  "save_checkpoint_to": "/root/autodl-tmp/checkpoint",
  "dataset_path": [
    ["/root/autodl-tmp/sft_train.base64", "/root/autodl-tmp/sft_val.base64"]
  ],
  "tokenizer_path": "/root/Nano/tokenizer/tokenizer_16384.json",
  "random_seed": 39,
  "batch_size": 120,
  "gradient_accumulation_steps": 2,
  "grad_clip": 1.0,
  "dropout": 0.1,
  "learning_rate": 1e-6,
  "weight_decay": 1e-1,
  "beta1": 0.9,
  "beta2": 0.95,
  "decay_lr": false,
  "warmup_iters": 10000,
  "lr_decay_iters": 1e9,
  "min_lr": 1e-8,
  "eval_interval": 500,
  "log_interval": 10,
  "eval_iters": 5,
  "backend": "nccl",
  "device": "cuda",
  "sdp_kernel": "flash",
  "dtype": "bfloat16",
  "use_amp": true
}