{
  "use_lora": false,
  "lora_rank": 8,
  "lora_alpha": 16,
  "lora_dropout": 0.0,
  "from_checkpoint": "/root/autodl-tmp/checkpoint_20241130_005942_step_307000.pt",
  "save_checkpoint_to": "/root/autodl-tmp/checkpoint",
  "dataset_path": [
    [
      "/root/autodl-tmp/sft_train.base64",
      "/root/autodl-tmp/sft_val.base64"
    ]
  ],
  "tokenizer_path": "/root/Nano/tokenizer/tokenizer_16384.json",
  "random_seed": 39,
  "batch_size": 120,
  "gradient_accumulation_steps": 2,
  "grad_clip": 1.0,
  "dropout": 0.1,
  "learning_rate": 1e-6,
  "weight_decay": 1e-1,
  "beta1": 0.9,
  "beta2": 0.95,
  "decay_lr": false,
  "warmup_iters": 10000,
  "lr_decay_iters": 1e9,
  "min_lr": 1e-8,
  "eval_interval": 500,
  "log_interval": 10,
  "eval_iters": 5,
  "backend": "nccl",
  "device": "cuda",
  "sdp_kernel": "flash",
  "dtype": "bfloat16",
  "use_amp": true
}