{
    "use_lora": false,
    "lora_rank": 8,
    "lora_alpha": 16,
    "lora_dropout": 0.0,


    "from_checkpoint": "/home/bd4sur/ai/Nano/checkpoint/checkpoint_20241122_232939_step_196000.pt",
    "save_checkpoint_to": "/home/bd4sur/ai/Nano/checkpoint",
    "dataset_path": [
        ["/home/bd4sur/ai/Nano/dataset_preprocessed/pt_train_0.base64", "/home/bd4sur/ai/Nano/dataset_preprocessed/pt_val_0.base64"]
    ],
    "tokenizer_path": "/home/bd4sur/ai/Nano/tokenizer/tokenizer_16384.json",


    "random_seed": 39,
    "batch_size": 80,
    "gradient_accumulation_steps": 1,
    "grad_clip": 1.0,


    "dropout": 0.0,


    "learning_rate": 5e-4,
    "weight_decay": 1e-1,
    "beta1": 0.9,
    "beta2": 0.95,


    "decay_lr": true,
    "warmup_iters": 500,
    "lr_decay_iters": 1e9,
    "min_lr": 6e-5,


    "eval_interval": 500,
    "log_interval": 10,
    "eval_iters": 2,


    "backend": "nccl",
    "device": "cuda",
    "sdp_kernel": "flash",
    "dtype": "bfloat16",
    "use_amp": true
}