{
  "train_batch_size": 16,
  "train_micro_batch_size_per_gpu": 16,
  "steps_per_print": 100,
  "optimizer": {
    "type": "Adam",
    "params": {
      "adam_w_mode": true,
      "lr": 1e-05,
      "weight_decay": 0.01,
      "bias_correction": true,
      "betas": [
        0.9,
        0.98
      ],
      "eps": 1e-08
    }
  },
  "zero_optimization": {
    "stage": 1,
    "reduce_bucket_size": 500000000
  },
  "bf16": {
    "enabled": true
  },
  "gradient_clipping": 0.7
}