mindeyev2old / src /deepspeed_config_stage3.json
ckadirt's picture
Upload folder using huggingface_hub
626cbe5 verified
{
  "bf16": {
    "enabled": false
  },
  "fp16": {
    "enabled": true
  },
  "zero_optimization": {
    "stage": 3,
    "contiguous_gradients": true,
    "stage3_gather_16bit_weights_on_model_save": true,
    "stage3_max_live_parameters": 1e9,
    "stage3_max_reuse_distance": 1e9,
    "stage3_prefetch_bucket_size": 1e7,
    "stage3_param_persistence_threshold": 1e5,
    "reduce_bucket_size": 1e7,
    "sub_group_size": 1e9,
    "offload_optimizer": {
      "device": "none",
      "nvme_path": "/scratch",
      "pin_memory": true
    },
    "offload_param": {
      "device": "none",
      "nvme_path": "/scratch",
      "buffer_size": 4e9,
      "pin_memory": true
    }
  },
  "aio": {
    "block_size": 26214400,
    "queue_depth": 32,
    "thread_count": 1,
    "single_submit": false,
    "overlap_events": true
  },
  "gradient_accumulation_steps": 1,
  "gradient_clipping": 1.0,
  "steps_per_print": 20000,
  "train_batch_size": 128,
  "train_micro_batch_size_per_gpu": 16,
  "wall_clock_breakdown": false,
  "zero_allow_untested_optimizer": true
}