mistral-chatpro-dataset / ds_config.json
Doug240's picture
Add DeepSpeed config
0ca8699
raw
history blame contribute delete
363 Bytes
{
  "train_micro_batch_size_per_gpu": 1,
  "gradient_accumulation_steps": 4,
  "gradient_clipping": 1.0,
  "bf16": {
    "enabled": true
  },
  "zero_optimization": {
    "stage": 2,
    "offload_optimizer": {
      "device": "none"
    },
    "contiguous_gradients": true
  },
  "steps_per_print": 100,
  "wall_clock_breakdown": false
}