{
  "zero_optimization": {
    "stage": 2,
    "allgather_partitions": true,
    "reduce_scatter": true,
    "contiguous_gradients": true
  },
  "bf16": {
    "enabled": true
  },
  "gradient_clipping": 1.0,
  "train_micro_batch_size_per_gpu": 1,
  "gradient_accumulation_steps": 4
}