# Accelerate launch configuration: single-node DeepSpeed run with ZeRO stage 3
# and CPU offload of both optimizer state and parameters.
distributed_type: DEEPSPEED

# Options nested here are the DeepSpeed plugin settings; they must be indented
# under `deepspeed_config`, otherwise the key parses as null.
deepspeed_config:
  deepspeed_multinode_launcher: standard
  # 0.0 presumably leaves gradient clipping disabled - confirm this is intended.
  gradient_clipping: 0.0
  # ZeRO stage 3; the original carried a bare "#2" note here, presumably the
  # stage-2 alternative - confirm before switching.
  zero_stage: 3
  # Moves optimizer states to CPU RAM.
  offload_optimizer_device: cpu
  # Moves model parameters to CPU RAM.
  offload_param_device: cpu
  # Initializes the model directly across GPUs to save CPU RAM.
  zero3_init_flag: true
  # Consolidates weights into a single 16-bit file when saving checkpoints.
  zero3_save_16bit_model: true

# Topology: one machine running 8 processes; this host is rank 0.
num_machines: 1
num_processes: 8
machine_rank: 0