# Launch configuration selecting DeepSpeed ZeRO stage 3 with CPU offload.
# NOTE(review): the key names match the Hugging Face Accelerate config
# schema; the nesting below follows that schema -- confirm against the
# consuming tool.
distributed_type: DEEPSPEED

deepspeed_config:
  deepspeed_multinode_launcher: standard
  # NOTE(review): 0.0 presumably leaves gradient clipping disabled -- confirm.
  gradient_clipping: 0.0
  # NOTE(review): the original carried a trailing "#2" here, presumably the
  # previously used (or alternative) ZeRO stage -- confirm.
  zero_stage: 3
  # Moves optimizer states to CPU RAM
  offload_optimizer_device: cpu
  # Moves model parameters to CPU RAM
  offload_param_device: cpu
  # Initializes the model directly across GPUs to save CPU RAM
  zero3_init_flag: true
  # Consolidates weights into a single file when saving checkpoints
  zero3_save_16bit_model: true

num_machines: 1
num_processes: 8
machine_rank: 0