# Distributed-training launch configuration.
# NOTE(review): the key set matches a Hugging Face Accelerate config using the
# DeepSpeed backend - confirm against the launcher that consumes this file.
compute_environment: LOCAL_MACHINE

# DeepSpeed-specific options; these must be nested under `deepspeed_config`.
deepspeed_config:
  deepspeed_multinode_launcher: standard
  gradient_accumulation_steps: 16
  # `none` is a plain string here (not YAML null): no optimizer/param offload.
  offload_optimizer_device: none
  offload_param_device: none
  zero3_init_flag: false
  zero_stage: 3

distributed_type: DEEPSPEED

# Placeholder address - replace with the reachable IP of the main node.
main_process_ip: 'xxx.xxx.xxx.xxx'
main_process_port: 29500
main_training_function: main
mixed_precision: bf16

num_machines: 2
num_processes: 8  # world size

rdzv_backend: static
use_cpu: false