# Distributed-training launcher configuration (key set matches a Hugging Face
# Accelerate config file -- NOTE(review): confirm the consuming tool).
# Describes a 2-machine MULTI_GPU run with 16 processes and bf16 mixed precision.
compute_environment: LOCAL_MACHINE
distributed_type: MULTI_GPU
# Quoted so the value stays the string 'no' instead of a YAML 1.1 boolean.
downcast_bf16: 'no'
gpu_ids: all
# Rank of this machine. Presumably each node needs its own copy of this file
# with a distinct rank in 0..num_machines-1 -- TODO confirm.
machine_rank: 0
# Placeholder address; replace with the real IP of the main (rank 0) machine.
main_process_ip: 'xx.xx.xx.xx'
main_process_port: 36001
main_training_function: main
mixed_precision: 'bf16'
num_machines: 2
# Presumably the total process count across all machines (8 per node here),
# not a per-node count -- verify against the launcher documentation.
num_processes: 16
rdzv_backend: static
same_network: true
tpu_use_cluster: false
tpu_use_sudo: false
use_cpu: false

# Optional DeepSpeed ZeRO-0 (no sharding) settings. Prefer MULTI_GPU DDP for
# single-node training.
# NOTE(review): distributed_type above is MULTI_GPU, so this section is
# presumably ignored until distributed_type is switched to DEEPSPEED -- confirm.
deepspeed_config:
  zero_stage: 0
  gradient_accumulation_steps: 1
  zero3_init_flag: false