# Multi-node NCCL smoke test.
#
# Runs the same inline script on every pdsh target. Each target exports the
# environment variables that torch.distributed reads for its default
# environment-variable rendezvous (RANK, WORLD_SIZE, MASTER_ADDR,
# MASTER_PORT), initialises an NCCL process group, and prints
# "OK <rank> <world_size>" on success.
#
# The remote script is wrapped in single quotes so that nothing is expanded
# locally; it therefore must not contain a single quote itself. The Python
# heredoc body and its EOF terminator must stay at column 0.
#
# NOTE(review): four targets are listed, but WORLD_SIZE is 2 and every host
# whose hostname is not "node0" gets RANK=1 -- confirm the target list and
# rank assignment match the intended topology.
# NOTE(review): the targets are purely numeric -- confirm they resolve as
# host names for pdsh.
pdsh -w 51006,51007,51008,51009 '
# POSIX test instead of [[ ]] so the script also works when the remote
# login shell is not bash.
if [ "$(hostname)" = "node0" ]; then
  export RANK=0
else
  export RANK=1
fi
export LOCAL_RANK=0

# Rendezvous endpoint and group size shared by all participants.
export MASTER_ADDR=10.0.0.1
export MASTER_PORT=29500
export WORLD_SIZE=2

# NCCL settings: INFO-level logging, InfiniBand transport disabled.
export NCCL_DEBUG=INFO
export NCCL_IB_DISABLE=1

python - << EOF
import torch.distributed as dist
dist.init_process_group("nccl")
print("OK", dist.get_rank(), dist.get_world_size(), flush=True)
EOF
'