# msj19's picture
# Add files using upload-large-folder tool
# c39435c verified
# Smoke-test torch.distributed over NCCL: pdsh runs the quoted script on every
# host listed after -w, and each host starts one Python process that joins the
# process group and prints "OK <rank> <world_size>".
#
# NOTE(review): four targets are listed, but WORLD_SIZE is 2 and every host
# whose hostname is not "node0" is assigned RANK=1, so more than one host would
# get the same rank -- confirm the target list matches the intended two-node job.
pdsh -w 51006,51007,51008,51009 '
# The host named node0 is rank 0; any other host is rank 1.
# POSIX [ ] is used so the script does not depend on the remote shell being bash.
if [ "$(hostname)" = "node0" ]; then
  export RANK=0
else
  export RANK=1
fi
export LOCAL_RANK=0
# Rendezvous settings exported for the Python process started below.
export MASTER_ADDR=10.0.0.1
export MASTER_PORT=29500
export WORLD_SIZE=2
# NCCL settings: INFO-level debug output, InfiniBand transport disabled.
export NCCL_DEBUG=INFO
export NCCL_IB_DISABLE=1
# Quoted delimiter: the Python source is passed to the interpreter verbatim,
# with no shell expansion applied to it.
python - << "EOF"
import torch.distributed as dist
dist.init_process_group("nccl")
print("OK", dist.get_rank(), dist.get_world_size(), flush=True)
EOF
'