import os

import torch
import torch.distributed as dist
import torch.multiprocessing as mp

def setup(rank, world_size):
    # All ranks rendezvous at the same address/port to form the process group.
    os.environ['MASTER_ADDR'] = 'localhost'
    os.environ['MASTER_PORT'] = '12355'
    # NCCL is the usual backend for multi-GPU training; it requires CUDA devices.
    dist.init_process_group("nccl", rank=rank, world_size=world_size)


def cleanup():
    dist.destroy_process_group()


class MyDiffusionModel(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = torch.nn.Conv2d(3, 16, 3, 1)
        # conv2 and fc2 were missing but are required by forward(); their sizes
        # are assumptions consistent with the 32 * 6 * 6 flatten below
        # (i.e. 3x32x32 inputs) and a hypothetical 10-class output.
        self.conv2 = torch.nn.Conv2d(16, 32, 3, 1)
        self.fc1 = torch.nn.Linear(32 * 6 * 6, 128)
        self.fc2 = torch.nn.Linear(128, 10)

    def forward(self, x):
        x = torch.nn.functional.relu(self.conv1(x))
        x = torch.nn.functional.max_pool2d(x, 2)
        x = torch.nn.functional.relu(self.conv2(x))
        x = torch.nn.functional.max_pool2d(x, 2)
        x = torch.flatten(x, 1)
        x = torch.nn.functional.relu(self.fc1(x))
        x = self.fc2(x)
        return x

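
# Hedged sketch, not part of the original listing: once setup() has created the
# process group, MyDiffusionModel would typically be wrapped in
# torch.nn.parallel.DistributedDataParallel so gradients are averaged across
# ranks. The optimizer, batch shapes, and 10-class targets below are
# illustrative assumptions, and this helper is never called by main().
def ddp_step_sketch(rank):
    model = MyDiffusionModel().to(rank)
    ddp_model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[rank])
    optimizer = torch.optim.SGD(ddp_model.parameters(), lr=1e-3)

    # Dummy 3x32x32 batch, matching the 32 * 6 * 6 flatten size assumed above.
    inputs = torch.randn(8, 3, 32, 32, device=rank)
    targets = torch.randint(0, 10, (8,), device=rank)

    optimizer.zero_grad()
    loss = torch.nn.functional.cross_entropy(ddp_model(inputs), targets)
    loss.backward()  # gradient all-reduce across ranks happens here
    optimizer.step()
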

def main(rank, world_size):
    setup(rank, world_size)

    # Report the GPUs visible to this rank.
    if torch.cuda.is_available():
        num_gpus = torch.cuda.device_count()
        print(f"Rank {rank}, Number of GPUs available: {num_gpus}")
        for i in range(num_gpus):
            print(f"Rank {rank}, GPU {i}: {torch.cuda.get_device_name(i)}")
    else:
        print(f"Rank {rank}, No GPUs available")

    cleanup()


if __name__ == "__main__":
    # Spawn one process per visible GPU; world_size must match nprocs so that
    # init_process_group waits for the correct number of ranks. mp.spawn
    # passes the rank as the first argument to main().
    world_size = torch.cuda.device_count()
    mp.spawn(main, args=(world_size,), nprocs=world_size, join=True)