# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0

# DeepSpeed Team
"""
DeepSpeed Communication Backend.

In the future, directly use NCCL/MPI/Gloo/etc. without requiring torch.distributed. For now, simply wrap torch.distributed.

# Custom DS Backends -- Direct C/Ops
 - NCCL -- [EXPERIMENTAL]
 - MPI -- [EXPERIMENTAL]
 - RCCL -- [EXPERIMENTAL]
 - GLOO -- [EXPERIMENTAL]

# DS backend wrapper for torch.distributed [DEFAULT]
 - T-NCCL -- [DEFAULT]
 - T-GLOO
 - T-MPI

# Backend is the base class
 -- NcclBackend, MpiBackend, and TorchBackend are the main subclasses. TorchBackend is the only officially supported backend for now.
"""
class Backend(object):
    """Base class for DeepSpeed communication backends.

    Stores the bookkeeping state shared by backends: a name, the world
    rank and size, a handle for the world process group, the list of
    process groups, and an ``initialized`` flag. Subclasses are expected
    to perform the actual process-group setup.
    """

    def __init__(self, name: str = 'backend', rank: int = 0, size: int = 1) -> None:
        """Record the backend name and world rank/size; nothing is initialized yet.

        Args:
            name: Label stored on ``self.name``.
            rank: Value stored on ``self.world_rank``.
            size: Value stored on ``self.world_size``.
        """
        self.name = name
        # The world size and rank of the world process group
        self.world_group = None
        self.world_size = size
        self.world_rank = rank
        # Single process group (pg) implementation for now but keep a list for future
        self.process_groups = []
        self.initialized = False

    def is_initialized(self) -> bool:
        """Return the current value of the ``initialized`` flag."""
        return self.initialized

    def new_group(self) -> None:
        """Placeholder for creating a new process group.

        The base implementation does nothing and returns ``None``.
        """
        # create a new pg and add it to pg list
        pass

    def init_process_group(self) -> None:
        """Mark the backend as initialized.

        The base implementation only flips the ``initialized`` flag.
        """
        # subclasses will initialize them fully
        # - initialize a default world process group and add it to pg list
        self.initialized = True