"""
:mod:`torch.distributed.optim` exposes DistributedOptimizer, which takes a list
of remote parameters (:class:`~torch.distributed.rpc.RRef`) and runs the
optimizer locally on the workers where the parameters live. The distributed
optimizer can use any of the local optimizer :ref:`optimizer-algorithms` to
apply the gradients on each worker.
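
A minimal sketch of typical usage (this assumes :func:`torch.distributed.rpc.init_rpc`
has already been called on every process; the worker name ``"worker1"`` and the
tensors below are purely illustrative):

Example::

    >>> import torch
    >>> import torch.distributed.autograd as dist_autograd
    >>> import torch.distributed.rpc as rpc
    >>> from torch import optim
    >>> from torch.distributed.optim import DistributedOptimizer
    >>>
    >>> with dist_autograd.context() as context_id:
    >>>     # Forward pass: create remote parameters on an illustrative worker.
    >>>     rref1 = rpc.remote("worker1", torch.add, args=(torch.ones(2), 3))
    >>>     rref2 = rpc.remote("worker1", torch.add, args=(torch.ones(2), 1))
    >>>     loss = rref1.to_here() + rref2.to_here()
    >>>
    >>>     # Backward pass through the distributed autograd context.
    >>>     dist_autograd.backward(context_id, [loss.sum()])
    >>>
    >>>     # Build the distributed optimizer from a local optimizer class and
    >>>     # the remote parameter RRefs, then step with the context id.
    >>>     dist_optim = DistributedOptimizer(
    >>>         optim.SGD,
    >>>         [rref1, rref2],
    >>>         lr=0.05,
    >>>     )
    >>>     dist_optim.step(context_id)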
"""
import torch
from torch import optim

from .functional_adagrad import _FunctionalAdagrad
from .functional_adam import _FunctionalAdam
from .functional_adamw import _FunctionalAdamW
from .functional_sgd import _FunctionalSGD
from .functional_adadelta import _FunctionalAdadelta
from .functional_rmsprop import _FunctionalRMSprop
from .functional_rprop import _FunctionalRprop
from .functional_adamax import _FunctionalAdamax
from .utils import as_functional_optim


# DistributedOptimizer depends on the RPC framework, so only expose it when the
# C extension was built with RPC support.
if hasattr(torch._C, '_rpc_init'):
    from .optimizer import DistributedOptimizer

from .post_localSGD_optimizer import PostLocalSGDOptimizer
from .zero_redundancy_optimizer import ZeroRedundancyOptimizer