Add files using upload-large-folder tool
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +6 -0
- phivenv/Lib/site-packages/numpy.libs/libscipy_openblas64_-caad452230ae4ddb57899b8b3a33c55c.dll +3 -0
- phivenv/Lib/site-packages/pip/_vendor/distlib/t64-arm.exe +3 -0
- phivenv/Lib/site-packages/pip/_vendor/distlib/t64.exe +3 -0
- phivenv/Lib/site-packages/pip/_vendor/distlib/w64-arm.exe +3 -0
- phivenv/Lib/site-packages/pip/_vendor/idna/__pycache__/uts46data.cpython-39.pyc +3 -0
- phivenv/Lib/site-packages/pip/_vendor/pyparsing/__pycache__/core.cpython-39.pyc +3 -0
- phivenv/Lib/site-packages/torch/distributed/__pycache__/__init__.cpython-39.pyc +0 -0
- phivenv/Lib/site-packages/torch/distributed/__pycache__/_checkpointable.cpython-39.pyc +0 -0
- phivenv/Lib/site-packages/torch/distributed/__pycache__/_composable_state.cpython-39.pyc +0 -0
- phivenv/Lib/site-packages/torch/distributed/__pycache__/_functional_collectives.cpython-39.pyc +0 -0
- phivenv/Lib/site-packages/torch/distributed/__pycache__/_functional_collectives_impl.cpython-39.pyc +0 -0
- phivenv/Lib/site-packages/torch/distributed/__pycache__/_serialization.cpython-39.pyc +0 -0
- phivenv/Lib/site-packages/torch/distributed/__pycache__/_state_dict_utils.cpython-39.pyc +0 -0
- phivenv/Lib/site-packages/torch/distributed/__pycache__/argparse_util.cpython-39.pyc +0 -0
- phivenv/Lib/site-packages/torch/distributed/__pycache__/c10d_logger.cpython-39.pyc +0 -0
- phivenv/Lib/site-packages/torch/distributed/__pycache__/collective_utils.cpython-39.pyc +0 -0
- phivenv/Lib/site-packages/torch/distributed/__pycache__/constants.cpython-39.pyc +0 -0
- phivenv/Lib/site-packages/torch/distributed/__pycache__/device_mesh.cpython-39.pyc +0 -0
- phivenv/Lib/site-packages/torch/distributed/__pycache__/launch.cpython-39.pyc +0 -0
- phivenv/Lib/site-packages/torch/distributed/__pycache__/logging_handlers.cpython-39.pyc +0 -0
- phivenv/Lib/site-packages/torch/distributed/__pycache__/remote_device.cpython-39.pyc +0 -0
- phivenv/Lib/site-packages/torch/distributed/__pycache__/rendezvous.cpython-39.pyc +0 -0
- phivenv/Lib/site-packages/torch/distributed/__pycache__/run.cpython-39.pyc +0 -0
- phivenv/Lib/site-packages/torch/distributed/__pycache__/utils.cpython-39.pyc +0 -0
- phivenv/Lib/site-packages/torch/distributed/_composable/__init__.py +3 -0
- phivenv/Lib/site-packages/torch/distributed/_composable/__pycache__/__init__.cpython-39.pyc +0 -0
- phivenv/Lib/site-packages/torch/distributed/_composable/__pycache__/checkpoint_activation.cpython-39.pyc +0 -0
- phivenv/Lib/site-packages/torch/distributed/_composable/__pycache__/contract.cpython-39.pyc +0 -0
- phivenv/Lib/site-packages/torch/distributed/_composable/__pycache__/replicate.cpython-39.pyc +0 -0
- phivenv/Lib/site-packages/torch/distributed/_composable/checkpoint_activation.py +132 -0
- phivenv/Lib/site-packages/torch/distributed/_composable/contract.py +248 -0
- phivenv/Lib/site-packages/torch/distributed/_composable/fsdp/__init__.py +3 -0
- phivenv/Lib/site-packages/torch/distributed/_composable/fsdp/__pycache__/__init__.cpython-39.pyc +0 -0
- phivenv/Lib/site-packages/torch/distributed/_composable/fsdp/__pycache__/fully_shard.cpython-39.pyc +0 -0
- phivenv/Lib/site-packages/torch/distributed/_composable/fsdp/fully_shard.py +8 -0
- phivenv/Lib/site-packages/torch/distributed/_composable/replicate.py +256 -0
- phivenv/Lib/site-packages/torch/distributed/_shard/__init__.py +1 -0
- phivenv/Lib/site-packages/torch/distributed/_shard/__pycache__/__init__.cpython-39.pyc +0 -0
- phivenv/Lib/site-packages/torch/distributed/_shard/__pycache__/_utils.cpython-39.pyc +0 -0
- phivenv/Lib/site-packages/torch/distributed/_shard/__pycache__/api.cpython-39.pyc +0 -0
- phivenv/Lib/site-packages/torch/distributed/_shard/__pycache__/common_op_utils.cpython-39.pyc +0 -0
- phivenv/Lib/site-packages/torch/distributed/_shard/__pycache__/metadata.cpython-39.pyc +0 -0
- phivenv/Lib/site-packages/torch/distributed/_shard/__pycache__/op_registry_utils.cpython-39.pyc +0 -0
- phivenv/Lib/site-packages/torch/distributed/_shard/__pycache__/sharder.cpython-39.pyc +0 -0
- phivenv/Lib/site-packages/torch/distributed/_shard/_utils.py +32 -0
- phivenv/Lib/site-packages/torch/distributed/_shard/api.py +306 -0
- phivenv/Lib/site-packages/torch/distributed/_shard/checkpoint/__init__.py +19 -0
- phivenv/Lib/site-packages/torch/distributed/_shard/checkpoint/__pycache__/__init__.cpython-39.pyc +0 -0
- phivenv/Lib/site-packages/torch/distributed/_shard/common_op_utils.py +65 -0
.gitattributes
CHANGED
|
@@ -48,3 +48,9 @@ phivenv/Lib/site-packages/numpy/_core/__pycache__/fromnumeric.cpython-39.pyc fil
|
|
| 48 |
phivenv/Lib/site-packages/numpy/_core/tests/__pycache__/test_umath.cpython-39.pyc filter=lfs diff=lfs merge=lfs -text
|
| 49 |
phivenv/Lib/site-packages/numpy/_core/__pycache__/_add_newdocs.cpython-39.pyc filter=lfs diff=lfs merge=lfs -text
|
| 50 |
phivenv/Lib/site-packages/numpy.libs/msvcp140-23ebcc0b37c8e3d074511f362feac48b.dll filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
phivenv/Lib/site-packages/numpy/_core/tests/__pycache__/test_umath.cpython-39.pyc filter=lfs diff=lfs merge=lfs -text
|
| 49 |
phivenv/Lib/site-packages/numpy/_core/__pycache__/_add_newdocs.cpython-39.pyc filter=lfs diff=lfs merge=lfs -text
|
| 50 |
phivenv/Lib/site-packages/numpy.libs/msvcp140-23ebcc0b37c8e3d074511f362feac48b.dll filter=lfs diff=lfs merge=lfs -text
|
| 51 |
+
phivenv/Lib/site-packages/pip/_vendor/distlib/t64-arm.exe filter=lfs diff=lfs merge=lfs -text
|
| 52 |
+
phivenv/Lib/site-packages/numpy.libs/libscipy_openblas64_-caad452230ae4ddb57899b8b3a33c55c.dll filter=lfs diff=lfs merge=lfs -text
|
| 53 |
+
phivenv/Lib/site-packages/pip/_vendor/distlib/t64.exe filter=lfs diff=lfs merge=lfs -text
|
| 54 |
+
phivenv/Lib/site-packages/pip/_vendor/distlib/w64-arm.exe filter=lfs diff=lfs merge=lfs -text
|
| 55 |
+
phivenv/Lib/site-packages/pip/_vendor/idna/__pycache__/uts46data.cpython-39.pyc filter=lfs diff=lfs merge=lfs -text
|
| 56 |
+
phivenv/Lib/site-packages/pip/_vendor/pyparsing/__pycache__/core.cpython-39.pyc filter=lfs diff=lfs merge=lfs -text
|
phivenv/Lib/site-packages/numpy.libs/libscipy_openblas64_-caad452230ae4ddb57899b8b3a33c55c.dll
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:44629a7d27806ea076daeae8e829b0cfbdec9e25099561a19af8e5910bd635c5
|
| 3 |
+
size 32816640
|
phivenv/Lib/site-packages/pip/_vendor/distlib/t64-arm.exe
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f1618387a688f162408e7811350a72269076d52bf6d0f09860548d5b57d677ac
|
| 3 |
+
size 180736
|
phivenv/Lib/site-packages/pip/_vendor/distlib/t64.exe
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a00a877acefcad45953343ad56a22152f7aaba5fcf2a10215d84169d47fbcd1d
|
| 3 |
+
size 105984
|
phivenv/Lib/site-packages/pip/_vendor/distlib/w64-arm.exe
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:43f1ddcd5bbdcf161d6816b79b4889e7f75d2ce12ab4f7bcc77d16003a17cdaf
|
| 3 |
+
size 166400
|
phivenv/Lib/site-packages/pip/_vendor/idna/__pycache__/uts46data.cpython-39.pyc
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a8acb4dd7cd594effc85e8c2b9ac052d6f4fe88744cd4749a8e8b8b93ba88246
|
| 3 |
+
size 151716
|
phivenv/Lib/site-packages/pip/_vendor/pyparsing/__pycache__/core.cpython-39.pyc
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7a6e2c125af98ae3013115aad3c6156dd30340dd0c77863105db036c061ddc8e
|
| 3 |
+
size 176641
|
phivenv/Lib/site-packages/torch/distributed/__pycache__/__init__.cpython-39.pyc
ADDED
|
Binary file (4.49 kB). View file
|
|
|
phivenv/Lib/site-packages/torch/distributed/__pycache__/_checkpointable.cpython-39.pyc
ADDED
|
Binary file (1.78 kB). View file
|
|
|
phivenv/Lib/site-packages/torch/distributed/__pycache__/_composable_state.cpython-39.pyc
ADDED
|
Binary file (1.49 kB). View file
|
|
|
phivenv/Lib/site-packages/torch/distributed/__pycache__/_functional_collectives.cpython-39.pyc
ADDED
|
Binary file (32.8 kB). View file
|
|
|
phivenv/Lib/site-packages/torch/distributed/__pycache__/_functional_collectives_impl.cpython-39.pyc
ADDED
|
Binary file (2.75 kB). View file
|
|
|
phivenv/Lib/site-packages/torch/distributed/__pycache__/_serialization.cpython-39.pyc
ADDED
|
Binary file (4.76 kB). View file
|
|
|
phivenv/Lib/site-packages/torch/distributed/__pycache__/_state_dict_utils.cpython-39.pyc
ADDED
|
Binary file (21.2 kB). View file
|
|
|
phivenv/Lib/site-packages/torch/distributed/__pycache__/argparse_util.cpython-39.pyc
ADDED
|
Binary file (3.95 kB). View file
|
|
|
phivenv/Lib/site-packages/torch/distributed/__pycache__/c10d_logger.cpython-39.pyc
ADDED
|
Binary file (3.12 kB). View file
|
|
|
phivenv/Lib/site-packages/torch/distributed/__pycache__/collective_utils.cpython-39.pyc
ADDED
|
Binary file (5.46 kB). View file
|
|
|
phivenv/Lib/site-packages/torch/distributed/__pycache__/constants.cpython-39.pyc
ADDED
|
Binary file (532 Bytes). View file
|
|
|
phivenv/Lib/site-packages/torch/distributed/__pycache__/device_mesh.cpython-39.pyc
ADDED
|
Binary file (31.7 kB). View file
|
|
|
phivenv/Lib/site-packages/torch/distributed/__pycache__/launch.cpython-39.pyc
ADDED
|
Binary file (7.81 kB). View file
|
|
|
phivenv/Lib/site-packages/torch/distributed/__pycache__/logging_handlers.cpython-39.pyc
ADDED
|
Binary file (341 Bytes). View file
|
|
|
phivenv/Lib/site-packages/torch/distributed/__pycache__/remote_device.cpython-39.pyc
ADDED
|
Binary file (3.8 kB). View file
|
|
|
phivenv/Lib/site-packages/torch/distributed/__pycache__/rendezvous.cpython-39.pyc
ADDED
|
Binary file (8.85 kB). View file
|
|
|
phivenv/Lib/site-packages/torch/distributed/__pycache__/run.cpython-39.pyc
ADDED
|
Binary file (27.2 kB). View file
|
|
|
phivenv/Lib/site-packages/torch/distributed/__pycache__/utils.cpython-39.pyc
ADDED
|
Binary file (12.1 kB). View file
|
|
|
phivenv/Lib/site-packages/torch/distributed/_composable/__init__.py
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .checkpoint_activation import checkpoint
|
| 2 |
+
from .contract import _get_registry, contract
|
| 3 |
+
from .replicate import replicate
|
phivenv/Lib/site-packages/torch/distributed/_composable/__pycache__/__init__.cpython-39.pyc
ADDED
|
Binary file (316 Bytes). View file
|
|
|
phivenv/Lib/site-packages/torch/distributed/_composable/__pycache__/checkpoint_activation.cpython-39.pyc
ADDED
|
Binary file (4.54 kB). View file
|
|
|
phivenv/Lib/site-packages/torch/distributed/_composable/__pycache__/contract.cpython-39.pyc
ADDED
|
Binary file (6.96 kB). View file
|
|
|
phivenv/Lib/site-packages/torch/distributed/_composable/__pycache__/replicate.cpython-39.pyc
ADDED
|
Binary file (7.13 kB). View file
|
|
|
phivenv/Lib/site-packages/torch/distributed/_composable/checkpoint_activation.py
ADDED
|
@@ -0,0 +1,132 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# mypy: allow-untyped-defs
|
| 2 |
+
from collections.abc import Generator
|
| 3 |
+
from contextlib import AbstractContextManager, contextmanager, nullcontext
|
| 4 |
+
from typing import Any, Optional
|
| 5 |
+
|
| 6 |
+
import torch
|
| 7 |
+
import torch.nn as nn
|
| 8 |
+
from torch.utils.checkpoint import (
|
| 9 |
+
_checkpoint_without_reentrant_generator,
|
| 10 |
+
_DEFAULT_DETERMINISM_MODE,
|
| 11 |
+
)
|
| 12 |
+
|
| 13 |
+
from .contract import _State, contract
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
@contextmanager
|
| 17 |
+
def _no_hook(module: nn.Module, user_ctx: Optional[AbstractContextManager] = None):
|
| 18 |
+
r"""
|
| 19 |
+
Disable hooks installed by checkpoint to avoid unintentional recursion
|
| 20 |
+
during backward recomputation.
|
| 21 |
+
"""
|
| 22 |
+
|
| 23 |
+
with user_ctx if user_ctx else nullcontext():
|
| 24 |
+
orig_enable_hook = checkpoint.state(module).enable_hook
|
| 25 |
+
checkpoint.state(module).enable_hook = False
|
| 26 |
+
try:
|
| 27 |
+
yield
|
| 28 |
+
finally:
|
| 29 |
+
checkpoint.state(module).enable_hook = orig_enable_hook
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
class _CheckpointState(_State):
|
| 33 |
+
enable_hook: bool = False
|
| 34 |
+
_ac_generator: Optional[Generator[None, None, None]]
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
@contract(_CheckpointState)
|
| 38 |
+
def checkpoint(module: nn.Module, **kwargs) -> nn.Module:
|
| 39 |
+
r"""
|
| 40 |
+
This is a composable activation checkpointing API. Unlike functional
|
| 41 |
+
activation checkpointing APIs, this one does not require changing model
|
| 42 |
+
source code. Unlike ``nn.Module`` wrapper activation checkpointing APIs,
|
| 43 |
+
this one does not modify model structure or fully-qualified names either.
|
| 44 |
+
Under the hood, it registers activation checkpointing logic as pre- and
|
| 45 |
+
post-forward hooks. Hence, this API can be easily applied to any model or
|
| 46 |
+
sub-modules in the model.
|
| 47 |
+
|
| 48 |
+
Args:
|
| 49 |
+
module (nn.Module): the target model or sub-module to apply activation
|
| 50 |
+
checkpointing.
|
| 51 |
+
|
| 52 |
+
Example::
|
| 53 |
+
>>> # xdoctest: +SKIP
|
| 54 |
+
>>> import torch.nn as nn
|
| 55 |
+
>>>
|
| 56 |
+
>>> class MyModel(nn.Module):
|
| 57 |
+
>>> def __init__(self) -> None:
|
| 58 |
+
>>> super().__init__()
|
| 59 |
+
>>> self.l1 = nn.Linear(10, 10)
|
| 60 |
+
>>> self.l2 = nn.Linear(10, 10)
|
| 61 |
+
>>>
|
| 62 |
+
>>> def forward(self, x):
|
| 63 |
+
>>> return self.l2(self.l1(x))
|
| 64 |
+
>>>
|
| 65 |
+
>>> model = MyModel()
|
| 66 |
+
>>> checkpoint(model.l1) # apply activation checkpointing only to l1
|
| 67 |
+
>>> model(torch.zeros(2, 10)).sum().backward()
|
| 68 |
+
|
| 69 |
+
"""
|
| 70 |
+
torch._C._log_api_usage_once("torch.distributed.checkpoint")
|
| 71 |
+
|
| 72 |
+
use_reentrant = kwargs.pop("use_reentrant", False)
|
| 73 |
+
if use_reentrant:
|
| 74 |
+
raise NotImplementedError(
|
| 75 |
+
"use_reentrant=True is not supported in composable checkpoint. "
|
| 76 |
+
"Please use torch.utils.checkpoint.checkpoint instead."
|
| 77 |
+
)
|
| 78 |
+
preserve_rng_state = kwargs.pop("preserve_rng_state", True)
|
| 79 |
+
user_context_fns = kwargs.pop("context_fn", None)
|
| 80 |
+
determinism_check = kwargs.pop("determinism_check", _DEFAULT_DETERMINISM_MODE)
|
| 81 |
+
debug = kwargs.pop("debug", False)
|
| 82 |
+
|
| 83 |
+
if kwargs:
|
| 84 |
+
raise ValueError(
|
| 85 |
+
"Unexpected keyword arguments: " + ",".join(arg for arg in kwargs)
|
| 86 |
+
)
|
| 87 |
+
|
| 88 |
+
def forward_pre_hook(
|
| 89 |
+
module: nn.Module, args: tuple[Any, ...], kwargs: dict[str, Any]
|
| 90 |
+
) -> None:
|
| 91 |
+
if checkpoint.state(module).enable_hook:
|
| 92 |
+
|
| 93 |
+
def context_fns():
|
| 94 |
+
if user_context_fns is not None:
|
| 95 |
+
ctx1, ctx2 = user_context_fns()
|
| 96 |
+
return ctx1, _no_hook(module, ctx2)
|
| 97 |
+
else:
|
| 98 |
+
return nullcontext(), _no_hook(module)
|
| 99 |
+
|
| 100 |
+
gen = _checkpoint_without_reentrant_generator(
|
| 101 |
+
module,
|
| 102 |
+
preserve_rng_state,
|
| 103 |
+
context_fns,
|
| 104 |
+
determinism_check,
|
| 105 |
+
debug,
|
| 106 |
+
*args,
|
| 107 |
+
**kwargs,
|
| 108 |
+
)
|
| 109 |
+
checkpoint.state(module)._ac_generator = gen
|
| 110 |
+
next(gen)
|
| 111 |
+
|
| 112 |
+
def forward_hook(module: nn.Module, inputs: tuple[Any, ...], output: Any) -> Any:
|
| 113 |
+
if checkpoint.state(module).enable_hook:
|
| 114 |
+
try:
|
| 115 |
+
gen = checkpoint.state(module)._ac_generator
|
| 116 |
+
assert gen is not None
|
| 117 |
+
next(gen)
|
| 118 |
+
except StopIteration:
|
| 119 |
+
pass
|
| 120 |
+
else:
|
| 121 |
+
raise RuntimeError(
|
| 122 |
+
"Expected non-reentrant activation checkpoint generator to be exhausted, but it was not!"
|
| 123 |
+
)
|
| 124 |
+
|
| 125 |
+
# Ensure that we no longer hold on to the generator. always_call=True helps ensure we
|
| 126 |
+
# clear this even in the case of exception in fwd pass.
|
| 127 |
+
checkpoint.state(module)._ac_generator = None
|
| 128 |
+
|
| 129 |
+
checkpoint.state(module).enable_hook = True
|
| 130 |
+
module.register_forward_pre_hook(forward_pre_hook, with_kwargs=True)
|
| 131 |
+
module.register_forward_hook(forward_hook, prepend=True, always_call=True)
|
| 132 |
+
return module
|
phivenv/Lib/site-packages/torch/distributed/_composable/contract.py
ADDED
|
@@ -0,0 +1,248 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# mypy: allow-untyped-defs
|
| 2 |
+
import uuid
|
| 3 |
+
from collections import OrderedDict
|
| 4 |
+
from functools import wraps
|
| 5 |
+
from typing import Callable, Generic, Optional, Protocol
|
| 6 |
+
from typing_extensions import Concatenate, ParamSpec, TypeVar
|
| 7 |
+
|
| 8 |
+
import torch
|
| 9 |
+
import torch.nn as nn
|
| 10 |
+
from torch.distributed._composable_state import _State
|
| 11 |
+
from torch.distributed.utils import _get_root_modules
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
_T = TypeVar("_T", covariant=True)
|
| 15 |
+
_P = ParamSpec("_P")
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def generate_state_key(string="__composable_api_state_key"):
|
| 19 |
+
return f"{string}_{str(uuid.uuid4())}"
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
STATE_KEY = generate_state_key()
|
| 23 |
+
REGISTRY_KEY = generate_state_key()
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
# TODO: we can add additional info to RegistryItem to share across APIs. E.g.,
|
| 27 |
+
# we can add args and kwargs here, and then we can detect whether fully_shard
|
| 28 |
+
# is combined with reentrant activation checkpointing and error out with a clear
|
| 29 |
+
# message.
|
| 30 |
+
class RegistryItem:
|
| 31 |
+
pass
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
_TState = TypeVar("_TState", bound="_State", covariant=True)
|
| 35 |
+
_M = TypeVar("_M", nn.Module, list[nn.Module])
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
class _ContractFn(Protocol, Generic[_P, _T, _TState]):
|
| 39 |
+
def __call__(self, *args: _P.args, **kwargs: _P.kwargs) -> _T: ...
|
| 40 |
+
|
| 41 |
+
def state(self, module: nn.Module) -> _TState: ...
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
def contract(
|
| 45 |
+
state_cls: type[_TState] = _State, # type: ignore[assignment]
|
| 46 |
+
) -> Callable[
|
| 47 |
+
[Callable[Concatenate[_M, _P], _M]],
|
| 48 |
+
_ContractFn[Concatenate[_M, _P], _M, _TState],
|
| 49 |
+
]:
|
| 50 |
+
r"""
|
| 51 |
+
Decorate a function as a composable distributed API, where the first
|
| 52 |
+
argument of the function must be an :class:`nn.Module` instance or sequence
|
| 53 |
+
of :class:`nn.Module` instances.
|
| 54 |
+
|
| 55 |
+
The decorator verifies that the decorated function does not modify
|
| 56 |
+
fully-qualified names (FQNs) for parameters, buffers, or modules. The
|
| 57 |
+
decorated function can return different module instances than the input
|
| 58 |
+
modules; the FQN invariant will be enforced following the input order.
|
| 59 |
+
|
| 60 |
+
When a function ``func`` is decorated by ``@contract()``, a
|
| 61 |
+
``.state(module: nn.Module)`` method will be installed to the decorated
|
| 62 |
+
function. Then you can retrieve and modify the state on a module by calling
|
| 63 |
+
``func.state(module)``.
|
| 64 |
+
|
| 65 |
+
Example::
|
| 66 |
+
>>> # xdoctest: +SKIP
|
| 67 |
+
>>> import torch.nn as nn
|
| 68 |
+
>>>
|
| 69 |
+
>>> class MyModel(nn.Module):
|
| 70 |
+
>>> def __init__(self) -> None:
|
| 71 |
+
>>> super().__init__()
|
| 72 |
+
>>> self.l1 = nn.Linear(10, 10)
|
| 73 |
+
>>> self.l2 = nn.Linear(10, 10)
|
| 74 |
+
>>>
|
| 75 |
+
>>> def forward(self, x):
|
| 76 |
+
>>> return self.l2(self.l1(x))
|
| 77 |
+
>>>
|
| 78 |
+
>>> @contract()
|
| 79 |
+
>>> def my_feature(module: nn.Module) -> nn.Module:
|
| 80 |
+
>>> my_feature.state(module).some_state = "any value"
|
| 81 |
+
>>> return module
|
| 82 |
+
>>>
|
| 83 |
+
>>> model = MyModel()
|
| 84 |
+
>>> my_feature(model.l1)
|
| 85 |
+
>>> assert my_feature.state(model.l1).some_state == "any value"
|
| 86 |
+
>>> my_feature(model.l2)
|
| 87 |
+
>>> model(torch.randn(2, 10)).sum().backward()
|
| 88 |
+
"""
|
| 89 |
+
|
| 90 |
+
# wraps will make functions decorated with contract() pickleable - needed for integration with torch.package
|
| 91 |
+
@wraps(state_cls) # type: ignore[arg-type]
|
| 92 |
+
def inner(
|
| 93 |
+
func: Callable[Concatenate[_M, _P], _M],
|
| 94 |
+
) -> _ContractFn[Concatenate[_M, _P], _M, _TState]:
|
| 95 |
+
@wraps(func)
|
| 96 |
+
def wrapper(
|
| 97 |
+
module: _M,
|
| 98 |
+
*args: _P.args,
|
| 99 |
+
**kwargs: _P.kwargs,
|
| 100 |
+
) -> _M:
|
| 101 |
+
inp_module = module
|
| 102 |
+
modules: list[nn.Module]
|
| 103 |
+
if isinstance(module, nn.Module):
|
| 104 |
+
modules = [module]
|
| 105 |
+
else:
|
| 106 |
+
# If the user passes a sequence of modules, then we assume that
|
| 107 |
+
# we only need to insert the state object on the root modules
|
| 108 |
+
# (i.e. those without a parent) among the passed-in modules.
|
| 109 |
+
modules = _get_root_modules(list(module))
|
| 110 |
+
state = state_cls() # shared across all modules
|
| 111 |
+
registry_item = RegistryItem() # shared across all modules
|
| 112 |
+
|
| 113 |
+
# `func` is allowed to return different module instances than the
|
| 114 |
+
# input modules as long as FQNs are preserved following the input
|
| 115 |
+
# module order
|
| 116 |
+
all_orig_named_params: list[dict[str, nn.Parameter]] = []
|
| 117 |
+
all_orig_named_buffers: list[dict[str, torch.Tensor]] = []
|
| 118 |
+
all_orig_named_modules: list[dict[str, nn.Module]] = []
|
| 119 |
+
|
| 120 |
+
for module in modules:
|
| 121 |
+
default_all_state: dict[Callable, _State] = OrderedDict()
|
| 122 |
+
default_registry: dict[str, RegistryItem] = OrderedDict()
|
| 123 |
+
all_state: dict[Callable, _State] = module.__dict__.setdefault( # type: ignore[call-overload]
|
| 124 |
+
STATE_KEY, default_all_state
|
| 125 |
+
)
|
| 126 |
+
if not isinstance(all_state, dict):
|
| 127 |
+
raise AssertionError(
|
| 128 |
+
f"Distributed composable API states corrupted: {all_state}"
|
| 129 |
+
)
|
| 130 |
+
registry: dict[str, RegistryItem] = module.__dict__.setdefault( # type: ignore[call-overload]
|
| 131 |
+
REGISTRY_KEY, default_registry
|
| 132 |
+
)
|
| 133 |
+
if not isinstance(registry, dict):
|
| 134 |
+
raise AssertionError(
|
| 135 |
+
f"Distributed composable API registry corrupted: {registry}"
|
| 136 |
+
)
|
| 137 |
+
if func in all_state or func.__name__ in registry:
|
| 138 |
+
raise AssertionError(
|
| 139 |
+
"Each distinct composable distributed API can only be applied to a "
|
| 140 |
+
f"module once. {func.__name__} has already been applied to the "
|
| 141 |
+
f"following module:\n{module}"
|
| 142 |
+
)
|
| 143 |
+
all_state.setdefault(func, state)
|
| 144 |
+
registry.setdefault(func.__name__, registry_item)
|
| 145 |
+
|
| 146 |
+
all_orig_named_params.append(OrderedDict(module.named_parameters()))
|
| 147 |
+
all_orig_named_buffers.append(OrderedDict(module.named_buffers()))
|
| 148 |
+
all_orig_named_modules.append(OrderedDict(module.named_modules()))
|
| 149 |
+
|
| 150 |
+
updated = func(inp_module, *args, **kwargs)
|
| 151 |
+
if updated is None:
|
| 152 |
+
updated = inp_module # type: ignore[assignment]
|
| 153 |
+
updated_modules: list[nn.Module]
|
| 154 |
+
if isinstance(updated, nn.Module):
|
| 155 |
+
updated_modules = [updated]
|
| 156 |
+
else:
|
| 157 |
+
updated_modules = _get_root_modules(list(inp_module)) # type: ignore[arg-type, call-overload]
|
| 158 |
+
|
| 159 |
+
all_new_named_params: list[dict[str, nn.Parameter]] = []
|
| 160 |
+
all_new_named_buffers: list[dict[str, torch.Tensor]] = []
|
| 161 |
+
all_new_named_modules: list[dict[str, nn.Module]] = []
|
| 162 |
+
for module in updated_modules:
|
| 163 |
+
all_new_named_params.append(OrderedDict(module.named_parameters()))
|
| 164 |
+
all_new_named_buffers.append(OrderedDict(module.named_buffers()))
|
| 165 |
+
all_new_named_modules.append(OrderedDict(module.named_modules()))
|
| 166 |
+
|
| 167 |
+
num_orig_modules = len(all_orig_named_modules)
|
| 168 |
+
num_new_modules = len(all_new_named_modules)
|
| 169 |
+
if num_orig_modules != num_new_modules:
|
| 170 |
+
raise AssertionError(
|
| 171 |
+
f"{func.__name__} should return the same number of modules as input modules"
|
| 172 |
+
f"Inputs: {num_orig_modules} modules\n"
|
| 173 |
+
f"Outputs: {num_new_modules} modules"
|
| 174 |
+
)
|
| 175 |
+
|
| 176 |
+
def check_fqn(orig_fqns: list[str], new_fqns: list[str], check_key: str):
|
| 177 |
+
if orig_fqns == new_fqns:
|
| 178 |
+
return
|
| 179 |
+
|
| 180 |
+
orig_fqn_set, new_fqn_set = set(orig_fqns), set(new_fqns)
|
| 181 |
+
orig_only = orig_fqn_set - new_fqn_set
|
| 182 |
+
new_only = new_fqn_set - orig_fqn_set
|
| 183 |
+
if len(orig_only) or len(new_only):
|
| 184 |
+
raise RuntimeError(
|
| 185 |
+
f"{check_key}"
|
| 186 |
+
"Composable distributed API implementations cannot modify FQNs.\n"
|
| 187 |
+
f"FQNs only in original: {orig_only}\n"
|
| 188 |
+
f"FQNs only in new: {new_only}"
|
| 189 |
+
)
|
| 190 |
+
else:
|
| 191 |
+
raise RuntimeError(
|
| 192 |
+
f"{check_key}"
|
| 193 |
+
"Composable distributed API implementations cannot modify "
|
| 194 |
+
"the order of FQNs.\n"
|
| 195 |
+
f"Original FQNs: {orig_only}\n"
|
| 196 |
+
f"New FQNs: {new_only}"
|
| 197 |
+
)
|
| 198 |
+
|
| 199 |
+
for orig_named_params, new_named_params in zip(
|
| 200 |
+
all_orig_named_params, all_new_named_params
|
| 201 |
+
):
|
| 202 |
+
check_fqn(
|
| 203 |
+
list(orig_named_params.keys()),
|
| 204 |
+
list(new_named_params.keys()),
|
| 205 |
+
"Checking parameters: ",
|
| 206 |
+
)
|
| 207 |
+
for orig_named_buffers, new_named_buffers in zip(
|
| 208 |
+
all_orig_named_buffers, all_new_named_buffers
|
| 209 |
+
):
|
| 210 |
+
check_fqn(
|
| 211 |
+
list(orig_named_buffers.keys()),
|
| 212 |
+
list(new_named_buffers.keys()),
|
| 213 |
+
"Checking buffers: ",
|
| 214 |
+
)
|
| 215 |
+
for orig_named_modules, new_named_modules in zip(
|
| 216 |
+
all_orig_named_modules, all_new_named_modules
|
| 217 |
+
):
|
| 218 |
+
check_fqn(
|
| 219 |
+
list(orig_named_modules.keys()),
|
| 220 |
+
list(new_named_modules.keys()),
|
| 221 |
+
"Checking modules: ",
|
| 222 |
+
)
|
| 223 |
+
|
| 224 |
+
# TODO: verify that installed distributed paradigms are compatible with
|
| 225 |
+
# each other.
|
| 226 |
+
|
| 227 |
+
return updated
|
| 228 |
+
|
| 229 |
+
def get_state(module: nn.Module) -> _State:
|
| 230 |
+
return module.__dict__.setdefault( # type: ignore[call-overload]
|
| 231 |
+
STATE_KEY,
|
| 232 |
+
{}, # TODO(@yhcharles): this is a temporary fix, need a better way
|
| 233 |
+
).get(func) # type: ignore[call-overload]
|
| 234 |
+
|
| 235 |
+
wrapper.state = get_state # type: ignore[attr-defined]
|
| 236 |
+
|
| 237 |
+
return wrapper # type: ignore[return-value]
|
| 238 |
+
|
| 239 |
+
return inner # type: ignore[return-value]
|
| 240 |
+
|
| 241 |
+
|
| 242 |
+
def _get_registry(module: nn.Module) -> Optional[dict[str, RegistryItem]]:
|
| 243 |
+
r"""
|
| 244 |
+
Get an ``OrderedDict`` of composable APIs that have been applied to the
|
| 245 |
+
``module``, indexed by the API name. If no API has been applied, then this
|
| 246 |
+
returns ``None``.
|
| 247 |
+
"""
|
| 248 |
+
return getattr(module, REGISTRY_KEY, None)
|
phivenv/Lib/site-packages/torch/distributed/_composable/fsdp/__init__.py
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from torch.distributed.fsdp import CPUOffloadPolicy, MixedPrecisionPolicy, OffloadPolicy
|
| 2 |
+
|
| 3 |
+
from .fully_shard import FSDPModule, fully_shard, register_fsdp_forward_method
|
phivenv/Lib/site-packages/torch/distributed/_composable/fsdp/__pycache__/__init__.cpython-39.pyc
ADDED
|
Binary file (395 Bytes). View file
|
|
|
phivenv/Lib/site-packages/torch/distributed/_composable/fsdp/__pycache__/fully_shard.cpython-39.pyc
ADDED
|
Binary file (323 Bytes). View file
|
|
|
phivenv/Lib/site-packages/torch/distributed/_composable/fsdp/fully_shard.py
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# TODO: For backward compatibility, we are importing the public objects
|
| 2 |
+
# originally from this file.
|
| 3 |
+
from torch.distributed.fsdp import ( # noqa: F401
|
| 4 |
+
FSDPModule,
|
| 5 |
+
fully_shard,
|
| 6 |
+
register_fsdp_forward_method,
|
| 7 |
+
UnshardHandle,
|
| 8 |
+
)
|
phivenv/Lib/site-packages/torch/distributed/_composable/replicate.py
ADDED
|
@@ -0,0 +1,256 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# mypy: allow-untyped-defs
|
| 2 |
+
import weakref
|
| 3 |
+
from collections.abc import Iterable
|
| 4 |
+
from typing import Any, NoReturn, Optional
|
| 5 |
+
|
| 6 |
+
import torch
|
| 7 |
+
import torch.nn as nn
|
| 8 |
+
from torch.distributed._composable_state import _State
|
| 9 |
+
from torch.nn.parallel import DistributedDataParallel
|
| 10 |
+
|
| 11 |
+
from .contract import _get_registry, contract
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
_ROOT_MODULE_PREFIX = ""
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
class _ReplicateState(_State):
    """Composable-API state backing ``replicate()``.

    Holds the lazily-constructed :class:`DistributedDataParallel` instance
    plus the arguments recorded at ``replicate()`` call time so that the
    actual DDP construction can be deferred until the first forward pass.
    """

    # Weak reference to the wrapped DistributedDataParallel instance;
    # assigned in ``init`` and currently only used for testing.
    _ddp_weakref: weakref.ref

    def __init__(self) -> None:
        super().__init__()
        # Placeholder until ``init`` installs the real module.
        self.module: nn.Module = nn.ParameterList()
        self.has_initialized: bool = False
        self._param_list: nn.ParameterList = nn.ParameterList()
        # TODO(@fegin): this variable is originally create for testing, we
        # should remove this if possible.
        self._orig_module = self.module
        self._param_names: list[str] = []
        # When True, gradient synchronization is skipped on the next backward.
        self._no_sync: bool = False
        # Arguments captured by ``record_init_args`` for deferred ``init``.
        self._init_args: Optional[tuple[Any, ...]] = None
        self._init_kwargs: dict[str, Any] = {}
        # Comm hooks registered before the DDP instance exists; replayed
        # by ``register_comm_hook`` after lazy initialization.
        self._comm_hook_args: list[Any] = []

    def _collect_params(
        self,
        module: nn.Module,
        ignored_modules: set[nn.Module],
        ignored_params: set[nn.Parameter],
        prefix: str = _ROOT_MODULE_PREFIX,
    ) -> None:
        """Recursively gather the parameters DDP should manage into
        ``self._param_list`` / ``self._param_names``."""
        # skip if managed by fully_sharded API
        if _is_fully_sharded(module):
            return

        # if a module is ignored, all descendants of the module are ignored.
        if module in ignored_modules:
            return

        recurse_prefix = (
            f"{prefix}." if prefix != _ROOT_MODULE_PREFIX else _ROOT_MODULE_PREFIX
        )

        for n, p in module.named_parameters(recurse=False):
            if p not in ignored_params:
                self._param_list.append(p)
                self._param_names.append(f"{recurse_prefix}{n}")

        for name, child_module in module.named_children():
            self._collect_params(
                child_module,
                ignored_modules,
                ignored_params,
                prefix=f"{recurse_prefix}{name}",
            )

    def lazy_init(self) -> None:
        """Perform the deferred ``init`` using the recorded arguments.

        Wrapped in ``torch._disable_dynamo`` so the one-time setup is not
        traced/compiled.
        """

        @torch._disable_dynamo(recursive=True)
        def _lazy_init():
            assert self._init_args is not None
            self.init(*self._init_args, **self._init_kwargs)
            self.register_comm_hook()
            # Clear the recorded args so the pre-forward hook does not
            # re-trigger initialization.
            self._init_args = ()
            self._init_kwargs = {}

        _lazy_init()

    def init(
        self,
        module: nn.Module,
        ignored_modules: set[nn.Module],
        **kwargs,
    ) -> None:
        """Construct the underlying DDP instance over the collected params."""
        if self.has_initialized:
            return

        self.has_initialized = True
        self.module = module
        ignored_params = {p for m in ignored_modules for p in m.parameters()}
        # Parameters already managed by fully_shard are excluded from DDP.
        for submodule in module.modules():
            if _is_fully_sharded(submodule):
                ignored_params.update(submodule.parameters())
        from torch.distributed.tensor.parallel.ddp import _localize_dtensor

        _localize_dtensor(module, ignored_params=ignored_params)
        self._collect_params(module, ignored_modules, ignored_params)

        if "device_id" in kwargs:
            # replicate() supports a small usability enhancement where
            # user can pass in device_id as a Union[int, torch.device] even for
            # CPU devices so users don't have to change code for CPU/GPU runs.
            # We derive the right device_ids to feed into DDP to support this.
            if kwargs["device_id"] is not None:
                device_id = kwargs["device_id"]
                # Convert to device_ids that DDP expects.
                if isinstance(device_id, torch.device) and device_id.type == "cpu":
                    # CPU modules receive device_ids None
                    kwargs["device_ids"] = None
                else:
                    # GPU modules expect device_ids=[cuda_device]
                    kwargs["device_ids"] = [device_id]
            else:
                kwargs["device_ids"] = None
            kwargs.pop("device_id")

        self._ddp = DistributedDataParallel(self._param_list, **kwargs)
        # Weakref to the DDP instance is currently only used for testing.
        replicate.state(self.module)._ddp_weakref = weakref.ref(self._ddp)

    def register_comm_hook(self) -> None:
        """Replay comm hooks recorded before the DDP instance existed."""
        for comm_args, comm_kwargs in self._comm_hook_args:
            self._ddp.register_comm_hook(*comm_args, **comm_kwargs)
        self._comm_hook_args.clear()

    def record_init_args(self, *args, **kwargs) -> None:
        """Stash the ``init`` arguments for later ``lazy_init``."""
        self._init_args = args
        self._init_kwargs = kwargs

    def forward_pre_hook(
        self, module: nn.Module, args: tuple[Any, ...], kwargs: dict[str, Any]
    ) -> Any:
        """Module pre-forward hook: lazily initialize, then delegate to DDP."""
        if self._init_args or self._init_kwargs:
            self.lazy_init()
        self._ddp.require_backward_grad_sync = not self._no_sync
        return self._ddp._pre_forward(*args, **kwargs)

    def forward_post_hook(
        self,
        module: nn.Module,
        input: tuple[torch.Tensor],
        output: torch.Tensor,
    ) -> torch.Tensor:
        """Module post-forward hook: let DDP post-process the output."""
        return self._ddp._post_forward(output)
|
| 143 |
+
|
| 144 |
+
|
| 145 |
+
def unimplemented_deepcopy(*args: Any, **kwargs: Any) -> NoReturn:
    """Stand-in ``__deepcopy__`` installed on replicated modules.

    Always raises, since deep-copying a DDP-managed module is unsupported.
    """
    message = "DDP does not support deepcopy. Please use state dict for serialization."
    raise AssertionError(message)
|
| 149 |
+
|
| 150 |
+
|
| 151 |
+
# Follow the same pattern as FSDP/fully_shard
class DDP:
    """Mixin class dynamically placed in front of a replicated module's MRO
    by ``replicate()`` to expose DDP-specific control methods."""

    def __new__(cls, *args, **kwargs):
        """
        Override ``__new__`` to remove the DDP class and directly construct
        the original class for cases like indexing into a container module.
        """
        # Use index 2 since 0 is the dynamically constructed `DDP<...>` class
        # and index 1 is the `DDP` class itself
        orig_cls = cls.__mro__[2]
        return orig_cls.__new__(orig_cls, *args, **kwargs)

    def set_requires_gradient_sync(self, requires_gradient_sync: bool) -> None:
        """
        Sets if the module should sync gradients. This can be used to implement
        gradient accumulation without communication.

        Args:
            requires_gradient_sync (bool): Whether to reduce gradients for the
                module's parameters.
        """
        replicate.state(self)._no_sync = not requires_gradient_sync  # type: ignore[arg-type]

    def register_comm_hook(self, *args, **kwargs) -> None:
        # Record the hook arguments; they are applied to the real DDP
        # instance during lazy initialization.
        replicate.state(self)._comm_hook_args.append((args, kwargs))  # type: ignore[arg-type]
|
| 176 |
+
|
| 177 |
+
|
| 178 |
+
@contract(state_cls=_ReplicateState)
def replicate(
    module: nn.Module,
    ignored_modules: Optional[Iterable[torch.nn.Module]] = None,
    **kwargs,
) -> nn.Module:
    r"""Replicates a module

    Applies data-parallel replication (DDP semantics) to ``module`` via the
    composable API. The actual ``DistributedDataParallel`` construction is
    deferred to the first forward pass (see ``_ReplicateState.lazy_init``).

    Args:
        module (torch.nn.Module): module to replicate
        ignored_modules (Optional[Iterable[torch.nn.Module]]): modules whose
            parameters should not be managed by DDP.

    Example::
        >>> # xdoctest: +REQUIRES(module:torch._C._distributed_c10d)
        >>> module = nn.Linear(3, 3)
        >>> replicate(module)
    """
    torch._C._log_api_usage_once("torch.distributed.replicate")

    # TODO(fegin): using kwargs is not a good idea if we would like to make
    # replicate a formal API to replace DDP.
    if "device_id" in kwargs:
        if not isinstance(kwargs["device_id"], (int, torch.device)):
            raise RuntimeError(
                "Expected device_id to be int or torch.device, "
                f"but got {type(kwargs['device_id'])}"
            )

    if _is_fully_sharded(module):
        raise RuntimeError(
            "Cannot apply `replicate()` on a Module already managed by `fully_shard`"
        )

    if ignored_modules is None:
        # Fix: the original used `{}`, which builds an empty *dict*, not the
        # set[nn.Module] that `_collect_params`/`init` are annotated to take.
        ignored_modules = set()
    else:
        ignored_modules = set(ignored_modules)

    state = replicate.state(module)
    module.register_forward_pre_hook(state.forward_pre_hook, with_kwargs=True)
    device_mesh = kwargs.get("device_mesh", None)
    if device_mesh is not None:
        from torch.distributed.device_mesh import _mesh_resources

        root_mesh = _mesh_resources.get_root_mesh(device_mesh)
        # if a root mesh is not the same as device_mesh,
        # meaning the device_mesh is sliced out from the root mesh.
        if root_mesh != device_mesh:
            # TODO: This is a temporary work around to enable DDP + TP.
            # We should do the logic in DDP so that the 2D implementation is
            # sound and the state_dict works out of the box.
            #
            # This won't conflict with what is done in DDP class as the module
            # replicate is going to pass is NOT the original module.
            from torch.distributed.tensor.parallel.ddp import (
                _localize_dtensor,
                _reconstruct_dtensor,
            )

            module.register_forward_pre_hook(_reconstruct_dtensor)
            module.register_forward_hook(_localize_dtensor)

    module.register_forward_hook(state.forward_post_hook)  # type: ignore[arg-type]

    state.record_init_args(module, ignored_modules, **kwargs)

    # Place DDP leftmost for highest priority in the method resolution order
    cls = module.__class__
    dct = {"__deepcopy__": unimplemented_deepcopy}
    new_cls = type(f"DDP{cls.__name__}", (DDP, cls), dct)
    module.__class__ = new_cls
    return module
|
| 249 |
+
|
| 250 |
+
|
| 251 |
+
def _is_fully_sharded(module: nn.Module) -> bool:
    r"""Check if module is marked with fully_shard."""
    registry = _get_registry(module)
    # A missing registry means no composable API was ever applied.
    return registry is not None and "fully_shard" in registry
|
phivenv/Lib/site-packages/torch/distributed/_shard/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
from .api import _shard_tensor, load_with_process_group, shard_module, shard_parameter
|
phivenv/Lib/site-packages/torch/distributed/_shard/__pycache__/__init__.cpython-39.pyc
ADDED
|
Binary file (292 Bytes). View file
|
|
|
phivenv/Lib/site-packages/torch/distributed/_shard/__pycache__/_utils.cpython-39.pyc
ADDED
|
Binary file (1.04 kB). View file
|
|
|
phivenv/Lib/site-packages/torch/distributed/_shard/__pycache__/api.cpython-39.pyc
ADDED
|
Binary file (9.82 kB). View file
|
|
|
phivenv/Lib/site-packages/torch/distributed/_shard/__pycache__/common_op_utils.cpython-39.pyc
ADDED
|
Binary file (2.25 kB). View file
|
|
|
phivenv/Lib/site-packages/torch/distributed/_shard/__pycache__/metadata.cpython-39.pyc
ADDED
|
Binary file (2.28 kB). View file
|
|
|
phivenv/Lib/site-packages/torch/distributed/_shard/__pycache__/op_registry_utils.cpython-39.pyc
ADDED
|
Binary file (1.18 kB). View file
|
|
|
phivenv/Lib/site-packages/torch/distributed/_shard/__pycache__/sharder.cpython-39.pyc
ADDED
|
Binary file (1.32 kB). View file
|
|
|
phivenv/Lib/site-packages/torch/distributed/_shard/_utils.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from collections.abc import Sequence
|
| 2 |
+
|
| 3 |
+
import torch
|
| 4 |
+
from torch.distributed._shard.metadata import ShardMetadata
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
DEPRECATE_MSG = "Please use DTensor instead and we are deprecating ShardedTensor."
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def narrow_tensor_by_index(
    tensor: torch.Tensor,
    offsets: Sequence[int],
    sizes: Sequence[int],
) -> torch.Tensor:
    """
    Narrow the tensor according to ``offsets`` and ``sizes``.

    Dimensions whose requested size already spans the full tensor are left
    untouched, so a no-op request returns the original tensor object.
    """
    shard = tensor
    for dim, (start, length) in enumerate(zip(offsets, sizes)):
        # A full-size dimension needs no narrowing; skipping the narrow op
        # also avoids autograd recording so the result can stay a leaf
        # variable in the autograd graph.
        if length >= tensor.size(dim):
            continue
        shard = shard.narrow(dim, start, length)
    return shard
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def narrow_tensor(tensor: torch.Tensor, metadata: ShardMetadata) -> torch.Tensor:
    """
    Narrow the tensor according to the metadata

    Thin convenience wrapper that forwards the shard's offsets and sizes
    to :func:`narrow_tensor_by_index`.
    """
    offsets, sizes = metadata.shard_offsets, metadata.shard_sizes
    return narrow_tensor_by_index(tensor, offsets, sizes)
|
phivenv/Lib/site-packages/torch/distributed/_shard/api.py
ADDED
|
@@ -0,0 +1,306 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# mypy: allow-untyped-defs
|
| 2 |
+
from contextlib import contextmanager
|
| 3 |
+
from typing import Optional
|
| 4 |
+
|
| 5 |
+
import torch
|
| 6 |
+
import torch.distributed as dist
|
| 7 |
+
import torch.nn as nn
|
| 8 |
+
from torch.distributed import distributed_c10d
|
| 9 |
+
from torch.distributed._shard.sharded_tensor import ShardedTensor
|
| 10 |
+
|
| 11 |
+
from .sharder import Sharder
|
| 12 |
+
from .sharding_plan import ShardingPlan
|
| 13 |
+
from .sharding_spec import ChunkShardingSpec, ShardingSpec
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
def _shard_tensor(
    tensor: torch.Tensor, sharding_spec: ShardingSpec, src_rank=0, process_group=None
) -> ShardedTensor:
    """
    Given a :class:`torch.Tensor`, it shards that tensor according to the provided
    ``sharding_spec``. ``src_rank`` denotes the source rank which would be
    used as the ground truth of the data which would be scattered as shards
    across the rest of the ranks.

    Args:
        tensor (:class:`torch.Tensor`): Tensor needs to be sharded.
        sharding_spec (:class:`torch.distributed._shard.sharding_spec.ShardingSpec`): The specification
            describing how to shard the Tensor.

    Keyword args:
        src_rank (int, optional): The source rank which is used as the ground truth of
            the data for the parameter that would be sharded and scattered
            across the rest of the ranks.
            Default: 0.
        process_group (ProcessGroup, optional): The process group to work on. If None,
            the default process group will be used.

    Returns:
        A :class:`ShardedTensor` sharded from the given tensor.

    .. warning::
        Only :class:`torch.distributed._shard.sharding_spec.ChunkShardingSpec` is
        currently supported as the ``sharding_spec``.
    """
    if not tensor.is_contiguous():
        raise ValueError("input tensor is not a contiguous Tensor")

    pg = (
        process_group
        if process_group is not None
        else distributed_c10d._get_default_group()
    )
    world_size = dist.get_world_size(pg)
    current_rank = dist.get_rank(pg)

    # Every rank must pass the same (src_rank, sharding_spec) pair; gather
    # everyone's values and compare against the local ones.
    gathered_list = [None] * world_size
    dist.all_gather_object(gathered_list, (src_rank, sharding_spec), group=pg)

    for idx, entry in enumerate(gathered_list):
        peer_src_rank, peer_spec = entry  # type: ignore[misc]
        if src_rank != peer_src_rank:
            raise ValueError(
                f"src_rank={src_rank} on rank: {current_rank} does not "
                f"match with src_rank={peer_src_rank} on rank: {idx}"
            )
        if sharding_spec != peer_spec:
            raise ValueError(
                f"sharding_spec={sharding_spec} on rank: {current_rank} does not "
                f"match with sharding_spec={peer_spec} on rank: {idx}"
            )

    # Delegate the actual scatter/shard work to the spec implementation.
    return sharding_spec.shard(tensor, src_rank=src_rank, process_group=pg)
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
def shard_parameter(
    module: torch.nn.Module,
    param_name: str,
    sharding_spec: ShardingSpec,
    src_rank=0,
    process_group=None,
):
    """
    Given a :class:`torch.nn.Module`, a ``param_name`` for a parameter in that
    module, it shards that parameter according to the provided
    ``sharding_spec``. ``src_rank`` denotes the source rank which would be
    used as the ground truth of the data which would be scattered as shards
    across the rest of the ranks.

    This method replaces ``module.param_name`` with a
    :class:`torch.distributed._sharded_tensor.ShardedTensor`

    Args:
        module (:class:`torch.nn.Module`): Module whose parameter needs to be sharded.
        param_name (str): Name of the parameter of ``module`` that needs to be sharded.
        sharding_spec (:class:`torch.distributed._shard.sharding_spec.ShardingSpec`): The specification
            describing how to shard the Tensor.

    Keyword args:
        src_rank (int, optional): The source rank which is used as the ground truth of
            the data for the parameter that would be sharded and scattered
            across the rest of the ranks.
            Default: 0.
        process_group (ProcessGroup, optional): The process group to work on. If None,
            the default process group will be used.

    .. warning::
        Only :class:`torch.distributed._shard.sharding_spec.ChunkShardingSpec` is
        currently supported as the ``sharding_spec``.
    """
    # Validate that the attribute exists and is a contiguous tensor before
    # attempting any collective work.
    if not hasattr(module, param_name):
        raise AttributeError(f"{module._get_name()} has no attribute `{param_name}`")

    tensor = getattr(module, param_name)
    if not isinstance(tensor, torch.Tensor):
        raise ValueError(
            f"Expected {type(module).__name__}.{param_name} to be a Tensor, but found {type(tensor).__name__}"
        )
    if not tensor.is_contiguous():
        raise ValueError(f"param: {param_name} is not a contiguous Tensor")

    sharded = _shard_tensor(tensor, sharding_spec, src_rank, process_group)

    # Replace param with ShardedTensor.
    module.register_parameter(param_name, nn.Parameter(sharded))
|
| 129 |
+
|
| 130 |
+
|
| 131 |
+
# Tracks the current process group in the load context manager.
_CURRENT_PROCESS_GROUP: Optional[dist.ProcessGroup] = None


@contextmanager
def load_with_process_group(process_group):
    """
    Context manager to set the process group with which to load a ShardedTensor.

    Nesting is disallowed: entering while another ``load_with_process_group``
    context is active raises ``RuntimeError``.
    """
    global _CURRENT_PROCESS_GROUP
    if _CURRENT_PROCESS_GROUP is None:
        _CURRENT_PROCESS_GROUP = process_group
        try:
            yield process_group
        finally:
            # Always clear on exit so a later context can be entered.
            _CURRENT_PROCESS_GROUP = None
    else:
        raise RuntimeError(
            'ProcessGroup already set by previous "load_with_process_group" '
            "context manager"
        )
|
| 151 |
+
|
| 152 |
+
|
| 153 |
+
def _get_current_process_group():
    """
    Retrieves the current process group set by ``load_with_process_group``.
    If not set, it just returns the default group.
    """
    global _CURRENT_PROCESS_GROUP
    return (
        distributed_c10d._get_default_group()
        if _CURRENT_PROCESS_GROUP is None
        else _CURRENT_PROCESS_GROUP
    )
|
| 163 |
+
|
| 164 |
+
|
| 165 |
+
def _reshard_output(
|
| 166 |
+
module: torch.nn.Module, resharding_spec: ShardingSpec
|
| 167 |
+
) -> torch.nn.Module:
|
| 168 |
+
"""
|
| 169 |
+
Hook a module with output resharding in the forward pass according
|
| 170 |
+
to the given ``resharding_spec``.
|
| 171 |
+
|
| 172 |
+
Args:
|
| 173 |
+
module (:class:`torch.nn.Module`): Module whose output needs to be resharded.
|
| 174 |
+
resharding_spec (:class:`torch.distributed._shard.sharding_spec.ShardingSpec`):
|
| 175 |
+
The specification describing how the output of the module will be resharded.
|
| 176 |
+
|
| 177 |
+
Returns:
|
| 178 |
+
A :class:`torch.nn.Module` object with reshard API hooked.
|
| 179 |
+
"""
|
| 180 |
+
|
| 181 |
+
def hook_func(_module, _input, output):
|
| 182 |
+
if isinstance(output, ShardedTensor):
|
| 183 |
+
return output.reshard(resharding_spec)
|
| 184 |
+
return output
|
| 185 |
+
|
| 186 |
+
module.register_forward_hook(hook_func)
|
| 187 |
+
return module
|
| 188 |
+
|
| 189 |
+
|
| 190 |
+
def _collect_local_shard(module: torch.nn.Module) -> torch.nn.Module:
|
| 191 |
+
"""
|
| 192 |
+
Hook a module with local shards collection in the forward pass.
|
| 193 |
+
|
| 194 |
+
This API is typically used to convert a sharded representation back to data parallel
|
| 195 |
+
representation. In particular, it returns the local tensor for this Shard. If the
|
| 196 |
+
size along the sharding dimension for the local tensor is 1, this dimension is removed
|
| 197 |
+
from the final result. For example a [4, 16] ShardedTensor across 4 ranks is typically
|
| 198 |
+
a local Tensor of size [16] across each rank and not [1, 16] across each rank.
|
| 199 |
+
|
| 200 |
+
Args:
|
| 201 |
+
module (:class:`torch.nn.Module`): Module whose output is ShardedTensor and the
|
| 202 |
+
local tensor value needs to be returned.
|
| 203 |
+
|
| 204 |
+
Returns:
|
| 205 |
+
A :class:`torch.nn.Module` object with collection API hooked.
|
| 206 |
+
"""
|
| 207 |
+
|
| 208 |
+
def hook_func(_module, _input, output):
|
| 209 |
+
if isinstance(output, ShardedTensor):
|
| 210 |
+
local_tensor = output.local_tensor()
|
| 211 |
+
# Squeeze the # of dimensions manually, only applicable to ChunkShardingSpec
|
| 212 |
+
sharding_spec = output._sharding_spec
|
| 213 |
+
if (
|
| 214 |
+
isinstance(sharding_spec, ChunkShardingSpec)
|
| 215 |
+
and local_tensor.size(sharding_spec.dim) == 1 # type: ignore[attr-defined, arg-type]
|
| 216 |
+
):
|
| 217 |
+
local_tensor = local_tensor.squeeze(
|
| 218 |
+
output._sharding_spec.dim # type: ignore[attr-defined]
|
| 219 |
+
)
|
| 220 |
+
return local_tensor
|
| 221 |
+
|
| 222 |
+
module.register_forward_hook(hook_func)
|
| 223 |
+
return module
|
| 224 |
+
|
| 225 |
+
|
| 226 |
+
def shard_module(module: nn.Module, plan: ShardingPlan, src_rank=0, process_group=None):
    """
    Shards a given module according to the provided sharding `plan`. This method
    first shards all the parameters according to the given sharding `plan`. Then if
    `output_plan` and `return_local_tensor` are specified in the sharding `plan`, it
    will tag the output of modules according `output_plan`, convert the module's
    output back to data parallel according to `return_local_tensor`.

    Needs to be called on all ranks in an SPMD fashion.

    Args:
        module (:class:`torch.nn.Module`): The module to apply sharding to
        plan (:class:`torch.distributed._shard.sharding_plan.ShardingPlan`):
            The ShardingPlan which specified param name to ShardingSpec to apply to
            each parameter.

    Keyword args:
        src_rank (int, optional): The source rank which is used as the ground truth of
            the data for the module that would be sharded and scattered across the rest
            of the ranks.
            Default: 0.
        process_group (ProcessGroup, optional): The process group to work on. If None,
            the default process group will be used.
    """
    # record Sharder paths for sanity check on the plan to ensure items in the plan
    # does not conflict with the submodule tree that the Sharder is working with
    sharder_paths = []
    for name, spec in plan.plan.items():
        if isinstance(spec, Sharder):
            sharder_paths.append(name)

    # shard the parameter according to the ShardingPlan
    for name, spec in plan.plan.items():
        if isinstance(spec, ShardingSpec):
            # if found a sharding spec, try to shard the parameter
            module_path, _, param_name = name.rpartition(".")

            for sharder_path in sharder_paths:
                if module_path.startswith(sharder_path):
                    raise RuntimeError(
                        f"ShardingPlan is in-valid, trying to shard a parameter: {name},"
                        f" but there's already a Sharder entry for module {sharder_path},"
                        f" parameter sharding should not conflict with the submodule tree"
                        f" that a Sharder is working with!"
                    )

            mod = module.get_submodule(module_path)
            shard_parameter(
                mod, param_name, spec, src_rank=src_rank, process_group=process_group
            )
        elif isinstance(spec, Sharder):
            parent_mod_path, _, mod_name = name.rpartition(".")
            if name == "":
                raise KeyError("Module path must not be empty for custom sharder!")
            mod = module.get_submodule(name)
            parent_mod = module.get_submodule(parent_mod_path)
            sharded_mod = spec.shard(mod)
            # swap this submodule with the sharded module
            # Fix: the original wrote `parent_mod.mod_name = sharded_mod`,
            # which sets an attribute literally named "mod_name" instead of
            # replacing the submodule; use setattr with the actual name.
            setattr(parent_mod, mod_name, sharded_mod)
        else:
            raise TypeError(
                f"Only `ShardingSpec` and `Sharder` are supported to shard '{name}'"
            )

    # reshard output if there's an entry in `reshard_output` for this module
    if plan.output_plan is not None:
        for module_path, output_spec in plan.output_plan.items():
            if isinstance(output_spec, ShardingSpec):
                mod = module.get_submodule(module_path)
                _reshard_output(mod, output_spec)
            else:
                raise TypeError(
                    f"Only `ShardingSpec` is supported as output_plan for '{module_path}'"
                )
    # convert the output back to data parallel for the modules appears in
    # `return_local_tensor` of the plan, we will call `_collect_local_shard`
    # to collect the local tensor for output of modules
    if plan.return_local_tensor is not None:
        for module_path in plan.return_local_tensor:
            mod = module.get_submodule(module_path)
            _collect_local_shard(mod)
|
phivenv/Lib/site-packages/torch/distributed/_shard/checkpoint/__init__.py
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Keep old package for BC purposes, this file should be removed once
# everything moves to the `torch.distributed.checkpoint` package.
import sys
import warnings

import torch
from torch.distributed.checkpoint import *  # noqa: F403


# Emit the deprecation warning unconditionally ("always") so every import
# of the old package surfaces it, even if DeprecationWarning is filtered.
with warnings.catch_warnings():
    warnings.simplefilter("always")
    warnings.warn(
        "`torch.distributed._shard.checkpoint` will be deprecated, "
        "use `torch.distributed.checkpoint` instead",
        DeprecationWarning,
        stacklevel=2,
    )

# Alias the old module path to the new package so submodule imports and
# attribute access on the old name keep working.
sys.modules["torch.distributed._shard.checkpoint"] = torch.distributed.checkpoint
|
phivenv/Lib/site-packages/torch/distributed/_shard/checkpoint/__pycache__/__init__.cpython-39.pyc
ADDED
|
Binary file (628 Bytes). View file
|
|
|
phivenv/Lib/site-packages/torch/distributed/_shard/common_op_utils.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# mypy: allow-untyped-defs
|
| 2 |
+
from typing import Optional
|
| 3 |
+
|
| 4 |
+
import torch
|
| 5 |
+
from torch.utils import _pytree as pytree
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def _basic_validation(op, args=(), kwargs=None):
|
| 9 |
+
"""
|
| 10 |
+
Common validation across all ops go in here.
|
| 11 |
+
"""
|
| 12 |
+
from torch.distributed._shard.sharded_tensor import ShardedTensor
|
| 13 |
+
|
| 14 |
+
if len(args) == 0 and (kwargs is None or len(kwargs) == 0):
|
| 15 |
+
raise ValueError(f" No input for '{op.__name__}'!")
|
| 16 |
+
|
| 17 |
+
# Validate types
|
| 18 |
+
has_distributed_tensor = False
|
| 19 |
+
|
| 20 |
+
def is_distributed_tensor(e):
|
| 21 |
+
nonlocal has_distributed_tensor
|
| 22 |
+
if isinstance(e, ShardedTensor):
|
| 23 |
+
has_distributed_tensor = True
|
| 24 |
+
|
| 25 |
+
pytree.tree_map_(is_distributed_tensor, args)
|
| 26 |
+
pytree.tree_map_(is_distributed_tensor, kwargs)
|
| 27 |
+
|
| 28 |
+
if not has_distributed_tensor:
|
| 29 |
+
raise TypeError(
|
| 30 |
+
f"torch function '{op.__name__}', with args: {args} and "
|
| 31 |
+
f"kwargs: {kwargs} are called without any distributed tensor!"
|
| 32 |
+
)
|
| 33 |
+
|
| 34 |
+
# Validate all distributed tensors use the same PG.
|
| 35 |
+
cur_pg: Optional[torch.distributed.ProcessGroup] = None
|
| 36 |
+
|
| 37 |
+
def validate_pg(e):
|
| 38 |
+
nonlocal cur_pg
|
| 39 |
+
if isinstance(e, ShardedTensor):
|
| 40 |
+
if cur_pg is not None and e._process_group is not cur_pg:
|
| 41 |
+
raise RuntimeError(
|
| 42 |
+
"All distributed tensors should use the "
|
| 43 |
+
"same ProcessGroup if used together in an op."
|
| 44 |
+
)
|
| 45 |
+
cur_pg = e._process_group
|
| 46 |
+
|
| 47 |
+
pytree.tree_map_(validate_pg, args)
|
| 48 |
+
pytree.tree_map_(validate_pg, kwargs)
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
def _register_default_op(op, decorator):
|
| 52 |
+
@decorator(op)
|
| 53 |
+
def tensor_default_op(types, args=(), kwargs=None, pg=None):
|
| 54 |
+
"""
|
| 55 |
+
Handles ``__torch_function__`` dispatch for the default tensor ops that
|
| 56 |
+
behave the same as ``torch.Tensor`` such as ``torch.Tensor.shape`` or
|
| 57 |
+
``torch.Tensor.dtype``. We simply lower to the real op call with
|
| 58 |
+
DisableTorchFunctionSubclass context like ``torch.Tensor.__torch_function__``
|
| 59 |
+
to avoid recursions.
|
| 60 |
+
"""
|
| 61 |
+
if kwargs is None:
|
| 62 |
+
kwargs = {}
|
| 63 |
+
|
| 64 |
+
with torch._C.DisableTorchFunctionSubclass():
|
| 65 |
+
return op(*args, **kwargs)
|