BryanW committed on Mar 23

Commit

1254814

verified ·

1 Parent(s): 24c31ad

Add files using upload-large-folder tool

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/__pycache__/hooks.cpython-312.pyc +0 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/__pycache__/memory_utils.cpython-312.pyc +0 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/__pycache__/state.cpython-312.pyc +0 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/commands/__pycache__/__init__.cpython-312.pyc +0 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/commands/__pycache__/accelerate_cli.cpython-312.pyc +0 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/commands/__pycache__/env.cpython-312.pyc +0 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/commands/__pycache__/estimate.cpython-312.pyc +0 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/commands/__pycache__/launch.cpython-312.pyc +0 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/commands/__pycache__/merge.cpython-312.pyc +0 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/commands/__pycache__/test.cpython-312.pyc +0 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/commands/__pycache__/to_fsdp2.cpython-312.pyc +0 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/commands/__pycache__/tpu.cpython-312.pyc +0 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/commands/__pycache__/utils.cpython-312.pyc +0 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/commands/config/__init__.py +52 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/commands/config/__pycache__/__init__.cpython-312.pyc +0 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/commands/config/__pycache__/cluster.cpython-312.pyc +0 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/commands/config/__pycache__/config.cpython-312.pyc +0 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/commands/config/__pycache__/config_args.cpython-312.pyc +0 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/commands/config/__pycache__/config_utils.cpython-312.pyc +0 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/commands/config/__pycache__/default.cpython-312.pyc +0 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/commands/config/__pycache__/sagemaker.cpython-312.pyc +0 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/commands/config/__pycache__/update.cpython-312.pyc +0 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/commands/config/cluster.py +917 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/commands/config/config.py +89 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/commands/config/config_args.py +256 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/commands/config/config_utils.py +122 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/commands/config/default.py +163 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/commands/config/sagemaker.py +274 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/commands/config/update.py +63 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/commands/menu/__init__.py +14 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/commands/menu/__pycache__/__init__.cpython-312.pyc +0 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/commands/menu/__pycache__/cursor.cpython-312.pyc +0 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/commands/menu/__pycache__/helpers.cpython-312.pyc +0 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/commands/menu/__pycache__/input.cpython-312.pyc +0 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/commands/menu/__pycache__/keymap.cpython-312.pyc +0 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/commands/menu/__pycache__/selection_menu.cpython-312.pyc +0 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/commands/menu/cursor.py +65 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/commands/menu/helpers.py +59 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/commands/menu/input.py +84 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/commands/menu/keymap.py +133 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/commands/menu/selection_menu.py +145 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/test_utils/__pycache__/__init__.cpython-312.pyc +0 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/test_utils/__pycache__/examples.cpython-312.pyc +0 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/test_utils/__pycache__/testing.cpython-312.pyc +0 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/test_utils/__pycache__/training.cpython-312.pyc +0 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/test_utils/scripts/__init__.py +13 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/test_utils/scripts/test_ddp_comm_hook.py +85 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/test_utils/scripts/test_distributed_data_loop.py +410 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/test_utils/scripts/test_merge_weights.py +158 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/test_utils/scripts/test_notebook.py +118 -0

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/__pycache__/hooks.cpython-312.pyc ADDED Viewed

Binary file (34.4 kB). View file

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/__pycache__/memory_utils.cpython-312.pyc ADDED Viewed

Binary file (517 Bytes). View file

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/__pycache__/state.cpython-312.pyc ADDED Viewed

Binary file (64.5 kB). View file

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/commands/__pycache__/__init__.cpython-312.pyc ADDED Viewed

Binary file (225 Bytes). View file

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/commands/__pycache__/accelerate_cli.cpython-312.pyc ADDED Viewed

Binary file (1.86 kB). View file

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/commands/__pycache__/env.cpython-312.pyc ADDED Viewed

Binary file (5.17 kB). View file

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/commands/__pycache__/estimate.cpython-312.pyc ADDED Viewed

Binary file (14.1 kB). View file

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/commands/__pycache__/launch.cpython-312.pyc ADDED Viewed

Binary file (51.8 kB). View file

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/commands/__pycache__/merge.cpython-312.pyc ADDED Viewed

Binary file (2.45 kB). View file

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/commands/__pycache__/test.cpython-312.pyc ADDED Viewed

Binary file (2.21 kB). View file

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/commands/__pycache__/to_fsdp2.cpython-312.pyc ADDED Viewed

Binary file (6.25 kB). View file

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/commands/__pycache__/tpu.cpython-312.pyc ADDED Viewed

Binary file (6.04 kB). View file

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/commands/__pycache__/utils.cpython-312.pyc ADDED Viewed

Binary file (5.24 kB). View file

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/commands/config/__init__.py ADDED Viewed

	@@ -0,0 +1,52 @@

+#!/usr/bin/env python
+# Copyright 2021 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import argparse
+from .config import config_command_parser
+from .config_args import default_config_file, load_config_from_file  # noqa: F401
+from .default import default_command_parser
+from .update import update_command_parser
+def get_config_parser(subparsers=None):
+    parent_parser = argparse.ArgumentParser(add_help=False, allow_abbrev=False)
+    # The main config parser
+    config_parser = config_command_parser(subparsers)
+    # The subparser to add commands to
+    subcommands = config_parser.add_subparsers(title="subcommands", dest="subcommand")
+    # Then add other parsers with the parent parser
+    default_command_parser(subcommands, parents=[parent_parser])
+    update_command_parser(subcommands, parents=[parent_parser])
+    return config_parser
+def main():
+    config_parser = get_config_parser()
+    args = config_parser.parse_args()
+    if not hasattr(args, "func"):
+        config_parser.print_help()
+        exit(1)
+    # Run
+    args.func(args)
+# Script entry point: call main() only when this module is executed directly, not on import.
+if __name__ == "__main__":
+    main()

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/commands/config/__pycache__/__init__.cpython-312.pyc ADDED Viewed

Binary file (1.5 kB). View file

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/commands/config/__pycache__/cluster.cpython-312.pyc ADDED Viewed

Binary file (28.2 kB). View file

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/commands/config/__pycache__/config.cpython-312.pyc ADDED Viewed

Binary file (3.29 kB). View file

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/commands/config/__pycache__/config_args.cpython-312.pyc ADDED Viewed

Binary file (12.2 kB). View file

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/commands/config/__pycache__/config_utils.cpython-312.pyc ADDED Viewed

Binary file (3.97 kB). View file

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/commands/config/__pycache__/default.cpython-312.pyc ADDED Viewed

Binary file (5.99 kB). View file

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/commands/config/__pycache__/sagemaker.cpython-312.pyc ADDED Viewed

Binary file (9.52 kB). View file

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/commands/config/__pycache__/update.cpython-312.pyc ADDED Viewed

Binary file (2.47 kB). View file

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/commands/config/cluster.py ADDED Viewed

	@@ -0,0 +1,917 @@

+#!/usr/bin/env python
+# Copyright 2021 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+from ...utils import (
+    ComputeEnvironment,
+    DistributedType,
+    is_deepspeed_available,
+    is_fp8_available,
+    is_hpu_available,
+    is_mlu_available,
+    is_mps_available,
+    is_msamp_available,
+    is_musa_available,
+    is_npu_available,
+    is_sdaa_available,
+    is_transformer_engine_available,
+    is_transformers_available,
+    is_xpu_available,
+)
+from ...utils.constants import (
+    DEEPSPEED_MULTINODE_LAUNCHERS,
+    FSDP2_STATE_DICT_TYPE,
+    FSDP_AUTO_WRAP_POLICY,
+    FSDP_BACKWARD_PREFETCH,
+    FSDP_SHARDING_STRATEGY,
+    FSDP_STATE_DICT_TYPE,
+    TORCH_DYNAMO_MODES,
+)
+from .config_args import ClusterConfig
+from .config_utils import (
+    DYNAMO_BACKENDS,
+    _ask_field,
+    _ask_options,
+    _convert_distributed_mode,
+    _convert_dynamo_backend,
+    _convert_fp8_backend,
+    _convert_mixed_precision,
+    _convert_yes_no_to_bool,
+)
+def get_cluster_input():
+    distributed_type = _ask_options(
+        "Which type of machine are you using?",
+        [
+            "No distributed training",
+            "multi-CPU",
+            "multi-XPU",
+            "multi-HPU",
+            "multi-GPU",
+            "multi-NPU",
+            "multi-MLU",
+            "multi-SDAA",
+            "multi-MUSA",
+            "TPU",
+        ],
+        _convert_distributed_mode,
+    )
+    machine_rank = 0
+    num_machines = 1
+    num_processes = 1
+    gpu_ids = None
+    main_process_ip = None
+    main_process_port = None
+    rdzv_backend = "static"
+    same_network = True
+    debug = False
+    if distributed_type in [
+        DistributedType.MULTI_GPU,
+        DistributedType.MULTI_MLU,
+        DistributedType.MULTI_SDAA,
+        DistributedType.MULTI_MUSA,
+        DistributedType.MULTI_NPU,
+        DistributedType.MULTI_XPU,
+        DistributedType.MULTI_CPU,
+        DistributedType.MULTI_HPU,
+    ]:
+        num_machines = _ask_field(
+            "How many different machines will you use (use more than 1 for multi-node training)? [1]: ",
+            int,
+            default=1,
+        )
+        if num_machines > 1:
+            machine_rank = _ask_options(
+                "What is the rank of this machine?",
+                list(range(num_machines)),
+                int,
+            )
+            main_process_ip = _ask_field(
+                "What is the IP address of the machine that will host the main process? ",
+            )
+            main_process_port = _ask_field(
+                "What is the port you will use to communicate with the main process? ",
+                int,
+            )
+            same_network = _ask_field(
+                "Are all the machines on the same local network? Answer `no` if nodes are on the cloud and/or on different network hosts [YES/no]: ",
+                _convert_yes_no_to_bool,
+                default=True,
+                error_message="Please enter yes or no.",
+            )
+            if not same_network:
+                rdzv_backend = _ask_field(
+                    "What rendezvous backend will you use? ('static', 'c10d', ...): ", default="static"
+                )
+        debug = _ask_field(
+            "Should distributed operations be checked while running for errors? This can avoid timeout issues but will be slower. [yes/NO]: ",
+            _convert_yes_no_to_bool,
+            default=False,
+            error_message="Please enter yes or no.",
+        )
+    if distributed_type == DistributedType.NO:
+        use_cpu = _ask_field(
+            "Do you want to run your training on CPU only (even if a GPU / Apple Silicon / Ascend NPU device is available)? [yes/NO]:",
+            _convert_yes_no_to_bool,
+            default=False,
+            error_message="Please enter yes or no.",
+        )
+    elif distributed_type == DistributedType.MULTI_CPU:
+        use_cpu = True
+    else:
+        use_cpu = False
+    ipex_config = {}
+    mpirun_config = {}
+    if use_cpu or is_xpu_available():
+        ipex_config["ipex"] = _ask_field(
+            "Do you want to use Intel PyTorch Extension (IPEX) to speed up training on CPU/XPU? [yes/NO]:",
+            _convert_yes_no_to_bool,
+            default=False,
+            error_message="Please enter yes or no.",
+        )
+    if use_cpu:
+        if distributed_type == DistributedType.MULTI_CPU:
+            use_mpirun = _ask_field(
+                "Do you want accelerate to launch mpirun? [yes/NO]: ",
+                _convert_yes_no_to_bool,
+                default=False,
+                error_message="Please enter yes or no.",
+            )
+            if use_mpirun:
+                mpirun_hostfile = _ask_field(
+                    "Please enter the path to the hostfile to use with mpirun [~/hostfile]: ",
+                    str,
+                    default="~/hostfile",
+                )
+                mpirun_config["mpirun_hostfile"] = os.path.expanduser(mpirun_hostfile.strip())
+                mpirun_config["mpirun_ccl"] = _ask_field("Enter the number of oneCCL worker threads [1]: ", default=1)
+    dynamo_config = {}
+    use_dynamo = _ask_field(
+        "Do you wish to optimize your script with torch dynamo?[yes/NO]:",
+        _convert_yes_no_to_bool,
+        default=False,
+        error_message="Please enter yes or no.",
+    )
+    if use_dynamo:
+        prefix = "dynamo_"
+        dynamo_config[prefix + "backend"] = _ask_options(
+            "Which dynamo backend would you like to use?",
+            [x.lower() for x in DYNAMO_BACKENDS],
+            _convert_dynamo_backend,
+            default=2,
+        )
+        use_custom_options = _ask_field(
+            "Do you want to customize the defaults sent to torch.compile? [yes/NO]: ",
+            _convert_yes_no_to_bool,
+            default=False,
+            error_message="Please enter yes or no.",
+        )
+        if use_custom_options:
+            dynamo_config[prefix + "mode"] = _ask_options(
+                "Which mode do you want to use?",
+                TORCH_DYNAMO_MODES,
+                lambda x: TORCH_DYNAMO_MODES[int(x)],
+                default=0,
+            )
+            dynamo_config[prefix + "use_fullgraph"] = _ask_field(
+                "Do you want the fullgraph mode or it is ok to break model into several subgraphs? [yes/NO]: ",
+                _convert_yes_no_to_bool,
+                default=False,
+                error_message="Please enter yes or no.",
+            )
+            dynamo_config[prefix + "use_dynamic"] = _ask_field(
+                "Do you want to enable dynamic shape tracing? [yes/NO]: ",
+                _convert_yes_no_to_bool,
+                default=False,
+                error_message="Please enter yes or no.",
+            )
+            dynamo_config[prefix + "use_regional_compilation"] = _ask_field(
+                "Do you want to enable regional compilation? [yes/NO]: ",
+                _convert_yes_no_to_bool,
+                default=False,
+                error_message="Please enter yes or no.",
+            )
+    use_mps = not use_cpu and is_mps_available()
+    deepspeed_config = {}
+    if (
+        distributed_type
+        in [
+            DistributedType.MULTI_GPU,
+            DistributedType.MULTI_XPU,
+            DistributedType.MULTI_HPU,
+            DistributedType.MULTI_NPU,
+            DistributedType.MULTI_MLU,
+            DistributedType.MULTI_SDAA,
+            DistributedType.MULTI_MUSA,
+            DistributedType.NO,
+        ]
+        and not use_mps
+    ):
+        use_deepspeed = _ask_field(
+            "Do you want to use DeepSpeed? [yes/NO]: ",
+            _convert_yes_no_to_bool,
+            default=False,
+            error_message="Please enter yes or no.",
+        )
+        if use_deepspeed:
+            distributed_type = DistributedType.DEEPSPEED
+            assert is_deepspeed_available(), (
+                "DeepSpeed is not installed => run `pip3 install deepspeed` or build it from source"
+            )
+        if distributed_type == DistributedType.DEEPSPEED:
+            use_deepspeed_config = _ask_field(
+                "Do you want to specify a json file to a DeepSpeed config? [yes/NO]: ",
+                _convert_yes_no_to_bool,
+                default=False,
+                error_message="Please enter yes or no.",
+            )
+            if use_deepspeed_config:
+                deepspeed_config["deepspeed_config_file"] = _ask_field(
+                    "Please enter the path to the json DeepSpeed config file: ",
+                    str,
+                    default="none",
+                )
+            else:
+                deepspeed_config["zero_stage"] = _ask_options(
+                    "What should be your DeepSpeed's ZeRO optimization stage?",
+                    [0, 1, 2, 3],
+                    int,
+                    default=2,
+                )
+                deepspeed_devices = ["none", "cpu", "nvme"]
+                if deepspeed_config["zero_stage"] >= 2:
+                    deepspeed_config["offload_optimizer_device"] = _ask_options(
+                        "Where to offload optimizer states?", deepspeed_devices, lambda x: deepspeed_devices[int(x)]
+                    )
+                    deepspeed_config["offload_param_device"] = _ask_options(
+                        "Where to offload parameters?", deepspeed_devices, lambda x: deepspeed_devices[int(x)]
+                    )
+                    if deepspeed_config["offload_param_device"] == "nvme":
+                        deepspeed_config["offload_param_nvme_path"] = _ask_field(
+                            "Nvme Path to offload parameters?",
+                            str,
+                            default="/nvme",
+                        )
+                    if deepspeed_config["offload_optimizer_device"] == "nvme":
+                        deepspeed_config["offload_optimizer_nvme_path"] = _ask_field(
+                            "Nvme Path to offload optimizer states?",
+                            str,
+                            default="/nvme",
+                        )
+                deepspeed_config["gradient_accumulation_steps"] = _ask_field(
+                    "How many gradient accumulation steps you're passing in your script? [1]: ",
+                    int,
+                    default=1,
+                )
+                use_gradient_clipping = _ask_field(
+                    "Do you want to use gradient clipping? [yes/NO]: ",
+                    _convert_yes_no_to_bool,
+                    default=False,
+                    error_message="Please enter yes or no.",
+                )
+                if use_gradient_clipping:
+                    deepspeed_config["gradient_clipping"] = _ask_field(
+                        "What is the gradient clipping value? [1.0]: ",
+                        float,
+                        default=1.0,
+                    )
+                if deepspeed_config["zero_stage"] == 3:
+                    deepspeed_config["zero3_save_16bit_model"] = _ask_field(
+                        "Do you want to save 16-bit model weights when using ZeRO Stage-3? [yes/NO]: ",
+                        _convert_yes_no_to_bool,
+                        default=False,
+                        error_message="Please enter yes or no.",
+                    )
+            deepspeed_config["zero3_init_flag"] = _ask_field(
+                "Do you want to enable `deepspeed.zero.Init` when using ZeRO Stage-3 for constructing massive models? [yes/NO]: ",
+                _convert_yes_no_to_bool,
+                default=False,
+                error_message="Please enter yes or no.",
+            )
+            if deepspeed_config["zero3_init_flag"]:
+                if not is_transformers_available():
+                    raise Exception(
+                        "When `zero3_init_flag` is set, it requires Transformers to be installed. "
+                        "Please run `pip3 install transformers`."
+                    )
+            use_moe = _ask_field(
+                "Do you want to enable Mixture-of-Experts training (MoE)? [yes/NO]: ",
+                _convert_yes_no_to_bool,
+                default=False,
+                error_message="Please enter yes or no.",
+            )
+            if use_moe:
+                deepspeed_config["deepspeed_moe_layer_cls_names"] = _ask_field(
+                    "Specify the comma-separated list of transformers MoE layer class names (case-sensitive), e.g : "
+                    " `MixtralSparseMoeBlock`, `Qwen2MoeSparseMoeBlock`, `JetMoEAttention,JetMoEBlock` ... : ",
+                    str,
+                )
+            if num_machines > 1:
+                launcher_query = "Which Type of launcher do you want to use?"
+                deepspeed_config["deepspeed_multinode_launcher"] = _ask_options(
+                    launcher_query,
+                    DEEPSPEED_MULTINODE_LAUNCHERS,
+                    lambda x: DEEPSPEED_MULTINODE_LAUNCHERS[int(x)],
+                )
+                if deepspeed_config["deepspeed_multinode_launcher"] != DEEPSPEED_MULTINODE_LAUNCHERS[1]:
+                    deepspeed_config["deepspeed_hostfile"] = _ask_field(
+                        "DeepSpeed configures multi-node compute resources with hostfile. "
+                        "Each row is of the format `hostname slots=[num_gpus]`, e.g., `localhost slots=2`; "
+                        "for more information please refer official [documentation]"
+                        "(https://www.deepspeed.ai/getting-started/#resource-configuration-multi-node). "
+                        "Please specify the location of hostfile: ",
+                        str,
+                    )
+                    is_exclusion_filter = _ask_field(
+                        "Do you want to specify exclusion filter string? [yes/NO]: ",
+                        _convert_yes_no_to_bool,
+                        default=False,
+                        error_message="Please enter yes or no.",
+                    )
+                    if is_exclusion_filter:
+                        deepspeed_config["deepspeed_exclusion_filter"] = _ask_field(
+                            "DeepSpeed exclusion filter string: ",
+                            str,
+                        )
+                    is_inclusion_filter = _ask_field(
+                        "Do you want to specify inclusion filter string? [yes/NO]: ",
+                        _convert_yes_no_to_bool,
+                        default=False,
+                        error_message="Please enter yes or no.",
+                    )
+                    if is_inclusion_filter:
+                        deepspeed_config["deepspeed_inclusion_filter"] = _ask_field(
+                            "DeepSpeed inclusion filter string: ",
+                            str,
+                        )
+    fsdp_config = {}
+    if distributed_type in [
+        DistributedType.MULTI_GPU,
+        DistributedType.MULTI_NPU,
+        DistributedType.MULTI_MLU,
+        DistributedType.MULTI_SDAA,
+        DistributedType.MULTI_MUSA,
+        DistributedType.MULTI_XPU,
+        DistributedType.MULTI_HPU,
+    ]:
+        use_fsdp = _ask_field(
+            "Do you want to use FullyShardedDataParallel? [yes/NO]: ",
+            _convert_yes_no_to_bool,
+            default=False,
+            error_message="Please enter yes or no.",
+        )
+        if use_fsdp:
+            distributed_type = DistributedType.FSDP
+        if distributed_type == DistributedType.FSDP:
+            fsdp_config["fsdp_version"] = _ask_options(
+                "What should be your FSDP version? [2]: ",
+                [1, 2],
+                lambda x: int(x) + 1,
+                default=1,
+            )
+            fsdp_version = fsdp_config["fsdp_version"]  # extract to a variable to simplify usage later
+            if fsdp_version == 1:
+                sharding_strategy_query = "What should be your sharding strategy?"
+                fsdp_config["fsdp_reshard_after_forward"] = _ask_options(
+                    sharding_strategy_query,
+                    FSDP_SHARDING_STRATEGY,
+                    lambda x: FSDP_SHARDING_STRATEGY[int(x)],
+                )
+            else:
+                fsdp_config["fsdp_reshard_after_forward"] = _ask_field(
+                    "Do you want to enable resharding after forward? [YES/no]: ",
+                    _convert_yes_no_to_bool,
+                    default=True,
+                    error_message="Please enter yes or no.",
+                )
+            fsdp_config["fsdp_offload_params"] = _ask_field(
+                "Do you want to offload parameters and gradients to CPU? [yes/NO]: ",
+                _convert_yes_no_to_bool,
+                default=False,
+                error_message="Please enter yes or no.",
+            )
+            fsdp_wrap_query = "What should be your auto wrap policy?"
+            fsdp_config["fsdp_auto_wrap_policy"] = _ask_options(
+                fsdp_wrap_query,
+                FSDP_AUTO_WRAP_POLICY,
+                lambda x: FSDP_AUTO_WRAP_POLICY[int(x)],
+            )
+            if fsdp_config["fsdp_auto_wrap_policy"] == FSDP_AUTO_WRAP_POLICY[0]:
+                use_no_split_modules = _ask_field(
+                    "Do you want to use the model's `_no_split_modules` to wrap. Only applicable for 🤗 Transformers [yes/NO]: ",
+                    _convert_yes_no_to_bool,
+                    default=False,
+                    error_message="Please enter yes or no.",
+                )
+                if not use_no_split_modules:
+                    fsdp_config["fsdp_transformer_layer_cls_to_wrap"] = _ask_field(
+                        "Specify the comma-separated list of transformer layer class names (case-sensitive) to wrap ,e.g, :"
+                        "`BertLayer`, `GPTJBlock`, `T5Block`, `BertLayer,BertEmbeddings,BertSelfOutput` ...? : ",
+                        str,
+                    )
+            elif fsdp_config["fsdp_auto_wrap_policy"] == FSDP_AUTO_WRAP_POLICY[1]:
+                fsdp_config["fsdp_min_num_params"] = _ask_field(
+                    "What should be your FSDP's minimum number of parameters for Default Auto Wrapping Policy? [1e8]: ",
+                    int,
+                    default=100000000,
+                )
+            # Removed in FSDP2, ask for user input for FSDP1
+            if fsdp_version == 1:
+                fsdp_backward_prefetch_query = "What should be your FSDP's backward prefetch policy?"
+                fsdp_config["fsdp_backward_prefetch"] = _ask_options(
+                    fsdp_backward_prefetch_query,
+                    FSDP_BACKWARD_PREFETCH,
+                    lambda x: FSDP_BACKWARD_PREFETCH[int(x)],
+                )
+            fsdp_state_dict_type_query = "What should be your FSDP's state dict type?"
+            fsdp_config["fsdp_state_dict_type"] = _ask_options(
+                fsdp_state_dict_type_query,
+                FSDP_STATE_DICT_TYPE if fsdp_version == 1 else FSDP2_STATE_DICT_TYPE,
+                lambda x: FSDP_STATE_DICT_TYPE[int(x)] if fsdp_version == 1 else FSDP2_STATE_DICT_TYPE[int(x)],
+                default=0,
+            )
+            # Not implemented in FSDP2, ask for user input for FSDP1
+            if fsdp_version == 1:
+                fsdp_config["fsdp_forward_prefetch"] = _ask_field(
+                    "Do you want to enable FSDP's forward prefetch policy? [yes/NO]: ",
+                    _convert_yes_no_to_bool,
+                    default=False,
+                    error_message="Please enter yes or no.",
+                )
+            # Obsolete in FSDP2, ask for user input for FSDP1
+            if fsdp_version == 1:
+                fsdp_config["fsdp_use_orig_params"] = _ask_field(
+                    "Do you want to enable FSDP's `use_orig_params` feature? [YES/no]: ",
+                    _convert_yes_no_to_bool,
+                    default=True,
+                    error_message="Please enter yes or no.",
+                )
+            fsdp_config["fsdp_cpu_ram_efficient_loading"] = _ask_field(
+                "Do you want to enable CPU RAM efficient model loading? Only applicable for 🤗 Transformers models. [YES/no]: ",
+                _convert_yes_no_to_bool,
+                default=True,
+                error_message="Please enter yes or no.",
+            )
+            # Obsolete in FSDP2, ask for user input for FSDP1
+            if fsdp_version == 1:
+                if fsdp_config["fsdp_cpu_ram_efficient_loading"]:
+                    fsdp_config["fsdp_sync_module_states"] = True
+                else:
+                    fsdp_config["fsdp_sync_module_states"] = _ask_field(
+                        "Do you want each individually wrapped FSDP unit to broadcast module parameters from rank 0 at the start? [YES/no]: ",
+                        _convert_yes_no_to_bool,
+                        default=True,
+                        error_message="Please enter yes or no.",
+                    )
+            fsdp_config["fsdp_activation_checkpointing"] = _ask_field(
+                "Do you want to enable FSDP activation checkpointing? [yes/NO]: ",
+                _convert_yes_no_to_bool,
+                default=False,
+                error_message="Please enter yes or no.",
+            )
+    parallelism_config = {}
+    if fsdp_config.get("fsdp_version", 1) == 2:
+        use_parallelism_config = _ask_field(
+            "Do you want to use the parallelism config? [yes/NO]: ",
+            _convert_yes_no_to_bool,
+            default=False,
+            error_message="Please enter yes or no.",
+        )
+        if use_parallelism_config:
+            prefix = "parallelism_config_"
+            parallelism_config[prefix + "dp_replicate_size"] = _ask_field(
+                "What is the data parallelism replicate size? [1]: ",
+                int,
+                default=1,
+                error_message="Please enter an integer.",
+            )
+            parallelism_config[prefix + "dp_shard_size"] = _ask_field(
+                "What is the FSDP shard size? [1]: ",
+                int,
+                default=1,
+                error_message="Please enter an integer.",
+            )
+            parallelism_config[prefix + "tp_size"] = _ask_field(
+                "What is the tensor parallelism size? [1]: ",
+                int,
+                default=1,
+                error_message="Please enter an integer.",
+            )
+            parallelism_config[prefix + "cp_size"] = _ask_field(
+                "What is the context parallelism size? [1]: ",
+                int,
+                default=1,
+                error_message="Please enter an integer.",
+            )
+            if parallelism_config[prefix + "cp_size"] > 1:
+                parallelism_config[prefix + "cp_comm_strategy"] = _ask_options(
+                    "What is the compute parallelism communication strategy?",
+                    ["allgather", "alltoall"],
+                    lambda x: ["allgather", "alltoall"][int(x)],
+                    default=0,
+                )
+    megatron_lm_config = {}
+    if distributed_type in [DistributedType.MULTI_GPU]:
+        use_megatron_lm = _ask_field(
+            "Do you want to use Megatron-LM ? [yes/NO]: ",
+            _convert_yes_no_to_bool,
+            default=False,
+            error_message="Please enter yes or no.",
+        )
+        if use_megatron_lm:
+            distributed_type = DistributedType.MEGATRON_LM
+        if distributed_type == DistributedType.MEGATRON_LM:
+            prefix = "megatron_lm_"
+            megatron_lm_config[prefix + "tp_degree"] = _ask_field(
+                "What is the Tensor Parallelism degree/size? [1]:",
+                int,
+                default=1,
+                error_message="Please enter an integer.",
+            )
+            if megatron_lm_config[prefix + "tp_degree"] > 1:
+                megatron_lm_config[prefix + "sequence_parallelism"] = _ask_field(
+                    "Do you want to enable Sequence Parallelism? [YES/no]: ",
+                    _convert_yes_no_to_bool,
+                    default=True,
+                    error_message="Please enter yes or no.",
+                )
+            megatron_lm_config[prefix + "pp_degree"] = _ask_field(
+                "What is the Pipeline Parallelism degree/size? [1]:",
+                int,
+                default=1,
+                error_message="Please enter an integer.",
+            )
+            if megatron_lm_config[prefix + "pp_degree"] > 1:
+                megatron_lm_config[prefix + "num_micro_batches"] = _ask_field(
+                    "What is the number of micro-batches? [1]:",
+                    int,
+                    default=1,
+                    error_message="Please enter an integer.",
+                )
+            megatron_lm_config[prefix + "recompute_activations"] = _ask_field(
+                "Do you want to enable selective activation recomputation? [YES/no]: ",
+                _convert_yes_no_to_bool,
+                default=True,
+                error_message="Please enter yes or no.",
+            )
+            megatron_lm_config[prefix + "use_distributed_optimizer"] = _ask_field(
+                "Do you want to use distributed optimizer "
+                "which shards optimizer state and gradients across data parallel ranks? [YES/no]: ",
+                _convert_yes_no_to_bool,
+                default=True,
+                error_message="Please enter yes or no.",
+            )
+            megatron_lm_config[prefix + "gradient_clipping"] = _ask_field(
+                "What is the gradient clipping value based on global L2 Norm (0 to disable)? [1.0]: ",
+                float,
+                default=1.0,
+            )
+    # TPU specific defaults
+    tpu_commands = None
+    tpu_command_file = None
+    tpu_downcast_bf16 = "no"
+    tpu_env = []
+    tpu_name = None
+    tpu_vm = None
+    tpu_zone = None
+    tpu_use_sudo = False
+    tpu_use_cluster = False
+    if distributed_type in [
+        DistributedType.MULTI_CPU,
+        DistributedType.MULTI_XPU,
+        DistributedType.MULTI_HPU,
+        DistributedType.MULTI_GPU,
+        DistributedType.MULTI_MLU,
+        DistributedType.MULTI_SDAA,
+        DistributedType.MULTI_MUSA,
+        DistributedType.MULTI_NPU,
+        DistributedType.XLA,
+    ]:
+        machine_type = str(distributed_type).split(".")[1].replace("MULTI_", "")
+        if machine_type == "TPU":
+            machine_type += " cores"
+        elif machine_type == "CPU":
+            machine_type = "processes"
+        else:
+            machine_type += "(s)"
+        num_processes = _ask_field(
+            f"How many {machine_type} should be used for distributed training? [1]:",
+            int,
+            default=1,
+            error_message="Please enter an integer.",
+        )
+    elif distributed_type in [DistributedType.FSDP, DistributedType.DEEPSPEED, DistributedType.MEGATRON_LM]:
+        num_processes = _ask_field(
+            "How many GPU(s) should be used for distributed training? [1]:",
+            int,
+            default=1,
+            error_message="Please enter an integer.",
+        )
+    else:
+        num_processes = 1
+    if (distributed_type == DistributedType.MULTI_GPU) and (num_machines == 1) and (num_processes == 1):
+        raise ValueError(
+            f"Specified distributed type {distributed_type} but only using 1 GPU on a single machine. Please select `No distributed training` for the type of machine you are using."
+        )
+    if (
+        distributed_type
+        in [
+            DistributedType.MULTI_GPU,
+            DistributedType.MULTI_MLU,
+            DistributedType.MULTI_SDAA,
+            DistributedType.MULTI_MUSA,
+            DistributedType.MULTI_NPU,
+            DistributedType.MULTI_XPU,
+            DistributedType.MULTI_HPU,
+            DistributedType.NO,
+        ]
+        and not use_cpu
+        and not use_mps
+    ):
+        if is_npu_available():
+            machine_type = "NPU(s)"
+        elif is_mlu_available():
+            machine_type = "MLU(s)"
+        elif is_sdaa_available():
+            machine_type = "SDAA(s)"
+        elif is_musa_available():
+            machine_type = "MUSA(s)"
+        elif is_xpu_available():
+            machine_type = "XPU(s)"
+        elif is_hpu_available():
+            machine_type = "HPU(s)"
+        else:
+            machine_type = "GPU(s)"
+        gpu_ids = _ask_field(
+            f"What {machine_type} (by id) should be used for training on this machine as a comma-separated list? [all]:",
+            default="all",
+        )
+    # CPU affinity is only supported on NVIDIA hardware for now
+    enable_cpu_affinity = False
+    if distributed_type in (DistributedType.NO, DistributedType.MULTI_GPU) and not use_cpu and not use_mps:
+        enable_cpu_affinity = _ask_field(
+            "Would you like to enable numa efficiency? (Currently only supported on NVIDIA hardware). [yes/NO]: ",
+            _convert_yes_no_to_bool,
+            default=False,
+            error_message="Please enter yes or no.",
+        )
+    fp8_config = None
+    if distributed_type == DistributedType.XLA:
+        mixed_precision = "no"
+        main_training_function = _ask_field(
+            "What is the name of the function in your script that should be launched in all parallel scripts? [main]: ",
+            default="main",
+        )
+        tpu_use_cluster = _ask_field(
+            "Are you using a TPU cluster? [yes/NO]: ",
+            _convert_yes_no_to_bool,
+            default=False,
+            error_message="Please enter yes or no.",
+        )
+        if tpu_use_cluster:
+            tpu_name = _ask_field(
+                "What is the name of your TPU cluster? ",
+                default=None,
+                error_message="Please enter the name of your TPU cluster.",
+            )
+            tpu_zone = _ask_field(
+                "What is the zone of your TPU cluster? ",
+                default=None,
+                error_message="Please enter the zone of your TPU cluster.",
+            )
+            tpu_use_sudo = _ask_field(
+                "To run a python script in a TPU pod, should `sudo` be used? [yes/NO]: ",
+                default=False,
+                error_message="Please enter yes or no.",
+            )
+            run_commands = _ask_field(
+                "Do you have code you wish to run on startup in each pod? [yes/NO]: ",
+                _convert_yes_no_to_bool,
+                default=False,
+                error_message="Please enter yes or no.",
+            )
+            if run_commands:
+                use_command_file = _ask_field(
+                    "Is this code located in a bash script? [yes/NO]: ",
+                    _convert_yes_no_to_bool,
+                    default=False,
+                    error_message="Please enter yes or no.",
+                )
+                if use_command_file:
+                    tpu_command_file = _ask_field(
+                        "What is the path to your bash script? ",
+                        default=None,
+                        error_message="Please enter the path to your bash script.",
+                    )
+                    tpu_command_file = os.path.abspath(tpu_command_file)
+                else:
+                    print("Please enter each command separately you wish to run on startup in each pod.")
+                    tpu_commands = []
+                    another_command = True
+                    while another_command:
+                        tpu_commands.append(
+                            _ask_field(
+                                "Please enter a single command to be ran ",
+                                default=None,
+                                error_message="Please enter the commands you wish to run on startup in each pod as a single string.",
+                            )
+                        )
+                        another_command = _ask_field(
+                            "Do you wish to add another command? [yes/NO]: ",
+                            _convert_yes_no_to_bool,
+                            default=False,
+                            error_message="Please enter yes or no.",
+                        )
+            tpu_vm = _ask_field(
+                "If not using an instance group, what are the names of the Compute VM instances to be used, separated by a comma: ",
+                default="",
+            ).split(",")
+            tpu_env = _ask_field(
+                "What environment variables do you wish to set in each pod, separated by a comma: ",
+                default="",
+            ).split(",")
+    else:
+        main_training_function = "main"
+        if distributed_type == DistributedType.DEEPSPEED and use_deepspeed_config:
+            mixed_precision = None
+        else:
+            mixed_precision = _ask_options(
+                "Do you wish to use mixed precision?",
+                ["no", "fp16", "bf16", "fp8"],
+                _convert_mixed_precision,
+            )
+            if mixed_precision == "fp8":
+                if not is_fp8_available():
+                    raise ValueError("FP8 (either Transformer Engine or MSAMP) is not installed on this machine.")
+                fp8_config = {}
+                fp8_config["backend"] = _ask_options(
+                    "Which FP8 backend do you want to use?",
+                    ["te", "msamp"],
+                    _convert_fp8_backend,
+                )
+                if fp8_config["backend"] == "TE":
+                    if not is_transformer_engine_available():
+                        raise ValueError("TransformersEngine was selected, but it is not installed on this machine.")
+                    fp8_config["use_autocast_during_eval"] = _ask_field(
+                        "Do you want to use FP8 autocast during eval mode? Generally better metrics are found when this is disabled [yes/NO]: ",
+                        _convert_yes_no_to_bool,
+                        default=False,
+                    )
+                    fp8_config["margin"] = _ask_field(
+                        "What margin should be used for gradient scaling? [0]: ",
+                        int,
+                        default=0,
+                    )
+                    fp8_config["interval"] = _ask_field(
+                        "What interval should be used for for how often the scaling factor is recomputed? [1]: ",
+                        int,
+                        default=1,
+                    )
+                    fp8_config["fp8_format"] = _ask_options(
+                        "Which weight format should be used?",
+                        ["HYBRID", "E4M3", "E5M2"],
+                        lambda i: ["HYBRID", "E4M3", "E5M2"][i],
+                        default=0,
+                    )
+                    fp8_config["amax_history_length"] = _ask_field(
+                        "What length of history should be used for the amax scaling factor computation? [1024]: ",
+                        int,
+                        default=1024,
+                    )
+                    fp8_config["amax_compute_algorithm"] = _ask_options(
+                        "Which algorithm should be used for the amax scaling factor computation?",
+                        ["max", "most_recent"],
+                        lambda x: "max" if x == 0 else "most_recent",
+                        default=0,
+                    )
+                    fp8_config["override_linear_precision"] = _ask_field(
+                        "Do you want to to execute `fprop`, `dgrad`, and `wgrad` GEMMS in higher precision? [yes/NO]: ",
+                        _convert_yes_no_to_bool,
+                        default=False,
+                    )
+                    if fp8_config["override_linear_precision"]:
+                        fprop = _ask_field(
+                            "Should `fprop` be executed in higher precision? [yes/NO]: ",
+                            _convert_yes_no_to_bool,
+                            default=False,
+                        )
+                        dgrad = _ask_field(
+                            "Should `dgrad` be executed in higher precision? [yes/NO]: ",
+                            _convert_yes_no_to_bool,
+                            default=False,
+                        )
+                        wgrad = _ask_field(
+                            "Should `wgrad` be executed in higher precision? [yes/NO]: ",
+                            _convert_yes_no_to_bool,
+                            default=False,
+                        )
+                        fp8_config["override_linear_precision"] = (fprop, dgrad, wgrad)
+                    else:
+                        fp8_config["override_linear_precision"] = (False, False, False)
+                elif fp8_config["backend"] == "MSAMP":
+                    if not is_msamp_available():
+                        raise ValueError("MSAMP was selected, but it is not installed on this machine.")
+                    fp8_config["optimization_level"] = _ask_options(
+                        "Which optimization level should be used?",
+                        ["O1", "O2"],
+                        lambda x: "O1" if x == 0 else "O2",
+                        default=1,
+                    )
+    if use_dynamo and mixed_precision == "no" and not use_cpu:
+        print(
+            "Torch dynamo used without mixed precision requires TF32 to be efficient. Accelerate will enable it by default when launching your scripts."
+        )
+    if distributed_type == DistributedType.XLA and mixed_precision == "bf16":
+        tpu_downcast_bf16 = _ask_field(
+            "Should `torch.float` be cast as `bfloat16` and `torch.double` remain `float32` on TPUs?", default="no"
+        )
+    return ClusterConfig(
+        compute_environment=ComputeEnvironment.LOCAL_MACHINE,
+        distributed_type=distributed_type,
+        num_processes=num_processes,
+        gpu_ids=gpu_ids,
+        mixed_precision=mixed_precision,
+        downcast_bf16=tpu_downcast_bf16,
+        machine_rank=machine_rank,
+        num_machines=num_machines,
+        main_process_ip=main_process_ip,
+        main_process_port=main_process_port,
+        main_training_function=main_training_function,
+        fp8_config=fp8_config,
+        deepspeed_config=deepspeed_config,
+        fsdp_config=fsdp_config,
+        parallelism_config=parallelism_config,
+        megatron_lm_config=megatron_lm_config,
+        ipex_config=ipex_config,
+        mpirun_config=mpirun_config,
+        use_cpu=use_cpu,
+        rdzv_backend=rdzv_backend,
+        same_network=same_network,
+        commands=tpu_commands,
+        command_file=tpu_command_file,
+        tpu_env=tpu_env,
+        tpu_name=tpu_name,
+        tpu_vm=tpu_vm,
+        tpu_zone=tpu_zone,
+        tpu_use_sudo=tpu_use_sudo,
+        tpu_use_cluster=tpu_use_cluster,
+        dynamo_config=dynamo_config,
+        debug=debug,
+        enable_cpu_affinity=enable_cpu_affinity,
+    )

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/commands/config/config.py ADDED Viewed

	@@ -0,0 +1,89 @@

+#!/usr/bin/env python
+# Copyright 2021 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import argparse
+import os
+from accelerate.utils import ComputeEnvironment
+from .cluster import get_cluster_input
+from .config_args import cache_dir, default_config_file, default_yaml_config_file, load_config_from_file  # noqa: F401
+from .config_utils import _ask_field, _ask_options, _convert_compute_environment  # noqa: F401
+from .sagemaker import get_sagemaker_input
+# Help text for the `config` command; passed as `description` to the argument parser built in this module.
+description = "Launches a series of prompts to create and save a `default_config.yaml` configuration file for your training system. Should always be ran first on your machine"
+def get_user_input():
+    compute_environment = _ask_options(
+        "In which compute environment are you running?",
+        ["This machine", "AWS (Amazon SageMaker)"],
+        _convert_compute_environment,
+    )
+    if compute_environment == ComputeEnvironment.AMAZON_SAGEMAKER:
+        config = get_sagemaker_input()
+    else:
+        config = get_cluster_input()
+    return config
+def config_command_parser(subparsers=None):
+    if subparsers is not None:
+        parser = subparsers.add_parser("config", description=description)
+    else:
+        parser = argparse.ArgumentParser("Accelerate config command", description=description)
+    parser.add_argument(
+        "--config_file",
+        default=None,
+        help=(
+            "The path to use to store the config file. Will default to a file named default_config.yaml in the cache "
+            "location, which is the content of the environment `HF_HOME` suffixed with 'accelerate', or if you don't have "
+            "such an environment variable, your cache directory ('~/.cache' or the content of `XDG_CACHE_HOME`) suffixed "
+            "with 'huggingface'."
+        ),
+    )
+    if subparsers is not None:
+        parser.set_defaults(func=config_command)
+    return parser
+def config_command(args):
+    config = get_user_input()
+    if args.config_file is not None:
+        config_file = args.config_file
+    else:
+        if not os.path.isdir(cache_dir):
+            os.makedirs(cache_dir)
+        config_file = default_yaml_config_file
+    if config_file.endswith(".json"):
+        config.to_json_file(config_file)
+    else:
+        config.to_yaml_file(config_file)
+    print(f"accelerate configuration saved at {config_file}")
+def main():
+    parser = config_command_parser()
+    args = parser.parse_args()
+    config_command(args)
+if __name__ == "__main__":
+    main()

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/commands/config/config_args.py ADDED Viewed

	@@ -0,0 +1,256 @@

+#!/usr/bin/env python
+# Copyright 2021 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import json
+import os
+from dataclasses import dataclass
+from enum import Enum
+from typing import Optional, Union
+import yaml
+from ...utils import ComputeEnvironment, DistributedType, SageMakerDistributedType
+from ...utils.constants import SAGEMAKER_PYTHON_VERSION, SAGEMAKER_PYTORCH_VERSION, SAGEMAKER_TRANSFORMERS_VERSION
+# Root of the Hugging Face cache: `HF_HOME` if set, otherwise `<XDG_CACHE_HOME or ~/.cache>/huggingface`.
+hf_cache_home = os.path.expanduser(
+    os.environ.get("HF_HOME", os.path.join(os.environ.get("XDG_CACHE_HOME", "~/.cache"), "huggingface"))
+)
+# Directory that holds the default config file.
+cache_dir = os.path.join(hf_cache_home, "accelerate")
+# NOTE(review): both default paths below resolve to the same `default_config.yaml`, so the fallback branch further
+# down can never pick a distinct JSON file. Confirm whether the JSON path is meant to share the YAML file name.
+default_json_config_file = os.path.join(cache_dir, "default_config.yaml")
+default_yaml_config_file = os.path.join(cache_dir, "default_config.yaml")
+# For backward compatibility: the default config is the json one if it's the only existing file.
+if os.path.isfile(default_yaml_config_file) or not os.path.isfile(default_json_config_file):
+    default_config_file = default_yaml_config_file
+else:
+    default_config_file = default_json_config_file
+def load_config_from_file(config_file):
+    if config_file is not None:
+        if not os.path.isfile(config_file):
+            raise FileNotFoundError(
+                f"The passed configuration file `{config_file}` does not exist. "
+                "Please pass an existing file to `accelerate launch`, or use the default one "
+                "created through `accelerate config` and run `accelerate launch` "
+                "without the `--config_file` argument."
+            )
+    else:
+        config_file = default_config_file
+    with open(config_file, encoding="utf-8") as f:
+        if config_file.endswith(".json"):
+            if (
+                json.load(f).get("compute_environment", ComputeEnvironment.LOCAL_MACHINE)
+                == ComputeEnvironment.LOCAL_MACHINE
+            ):
+                config_class = ClusterConfig
+            else:
+                config_class = SageMakerConfig
+            return config_class.from_json_file(json_file=config_file)
+        else:
+            if (
+                yaml.safe_load(f).get("compute_environment", ComputeEnvironment.LOCAL_MACHINE)
+                == ComputeEnvironment.LOCAL_MACHINE
+            ):
+                config_class = ClusterConfig
+            else:
+                config_class = SageMakerConfig
+            return config_class.from_yaml_file(yaml_file=config_file)
+@dataclass
+class BaseConfig:
+    compute_environment: ComputeEnvironment
+    distributed_type: Union[DistributedType, SageMakerDistributedType]
+    mixed_precision: str
+    use_cpu: bool
+    debug: bool
+    def to_dict(self):
+        result = self.__dict__
+        # For serialization, it's best to convert Enums to strings (or their underlying value type).
+        def _convert_enums(value):
+            if isinstance(value, Enum):
+                return value.value
+            if isinstance(value, dict):
+                if not bool(value):
+                    return None
+                for key1, value1 in value.items():
+                    value[key1] = _convert_enums(value1)
+            return value
+        for key, value in result.items():
+            result[key] = _convert_enums(value)
+        result = {k: v for k, v in result.items() if v is not None}
+        return result
+    @staticmethod
+    def process_config(config_dict):
+        """
+        Processes `config_dict` and sets default values for any missing keys
+        """
+        if "compute_environment" not in config_dict:
+            config_dict["compute_environment"] = ComputeEnvironment.LOCAL_MACHINE
+        if "distributed_type" not in config_dict:
+            raise ValueError("A `distributed_type` must be specified in the config file.")
+        if "num_processes" not in config_dict and config_dict["distributed_type"] == DistributedType.NO:
+            config_dict["num_processes"] = 1
+        if "mixed_precision" not in config_dict:
+            config_dict["mixed_precision"] = "fp16" if ("fp16" in config_dict and config_dict["fp16"]) else None
+        if "fp16" in config_dict:  # Convert the config to the new format.
+            del config_dict["fp16"]
+        if "dynamo_backend" in config_dict:  # Convert the config to the new format.
+            dynamo_backend = config_dict.pop("dynamo_backend")
+            config_dict["dynamo_config"] = {} if dynamo_backend == "NO" else {"dynamo_backend": dynamo_backend}
+        if "use_cpu" not in config_dict:
+            config_dict["use_cpu"] = False
+        if "debug" not in config_dict:
+            config_dict["debug"] = False
+        if "enable_cpu_affinity" not in config_dict:
+            config_dict["enable_cpu_affinity"] = False
+        return config_dict
+    @classmethod
+    def from_json_file(cls, json_file=None):
+        json_file = default_json_config_file if json_file is None else json_file
+        with open(json_file, encoding="utf-8") as f:
+            config_dict = json.load(f)
+        config_dict = cls.process_config(config_dict)
+        extra_keys = sorted(set(config_dict.keys()) - set(cls.__dataclass_fields__.keys()))
+        if len(extra_keys) > 0:
+            raise ValueError(
+                f"The config file at {json_file} had unknown keys ({extra_keys}), please try upgrading your `accelerate`"
+                " version or fix (and potentially remove) these keys from your config file."
+            )
+        return cls(**config_dict)
+    def to_json_file(self, json_file):
+        with open(json_file, "w", encoding="utf-8") as f:
+            content = json.dumps(self.to_dict(), indent=2, sort_keys=True) + "\n"
+            f.write(content)
+    @classmethod
+    def from_yaml_file(cls, yaml_file=None):
+        yaml_file = default_yaml_config_file if yaml_file is None else yaml_file
+        with open(yaml_file, encoding="utf-8") as f:
+            config_dict = yaml.safe_load(f)
+        config_dict = cls.process_config(config_dict)
+        extra_keys = sorted(set(config_dict.keys()) - set(cls.__dataclass_fields__.keys()))
+        if len(extra_keys) > 0:
+            raise ValueError(
+                f"The config file at {yaml_file} had unknown keys ({extra_keys}), please try upgrading your `accelerate`"
+                " version or fix (and potentially remove) these keys from your config file."
+            )
+        return cls(**config_dict)
+    def to_yaml_file(self, yaml_file):
+        with open(yaml_file, "w", encoding="utf-8") as f:
+            yaml.safe_dump(self.to_dict(), f)
+    def __post_init__(self):
+        if isinstance(self.compute_environment, str):
+            self.compute_environment = ComputeEnvironment(self.compute_environment)
+        if isinstance(self.distributed_type, str):
+            if self.compute_environment == ComputeEnvironment.AMAZON_SAGEMAKER:
+                self.distributed_type = SageMakerDistributedType(self.distributed_type)
+            else:
+                self.distributed_type = DistributedType(self.distributed_type)
+        if getattr(self, "dynamo_config", None) is None:
+            self.dynamo_config = {}
+@dataclass
+class ClusterConfig(BaseConfig):
+    """
+    Config dataclass extending `BaseConfig` with process/machine topology fields and one optional dict of options
+    per plugin. Plugin dicts left as `None` are replaced by `{}` in `__post_init__`.
+    """
+    num_processes: int = -1  # For instance if we use SLURM and the user manually passes it in
+    machine_rank: int = 0
+    num_machines: int = 1
+    gpu_ids: Optional[str] = None
+    main_process_ip: Optional[str] = None
+    main_process_port: Optional[int] = None
+    rdzv_backend: Optional[str] = "static"
+    same_network: Optional[bool] = False
+    main_training_function: str = "main"
+    enable_cpu_affinity: bool = False
+    # args for FP8 training
+    fp8_config: Optional[dict] = None
+    # args for deepspeed_plugin
+    deepspeed_config: Optional[dict] = None
+    # args for fsdp
+    fsdp_config: Optional[dict] = None
+    # args for parallelism config
+    parallelism_config: Optional[dict] = None
+    # args for megatron_lm
+    megatron_lm_config: Optional[dict] = None
+    # args for ipex
+    ipex_config: Optional[dict] = None
+    # args for mpirun
+    mpirun_config: Optional[dict] = None
+    # args for TPU
+    downcast_bf16: bool = False
+    # args for TPU pods
+    tpu_name: Optional[str] = None
+    tpu_zone: Optional[str] = None
+    tpu_use_cluster: bool = False
+    tpu_use_sudo: bool = False
+    command_file: Optional[str] = None
+    commands: list[str] = None
+    tpu_vm: list[str] = None
+    tpu_env: list[str] = None
+    # args for dynamo
+    dynamo_config: Optional[dict] = None
+    def __post_init__(self):
+        # Replace every plugin section that was left as `None` with a fresh empty dict, then let the
+        # parent class finish its own normalization.
+        plugin_sections = (
+            "deepspeed_config",
+            "fsdp_config",
+            "megatron_lm_config",
+            "ipex_config",
+            "mpirun_config",
+            "fp8_config",
+            "parallelism_config",
+        )
+        for section in plugin_sections:
+            if getattr(self, section) is None:
+                setattr(self, section, {})
+        return super().__post_init__()
+@dataclass
+class SageMakerConfig(BaseConfig):
+    # Config dataclass extending `BaseConfig` with the fields describing a SageMaker job: instance type,
+    # IAM role, image, profile/region, framework versions and optional input/metric files.
+    ec2_instance_type: str
+    iam_role_name: str
+    image_uri: Optional[str] = None
+    profile: Optional[str] = None
+    region: str = "us-east-1"
+    num_machines: int = 1
+    gpu_ids: str = "all"
+    # NOTE(review): this f-string is evaluated once, while the class body runs, where `num_machines` is the
+    # default `1` defined just above. The default is therefore always "accelerate-sagemaker-1" and does not
+    # follow the `num_machines` value of an instance.
+    base_job_name: str = f"accelerate-sagemaker-{num_machines}"
+    pytorch_version: str = SAGEMAKER_PYTORCH_VERSION
+    transformers_version: str = SAGEMAKER_TRANSFORMERS_VERSION
+    py_version: str = SAGEMAKER_PYTHON_VERSION
+    sagemaker_inputs_file: Optional[str] = None
+    sagemaker_metrics_file: Optional[str] = None
+    additional_args: Optional[dict] = None
+    dynamo_config: Optional[dict] = None
+    enable_cpu_affinity: bool = False

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/commands/config/config_utils.py ADDED Viewed

	@@ -0,0 +1,122 @@

+#!/usr/bin/env python
+# Copyright 2021 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import argparse
+from ...utils.dataclasses import (
+    ComputeEnvironment,
+    DistributedType,
+    DynamoBackend,
+    FP8BackendType,
+    PrecisionType,
+    SageMakerDistributedType,
+)
+from ..menu import BulletMenu
+# Backend names offered in the dynamo-backend menu. `_convert_dynamo_backend` indexes into this list and
+# passes the selected name to `DynamoBackend`, so the order and the spelling of each entry matter.
+DYNAMO_BACKENDS = [
+    "EAGER",
+    "AOT_EAGER",
+    "INDUCTOR",
+    "AOT_TS_NVFUSER",
+    "NVPRIMS_NVFUSER",
+    "CUDAGRAPHS",
+    "OFI",
+    "FX2TRT",
+    "ONNXRT",
+    "TENSORRT",
+    "AOT_TORCHXLA_TRACE_ONCE",
+    # Was misspelled "TORHCHXLA_TRACE_ONCE", which cannot be resolved to an enum member.
+    "TORCHXLA_TRACE_ONCE",
+    "IPEX",
+    "TVM",
+]
+def _ask_field(input_text, convert_value=None, default=None, error_message=None):
+    """
+    Prompt with `input_text` until a usable answer is given.
+    An empty answer returns `default` when `default` is not None. Otherwise the answer is returned as typed, or
+    passed through `convert_value` when one is given. If the conversion raises, `error_message` is printed (when
+    provided) and the prompt is shown again.
+    """
+    while True:
+        answer = input(input_text)
+        if default is not None and len(answer) == 0:
+            return default
+        if convert_value is None:
+            return answer
+        try:
+            return convert_value(answer)
+        except Exception:
+            # Invalid answer: optionally explain why, then ask again.
+            if error_message is not None:
+                print(error_message)
+def _ask_options(input_text, options=None, convert_value=None, default=0):
+    """
+    Show a `BulletMenu` titled `input_text` over `options` and return the selection.
+    Args:
+        input_text: prompt passed to the menu.
+        options: list of choices; `None` (the default) is treated as an empty list.
+        convert_value: optional callable applied to the menu result before returning it.
+        default: forwarded to `BulletMenu.run` as `default_choice`.
+    """
+    # A `None` sentinel replaces the former mutable `[]` default, which was shared between calls.
+    if options is None:
+        options = []
+    menu = BulletMenu(input_text, options)
+    result = menu.run(default_choice=default)
+    return convert_value(result) if convert_value is not None else result
+def _convert_compute_environment(value):
+    """Turn a menu index (anything `int()` accepts) into the matching `ComputeEnvironment` member."""
+    index = int(value)
+    environments = ["LOCAL_MACHINE", "AMAZON_SAGEMAKER"]
+    return ComputeEnvironment(environments[index])
+def _convert_distributed_mode(value):
+    """Turn a menu index (anything `int()` accepts) into the matching `DistributedType` member."""
+    index = int(value)
+    modes = [
+        "NO",
+        "MULTI_CPU",
+        "MULTI_XPU",
+        "MULTI_HPU",
+        "MULTI_GPU",
+        "MULTI_NPU",
+        "MULTI_MLU",
+        "MULTI_SDAA",
+        "MULTI_MUSA",
+        "XLA",
+    ]
+    return DistributedType(modes[index])
+def _convert_dynamo_backend(value):
+    """Turn an index into `DYNAMO_BACKENDS` into the `.value` of the matching `DynamoBackend` member."""
+    backend_name = DYNAMO_BACKENDS[int(value)]
+    backend = DynamoBackend(backend_name)
+    return backend.value
+def _convert_mixed_precision(value):
+    """Turn a menu index (anything `int()` accepts) into the matching `PrecisionType` member."""
+    index = int(value)
+    precisions = ["no", "fp16", "bf16", "fp8"]
+    return PrecisionType(precisions[index])
+def _convert_sagemaker_distributed_mode(value):
+    """Turn a menu index (anything `int()` accepts) into the matching `SageMakerDistributedType` member."""
+    index = int(value)
+    modes = ["NO", "DATA_PARALLEL", "MODEL_PARALLEL"]
+    return SageMakerDistributedType(modes[index])
+def _convert_fp8_backend(value):
+    """Turn a menu index (anything `int()` accepts) into the matching `FP8BackendType` member."""
+    index = int(value)
+    backends = ["TE", "MSAMP"]
+    return FP8BackendType(backends[index])
+def _convert_yes_no_to_bool(value):
+    """Map "yes"/"no" (case-insensitive) to `True`/`False`; any other answer raises `KeyError`."""
+    answer = value.lower()
+    if answer == "yes":
+        return True
+    if answer == "no":
+        return False
+    raise KeyError(answer)
+class SubcommandHelpFormatter(argparse.RawDescriptionHelpFormatter):
+    """
+    Help formatter for subcommands: formats the usage text like its parent class, then strips every
+    "<command> [<args>] " placeholder from it.
+    """
+    def _format_usage(self, usage, actions, groups, prefix):
+        formatted = super()._format_usage(usage, actions, groups, prefix)
+        return formatted.replace("<command> [<args>] ", "")

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/commands/config/default.py ADDED Viewed

	@@ -0,0 +1,163 @@

+#!/usr/bin/env python
+# Copyright 2021 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from pathlib import Path
+import torch
+from ...utils import (
+    is_hpu_available,
+    is_mlu_available,
+    is_musa_available,
+    is_npu_available,
+    is_sdaa_available,
+    is_xpu_available,
+)
+from .config_args import ClusterConfig, default_json_config_file
+from .config_utils import SubcommandHelpFormatter
+# Help text of the `default` subcommand (passed as `help=` when the subparser is created below).
+description = "Create a default config file for Accelerate with only a few flags set."
+def write_basic_config(mixed_precision="no", save_location: str = default_json_config_file):
+    """
+    Creates and saves a basic cluster config to be used on a local machine with potentially multiple GPUs. Will also
+    set CPU if it is a CPU-only machine.
+    Args:
+        mixed_precision (`str`, *optional*, defaults to "no"):
+            Mixed Precision to use. Should be one of "no", "fp16", "bf16", or "fp8" (case-insensitive).
+        save_location (`str`, *optional*, defaults to `default_json_config_file`):
+            Optional custom save location. Should be passed to `--config_file` when using `accelerate launch`. Default
+            location is inside the huggingface cache folder (`~/.cache/huggingface`) but can be overridden by setting
+            the `HF_HOME` environmental variable, followed by `accelerate/default_config.yaml`.
+    Returns:
+        `False` when a file already exists at `save_location` (nothing is written), otherwise the `Path` that was
+        written.
+    Raises:
+        ValueError: if `mixed_precision` is not one of the accepted values.
+    """
+    path = Path(save_location)
+    path.parent.mkdir(parents=True, exist_ok=True)
+    if path.exists():
+        print(
+            f"Configuration already exists at {save_location}, will not override. Run `accelerate config` manually or pass a different `save_location`."
+        )
+        return False
+    mixed_precision = mixed_precision.lower()
+    if mixed_precision not in ["no", "fp16", "bf16", "fp8"]:
+        raise ValueError(
+            f"`mixed_precision` should be one of 'no', 'fp16', 'bf16', or 'fp8'. Received {mixed_precision}"
+        )
+    config = {
+        "compute_environment": "LOCAL_MACHINE",
+        "mixed_precision": mixed_precision,
+    }
+    # (availability probe, device counter, distributed type used when several devices are found), checked in
+    # priority order; the first available backend wins. The counters are lambdas so `torch.<backend>` is
+    # only accessed after the matching probe succeeded.
+    # The MLU probe used to be a separate `if` in front of the chain, so an MLU-only machine fell through to
+    # the final `else` and had its settings overwritten with the CPU configuration.
+    accelerators = (
+        (is_mlu_available, lambda: torch.mlu.device_count(), "MULTI_MLU"),
+        (is_sdaa_available, lambda: torch.sdaa.device_count(), "MULTI_SDAA"),
+        (is_musa_available, lambda: torch.musa.device_count(), "MULTI_MUSA"),
+        (is_hpu_available, lambda: torch.hpu.device_count(), "MULTI_HPU"),
+        (torch.cuda.is_available, lambda: torch.cuda.device_count(), "MULTI_GPU"),
+        (is_xpu_available, lambda: torch.xpu.device_count(), "MULTI_XPU"),
+        (is_npu_available, lambda: torch.npu.device_count(), "MULTI_NPU"),
+    )
+    for is_available, device_count, multi_device_type in accelerators:
+        if is_available():
+            num_devices = device_count()
+            config["num_processes"] = num_devices
+            config["use_cpu"] = False
+            config["distributed_type"] = multi_device_type if num_devices > 1 else "NO"
+            break
+    else:
+        # No accelerator detected: single CPU process.
+        config["use_cpu"] = True
+        config["num_processes"] = 1
+        config["distributed_type"] = "NO"
+    config["debug"] = False
+    config["enable_cpu_affinity"] = False
+    config = ClusterConfig(**config)
+    config.to_json_file(path)
+    return path
+def default_command_parser(parser, parents):
+    """
+    Register the `default` subcommand on `parser` (a subparsers object) and return the created subparser.
+    The subparser accepts `--config_file` (stored as `save_location`) and `--mixed_precision`, and dispatches to
+    `default_config_command`.
+    """
+    subparser = parser.add_parser(
+        "default", parents=parents, help=description, formatter_class=SubcommandHelpFormatter
+    )
+    config_file_help = (
+        "The path to use to store the config file. Will default to a file named default_config.yaml in the cache "
+        "location, which is the content of the environment `HF_HOME` suffixed with 'accelerate', or if you don't have "
+        "such an environment variable, your cache directory ('~/.cache' or the content of `XDG_CACHE_HOME`) suffixed "
+        "with 'huggingface'."
+    )
+    subparser.add_argument(
+        "--config_file",
+        default=default_json_config_file,
+        help=config_file_help,
+        dest="save_location",
+    )
+    mixed_precision_help = (
+        "Whether or not to use mixed precision training. "
+        "Choose between FP16 and BF16 (bfloat16) training. "
+        "BF16 training is only supported on Nvidia Ampere GPUs and PyTorch 1.10 or later."
+    )
+    subparser.add_argument(
+        "--mixed_precision",
+        choices=["no", "fp16", "bf16"],
+        type=str,
+        help=mixed_precision_help,
+        default="no",
+    )
+    subparser.set_defaults(func=default_config_command)
+    return subparser
+def default_config_command(args):
+    """Run `write_basic_config` with the parsed arguments and report where the config was saved, if it was."""
+    saved_path = write_basic_config(args.mixed_precision, args.save_location)
+    if not saved_path:
+        return
+    print(f"accelerate configuration saved at {saved_path}")

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/commands/config/sagemaker.py ADDED Viewed

	@@ -0,0 +1,274 @@

+#!/usr/bin/env python
+# Copyright 2021 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import json
+import os
+from ...utils.constants import SAGEMAKER_PARALLEL_EC2_INSTANCES, TORCH_DYNAMO_MODES
+from ...utils.dataclasses import ComputeEnvironment, SageMakerDistributedType
+from ...utils.imports import is_boto3_available
+from .config_args import SageMakerConfig
+from .config_utils import (
+    DYNAMO_BACKENDS,
+    _ask_field,
+    _ask_options,
+    _convert_dynamo_backend,
+    _convert_mixed_precision,
+    _convert_sagemaker_distributed_mode,
+    _convert_yes_no_to_bool,
+)
+if is_boto3_available():
+    import boto3  # noqa: F401
+def _create_iam_role_for_sagemaker(role_name):
+    """
+    Create the IAM role `role_name` with a trust policy for `sagemaker.amazonaws.com`, then attach an inline
+    policy named `{role_name}_policy_permission` to it. If IAM raises `EntityAlreadyExistsException`, a message
+    is printed and the existing role is left as is.
+    """
+    iam_client = boto3.client("iam")
+    # Who may assume the role.
+    trust_policy = {
+        "Version": "2012-10-17",
+        "Statement": [
+            {"Effect": "Allow", "Principal": {"Service": "sagemaker.amazonaws.com"}, "Action": "sts:AssumeRole"}
+        ],
+    }
+    # What the role may do (applies to every resource).
+    allowed_actions = [
+        "sagemaker:*",
+        "ecr:GetDownloadUrlForLayer",
+        "ecr:BatchGetImage",
+        "ecr:BatchCheckLayerAvailability",
+        "ecr:GetAuthorizationToken",
+        "cloudwatch:PutMetricData",
+        "cloudwatch:GetMetricData",
+        "cloudwatch:GetMetricStatistics",
+        "cloudwatch:ListMetrics",
+        "logs:CreateLogGroup",
+        "logs:CreateLogStream",
+        "logs:DescribeLogStreams",
+        "logs:PutLogEvents",
+        "logs:GetLogEvents",
+        "s3:CreateBucket",
+        "s3:ListBucket",
+        "s3:GetBucketLocation",
+        "s3:GetObject",
+        "s3:PutObject",
+    ]
+    permission_policy = {
+        "Version": "2012-10-17",
+        "Statement": [{"Effect": "Allow", "Action": allowed_actions, "Resource": "*"}],
+    }
+    try:
+        iam_client.create_role(RoleName=role_name, AssumeRolePolicyDocument=json.dumps(trust_policy, indent=2))
+        iam_client.put_role_policy(
+            RoleName=role_name,
+            PolicyName=f"{role_name}_policy_permission",
+            PolicyDocument=json.dumps(permission_policy, indent=2),
+        )
+    except iam_client.exceptions.EntityAlreadyExistsException:
+        print(f"role {role_name} already exists. Using existing one")
+def _get_iam_role_arn(role_name):
+    """Look up the IAM role `role_name` and return the `Arn` entry of its `Role` description."""
+    response = boto3.client("iam").get_role(RoleName=role_name)
+    role = response["Role"]
+    return role["Arn"]
+def get_sagemaker_input():
+    """
+    Interactively ask for every SageMaker-related setting and return the resulting `SageMakerConfig`.
+    Side effects: sets `AWS_PROFILE` or `AWS_ACCESS_KEY_ID`/`AWS_SECRET_ACCESS_KEY`, and `AWS_DEFAULT_REGION`, in
+    `os.environ`; may create an IAM role through `_create_iam_role_for_sagemaker`.
+    """
+    # --- AWS credentials and region ---
+    credentials_configuration = _ask_options(
+        "How do you want to authorize?",
+        ["AWS Profile", "Credentials (AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY) "],
+        int,
+    )
+    aws_profile = None
+    if credentials_configuration == 0:
+        aws_profile = _ask_field("Enter your AWS Profile name: [default] ", default="default")
+        os.environ["AWS_PROFILE"] = aws_profile
+    else:
+        print(
+            "Note you will need to provide AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY when you launch you training script with,"
+            "`accelerate launch --aws_access_key_id XXX --aws_secret_access_key YYY`"
+        )
+        aws_access_key_id = _ask_field("AWS Access Key ID: ")
+        os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id
+        aws_secret_access_key = _ask_field("AWS Secret Access Key: ")
+        os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key
+    aws_region = _ask_field("Enter your AWS Region: [us-east-1]", default="us-east-1")
+    os.environ["AWS_DEFAULT_REGION"] = aws_region
+    # --- IAM role ---
+    role_management = _ask_options(
+        "Do you already have an IAM Role for executing Amazon SageMaker Training Jobs?",
+        ["Provide IAM Role name", "Create new IAM role using credentials"],
+        int,
+    )
+    if role_management == 0:
+        iam_role_name = _ask_field("Enter your IAM role name: ")
+    else:
+        iam_role_name = "accelerate_sagemaker_execution_role"
+        print(f'Accelerate will create an iam role "{iam_role_name}" using the provided credentials')
+        _create_iam_role_for_sagemaker(iam_role_name)
+    # --- Docker image, input channels, metrics ---
+    is_custom_docker_image = _ask_field(
+        "Do you want to use custom Docker image? [yes/NO]: ",
+        _convert_yes_no_to_bool,
+        default=False,
+        error_message="Please enter yes or no.",
+    )
+    docker_image = None
+    if is_custom_docker_image:
+        docker_image = _ask_field("Enter your Docker image: ", lambda x: str(x).lower())
+    is_sagemaker_inputs_enabled = _ask_field(
+        "Do you want to provide SageMaker input channels with data locations? [yes/NO]: ",
+        _convert_yes_no_to_bool,
+        default=False,
+        error_message="Please enter yes or no.",
+    )
+    sagemaker_inputs_file = None
+    if is_sagemaker_inputs_enabled:
+        sagemaker_inputs_file = _ask_field(
+            "Enter the path to the SageMaker inputs TSV file with columns (channel_name, data_location): ",
+            lambda x: str(x).lower(),
+        )
+    is_sagemaker_metrics_enabled = _ask_field(
+        "Do you want to enable SageMaker metrics? [yes/NO]: ",
+        _convert_yes_no_to_bool,
+        default=False,
+        error_message="Please enter yes or no.",
+    )
+    sagemaker_metrics_file = None
+    if is_sagemaker_metrics_enabled:
+        sagemaker_metrics_file = _ask_field(
+            "Enter the path to the SageMaker metrics TSV file with columns (metric_name, metric_regex): ",
+            lambda x: str(x).lower(),
+        )
+    # --- Distributed mode ---
+    distributed_type = _ask_options(
+        "What is the distributed mode?",
+        ["No distributed training", "Data parallelism"],
+        _convert_sagemaker_distributed_mode,
+    )
+    # --- Torch dynamo ---
+    dynamo_config = {}
+    use_dynamo = _ask_field(
+        "Do you wish to optimize your script with torch dynamo?[yes/NO]:",
+        _convert_yes_no_to_bool,
+        default=False,
+        error_message="Please enter yes or no.",
+    )
+    if use_dynamo:
+        prefix = "dynamo_"
+        dynamo_config[prefix + "backend"] = _ask_options(
+            "Which dynamo backend would you like to use?",
+            [x.lower() for x in DYNAMO_BACKENDS],
+            _convert_dynamo_backend,
+            default=2,
+        )
+        use_custom_options = _ask_field(
+            "Do you want to customize the defaults sent to torch.compile? [yes/NO]: ",
+            _convert_yes_no_to_bool,
+            default=False,
+            error_message="Please enter yes or no.",
+        )
+        if use_custom_options:
+            # `default` is forwarded to the menu as the index of the pre-selected entry (see `default=2`
+            # above), so it must be an int; it used to be the string "default".
+            dynamo_config[prefix + "mode"] = _ask_options(
+                "Which mode do you want to use?",
+                TORCH_DYNAMO_MODES,
+                lambda x: TORCH_DYNAMO_MODES[int(x)],
+                default=0,
+            )
+            dynamo_config[prefix + "use_fullgraph"] = _ask_field(
+                "Do you want the fullgraph mode or it is ok to break model into several subgraphs? [yes/NO]: ",
+                _convert_yes_no_to_bool,
+                default=False,
+                error_message="Please enter yes or no.",
+            )
+            dynamo_config[prefix + "use_dynamic"] = _ask_field(
+                "Do you want to enable dynamic shape tracing? [yes/NO]: ",
+                _convert_yes_no_to_bool,
+                default=False,
+                error_message="Please enter yes or no.",
+            )
+            dynamo_config[prefix + "use_regional_compilation"] = _ask_field(
+                "Do you want to enable regional compilation? [yes/NO]: ",
+                _convert_yes_no_to_bool,
+                default=False,
+                error_message="Please enter yes or no.",
+            )
+    # --- EC2 instance type: a fixed menu for distributed runs, free text otherwise ---
+    ec2_instance_query = "Which EC2 instance type you want to use for your training?"
+    if distributed_type != SageMakerDistributedType.NO:
+        ec2_instance_type = _ask_options(
+            ec2_instance_query, SAGEMAKER_PARALLEL_EC2_INSTANCES, lambda x: SAGEMAKER_PARALLEL_EC2_INSTANCES[int(x)]
+        )
+    else:
+        ec2_instance_query += "? [ml.p3.2xlarge]:"
+        ec2_instance_type = _ask_field(ec2_instance_query, lambda x: str(x).lower(), default="ml.p3.2xlarge")
+    # --- Debug mode and machine count (only asked for distributed runs) ---
+    debug = False
+    if distributed_type != SageMakerDistributedType.NO:
+        debug = _ask_field(
+            "Should distributed operations be checked while running for errors? This can avoid timeout issues but will be slower. [yes/NO]: ",
+            _convert_yes_no_to_bool,
+            default=False,
+            error_message="Please enter yes or no.",
+        )
+    num_machines = 1
+    if distributed_type in (SageMakerDistributedType.DATA_PARALLEL, SageMakerDistributedType.MODEL_PARALLEL):
+        num_machines = _ask_field(
+            "How many machines do you want use? [1]: ",
+            int,
+            default=1,
+        )
+    # --- Mixed precision ---
+    mixed_precision = _ask_options(
+        "Do you wish to use FP16 or BF16 (mixed precision)?",
+        ["no", "fp16", "bf16", "fp8"],
+        _convert_mixed_precision,
+    )
+    if use_dynamo and mixed_precision == "no":
+        print(
+            "Torch dynamo used without mixed precision requires TF32 to be efficient. Accelerate will enable it by default when launching your scripts."
+        )
+    return SageMakerConfig(
+        image_uri=docker_image,
+        compute_environment=ComputeEnvironment.AMAZON_SAGEMAKER,
+        distributed_type=distributed_type,
+        use_cpu=False,
+        dynamo_config=dynamo_config,
+        ec2_instance_type=ec2_instance_type,
+        profile=aws_profile,
+        region=aws_region,
+        iam_role_name=iam_role_name,
+        mixed_precision=mixed_precision,
+        num_machines=num_machines,
+        sagemaker_inputs_file=sagemaker_inputs_file,
+        sagemaker_metrics_file=sagemaker_metrics_file,
+        debug=debug,
+    )

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/commands/config/update.py ADDED Viewed

	@@ -0,0 +1,63 @@

+#!/usr/bin/env python
+# Copyright 2022 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from pathlib import Path
+from .config_args import default_config_file, load_config_from_file
+from .config_utils import SubcommandHelpFormatter
+# Help text of the `update` subcommand (passed as `help=` when the subparser is created below).
+description = "Update an existing config file with the latest defaults while maintaining the old configuration."
+def update_config(args):
+    """
+    Update an existing config file with the latest defaults while maintaining the old configuration.
+    Args:
+        args: parsed arguments; `args.config_file` is the file to update, or `None` to use `default_config_file`.
+    Returns:
+        The path of the file that was loaded and rewritten (as JSON when it ends in ".json", as YAML otherwise).
+    Raises:
+        ValueError: if the selected config file does not exist.
+    """
+    config_file = args.config_file
+    if config_file is None:
+        # Nothing passed: fall back to the default location. A missing default used to reach `Path(None)`
+        # and crash with a `TypeError`; it is now reported as a `ValueError` like any other missing file.
+        if not Path(default_config_file).exists():
+            raise ValueError(
+                f"No config file was passed and the default config file located at {default_config_file} doesn't exist."
+            )
+        config_file = default_config_file
+    elif not Path(config_file).exists():
+        raise ValueError(f"The passed config file located at {config_file} doesn't exist.")
+    config = load_config_from_file(config_file)
+    # `str(...)` so that a `Path` argument works as well as a plain string.
+    if str(config_file).endswith(".json"):
+        config.to_json_file(config_file)
+    else:
+        config.to_yaml_file(config_file)
+    return config_file
+def update_command_parser(parser, parents):
+    """
+    Register the `update` subcommand on `parser` (a subparsers object) and return the created subparser.
+    The subparser accepts `--config_file` and dispatches to `update_config_command`.
+    """
+    subparser = parser.add_parser(
+        "update", parents=parents, help=description, formatter_class=SubcommandHelpFormatter
+    )
+    config_file_help = (
+        "The path to the config file to update. Will default to a file named default_config.yaml in the cache "
+        "location, which is the content of the environment `HF_HOME` suffixed with 'accelerate', or if you don't have "
+        "such an environment variable, your cache directory ('~/.cache' or the content of `XDG_CACHE_HOME`) suffixed "
+        "with 'huggingface'."
+    )
+    subparser.add_argument("--config_file", default=None, help=config_file_help)
+    subparser.set_defaults(func=update_config_command)
+    return subparser
+def update_config_command(args):
+    """Run `update_config` with the parsed arguments and print the path of the updated file."""
+    updated_path = update_config(args)
+    print(f"Successfully updated the configuration file at {updated_path}.")

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/commands/menu/__init__.py ADDED Viewed

	@@ -0,0 +1,14 @@

+# Copyright 2022 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from .selection_menu import BulletMenu

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/commands/menu/__pycache__/__init__.cpython-312.pyc ADDED Viewed

Binary file (284 Bytes). View file

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/commands/menu/__pycache__/cursor.cpython-312.pyc ADDED Viewed

Binary file (3.06 kB). View file

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/commands/menu/__pycache__/helpers.cpython-312.pyc ADDED Viewed

Binary file (2.21 kB). View file

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/commands/menu/__pycache__/input.cpython-312.pyc ADDED Viewed

Binary file (3.17 kB). View file

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/commands/menu/__pycache__/keymap.cpython-312.pyc ADDED Viewed

Binary file (4.52 kB). View file

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/commands/menu/__pycache__/selection_menu.cpython-312.pyc ADDED Viewed

Binary file (7.47 kB). View file

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/commands/menu/cursor.py ADDED Viewed

	@@ -0,0 +1,65 @@

+# Copyright 2022 The HuggingFace Team and Brian Chao. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+A utility for showing and hiding the terminal cursor on Windows and Linux, based on https://github.com/bchao1/bullet
+"""
+import os
+import sys
+from contextlib import contextmanager
+# Windows only
+if os.name == "nt":
+    import ctypes
+    import msvcrt  # noqa
+    # Structure passed by reference to Get/SetConsoleCursorInfo in `hide_cursor` / `show_cursor`.
+    class CursorInfo(ctypes.Structure):
+        # `_fields_` is the attribute ctypes reads to define the structure's members
+        _fields_ = [("size", ctypes.c_int), ("visible", ctypes.c_byte)]
+def hide_cursor():
+    """
+    Hide the terminal cursor: through the console cursor-info API when `os.name` is "nt", by writing the
+    `\\033[?25l` escape sequence to stdout when it is "posix". Nothing happens on other platforms.
+    """
+    platform = os.name
+    if platform == "posix":
+        sys.stdout.write("\033[?25l")
+        sys.stdout.flush()
+    elif platform == "nt":
+        cursor_info = CursorInfo()
+        kernel32 = ctypes.windll.kernel32
+        handle = kernel32.GetStdHandle(-11)
+        # Read the current settings, flip only the visibility flag, write them back.
+        kernel32.GetConsoleCursorInfo(handle, ctypes.byref(cursor_info))
+        cursor_info.visible = False
+        kernel32.SetConsoleCursorInfo(handle, ctypes.byref(cursor_info))
+def show_cursor():
+    """
+    Show the terminal cursor: through the console cursor-info API when `os.name` is "nt", by writing the
+    `\\033[?25h` escape sequence to stdout when it is "posix". Nothing happens on other platforms.
+    """
+    platform = os.name
+    if platform == "posix":
+        sys.stdout.write("\033[?25h")
+        sys.stdout.flush()
+    elif platform == "nt":
+        cursor_info = CursorInfo()
+        kernel32 = ctypes.windll.kernel32
+        handle = kernel32.GetStdHandle(-11)
+        # Read the current settings, flip only the visibility flag, write them back.
+        kernel32.GetConsoleCursorInfo(handle, ctypes.byref(cursor_info))
+        cursor_info.visible = True
+        kernel32.SetConsoleCursorInfo(handle, ctypes.byref(cursor_info))
+@contextmanager
+def hide():
+    """
+    Context manager that hides the terminal cursor for the duration of the `with` body.
+    The cursor is shown again on exit, including when the body (or hiding itself) raises.
+    """
+    try:
+        hide_cursor()
+        yield
+    finally:
+        # Always restore visibility.
+        show_cursor()

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/commands/menu/helpers.py ADDED Viewed

	@@ -0,0 +1,59 @@

+# Copyright 2022 The HuggingFace Team and Brian Chao. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+A variety of helper functions and constants when dealing with terminal menu choices, based on
+https://github.com/bchao1/bullet
+"""
+import enum
+import shutil
+import sys
+# Terminal width in columns, read once at import time (the line count is discarded).
+TERMINAL_WIDTH, _ = shutil.get_terminal_size()
+# Final letter of the escape sequence that `move_cursor` emits for each direction.
+CURSOR_TO_CHAR = {"UP": "A", "DOWN": "B", "RIGHT": "C", "LEFT": "D"}
+class Direction(enum.Enum):
+    """Vertical direction with two members, `UP` (0) and `DOWN` (1)."""
+    UP = 0
+    DOWN = 1
+def forceWrite(content, end=""):
+    """Write `str(content)` followed by `end` to stdout and flush immediately."""
+    stream = sys.stdout
+    stream.write(str(content) + end)
+    stream.flush()
+def writeColor(content, color, end=""):
+    """Write `content` wrapped in the `\\u001b[{color}m` ... `\\u001b[0m` escape sequences, followed by `end`."""
+    colored = f"\u001b[{color}m{content}\u001b[0m"
+    forceWrite(colored, end)
+def reset_cursor():
+    """Write a carriage return, which moves the cursor back to the start of the current line."""
+    carriage_return = "\r"
+    forceWrite(carriage_return)
+def move_cursor(num_lines: int, direction: str):
+    """
+    Write the escape sequence `\\033[{num_lines}{letter}`, where `letter` is looked up in `CURSOR_TO_CHAR` using
+    the upper-cased `direction`. An unknown direction raises `KeyError`.
+    """
+    letter = CURSOR_TO_CHAR[direction.upper()]
+    forceWrite(f"\033[{num_lines}{letter}")
+def clear_line():
+    """Overwrite the line with `TERMINAL_WIDTH` spaces, then return the cursor to the start of the line."""
+    blank = " " * TERMINAL_WIDTH
+    forceWrite(blank)
+    reset_cursor()
+def linebreak():
+    """Return the cursor to the start of the line, then write a rule of `TERMINAL_WIDTH` dashes."""
+    reset_cursor()
+    rule = "-" * TERMINAL_WIDTH
+    forceWrite(rule)

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/commands/menu/input.py ADDED Viewed

	@@ -0,0 +1,84 @@

+# Copyright 2022 The HuggingFace Team and Brian Chao. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+This file contains utilities for handling input from the user and registering specific keys to specific functions,
+based on https://github.com/bchao1/bullet
+"""
+from .keymap import KEYMAP, get_character
+def mark(key: str):
+    """
+    Decorator factory: append `key` to the decorated function's `handle_key` list (created if missing) and
+    return the function itself.
+    """
+    def decorator(func):
+        registered = getattr(func, "handle_key", [])
+        registered += [key]
+        func.handle_key = registered
+        return func
+    return decorator
+def mark_multiple(*keys: list[str]):
+    """
+    Decorator factory: append every key in `keys` to the decorated function's `handle_key` list (created if
+    missing) and return the function itself.
+    """
+    def decorator(func):
+        registered = getattr(func, "handle_key", [])
+        registered += keys
+        func.handle_key = registered
+        return func
+    return decorator
+class KeyHandler(type):
+    """
+    Metaclass that adds the key handlers to the class
+    """
+    def __new__(cls, name, bases, attrs):
+        new_cls = super().__new__(cls, name, bases, attrs)
+        if not hasattr(new_cls, "key_handler"):
+            new_cls.key_handler = {}
+        new_cls.handle_input = KeyHandler.handle_input
+        for value in attrs.values():
+            handled_keys = getattr(value, "handle_key", [])
+            for key in handled_keys:
+                new_cls.key_handler[key] = value
+        return new_cls
+    @staticmethod
+    def handle_input(cls):
+        "Finds and returns the selected character if it exists in the handler"
+        char = get_character()
+        if char != KEYMAP["undefined"]:
+            char = ord(char)
+        handler = cls.key_handler.get(char)
+        if handler:
+            cls.current_selection = char
+            return handler(cls)
+        else:
+            return None
+def register(cls):
+    """Adds KeyHandler metaclass to the class"""
+    return KeyHandler(cls.__name__, cls.__bases__, cls.__dict__.copy())

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/commands/menu/keymap.py ADDED Viewed

	@@ -0,0 +1,133 @@

+# Copyright 2022 The HuggingFace Team and Brian Chao. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Utilities relating to parsing raw characters from the keyboard, based on https://github.com/bchao1/bullet
+"""
+import os
+import string
+import sys
+# Added to the arrow-key codes below so they stay distinct from plain characters (65-68 are also "A"-"D").
+ARROW_KEY_FLAG = 1 << 8
+# Key name -> integer key code.
+KEYMAP = {
+    "tab": ord("\t"),
+    "newline": ord("\r"),
+    "esc": 27,
+    "up": 65 + ARROW_KEY_FLAG,
+    "down": 66 + ARROW_KEY_FLAG,
+    "right": 67 + ARROW_KEY_FLAG,
+    "left": 68 + ARROW_KEY_FLAG,
+    "mod_int": 91,
+    "undefined": sys.maxsize,
+    "interrupt": 3,
+    "insert": 50,
+    "delete": 51,
+    "pg_up": 53,
+    "pg_down": 54,
+}
+# Inclusive bounds of the flagged arrow-key codes ("up" has the smallest code, "left" the largest).
+KEYMAP["arrow_begin"] = KEYMAP["up"]
+KEYMAP["arrow_end"] = KEYMAP["left"]
+if sys.platform == "win32":
+    # Characters that `get_raw_chars` has queued and still has to return on later calls.
+    WIN_CH_BUFFER = []
+    # Two-byte key sequences (prefix b"\xe0" or b"\x00" plus a key byte) -> unflagged arrow-key code.
+    WIN_KEYMAP = {
+        b"\xe0H": KEYMAP["up"] - ARROW_KEY_FLAG,
+        b"\x00H": KEYMAP["up"] - ARROW_KEY_FLAG,
+        b"\xe0P": KEYMAP["down"] - ARROW_KEY_FLAG,
+        b"\x00P": KEYMAP["down"] - ARROW_KEY_FLAG,
+        b"\xe0M": KEYMAP["right"] - ARROW_KEY_FLAG,
+        b"\x00M": KEYMAP["right"] - ARROW_KEY_FLAG,
+        b"\xe0K": KEYMAP["left"] - ARROW_KEY_FLAG,
+        b"\x00K": KEYMAP["left"] - ARROW_KEY_FLAG,
+    }
+# The digit keys "0"-"9" map to their own character codes.
+for i in range(10):
+    KEYMAP[str(i)] = ord(str(i))
+def get_raw_chars():
+    "Gets raw characters from inputs"
+    if os.name == "nt":
+        import msvcrt
+        encoding = "mbcs"
+        # Flush the keyboard buffer
+        while msvcrt.kbhit():
+            msvcrt.getch()
+        if len(WIN_CH_BUFFER) == 0:
+            # Read the keystroke
+            ch = msvcrt.getch()
+            # If it is a prefix char, get second part
+            if ch in (b"\x00", b"\xe0"):
+                ch2 = ch + msvcrt.getch()
+                # Translate actual Win chars to bullet char types
+                try:
+                    chx = chr(WIN_KEYMAP[ch2])
+                    WIN_CH_BUFFER.append(chr(KEYMAP["mod_int"]))
+                    WIN_CH_BUFFER.append(chx)
+                    if ord(chx) in (
+                        KEYMAP["insert"] - 1 << 9,
+                        KEYMAP["delete"] - 1 << 9,
+                        KEYMAP["pg_up"] - 1 << 9,
+                        KEYMAP["pg_down"] - 1 << 9,
+                    ):
+                        WIN_CH_BUFFER.append(chr(126))
+                    ch = chr(KEYMAP["esc"])
+                except KeyError:
+                    ch = ch2[1]
+            else:
+                ch = ch.decode(encoding)
+        else:
+            ch = WIN_CH_BUFFER.pop(0)
+    elif os.name == "posix":
+        import termios
+        import tty
+        fd = sys.stdin.fileno()
+        old_settings = termios.tcgetattr(fd)
+        try:
+            tty.setraw(fd)
+            ch = sys.stdin.read(1)
+        finally:
+            termios.tcsetattr(fd, termios.TCSADRAIN, old_settings)
+    return ch
+def get_character():
+    "Gets a character from the keyboard and returns the key code"
+    char = get_raw_chars()
+    if ord(char) in [KEYMAP["interrupt"], KEYMAP["newline"]]:
+        return char
+    elif ord(char) == KEYMAP["esc"]:
+        combo = get_raw_chars()
+        if ord(combo) == KEYMAP["mod_int"]:
+            key = get_raw_chars()
+            if ord(key) >= KEYMAP["arrow_begin"] - ARROW_KEY_FLAG and ord(key) <= KEYMAP["arrow_end"] - ARROW_KEY_FLAG:
+                return chr(ord(key) + ARROW_KEY_FLAG)
+            else:
+                return KEYMAP["undefined"]
+        else:
+            return get_raw_chars()
+    else:
+        if char in string.printable:
+            return char
+        else:
+            return KEYMAP["undefined"]

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/commands/menu/selection_menu.py ADDED Viewed

	@@ -0,0 +1,145 @@

+# Copyright 2022 The HuggingFace Team and Brian Chao. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Main driver for the selection menu, based on https://github.com/bchao1/bullet
+"""
+import builtins
+import sys
+from typing import Optional
+from ...utils.imports import _is_package_available
+from . import cursor, input
+from .helpers import Direction, clear_line, forceWrite, linebreak, move_cursor, reset_cursor, writeColor
+from .keymap import KEYMAP
+# Whether the `google.colab` package is available; `BulletMenu.run` then reads a typed index instead of key presses.
+in_colab = False
+try:
+    in_colab = _is_package_available("google.colab")
+except ModuleNotFoundError:
+    # Keep the default of False when the availability check raises ModuleNotFoundError
+    pass
+@input.register
+class BulletMenu:
+    """
+    A CLI menu to select a choice from a list of choices using the keyboard.
+    """
+    def __init__(self, prompt: Optional[str] = None, choices: list = []):
+        self.position = 0
+        self.choices = choices
+        self.prompt = prompt
+        if sys.platform == "win32":
+            self.arrow_char = "*"
+        else:
+            self.arrow_char = "➔ "
+    def write_choice(self, index, end: str = ""):
+        if sys.platform != "win32":
+            writeColor(self.choices[index], 32, end)
+        else:
+            forceWrite(self.choices[index], end)
+    def print_choice(self, index: int):
+        "Prints the choice at the given index"
+        if index == self.position:
+            forceWrite(f" {self.arrow_char} ")
+            self.write_choice(index)
+        else:
+            forceWrite(f"    {self.choices[index]}")
+        reset_cursor()
+    def move_direction(self, direction: Direction, num_spaces: int = 1):
+        "Should not be directly called, used to move a direction of either up or down"
+        old_position = self.position
+        if direction == Direction.DOWN:
+            if self.position + 1 >= len(self.choices):
+                return
+            self.position += num_spaces
+        else:
+            if self.position - 1 < 0:
+                return
+            self.position -= num_spaces
+        clear_line()
+        self.print_choice(old_position)
+        move_cursor(num_spaces, direction.name)
+        self.print_choice(self.position)
+    @input.mark(KEYMAP["up"])
+    def move_up(self):
+        self.move_direction(Direction.UP)
+    @input.mark(KEYMAP["down"])
+    def move_down(self):
+        self.move_direction(Direction.DOWN)
+    @input.mark(KEYMAP["newline"])
+    def select(self):
+        move_cursor(len(self.choices) - self.position, "DOWN")
+        return self.position
+    @input.mark(KEYMAP["interrupt"])
+    def interrupt(self):
+        move_cursor(len(self.choices) - self.position, "DOWN")
+        raise KeyboardInterrupt
+    @input.mark_multiple(*[KEYMAP[str(number)] for number in range(10)])
+    def select_row(self):
+        index = int(chr(self.current_selection))
+        movement = index - self.position
+        if index == self.position:
+            return
+        if index < len(self.choices):
+            if self.position > index:
+                self.move_direction(Direction.UP, -movement)
+            elif self.position < index:
+                self.move_direction(Direction.DOWN, movement)
+            else:
+                return
+        else:
+            return
+    def run(self, default_choice: int = 0):
+        "Start the menu and return the selected choice"
+        if self.prompt:
+            linebreak()
+            forceWrite(self.prompt, "\n")
+            if in_colab:
+                forceWrite("Please input a choice index (starting from 0), and press enter", "\n")
+            else:
+                forceWrite("Please select a choice using the arrow or number keys, and selecting with enter", "\n")
+        self.position = default_choice
+        for i in range(len(self.choices)):
+            self.print_choice(i)
+            forceWrite("\n")
+        move_cursor(len(self.choices) - self.position, "UP")
+        with cursor.hide():
+            while True:
+                if in_colab:
+                    try:
+                        choice = int(builtins.input())
+                    except ValueError:
+                        choice = default_choice
+                else:
+                    choice = self.handle_input()
+                if choice is not None:
+                    reset_cursor()
+                    for _ in range(len(self.choices) + 1):
+                        move_cursor(1, "UP")
+                        clear_line()
+                    self.write_choice(choice, "\n")
+                    return choice

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/test_utils/__pycache__/__init__.cpython-312.pyc ADDED Viewed

Binary file (1.78 kB). View file

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/test_utils/__pycache__/examples.cpython-312.pyc ADDED Viewed

Binary file (6.93 kB). View file

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/test_utils/__pycache__/testing.cpython-312.pyc ADDED Viewed

Binary file (42.3 kB). View file

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/test_utils/__pycache__/training.cpython-312.pyc ADDED Viewed

Binary file (7.46 kB). View file

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/test_utils/scripts/__init__.py ADDED Viewed

	@@ -0,0 +1,13 @@

+# Copyright 2020 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/test_utils/scripts/test_ddp_comm_hook.py ADDED Viewed

	@@ -0,0 +1,85 @@

+# Copyright 2022 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import torch
+from accelerate import Accelerator, DDPCommunicationHookType, DistributedDataParallelKwargs, PartialState
+from accelerate.utils import is_hpu_available
+class MockModel(torch.nn.Module):
+    def __init__(self):
+        super().__init__()
+        torch.manual_seed(0)
+        self.p = torch.nn.Parameter(torch.randn(40, 20))
+    def forward(self, x, rank):
+        return self.p * (x ** (1 + rank))
+def _run_and_get_grads(model, rank):
+    torch.manual_seed(2024)
+    input = torch.randn(40, 20)
+    output = model(input, rank)
+    output.mean().backward()
+    param = next(model.parameters())
+    return param.grad
+def test_ddp_comm_hook(comm_hook, comm_wrapper, comm_state_option):
+    """
+    Check that the gradients of a `MockModel` prepared with the given DDP communication hook settings are close
+    (rtol/atol of 1e-2) to those of a `MockModel` wrapped directly in `DistributedDataParallel`.
+    """
+    ddp_kwargs = DistributedDataParallelKwargs(
+        comm_hook=comm_hook,
+        comm_wrapper=comm_wrapper,
+        comm_state_option=comm_state_option,
+    )
+    accelerator = Accelerator(kwargs_handlers=[ddp_kwargs])
+    model = accelerator.prepare(MockModel())
+    # The local process index is passed as `rank`, so each process computes a different output
+    hook_grads = _run_and_get_grads(model, accelerator.local_process_index)
+    # Reference: the same model wrapped in DDP without going through the accelerator
+    reference_model = torch.nn.parallel.DistributedDataParallel(
+        MockModel().to(accelerator.device),
+        device_ids=[accelerator.local_process_index],
+        output_device=accelerator.local_process_index,
+    )
+    reference_grads = _run_and_get_grads(reference_model, accelerator.local_process_index)
+    torch.testing.assert_close(hook_grads, reference_grads, rtol=1e-2, atol=1e-2)
+def main():
+    for comm_hook, comm_wrapper, comm_state_option in [
+        (DDPCommunicationHookType.NO, DDPCommunicationHookType.NO, {}),
+        (DDPCommunicationHookType.FP16, DDPCommunicationHookType.NO, {}),
+        (DDPCommunicationHookType.BF16, DDPCommunicationHookType.NO, {}),
+        (DDPCommunicationHookType.POWER_SGD, DDPCommunicationHookType.NO, {}),
+        (DDPCommunicationHookType.POWER_SGD, DDPCommunicationHookType.FP16, {}),
+        (DDPCommunicationHookType.POWER_SGD, DDPCommunicationHookType.BF16, {}),
+        (DDPCommunicationHookType.POWER_SGD, DDPCommunicationHookType.NO, {"matrix_approximation_rank": 2}),
+        (DDPCommunicationHookType.BATCHED_POWER_SGD, DDPCommunicationHookType.NO, {}),
+        (DDPCommunicationHookType.BATCHED_POWER_SGD, DDPCommunicationHookType.FP16, {}),
+        (DDPCommunicationHookType.BATCHED_POWER_SGD, DDPCommunicationHookType.BF16, {}),
+    ]:
+        if is_hpu_available():
+            HPU_UNSUPPORTED_COMM_HOOKS = {DDPCommunicationHookType.FP16, DDPCommunicationHookType.BF16}
+            if comm_hook in HPU_UNSUPPORTED_COMM_HOOKS or comm_wrapper in HPU_UNSUPPORTED_COMM_HOOKS:
+                print(f"Skipping test DDP comm hook: {comm_hook}, comm wrapper: {comm_wrapper} on HPU")
+                continue
+        print(f"Test DDP comm hook: {comm_hook}, comm wrapper: {comm_wrapper}")
+        test_ddp_comm_hook(comm_hook, comm_wrapper, comm_state_option)
+    PartialState().destroy_process_group()
+if __name__ == "__main__":
+    main()

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/test_utils/scripts/test_distributed_data_loop.py ADDED Viewed

	@@ -0,0 +1,410 @@

+#!/usr/bin/env python
+# Copyright 2021 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import pickle
+import tempfile
+import warnings
+from unittest.mock import Mock
+import torch
+from torch.utils.data import (
+    BatchSampler,
+    DataLoader,
+    Dataset,
+    IterableDataset,
+    RandomSampler,
+    TensorDataset,
+    default_collate,
+)
+from accelerate.accelerator import Accelerator, DataLoaderConfiguration
+from accelerate.utils.dataclasses import DistributedType
+# Length reported by `DummyDataset` and the number of distinct samples `test_data_loader` expects to see
+NUM_ELEMENTS = 22
+# `num_workers` and `batch_size` used for the DataLoaders built in `main`
+NUM_WORKERS = 4
+BATCH_SIZE = 4
+class DummyDataset(Dataset):
+    def __len__(self):
+        return NUM_ELEMENTS
+    def __getitem__(self, index):
+        squeeze = False
+        if isinstance(index, int):
+            index = [index]
+            squeeze = True
+        elif isinstance(index, slice):
+            index = list(range(*index.indices(self.size)))
+        else:
+            index = list(index)
+        batch = [{"index": i, "label": i % 2, "random_augmentation": torch.rand(1).item()} for i in index]
+        if squeeze:
+            batch = batch[0]
+        return batch
+class DummyIterableDataset(IterableDataset):
+    def __init__(self, data):
+        self.data = data
+    def __iter__(self):
+        yield from self.data
+def create_accelerator(even_batches=True):
+    dataloader_config = DataLoaderConfiguration(even_batches=even_batches)
+    accelerator = Accelerator(dataloader_config=dataloader_config)
+    assert accelerator.num_processes == 2, "this script expects that two GPUs are available"
+    return accelerator
+def create_dataloader(
+    accelerator: Accelerator, dataset_size: int, batch_size: int, iterable: bool = False, shuffle: bool = False
+):
+    """
+    Create a simple DataLoader to use during the test cases
+    """
+    values = torch.as_tensor(range(dataset_size))
+    if shuffle:
+        values = values[torch.randperm(values.size(0))]
+    if iterable:
+        dataset = DummyIterableDataset(values)
+    else:
+        dataset = TensorDataset(torch.as_tensor(range(dataset_size)))
+    dl = DataLoader(dataset, batch_size=batch_size)
+    dl = accelerator.prepare(dl)
+    return dl
+def verify_dataloader_batch_sizes(
+    accelerator: Accelerator,
+    dataset_size: int,
+    batch_size: int,
+    process_0_expected_batch_sizes: list[int],
+    process_1_expected_batch_sizes: list[int],
+):
+    """
+    A helper function for verifying the batch sizes coming from a prepared dataloader in each process
+    """
+    dl = create_dataloader(accelerator=accelerator, dataset_size=dataset_size, batch_size=batch_size)
+    batch_sizes = [len(batch[0]) for batch in dl]
+    if accelerator.process_index == 0:
+        assert batch_sizes == process_0_expected_batch_sizes
+    elif accelerator.process_index == 1:
+        assert batch_sizes == process_1_expected_batch_sizes
+def test_default_ensures_even_batch_sizes():
+    """Check that, with the default accelerator settings, both processes receive identical batch sizes."""
+    accelerator = create_accelerator()
+    # without padding, we would expect a different number of batches
+    verify_dataloader_batch_sizes(
+        accelerator,
+        dataset_size=3,
+        batch_size=1,
+        process_0_expected_batch_sizes=[1, 1],
+        process_1_expected_batch_sizes=[1, 1],
+    )
+    # without padding, we would expect the same number of batches, but different sizes
+    verify_dataloader_batch_sizes(
+        accelerator,
+        dataset_size=7,
+        batch_size=2,
+        process_0_expected_batch_sizes=[2, 2],
+        process_1_expected_batch_sizes=[2, 2],
+    )
+def test_can_disable_even_batches():
+    """Check that with `even_batches=False` the two processes may receive different batch counts and sizes."""
+    accelerator = create_accelerator(even_batches=False)
+    # 3 samples, batch size 1: process 1 is expected to get one batch fewer than process 0
+    verify_dataloader_batch_sizes(
+        accelerator,
+        dataset_size=3,
+        batch_size=1,
+        process_0_expected_batch_sizes=[1, 1],
+        process_1_expected_batch_sizes=[1],
+    )
+    # 7 samples, batch size 2: same number of batches, but the last batch of process 1 is smaller
+    verify_dataloader_batch_sizes(
+        accelerator,
+        dataset_size=7,
+        batch_size=2,
+        process_0_expected_batch_sizes=[2, 2],
+        process_1_expected_batch_sizes=[2, 1],
+    )
+def test_can_join_uneven_inputs():
+    """
+    Train a prepared linear model inside `join_uneven_inputs` on a 3-sample dataloader with uneven batches, and
+    check that process 0 ran two batches while process 1 ran one.
+    """
+    accelerator = create_accelerator(even_batches=False)
+    model = torch.nn.Linear(1, 1)
+    ddp_model = accelerator.prepare(model)
+    dl = create_dataloader(accelerator, dataset_size=3, batch_size=1)
+    # Indices of the batches this process actually iterated over
+    batch_idxs = []
+    with accelerator.join_uneven_inputs([ddp_model]):
+        for batch_idx, batch in enumerate(dl):
+            output = ddp_model(batch[0].float())
+            loss = output.sum()
+            loss.backward()
+            batch_idxs.append(batch_idx)
+    accelerator.wait_for_everyone()
+    if accelerator.process_index == 0:
+        assert batch_idxs == [0, 1]
+    elif accelerator.process_index == 1:
+        assert batch_idxs == [0]
+def test_join_raises_warning_for_non_ddp_distributed(accelerator):
+    """Check that entering `join_uneven_inputs` on the given accelerator emits a "multi-GPU only" `UserWarning`."""
+    with warnings.catch_warnings(record=True) as w:
+        with accelerator.join_uneven_inputs([Mock()]):
+            pass
+        # NOTE(review): `w[-1]` raises IndexError rather than a readable assertion failure if nothing was recorded
+        assert issubclass(w[-1].category, UserWarning)
+        assert "only supported for multi-GPU" in str(w[-1].message)
+def test_join_can_override_even_batches():
+    """
+    Check that `join_uneven_inputs(..., even_batches=...)` overrides `even_batches` on the batch samplers of the
+    prepared dataloaders inside the context, and that the original value is back once the context exits.
+    """
+    default_even_batches = True
+    overridden_even_batches = False
+    accelerator = create_accelerator(even_batches=default_even_batches)
+    model = torch.nn.Linear(1, 1)
+    ddp_model = accelerator.prepare(model)
+    train_dl = create_dataloader(accelerator, dataset_size=3, batch_size=1)
+    valid_dl = create_dataloader(accelerator, dataset_size=3, batch_size=1)
+    with accelerator.join_uneven_inputs([ddp_model], even_batches=overridden_even_batches):
+        # Capture the values while the override is active
+        train_dl_overridden_value = train_dl.batch_sampler.even_batches
+        valid_dl_overridden_value = valid_dl.batch_sampler.even_batches
+    assert train_dl_overridden_value == overridden_even_batches
+    assert valid_dl_overridden_value == overridden_even_batches
+    assert train_dl.batch_sampler.even_batches == default_even_batches
+    assert valid_dl.batch_sampler.even_batches == default_even_batches
+def test_join_can_override_for_mixed_type_dataloaders():
+    """
+    Check that overriding `even_batches` in `join_uneven_inputs` still works for a map-style dataloader when an
+    iterable-style dataloader has been prepared as well, and that no `AttributeError` is raised.
+    """
+    default_even_batches = True
+    overridden_even_batches = False
+    accelerator = create_accelerator(even_batches=default_even_batches)
+    model = torch.nn.Linear(1, 1)
+    ddp_model = accelerator.prepare(model)
+    # The iterable dataloader is only created (and prepared); its return value is not needed here
+    create_dataloader(accelerator, dataset_size=3, batch_size=1, iterable=True)
+    batch_dl = create_dataloader(accelerator, dataset_size=3, batch_size=1)
+    with warnings.catch_warnings():
+        warnings.filterwarnings("ignore")
+        try:
+            with accelerator.join_uneven_inputs([ddp_model], even_batches=overridden_even_batches):
+                batch_dl_overridden_value = batch_dl.batch_sampler.even_batches
+        except AttributeError:
+            # ensure attribute error is not raised when processing iterable dl
+            raise AssertionError
+    assert batch_dl_overridden_value == overridden_even_batches
+    assert batch_dl.batch_sampler.even_batches == default_even_batches
+def test_join_raises_warning_for_iterable_when_overriding_even_batches():
+    """
+    Check that overriding `even_batches` in `join_uneven_inputs` emits a "map-style datasets only" `UserWarning`
+    when an iterable-style dataloader has been prepared.
+    """
+    accelerator = create_accelerator()
+    model = torch.nn.Linear(1, 1)
+    ddp_model = accelerator.prepare(model)
+    create_dataloader(accelerator, dataset_size=3, batch_size=1, iterable=True)
+    with warnings.catch_warnings(record=True) as w:
+        with accelerator.join_uneven_inputs([ddp_model], even_batches=False):
+            pass
+        # NOTE(review): `w[-1]` raises IndexError rather than a readable assertion failure if nothing was recorded
+        assert issubclass(w[-1].category, UserWarning)
+        assert "only supported for map-style datasets" in str(w[-1].message)
+def test_pickle_accelerator():
+    """Check that an accelerator survives a pickle round trip with an unchanged `state.__dict__`."""
+    accelerator = create_accelerator()
+    data_loader = create_dataloader(accelerator, dataset_size=32, batch_size=4)
+    # `create_dataloader` already returns a prepared loader; it is passed through `prepare` once more here
+    _ = accelerator.prepare(data_loader)
+    pickled_accelerator = pickle.dumps(accelerator)
+    unpickled_accelerator = pickle.loads(pickled_accelerator)
+    # TODO: Maybe this should be implemented as __eq__ for AcceleratorState?
+    assert accelerator.state.__dict__ == unpickled_accelerator.state.__dict__
+def test_data_loader(data_loader, accelerator):
+    # Prepare the DataLoader
+    data_loader = accelerator.prepare(data_loader)
+    all_examples = []
+    for i, batch in enumerate(data_loader):
+        index, _ = accelerator.gather_for_metrics((batch["index"], batch["label"]))
+        all_examples.extend(index.detach().cpu().numpy().tolist())
+    # Sort the examples
+    sorted_all_examples = sorted(all_examples)
+    # Check if all elements are present in the sorted list of iterated samples
+    assert len(set(sorted_all_examples)) == NUM_ELEMENTS, (
+        "Not all the dataset elements have been iterated in an epoch due to duplication of samples across processes."
+    )
+def test_stateful_dataloader(accelerator):
+    """
+    Tests that a stateful dataloader can be iterated over, saved after a few batches using `state_dict`, and then
+    resumed from the saved state using `load_state_dict`.
+    The result should be the same as the rest of the data that was iterated over after saving.
+    The accelerator's `dataloader_config` is replaced for the duration of the test and restored afterwards.
+    """
+    old_dataloader_config = accelerator.dataloader_config
+    try:
+        accelerator.dataloader_config = DataLoaderConfiguration(use_stateful_dataloader=True)
+        prepared_dl = create_dataloader(
+            accelerator, dataset_size=32 * accelerator.num_processes, batch_size=4, iterable=True, shuffle=True
+        )
+        untrained_batches = []
+        # Calculate what step that will be
+        total_batches = 32 * accelerator.num_processes // (4 * accelerator.num_processes)
+        last_batch_num = total_batches - 1
+        for step, batch in enumerate(prepared_dl):
+            # Step just before
+            if step == last_batch_num - 1:
+                state_dict = prepared_dl.state_dict()
+            if step >= last_batch_num:
+                # Otherwise grab the "unseen" batches
+                untrained_batches.append(batch)
+        not_skipped_batches = accelerator.gather(untrained_batches)
+        # Rewind to the saved state and collect everything the dataloader yields from there
+        prepared_dl.load_state_dict(state_dict)
+        resumed_batches = []
+        for batch in prepared_dl:
+            resumed_batches.append(batch)
+        resumed_batches = accelerator.gather(resumed_batches)
+        for b1, b2 in zip(not_skipped_batches, resumed_batches):
+            for v1, v2 in zip(b1, b2):
+                assert torch.equal(v1, v2), f"Batch {b1} and {b2} are not equal"
+    finally:
+        accelerator.dataloader_config = old_dataloader_config
+def test_stateful_dataloader_save_state(accelerator):
+    """
+    Tests that a stateful dataloader can be iterated over, saved after a few batches using `Accelerator.save_state`,
+    and then resumed from the saved state using `Accelerator.load_state`.
+    The result should be the same as the rest of the data that was iterated over after saving.
+    The accelerator's `dataloader_config` is replaced for the duration of the test and restored afterwards.
+    """
+    old_dataloader_config = accelerator.dataloader_config
+    try:
+        with tempfile.TemporaryDirectory() as tmpdir:
+            accelerator.dataloader_config = DataLoaderConfiguration(use_stateful_dataloader=True)
+            prepared_dl = create_dataloader(
+                accelerator, dataset_size=32 * accelerator.num_processes, batch_size=4, iterable=True, shuffle=True
+            )
+            untrained_batches = []
+            # Calculate what step that will be
+            total_batches = 32 * accelerator.num_processes // (4 * accelerator.num_processes)
+            last_batch_num = total_batches - 1
+            for step, batch in enumerate(prepared_dl):
+                # Step just before
+                if step == last_batch_num - 1:
+                    accelerator.save_state(tmpdir)
+                if step >= last_batch_num:
+                    # Otherwise grab the "unseen" batches
+                    untrained_batches.append(batch)
+            not_skipped_batches = accelerator.gather(untrained_batches)
+            # Restore the checkpoint written above and collect everything the dataloader yields from there
+            accelerator.load_state(tmpdir)
+            resumed_batches = []
+            for batch in prepared_dl:
+                resumed_batches.append(batch)
+            resumed_batches = accelerator.gather(resumed_batches)
+            for b1, b2 in zip(not_skipped_batches, resumed_batches):
+                for v1, v2 in zip(b1, b2):
+                    assert torch.equal(v1, v2), f"Batch {b1} and {b2} are not equal"
+    finally:
+        accelerator.dataloader_config = old_dataloader_config
+def main():
+    """Run every check of this script in sequence on a two-process accelerator, then end training."""
+    accelerator = create_accelerator()
+    # Seed each process differently
+    torch.manual_seed(accelerator.process_index)
+    accelerator.print("Test that even_batches variable ensures uniform batches across processes")
+    test_default_ensures_even_batch_sizes()
+    accelerator.print("Run tests with even_batches disabled")
+    test_can_disable_even_batches()
+    accelerator.print("Test joining uneven inputs")
+    test_can_join_uneven_inputs()
+    accelerator.print("Test overriding even_batches when joining uneven inputs")
+    test_join_can_override_even_batches()
+    accelerator.print("Test overriding even_batches for mixed dataloader types")
+    test_join_can_override_for_mixed_type_dataloaders()
+    accelerator.print("Test overriding even_batches raises a warning for iterable dataloaders")
+    test_join_raises_warning_for_iterable_when_overriding_even_batches()
+    accelerator.print("Test join with non DDP distributed raises warning")
+    # Temporarily report FSDP as the distributed type for this check, then put the original value back
+    original_state = accelerator.state.distributed_type
+    accelerator.state.distributed_type = DistributedType.FSDP
+    test_join_raises_warning_for_non_ddp_distributed(accelerator)
+    accelerator.state.distributed_type = original_state
+    accelerator.print("Test pickling an accelerator")
+    test_pickle_accelerator()
+    dataset = DummyDataset()
+    accelerator.print("Test DataLoader with shuffle=False")
+    loader = DataLoader(dataset, shuffle=False, batch_size=BATCH_SIZE, num_workers=NUM_WORKERS)
+    test_data_loader(loader, accelerator)
+    accelerator.print("Test DataLoader with shuffle=True")
+    loader = DataLoader(dataset, shuffle=True, batch_size=BATCH_SIZE, num_workers=NUM_WORKERS)
+    test_data_loader(loader, accelerator)
+    accelerator.print("Test DataLoader with batch_sampler")
+    sampler = BatchSampler(RandomSampler(dataset), batch_size=BATCH_SIZE, drop_last=False)
+    loader = DataLoader(dataset, batch_sampler=sampler, num_workers=NUM_WORKERS)
+    test_data_loader(loader, accelerator)
+    accelerator.print("Test DataLoader with sampler as an instance of `BatchSampler`")
+    # Here the BatchSampler is passed as `sampler` with `batch_size=None`, so each sampled item is a list of indices
+    sampler = BatchSampler(RandomSampler(dataset), batch_size=BATCH_SIZE, drop_last=False)
+    loader = DataLoader(dataset, sampler=sampler, batch_size=None, collate_fn=default_collate, num_workers=NUM_WORKERS)
+    test_data_loader(loader, accelerator)
+    test_stateful_dataloader(accelerator)
+    test_stateful_dataloader_save_state(accelerator)
+    accelerator.end_training()
+if __name__ == "__main__":
+    main()

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/test_utils/scripts/test_merge_weights.py ADDED Viewed

	@@ -0,0 +1,158 @@

+# Copyright 2024 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import gc
+import logging
+import shutil
+from pathlib import Path
+import torch
+from safetensors.torch import load_file
+from torch.distributed.fsdp.fully_sharded_data_parallel import ShardingStrategy, StateDictType
+from torch.utils.data import DataLoader
+from accelerate import Accelerator, FullyShardedDataParallelPlugin
+from accelerate.commands.merge import merge_command, merge_command_parser
+from accelerate.state import AcceleratorState
+from accelerate.test_utils import torch_device
+from accelerate.test_utils.training import RegressionDataset
+from accelerate.utils import merge_fsdp_weights, patch_environment, save_fsdp_model
+# Configure root logging at INFO level for this script.
+logging.basicConfig(level=logging.INFO)
+# Argument parser for the merge CLI command; reused by the command-based tests in this file.
+parser = merge_command_parser()
+class TinyModel(torch.nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.linear1 = torch.nn.Linear(16, 16)
+        self.activation = torch.nn.ReLU()
+        self.linear2 = torch.nn.Linear(16, 16)
+        self.softmax = torch.nn.Softmax()
+    def forward(self, x):
+        return self.linear2(self.activation(self.linear1(x)))
+def setup():
+    if AcceleratorState._shared_state != {}:
+        AcceleratorState()._reset_state()
+    plugin = FullyShardedDataParallelPlugin(
+        sharding_strategy=ShardingStrategy.FULL_SHARD, state_dict_type=StateDictType.SHARDED_STATE_DICT
+    )
+    model = TinyModel()
+    with patch_environment(fsdp_auto_wrap_policy="SIZE_BASED_WRAP"):
+        plugin.set_auto_wrap_policy(model)
+    accelerator = Accelerator(fsdp_plugin=plugin)
+    model = accelerator.prepare(model)
+    return model, plugin, accelerator
+def mock_training(accelerator, model):
+    train_set = RegressionDataset(length=128, seed=42)
+    train_dl = DataLoader(train_set, batch_size=16, shuffle=False)
+    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
+    train_dl, model, optimizer = accelerator.prepare(train_dl, model, optimizer)
+    for _ in range(3):
+        for batch in train_dl:
+            model.zero_grad()
+            output = model(batch["x"])
+            loss = torch.nn.functional.mse_loss(output, batch["y"])
+            accelerator.backward(loss)
+            optimizer.step()
+    return model
+def check_weights(operation, state_1, state_2):
+    for weight_1, weight_2 in zip(state_1.values(), state_2.values()):
+        if operation == "same":
+            assert torch.allclose(weight_1, weight_2)
+        else:
+            assert not torch.allclose(weight_1, weight_2)
+def check_safetensors_weights(path, model):
+    safe_state_dict = load_file(path / "model.safetensors")
+    safe_loaded_model = TinyModel().to(torch_device)
+    check_weights("diff", model.state_dict(), safe_loaded_model.state_dict())
+    safe_loaded_model.load_state_dict(safe_state_dict)
+    check_weights("same", model.state_dict(), safe_loaded_model.state_dict())
+def check_pytorch_weights(path, model):
+    nonsafe_state_dict = torch.load(path / "pytorch_model.bin", weights_only=True)
+    nonsafe_loaded_model = TinyModel().to(torch_device)
+    check_weights("diff", model.state_dict(), nonsafe_loaded_model.state_dict())
+    nonsafe_loaded_model.load_state_dict(nonsafe_state_dict)
+    check_weights("same", model.state_dict(), nonsafe_loaded_model.state_dict())
+def test_merge_weights_safetensors(model, path):
+    # Should now be saved at `path/merged.safetensors`
+    merge_fsdp_weights(path / "pytorch_model_fsdp_0", path, safe_serialization=True)
+    check_safetensors_weights(path, model)
+def test_merge_weights_command_safetensors(model, path):
+    args = parser.parse_args([str(path / "pytorch_model_fsdp_0"), str(path)])
+    merge_command(args)
+    check_safetensors_weights(path, model)
+def test_merge_weights_pytorch(model, path):
+    # Should now be saved at `path/merged.bin`
+    merge_fsdp_weights(path / "pytorch_model_fsdp_0", path, safe_serialization=False)
+    check_pytorch_weights(path, model)
+def test_merge_weights_command_pytorch(model, path):
+    args = parser.parse_args([str(path / "pytorch_model_fsdp_0"), str(path), "--unsafe_serialization"])
+    merge_command(args)
+    check_pytorch_weights(path, model)
+if __name__ == "__main__":
+    # Note this test requires at least two accelerators!
+    model, plugin, accelerator = setup()
+    if accelerator.num_processes > 1:
+        try:
+            # Initial setup for things
+            out_path = Path("test_merge_weights_fsdp_weights")
+            if not out_path.exists():
+                out_path.mkdir(parents=True, exist_ok=True)
+            # Train briefly once weights aren't the baseline
+            model = mock_training(accelerator, model)
+            accelerator.wait_for_everyone()
+            gc.collect()  # Needed for some lingering refs after training
+            save_fsdp_model(plugin, accelerator, model, out_path)
+            accelerator.wait_for_everyone()
+            # Finally we can test
+            test_merge_weights_safetensors(model, out_path)
+            test_merge_weights_command_safetensors(model, out_path)
+            test_merge_weights_pytorch(model, out_path)
+            test_merge_weights_command_pytorch(model, out_path)
+        except Exception:
+            raise
+        finally:
+            # Cleanup in case of any failures
+            if accelerator.is_main_process:
+                shutil.rmtree(out_path)
+            accelerator.wait_for_everyone()
+            accelerator.end_training()

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/test_utils/scripts/test_notebook.py ADDED Viewed

	@@ -0,0 +1,118 @@

+# Copyright 2022 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Test file to ensure that in general certain situational setups for notebooks work.
+"""
+import os
+import time
+from multiprocessing import Queue
+from pytest import mark, raises
+from torch.distributed.elastic.multiprocessing.errors import ChildFailedError
+from accelerate import PartialState, notebook_launcher
+from accelerate.test_utils import require_bnb
+from accelerate.utils import is_bnb_available
+def basic_function():
+    # Just prints the PartialState
+    print(f"PartialState:\n{PartialState()}")
+def tough_nut_function(queue: Queue):
+    if queue.empty():
+        return
+    trial = queue.get()
+    if trial > 0:
+        queue.put(trial - 1)
+        raise RuntimeError("The nut hasn't cracked yet! Try again.")
+    print(f"PartialState:\n{PartialState()}")
+def bipolar_sleep_function(sleep_sec: int):
+    state = PartialState()
+    if state.process_index % 2 == 0:
+        raise RuntimeError("I'm an even process. I don't like to sleep.")
+    else:
+        time.sleep(sleep_sec)
+# Number of processes passed to `notebook_launcher`; read from `ACCELERATE_NUM_PROCESSES`, 1 when unset.
+NUM_PROCESSES = int(os.environ.get("ACCELERATE_NUM_PROCESSES", 1))
+def test_can_initialize():
+    notebook_launcher(basic_function, (), num_processes=NUM_PROCESSES)
+@mark.skipif(NUM_PROCESSES < 2, reason="Need at least 2 processes to test static rendezvous backends")
+def test_static_rdzv_backend():
+    notebook_launcher(basic_function, (), num_processes=NUM_PROCESSES, rdzv_backend="static")
+@mark.skipif(NUM_PROCESSES < 2, reason="Need at least 2 processes to test c10d rendezvous backends")
+def test_c10d_rdzv_backend():
+    notebook_launcher(basic_function, (), num_processes=NUM_PROCESSES, rdzv_backend="c10d")
+@mark.skipif(NUM_PROCESSES < 2, reason="Need at least 2 processes to test fault tolerance")
+def test_fault_tolerant(max_restarts: int = 3):
+    queue = Queue()
+    queue.put(max_restarts)
+    notebook_launcher(tough_nut_function, (queue,), num_processes=NUM_PROCESSES, max_restarts=max_restarts)
+@mark.skipif(NUM_PROCESSES < 2, reason="Need at least 2 processes to test monitoring")
+def test_monitoring(monitor_interval: float = 0.01, sleep_sec: int = 100):
+    start_time = time.time()
+    with raises(ChildFailedError, match="I'm an even process. I don't like to sleep."):
+        notebook_launcher(
+            bipolar_sleep_function,
+            (sleep_sec,),
+            num_processes=NUM_PROCESSES,
+            monitor_interval=monitor_interval,
+        )
+    assert time.time() - start_time < sleep_sec, "Monitoring did not stop the process in time."
+@require_bnb
+def test_problematic_imports():
+    """Check that launching after importing `bitsandbytes` raises a `RuntimeError` matching "Please keep these imports"."""
+    with raises(RuntimeError, match="Please keep these imports"):
+        # The import is done inside the `raises` block, immediately before the launch that is expected to fail.
+        import bitsandbytes as bnb  # noqa: F401
+        notebook_launcher(basic_function, (), num_processes=NUM_PROCESSES)
+def main():
+    print("Test basic notebook can be ran")
+    test_can_initialize()
+    print("Test static rendezvous backend")
+    test_static_rdzv_backend()
+    print("Test c10d rendezvous backend")
+    test_c10d_rdzv_backend()
+    print("Test fault tolerant")
+    test_fault_tolerant()
+    print("Test monitoring")
+    test_monitoring()
+    if is_bnb_available():
+        print("Test problematic imports (bnb)")
+        test_problematic_imports()
+    if NUM_PROCESSES > 1:
+        PartialState().destroy_process_group()
+if __name__ == "__main__":
+    main()