| | |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | from pathlib import Path |
| |
|
| | import torch |
| |
|
| | from ...utils import ( |
| | is_hpu_available, |
| | is_mlu_available, |
| | is_musa_available, |
| | is_npu_available, |
| | is_sdaa_available, |
| | is_xpu_available, |
| | ) |
| | from .config_args import ClusterConfig, default_json_config_file |
| | from .config_utils import SubcommandHelpFormatter |
| |
|
| |
|
# Help text for the `default` subcommand; passed as `help=` when the subparser is registered below.
description = "Create a default config file for Accelerate with only a few flags set."
| |
|
| |
|
def write_basic_config(mixed_precision: str = "no", save_location: str = default_json_config_file):
    """
    Creates and saves a basic cluster config to be used on a local machine with potentially multiple accelerator
    devices. Falls back to a CPU configuration when no supported accelerator is detected.

    Args:
        mixed_precision (`str`, *optional*, defaults to "no"):
            Mixed Precision to use. Should be one of "no", "fp16", "bf16", or "fp8" (case-insensitive).
        save_location (`str`, *optional*, defaults to `default_json_config_file`):
            Optional custom save location. Should be passed to `--config_file` when using `accelerate launch`.
            Missing parent directories are created.

    Returns:
        `pathlib.Path` of the written config file, or `False` if a file already exists at `save_location` (in which
        case nothing is written).

    Raises:
        `ValueError`: If `mixed_precision` is not one of the supported values.
    """
    path = Path(save_location)
    path.parent.mkdir(parents=True, exist_ok=True)
    if path.exists():
        print(
            f"Configuration already exists at {save_location}, will not override. Run `accelerate config` manually or pass a different `save_location`."
        )
        return False

    mixed_precision = mixed_precision.lower()
    if mixed_precision not in ["no", "fp16", "bf16", "fp8"]:
        raise ValueError(
            f"`mixed_precision` should be one of 'no', 'fp16', 'bf16', or 'fp8'. Received {mixed_precision}"
        )

    config = {
        "compute_environment": "LOCAL_MACHINE",
        "mixed_precision": mixed_precision,
    }

    # Backends in priority order: (availability check, `torch` submodule name, multi-device distributed type).
    # This is deliberately a single chain: the first available backend wins, and the CPU fallback only applies
    # when none is available. Previously the MLU check was a separate `if`, so on an MLU-only machine its
    # settings were overwritten by the CPU fallback.
    backends = (
        (is_mlu_available, "mlu", "MULTI_MLU"),
        (is_sdaa_available, "sdaa", "MULTI_SDAA"),
        (is_musa_available, "musa", "MULTI_MUSA"),
        (is_hpu_available, "hpu", "MULTI_HPU"),
        (torch.cuda.is_available, "cuda", "MULTI_GPU"),
        (is_xpu_available, "xpu", "MULTI_XPU"),
        (is_npu_available, "npu", "MULTI_NPU"),
    )
    for is_available, module_name, multi_device_type in backends:
        if is_available():
            # Resolve the torch submodule lazily: it may not exist unless the backend is actually available.
            num_devices = getattr(torch, module_name).device_count()
            config["num_processes"] = num_devices
            config["use_cpu"] = False
            config["distributed_type"] = multi_device_type if num_devices > 1 else "NO"
            break
    else:
        # No accelerator detected: single-process CPU run.
        config["use_cpu"] = True
        config["num_processes"] = 1
        config["distributed_type"] = "NO"

    config["debug"] = False
    config["enable_cpu_affinity"] = False
    config = ClusterConfig(**config)
    config.to_json_file(path)
    return path
| |
|
| |
|
def default_command_parser(parser, parents):
    """
    Registers the `default` subcommand on the given sub-parsers object and returns the newly created parser.

    The subcommand accepts `--config_file` (stored as `save_location`) and `--mixed_precision`, and dispatches to
    `default_config_command` through the `func` default.
    """
    default_parser = parser.add_parser(
        "default",
        parents=parents,
        help=description,
        formatter_class=SubcommandHelpFormatter,
    )

    config_file_help = (
        "The path to use to store the config file. Will default to a file named default_config.yaml in the cache "
        "location, which is the content of the environment `HF_HOME` suffixed with 'accelerate', or if you don't have "
        "such an environment variable, your cache directory ('~/.cache' or the content of `XDG_CACHE_HOME`) suffixed "
        "with 'huggingface'."
    )
    default_parser.add_argument(
        "--config_file",
        dest="save_location",
        default=default_json_config_file,
        help=config_file_help,
    )

    mixed_precision_help = (
        "Whether or not to use mixed precision training. "
        "Choose between FP16 and BF16 (bfloat16) training. "
        "BF16 training is only supported on Nvidia Ampere GPUs and PyTorch 1.10 or later."
    )
    default_parser.add_argument(
        "--mixed_precision",
        type=str,
        choices=["no", "fp16", "bf16"],
        default="no",
        help=mixed_precision_help,
    )

    # The top-level CLI looks up `func` on the parsed namespace to run the selected subcommand.
    default_parser.set_defaults(func=default_config_command)
    return default_parser
| |
|
| |
|
def default_config_command(args):
    """
    Entry point for the `default` subcommand: writes the basic config and reports where it was saved.

    Reads `args.mixed_precision` and `args.save_location`. Prints nothing further when `write_basic_config` returns
    a falsy value (it does so when a config already exists at the target location).
    """
    saved_path = write_basic_config(args.mixed_precision, args.save_location)
    if not saved_path:
        return
    print(f"accelerate configuration saved at {saved_path}")
| |
|