| |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| from pathlib import Path |
|
|
| import torch |
|
|
| from ...utils import ( |
| is_hpu_available, |
| is_mlu_available, |
| is_musa_available, |
| is_npu_available, |
| is_sdaa_available, |
| is_xpu_available, |
| ) |
| from .config_args import ClusterConfig, default_json_config_file |
| from .config_utils import SubcommandHelpFormatter |
|
|
|
|
# Help text for the `default` subcommand; passed as `help=` when the subcommand parser is registered below.
description = "Create a default config file for Accelerate with only a few flags set."
|
|
|
|
def write_basic_config(mixed_precision="no", save_location: str = default_json_config_file):
    """
    Creates and saves a basic cluster config to be used on a local machine with potentially multiple GPUs. Will also
    set CPU if it is a CPU-only machine.

    Args:
        mixed_precision (`str`, *optional*, defaults to "no"):
            Mixed Precision to use. Should be one of "no", "fp16", "bf16", or "fp8" (case-insensitive).
        save_location (`str`, *optional*, defaults to `default_json_config_file`):
            Optional custom save location. Should be passed to `--config_file` when using `accelerate launch`. Default
            location is inside the huggingface cache folder (`~/.cache/huggingface`) but can be overridden by setting
            the `HF_HOME` environmental variable, followed by `accelerate/default_config.yaml`.

    Returns:
        `False` if a file already exists at `save_location` (nothing is written in that case), otherwise the
        `pathlib.Path` the configuration was written to.

    Raises:
        ValueError: If `mixed_precision` is not one of the supported values.
    """
    path = Path(save_location)
    path.parent.mkdir(parents=True, exist_ok=True)
    if path.exists():
        print(
            f"Configuration already exists at {save_location}, will not override. Run `accelerate config` manually or pass a different `save_location`."
        )
        return False
    mixed_precision = mixed_precision.lower()
    if mixed_precision not in ["no", "fp16", "bf16", "fp8"]:
        raise ValueError(
            f"`mixed_precision` should be one of 'no', 'fp16', 'bf16', or 'fp8'. Received {mixed_precision}"
        )
    config = {
        "compute_environment": "LOCAL_MACHINE",
        "mixed_precision": mixed_precision,
    }
    # Accelerator backends in priority order: (availability check, `torch` submodule name, multi-device
    # distributed type). Exactly one backend may be selected; previously the MLU branch was a separate `if`
    # statement, so on an MLU-only machine its settings were overwritten by the CPU fallback.
    backends = (
        (is_mlu_available, "mlu", "MULTI_MLU"),
        (is_sdaa_available, "sdaa", "MULTI_SDAA"),
        (is_musa_available, "musa", "MULTI_MUSA"),
        (is_hpu_available, "hpu", "MULTI_HPU"),
        (torch.cuda.is_available, "cuda", "MULTI_GPU"),
        (is_xpu_available, "xpu", "MULTI_XPU"),
        (is_npu_available, "npu", "MULTI_NPU"),
    )
    for is_available, module_name, multi_device_type in backends:
        if not is_available():
            continue
        # The submodule is only looked up once its backend reports as available, since e.g. `torch.mlu`
        # may not exist on other installations.
        num_devices = getattr(torch, module_name).device_count()
        config["num_processes"] = num_devices
        config["use_cpu"] = False
        config["distributed_type"] = multi_device_type if num_devices > 1 else "NO"
        break
    else:
        # No accelerator backend was detected: fall back to a single CPU process.
        config["use_cpu"] = True
        config["num_processes"] = 1
        config["distributed_type"] = "NO"
    config["debug"] = False
    config["enable_cpu_affinity"] = False
    config = ClusterConfig(**config)
    config.to_json_file(path)
    return path
|
|
|
|
def default_command_parser(parser, parents):
    """
    Registers the `default` subcommand, together with its command-line options, on the given subparsers object.

    Args:
        parser:
            Object exposing `add_parser`, on which the `default` subcommand is created.
        parents:
            Forwarded unchanged as the `parents` argument of `add_parser`.

    Returns:
        The newly created `default` subcommand parser, whose `func` default is `default_config_command`.
    """
    config_file_help = (
        "The path to use to store the config file. Will default to a file named default_config.yaml in the cache "
        "location, which is the content of the environment `HF_HOME` suffixed with 'accelerate', or if you don't have "
        "such an environment variable, your cache directory ('~/.cache' or the content of `XDG_CACHE_HOME`) suffixed "
        "with 'huggingface'."
    )
    mixed_precision_help = (
        "Whether or not to use mixed precision training. "
        "Choose between FP16 and BF16 (bfloat16) training. "
        "BF16 training is only supported on Nvidia Ampere GPUs and PyTorch 1.10 or later."
    )

    subparser = parser.add_parser(
        "default",
        parents=parents,
        help=description,
        formatter_class=SubcommandHelpFormatter,
    )
    # The value of `--config_file` is stored under `save_location`, which is what `default_config_command` reads.
    subparser.add_argument(
        "--config_file",
        dest="save_location",
        default=default_json_config_file,
        help=config_file_help,
    )
    subparser.add_argument(
        "--mixed_precision",
        type=str,
        choices=["no", "fp16", "bf16"],
        default="no",
        help=mixed_precision_help,
    )
    subparser.set_defaults(func=default_config_command)
    return subparser
|
|
|
|
def default_config_command(args):
    """
    Entry point of the `default` subcommand: writes the basic config and reports where it was saved.

    Args:
        args:
            Parsed command-line arguments; `args.mixed_precision` and `args.save_location` are read.
    """
    saved_path = write_basic_config(args.mixed_precision, args.save_location)
    if not saved_path:
        # Nothing was written (`write_basic_config` returned a falsy value), so there is nothing to report.
        return
    print(f"accelerate configuration saved at {saved_path}")
|
|