| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| import operator as op |
|
|
| import torch |
|
|
|
|
# Base names (without extension) from which the weight file names are derived.
MODEL_NAME = "pytorch_model"
SAFE_MODEL_NAME = "model"

# Names for the other saved-state artifacts. Only SCALER_NAME carries an
# extension here; presumably the callers append one to the rest — TODO confirm.
SCALER_NAME = "scaler.pt"
RNG_STATE_NAME = "random_states"
OPTIMIZER_NAME = "optimizer"
SCHEDULER_NAME = "scheduler"
SAMPLER_NAME = "sampler"

# Template with a literal "{suffix}" placeholder, to be filled via str.format.
PROFILE_PATTERN_NAME = "profile_{suffix}.json"

# ".bin" weight files: single-file name, "{suffix}" template, and index file.
WEIGHTS_NAME = MODEL_NAME + ".bin"
WEIGHTS_PATTERN_NAME = MODEL_NAME + "{suffix}.bin"
WEIGHTS_INDEX_NAME = WEIGHTS_NAME + ".index.json"

# ".safetensors" counterparts of the three names above.
SAFE_WEIGHTS_NAME = SAFE_MODEL_NAME + ".safetensors"
SAFE_WEIGHTS_PATTERN_NAME = SAFE_MODEL_NAME + "{suffix}.safetensors"
SAFE_WEIGHTS_INDEX_NAME = SAFE_WEIGHTS_NAME + ".index.json"
# Pinned versions for SageMaker runs.
SAGEMAKER_PYTORCH_VERSION = "1.10.2"
SAGEMAKER_PYTHON_VERSION = "py38"
SAGEMAKER_TRANSFORMERS_VERSION = "4.17.0"

# EC2 instance type names listed for SageMaker parallel runs.
# NOTE(review): "ml.p4dn.24xlarge" may be a typo for "ml.p4d.24xlarge" —
# confirm against the AWS instance-type list before changing the value.
SAGEMAKER_PARALLEL_EC2_INSTANCES = [
    "ml.p3.16xlarge",
    "ml.p3dn.24xlarge",
    "ml.p4dn.24xlarge",
]
# Option names for the FSDP settings. The strings look like the member names
# of the corresponding torch.distributed.fsdp enums — TODO confirm.
FSDP_SHARDING_STRATEGY = [
    "FULL_SHARD",
    "SHARD_GRAD_OP",
    "NO_SHARD",
    "HYBRID_SHARD",
    "HYBRID_SHARD_ZERO2",
]
FSDP_AUTO_WRAP_POLICY = [
    "TRANSFORMER_BASED_WRAP",
    "SIZE_BASED_WRAP",
    "NO_WRAP",
]
FSDP_BACKWARD_PREFETCH = [
    "BACKWARD_PRE",
    "BACKWARD_POST",
    "NO_PREFETCH",
]
FSDP_STATE_DICT_TYPE = [
    "FULL_STATE_DICT",
    "LOCAL_STATE_DICT",
    "SHARDED_STATE_DICT",
]
# The FSDP2 list has no "LOCAL_STATE_DICT" entry and puts the sharded type first.
FSDP2_STATE_DICT_TYPE = [
    "SHARDED_STATE_DICT",
    "FULL_STATE_DICT",
]

# PyTorch version strings associated with FSDP and FSDP2.
# NOTE(review): the FSDP value is a pre-release build string rather than a
# plain "2.1.0"; the reason is not visible in this file.
FSDP_PYTORCH_VERSION = "2.1.0.a0+32f93b1"
FSDP2_PYTORCH_VERSION = "2.6.0"

# Base name used for FSDP model files.
FSDP_MODEL_NAME = "pytorch_model_fsdp"
# Launcher names listed for DeepSpeed multi-node runs.
DEEPSPEED_MULTINODE_LAUNCHERS = [
    "pdsh",
    "standard",
    "openmpi",
    "mvapich",
    "mpich",
    "nossh",
    "slurm",
]

# Mode names listed for TorchDynamo.
TORCH_DYNAMO_MODES = [
    "default",
    "reduce-overhead",
    "max-autotune",
]

# PyTorch version strings keyed by feature; presumably the minimum version at
# which each feature is available — TODO confirm against the call sites.
ELASTIC_LOG_LINE_PREFIX_TEMPLATE_PYTORCH_VERSION = "2.2.0"
XPU_PROFILING_AVAILABLE_PYTORCH_VERSION = "2.4.0"
MITA_PROFILING_AVAILABLE_PYTORCH_VERSION = "2.1.0"
BETA_TP_AVAILABLE_PYTORCH_VERSION = "2.3.0"
BETA_CP_AVAILABLE_PYTORCH_VERSION = "2.6.0"

# Transformers version string paired with the BETA_TP PyTorch version above.
BETA_TP_AVAILABLE_TRANSFORMERS_VERSION = "4.52.0"
|
|
# Maps a comparison operator written as a string to the matching two-argument
# function from the `operator` module.
STR_OPERATION_TO_FUNC = {
    ">": op.gt,
    ">=": op.ge,
    "==": op.eq,
    "!=": op.ne,
    "<=": op.le,
    "<": op.lt,
}
|
|
| |
# Parameter names for the torch launcher, in their original order. The names
# look like torchrun command-line options (long and short forms) — TODO confirm.
TORCH_LAUNCH_PARAMS = [
    # Topology and rendezvous
    "nnodes", "nproc_per_node",
    "rdzv_backend", "rdzv_endpoint", "rdzv_id", "rdzv_conf",
    "standalone",
    # Worker supervision
    "max_restarts", "monitor_interval", "start_method", "role",
    # What to run
    "module", "m", "no_python", "run_path",
    # Logging and output redirection
    "log_dir", "r", "redirects", "t", "tee",
    # Node identity and master address
    "node_rank", "master_addr", "master_port",
]
|
|
# Distributed-type names grouped under CUDA.
CUDA_DISTRIBUTED_TYPES = [
    "DEEPSPEED",
    "MULTI_GPU",
    "FSDP",
    "MEGATRON_LM",
    "TP",
]

# The CUDA names followed by the other MULTI_* backend names. This is a
# separate list object, so mutating one list does not affect the other.
TORCH_DISTRIBUTED_OPERATION_TYPES = [
    *CUDA_DISTRIBUTED_TYPES,
    "MULTI_NPU",
    "MULTI_MLU",
    "MULTI_SDAA",
    "MULTI_MUSA",
    "MULTI_XPU",
    "MULTI_CPU",
    "MULTI_HPU",
]
# torch.nn layer classes supported for upcasting. It is a tuple, so it can be
# passed directly as the second argument of isinstance().
SUPPORTED_PYTORCH_LAYERS_FOR_UPCASTING = (
    # Convolutions
    torch.nn.Conv1d, torch.nn.Conv2d, torch.nn.Conv3d,
    # Transposed convolutions
    torch.nn.ConvTranspose1d, torch.nn.ConvTranspose2d, torch.nn.ConvTranspose3d,
    # Fully connected
    torch.nn.Linear,
)
|
|