# NOTE(review): this module arrived wrapped in Markdown-table pipes ("| ... |"),
# which is not valid Python; the cells below have been unwrapped back into the
# plain import statements they contain. No imported name was added or removed.
#
# This file is a re-export hub: it only pulls names from sibling modules so
# that callers can import everything from this package's namespace.

from ..parallelism_config import ParallelismConfig
from .ao import convert_model_to_fp8_ao, filter_first_and_last_linear_layers, has_ao_layers
from .constants import (
    MITA_PROFILING_AVAILABLE_PYTORCH_VERSION,
    MODEL_NAME,
    OPTIMIZER_NAME,
    PROFILE_PATTERN_NAME,
    RNG_STATE_NAME,
    SAFE_MODEL_NAME,
    SAFE_WEIGHTS_INDEX_NAME,
    SAFE_WEIGHTS_NAME,
    SAFE_WEIGHTS_PATTERN_NAME,
    SAMPLER_NAME,
    SCALER_NAME,
    SCHEDULER_NAME,
    TORCH_DISTRIBUTED_OPERATION_TYPES,
    TORCH_LAUNCH_PARAMS,
    WEIGHTS_INDEX_NAME,
    WEIGHTS_NAME,
    WEIGHTS_PATTERN_NAME,
    XPU_PROFILING_AVAILABLE_PYTORCH_VERSION,
)
from .dataclasses import (
    AORecipeKwargs,
    AutocastKwargs,
    BnbQuantizationConfig,
    ComputeEnvironment,
    CustomDtype,
    DataLoaderConfiguration,
    DDPCommunicationHookType,
    DeepSpeedPlugin,
    DistributedDataParallelKwargs,
    DistributedType,
    DynamoBackend,
    FP8RecipeKwargs,
    FullyShardedDataParallelPlugin,
    GradientAccumulationPlugin,
    GradScalerKwargs,
    InitProcessGroupKwargs,
    KwargsHandler,
    LoggerType,
    MegatronLMPlugin,
    MSAMPRecipeKwargs,
    PrecisionType,
    ProfileKwargs,
    ProjectConfiguration,
    RNGType,
    SageMakerDistributedType,
    TensorInformation,
    TERecipeKwargs,
    TorchContextParallelConfig,
    TorchDynamoPlugin,
    TorchTensorParallelConfig,
    TorchTensorParallelPlugin,
    add_model_config_to_megatron_parser,
)
from .environment import (
    are_libraries_initialized,
    check_cuda_fp8_capability,
    check_cuda_p2p_ib_support,
    clear_environment,
    convert_dict_to_env_variables,
    get_cpu_distributed_information,
    get_gpu_info,
    get_int_from_env,
    parse_choice_from_env,
    parse_flag_from_env,
    patch_environment,
    purge_accelerate_environment,
    set_numa_affinity,
    str_to_bool,
)
from .imports import (
    deepspeed_required,
    get_ccl_version,
    is_4bit_bnb_available,
    is_8bit_bnb_available,
    is_aim_available,
    is_bf16_available,
    is_bitsandbytes_multi_backend_available,
    is_bnb_available,
    is_boto3_available,
    is_ccl_available,
    is_clearml_available,
    is_comet_ml_available,
    is_cuda_available,
    is_datasets_available,
    is_deepspeed_available,
    is_dvclive_available,
    is_fp8_available,
    is_fp16_available,
    is_habana_gaudi1,
    is_hpu_available,
    is_import_timer_available,
    is_ipex_available,
    is_lomo_available,
    is_matplotlib_available,
    is_megatron_lm_available,
    is_mlflow_available,
    is_mlu_available,
    is_mps_available,
    is_msamp_available,
    is_musa_available,
    is_npu_available,
    is_pandas_available,
    is_peft_available,
    is_pippy_available,
    is_pynvml_available,
    is_pytest_available,
    is_rich_available,
    is_sagemaker_available,
    is_schedulefree_available,
    is_sdaa_available,
    is_swanlab_available,
    is_tensorboard_available,
    is_timm_available,
    is_torch_xla_available,
    is_torchao_available,
    is_torchdata_available,
    is_torchdata_stateful_dataloader_available,
    is_torchvision_available,
    is_trackio_available,
    is_transformer_engine_available,
    is_transformers_available,
    is_triton_available,
    is_wandb_available,
    is_weights_only_available,
    is_xccl_available,
    is_xpu_available,
    torchao_required,
)
from .modeling import (
    align_module_device,
    calculate_maximum_sizes,
    check_device_map,
    check_tied_parameters_in_config,
    check_tied_parameters_on_same_device,
    compute_module_sizes,
    convert_file_size_to_int,
    dtype_byte_size,
    find_tied_parameters,
    get_balanced_memory,
    get_grad_scaler,
    get_max_layer_size,
    get_max_memory,
    get_mixed_precision_context_manager,
    has_offloaded_params,
    id_tensor_storage,
    infer_auto_device_map,
    is_peft_model,
    load_checkpoint_in_model,
    load_offloaded_weights,
    load_state_dict,
    named_module_tensors,
    retie_parameters,
    set_module_tensor_to_device,
)
from .offload import (
    OffloadedWeightsLoader,
    PrefixedDataset,
    extract_submodules_state_dict,
    load_offloaded_weight,
    offload_state_dict,
    offload_weight,
    save_offload_index,
)
from .operations import (
    CannotPadNestedTensorWarning,
    GatheredParameters,
    broadcast,
    broadcast_object_list,
    concatenate,
    convert_outputs_to_fp32,
    convert_to_fp32,
    copy_tensor_to_devices,
    find_batch_size,
    find_device,
    gather,
    gather_object,
    get_data_structure,
    honor_type,
    ignorant_find_batch_size,
    initialize_tensors,
    is_namedtuple,
    is_tensor_information,
    is_torch_tensor,
    listify,
    pad_across_processes,
    pad_input_tensors,
    recursively_apply,
    reduce,
    send_to_device,
    slice_tensors,
)
from .versions import compare_versions, is_torch_version


# DeepSpeed wrappers can only be imported when deepspeed itself is installed,
# hence the availability guard around this group.
if is_deepspeed_available():
    from .deepspeed import (
        DeepSpeedEngineWrapper,
        DeepSpeedOptimizerWrapper,
        DeepSpeedSchedulerWrapper,
        DummyOptim,
        DummyScheduler,
        HfDeepSpeedConfig,
        get_active_deepspeed_plugin,
        map_pytorch_optim_to_deepspeed,
    )

from .bnb import has_4bit_bnb_layers, load_and_quantize_model
from .fsdp_utils import (
    disable_fsdp_ram_efficient_loading,
    enable_fsdp_ram_efficient_loading,
    ensure_weights_retied,
    fsdp2_apply_ac,
    fsdp2_canonicalize_names,
    fsdp2_load_full_state_dict,
    fsdp2_prepare_model,
    fsdp2_switch_optimizer_parameters,
    get_fsdp2_grad_scaler,
    load_fsdp_model,
    load_fsdp_optimizer,
    merge_fsdp_weights,
    save_fsdp_model,
    save_fsdp_optimizer,
)
from .launch import (
    PrepareForLaunch,
    _filter_args,
    prepare_deepspeed_cmd_env,
    prepare_multi_gpu_env,
    prepare_sagemager_args_inputs,
    prepare_simple_launcher_cmd_env,
    prepare_tpu,
)

# NOTE(review): in the garbled input the first .megatron_lm import group
# appeared *before* the `if is_megatron_lm_available():` guard that imports
# from the very same module. An unconditional import there would make the
# guard dead logic (the module import would already have succeeded or raised),
# so both groups are placed under the guard here — confirm against upstream.
if is_megatron_lm_available():
    from .megatron_lm import (
        AbstractTrainStep,
        BertTrainStep,
        GPTTrainStep,
        MegatronLMDummyDataLoader,
        MegatronLMDummyScheduler,
        T5TrainStep,
        avg_losses_across_data_parallel_group,
    )
    from .megatron_lm import (
        MegatronEngine,
        MegatronLMOptimizerWrapper,
        MegatronLMSchedulerWrapper,
        gather_across_data_parallel_groups,
    )
    # Aliased so callers can tell these apart from other backends' same-named
    # initialize/prepare_* helpers.
    from .megatron_lm import initialize as megatron_lm_initialize
    from .megatron_lm import prepare_data_loader as megatron_lm_prepare_data_loader
    from .megatron_lm import prepare_model_optimizer_scheduler as megatron_lm_prepare_model_optimizer_scheduler
    from .megatron_lm import prepare_optimizer as megatron_lm_prepare_optimizer
    from .megatron_lm import prepare_scheduler as megatron_lm_prepare_scheduler

from .memory import find_executable_batch_size, release_memory
from .other import (
    check_os_kernel,
    clean_state_dict_for_safetensors,
    compile_regions,
    compile_regions_deepspeed,
    convert_bytes,
    extract_model_from_parallel,
    get_module_children_bottom_up,
    get_pretty_name,
    has_compiled_regions,
    is_compiled_module,
    is_port_in_use,
    load,
    merge_dicts,
    model_has_dtensor,
    recursive_getattr,
    save,
    wait_for_everyone,
    write_basic_config,
)
from .random import set_seed, synchronize_rng_state, synchronize_rng_states
from .torch_xla import install_xla
from .tqdm import tqdm
from .transformer_engine import (
    apply_fp8_autowrap,
    contextual_fp8_autocast,
    convert_model,
    has_transformer_engine_layers,
)