Buckets:
MisterAI/LocalAI_Demo_backends / cpu-diffusers.upgrade-tmp /venv /lib /python3.10 /site-packages /peft /utils /hotswap.py
| # Copyright 2024-present the HuggingFace Inc. team. | |
| # | |
| # Licensed under the Apache License, Version 2.0 (the "License"); | |
| # you may not use this file except in compliance with the License. | |
| # You may obtain a copy of the License at | |
| # | |
| # http://www.apache.org/licenses/LICENSE-2.0 | |
| # | |
| # Unless required by applicable law or agreed to in writing, software | |
| # distributed under the License is distributed on an "AS IS" BASIS, | |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| # See the License for the specific language governing permissions and | |
| # limitations under the License. | |
| from __future__ import annotations | |
| import math | |
| import warnings | |
| from operator import attrgetter | |
| from typing import Literal, Optional | |
| import torch | |
| from peft.config import PeftConfig | |
| from peft.mapping import PEFT_TYPE_TO_CONFIG_MAPPING, PEFT_TYPE_TO_PREFIX_MAPPING | |
| from peft.tuners.lora import Conv2d, Linear, LoraConfig, LoraLayer | |
| from .other import get_pattern_key, infer_device | |
| from .peft_types import PeftType | |
| from .save_and_load import _insert_adapter_name_into_state_dict, load_peft_weights | |
# Config attributes that are compared between the loaded adapter and the adapter being swapped in, keyed by PEFT
# type. So far only LoRA is supported.
CONFIG_KEYS_TO_CHECK = {
    PeftType.LORA: [
        "use_rslora",
        "lora_dropout",
        "alpha_pattern",
        "use_dora",
    ],
}
def _update_scaling(lora_module, adapter_name, scaling=None):
    """
    Set the scaling of one adapter on a LoRA module to a new value.

    The stored scaling may be a plain Python number or a tensor (scalings are turned into tensors by
    prepare_model_for_compiled_hotswap). Tensors are overwritten in-place so that the tensor object itself stays the
    same; plain numbers are replaced in the `scaling` dict.

    Args:
        lora_module:
            The LoRA module whose `scaling` dict should be updated.
        adapter_name (str):
            The key of the adapter inside `lora_module.scaling`.
        scaling:
            The new scaling value.

    Raises:
        ValueError:
            If the currently stored scaling is neither a tensor nor a float/int.
    """
    current = lora_module.scaling[adapter_name]

    # nothing to do if the value does not change
    if current == scaling:
        return

    if isinstance(current, torch.Tensor):
        # in-place update, the tensor object is kept
        current.fill_(scaling)
        return

    if isinstance(current, (float, int)):
        lora_module.scaling[adapter_name] = scaling
        return

    raise ValueError(
        "Something went wrong when trying to set the new scale value, expected to find the old value to be of type "
        f"float or torch.Tensor, got {type(current)} instead."
    )
def _convert_scalings_to_tensor(model) -> bool:
    """
    Replace the float LoRA scalings of every LoRA layer in the model by tensors.

    Tensors are used so that a later change of the scaling value does not trigger a recompilation.

    Args:
        model:
            The model whose modules are searched for `LoraLayer` instances.

    Returns:
        bool:
            `True` if at least one `LoraLayer` was found, else `False`.

    Raises:
        ValueError:
            If a scaling value is neither a float nor a tensor.
    """
    saw_lora_layer = False
    for layer in model.modules():
        if isinstance(layer, LoraLayer):
            saw_lora_layer = True
            scalings = layer.scaling
            for adapter_name, value in scalings.items():
                if isinstance(value, torch.Tensor):
                    # already a tensor, keep as is
                    continue
                if not isinstance(value, float):
                    raise ValueError(
                        "Something went wrong while trying to convert the scalings, expected to find values of type "
                        f"float but found {type(value)} instead."
                    )
                # no dtype handling required, scalars are coerced
                scalings[adapter_name] = torch.tensor(value, device=layer.weight.device)
    return saw_lora_layer
def _get_padded_linear(lora_module: torch.nn.Module, target_rank: int, is_lora_A: bool) -> torch.nn.Linear:
    """
    Get a new Linear layer for LoRA with padded weights according to the target rank.

    The original weight is copied into the leading rows (LoRA A) or columns (LoRA B) of a zero tensor, so the extra
    rank entries contribute nothing to the output. The `requires_grad` flags of the original weight and bias are
    carried over to the new layer, so padding does not turn frozen adapter weights into trainable ones.

    Args:
        lora_module (nn.Module):
            The LoRA sub-module (e.g. module.lora_A[adapter_name]).
        target_rank (int):
            The desired rank to pad to.
        is_lora_A (bool):
            True if this is the LoRA A matrix, False if LoRA B.

    Returns:
        nn.Linear:
            A newly created and padded Linear layer. If the rank already fit, the original layer is returned.

    Raises:
        ValueError:
            If the original rank is larger than `target_rank`, or if the shapes of the new layer do not match the
            expected padded shapes.
    """
    weight = lora_module.weight
    bias = lora_module.bias
    # For LoRA A, the "rank dimension" is weight.size(0) (out_features).
    # For LoRA B, it is weight.size(1) (in_features).
    original_rank = weight.size(0) if is_lora_A else weight.size(1)

    # If no padding needed
    if original_rank == target_rank:
        return lora_module

    if original_rank > target_rank:
        raise ValueError(
            f"Trying to pad the adapter to the target rank {target_rank}, but the original rank is larger "
            f"({original_rank}). This is not possible."
        )

    out_features, in_features = weight.shape
    has_bias = bias is not None

    # lora_A and lora_B are always nn.Linear
    # The weight is detached before copying so that filling the padded buffer is not recorded by autograd.
    if is_lora_A:
        # LoRA A affects out_features
        padded = torch.zeros(target_rank, in_features, device=weight.device, dtype=weight.dtype)
        padded[:original_rank, :] = weight.detach()
        new_layer = torch.nn.Linear(in_features, target_rank, bias=has_bias)
    else:
        # LoRA B affects in_features
        padded = torch.zeros(out_features, target_rank, device=weight.device, dtype=weight.dtype)
        padded[:, :original_rank] = weight.detach()
        new_layer = torch.nn.Linear(target_rank, out_features, bias=has_bias)

    # Sanity check
    if new_layer.weight.shape != padded.shape:
        raise ValueError(
            "Something went wrong when trying to pad the LoRA Linear weights, the new shape should be "
            f"{padded.shape} but {new_layer.weight.shape} was found. Please open an issue on PEFT "
            "(https://github.com/huggingface/peft/issues) and report this error."
        )

    if has_bias and (new_layer.bias.shape != bias.shape):
        raise ValueError(
            "Something went wrong when trying to pad the LoRA Linear bias, the new shape should be "
            f"{bias.shape} but {new_layer.bias.shape} was found. Please open an issue on PEFT "
            "(https://github.com/huggingface/peft/issues) and report this error."
        )

    new_layer.weight.data = padded
    # A freshly created layer always requires grad; restore the state of the original parameter.
    new_layer.weight.requires_grad_(weight.requires_grad)

    # Copy bias if present
    if has_bias:
        new_layer.bias.data = bias.data
        new_layer.bias.requires_grad_(bias.requires_grad)

    return new_layer
def _get_padded_conv2d(lora_module: torch.nn.Module, target_rank: int, is_lora_A: bool) -> torch.nn.Conv2d:
    """
    Get a new Conv2d layer for LoRA with padded weights according to the target rank.

    The original weight is copied into the leading output channels (LoRA A) or input channels (LoRA B) of a zero
    tensor, so the extra rank entries contribute nothing to the output. All convolution hyper-parameters of the
    original layer (kernel size, stride, padding, dilation, groups, padding mode) are forwarded to the new layer, and
    the `requires_grad` flags of the original weight and bias are carried over.

    Args:
        lora_module (nn.Module):
            The LoRA sub-module (e.g. module.lora_A[adapter_name]).
        target_rank (int):
            The desired rank to pad to.
        is_lora_A (bool):
            True if this is the LoRA A matrix, False if LoRA B.

    Returns:
        nn.Conv2d:
            A newly created and padded Conv2d layer. If the rank already fit, the original layer is returned.

    Raises:
        ValueError:
            If the original rank is larger than `target_rank`, or if the shapes of the new layer do not match the
            expected padded shapes.
    """
    weight = lora_module.weight
    bias = lora_module.bias
    # For Conv2d: [out_channels, in_channels, kernel_height, kernel_width]
    out_channels, in_channels, kh, kw = weight.shape
    original_rank = out_channels if is_lora_A else in_channels

    if original_rank == target_rank:
        return lora_module

    if original_rank > target_rank:
        raise ValueError(
            f"Trying to pad the adapter to the target rank {target_rank}, but the original rank is larger "
            f"({original_rank}). This is not possible."
        )

    has_bias = bias is not None

    # lora_A and lora_B are always nn.Conv2d
    # The weight is detached before copying so that filling the padded buffer is not recorded by autograd.
    if is_lora_A:
        # LoRA A affects out_channels
        padded = torch.zeros(target_rank, in_channels, kh, kw, device=weight.device, dtype=weight.dtype)
        padded[:out_channels, :, :, :] = weight.detach()
        new_in_channels, new_out_channels = in_channels, target_rank
    else:
        # LoRA B affects in_channels
        padded = torch.zeros(out_channels, target_rank, kh, kw, device=weight.device, dtype=weight.dtype)
        padded[:, :in_channels, :, :] = weight.detach()
        new_in_channels, new_out_channels = target_rank, out_channels

    new_layer = torch.nn.Conv2d(
        new_in_channels,
        new_out_channels,
        kernel_size=lora_module.kernel_size,
        stride=lora_module.stride,
        padding=lora_module.padding,
        # dilation and padding_mode must match the original layer, otherwise the padded layer computes a
        # different convolution
        dilation=lora_module.dilation,
        bias=has_bias,
        groups=lora_module.groups,
        padding_mode=lora_module.padding_mode,
    )

    # Sanity check
    if new_layer.weight.shape != padded.shape:
        raise ValueError(
            "Something went wrong when trying to pad the LoRA weights, the new shape should be "
            f"{padded.shape} but {new_layer.weight.shape} was found. Please open an issue on PEFT "
            "(https://github.com/huggingface/peft/issues) and report this error."
        )

    if has_bias and (new_layer.bias.shape != bias.shape):
        raise ValueError(
            "Something went wrong when trying to pad the LoRA Conv2d bias, the new shape should be "
            f"{bias.shape} but {new_layer.bias.shape} was found. Please open an issue on PEFT "
            "(https://github.com/huggingface/peft/issues) and report this error."
        )

    new_layer.weight.data = padded
    # A freshly created layer always requires grad; restore the state of the original parameter.
    new_layer.weight.requires_grad_(weight.requires_grad)

    # Copy bias if present
    if has_bias:
        new_layer.bias.data = bias.data
        new_layer.bias.requires_grad_(bias.requires_grad)

    return new_layer
def _pad_lora_weights(model: torch.nn.Module, target_rank: int) -> bool:
    """
    Pad the LoRA A and LoRA B weights of all LoRA Linear and Conv2d layers in the model to the target rank.

    The sub-modules stored in `lora_A` and `lora_B` of each matching layer are replaced by the result of the
    corresponding padding function. Modules of any other type are skipped.

    Args:
        model (nn.Module): The model containing LoRA modules (with lora_A and lora_B).
        target_rank (int): The target rank to pad to.

    Returns:
        bool:
            `True` if at least one LoRA Linear or Conv2d layer was found, else `False`.
    """
    found_adapter = False
    for layer in model.modules():
        # pick the padding function that fits the layer type
        if isinstance(layer, Linear):
            pad_fn = _get_padded_linear
        elif isinstance(layer, Conv2d):
            pad_fn = _get_padded_conv2d
        else:
            continue

        # first LoRA A, then LoRA B
        for container_name, is_lora_A in (("lora_A", True), ("lora_B", False)):
            container = getattr(layer, container_name)
            for adapter_name, sub_module in container.items():
                container[adapter_name] = pad_fn(sub_module, target_rank=target_rank, is_lora_A=is_lora_A)

        found_adapter = True
    return found_adapter
def prepare_model_for_compiled_hotswap(
    model: torch.nn.Module,
    *,
    target_rank: Optional[int] = None,
    config: Optional[LoraConfig | dict[str, LoraConfig]] = None,
    check_compiled: Literal["error", "warn", "ignore"] = "error",
) -> None:
    """
    Helper function that prepares the model so that it can later be compiled and then used with hot-swapping.

    It is necessary to call this function on the model for hot-swapping to work if both of these are true:

    - the different LoRA adapters have different ranks and/or different alpha values (i.e. scalings)
    - you plan to torch.compile the model and want to avoid re-compilation

    It is important to call this function *after* the first LoRA adapter has been loaded (i.e. the one that will be
    swapped out) but *before* the model is compiled.

    Even with this function, hot-swapping LoRA adapters that target different layers is still not supported.

    Note: This function modifies the model in-place. If you want to restore the model to its initial state, you will
    have to reload it.

    Args:
        model (`nn.Module`):
            The model with the loaded adapter, before compilation.
        target_rank (`int`, *optional*):
            The target rank to pad the LoRA weights to. Should be the maximum rank among all LoRA adapters that will be
            hot-swapped. If not specified, the target ranks will not be changed.
        config (`LoraConfig` or `dict[str, LoraConfig]`, *optional*):
            Optionally pass the `LoraConfig`s of the LoRA adapters. If passed, the rank in the configs will be updated
            to `target_rank`.
        check_compiled (`str`, *optional*, defaults to `"error"`):
            How to handle the case when the model is already compiled, which should generally be avoided. The options
            are:
              - "error" (default): raise an error
              - "warn": issue a warning
              - "ignore": do nothing

    Raises:
        ValueError
            If the model is already compiled (and `check_compiled` is `"error"` or not one of the allowed options) or
            if no adapter layer was found, raise an error.

    Example:

        ```py
        base_model = ...
        model = PeftModel.from_pretrained(base_model, path_adapter_0)
        # Prepare the model to allow hotswapping even if ranks/scalings of 2nd adapter differ.
        # You can skip this step if all ranks and scalings are identical.
        prepare_model_for_compiled_hotswap(model, target_rank=highest_lora_rank)
        model = torch.compile(model)
        # do inference with adapter 0
        # replace the "default" lora adapter with the new one
        hotswap_adapter(model, path_adapter_1, adapter_name="default", torch_device=device)
        # do inference with adapter 1
        ```

    """
    # _orig_mod is set by torch.compile(model), _compiled_call_impl by model.compile()
    is_compiled = bool(hasattr(model, "_orig_mod") or getattr(model, "_compiled_call_impl", False))
    if is_compiled:
        if check_compiled == "error":
            raise ValueError("Call prepare_model_for_compiled_hotswap *before* compiling the model")
        elif check_compiled == "warn":
            warnings.warn(
                "prepare_model_for_compiled_hotswap was called with a model that is already compiled. This will likely "
                "result in re-compilation, hurting performance. Call the function before compiling the model."
            )
        elif check_compiled != "ignore":
            raise ValueError(
                f"check_compiled should be one of 'error', 'warn', or 'ignore', got '{check_compiled}' instead."
            )

    conversion_found_adapter = _convert_scalings_to_tensor(model)
    if target_rank is not None:
        padding_found_adapter = _pad_lora_weights(model, target_rank=target_rank)
    else:
        padding_found_adapter = False

    if not (conversion_found_adapter or padding_found_adapter):
        raise ValueError(
            "No adapter layers found on the model, make sure call `prepare_model_for_compiled_hotswap` after loading "
            "the first adapter and before loading the second adapter."
        )

    # The configs only need to be touched if ranks were actually padded
    if (not config) or (target_rank is None):
        return

    if not isinstance(config, dict):
        # config can be either a PeftConfig, or a dict of PeftConfigs like PeftModel.peft_config
        config = {"dummy": config}

    for lora_config in config.values():
        lora_config.r = target_rank
        if lora_config.rank_pattern:
            # all layers now have the same (padded) rank
            for key in lora_config.rank_pattern:
                lora_config.rank_pattern[key] = target_rank
def hotswap_adapter_from_state_dict(
    model: torch.nn.Module,
    state_dict: dict[str, torch.Tensor],
    adapter_name: str,
    config: LoraConfig,
    parameter_prefix: str = "lora_",
):
    """
    Swap out the adapter weights from the model with the weights from state_dict.

    As of now, only LoRA is supported.

    This is a low-level function that assumes that the adapters have been checked for compatibility and that the
    state_dict has been correctly mapped to work with PEFT. For a high level function that performs this work for you,
    use `hotswap_adapter` instead.

    The function works in three steps: a dry run that checks every key of `state_dict` against the model, zeroing of
    the weights of the old adapter that have no counterpart in `state_dict`, and finally the swap of scalings and
    weights.

    Args:
        model (`nn.Module`):
            The model with the loaded adapter.
        state_dict (`dict[str, torch.Tensor]`):
            The state dict of the new adapter, which needs to be compatible (targeting same modules etc.).
        adapter_name (`str`):
            The name of the adapter that should be hot-swapped, e.g. `"default"`. The name will remain the same after
            swapping.
        config (`LoraConfig`):
            The config of the LoRA adapter. This is used to determine the scaling and rank of the adapter.
        parameter_prefix (`str`, *optional*, defaults to `"lora_"`)
            The prefix used to identify the adapter's keys in the state dict. For LoRA, this would be `"lora_"` (the
            default).

    Raises:
        RuntimeError
            If the new adapter contains keys that cannot be resolved on the model, a RuntimeError is raised.
        NotImplementedError
            If shapes differ and the weight has a number of dimensions other than 2 (Linear) or 4 (Conv2d).
        ValueError
            If shapes differ and the old weight is not larger than the new weight in dimension 0 or 1.

    """
    # Ensure that all the keys of the new adapter correspond exactly to the keys of the old adapter, otherwise
    # hot-swapping is not possible

    # _orig_mod is for torch.compile(model) and _compiled_call_impl is for model.compile() (not wrapped)
    is_compiled = hasattr(model, "_orig_mod")
    is_compiled_inplace = bool(getattr(model, "_compiled_call_impl", None))
    # TODO: there is probably a more precise way to identify the adapter keys
    missing_keys = {k for k in model.state_dict() if (parameter_prefix in k) and (adapter_name in k)}
    unexpected_keys = []

    # first: dry run, not swapping anything
    for key in state_dict:
        try:
            # only checks that the key resolves to an attribute on the model
            attrgetter(key)(model)
        except AttributeError:
            unexpected_keys.append(key)
            continue

        if is_compiled:
            missing_keys.remove("_orig_mod." + key)
        else:
            missing_keys.remove(key)

    # Right now, we don't deal with unexpected keys, i.e. if the adapter being swapped in targeting new layers. We could
    # probably add LoRA to these layers ad hoc, but that would not work with compiled models.
    if unexpected_keys:
        msg = f"Hot swapping the adapter did not succeed, unexpected keys found: {', '.join(unexpected_keys)}."
        raise RuntimeError(msg)

    # If the adapter that is being swapped in is missing some keys, this is fine. We just need to ensure that those LoRA
    # weights from the previous adapter are set to 0 so that they don't influence the output. We don't need to worry
    # about ranks or alphas.
    for key in missing_keys:
        # in case it's a compiled model
        key = key.removeprefix("_orig_mod.")
        old_val = attrgetter(key)(model)
        old_val.data.fill_(0.0)

    # actual swapping
    for key, new_val in state_dict.items():
        # get LoRA parent module name by removing the 'lora_*.<adapter-name>.weight' part
        module_name = ".".join(key.split(".")[:-3])
        module = model.get_submodule(module_name)

        # swap alpha/scaling
        r_key = get_pattern_key(config.rank_pattern.keys(), key)
        alpha_key = get_pattern_key(config.alpha_pattern.keys(), key)
        rank = config.rank_pattern.get(r_key, config.r)
        alpha = config.alpha_pattern.get(alpha_key, config.lora_alpha)
        if config.use_rslora:
            scaling = alpha / math.sqrt(rank)
        else:
            scaling = alpha / rank
        _update_scaling(module, adapter_name=adapter_name, scaling=scaling)

        # swap actual weights
        # no need to account for potential _orig_mod in key here, as torch handles that
        old_val = attrgetter(key)(model)
        new_val = new_val.to(old_val.data.device)

        # We try to detect if the model is compiled but it does not always work, e.g. if hotswapping is called from
        # within the model itself. In this case, swap_tensors raises RuntimeError and should continue without
        # swap_tensors.
        if not is_compiled and not is_compiled_inplace:
            try:
                torch.utils.swap_tensors(old_val, new_val)
                continue
            except RuntimeError:
                # treat the model as compiled for this and all following keys
                is_compiled = True

        # Compiled models don't work with swap_tensors because there are weakrefs for the tensor. It is unclear if
        # this workaround could not cause trouble but the tests indicate that it works.
        if old_val.shape == new_val.shape:
            # either
            # - adapters had the same rank
            # - adapters were padded with prepare_model_for_compiled_hotswap and 2nd adapter was larger
            old_val.data.copy_(new_val.data)
        else:
            # if 2nd adapter was smaller, ensure to fill up to adapter dimension and set the rest to zeros
            if old_val.dim() not in (2, 4):
                raise NotImplementedError(
                    f"Trying to hotswap an adapter whose weight has {old_val.dim()} dimensions, but only Conv2d and "
                    "Linear are supported"
                )

            # Linear or Conv2d: the check for dim 0 or 1 works for both of these layer types
            if old_val.shape[0] > new_val.shape[0]:
                old_val.data.fill_(0)
                old_val.data[: new_val.shape[0]].copy_(new_val.data)
            elif old_val.shape[1] > new_val.shape[1]:
                old_val.data.fill_(0)
                old_val.data[:, : new_val.shape[1]].copy_(new_val.data)
            else:
                raise ValueError(
                    f"Incompatible shapes found for LoRA weights {key}: {old_val.shape} vs {new_val.shape}. Please "
                    "ensure that all ranks are padded to the largest rank among all LoRA adapters by using "
                    "peft.utils.hotswap.prepare_model_for_compiled_hotswap."
                )
def check_hotswap_configs_compatible(config0: PeftConfig, config1: PeftConfig) -> None:
    """
    Check if two configs are compatible for hot-swapping.

    The check consists of three steps: both configs must have the same PEFT type, that PEFT type must have an entry
    in `CONFIG_KEYS_TO_CHECK`, and each config key listed there must have the same value in both configs (a key that
    is absent from both configs counts as equal).

    Only LoRA parameters are checked for now.

    Args:
        config0 (`PeftConfig`):
            The config of the adapter that is currently loaded.
        config1 (`PeftConfig`):
            The config of the adapter that should be swapped in.

    Raises:
        ValueError
            If the PEFT types differ, if the PEFT type is not supported for hot-swapping, or if one of the checked
            config values differs.

    """
    peft_type = config0.peft_type

    if peft_type != config1.peft_type:
        raise ValueError(f"Incompatible PEFT types found: {config0.peft_type.value} and {config1.peft_type.value}")

    if peft_type not in CONFIG_KEYS_TO_CHECK:
        supported = ", ".join(CONFIG_KEYS_TO_CHECK.keys())
        raise ValueError(f"Hotswapping only supports {supported} but {config0.peft_type.value} was passed.")

    # TODO: This is a very rough check only for LoRA at the moment. Also, there might be some options that don't
    # necessarily require an error.
    values0 = config0.to_dict()
    values1 = config1.to_dict()
    # unique placeholder for keys that are not part of a config
    absent = object()
    for key in CONFIG_KEYS_TO_CHECK[peft_type]:
        val0, val1 = values0.get(key, absent), values1.get(key, absent)
        if val0 != val1:
            raise ValueError(f"Configs are incompatible: for {key}, {val0} != {val1}")
def hotswap_adapter(model, model_name_or_path, adapter_name, torch_device=None, **kwargs):
    """Substitute old adapter data with new adapter data, keeping the rest the same.

    As of now, only LoRA is supported.

    This function is useful when you want to replace the loaded adapter with a new adapter. The adapter name will
    remain the same, but the weights and other parameters will be swapped out.

    The config of the new adapter is loaded and compared to the active config of the model with
    `check_hotswap_configs_compatible`. Then the weights are loaded, the adapter name is inserted into the keys of the
    state dict, and the actual swap is delegated to `hotswap_adapter_from_state_dict`. Errors raised by these
    functions, e.g. for incompatible configs or for adapters targeting different layers, are not caught here.

    Example:

    ```py
    >>> import torch
    >>> from transformers import AutoModelForCausalLM
    >>> from peft import PeftModel
    >>> from peft.utils.hotswap import hotswap_adapter

    >>> model_id = ...
    >>> inputs = ...
    >>> device = ...
    >>> model = AutoModelForCausalLM.from_pretrained(model_id).to(device)

    >>> # load lora 0
    >>> model = PeftModel.from_pretrained(model, "path-adapter-0")
    >>> model = torch.compile(model)  # optionally compile the model
    >>> with torch.inference_mode():
    ...     output_adapter_0 = model(inputs)

    >>> # replace the "default" lora adapter with the new one
    >>> hotswap_adapter(model, "path-adapter-1", adapter_name="default", torch_device=device)
    >>> with torch.inference_mode():
    ...     output_adapter_1 = model(inputs).logits
    ```

    Args:
        model ([`~PeftModel`]):
            The PEFT model with the loaded adapter.
        model_name_or_path (`str`):
            The name or path of the model to load the new adapter from.
        adapter_name (`str`):
            The name of the adapter to swap, e.g. `"default"`. The name will stay the same after swapping.
        torch_device: (`str`, *optional*, defaults to None):
            The device to load the new adapter onto. If `None`, the device is determined with `infer_device`.
        **kwargs (`optional`):
            Additional keyword arguments used for loading the config and weights.

    """
    device = infer_device() if torch_device is None else torch_device

    ############################
    # LOAD CONFIG AND VALIDATE #
    ############################

    # only these entries of kwargs are used to determine the PEFT type
    hub_kwargs = {name: kwargs.get(name, None) for name in ("subfolder", "revision", "cache_dir", "token")}
    if use_auth_token := kwargs.get("use_auth_token", None):
        hub_kwargs["use_auth_token"] = use_auth_token

    peft_type = PeftConfig._get_peft_type(model_name_or_path, **hub_kwargs)
    new_config = PEFT_TYPE_TO_CONFIG_MAPPING[peft_type].from_pretrained(model_name_or_path, **kwargs)
    # config keys that could affect the model output besides what is determined by the state_dict
    check_hotswap_configs_compatible(model.active_peft_config, new_config)

    raw_state_dict = load_peft_weights(model_name_or_path, device=device, **kwargs)

    ###########################
    # LOAD & REMAP STATE_DICT #
    ###########################

    prefix = PEFT_TYPE_TO_PREFIX_MAPPING[new_config.peft_type]
    remapped_state_dict = _insert_adapter_name_into_state_dict(
        raw_state_dict, adapter_name=adapter_name, parameter_prefix=prefix
    )

    hotswap_adapter_from_state_dict(
        model=model,
        state_dict=remapped_state_dict,
        adapter_name=adapter_name,
        parameter_prefix=prefix,
        config=new_config,
    )
Xet Storage Details
- Size:
- 26.5 kB
- Xet hash:
- 104af2cfcffe26128356b261145cf590d7cfbd584ba5399177102c7d9d9fd59d
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.