<h1 class="relative group"><a id="peft-checkpoint-format" class="header-link" href="#peft-checkpoint-format"></a> <span>PEFT checkpoint format</span></h1> <p data-svelte-h="svelte-i846eh">This document describes how PEFT’s checkpoint files are structured and how to convert between the PEFT format and other 
formats.</p> <h2 class="relative group"><a id="peft-files" class="header-link" href="#peft-files"></a> <span>PEFT files</span></h2> <p data-svelte-h="svelte-1jlobya">PEFT (parameter-efficient fine-tuning) methods update only a small subset of a model’s parameters rather than all of them. As a result, checkpoint files are generally much smaller than the original model files and are easier to store and share. 
However, this also means that to load a PEFT model, you need to have the original model available as well.</p> <p data-svelte-h="svelte-15b81d7">When you call <a href="/docs/peft/pr_3206/en/package_reference/peft_model#peft.PeftModel.save_pretrained">save_pretrained()</a> on a PEFT model, the PEFT model saves three files, described below:</p> <ol data-svelte-h="svelte-1buhd3i"><li><code>adapter_model.safetensors</code> or <code>adapter_model.bin</code></li></ol> <p data-svelte-h="svelte-12xl22z">By default, the model is saved in the <code>safetensors</code> format, a secure alternative to the <code>bin</code> format, which is known to be susceptible to <a href="https://huggingface.co/docs/hub/security-pickle" rel="nofollow">security vulnerabilities</a> because it uses the pickle utility under the hood. Both formats store the same <code>state_dict</code> though, and are interchangeable.</p> <p data-svelte-h="svelte-1uhgqli">The <code>state_dict</code> only contains the parameters of the adapter module, not the base model. To illustrate the difference in size, a normal BERT model requires ~420MB of disk space, whereas an IA³ adapter on top of this BERT model only requires ~260KB.</p> <ol start="2" data-svelte-h="svelte-u9vwkh"><li><code>adapter_config.json</code></li></ol> <p data-svelte-h="svelte-ubabis">The <code>adapter_config.json</code> file contains the configuration of the adapter module, which is necessary to load the model. 
Below is an example of an <code>adapter_config.json</code> for an IA³ adapter with standard settings applied to a BERT model:</p> <div class="code-block relative "><pre>{
  "auto_mapping": {
    "base_model_class": "BertModel",
    "parent_library": "transformers.models.bert.modeling_bert"
  },
  "base_model_name_or_path": "bert-base-uncased",
  "fan_in_fan_out": false,
  "feedforward_modules": [
    "output.dense"
  ],
  "inference_mode": true,
  "init_ia3_weights": true,
  "modules_to_save": null,
  "peft_type": "IA3",
  "revision": null,
  "target_modules": [
    "key",
    "value",
    "output.dense"
  ],
  "task_type": null
}</pre></div> <p data-svelte-h="svelte-18i1sdg">The configuration file contains:</p> <ul data-svelte-h="svelte-1fi0uuu"><li>the stored adapter module type, <code>"peft_type": "IA3"</code></li> <li>information about the base model, like <code>"base_model_name_or_path": "bert-base-uncased"</code></li> <li>the revision of the model (if any), <code>"revision": null</code></li></ul> <p data-svelte-h="svelte-1nfr73w">If the base model is not a pretrained Transformers model, the latter two entries will be <code>null</code>. Other than that, the settings are all related to the specific IA³ adapter that was used to fine-tune the model.</p> <ol start="3" data-svelte-h="svelte-11vms8t"><li><code>README.md</code></li></ol> <p data-svelte-h="svelte-bfnv50">The generated <code>README.md</code> is the model card of a PEFT model and contains a few pre-filled entries. The intent of this is to make it easier to share the model with others and to provide some basic information about the model. 
This file is not needed to load the model.</p> <h2 class="relative group"><a id="convert-to-peft-format" class="header-link" href="#convert-to-peft-format"></a> <span>Convert to PEFT format</span></h2> <p data-svelte-h="svelte-1k4p1z7">When converting from another format to the PEFT format, we require both the <code>adapter_model.safetensors</code> (or <code>adapter_model.bin</code>) file and the <code>adapter_config.json</code> file.</p> <h3 class="relative group"><a id="adaptermodel" class="header-link" href="#adaptermodel"></a> <span>adapter_model</span></h3> <p data-svelte-h="svelte-kd80je">For the model weights, it is important to use the correct mapping from parameter name to value for PEFT to load the file. Getting this mapping right is an exercise in checking the implementation details, as there is no generally agreed upon format for PEFT adapters.</p> <p data-svelte-h="svelte-vjr9ez">Fortunately, figuring out this mapping is not overly complicated for common base cases. Let’s look at a concrete example, the <a href="https://github.com/huggingface/peft/blob/main/src/peft/tuners/lora/layer.py" rel="nofollow"><code>LoraLayer</code></a>:</p> <div class="code-block relative "><pre># showing only part of the code
class LoraLayer(BaseTunerLayer):
    # All names of layers that may contain (trainable) adapter weights
    adapter_layer_names = ("lora_A", "lora_B", "lora_embedding_A", "lora_embedding_B")
    # All names of other parameters that may contain adapter-related parameters
    other_param_names = ("r", "lora_alpha", "scaling", "lora_dropout")

    def __init__(self, base_layer: nn.Module, **kwargs) -> None:
        self.base_layer = base_layer
        self.r = {}
        self.lora_alpha = {}
        self.scaling = {}
        self.lora_dropout = nn.ModuleDict({})
        self.lora_A = nn.ModuleDict({})
        self.lora_B = nn.ModuleDict({})
        # For Embedding layer
        self.lora_embedding_A = nn.ParameterDict({})
        self.lora_embedding_B = nn.ParameterDict({})
        # Mark the weight as unmerged
        self._disable_adapters = False
        self.merged_adapters = []
        self.use_dora: dict[str, bool] = {}
        self.lora_magnitude_vector: Optional[torch.nn.ParameterDict] = None  # for DoRA
        self._caches: dict[str, Any] = {}
        self.kwargs = kwargs</pre></div> <p data-svelte-h="svelte-1bnpl7i">In the <code>__init__</code> code used by all <code>LoraLayer</code> classes in PEFT, there are a number of parameters used to initialize the model, but only a few are relevant for the checkpoint file: <code>lora_A</code>, <code>lora_B</code>, <code>lora_embedding_A</code>, and <code>lora_embedding_B</code>. These parameters are listed in the class attribute <code>adapter_layer_names</code> and contain the learnable parameters, so they must be included in the checkpoint file. All the other parameters, like the rank <code>r</code>, are derived from the <code>adapter_config.json</code> and must be included there (unless the default value is used).</p> <p data-svelte-h="svelte-my5lpq">Let’s check the <code>state_dict</code> of a PEFT LoRA model applied to BERT. When printing the first five keys using the default LoRA settings (the remaining keys are the same, just with different layer numbers), we get:</p> <ul data-svelte-h="svelte-18g5t5x"><li><code>base_model.model.encoder.layer.0.attention.self.query.lora_A.weight</code></li> <li><code>base_model.model.encoder.layer.0.attention.self.query.lora_B.weight</code></li> <li><code>base_model.model.encoder.layer.0.attention.self.value.lora_A.weight</code></li> <li><code>base_model.model.encoder.layer.0.attention.self.value.lora_B.weight</code></li> <li><code>base_model.model.encoder.layer.1.attention.self.query.lora_A.weight</code></li> <li>etc.</li></ul> <p data-svelte-h="svelte-1cey2tw">Let’s break this down:</p> <ul data-svelte-h="svelte-12zgb0p"><li>By default, for BERT models, LoRA is applied to the <code>query</code> and <code>value</code> layers of the attention module. This is why you see <code>attention.self.query</code> and <code>attention.self.value</code> in the key names for each layer.</li> <li>LoRA represents the weight update as the product of two low-rank matrices, <code>lora_A</code> and <code>lora_B</code>. 
This is where <code>lora_A</code> and <code>lora_B</code> come from in the key names.</li> <li>These LoRA matrices are implemented as <code>nn.Linear</code> layers, so the parameters are stored in the <code>.weight</code> attribute (<code>lora_A.weight</code>, <code>lora_B.weight</code>).</li> <li>By default, LoRA isn’t applied to BERT’s embedding layer, so there are <em>no entries</em> for <code>lora_embedding_A</code> and <code>lora_embedding_B</code>.</li> <li>The keys of the <code>state_dict</code> always start with <code>"base_model.model."</code>. The reason is that, in PEFT, we wrap the base model inside a tuner-specific model (<code>LoraModel</code> in this case), which itself is wrapped in a general PEFT model (<code>PeftModel</code>). For this reason, these two prefixes are added to the keys. When converting to the PEFT format, it is required to add these prefixes.</li></ul> <blockquote class="tip" data-svelte-h="svelte-xj9vfo"><p>This last point is not true for prefix tuning techniques like prompt tuning. There, the extra embeddings are directly stored in the <code>state_dict</code> without any prefixes added to the keys.</p></blockquote> <p data-svelte-h="svelte-1g2h6ud">When inspecting the parameter names in the loaded model, you might be surprised to find that they look a bit different, e.g. <code>base_model.model.encoder.layer.0.attention.self.query.lora_A.default.weight</code>. The difference is the <em><code>.default</code></em> part in the second to last segment. This part exists because PEFT generally allows the addition of multiple adapters at once (using an <code>nn.ModuleDict</code> or <code>nn.ParameterDict</code> to store them). 
For example, if you add another adapter called “other”, the key for that adapter would be <code>base_model.model.encoder.layer.0.attention.self.query.lora_A.other.weight</code>.</p> <p data-svelte-h="svelte-135ci5j">When you call <a href="/docs/peft/pr_3206/en/package_reference/peft_model#peft.PeftModel.save_pretrained">save_pretrained()</a>, the adapter name is stripped from the keys. The reason is that the adapter name is not an important part of the model architecture; it is just an arbitrary name. When loading the adapter, you could choose a totally different name, and the model would still work the same way. This is why the adapter name is not stored in the checkpoint file.</p> <blockquote class="tip" data-svelte-h="svelte-a8hmi4"><p>If you call <code>save_pretrained("some/path")</code> and the adapter name is not <code>"default"</code>, the adapter is stored in a sub-directory with the same name as the adapter. So if the name is “other”, it would be stored inside of <code>some/path/other</code>.</p></blockquote> <p data-svelte-h="svelte-1bo4lop">In some circumstances, deciding which values to add to the checkpoint file can become a bit more complicated. For example, in PEFT, DoRA is implemented as a special case of LoRA. If you want to convert a DoRA model to PEFT, you should create a LoRA checkpoint with extra entries for DoRA. 
You can see this in the <code>__init__</code> of the previous <code>LoraLayer</code> code:</p> <div class="code-block relative "><pre>self.lora_magnitude_vector: Optional[torch.nn.ParameterDict] = None  # for DoRA</pre></div> <p data-svelte-h="svelte-34pnf6">This indicates that there is an optional extra parameter per layer for DoRA.</p> <h3 class="relative group"><a id="adapterconfig" class="header-link" href="#adapterconfig"></a> <span>adapter_config</span></h3> <p data-svelte-h="svelte-6fxao8">All the other information needed to load a PEFT model is contained in the <code>adapter_config.json</code> file. Let’s check this file for a LoRA model applied to BERT:</p> <div class="code-block relative "><pre>{
  "alpha_pattern": {},
  "auto_mapping": {
    "base_model_class": "BertModel",
    "parent_library": "transformers.models.bert.modeling_bert"
  },
  "base_model_name_or_path": "bert-base-uncased",
  "bias": "none",
  "fan_in_fan_out": false,
  "inference_mode": true,
  "init_lora_weights": true,
  "layer_replication": null,
  "layers_pattern": null,
  "layers_to_transform": null,
  "loftq_config": {},
  "lora_alpha": 8,
  "lora_dropout": 0.0,
  "megatron_config": null,
  "megatron_core": "megatron.core",
  "modules_to_save": null,
  "peft_type": "LORA",
  "r": 8,
  "rank_pattern": {},
  "revision": null,
  "target_modules": [
    "query",
    "value"
  ],
| <span class="hljs-attr">"task_type"</span><span class="hljs-punctuation">:</span> <span class="hljs-literal"><span class="hljs-keyword">null</span></span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"use_dora"</span><span class="hljs-punctuation">:</span> <span class="hljs-literal"><span class="hljs-keyword">false</span></span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"use_rslora"</span><span class="hljs-punctuation">:</span> <span class="hljs-literal"><span class="hljs-keyword">false</span></span> | |
| <span class="hljs-punctuation">}</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-6hfyn3">This contains a lot of entries, and at first glance, it could feel overwhelming to figure out all the right values to put in there. However, most of the entries are not necessary to load the model. This is either because they use the default values and don’t need to be added or because they only affect the initialization of the LoRA weights, which is irrelevant when it comes to loading the model. If you find that you don’t know what a specific parameter does, e.g., <code>"use_rslora",</code> don’t add it, and you should be fine. Also note that as more options are added, this file will get more entries in the future, but it should be backward compatible.</p> <p data-svelte-h="svelte-190wzyy">At the minimum, you should include the following entries:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: 
transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"target_modules"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">[</span><span class="hljs-string">"query"</span><span class="hljs-punctuation">,</span> <span class="hljs-string">"value"</span><span class="hljs-punctuation">]</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"peft_type"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"LORA"</span> | |
| <span class="hljs-punctuation">}</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-19ijgni">However, adding as many entries as possible, like the rank <code>r</code> or the <code>base_model_name_or_path</code> (if it’s a Transformers model) is recommended. This information can help others understand the model better and share it more easily. To check which keys and values are expected, check out the <a href="https://github.com/huggingface/peft/blob/main/src/peft/tuners/lora/config.py" rel="nofollow">config.py</a> file (as an example, this is the config file for LoRA) in the PEFT source code.</p> <h2 class="relative group"><a id="model-storage" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#model-storage"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Model storage</span></h2> <p data-svelte-h="svelte-1bwnzmb">In some circumstances, you might want to store the whole PEFT model, including the base weights. This can be necessary if, for instance, the base model is not available to the users trying to load the PEFT model. 
You can either merge the weights first or convert the PEFT model into a Transformers model.</p> <h3 class="relative group"><a id="merge-the-weights" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#merge-the-weights"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Merge the weights</span></h3> <p data-svelte-h="svelte-1k0zco1">The most straightforward way to store the whole PEFT model is to merge the adapter weights into the base weights:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none"
width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->merged_model = model.merge_and_unload() | |
| merged_model.save_pretrained(...)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-scpch6">There are some disadvantages to this approach, though:</p> <ul data-svelte-h="svelte-xzb47x"><li>Once <a href="/docs/peft/pr_3206/en/package_reference/tuners#peft.tuners.tuners_utils.BaseTuner.merge_and_unload">merge_and_unload()</a> is called, you get a basic model without any PEFT-specific functionality. This means you can’t use any of the PEFT-specific methods anymore.</li> <li>You cannot unmerge the weights, load multiple adapters at once, disable the adapter, etc.</li> <li>Not all PEFT methods support merging weights.</li> <li>Some PEFT methods may generally allow merging, but not with specific settings (e.g. when using certain quantization techniques).</li> <li>The whole model will be much larger than the PEFT model, as it will contain all the base weights as well.</li></ul> <p data-svelte-h="svelte-ohejod">But inference with a merged model should be a bit faster.</p> <h3 class="relative group"><a id="convert-to-a-transformers-model" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#convert-to-a-transformers-model"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" 
fill="currentColor"></path></svg></span></a> <span>Convert to a Transformers model</span></h3> <p data-svelte-h="svelte-lzrdax">Another way to save the whole model, assuming the base model is a Transformers model, is a somewhat hacky approach: insert the PEFT weights directly into the base model and save it, which requires “tricking” Transformers into believing the PEFT model is not a PEFT model. This approach only works with LoRA, because other adapter types are not implemented in Transformers.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->model = ... <span class="hljs-comment"># the PEFT model</span> | |
| ... | |
| <span class="hljs-comment"># after you finish training the model, save it in a temporary location</span> | |
| model.save_pretrained(<temp_location>) | |
| <span class="hljs-comment"># now load this model directly into a transformers model, without the PEFT wrapper</span> | |
| <span class="hljs-comment"># the PEFT weights are directly injected into the base model</span> | |
| model_loaded = AutoModel.from_pretrained(<temp_location>) | |
| <span class="hljs-comment"># now make the loaded model believe that it is _not_ a PEFT model</span> | |
| model_loaded._hf_peft_config_loaded = <span class="hljs-literal">False</span> | |
| <span class="hljs-comment"># now when we save it, it will save the whole model</span> | |
| model_loaded.save_pretrained(<final_location>) | |
| <span class="hljs-comment"># or upload to Hugging Face Hub</span> | |
| model_loaded.push_to_hub(<final_location>)<!-- HTML_TAG_END --></pre></div> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/peft/blob/main/docs/source/developer_guides/checkpoint.md" target="_blank"><svg class="mr-1" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M31,16l-7,7l-1.41-1.41L28.17,16l-5.58-5.59L24,9l7,7z"></path><path d="M1,16l7-7l1.41,1.41L3.83,16l5.58,5.59L8,23l-7-7z"></path><path d="M12.419,25.484L17.639,6.552l1.932,0.518L14.351,26.002z"></path></svg> <span data-svelte-h="svelte-zjs2n5"><span class="underline">Update</span> on GitHub</span></a> <p></p> | |
| <script> | |
| { | |
| __sveltekit_1pyekj7 = { | |
| assets: "/docs/peft/pr_3206/en", | |
| base: "/docs/peft/pr_3206/en", | |
| env: {} | |
| }; | |
| const element = document.currentScript.parentElement; | |
| const data = [null,null]; | |
| Promise.all([ | |
| import("/docs/peft/pr_3206/en/_app/immutable/entry/start.06e1cdbd.js"), | |
| import("/docs/peft/pr_3206/en/_app/immutable/entry/app.cb2a6689.js") | |
| ]).then(([kit, app]) => { | |
| kit.start(app, element, { | |
| node_ids: [0, 8], | |
| data, | |
| form: null, | |
| error: null | |
| }); | |
| }); | |
| } | |
| </script> | |