Buckets:
| <meta charset="utf-8" /><meta name="hf:doc:metadata" content="{"title":"Troubleshooting","local":"troubleshooting","sections":[{"title":"Examples don’t work","local":"examples-dont-work","sections":[],"depth":2},{"title":"Dtype-related issues","local":"dtype-related-issues","sections":[{"title":"ValueError: Attempting to unscale FP16 gradients","local":"valueerror-attempting-to-unscale-fp16-gradients","sections":[],"depth":3},{"title":"Selecting the dtype of the adapter","local":"selecting-the-dtype-of-the-adapter","sections":[],"depth":3}],"depth":2},{"title":"Bad results from a loaded PEFT model","local":"bad-results-from-a-loaded-peft-model","sections":[{"title":"Random deviations","local":"random-deviations","sections":[],"depth":3},{"title":"Incorrectly loaded model","local":"incorrectly-loaded-model","sections":[],"depth":3},{"title":"Randomly initialized layers","local":"randomly-initialized-layers","sections":[],"depth":3},{"title":"Extending the vocabulary","local":"extending-the-vocabulary","sections":[{"title":"Using trainable tokens","local":"using-trainable-tokens","sections":[],"depth":4},{"title":"Using an adapter, e.g. LoRA","local":"using-an-adapter-eg-lora","sections":[],"depth":4},{"title":"Full fine-tuning","local":"full-fine-tuning","sections":[],"depth":4}],"depth":3},{"title":"Getting a warning about “weights not being initialized from the model checkpoint”","local":"getting-a-warning-about-weights-not-being-initialized-from-the-model-checkpoint","sections":[],"depth":3},{"title":"Check layer and model status","local":"check-layer-and-model-status","sections":[],"depth":3}],"depth":2},{"title":"Speed","local":"speed","sections":[{"title":"Loading adapter weights is slow","local":"loading-adapter-weights-is-slow","sections":[],"depth":3}],"depth":2},{"title":"Reproducibility","local":"reproducibility","sections":[{"title":"Models using batch norm","local":"models-using-batch-norm","sections":[],"depth":3}],"depth":2},{"title":"Version mismatch","local":"version-mismatch","sections":[{"title":"Error while loading the config because of an unexpected keyword argument","local":"error-while-loading-the-config-because-of-an-unexpected-keyword-argument","sections":[],"depth":3}],"depth":2},{"title":"Adapter handling","local":"adapter-handling","sections":[{"title":"Using multiple adapters at the same time","local":"using-multiple-adapters-at-the-same-time","sections":[],"depth":3}],"depth":2}],"depth":1}"> | |
| <link href="/docs/peft/pr_3205/en/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload"> | |
| <link rel="modulepreload" href="/docs/peft/pr_3205/en/_app/immutable/entry/start.6e4f7154.js"> | |
| <link rel="modulepreload" href="/docs/peft/pr_3205/en/_app/immutable/chunks/scheduler.78382b47.js"> | |
| <link rel="modulepreload" href="/docs/peft/pr_3205/en/_app/immutable/chunks/singletons.fa874020.js"> | |
| <link rel="modulepreload" href="/docs/peft/pr_3205/en/_app/immutable/chunks/index.fadd215c.js"> | |
| <link rel="modulepreload" href="/docs/peft/pr_3205/en/_app/immutable/chunks/paths.ca1902b3.js"> | |
| <link rel="modulepreload" href="/docs/peft/pr_3205/en/_app/immutable/entry/app.968b774b.js"> | |
| <link rel="modulepreload" href="/docs/peft/pr_3205/en/_app/immutable/chunks/preload-helper.db85a81e.js"> | |
| <link rel="modulepreload" href="/docs/peft/pr_3205/en/_app/immutable/chunks/index.6dd35eb6.js"> | |
| <link rel="modulepreload" href="/docs/peft/pr_3205/en/_app/immutable/nodes/0.02e4cb39.js"> | |
| <link rel="modulepreload" href="/docs/peft/pr_3205/en/_app/immutable/chunks/each.e59479a4.js"> | |
| <link rel="modulepreload" href="/docs/peft/pr_3205/en/_app/immutable/nodes/17.a2e3d949.js"> | |
| <link rel="modulepreload" href="/docs/peft/pr_3205/en/_app/immutable/chunks/MermaidChart.svelte_svelte_type_style_lang.184d38d3.js"> | |
| <link rel="modulepreload" href="/docs/peft/pr_3205/en/_app/immutable/chunks/CodeBlock.7e3c9fac.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{"title":"Troubleshooting","local":"troubleshooting","sections":[{"title":"Examples don’t work","local":"examples-dont-work","sections":[],"depth":2},{"title":"Dtype-related issues","local":"dtype-related-issues","sections":[{"title":"ValueError: Attempting to unscale FP16 gradients","local":"valueerror-attempting-to-unscale-fp16-gradients","sections":[],"depth":3},{"title":"Selecting the dtype of the adapter","local":"selecting-the-dtype-of-the-adapter","sections":[],"depth":3}],"depth":2},{"title":"Bad results from a loaded PEFT model","local":"bad-results-from-a-loaded-peft-model","sections":[{"title":"Random deviations","local":"random-deviations","sections":[],"depth":3},{"title":"Incorrectly loaded model","local":"incorrectly-loaded-model","sections":[],"depth":3},{"title":"Randomly initialized layers","local":"randomly-initialized-layers","sections":[],"depth":3},{"title":"Extending the vocabulary","local":"extending-the-vocabulary","sections":[{"title":"Using trainable tokens","local":"using-trainable-tokens","sections":[],"depth":4},{"title":"Using an adapter, e.g. LoRA","local":"using-an-adapter-eg-lora","sections":[],"depth":4},{"title":"Full fine-tuning","local":"full-fine-tuning","sections":[],"depth":4}],"depth":3},{"title":"Getting a warning about “weights not being initialized from the model checkpoint”","local":"getting-a-warning-about-weights-not-being-initialized-from-the-model-checkpoint","sections":[],"depth":3},{"title":"Check layer and model status","local":"check-layer-and-model-status","sections":[],"depth":3}],"depth":2},{"title":"Speed","local":"speed","sections":[{"title":"Loading adapter weights is slow","local":"loading-adapter-weights-is-slow","sections":[],"depth":3}],"depth":2},{"title":"Reproducibility","local":"reproducibility","sections":[{"title":"Models using batch norm","local":"models-using-batch-norm","sections":[],"depth":3}],"depth":2},{"title":"Version mismatch","local":"version-mismatch","sections":[{"title":"Error while loading the config because of an unexpected keyword argument","local":"error-while-loading-the-config-because-of-an-unexpected-keyword-argument","sections":[],"depth":3}],"depth":2},{"title":"Adapter handling","local":"adapter-handling","sections":[{"title":"Using multiple adapters at the same time","local":"using-multiple-adapters-at-the-same-time","sections":[],"depth":3}],"depth":2}],"depth":1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <div class="items-center shrink-0 min-w-[100px] max-sm:min-w-[50px] justify-end ml-auto flex" style="float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"><div class="inline-flex rounded-md max-sm:rounded-sm"><button class="inline-flex items-center gap-1 h-7 max-sm:h-7 px-2 max-sm:px-1.5 text-sm font-medium text-gray-800 border border-r-0 rounded-l-md max-sm:rounded-l-sm border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-live="polite"><span class="inline-flex items-center justify-center rounded-md p-0.5 max-sm:p-0 hover:text-gray-800 dark:hover:text-gray-200"><svg class="sm:size-3.5 size-3" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg></span> <span>Copy page</span></button> <button class="inline-flex items-center justify-center w-6 max-sm:w-5 h-7 max-sm:h-7 disabled:pointer-events-none text-sm text-gray-500 hover:text-gray-700 dark:hover:text-white rounded-r-md max-sm:rounded-r-sm border border-l transition border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-haspopup="menu" aria-expanded="false" aria-label="Open copy menu"><svg class="transition-transform text-gray-400 overflow-visible sm:size-3.5 size-3 rotate-0" width="1em" height="1em" viewBox="0 0 12 7" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M1 1L6 6L11 1" stroke="currentColor"></path></svg></button></div> </div> <h1 class="relative group"><a id="troubleshooting" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#troubleshooting"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Troubleshooting</span></h1> <p data-svelte-h="svelte-wwms7a">If you encounter any issue when using PEFT, please check the following list of common issues and their solutions.</p> <h2 class="relative group"><a id="examples-dont-work" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#examples-dont-work"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Examples don’t work</span></h2> <p data-svelte-h="svelte-18nlz3r">Examples often rely on the most recent package versions, so please ensure they’re up-to-date. In particular, check the following package versions:</p> <ul data-svelte-h="svelte-ccv654"><li><code>peft</code></li> <li><code>transformers</code></li> <li><code>accelerate</code></li> <li><code>torch</code></li></ul> <p data-svelte-h="svelte-1cuqjyb">In general, you can update the package version by running this command inside your Python environment:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->python -m pip install -U <package_name><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-h5ykrw">Installing PEFT from source is useful for keeping up with the latest developments:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->python -m pip install git+https://github.com/huggingface/peft<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="dtype-related-issues" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#dtype-related-issues"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Dtype-related issues</span></h2> <h3 class="relative group"><a id="valueerror-attempting-to-unscale-fp16-gradients" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#valueerror-attempting-to-unscale-fp16-gradients"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>ValueError: Attempting to unscale FP16 gradients</span></h3> <p data-svelte-h="svelte-ic0zsn">This error probably occurred because the model was loaded with <code>dtype=torch.float16</code> and then used in an automatic mixed precision (AMP) context, e.g. by setting <code>fp16=True</code> in the <a href="https://huggingface.co/docs/transformers/main/en/main_classes/trainer#transformers.Trainer" rel="nofollow">Trainer</a> class from 🤗 Transformers. The reason is that when using AMP, trainable weights should never use fp16. To make this work without loading the whole model in fp32, add the following to your code:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->peft_model = get_peft_model(...) | |
| <span class="hljs-comment"># add this:</span> | |
| <span class="hljs-keyword">for</span> param <span class="hljs-keyword">in</span> model.parameters(): | |
| <span class="hljs-keyword">if</span> param.requires_grad: | |
| param.data = param.data.<span class="hljs-built_in">float</span>() | |
| <span class="hljs-comment"># proceed as usual</span> | |
| trainer = Trainer(model=peft_model, fp16=<span class="hljs-literal">True</span>, ...) | |
| trainer.train()<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-13fukq1">Alternatively, you can use the <a href="/docs/peft/pr_3205/en/package_reference/peft_model#peft.cast_mixed_precision_params">cast_mixed_precision_params()</a> function to correctly cast the weights:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> peft <span class="hljs-keyword">import</span> cast_mixed_precision_params | |
| peft_model = get_peft_model(...) | |
| cast_mixed_precision_params(peft_model, dtype=torch.float16) | |
| <span class="hljs-comment"># proceed as usual</span> | |
| trainer = Trainer(model=peft_model, fp16=<span class="hljs-literal">True</span>, ...) | |
| trainer.train()<!-- HTML_TAG_END --></pre></div> <blockquote class="tip" data-svelte-h="svelte-zy19bd"><p>Starting from PEFT version v0.12.0, PEFT automatically promotes the dtype of adapter weights from <code>torch.float16</code> and <code>torch.bfloat16</code> to <code>torch.float32</code> where appropriate. To <em>prevent</em> this behavior, you can pass <code>autocast_adapter_dtype=False</code> to <a href="/docs/peft/pr_3205/en/package_reference/peft_model#peft.get_peft_model">~get_peft_model()</a>, to <a href="/docs/peft/pr_3205/en/package_reference/peft_model#peft.PeftModel.from_pretrained">from_pretrained()</a>, and to <a href="/docs/peft/pr_3205/en/package_reference/peft_model#peft.PeftModel.load_adapter">load_adapter()</a>.</p></blockquote> <h3 class="relative group"><a id="selecting-the-dtype-of-the-adapter" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#selecting-the-dtype-of-the-adapter"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Selecting the dtype of the adapter</span></h3> <p data-svelte-h="svelte-1w3ww7i">Most PEFT methods, like LoRA, work by adding trainable adapter weights. By default, those weights are stored in float32 dtype (fp32), i.e. at a relatively high precision. Therefore, even if the base model is loaded in float16 (fp16) or bfloat16 (bf16), the adapter weights are float32. When the adapter results are calculated during the forward pass, the input will typically be in the dtype of the base model, thus it will be upcast to float32 if necessary, then cast back to the original dtype.</p> <p data-svelte-h="svelte-170pwa5">If you prefer to have the adapter weights in the lower precision of the base model, i.e. in float16 or bfloat16, you can pass <code>autocast_adapter_dtype=False</code> when creating the model (<a href="/docs/peft/pr_3205/en/package_reference/peft_model#peft.get_peft_model">~get_peft_model()</a>) or loading the model (<a href="/docs/peft/pr_3205/en/package_reference/peft_model#peft.PeftModel.from_pretrained">from_pretrained()</a>). There are some advantages and disadvantages to this:</p> <p data-svelte-h="svelte-3n96y7">Advantages of half precision adapter:</p> <ul data-svelte-h="svelte-1ghjkcl"><li>computation slightly faster</li> <li>slightly less memory</li> <li>smaller file size of checkpoint (half the size)</li></ul> <p data-svelte-h="svelte-1yqgo19">Disadvantages of half precision adapter:</p> <ul data-svelte-h="svelte-pdypmd"><li>slightly worse loss</li> <li>higher risk of overflow or underflow</li></ul> <p data-svelte-h="svelte-1gmj2pd">Note that for most use cases, overall runtime and memory cost will be determined by the size of the base model and by the dataset, while the dtype of the PEFT adapter will only have a small impact.</p> <h2 class="relative group"><a id="bad-results-from-a-loaded-peft-model" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#bad-results-from-a-loaded-peft-model"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Bad results from a loaded PEFT model</span></h2> <p data-svelte-h="svelte-ns8zue">There can be several reasons for getting a poor result from a loaded PEFT model which are listed below. If you’re still unable to troubleshoot the problem, see if anyone else had a similar <a href="https://github.com/huggingface/peft/issues" rel="nofollow">issue</a> on GitHub, and if you can’t find any, open a new issue.</p> <p data-svelte-h="svelte-lfc2ft">When opening an issue, it helps a lot if you provide a minimal code example that reproduces the issue. Also, please report if the loaded model performs at the same level as the model did before fine-tuning, if it performs at a random level, or if it is only slightly worse than expected. This information helps us identify the problem more quickly.</p> <h3 class="relative group"><a id="random-deviations" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#random-deviations"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Random deviations</span></h3> <p data-svelte-h="svelte-16feh">If your model outputs are not exactly the same as previous runs, there could be an issue with random elements. For example:</p> <ol data-svelte-h="svelte-1fvhlks"><li>please ensure it is in <code>.eval()</code> mode, which is important, for instance, if the model uses dropout</li> <li>if you use <a href="https://huggingface.co/docs/transformers/main/en/main_classes/text_generation#transformers.GenerationMixin.generate" rel="nofollow">generate</a> on a language model, there could be random sampling, so obtaining the same result requires setting a random seed</li> <li>if you used quantization and merged the weights, small deviations are expected due to rounding errors</li></ol> <h3 class="relative group"><a id="incorrectly-loaded-model" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#incorrectly-loaded-model"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Incorrectly loaded model</span></h3> <p data-svelte-h="svelte-1y8gnlx">Please ensure that you load the model correctly. A common error is trying to load a <em>trained</em> model with <a href="/docs/peft/pr_3205/en/package_reference/peft_model#peft.get_peft_model">get_peft_model()</a> which is incorrect. Instead, the loading code should look like this:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> peft <span class="hljs-keyword">import</span> PeftModel, PeftConfig | |
| base_model = ... <span class="hljs-comment"># to load the base model, use the same code as when you trained it</span> | |
| config = PeftConfig.from_pretrained(peft_model_id) | |
| peft_model = PeftModel.from_pretrained(base_model, peft_model_id)<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="randomly-initialized-layers" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#randomly-initialized-layers"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Randomly initialized layers</span></h3> <p data-svelte-h="svelte-5heszj">For some tasks, it is important to correctly configure <code>modules_to_save</code> in the config to account for randomly initialized layers.</p> <p data-svelte-h="svelte-c4824l">As an example, this is necessary if you use LoRA to fine-tune a language model for sequence classification because 🤗 Transformers adds a randomly initialized classification head on top of the model. If you do not add this layer to <code>modules_to_save</code>, the classification head won’t be saved. The next time you load the model, you’ll get a <em>different</em> randomly initialized classification head, resulting in completely different results.</p> <p data-svelte-h="svelte-1ou3c35">PEFT tries to correctly guess the <code>modules_to_save</code> if you provide the <code>task_type</code> argument in the config. This should work for transformers models that follow the standard naming scheme. It is always a good idea to double check though because we can’t guarantee all models follow the naming scheme.</p> <p data-svelte-h="svelte-1g4txs2">When you load a transformers model that has randomly initialized layers, you should see a warning along the lines of:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">Some</span> weights <span class="hljs-keyword">of</span> <MODEL> were <span class="hljs-keyword">not</span> initialized <span class="hljs-keyword">from</span> the model <span class="hljs-keyword">checkpoint</span> at <ID> <span class="hljs-keyword">and</span> are newly initialized: [<LAYER_NAMES>]. | |
| You should probably TRAIN this model <span class="hljs-keyword">on</span> a down-stream task <span class="hljs-keyword">to</span> be able <span class="hljs-keyword">to</span> use it <span class="hljs-keyword">for</span> predictions <span class="hljs-keyword">and</span> inference.<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1fbeum6">The mentioned layers should be added to <code>modules_to_save</code> in the config to avoid the described problem.</p> <blockquote class="tip" data-svelte-h="svelte-lmc43w"><p>As an example, when loading a model that is using the DeBERTa architecture for sequence classification, you’ll see a warning that the following weights are newly initialized: <code>['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']</code>. From this, it follows that the <code>classifier</code> and <code>pooler</code> layers should be added to: <code>modules_to_save=["classifier", "pooler"]</code>.</p></blockquote> <h3 class="relative group"><a id="extending-the-vocabulary" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#extending-the-vocabulary"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Extending the vocabulary</span></h3> <p data-svelte-h="svelte-1ftgm9w">For many language fine-tuning tasks, extending the model’s vocabulary is necessary since new tokens are being introduced. This requires extending the embedding layer to account for the new tokens and, depending on the fine-tuning method, also storing the embedding layer in addition to the adapter weights when saving the adapter. There are a few ways of achieving this ordered by parameter effectiveness:</p> <ul data-svelte-h="svelte-1cu50cp"><li><a href="../package_reference/trainable_tokens">trainable tokens</a>, train only the specified tokens, optionally store only the updated values</li> <li>training an adapter on the embedding matrix, optionally store only the updated values</li> <li>full-finetuning of the embedding layer</li></ul> <h4 class="relative group"><a id="using-trainable-tokens" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#using-trainable-tokens"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Using trainable tokens</span></h4> <p data-svelte-h="svelte-p4bcqv">Let’s start with trainable tokens, in this case its <a href="../developer_guides/lora#efficiently-train-tokens-alongside-lora">LoRA integration</a>. If you’re interested in only training the new embeddings and nothing else, refer to the <a href="../package_reference/trainable_tokens">standalone documentation</a>.</p> <p data-svelte-h="svelte-ffklcx">To enable selective token training of the embedding layer, you’ll need to supply the token ids of your newly added tokens via the <code>trainable_token_indices</code> parameter. Optionally you can specify which layer to target if there is more than one embedding layer. For a Mistral model this could look like this:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->new_tokens = [<span class="hljs-string">'<think>'</span>, <span class="hljs-string">'</think>'</span>] | |
| tokenizer.add_tokens(new_tokens) | |
| base_model.resize_token_embeddings(<span class="hljs-built_in">len</span>(tokenizer)) | |
| lora_config = LoraConfig( | |
| ..., | |
| trainable_token_indices={<span class="hljs-string">'embed_tokens'</span>: tokenizer.convert_tokens_to_ids(new_tokens)}, | |
| )<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-12n0r5k">If your model uses tied weights (such as the <code>lm_head</code>), trainable tokens will try to resolve those and keep them updated as well, so in that case there should be no need for adding <code>modules_to_save=["lm_head"]</code>. This only works if the model uses the Transformers convention for tying weights.</p> <p data-svelte-h="svelte-1r5fwm9">Saving the model with <code>model.save_pretrained</code> may save the full embedding matrix instead of | |
| only the difference as a precaution because the embedding matrix was resized. To save space you can disable this behavior by setting <code>save_embedding_layers=False</code> when calling <code>save_pretrained</code>. This is safe to do as long as you don’t modify the embedding matrix through other means as well, as such changes will be not tracked by trainable tokens.</p> <h4 class="relative group"><a id="using-an-adapter-eg-lora" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#using-an-adapter-eg-lora"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Using an adapter, e.g. LoRA</span></h4> <p data-svelte-h="svelte-1bq6qvj">Prepare the embedding layer by adding it to the <code>target_modules</code> of your adapter config. For example, the Mistral config could look like this:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->config = LoraConfig(..., target_modules=[<span class="hljs-string">"embed_tokens"</span>, <span class="hljs-string">"lm_head"</span>, <span class="hljs-string">"q_proj"</span>, <span class="hljs-string">"v_proj"</span>])<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1lr155g">Once added to <code>target_modules</code>, PEFT automatically stores the embedding layer when saving the adapter if the model has the <code>get_input_embeddings</code> and <code>get_output_embeddings</code>. This is generally the case for Transformers models.</p> <p data-svelte-h="svelte-1kaw5hl">If the model’s embedding layer doesn’t follow the Transformer’s naming scheme but nevertheless implements <code>get_input_embeddings</code>, you can still save it by manually passing <code>save_embedding_layers=True</code> when saving the adapter:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->model = get_peft_model(...) | |
| <span class="hljs-comment"># train the model</span> | |
| model.save_pretrained(<span class="hljs-string">"my_adapter"</span>, save_embedding_layers=<span class="hljs-literal">True</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-nkv7o7">For inference, load the base model first and resize it the same way you did before you trained the model. After you’ve resized the base model, you can load the PEFT checkpoint.</p> <p data-svelte-h="svelte-a0ibfa">For a complete example, please check out <a href="https://github.com/huggingface/peft/blob/main/examples/causal_language_modeling/peft_lora_clm_with_additional_tokens.ipynb" rel="nofollow">this notebook</a>.</p> <h4 class="relative group"><a id="full-fine-tuning" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#full-fine-tuning"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Full fine-tuning</span></h4> <p data-svelte-h="svelte-u2918x">Full fine-tuning is more costly in terms of VRAM or storage space but if all else fails, you can fall back to this and see if it works for you. Achieve it by adding the name of the embedding layer to <code>modules_to_save</code>. Note that you need to add tied layers as well, e.g. <code>lm_head</code>. Example for a Mistral model with LoRA:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->config = LoraConfig(..., modules_to_save=[<span class="hljs-string">"embed_tokens"</span>, <span class="hljs-string">"lm_head"</span>], target_modules=[<span class="hljs-string">"q_proj"</span>, <span class="hljs-string">"v_proj"</span>])<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="getting-a-warning-about-weights-not-being-initialized-from-the-model-checkpoint" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#getting-a-warning-about-weights-not-being-initialized-from-the-model-checkpoint"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Getting a warning about “weights not being initialized from the model checkpoint”</span></h3> <p data-svelte-h="svelte-q2uzbp">When you load your PEFT model which has been trained on a task (for example, classification), you may get a warning like:</p> <blockquote data-svelte-h="svelte-ray7nl"><p>Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: [‘score.weight’]. You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.</p></blockquote> <p data-svelte-h="svelte-1cejahg">Although this looks scary, it is most likely nothing to worry about. This warning comes from Transformers, and it isn’t a PEFT specific warning. It lets you know that a randomly initialized classification head (<code>score</code>) is attached to the base model, and the head must be trained to produce sensible predictions.</p> <p data-svelte-h="svelte-m24cow">When you get this warning <em>before</em> training the model, PEFT automatically takes care of making the classification head trainable if you correctly passed the <code>task_type</code> argument to the PEFT config.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> peft <span class="hljs-keyword">import</span> LoraConfig, TaskType | |
| lora_config = LoraConfig(..., task_type=TaskType.SEQ_CLS)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1c6v178">If your classification head does not follow the usual naming conventions from Transformers (which is rare), you have to explicitly tell PEFT the name of the head in <code>modules_to_save</code>.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->lora_config = LoraConfig(..., modules_to_save=[<span class="hljs-string">"name-of-classification-head"</span>])<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-f59lgd">To check the name of the classification head, print the model and it should be the last module.</p> <p data-svelte-h="svelte-k7fvy1">If you get this warning from your inference code, i.e. <em>after</em> training the model, when you load the PEFT model, you always have to load the Transformers model first. Since Transformers does not know that you will load PEFT weights afterwards, it still gives the warning.</p> <p data-svelte-h="svelte-16spx00">As always, it is best practice to ensure the model works correctly for inference by running some validation on it.</p> <h3 class="relative group"><a id="check-layer-and-model-status" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#check-layer-and-model-status"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Check layer and model status</span></h3> <p data-svelte-h="svelte-8fndjb">Sometimes a PEFT model can end up in a bad state, especially when handling multiple adapters. There can be some confusion around what adapters exist, which one is active, which one is merged, etc. To help investigate this issue, call the <a href="/docs/peft/pr_3205/en/package_reference/peft_model#peft.PeftModel.get_layer_status">get_layer_status()</a> and the <a href="/docs/peft/pr_3205/en/package_reference/peft_model#peft.PeftModel.get_model_status">get_model_status()</a> methods.</p> <p data-svelte-h="svelte-1qvbi8f">The <a href="/docs/peft/pr_3205/en/package_reference/peft_model#peft.PeftModel.get_layer_status">get_layer_status()</a> method gives you a detailed overview of each targeted layer’s active, merged, and available adapters.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModel | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> peft <span class="hljs-keyword">import</span> get_peft_model, LoraConfig | |
| <span class="hljs-meta">>>> </span>model_id = <span class="hljs-string">"google/flan-t5-small"</span> | |
| <span class="hljs-meta">>>> </span>model = AutoModel.from_pretrained(model_id) | |
| <span class="hljs-meta">>>> </span>model = get_peft_model(model, LoraConfig()) | |
| <span class="hljs-meta">>>> </span>model.get_layer_status() | |
| [TunerLayerStatus(name=<span class="hljs-string">'model.encoder.block.0.layer.0.SelfAttention.q'</span>, | |
| module_type=<span class="hljs-string">'lora.Linear'</span>, | |
| enabled=<span class="hljs-literal">True</span>, | |
| active_adapters=[<span class="hljs-string">'default'</span>], | |
| merged_adapters=[], | |
| requires_grad={<span class="hljs-string">'default'</span>: <span class="hljs-literal">True</span>}, | |
| available_adapters=[<span class="hljs-string">'default'</span>]), | |
| TunerLayerStatus(name=<span class="hljs-string">'model.encoder.block.0.layer.0.SelfAttention.v'</span>, | |
| module_type=<span class="hljs-string">'lora.Linear'</span>, | |
| enabled=<span class="hljs-literal">True</span>, | |
| active_adapters=[<span class="hljs-string">'default'</span>], | |
| merged_adapters=[], | |
| requires_grad={<span class="hljs-string">'default'</span>: <span class="hljs-literal">True</span>}, | |
| available_adapters=[<span class="hljs-string">'default'</span>]), | |
| ...] | |
| <span class="hljs-meta">>>> </span>model.get_model_status() | |
| TunerModelStatus( | |
| base_model_type=<span class="hljs-string">'T5Model'</span>, | |
| adapter_model_type=<span class="hljs-string">'LoraModel'</span>, | |
| peft_types={<span class="hljs-string">'default'</span>: <span class="hljs-string">'LORA'</span>}, | |
| trainable_params=<span class="hljs-number">344064</span>, | |
| total_params=<span class="hljs-number">60855680</span>, | |
| num_adapter_layers=<span class="hljs-number">48</span>, | |
| enabled=<span class="hljs-literal">True</span>, | |
| active_adapters=[<span class="hljs-string">'default'</span>], | |
| merged_adapters=[], | |
| requires_grad={<span class="hljs-string">'default'</span>: <span class="hljs-literal">True</span>}, | |
| available_adapters=[<span class="hljs-string">'default'</span>], | |
| )<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1gb1r7d">In the model state output, you should look out for entries that say <code>"irregular"</code>. This means PEFT detected an inconsistent state in the model. For instance, if <code>merged_adapters="irregular"</code>, it means that for at least one adapter, it was merged on some target modules but not on others. The inference results will most likely be incorrect as a result.</p> <p data-svelte-h="svelte-19sfzdf">The best way to resolve this issue is to reload the whole model and adapter checkpoint(s). Ensure that you don’t perform any incorrect operations on the model, e.g. manually merging adapters on some modules but not others.</p> <p data-svelte-h="svelte-18r4lni">Convert the layer status into a pandas <code>DataFrame</code> for an easier visual inspection.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> dataclasses <span class="hljs-keyword">import</span> asdict | |
| <span class="hljs-keyword">import</span> pandas <span class="hljs-keyword">as</span> pd | |
| df = pd.DataFrame(asdict(layer) <span class="hljs-keyword">for</span> layer <span class="hljs-keyword">in</span> model.get_layer_status())<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-17rnlhh">It is possible to get this information for non-PEFT models if they are using PEFT layers under the hood, but some information like the <code>base_model_type</code> or the <code>peft_types</code> cannot be determined in that case. As an example, you can call this on a <a href="https://huggingface.co/docs/diffusers/index" rel="nofollow">diffusers</a> model like so:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionPipeline | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> peft <span class="hljs-keyword">import</span> get_model_status, get_layer_status | |
| <span class="hljs-meta">>>> </span>path = <span class="hljs-string">"runwayml/stable-diffusion-v1-5"</span> | |
| <span class="hljs-meta">>>> </span>lora_id = <span class="hljs-string">"takuma104/lora-test-text-encoder-lora-target"</span> | |
| <span class="hljs-meta">>>> </span>pipe = StableDiffusionPipeline.from_pretrained(path, dtype=torch.float16) | |
| <span class="hljs-meta">>>> </span>pipe.load_lora_weights(lora_id, adapter_name=<span class="hljs-string">"adapter-1"</span>) | |
| <span class="hljs-meta">>>> </span>pipe.load_lora_weights(lora_id, adapter_name=<span class="hljs-string">"adapter-2"</span>) | |
| <span class="hljs-meta">>>> </span>pipe.set_lora_device([<span class="hljs-string">"adapter-2"</span>], <span class="hljs-string">"cuda"</span>) | |
| <span class="hljs-meta">>>> </span>get_layer_status(pipe.text_encoder) | |
| [TunerLayerStatus(name=<span class="hljs-string">'text_model.encoder.layers.0.self_attn.k_proj'</span>, | |
| module_type=<span class="hljs-string">'lora.Linear'</span>, | |
| enabled=<span class="hljs-literal">True</span>, | |
| active_adapters=[<span class="hljs-string">'adapter-2'</span>], | |
| merged_adapters=[], | |
| requires_grad={<span class="hljs-string">'adapter-1'</span>: <span class="hljs-literal">False</span>, <span class="hljs-string">'adapter-2'</span>: <span class="hljs-literal">True</span>}, | |
| available_adapters=[<span class="hljs-string">'adapter-1'</span>, <span class="hljs-string">'adapter-2'</span>], | |
| devices={<span class="hljs-string">'adapter-1'</span>: [<span class="hljs-string">'cpu'</span>], <span class="hljs-string">'adapter-2'</span>: [<span class="hljs-string">'cuda'</span>]}), | |
| TunerLayerStatus(name=<span class="hljs-string">'text_model.encoder.layers.0.self_attn.v_proj'</span>, | |
| module_type=<span class="hljs-string">'lora.Linear'</span>, | |
| enabled=<span class="hljs-literal">True</span>, | |
| active_adapters=[<span class="hljs-string">'adapter-2'</span>], | |
| merged_adapters=[], | |
| requires_grad={<span class="hljs-string">'adapter-1'</span>: <span class="hljs-literal">False</span>, <span class="hljs-string">'adapter-2'</span>: <span class="hljs-literal">True</span>}, | |
| devices={<span class="hljs-string">'adapter-1'</span>: [<span class="hljs-string">'cpu'</span>], <span class="hljs-string">'adapter-2'</span>: [<span class="hljs-string">'cuda'</span>]}), | |
| ...] | |
| <span class="hljs-meta">>>> </span>get_model_status(pipe.unet) | |
| TunerModelStatus( | |
| base_model_type=<span class="hljs-string">'other'</span>, | |
| adapter_model_type=<span class="hljs-string">'None'</span>, | |
| peft_types={}, | |
| trainable_params=<span class="hljs-number">797184</span>, | |
| total_params=<span class="hljs-number">861115332</span>, | |
| num_adapter_layers=<span class="hljs-number">128</span>, | |
| enabled=<span class="hljs-literal">True</span>, | |
| active_adapters=[<span class="hljs-string">'adapter-2'</span>], | |
| merged_adapters=[], | |
| requires_grad={<span class="hljs-string">'adapter-1'</span>: <span class="hljs-literal">False</span>, <span class="hljs-string">'adapter-2'</span>: <span class="hljs-literal">True</span>}, | |
| available_adapters=[<span class="hljs-string">'adapter-1'</span>, <span class="hljs-string">'adapter-2'</span>], | |
| devices={<span class="hljs-string">'adapter-1'</span>: [<span class="hljs-string">'cpu'</span>], <span class="hljs-string">'adapter-2'</span>: [<span class="hljs-string">'cuda'</span>]}, | |
| )<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="speed" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#speed"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Speed</span></h2> <h3 class="relative group"><a id="loading-adapter-weights-is-slow" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#loading-adapter-weights-is-slow"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Loading adapter weights is slow</span></h3> <p data-svelte-h="svelte-z2lslk">Loading adapters like LoRA weights should generally be fast compared to loading the base model. However, there can be use cases where the adapter weights are quite large or where users need to load a large number of adapters — the loading time can add up in this case. The reason for this is that the adapter weights are first initialized and then overridden by the loaded weights, which is wasteful. To speed up the loading time, you can pass the <code>low_cpu_mem_usage=True</code> argument to <a href="/docs/peft/pr_3205/en/package_reference/peft_model#peft.PeftModel.from_pretrained">from_pretrained()</a> and <a href="/docs/peft/pr_3205/en/package_reference/peft_model#peft.PeftModel.load_adapter">load_adapter()</a>.</p> <blockquote class="tip" data-svelte-h="svelte-gscfhd"><p>If this option works well across different use cases, it may become the default for adapter loading in the future.</p></blockquote> <h2 class="relative group"><a id="reproducibility" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#reproducibility"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Reproducibility</span></h2> <h3 class="relative group"><a id="models-using-batch-norm" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#models-using-batch-norm"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Models using batch norm</span></h3> <p data-svelte-h="svelte-yt67c7">When loading a trained PEFT model where the base model uses batch norm (e.g. <code>torch.nn.BatchNorm1d</code> or <code>torch.nn.BatchNorm2d</code>), you may find that you cannot reproduce the exact same outputs. This is because the batch norm layers keep track of running stats during training, but these stats are not part of the PEFT checkpoint. Therefore, when you load the PEFT model, the running stats of the base model will be used (i.e. from before training with PEFT).</p> <p data-svelte-h="svelte-wblxho">Depending on your use case, this may not be a big deal. If, however, you need your outputs to be 100% reproducible, you can achieve this by adding the batch norm layers to <code>modules_to_save</code>. Below is an example of this using resnet and LoRA. Notice that we set <code>modules_to_save=["classifier", "normalization"]</code>. We need the <code>"classifier"</code> argument because our task is image classification, and we add the <code>"normalization"</code> argument to ensure that the batch norm layers are saved in the PEFT checkpoint.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForImageClassification | |
| <span class="hljs-keyword">from</span> peft <span class="hljs-keyword">import</span> LoraConfig, get_peft_model | |
| model_id = <span class="hljs-string">"microsoft/resnet-18"</span> | |
| base_model = AutoModelForImageClassification.from_pretrained(self.model_id) | |
| config = LoraConfig( | |
| target_modules=[<span class="hljs-string">"convolution"</span>], | |
| modules_to_save=[<span class="hljs-string">"classifier"</span>, <span class="hljs-string">"normalization"</span>], | |
| ),<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1e3stb1">Depending on the type of model you use, the batch norm layers could have different names than <code>"normalization"</code>, so please ensure that the name matches your model architecture.</p> <h2 class="relative group"><a id="version-mismatch" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#version-mismatch"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Version mismatch</span></h2> <h3 class="relative group"><a id="error-while-loading-the-config-because-of-an-unexpected-keyword-argument" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#error-while-loading-the-config-because-of-an-unexpected-keyword-argument"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Error while loading the config because of an unexpected keyword argument</span></h3> <p data-svelte-h="svelte-5zz36e">When you encounter an error like the one shown below, it means the adapter you’re trying to load was trained with a more recent version of PEFT than the version you have installed on your system.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->TypeError: LoraConfig.__init__() got an unexpected keyword <span class="hljs-variable language_">argument</span> <<span class="hljs-variable language_">argument</span>-name><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1dqw1qx">The best way to resolve this issue is to install the latest PEFT version:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->python -m pip install -U PEFT<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1k207nl">If the adapter was trained from a source install of PEFT (an unreleased version of PEFT), then you also need to install PEFT from source.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->python -m pip install -U git+https://github.com/huggingface/peft.git<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1jt6x31">If it is not possible for you to upgrade PEFT, there is a workaround you can try.</p> <p data-svelte-h="svelte-1a81pzz">Assume the error message says that the unknown keyword argument is named <code>foobar</code>. Search inside the <code>adapter_config.json</code> of this PEFT adapter for the <code>foobar</code> entry and delete it from the file. Then save the file and try loading the model again.</p> <p data-svelte-h="svelte-1wt4m5g">This solution works most of the time. As long as it is the default value for <code>foobar</code>, it can be ignored. However, when it is set to some other value, you will get incorrect results. Upgrading PEFT is the recommended solution.</p> <h2 class="relative group"><a id="adapter-handling" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#adapter-handling"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Adapter handling</span></h2> <h3 class="relative group"><a id="using-multiple-adapters-at-the-same-time" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#using-multiple-adapters-at-the-same-time"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Using multiple adapters at the same time</span></h3> <p data-svelte-h="svelte-11stw7g">PEFT allows you to create more than one adapter on the same model. This can be useful in many situations. For example, for inference, you may want to serve two fine-tuned models from the same base model instead of loading the base model once for each fine-tuned model, which would cost more memory. However, multiple adapters can be activated at the same time. This way, the model may leverage the learnings from all those adapters at the same time. As an example, if you have a diffusion model, you may want to use one LoRA adapter to change the style and a different one to change the subject.</p> <p data-svelte-h="svelte-a9hnjs">Activating multiple adapters at the same time is generally possible on all PEFT methods (LoRA, LoHa, IA³, etc.) except for prompt learning methods (p-tuning, prefix tuning, etc.). The following example illustrates how to achieve this:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForCausalLM | |
| <span class="hljs-keyword">from</span> peft <span class="hljs-keyword">import</span> PeftModel | |
| model_id = ... | |
| base_model = AutoModelForCausalLM.from_pretrained(model_id) | |
| model = PeftModel.from_pretrained(base_model, lora_path_0) <span class="hljs-comment"># default adapter_name is 'default'</span> | |
| model.load_adapter(lora_path_1, adapter_name=<span class="hljs-string">"other"</span>) | |
| <span class="hljs-comment"># the 'other' adapter was loaded but it's not active yet, so to activate both adapters:</span> | |
| model.base_model.set_adapter([<span class="hljs-string">"default"</span>, <span class="hljs-string">"other"</span>])<!-- HTML_TAG_END --></pre></div> <blockquote class="tip" data-svelte-h="svelte-t4lclb"><p>In the example above, you can see that we need to call <code>model.base_model.set_adapter(["default", "other"])</code>. Why can we not call <code>model.set_adapter(["default", "other"])</code>? This is unfortunately not possible because, as explained earlier, some PEFT methods don’t support activating more than one adapter at a time.</p></blockquote> <p data-svelte-h="svelte-164a5vl">It is also possible to train two adapters at the same time, but you should be careful to ensure that the weights of both adapters are known to the optimizer. Otherwise, only one adapter will receive updates.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForCausalLM | |
| <span class="hljs-keyword">from</span> peft <span class="hljs-keyword">import</span> LoraConfig, get_peft_model | |
| model_id = ... | |
| base_model = AutoModelForCausalLM.from_pretrained(model_id) | |
| lora_config_0 = LoraConfig(...) | |
| lora_config_1 = LoraConfig(...) | |
| model = get_peft_model(base_model, lora_config_0) | |
| model.add_adapter(adapter_name=<span class="hljs-string">"other"</span>, peft_config=lora_config_1)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1p5ltyy">If we would now call:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> Trainer | |
| trainer = Trainer(model=model, ...) | |
| trainer.train()<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-93sdsf">or</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->optimizer = torch.optim.AdamW([param <span class="hljs-keyword">for</span> param <span class="hljs-keyword">in</span> model.parameters() <span class="hljs-keyword">if</span> param.requires_grad], ...)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-176115b">then the second LoRA adapter (<code>"other"</code>) would not be trained. This is because it is inactive at this moment, which means the <code>requires_grad</code> attribute on its parameters is set to <code>False</code> and the optimizer will ignore it. Therefore, make sure to activate all adapters that should be trained <em>before</em> initializing the optimizer:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-comment"># activate all adapters</span> | |
| model.base_model.set_adapter([<span class="hljs-string">"default"</span>, <span class="hljs-string">"other"</span>]) | |
| trainer = Trainer(model=model, ...) | |
| trainer.train()<!-- HTML_TAG_END --></pre></div> <blockquote class="tip" data-svelte-h="svelte-awfkct"><p>This section deals with using multiple adapters <em>of the same type</em> on the same model, for example, using multiple LoRA adapters at the same time. It does not apply to using <em>different types</em> of adapters on the same model, for example one LoRA adapter and one LoHa adapter. For this, please check <a href="https://huggingface.co/docs/peft/developer_guides/mixed_models" rel="nofollow"><code>PeftMixedModel</code></a>.</p></blockquote> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/peft/blob/main/docs/source/developer_guides/troubleshooting.md" target="_blank"><svg class="mr-1" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M31,16l-7,7l-1.41-1.41L28.17,16l-5.58-5.59L24,9l7,7z"></path><path d="M1,16l7-7l1.41,1.41L3.83,16l5.58,5.59L8,23l-7-7z"></path><path d="M12.419,25.484L17.639,6.552l1.932,0.518L14.351,26.002z"></path></svg> <span data-svelte-h="svelte-zjs2n5"><span class="underline">Update</span> on GitHub</span></a> <p></p> | |
| <script> | |
| { | |
| __sveltekit_k1ait3 = { | |
| assets: "/docs/peft/pr_3205/en", | |
| base: "/docs/peft/pr_3205/en", | |
| env: {} | |
| }; | |
| const element = document.currentScript.parentElement; | |
| const data = [null,null]; | |
| Promise.all([ | |
| import("/docs/peft/pr_3205/en/_app/immutable/entry/start.6e4f7154.js"), | |
| import("/docs/peft/pr_3205/en/_app/immutable/entry/app.968b774b.js") | |
| ]).then(([kit, app]) => { | |
| kit.start(app, element, { | |
| node_ids: [0, 17], | |
| data, | |
| form: null, | |
| error: null | |
| }); | |
| }); | |
| } | |
| </script> | |
Xet Storage Details
- Size:
- 103 kB
- Xet hash:
- 12308e48a28f1d24f347958316244ecd3bbb2f809e06bd4c29f46d4aab2ec441
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.