Buckets:

download
raw
56.6 kB
<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Custom models&quot;,&quot;local&quot;:&quot;custom-models&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Multilayer perceptron&quot;,&quot;local&quot;:&quot;multilayer-perceptron&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;timm models&quot;,&quot;local&quot;:&quot;timm-models&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;New transformers architectures&quot;,&quot;local&quot;:&quot;new-transformers-architectures&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Verify parameters and layers&quot;,&quot;local&quot;:&quot;verify-parameters-and-layers&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Unsupported module types&quot;,&quot;local&quot;:&quot;unsupported-module-types&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Experimental support for dynamic dispatch of custom modules in LoRA&quot;,&quot;local&quot;:&quot;experimental-support-for-dynamic-dispatch-of-custom-modules-in-lora&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2}],&quot;depth&quot;:1}">
<link href="/docs/peft/pr_3206/en/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload">
<link rel="modulepreload" href="/docs/peft/pr_3206/en/_app/immutable/entry/start.06e1cdbd.js">
<link rel="modulepreload" href="/docs/peft/pr_3206/en/_app/immutable/chunks/scheduler.78382b47.js">
<link rel="modulepreload" href="/docs/peft/pr_3206/en/_app/immutable/chunks/singletons.25ed789f.js">
<link rel="modulepreload" href="/docs/peft/pr_3206/en/_app/immutable/chunks/index.fadd215c.js">
<link rel="modulepreload" href="/docs/peft/pr_3206/en/_app/immutable/chunks/paths.d98268af.js">
<link rel="modulepreload" href="/docs/peft/pr_3206/en/_app/immutable/entry/app.cb2a6689.js">
<link rel="modulepreload" href="/docs/peft/pr_3206/en/_app/immutable/chunks/preload-helper.1df7c689.js">
<link rel="modulepreload" href="/docs/peft/pr_3206/en/_app/immutable/chunks/index.6dd35eb6.js">
<link rel="modulepreload" href="/docs/peft/pr_3206/en/_app/immutable/nodes/0.fa4b1245.js">
<link rel="modulepreload" href="/docs/peft/pr_3206/en/_app/immutable/chunks/each.e59479a4.js">
<link rel="modulepreload" href="/docs/peft/pr_3206/en/_app/immutable/nodes/10.b04f3785.js">
<link rel="modulepreload" href="/docs/peft/pr_3206/en/_app/immutable/chunks/MermaidChart.svelte_svelte_type_style_lang.db10b59f.js">
<link rel="modulepreload" href="/docs/peft/pr_3206/en/_app/immutable/chunks/CodeBlock.e9241c92.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Custom models&quot;,&quot;local&quot;:&quot;custom-models&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Multilayer perceptron&quot;,&quot;local&quot;:&quot;multilayer-perceptron&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;timm models&quot;,&quot;local&quot;:&quot;timm-models&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;New transformers architectures&quot;,&quot;local&quot;:&quot;new-transformers-architectures&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Verify parameters and layers&quot;,&quot;local&quot;:&quot;verify-parameters-and-layers&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Unsupported module types&quot;,&quot;local&quot;:&quot;unsupported-module-types&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Experimental support for dynamic dispatch of custom modules in LoRA&quot;,&quot;local&quot;:&quot;experimental-support-for-dynamic-dispatch-of-custom-modules-in-lora&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2}],&quot;depth&quot;:1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <div class="items-center shrink-0 min-w-[100px] max-sm:min-w-[50px] justify-end ml-auto flex" style="float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"><div class="inline-flex rounded-md max-sm:rounded-sm"><button class="inline-flex items-center gap-1 h-7 max-sm:h-7 px-2 max-sm:px-1.5 text-sm font-medium text-gray-800 border border-r-0 rounded-l-md max-sm:rounded-l-sm border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-live="polite"><span class="inline-flex items-center justify-center rounded-md p-0.5 max-sm:p-0 
hover:text-gray-800 dark:hover:text-gray-200"><svg class="sm:size-3.5 size-3" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg></span> <span>Copy page</span></button> <button class="inline-flex items-center justify-center w-6 max-sm:w-5 h-7 max-sm:h-7 disabled:pointer-events-none text-sm text-gray-500 hover:text-gray-700 dark:hover:text-white rounded-r-md max-sm:rounded-r-sm border border-l transition border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-haspopup="menu" aria-expanded="false" aria-label="Open copy menu"><svg class="transition-transform text-gray-400 overflow-visible sm:size-3.5 size-3 rotate-0" width="1em" height="1em" viewBox="0 0 12 7" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M1 1L6 6L11 1" stroke="currentColor"></path></svg></button></div> </div> <h1 class="relative group"><a id="custom-models" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#custom-models"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 
79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Custom models</span></h1> <p data-svelte-h="svelte-1twj2ga">Some fine-tuning techniques, such as prompt tuning, are specific to language models. That means in 🤗 PEFT, it is
assumed a 🤗 Transformers model is being used. However, other fine-tuning techniques - like
<a href="../conceptual_guides/lora">LoRA</a> - are not restricted to specific model types.</p> <p data-svelte-h="svelte-14u0q1">In this guide, we will see how LoRA can be applied to a multilayer perceptron, a computer vision model from the <a href="https://huggingface.co/docs/timm/index" rel="nofollow">timm</a> library, or a new 🤗 Transformers architecture.</p> <h2 class="relative group"><a id="multilayer-perceptron" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#multilayer-perceptron"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Multilayer perceptron</span></h2> <p data-svelte-h="svelte-1vzm18b">Let’s assume that we want to fine-tune a multilayer perceptron with LoRA. 
Here is the definition:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> torch <span class="hljs-keyword">import</span> nn
<span class="hljs-keyword">class</span> <span class="hljs-title class_">MLP</span>(nn.Module):
<span class="hljs-keyword">def</span> <span class="hljs-title function_">__init__</span>(<span class="hljs-params">self, num_units_hidden=<span class="hljs-number">2000</span></span>):
<span class="hljs-built_in">super</span>().__init__()
self.seq = nn.Sequential(
nn.Linear(<span class="hljs-number">20</span>, num_units_hidden),
nn.ReLU(),
nn.Linear(num_units_hidden, num_units_hidden),
nn.ReLU(),
nn.Linear(num_units_hidden, <span class="hljs-number">2</span>),
nn.LogSoftmax(dim=-<span class="hljs-number">1</span>),
)
<span class="hljs-keyword">def</span> <span class="hljs-title function_">forward</span>(<span class="hljs-params">self, X</span>):
<span class="hljs-keyword">return</span> self.seq(X)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-ifflo">This is a straightforward multilayer perceptron with an input layer, a hidden layer, and an output layer.</p> <blockquote class="tip" data-svelte-h="svelte-z1rh00"><p>For this toy example, we choose an exceedingly large number of hidden units to highlight the efficiency gains
from PEFT, but those gains are in line with more realistic examples.</p></blockquote> <p data-svelte-h="svelte-19hnh19">There are a few linear layers in this model that could be tuned with LoRA. When working with common 🤗 Transformers
models, PEFT will know which layers to apply LoRA to, but in this case, it is up to us as users to choose the layers.
To determine the names of the layers to tune:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-built_in">print</span>([(n, <span class="hljs-built_in">type</span>(m)) <span class="hljs-keyword">for</span> n, m <span class="hljs-keyword">in</span> MLP().named_modules()])<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-47m0b6">This should print:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid 
meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->[(&#x27;&#x27;, __main__.MLP),
(<span class="hljs-symbol">&#x27;seq</span>&#x27;, torch.nn.modules.container.Sequential),
(<span class="hljs-symbol">&#x27;seq.0</span>&#x27;, torch.nn.modules.linear.Linear),
(<span class="hljs-symbol">&#x27;seq.1</span>&#x27;, torch.nn.modules.activation.ReLU),
(<span class="hljs-symbol">&#x27;seq.2</span>&#x27;, torch.nn.modules.linear.Linear),
(<span class="hljs-symbol">&#x27;seq.3</span>&#x27;, torch.nn.modules.activation.ReLU),
(<span class="hljs-symbol">&#x27;seq.4</span>&#x27;, torch.nn.modules.linear.Linear),
(<span class="hljs-symbol">&#x27;seq.5</span>&#x27;, torch.nn.modules.activation.LogSoftmax)]<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-hvbljg">Let’s say we want to apply LoRA to the input layer and to the hidden layer, those are <code>&#39;seq.0&#39;</code> and <code>&#39;seq.2&#39;</code>. Moreover,
let’s assume we want to update the output layer without LoRA, that would be <code>&#39;seq.4&#39;</code>. The corresponding config would
be:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> peft <span class="hljs-keyword">import</span> LoraConfig
config = LoraConfig(
target_modules=[<span class="hljs-string">&quot;seq.0&quot;</span>, <span class="hljs-string">&quot;seq.2&quot;</span>],
modules_to_save=[<span class="hljs-string">&quot;seq.4&quot;</span>],
)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-12m4713">With that, we can create our PEFT model and check the fraction of parameters trained:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> peft <span class="hljs-keyword">import</span> get_peft_model
model = MLP()
peft_model = get_peft_model(model, config)
peft_model.print_trainable_parameters()
<span class="hljs-comment"># prints trainable params: 56,164 || all params: 4,100,164 || trainable%: 1.369798866581922</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-3iyeuu">Finally, we can use any training framework we like, or write our own fit loop, to train the <code>peft_model</code>.</p> <p data-svelte-h="svelte-12a8po">For a complete example, check out <a href="https://github.com/huggingface/peft/blob/main/examples/multilayer_perceptron/multilayer_perceptron_lora.ipynb" rel="nofollow">this notebook</a>.</p> <h2 class="relative group"><a id="timm-models" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#timm-models"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>timm models</span></h2> <p data-svelte-h="svelte-1el2qhw">The <a href="https://huggingface.co/docs/timm/index" rel="nofollow">timm</a> library contains a large number of pretrained computer vision models.
Those can also be fine-tuned with PEFT. Let’s check out how this works in practice.</p> <p data-svelte-h="svelte-132k8n8">To start, ensure that timm is installed in the Python environment:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->python -m pip install -U timm<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1i7knza">Next we load a timm model for an image classification task:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" 
preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> timm
num_classes = ...
model_id = <span class="hljs-string">&quot;timm/poolformer_m36.sail_in1k&quot;</span>
model = timm.create_model(model_id, pretrained=<span class="hljs-literal">True</span>, num_classes=num_classes)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-66d7qt">Again, we need to make a decision about what layers to apply LoRA to. Since LoRA supports 2D conv layers, and since
those are a major building block of this model, we should apply LoRA to the 2D conv layers. To identify the names of
those layers, let’s look at all the layer names:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-built_in">print</span>([(n, <span class="hljs-built_in">type</span>(m)) <span class="hljs-keyword">for</span> n, m <span class="hljs-keyword">in</span> model.named_modules()])<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-132x2pj">This will print a very long list, we’ll only show the first few:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" 
width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->[(<span class="hljs-string">&#x27;&#x27;</span>, timm.models.metaformer.MetaFormer),
(<span class="hljs-string">&#x27;stem&#x27;</span>, timm.models.metaformer.Stem),
(<span class="hljs-string">&#x27;stem.conv&#x27;</span>, torch.<span class="hljs-keyword">nn</span>.modules.conv.Conv2d),
(<span class="hljs-string">&#x27;stem.norm&#x27;</span>, torch.<span class="hljs-keyword">nn</span>.modules.linear.Identity),
(<span class="hljs-string">&#x27;stages&#x27;</span>, torch.<span class="hljs-keyword">nn</span>.modules.container.Sequential),
(<span class="hljs-string">&#x27;stages.0&#x27;</span>, timm.models.metaformer.MetaFormerStage),
(<span class="hljs-string">&#x27;stages.0.downsample&#x27;</span>, torch.<span class="hljs-keyword">nn</span>.modules.linear.Identity),
(<span class="hljs-string">&#x27;stages.0.blocks&#x27;</span>, torch.<span class="hljs-keyword">nn</span>.modules.container.Sequential),
(<span class="hljs-string">&#x27;stages.0.blocks.0&#x27;</span>, timm.models.metaformer.MetaFormerBlock),
(<span class="hljs-string">&#x27;stages.0.blocks.0.norm1&#x27;</span>, timm.layers.<span class="hljs-keyword">norm</span>.GroupNorm1),
(<span class="hljs-string">&#x27;stages.0.blocks.0.token_mixer&#x27;</span>, timm.models.metaformer.Pooling),
(<span class="hljs-string">&#x27;stages.0.blocks.0.token_mixer.pool&#x27;</span>, torch.<span class="hljs-keyword">nn</span>.modules.pooling.AvgPool2d),
(<span class="hljs-string">&#x27;stages.0.blocks.0.drop_path1&#x27;</span>, torch.<span class="hljs-keyword">nn</span>.modules.linear.Identity),
(<span class="hljs-string">&#x27;stages.0.blocks.0.layer_scale1&#x27;</span>, timm.models.metaformer.Scale),
(<span class="hljs-string">&#x27;stages.0.blocks.0.res_scale1&#x27;</span>, torch.<span class="hljs-keyword">nn</span>.modules.linear.Identity),
(<span class="hljs-string">&#x27;stages.0.blocks.0.norm2&#x27;</span>, timm.layers.<span class="hljs-keyword">norm</span>.GroupNorm1),
(<span class="hljs-string">&#x27;stages.0.blocks.0.mlp&#x27;</span>, timm.layers.mlp.Mlp),
(<span class="hljs-string">&#x27;stages.0.blocks.0.mlp.fc1&#x27;</span>, torch.<span class="hljs-keyword">nn</span>.modules.conv.Conv2d),
(<span class="hljs-string">&#x27;stages.0.blocks.0.mlp.act&#x27;</span>, torch.<span class="hljs-keyword">nn</span>.modules.activation.GELU),
(<span class="hljs-string">&#x27;stages.0.blocks.0.mlp.drop1&#x27;</span>, torch.<span class="hljs-keyword">nn</span>.modules.dropout.Dropout),
(<span class="hljs-string">&#x27;stages.0.blocks.0.mlp.norm&#x27;</span>, torch.<span class="hljs-keyword">nn</span>.modules.linear.Identity),
(<span class="hljs-string">&#x27;stages.0.blocks.0.mlp.fc2&#x27;</span>, torch.<span class="hljs-keyword">nn</span>.modules.conv.Conv2d),
(<span class="hljs-string">&#x27;stages.0.blocks.0.mlp.drop2&#x27;</span>, torch.<span class="hljs-keyword">nn</span>.modules.dropout.Dropout),
(<span class="hljs-string">&#x27;stages.0.blocks.0.drop_path2&#x27;</span>, torch.<span class="hljs-keyword">nn</span>.modules.linear.Identity),
(<span class="hljs-string">&#x27;stages.0.blocks.0.layer_scale2&#x27;</span>, timm.models.metaformer.Scale),
(<span class="hljs-string">&#x27;stages.0.blocks.0.res_scale2&#x27;</span>, torch.<span class="hljs-keyword">nn</span>.modules.linear.Identity),
(<span class="hljs-string">&#x27;stages.0.blocks.1&#x27;</span>, timm.models.metaformer.MetaFormerBlock),
(<span class="hljs-string">&#x27;stages.0.blocks.1.norm1&#x27;</span>, timm.layers.<span class="hljs-keyword">norm</span>.GroupNorm1),
(<span class="hljs-string">&#x27;stages.0.blocks.1.token_mixer&#x27;</span>, timm.models.metaformer.Pooling),
(<span class="hljs-string">&#x27;stages.0.blocks.1.token_mixer.pool&#x27;</span>, torch.<span class="hljs-keyword">nn</span>.modules.pooling.AvgPool2d),
...
(<span class="hljs-string">&#x27;head.global_pool.flatten&#x27;</span>, torch.<span class="hljs-keyword">nn</span>.modules.linear.Identity),
(<span class="hljs-string">&#x27;head.norm&#x27;</span>, timm.layers.<span class="hljs-keyword">norm</span>.LayerNorm2d),
(<span class="hljs-string">&#x27;head.flatten&#x27;</span>, torch.<span class="hljs-keyword">nn</span>.modules.flatten.Flatten),
(<span class="hljs-string">&#x27;head.drop&#x27;</span>, torch.<span class="hljs-keyword">nn</span>.modules.linear.Identity),
(<span class="hljs-string">&#x27;head.fc&#x27;</span>, torch.<span class="hljs-keyword">nn</span>.modules.linear.Linear)
]<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-odd6w6">Upon closer inspection, we see that the 2D conv layers have names such as <code>&quot;stages.0.blocks.0.mlp.fc1&quot;</code> and
<code>&quot;stages.0.blocks.0.mlp.fc2&quot;</code>. How can we match those layer names specifically? You can write a <a href="https://docs.python.org/3/library/re.html" rel="nofollow">regular
expression</a> to match the layer names. For our case, the regex
<code>r&quot;.*\.mlp\.fc\d&quot;</code> should do the job.</p> <p data-svelte-h="svelte-vtvaop">Furthermore, as in the first example, we should ensure that the output layer, in this case the classification head, is
also updated. Looking at the end of the list printed above, we can see that it’s named <code>&#39;head.fc&#39;</code>. With that in mind,
here is our LoRA config:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->config = LoraConfig(target_modules=<span class="hljs-string">r&quot;.*\.mlp\.fc\d&quot;</span>, modules_to_save=[<span class="hljs-string">&quot;head.fc&quot;</span>])<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1ym7j5n">Then we only need to create the PEFT model by passing our base model and the config to <code>get_peft_model</code>:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" 
width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->peft_model = get_peft_model(model, config)
peft_model.print_trainable_parameters()
<span class="hljs-comment"># prints trainable params: 1,064,454 || all params: 56,467,974 || trainable%: 1.88505789139876</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1a6hk12">This shows us that we only need to train less than 2% of all parameters, which is a huge efficiency gain.</p> <p data-svelte-h="svelte-13ceymq">For a complete example, check out <a href="https://github.com/huggingface/peft/blob/main/examples/image_classification/image_classification_timm_peft_lora.ipynb" rel="nofollow">this notebook</a>.</p> <h2 class="relative group"><a id="new-transformers-architectures" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#new-transformers-architectures"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>New transformers architectures</span></h2> <p data-svelte-h="svelte-zh7y7t">When new popular transformers architectures are released, we do our best to quickly add them to PEFT. If you come across a transformers model that is not supported out of the box, don’t worry, it will most likely still work if the config is set correctly. 
Specifically, you have to identify the layers that should be adapted and set them correctly when initializing the corresponding config class, e.g. <code>LoraConfig</code>. Here are some tips to help with this.</p> <p data-svelte-h="svelte-1hpkhh2">As a first step, it is a good idea to check the existing models for inspiration. You can find them inside of <a href="https://github.com/huggingface/peft/blob/main/src/peft/utils/constants.py" rel="nofollow">constants.py</a> in the PEFT repository. Often, you’ll find a similar architecture that uses the same names. For example, if the new model architecture is a variation of the “mistral” model and you want to apply LoRA, you can see that the entry for “mistral” in <code>TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING</code> contains <code>[&quot;q_proj&quot;, &quot;v_proj&quot;]</code>. This tells you that for “mistral” models, the <code>target_modules</code> for LoRA should be <code>[&quot;q_proj&quot;, &quot;v_proj&quot;]</code>:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform 
-translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> peft <span class="hljs-keyword">import</span> LoraConfig, get_peft_model
my_mistral_model = ...
config = LoraConfig(
target_modules=[<span class="hljs-string">&quot;q_proj&quot;</span>, <span class="hljs-string">&quot;v_proj&quot;</span>],
..., <span class="hljs-comment"># other LoRA arguments</span>
)
peft_model = get_peft_model(my_mistral_model, config)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-tkcysf">If that doesn’t help, check the existing modules in your model architecture with the <code>named_modules</code> method and try to identify the attention layers, especially the key, query, and value layers. Those will often have names such as <code>c_attn</code>, <code>query</code>, <code>q_proj</code>, etc. The key layer is not always adapted, and ideally, you should check whether including it results in better performance.</p> <p data-svelte-h="svelte-1x641jx">Additionally, linear layers are common targets to be adapted (e.g. in the <a href="https://huggingface.co/papers/2305.14314" rel="nofollow">QLoRA paper</a>, the authors suggest adapting them as well). Their names will often contain the strings <code>fc</code> or <code>dense</code>.</p> <p data-svelte-h="svelte-485c9z">If you want to add a new model to PEFT, please create an entry in <a href="https://github.com/huggingface/peft/blob/main/src/peft/utils/constants.py" rel="nofollow">constants.py</a> and open a pull request on the <a href="https://github.com/huggingface/peft/pulls" rel="nofollow">repository</a>. 
Don’t forget to update the <a href="https://github.com/huggingface/peft#models-support-matrix" rel="nofollow">README</a> as well.</p> <h2 class="relative group"><a id="verify-parameters-and-layers" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#verify-parameters-and-layers"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Verify parameters and layers</span></h2> <p data-svelte-h="svelte-g4uxja">You can verify whether you’ve correctly applied a PEFT method to your model in a few ways.</p> <ul data-svelte-h="svelte-1eynrjm"><li>Check the fraction of parameters that are trainable with the <a href="/docs/peft/pr_3206/en/package_reference/peft_model#peft.PeftModel.print_trainable_parameters">print_trainable_parameters()</a> method. If this number is lower or higher than expected, check the model <code>repr</code> by printing the model. This shows the names of all the layer types in the model. Ensure that only the intended target layers are replaced by the adapter layers. 
For example, if LoRA is applied to <code>nn.Linear</code> layers, then you should only see <code>lora.Linear</code> layers being used.</li></ul> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->peft_model.print_trainable_parameters()<!-- HTML_TAG_END --></pre></div> <ul data-svelte-h="svelte-1lks9wd"><li>Another way you can view the adapted layers is to use the <code>targeted_module_names</code> attribute to list the name of each module that was adapted.</li></ul> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" 
fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-built_in">print</span>(peft_model.targeted_module_names)<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="unsupported-module-types" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#unsupported-module-types"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> 
<span>Unsupported module types</span></h2> <p data-svelte-h="svelte-1nivrxo">Methods like LoRA only work if the target modules are supported by PEFT. For example, it’s possible to apply LoRA to <code>nn.Linear</code> and <code>nn.Conv2d</code> layers, but not, for instance, to <code>nn.LSTM</code>. If you find a layer class you want to apply PEFT to is not supported, you can:</p> <ul data-svelte-h="svelte-1pyzk0w"><li>define a custom mapping to dynamically dispatch custom modules in LoRA</li> <li>open an <a href="https://github.com/huggingface/peft/issues" rel="nofollow">issue</a> and request the feature where maintainers will implement it or guide you on how to implement it yourself if demand for this module type is sufficiently high</li></ul> <h3 class="relative group"><a id="experimental-support-for-dynamic-dispatch-of-custom-modules-in-lora" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#experimental-support-for-dynamic-dispatch-of-custom-modules-in-lora"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Experimental support for dynamic dispatch of custom modules in LoRA</span></h3> <blockquote class="warning" 
data-svelte-h="svelte-1vrnyaz"><p>This feature is experimental and subject to change, depending on its reception by the community. We will introduce a public and stable API if there is significant demand for it.</p></blockquote> <p data-svelte-h="svelte-1ve93t1">PEFT supports an experimental API for custom module types for LoRA. Let’s assume you have a LoRA implementation for LSTMs. Normally, you would not be able to tell PEFT to use it, even if it would theoretically work with PEFT. However, this is possible with dynamic dispatch of custom layers.</p> <p data-svelte-h="svelte-hzxzie">The experimental API currently looks like this:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">class</span> <span class="hljs-title class_">MyLoraLSTMLayer</span>:
...
base_model = ... <span class="hljs-comment"># load the base model that uses LSTMs</span>
<span class="hljs-comment"># add the LSTM layer names to target_modules</span>
config = LoraConfig(..., target_modules=[<span class="hljs-string">&quot;lstm&quot;</span>])
<span class="hljs-comment"># define a mapping from base layer type to LoRA layer type</span>
custom_module_mapping = {nn.LSTM: MyLoraLSTMLayer}
<span class="hljs-comment"># register the new mapping</span>
config._register_custom_module(custom_module_mapping)
<span class="hljs-comment"># after registration, create the PEFT model</span>
peft_model = get_peft_model(base_model, config)
<span class="hljs-comment"># do training</span><!-- HTML_TAG_END --></pre></div> <blockquote class="tip" data-svelte-h="svelte-1t64b01"><p>When you call <a href="/docs/peft/pr_3206/en/package_reference/peft_model#peft.get_peft_model">get_peft_model()</a>, you will see a warning because PEFT does not recognize the targeted module type. In this case, you can ignore this warning.</p></blockquote> <p data-svelte-h="svelte-1rh0r2d">By supplying a custom mapping, PEFT first checks the base model’s layers against the custom mapping and dispatches to the custom LoRA layer type if there is a match. If there is no match, PEFT checks the built-in LoRA layer types for a match.</p> <p data-svelte-h="svelte-1p8m775">Therefore, this feature can also be used to override existing dispatch logic, e.g. if you want to use your own LoRA layer for <code>nn.Linear</code> instead of using the one provided by PEFT.</p> <p data-svelte-h="svelte-qp8f4l">When creating your custom LoRA module, please follow the same rules as the <a href="https://github.com/huggingface/peft/blob/main/src/peft/tuners/lora/layer.py" rel="nofollow">existing LoRA modules</a>. Some important constraints to consider:</p> <ul data-svelte-h="svelte-c8ydbv"><li>The custom module should inherit from <code>nn.Module</code> and <code>peft.tuners.lora.layer.LoraLayer</code>.</li> <li>The <code>__init__</code> method of the custom module should have the positional arguments <code>base_layer</code> and <code>adapter_name</code>. After this, there are additional <code>**kwargs</code> that you are free to use or ignore.</li> <li>The learnable parameters should be stored in an <code>nn.ModuleDict</code> or <code>nn.ParameterDict</code>, where the key corresponds to the name of the specific adapter (remember that a model can have more than one adapter at a time).</li> <li>The name of these learnable parameter attributes should start with <code>&quot;lora_&quot;</code>, e.g. 
<code>self.lora_new_param = ...</code>.</li> <li>Some methods are optional, e.g. you only need to implement <code>merge</code> and <code>unmerge</code> if you want to support weight merging.</li></ul> <p data-svelte-h="svelte-1bmvvdg">Currently, the information about the custom module does not persist when you save the model. When loading the model, you have to register the custom modules again.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-comment"># saving works as always and includes the parameters of the custom modules</span>
peft_model.save_pretrained(&lt;model-path&gt;)
<span class="hljs-comment"># loading the model later:</span>
base_model = ...
<span class="hljs-comment"># load the LoRA config that you saved earlier</span>
config = LoraConfig.from_pretrained(&lt;model-path&gt;)
<span class="hljs-comment"># register the custom module again, the same way as the first time</span>
custom_module_mapping = {nn.LSTM: MyLoraLSTMLayer}
config._register_custom_module(custom_module_mapping)
<span class="hljs-comment"># pass the config instance to from_pretrained:</span>
peft_model = PeftModel.from_pretrained(base_model, &lt;model-path&gt;, config=config)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-n2s6f5">If you use this feature and find it useful, or if you encounter problems, let us know by creating an issue or a discussion on GitHub. This allows us to estimate the demand for this feature and add a public API if it is sufficiently high.</p> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/peft/blob/main/docs/source/developer_guides/custom_models.md" target="_blank"><svg class="mr-1" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M31,16l-7,7l-1.41-1.41L28.17,16l-5.58-5.59L24,9l7,7z"></path><path d="M1,16l7-7l1.41,1.41L3.83,16l5.58,5.59L8,23l-7-7z"></path><path d="M12.419,25.484L17.639,6.552l1.932,0.518L14.351,26.002z"></path></svg> <span data-svelte-h="svelte-zjs2n5"><span class="underline">Update</span> on GitHub</span></a> <p></p>
<script>
{
// Page bootstrap: records the path settings for this docs build, then loads
// the two entry modules and hands control to the first one's start().
// NOTE(review): the naming and the _app/immutable paths suggest this is
// framework-generated output rather than hand-written code — confirm before
// editing it by hand.

// Assigned without a declaration keyword. `assets` and `base` hold the same
// docs root path; `env` is an empty object.
__sveltekit_1pyekj7 = {
assets: "/docs/peft/pr_3206/en",
base: "/docs/peft/pr_3206/en",
env: {}
};
// Parent element of the <script> tag that is currently executing; it is
// passed to start() below.
// NOTE(review): document.currentScript is only populated while the script
// runs synchronously, so this read should stay outside the .then() callback.
const element = document.currentScript.parentElement;
// Two null entries — presumably one per id in node_ids below; confirm
// against the framework before changing either list.
const data = [null,null];
// Request both entry modules with dynamic import() and continue only once
// both have resolved.
Promise.all([
import("/docs/peft/pr_3206/en/_app/immutable/entry/start.06e1cdbd.js"),
import("/docs/peft/pr_3206/en/_app/immutable/entry/app.cb2a6689.js")
]).then(([kit, app]) => {
// `kit` is the module from start.*.js and `app` the module from app.*.js,
// following the order of the array above.
kit.start(app, element, {
node_ids: [0, 10],
data,
form: null,
error: null
});
});
}
</script>

Xet Storage Details

Size:
56.6 kB
·
Xet hash:
5fd6e1bc63a68ea2e8d34feed0afb592392093e9cf266d1adbc818768fa124a9

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.