Buckets:

hf-doc-build
/

doc-dev

Files

xet

hf-doc-build/doc-dev / peft /main /en /developer_guides /lora.html

rtrm

3 months ago

download

raw

84.4 kB

	<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{"title":"LoRA","local":"lora","sections":[{"title":"Initialization","local":"initialization","sections":[{"title":"PiSSA","local":"pissa","sections":[],"depth":3},{"title":"OLoRA","local":"olora","sections":[],"depth":3},{"title":"LoftQ","local":"loftq","sections":[{"title":"Standard approach","local":"standard-approach","sections":[],"depth":4},{"title":"A more convenient way","local":"a-more-convenient-way","sections":[],"depth":4}],"depth":3},{"title":"Rank-stabilized LoRA","local":"rank-stabilized-lora","sections":[],"depth":3},{"title":"Weight-Decomposed Low-Rank Adaptation (DoRA)","local":"weight-decomposed-low-rank-adaptation-dora","sections":[{"title":"Caveats","local":"caveats","sections":[],"depth":4}],"depth":3},{"title":"QLoRA-style training","local":"qlora-style-training","sections":[],"depth":3},{"title":"Memory efficient Layer Replication with LoRA","local":"memory-efficient-layer-replication-with-lora","sections":[],"depth":3}],"depth":2},{"title":"Optimizers","local":"optimizers","sections":[{"title":"LoRA+ optimized LoRA","local":"lora-optimized-lora","sections":[],"depth":3}],"depth":2},{"title":"Merge LoRA weights into the base model","local":"merge-lora-weights-into-the-base-model","sections":[],"depth":2},{"title":"Load adapters","local":"load-adapters","sections":[],"depth":2},{"title":"Inference with different LoRA adapters in the same batch","local":"inference-with-different-lora-adapters-in-the-same-batch","sections":[{"title":"Caveats","local":"caveats","sections":[],"depth":3}],"depth":2}],"depth":1}">
	<link href="/docs/peft/main/en/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload">
	<link rel="modulepreload" href="/docs/peft/main/en/_app/immutable/entry/start.b1913684.js">
	<link rel="modulepreload" href="/docs/peft/main/en/_app/immutable/chunks/scheduler.c57aa7ef.js">
	<link rel="modulepreload" href="/docs/peft/main/en/_app/immutable/chunks/singletons.6b6c8b9d.js">
	<link rel="modulepreload" href="/docs/peft/main/en/_app/immutable/chunks/index.c4ac7298.js">
	<link rel="modulepreload" href="/docs/peft/main/en/_app/immutable/chunks/paths.62c814b0.js">
	<link rel="modulepreload" href="/docs/peft/main/en/_app/immutable/entry/app.ca8eed50.js">
	<link rel="modulepreload" href="/docs/peft/main/en/_app/immutable/chunks/index.c50cb18e.js">
	<link rel="modulepreload" href="/docs/peft/main/en/_app/immutable/nodes/0.f0f7697b.js">
	<link rel="modulepreload" href="/docs/peft/main/en/_app/immutable/chunks/each.e59479a4.js">
	<link rel="modulepreload" href="/docs/peft/main/en/_app/immutable/nodes/11.98775eb0.js">
	<link rel="modulepreload" href="/docs/peft/main/en/_app/immutable/chunks/Tip.9268b0ca.js">
	<link rel="modulepreload" href="/docs/peft/main/en/_app/immutable/chunks/CodeBlock.34f0a53d.js">
	<link rel="modulepreload" href="/docs/peft/main/en/_app/immutable/chunks/EditOnGithub.958a8a49.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{"title":"LoRA","local":"lora","sections":[{"title":"Initialization","local":"initialization","sections":[{"title":"PiSSA","local":"pissa","sections":[],"depth":3},{"title":"OLoRA","local":"olora","sections":[],"depth":3},{"title":"LoftQ","local":"loftq","sections":[{"title":"Standard approach","local":"standard-approach","sections":[],"depth":4},{"title":"A more convenient way","local":"a-more-convenient-way","sections":[],"depth":4}],"depth":3},{"title":"Rank-stabilized LoRA","local":"rank-stabilized-lora","sections":[],"depth":3},{"title":"Weight-Decomposed Low-Rank Adaptation (DoRA)","local":"weight-decomposed-low-rank-adaptation-dora","sections":[{"title":"Caveats","local":"caveats","sections":[],"depth":4}],"depth":3},{"title":"QLoRA-style training","local":"qlora-style-training","sections":[],"depth":3},{"title":"Memory efficient Layer Replication with LoRA","local":"memory-efficient-layer-replication-with-lora","sections":[],"depth":3}],"depth":2},{"title":"Optimizers","local":"optimizers","sections":[{"title":"LoRA+ optimized LoRA","local":"lora-optimized-lora","sections":[],"depth":3}],"depth":2},{"title":"Merge LoRA weights into the base model","local":"merge-lora-weights-into-the-base-model","sections":[],"depth":2},{"title":"Load adapters","local":"load-adapters","sections":[],"depth":2},{"title":"Inference with different LoRA adapters in the same batch","local":"inference-with-different-lora-adapters-in-the-same-batch","sections":[{"title":"Caveats","local":"caveats","sections":[],"depth":3}],"depth":2}],"depth":1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <h1 class="relative group"><a id="lora" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#lora"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>LoRA</span></h1> <p data-svelte-h="svelte-38uxl6">LoRA is low-rank decomposition method to reduce the number of trainable parameters which speeds up finetuning large models and uses less memory. In PEFT, using LoRA is as easy as setting up a <a href="/docs/peft/main/en/package_reference/lora#peft.LoraConfig">LoraConfig</a> and wrapping it with <a href="/docs/peft/main/en/package_reference/peft_model#peft.get_peft_model">get_peft_model()</a> to create a trainable <a href="/docs/peft/main/en/package_reference/peft_model#peft.PeftModel">PeftModel</a>.</p> <p data-svelte-h="svelte-ovsn32">This guide explores in more detail other options and features for using LoRA.</p> <h2 class="relative group"><a id="initialization" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#initialization"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Initialization</span></h2> <p data-svelte-h="svelte-j883mf">The initialization of LoRA weights is controlled by the parameter <code>init_lora_weights</code> in <a href="/docs/peft/main/en/package_reference/lora#peft.LoraConfig">LoraConfig</a>. By default, PEFT initializes LoRA weights with Kaiming-uniform for weight A and zeros for weight B resulting in an identity transform (same as the reference <a href="https://github.com/microsoft/LoRA" rel="nofollow">implementation</a>).</p> <p data-svelte-h="svelte-1fujb7a">It is also possible to pass <code>init_lora_weights="gaussian"</code>. As the name suggests, this initializes weight A with a Gaussian distribution and zeros for weight B (this is how <a href="https://huggingface.co/docs/diffusers/index" rel="nofollow">Diffusers</a> initializes LoRA weights).</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> peft <span class="hljs-keyword">import</span> LoraConfig

	config = LoraConfig(init_lora_weights=<span class="hljs-string">"gaussian"</span>, ...)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-mwvui1">There is also an option to set <code>init_lora_weights=False</code> which is useful for debugging and testing. This should be the only time you use this option. When choosing this option, the LoRA weights are initialized such that they do <em>not</em> result in an identity transform.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> peft <span class="hljs-keyword">import</span> LoraConfig

	config = LoraConfig(init_lora_weights=<span class="hljs-literal">False</span>, ...)<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="pissa" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pissa"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>PiSSA</span></h3> <p data-svelte-h="svelte-1skluc"><a href="https://arxiv.org/abs/2404.02948" rel="nofollow">PiSSA</a> initializes the LoRA adapter using the principal singular values and singular vectors. This straightforward modification allows PiSSA to converge more rapidly than LoRA and ultimately attain superior performance. Moreover, PiSSA reduces the quantization error compared to QLoRA, leading to further enhancements.</p> <p data-svelte-h="svelte-1hy0lgx">Configure the initialization method to “pissa”, which may take several minutes to execute SVD on the pre-trained model:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> peft <span class="hljs-keyword">import</span> LoraConfig
	config = LoraConfig(init_lora_weights=<span class="hljs-string">"pissa"</span>, ...)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-auzrbb">Alternatively, execute fast SVD, which takes only a few seconds. The number of iterations determines the trade-off between the error and computation time:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->lora_config = LoraConfig(init_lora_weights=<span class="hljs-string">"pissa_niter_[number of iters]"</span>, ...) <!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-u5znzv">For detailed instruction on using PiSSA, please follow <a href="https://github.com/fxmeng/peft/tree/main/examples/pissa_finetuning" rel="nofollow">these instructions</a>.</p> <h3 class="relative group"><a id="olora" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#olora"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>OLoRA</span></h3> <p data-svelte-h="svelte-17kc0j8"><a href="https://arxiv.org/abs/2406.01775" rel="nofollow">OLoRA</a> utilizes QR decomposition to initialize the LoRA adapters. OLoRA translates the base weights of the model by a factor of their QR decompositions, i.e., it mutates the weights before performing any training on them. This approach significantly improves stability, accelerates convergence speed, and ultimately achieves superior performance.</p> <p data-svelte-h="svelte-1j9g8qn">You just need to pass a single additional option to use OLoRA:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> peft <span class="hljs-keyword">import</span> LoraConfig
	config = LoraConfig(init_lora_weights=<span class="hljs-string">"olora"</span>, ...)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-sulxp4">For more advanced usage, please refer to our <a href="https://github.com/huggingface/peft/tree/main/examples/olora_finetuning" rel="nofollow">documentation</a>.</p> <h3 class="relative group"><a id="loftq" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#loftq"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>LoftQ</span></h3> <h4 class="relative group"><a id="standard-approach" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#standard-approach"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Standard approach</span></h4> <p data-svelte-h="svelte-1ts8vpq">When quantizing the base model for QLoRA training, consider using the <a href="https://arxiv.org/abs/2310.08659" rel="nofollow">LoftQ initialization</a>, which has been shown to improve performance when training quantized models. The idea is that the LoRA weights are initialized such that the quantization error is minimized. To use LoftQ, follow <a href="https://github.com/huggingface/peft/tree/main/examples/loftq_finetuning" rel="nofollow">these instructions</a>.</p> <p data-svelte-h="svelte-7f0aai">In general, for LoftQ to work best, it is recommended to target as many layers with LoRA as possible, since those not targeted cannot have LoftQ applied. This means that passing <code>LoraConfig(..., target_modules="all-linear")</code> will most likely give the best results. Also, you should use <code>nf4</code> as quant type in your quantization config when using 4bit quantization, i.e. <code>BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_quant_type="nf4")</code>.</p> <h4 class="relative group"><a id="a-more-convenient-way" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#a-more-convenient-way"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>A more convenient way</span></h4> <p data-svelte-h="svelte-14vtcq9">An easier but more limited way to apply LoftQ initialization is to use the convenience function <code>replace_lora_weights_loftq</code>. This takes the quantized PEFT model as input and replaces the LoRA weights in-place with their LoftQ-initialized counterparts.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> peft <span class="hljs-keyword">import</span> replace_lora_weights_loftq
	<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> BitsAndBytesConfig

	bnb_config = BitsAndBytesConfig(load_in_4bit=<span class="hljs-literal">True</span>, ...)
	base_model = AutoModelForCausalLM.from_pretrained(..., quantization_config=bnb_config)
	<span class="hljs-comment"># note: don't pass init_lora_weights="loftq" or loftq_config!</span>
	lora_config = LoraConfig(task_type=<span class="hljs-string">"CAUSAL_LM"</span>)
	peft_model = get_peft_model(base_model, lora_config)
	replace_lora_weights_loftq(peft_model)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1nywyxh"><code>replace_lora_weights_loftq</code> also allows you to pass a <code>callback</code> argument to give you more control over which layers should be modified or not, which empirically can improve the results quite a lot. To see a more elaborate example of this, check out <a href="https://github.com/huggingface/peft/blob/main/examples/loftq_finetuning/LoftQ_weight_replacement.ipynb" rel="nofollow">this notebook</a>.</p> <p data-svelte-h="svelte-1d2jix1"><code>replace_lora_weights_loftq</code> implements only one iteration step of LoftQ. This means that only the LoRA weights are updated, instead of iteratevily updating LoRA weights and quantized base model weights. This may lead to lower performance but has the advantage that we can use the original quantized weights derived from the base model, instead of having to keep an extra copy of modified quantized weights. Whether this tradeoff is worthwhile depends on the use case.</p> <p data-svelte-h="svelte-yl8kok">At the moment, <code>replace_lora_weights_loftq</code> has these additional limitations:</p> <ul data-svelte-h="svelte-12tl4fy"><li>Model files must be stored as a <code>safetensors</code> file.</li> <li>Only bitsandbytes 4bit quantization is supported.</li></ul> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-1nbpolx">Learn more about how PEFT works with quantization in the <a href="quantization">Quantization</a> guide.</p></div> <h3 class="relative group"><a id="rank-stabilized-lora" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#rank-stabilized-lora"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Rank-stabilized LoRA</span></h3> <p data-svelte-h="svelte-omuw13">Another way to initialize <a href="/docs/peft/main/en/package_reference/lora#peft.LoraConfig">LoraConfig</a> is with the <a href="https://huggingface.co/papers/2312.03732" rel="nofollow">rank-stabilized LoRA (rsLoRA)</a> method. The LoRA architecture scales each adapter during every forward pass by a fixed scalar which is set at initialization and depends on the rank <code>r</code>. The scalar is given by <code>lora_alpha/r</code> in the original implementation, but rsLoRA uses <code>lora_alpha/math.sqrt(r)</code> which stabilizes the adapters and increases the performance potential from using a higher <code>r</code>.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> peft <span class="hljs-keyword">import</span> LoraConfig

	config = LoraConfig(use_rslora=<span class="hljs-literal">True</span>, ...)<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="weight-decomposed-low-rank-adaptation-dora" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#weight-decomposed-low-rank-adaptation-dora"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Weight-Decomposed Low-Rank Adaptation (DoRA)</span></h3> <p data-svelte-h="svelte-1ukfkwt">This technique decomposes the updates of the weights into two parts, magnitude and direction. Direction is handled by normal LoRA, whereas the magnitude is handled by a separate learnable parameter. This can improve the performance of LoRA, especially at low ranks. For more information on DoRA, see <a href="https://arxiv.org/abs/2402.09353" rel="nofollow">https://arxiv.org/abs/2402.09353</a>.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> peft <span class="hljs-keyword">import</span> LoraConfig

	config = LoraConfig(use_dora=<span class="hljs-literal">True</span>, ...)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-8wzhde">If parts of the model or the DoRA adapter are offloaded to CPU you can get a significant speedup at the cost of some temporary (ephemeral) VRAM overhead by using <code>ephemeral_gpu_offload=True</code> in <code>config.runtime_config</code>.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> peft <span class="hljs-keyword">import</span> LoraConfig, LoraRuntimeConfig

	config = LoraConfig(use_dora=<span class="hljs-literal">True</span>, runtime_config=LoraRuntimeConfig(ephemeral_gpu_offload=<span class="hljs-literal">True</span>), ...)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-16lyt50">A <code>PeftModel</code> with a DoRA adapter can also be loaded with <code>ephemeral_gpu_offload=True</code> flag using the <code>from_pretrained</code> method as well as the <code>load_adapter</code> method.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> peft <span class="hljs-keyword">import</span> PeftModel

	model = PeftModel.from_pretrained(base_model, peft_model_id, ephemeral_gpu_offload=<span class="hljs-literal">True</span>)<!-- HTML_TAG_END --></pre></div> <h4 class="relative group"><a id="caveats" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#caveats"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Caveats</span></h4> <ul data-svelte-h="svelte-azzgzc"><li>DoRA only supports linear and Conv2d layers at the moment.</li> <li>DoRA introduces a bigger overhead than pure LoRA, so it is recommended to merge weights for inference, see <a href="/docs/peft/main/en/package_reference/lora#peft.LoraModel.merge_and_unload">LoraModel.merge_and_unload()</a>.</li> <li>DoRA should work with weights quantized with bitsandbytes (“QDoRA”). However, issues have been reported when using QDoRA with DeepSpeed Zero2.</li></ul> <h3 class="relative group"><a id="qlora-style-training" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#qlora-style-training"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>QLoRA-style training</span></h3> <p data-svelte-h="svelte-15vxux2">The default LoRA settings in PEFT add trainable weights to the query and value layers of each attention block. But <a href="https://hf.co/papers/2305.14314" rel="nofollow">QLoRA</a>, which adds trainable weights to all the linear layers of a transformer model, can provide performance equal to a fully finetuned model. To apply LoRA to all the linear layers, like in QLoRA, set <code>target_modules="all-linear"</code> (easier than specifying individual modules by name which can vary depending on the architecture).</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->config = LoraConfig(target_modules=<span class="hljs-string">"all-linear"</span>, ...)<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="memory-efficient-layer-replication-with-lora" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#memory-efficient-layer-replication-with-lora"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Memory efficient Layer Replication with LoRA</span></h3> <p data-svelte-h="svelte-1mosto0">An approach used to improve the performance of models is to expand a model by duplicating layers in the model to build a larger model from a pretrained model of a given size. For example increasing a 7B model to a 10B model as described in the <a href="https://arxiv.org/abs/2312.15166" rel="nofollow">SOLAR</a> paper. PEFT LoRA supports this kind of expansion in a memory efficient manner that supports further fine-tuning using LoRA adapters attached to the layers post replication of the layers. The replicated layers do not take additional memory as they share the underlying weights so the only additional memory required is the memory for the adapter weights. To use this feature you would create a config with the <code>layer_replication</code> argument.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->config = LoraConfig(layer_replication=[[<span class="hljs-number">0</span>,<span class="hljs-number">4</span>], [<span class="hljs-number">2</span>,<span class="hljs-number">5</span>]], ...)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1ttqqw0">Assuming the original model had 5 layers <code>[0, 1, 2 ,3, 4]</code>, this would create a model with 7 layers arranged as <code>[0, 1, 2, 3, 2, 3, 4]</code>. This follows the <a href="https://github.com/arcee-ai/mergekit" rel="nofollow">mergekit</a> pass through merge convention where sequences of layers specified as start inclusive and end exclusive tuples are stacked to build the final model. Each layer in the final model gets its own distinct set of LoRA adapters.</p> <p data-svelte-h="svelte-foj0wd"><a href="https://huggingface.co/abacusai/Fewshot-Metamath-OrcaVicuna-Mistral-10B" rel="nofollow">Fewshot-Metamath-OrcaVicuna-Mistral-10B</a> is an example of a model trained using this method on Mistral-7B expanded to 10B. The
	<a href="https://huggingface.co/abacusai/Fewshot-Metamath-OrcaVicuna-Mistral-10B/blob/main/adapter_config.json" rel="nofollow">adapter_config.json</a> shows a sample LoRA adapter config applying this method for fine-tuning.</p> <h2 class="relative group"><a id="optimizers" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#optimizers"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Optimizers</span></h2> <p data-svelte-h="svelte-rkzqzi">LoRA training can optionally include special purpose optimizers. Currently the only such optimizer is LoRA+.</p> <h3 class="relative group"><a id="lora-optimized-lora" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#lora-optimized-lora"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>LoRA+ optimized LoRA</span></h3> <p data-svelte-h="svelte-9dmo2s">LoRA training can be optimized using <a href="https://arxiv.org/abs/2402.12354" rel="nofollow">LoRA+</a>, which uses different learning rates for the adapter matrices A and B, shown to increase finetuning speed by up to 2x and performance by 1-2%.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> peft <span class="hljs-keyword">import</span> LoraConfig, get_peft_model
	<span class="hljs-keyword">from</span> peft.optimizers <span class="hljs-keyword">import</span> create_loraplus_optimizer
	<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> Trainer
	<span class="hljs-keyword">import</span> bitsandbytes <span class="hljs-keyword">as</span> bnb

	base_model = ...
	config = LoraConfig(...)
	model = get_peft_model(base_model, config)

	optimizer = create_loraplus_optimizer(
	model=model,
	optimizer_cls=bnb.optim.Adam8bit,
	lr=<span class="hljs-number">5e-5</span>,
	loraplus_lr_ratio=<span class="hljs-number">16</span>,
	)
	scheduler = <span class="hljs-literal">None</span>

	...
	trainer = Trainer(
	...,
	optimizers=(optimizer, scheduler),
	)<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="merge-lora-weights-into-the-base-model" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#merge-lora-weights-into-the-base-model"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Merge LoRA weights into the base model</span></h2> <p data-svelte-h="svelte-1c3h9aa">While LoRA is significantly smaller and faster to train, you may encounter latency issues during inference due to separately loading the base model and the LoRA adapter. To eliminate latency, use the <a href="/docs/peft/main/en/package_reference/lora#peft.LoraModel.merge_and_unload">merge_and_unload()</a> function to merge the adapter weights with the base model. This allows you to use the newly merged model as a standalone model. The <a href="/docs/peft/main/en/package_reference/lora#peft.LoraModel.merge_and_unload">merge_and_unload()</a> function doesn’t keep the adapter weights in memory.</p> <p data-svelte-h="svelte-1wwmf5r">Below is a diagram that explains the intuition of LoRA adapter merging:</p> <div class="flex justify-center" data-svelte-h="svelte-1f6iuw5"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/peft/lora_diagram.png"></div> <p data-svelte-h="svelte-hlzwis">We show in the snippets below how to run that using PEFT.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForCausalLM
	<span class="hljs-keyword">from</span> peft <span class="hljs-keyword">import</span> PeftModel

	base_model = AutoModelForCausalLM.from_pretrained(<span class="hljs-string">"mistralai/Mistral-7B-v0.1"</span>)
	peft_model_id = <span class="hljs-string">"alignment-handbook/zephyr-7b-sft-lora"</span>
	model = PeftModel.from_pretrained(base_model, peft_model_id)
	model.merge_and_unload()<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-m3cr6r">If you need to keep a copy of the weights so you can unmerge the adapter later or delete and load different ones, you should use the <a href="/docs/peft/main/en/package_reference/tuners#peft.tuners.tuners_utils.BaseTuner.merge_adapter">merge_adapter()</a> function instead. Now you have the option to use <a href="/docs/peft/main/en/package_reference/tuners#peft.tuners.tuners_utils.BaseTuner.unmerge_adapter">unmerge_adapter()</a> to return the base model.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForCausalLM
	<span class="hljs-keyword">from</span> peft <span class="hljs-keyword">import</span> PeftModel

	base_model = AutoModelForCausalLM.from_pretrained(<span class="hljs-string">"mistralai/Mistral-7B-v0.1"</span>)
	peft_model_id = <span class="hljs-string">"alignment-handbook/zephyr-7b-sft-lora"</span>
	model = PeftModel.from_pretrained(base_model, peft_model_id)
	model.merge_adapter()

	<span class="hljs-comment"># unmerge the LoRA layers from the base model</span>
	model.unmerge_adapter()<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-kbqhpl">The <a href="/docs/peft/main/en/package_reference/lora#peft.LoraModel.add_weighted_adapter">add_weighted_adapter()</a> function is useful for merging multiple LoRAs into a new adapter based on a user provided weighting scheme in the <code>weights</code> parameter. Below is an end-to-end example.</p> <p data-svelte-h="svelte-1mwdwt1">First load the base model:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForCausalLM
	<span class="hljs-keyword">from</span> peft <span class="hljs-keyword">import</span> PeftModel
	<span class="hljs-keyword">import</span> torch

	base_model = AutoModelForCausalLM.from_pretrained(
	<span class="hljs-string">"mistralai/Mistral-7B-v0.1"</span>, torch_dtype=torch.float16, device_map=<span class="hljs-string">"auto"</span>
	)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-nuhu05">Then we load the first adapter:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->peft_model_id = <span class="hljs-string">"alignment-handbook/zephyr-7b-sft-lora"</span>
	model = PeftModel.from_pretrained(base_model, peft_model_id, adapter_name=<span class="hljs-string">"sft"</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1n53hd9">Then load a different adapter and merge it with the first one:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->weighted_adapter_name = <span class="hljs-string">"sft-dpo"</span>
	model.load_adapter(<span class="hljs-string">"alignment-handbook/zephyr-7b-dpo-lora"</span>, adapter_name=<span class="hljs-string">"dpo"</span>)
	model.add_weighted_adapter(
	adapters=[<span class="hljs-string">"sft"</span>, <span class="hljs-string">"dpo"</span>],
	weights=[<span class="hljs-number">0.7</span>, <span class="hljs-number">0.3</span>],
	adapter_name=weighted_adapter_name,
	combination_type=<span class="hljs-string">"linear"</span>
	)
	model.set_adapter(weighted_adapter_name)<!-- HTML_TAG_END --></pre></div> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-19y7vpj">There are several supported methods for <code>combination_type</code>. Refer to the <a href="../package_reference/lora#peft.LoraModel.add_weighted_adapter">documentation</a> for more details. Note that “svd” as the <code>combination_type</code> is not supported when using <code>torch.float16</code> or <code>torch.bfloat16</code> as the datatype.</p></div> <p data-svelte-h="svelte-qwbdkg">Now, perform inference:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->tokenizer = AutoTokenizer.from_pretrained(<span class="hljs-string">"mistralai/Mistral-7B-v0.1"</span>)

	prompt = <span class="hljs-string">"Hey, are you conscious? Can you talk to me?"</span>
	inputs = tokenizer(prompt, return_tensors=<span class="hljs-string">"pt"</span>)
	inputs = {k: v.to(<span class="hljs-string">"cuda"</span>) <span class="hljs-keyword">for</span> k, v <span class="hljs-keyword">in</span> inputs.items()}

	<span class="hljs-keyword">with</span> torch.no_grad():
	generate_ids = model.generate(**inputs, max_length=<span class="hljs-number">30</span>)
	outputs = tokenizer.batch_decode(generate_ids, skip_special_tokens=<span class="hljs-literal">True</span>, clean_up_tokenization_spaces=<span class="hljs-literal">False</span>)[<span class="hljs-number">0</span>]
	<span class="hljs-built_in">print</span>(outputs)<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="load-adapters" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#load-adapters"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Load adapters</span></h2> <p data-svelte-h="svelte-ke62q2">Adapters can be loaded onto a pretrained model with <a href="/docs/peft/main/en/package_reference/peft_model#peft.PeftModel.load_adapter">load_adapter()</a>, which is useful for trying out different adapters whose weights aren’t merged. Set the active adapter weights with the <a href="/docs/peft/main/en/package_reference/lora#peft.LoraModel.set_adapter">set_adapter()</a> function.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForCausalLM
	<span class="hljs-keyword">from</span> peft <span class="hljs-keyword">import</span> PeftModel

	base_model = AutoModelForCausalLM.from_pretrained(<span class="hljs-string">"mistralai/Mistral-7B-v0.1"</span>)
	peft_model_id = <span class="hljs-string">"alignment-handbook/zephyr-7b-sft-lora"</span>
	model = PeftModel.from_pretrained(base_model, peft_model_id)

	<span class="hljs-comment"># load different adapter</span>
	model.load_adapter(<span class="hljs-string">"alignment-handbook/zephyr-7b-dpo-lora"</span>, adapter_name=<span class="hljs-string">"dpo"</span>)

	<span class="hljs-comment"># set adapter as active</span>
	model.set_adapter(<span class="hljs-string">"dpo"</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-cvmoj6">To return the base model, you could use <a href="/docs/peft/main/en/package_reference/lora#peft.LoraModel.unload">unload()</a> to unload all of the LoRA modules or <a href="/docs/peft/main/en/package_reference/lora#peft.LoraModel.delete_adapter">delete_adapter()</a> to delete the adapter entirely.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-comment"># unload adapter</span>
	model.unload()

	<span class="hljs-comment"># delete adapter</span>
	model.delete_adapter(<span class="hljs-string">"dpo"</span>)<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="inference-with-different-lora-adapters-in-the-same-batch" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#inference-with-different-lora-adapters-in-the-same-batch"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Inference with different LoRA adapters in the same batch</span></h2> <p data-svelte-h="svelte-1fq5tpu">Normally, each inference batch has to use the same adapter(s) in PEFT. This can sometimes be annoying, because we may have batches that contain samples intended to be used with different LoRA adapters. For example, we could have a base model that works well in English and two more LoRA adapters, one for French and one for German. Usually, we would have to split our batches such that each batch only contains samples of one of the languages, we cannot combine different languages in the same batch.</p> <p data-svelte-h="svelte-lcywla">Thankfully, it is possible to mix different LoRA adapters in the same batch using the <code>adapter_name</code> argument. Below, we show an example of how this works in practice. First, let’s load the base model, English, and the two adapters, French and German, like this:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer, AutoModelForCausalLM
	<span class="hljs-keyword">from</span> peft <span class="hljs-keyword">import</span> PeftModel

	model_id = ...
	tokenizer = AutoTokenizer.from_pretrained(model_id)

	model = AutoModelForCausalLM.from_pretrained(model_id)
	<span class="hljs-comment"># load the LoRA adapter for French</span>
	peft_model = PeftModel.from_pretrained(model, <path>, adapter_name=<span class="hljs-string">"adapter_fr"</span>)
	<span class="hljs-comment"># next, load the LoRA adapter for German</span>
	peft_model.load_adapter(<path>, adapter_name=<span class="hljs-string">"adapter_de"</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-6wkyom">Now, we want to generate text on a sample that contains all three languages: The first three samples are in English, the next three are in French, and the last three are in German. We can use the <code>adapter_names</code> argument to specify which adapter to use for each sample. Since our base model is used for English, we use the special string <code>"__base__"</code> for these samples. For the next three samples, we indicate the adapter name of the French LoRA fine-tune, in this case <code>"adapter_fr"</code>. For the last three samples, we indicate the adapter name of the German LoRA fine-tune, in this case <code>"adapter_de"</code>. This way, we can use the base model and the two adapters in a single batch.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->inputs = tokenizer(
	[
	<span class="hljs-string">"Hello, my dog is cute"</span>,
	<span class="hljs-string">"Hello, my cat is awesome"</span>,
	<span class="hljs-string">"Hello, my fish is great"</span>,
	<span class="hljs-string">"Salut, mon chien est mignon"</span>,
	<span class="hljs-string">"Salut, mon chat est génial"</span>,
	<span class="hljs-string">"Salut, mon poisson est super"</span>,
	<span class="hljs-string">"Hallo, mein Hund ist süß"</span>,
	<span class="hljs-string">"Hallo, meine Katze ist toll"</span>,
	<span class="hljs-string">"Hallo, mein Fisch ist großartig"</span>,
	],
	return_tensors=<span class="hljs-string">"pt"</span>,
	padding=<span class="hljs-literal">True</span>,
	)

	adapter_names = [
	<span class="hljs-string">"__base__"</span>, <span class="hljs-string">"__base__"</span>, <span class="hljs-string">"__base__"</span>,
	<span class="hljs-string">"adapter_fr"</span>, <span class="hljs-string">"adapter_fr"</span>, <span class="hljs-string">"adapter_fr"</span>,
	<span class="hljs-string">"adapter_de"</span>, <span class="hljs-string">"adapter_de"</span>, <span class="hljs-string">"adapter_de"</span>,
	]
	output = peft_model.generate(**inputs, adapter_names=adapter_names, max_new_tokens=<span class="hljs-number">20</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1o40ch6">Note that the order does not matter here, i.e. the samples in the batch don’t need to be grouped by adapter as in the example above. We just need to ensure that the <code>adapter_names</code> argument is aligned correctly with the samples.</p> <h3 class="relative group"><a id="caveats" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#caveats"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Caveats</span></h3> <p data-svelte-h="svelte-fpz5mj">Using this features has some drawbacks, namely:</p> <ul data-svelte-h="svelte-1jk1r7d"><li>It only works for inference, not for training.</li> <li>Disabling adapters using the <code>with model.disable_adapter()</code> context takes precedence over <code>adapter_names</code>.</li> <li>You cannot pass <code>adapter_names</code> when some adapter weights where merged with base weight using the <code>merge_adapter</code> method. Please unmerge all adapters first by calling <code>model.unmerge_adapter()</code>.</li> <li>For obvious reasons, this cannot be used after calling <code>merge_and_unload()</code>, since all the LoRA adapters will be merged into the base weights in this case.</li> <li>This feature does not currently work with DoRA, so set <code>use_dora=False</code> in your <code>LoraConfig</code> if you want to use it.</li> <li>There is an expected overhead for inference with <code>adapter_names</code>, especially if the amount of different adapters in the batch is high. This is because the batch size is effectively reduced to the number of samples per adapter. If runtime performance is your top priority, try the following:<ul><li>Increase the batch size.</li> <li>Try to avoid having a large number of different adapters in the same batch, prefer homogeneous batches. This can be achieved by buffering samples with the same adapter and only perform inference with a small handfull of different adapters.</li> <li>Take a look at alternative implementations such as <a href="https://github.com/predibase/lorax" rel="nofollow">LoRAX</a>, <a href="https://github.com/punica-ai/punica" rel="nofollow">punica</a>, or <a href="https://github.com/S-LoRA/S-LoRA" rel="nofollow">S-LoRA</a>, which are specialized to work with a large number of different adapters.</li></ul></li></ul> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/peft/blob/main/docs/source/developer_guides/lora.md" target="_blank"><span data-svelte-h="svelte-1kd6by1"><</span> <span data-svelte-h="svelte-x0xyl0">></span> <span data-svelte-h="svelte-1dajgef"><span class="underline ml-1.5">Update</span> on GitHub</span></a> <p></p>

	<script>
	{
	__sveltekit_nv4ir6 = {
	assets: "/docs/peft/main/en",
	base: "/docs/peft/main/en",
	env: {}
	};

	const element = document.currentScript.parentElement;

	const data = [null,null];

	Promise.all([
	import("/docs/peft/main/en/_app/immutable/entry/start.b1913684.js"),
	import("/docs/peft/main/en/_app/immutable/entry/app.ca8eed50.js")
	]).then(([kit, app]) => {
	kit.start(app, element, {
	node_ids: [0, 11],
	data,
	form: null,
	error: null
	});
	});
	}
	</script>

Xet Storage Details

Size:: 84.4 kB
Xet hash:: 5a63854ce7ef3280ecca253e4bf489e206bcba5c6a57e2897b7c8e3ca6db4710

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.