Buckets:

hf-doc-build/doc-dev / peft / pr_3205 / en / tutorial / peft_model_config.html
download
raw
33.4 kB
<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;PEFT configurations and models&quot;,&quot;local&quot;:&quot;peft-configurations-and-models&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;PEFT configurations&quot;,&quot;local&quot;:&quot;peft-configurations&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;PEFT models&quot;,&quot;local&quot;:&quot;peft-models&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Next steps&quot;,&quot;local&quot;:&quot;next-steps&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}">
<link href="/docs/peft/pr_3205/en/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload">
<link rel="modulepreload" href="/docs/peft/pr_3205/en/_app/immutable/entry/start.6e4f7154.js">
<link rel="modulepreload" href="/docs/peft/pr_3205/en/_app/immutable/chunks/scheduler.78382b47.js">
<link rel="modulepreload" href="/docs/peft/pr_3205/en/_app/immutable/chunks/singletons.fa874020.js">
<link rel="modulepreload" href="/docs/peft/pr_3205/en/_app/immutable/chunks/index.fadd215c.js">
<link rel="modulepreload" href="/docs/peft/pr_3205/en/_app/immutable/chunks/paths.ca1902b3.js">
<link rel="modulepreload" href="/docs/peft/pr_3205/en/_app/immutable/entry/app.968b774b.js">
<link rel="modulepreload" href="/docs/peft/pr_3205/en/_app/immutable/chunks/preload-helper.db85a81e.js">
<link rel="modulepreload" href="/docs/peft/pr_3205/en/_app/immutable/chunks/index.6dd35eb6.js">
<link rel="modulepreload" href="/docs/peft/pr_3205/en/_app/immutable/nodes/0.02e4cb39.js">
<link rel="modulepreload" href="/docs/peft/pr_3205/en/_app/immutable/chunks/each.e59479a4.js">
<link rel="modulepreload" href="/docs/peft/pr_3205/en/_app/immutable/nodes/74.2e9ce7ec.js">
<link rel="modulepreload" href="/docs/peft/pr_3205/en/_app/immutable/chunks/MermaidChart.svelte_svelte_type_style_lang.184d38d3.js">
<link rel="modulepreload" href="/docs/peft/pr_3205/en/_app/immutable/chunks/CodeBlock.7e3c9fac.js">
<link rel="modulepreload" href="/docs/peft/pr_3205/en/_app/immutable/chunks/HfOption.a1db6210.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;PEFT configurations and models&quot;,&quot;local&quot;:&quot;peft-configurations-and-models&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;PEFT configurations&quot;,&quot;local&quot;:&quot;peft-configurations&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;PEFT models&quot;,&quot;local&quot;:&quot;peft-models&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Next steps&quot;,&quot;local&quot;:&quot;next-steps&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <div class="items-center shrink-0 min-w-[100px] max-sm:min-w-[50px] justify-end ml-auto flex" style="float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"><div class="inline-flex rounded-md max-sm:rounded-sm"><button class="inline-flex items-center gap-1 h-7 max-sm:h-7 px-2 max-sm:px-1.5 text-sm font-medium text-gray-800 border border-r-0 rounded-l-md max-sm:rounded-l-sm border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-live="polite"><span class="inline-flex items-center justify-center rounded-md p-0.5 max-sm:p-0 hover:text-gray-800 dark:hover:text-gray-200"><svg class="sm:size-3.5 size-3" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg></span> <span>Copy page</span></button> <button 
class="inline-flex items-center justify-center w-6 max-sm:w-5 h-7 max-sm:h-7 disabled:pointer-events-none text-sm text-gray-500 hover:text-gray-700 dark:hover:text-white rounded-r-md max-sm:rounded-r-sm border border-l transition border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-haspopup="menu" aria-expanded="false" aria-label="Open copy menu"><svg class="transition-transform text-gray-400 overflow-visible sm:size-3.5 size-3 rotate-0" width="1em" height="1em" viewBox="0 0 12 7" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M1 1L6 6L11 1" stroke="currentColor"></path></svg></button></div> </div> <h1 class="relative group"><a id="peft-configurations-and-models" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#peft-configurations-and-models"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>PEFT configurations and models</span></h1> <p data-svelte-h="svelte-hwy4y1">The sheer size of today’s large pretrained models - which commonly have billions of parameters - presents a significant training challenge because they require more storage space 
and more computational power to crunch all those calculations. You’ll need access to powerful GPUs or TPUs to train these large pretrained models which is expensive, not widely accessible to everyone, not environmentally friendly, and not very practical. PEFT methods address many of these challenges. There are several types of PEFT methods (soft prompting, matrix decomposition, adapters), but they all focus on the same thing, reduce the number of trainable parameters. This makes it more accessible to train and store large models on consumer hardware.</p> <p data-svelte-h="svelte-1lzwkqs">The PEFT library is designed to help you quickly train large models on free or low-cost GPUs, and in this tutorial, you’ll learn how to setup a configuration to apply a PEFT method to a pretrained base model for training. Once the PEFT configuration is setup, you can use any training framework you like (Transformer’s <a href="https://huggingface.co/docs/transformers/main/en/main_classes/trainer#transformers.Trainer" rel="nofollow">Trainer</a> class, <a href="https://hf.co/docs/accelerate" rel="nofollow">Accelerate</a>, a custom PyTorch training loop).</p> <h2 class="relative group"><a id="peft-configurations" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#peft-configurations"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 
11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>PEFT configurations</span></h2> <blockquote class="tip" data-svelte-h="svelte-1ttb3zu"><p>Learn more about the parameters you can configure for each PEFT method in their respective API reference page.</p></blockquote> <p data-svelte-h="svelte-16b4gu">A configuration stores important parameters that specify how a particular PEFT method should be applied.</p> <p data-svelte-h="svelte-2s9teg">For example, take a look at the following <a href="https://huggingface.co/ybelkada/opt-350m-lora/blob/main/adapter_config.json" rel="nofollow"><code>LoraConfig</code></a> for applying LoRA and <a href="https://huggingface.co/smangrul/roberta-large-peft-p-tuning/blob/main/adapter_config.json" rel="nofollow"><code>PromptEncoderConfig</code></a> for applying p-tuning (these configuration files are already JSON-serialized). 
Whenever you load a PEFT adapter, it is a good idea to check whether it has an associated adapter_config.json file which is required.</p> <div class="flex space-x-2 items-center my-1.5 mr-8 h-7 !pl-0 -mx-3 md:mx-0"><div class="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd border-gray-800 bg-black dark:bg-gray-700 text-white">LoraConfig </div><div class="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd text-gray-500 cursor-pointer opacity-90 hover:text-gray-700 dark:hover:text-gray-200 hover:shadow-sm">PromptEncoderConfig </div></div> <div class="language-select"><div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-punctuation">{</span>
<span class="hljs-attr">&quot;base_model_name_or_path&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;facebook/opt-350m&quot;</span><span class="hljs-punctuation">,</span> #base model to apply LoRA to
<span class="hljs-attr">&quot;bias&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;none&quot;</span><span class="hljs-punctuation">,</span>
<span class="hljs-attr">&quot;fan_in_fan_out&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-literal"><span class="hljs-keyword">false</span></span><span class="hljs-punctuation">,</span>
<span class="hljs-attr">&quot;inference_mode&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-literal"><span class="hljs-keyword">true</span></span><span class="hljs-punctuation">,</span>
<span class="hljs-attr">&quot;init_lora_weights&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-literal"><span class="hljs-keyword">true</span></span><span class="hljs-punctuation">,</span>
<span class="hljs-attr">&quot;layers_pattern&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-literal"><span class="hljs-keyword">null</span></span><span class="hljs-punctuation">,</span>
<span class="hljs-attr">&quot;layers_to_transform&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-literal"><span class="hljs-keyword">null</span></span><span class="hljs-punctuation">,</span>
<span class="hljs-attr">&quot;lora_alpha&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-number">32</span><span class="hljs-punctuation">,</span>
<span class="hljs-attr">&quot;lora_dropout&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-number">0.05</span><span class="hljs-punctuation">,</span>
<span class="hljs-attr">&quot;modules_to_save&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-literal"><span class="hljs-keyword">null</span></span><span class="hljs-punctuation">,</span>
<span class="hljs-attr">&quot;peft_type&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;LORA&quot;</span><span class="hljs-punctuation">,</span> #PEFT method type
<span class="hljs-attr">&quot;r&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-number">16</span><span class="hljs-punctuation">,</span>
<span class="hljs-attr">&quot;revision&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-literal"><span class="hljs-keyword">null</span></span><span class="hljs-punctuation">,</span>
<span class="hljs-attr">&quot;target_modules&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">[</span>
<span class="hljs-string">&quot;q_proj&quot;</span><span class="hljs-punctuation">,</span> #model modules to apply LoRA to (query and value projection layers)
<span class="hljs-string">&quot;v_proj&quot;</span>
<span class="hljs-punctuation">]</span><span class="hljs-punctuation">,</span>
<span class="hljs-attr">&quot;task_type&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;CAUSAL_LM&quot;</span> #type of task to train model on
<span class="hljs-punctuation">}</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-3wqwzt">You can create your own configuration for training by initializing a <a href="/docs/peft/pr_3205/en/package_reference/lora#peft.LoraConfig">LoraConfig</a>.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> peft <span class="hljs-keyword">import</span> LoraConfig, TaskType
lora_config = LoraConfig(
r=<span class="hljs-number">16</span>,
target_modules=[<span class="hljs-string">&quot;q_proj&quot;</span>, <span class="hljs-string">&quot;v_proj&quot;</span>],
task_type=TaskType.CAUSAL_LM,
lora_alpha=<span class="hljs-number">32</span>,
lora_dropout=<span class="hljs-number">0.05</span>
)<!-- HTML_TAG_END --></pre></div> </div> <h2 class="relative group"><a id="peft-models" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#peft-models"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>PEFT models</span></h2> <p data-svelte-h="svelte-1mk5ta6">With a PEFT configuration in hand, you can now apply it to any pretrained model to create a <a href="/docs/peft/pr_3205/en/package_reference/peft_model#peft.PeftModel">PeftModel</a>. 
Choose from any of the state-of-the-art models from the <a href="https://hf.co/docs/transformers" rel="nofollow">Transformers</a> library, a custom model, and even new and unsupported transformer architectures.</p> <p data-svelte-h="svelte-1d979vk">For this tutorial, load a base <a href="https://huggingface.co/facebook/opt-350m" rel="nofollow">facebook/opt-350m</a> model to finetune.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForCausalLM
model = AutoModelForCausalLM.from_pretrained(<span class="hljs-string">&quot;facebook/opt-350m&quot;</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1uou435">Use the <a href="/docs/peft/pr_3205/en/package_reference/peft_model#peft.get_peft_model">get_peft_model()</a> function to create a <a href="/docs/peft/pr_3205/en/package_reference/peft_model#peft.PeftModel">PeftModel</a> from the base facebook/opt-350m model and the <code>lora_config</code> you created earlier.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> peft <span class="hljs-keyword">import</span> get_peft_model
lora_model = get_peft_model(model, lora_config)
lora_model.print_trainable_parameters()
<span class="hljs-string">&quot;trainable params: 1,572,864 || all params: 332,769,280 || trainable%: 0.472659014678278&quot;</span><!-- HTML_TAG_END --></pre></div> <blockquote class="warning" data-svelte-h="svelte-7bjzb4"><p>When calling <a href="/docs/peft/pr_3205/en/package_reference/peft_model#peft.get_peft_model">get_peft_model()</a>, the base model will be modified <em>in-place</em>. That means, when calling <a href="/docs/peft/pr_3205/en/package_reference/peft_model#peft.get_peft_model">get_peft_model()</a> on a model that was already modified in the same way before, this model will be further mutated. Therefore, if you would like to modify your PEFT configuration after having called <code>get_peft_model()</code> before, you would first have to unload the model with <a href="/docs/peft/pr_3205/en/package_reference/tuners#peft.tuners.tuners_utils.BaseTuner.unload">unload()</a> and then call <code>get_peft_model()</code> with your new configuration. Alternatively, you can re-initialize the model to ensure a fresh, unmodified state before applying a new PEFT configuration.</p></blockquote> <p data-svelte-h="svelte-vjh34g">Now you can train the <a href="/docs/peft/pr_3205/en/package_reference/peft_model#peft.PeftModel">PeftModel</a> with your preferred training framework! 
After training, you can save your model locally with <a href="/docs/peft/pr_3205/en/package_reference/peft_model#peft.PeftModel.save_pretrained">save_pretrained()</a> or upload it to the Hub with the <a href="https://huggingface.co/docs/transformers/main/en/main_classes/model#transformers.PreTrainedModel.push_to_hub" rel="nofollow">push_to_hub</a> method.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-comment"># save locally</span>
lora_model.save_pretrained(<span class="hljs-string">&quot;your-name/opt-350m-lora&quot;</span>)
<span class="hljs-comment"># push to Hub</span>
lora_model.push_to_hub(<span class="hljs-string">&quot;your-name/opt-350m-lora&quot;</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-elyjyu">To load a <a href="/docs/peft/pr_3205/en/package_reference/peft_model#peft.PeftModel">PeftModel</a> for inference, you’ll need to provide the <a href="/docs/peft/pr_3205/en/package_reference/config#peft.PeftConfig">PeftConfig</a> used to create it and the base model it was trained from.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> peft <span class="hljs-keyword">import</span> PeftModel, PeftConfig
config = PeftConfig.from_pretrained(<span class="hljs-string">&quot;ybelkada/opt-350m-lora&quot;</span>)
model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path)
lora_model = PeftModel.from_pretrained(model, <span class="hljs-string">&quot;ybelkada/opt-350m-lora&quot;</span>)<!-- HTML_TAG_END --></pre></div> <blockquote class="tip"><p data-svelte-h="svelte-1wdvgiy">By default, the <a href="/docs/peft/pr_3205/en/package_reference/peft_model#peft.PeftModel">PeftModel</a> is set for inference, but if you’d like to train the adapter some more you can set <code>is_trainable=True</code>.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->lora_model = PeftModel.from_pretrained(model, <span class="hljs-string">&quot;ybelkada/opt-350m-lora&quot;</span>, is_trainable=<span class="hljs-literal">True</span>)<!-- HTML_TAG_END --></pre></div></blockquote> <p data-svelte-h="svelte-1plhsqk">The <a 
href="/docs/peft/pr_3205/en/package_reference/peft_model#peft.PeftModel.from_pretrained">PeftModel.from_pretrained()</a> method is the most flexible way to load a <a href="/docs/peft/pr_3205/en/package_reference/peft_model#peft.PeftModel">PeftModel</a> because it doesn’t matter what model framework was used (Transformers, timm, a generic PyTorch model). Other classes, like <a href="/docs/peft/pr_3205/en/package_reference/auto_class#peft.AutoPeftModel">AutoPeftModel</a>, are just a convenient wrapper around the base <a href="/docs/peft/pr_3205/en/package_reference/peft_model#peft.PeftModel">PeftModel</a>, and makes it easier to load PEFT models directly from the Hub or locally where the PEFT weights are stored.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> peft <span 
class="hljs-keyword">import</span> AutoPeftModelForCausalLM
lora_model = AutoPeftModelForCausalLM.from_pretrained(<span class="hljs-string">&quot;ybelkada/opt-350m-lora&quot;</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-u8iduq">Take a look at the <a href="package_reference/auto_class">AutoPeftModel</a> API reference to learn more about the <a href="/docs/peft/pr_3205/en/package_reference/auto_class#peft.AutoPeftModel">AutoPeftModel</a> classes.</p> <h2 class="relative group"><a id="next-steps" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#next-steps"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Next steps</span></h2> <p data-svelte-h="svelte-lrwhz">With the appropriate <a href="/docs/peft/pr_3205/en/package_reference/config#peft.PeftConfig">PeftConfig</a>, you can apply it to any pretrained model to create a <a href="/docs/peft/pr_3205/en/package_reference/peft_model#peft.PeftModel">PeftModel</a> and train large powerful models faster on freely available GPUs! 
To learn more about PEFT configurations and models, the following guide may be helpful:</p> <ul data-svelte-h="svelte-imcrah"><li>Learn how to configure a PEFT method for models that aren’t from Transformers in the <a href="../developer_guides/custom_models">Working with custom models</a> guide.</li></ul> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/peft/blob/main/docs/source/tutorial/peft_model_config.md" target="_blank"><svg class="mr-1" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M31,16l-7,7l-1.41-1.41L28.17,16l-5.58-5.59L24,9l7,7z"></path><path d="M1,16l7-7l1.41,1.41L3.83,16l5.58,5.59L8,23l-7-7z"></path><path d="M12.419,25.484L17.639,6.552l1.932,0.518L14.351,26.002z"></path></svg> <span data-svelte-h="svelte-zjs2n5"><span class="underline">Update</span> on GitHub</span></a> <p></p>
<script>
// SvelteKit client-side hydration bootstrap. This block is machine-generated
// by the doc build — hand edits will be overwritten on the next build.
{
// Implicit global (no var/let/const): SvelteKit's runtime config, keyed with a
// build-specific suffix. Asset and route paths are rooted at this doc
// version's base URL (/docs/peft/pr_3205/en).
__sveltekit_k1ait3 = {
assets: "/docs/peft/pr_3205/en",
base: "/docs/peft/pr_3205/en",
env: {}
};
// Hydration mount target: the parent element of this <script> tag itself.
const element = document.currentScript.parentElement;
// Per-route-node data payload — presumably serialized load() results; both
// null here (static page, no server data). TODO confirm against SvelteKit docs.
const data = [null,null];
// Load the two SvelteKit entry modules in parallel, then start the client
// runtime against the mount element.
Promise.all([
import("/docs/peft/pr_3205/en/_app/immutable/entry/start.6e4f7154.js"),
import("/docs/peft/pr_3205/en/_app/immutable/entry/app.968b774b.js")
]).then(([kit, app]) => {
kit.start(app, element, {
// Route node ids — match the preloaded nodes/0.*.js and nodes/74.*.js
// chunks referenced by the <link rel="modulepreload"> tags in the head.
node_ids: [0, 74],
data,
form: null,
error: null
});
});
}
</script>

Xet Storage Details

Size:
33.4 kB
·
Xet hash:
84b21008b3b8ae62c50ecc6d834a3868a33cdbbe46f665a1c9ba7dbd0b2b404d

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.