Buckets:

download
raw
26.8 kB
<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Model merging&quot;,&quot;local&quot;:&quot;model-merging&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Merge method&quot;,&quot;local&quot;:&quot;merge-method&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Merging (IA)³ Models&quot;,&quot;local&quot;:&quot;merging-ia³-models&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}">
<link href="/docs/peft/pr_3206/en/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload">
<link rel="modulepreload" href="/docs/peft/pr_3206/en/_app/immutable/entry/start.06e1cdbd.js">
<link rel="modulepreload" href="/docs/peft/pr_3206/en/_app/immutable/chunks/scheduler.78382b47.js">
<link rel="modulepreload" href="/docs/peft/pr_3206/en/_app/immutable/chunks/singletons.25ed789f.js">
<link rel="modulepreload" href="/docs/peft/pr_3206/en/_app/immutable/chunks/index.fadd215c.js">
<link rel="modulepreload" href="/docs/peft/pr_3206/en/_app/immutable/chunks/paths.d98268af.js">
<link rel="modulepreload" href="/docs/peft/pr_3206/en/_app/immutable/entry/app.cb2a6689.js">
<link rel="modulepreload" href="/docs/peft/pr_3206/en/_app/immutable/chunks/preload-helper.1df7c689.js">
<link rel="modulepreload" href="/docs/peft/pr_3206/en/_app/immutable/chunks/index.6dd35eb6.js">
<link rel="modulepreload" href="/docs/peft/pr_3206/en/_app/immutable/nodes/0.fa4b1245.js">
<link rel="modulepreload" href="/docs/peft/pr_3206/en/_app/immutable/chunks/each.e59479a4.js">
<link rel="modulepreload" href="/docs/peft/pr_3206/en/_app/immutable/nodes/14.f3f71d40.js">
<link rel="modulepreload" href="/docs/peft/pr_3206/en/_app/immutable/chunks/MermaidChart.svelte_svelte_type_style_lang.db10b59f.js">
<link rel="modulepreload" href="/docs/peft/pr_3206/en/_app/immutable/chunks/CodeBlock.e9241c92.js">
<link rel="modulepreload" href="/docs/peft/pr_3206/en/_app/immutable/chunks/HfOption.a1db6210.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Model merging&quot;,&quot;local&quot;:&quot;model-merging&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Merge method&quot;,&quot;local&quot;:&quot;merge-method&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Merging (IA)³ Models&quot;,&quot;local&quot;:&quot;merging-ia³-models&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <div class="items-center shrink-0 min-w-[100px] max-sm:min-w-[50px] justify-end ml-auto flex" style="float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"><div class="inline-flex rounded-md max-sm:rounded-sm"><button class="inline-flex items-center gap-1 h-7 max-sm:h-7 px-2 max-sm:px-1.5 text-sm font-medium text-gray-800 border border-r-0 rounded-l-md max-sm:rounded-l-sm border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-live="polite"><span class="inline-flex items-center justify-center rounded-md p-0.5 max-sm:p-0 hover:text-gray-800 dark:hover:text-gray-200"><svg class="sm:size-3.5 size-3" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg></span> <span>Copy page</span></button> <button class="inline-flex items-center justify-center w-6 max-sm:w-5 h-7 max-sm:h-7 disabled:pointer-events-none text-sm text-gray-500 hover:text-gray-700 dark:hover:text-white 
rounded-r-md max-sm:rounded-r-sm border border-l transition border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-haspopup="menu" aria-expanded="false" aria-label="Open copy menu"><svg class="transition-transform text-gray-400 overflow-visible sm:size-3.5 size-3 rotate-0" width="1em" height="1em" viewBox="0 0 12 7" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M1 1L6 6L11 1" stroke="currentColor"></path></svg></button></div> </div> <h1 class="relative group"><a id="model-merging" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#model-merging"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Model merging</span></h1> <p data-svelte-h="svelte-1aet04z">Training a model for each task can be costly, take up storage space, and the models aren’t able to learn new information to improve their performance. Multitask learning can overcome some of these limitations by training a model to learn several tasks, but it is expensive to train and designing a dataset for it is challenging. 
<em>Model merging</em> offers a solution to these challenges by combining multiple pretrained models into one model, giving it the combined abilities of each individual model without any additional training.</p> <p data-svelte-h="svelte-1c8162a">PEFT provides several methods for merging models like a linear or SVD combination. This guide focuses on two methods that are more efficient for merging LoRA adapters by eliminating redundant parameters:</p> <ul data-svelte-h="svelte-wlbyrl"><li><a href="https://hf.co/papers/2306.01708" rel="nofollow">TIES</a> - TrIm, Elect, and Merge (TIES) is a three-step method for merging models. First, redundant parameters are trimmed, then conflicting signs are resolved into an aggregated vector, and finally the parameters whose signs are the same as the aggregate sign are averaged. This method takes into account that some values (redundant and sign disagreement) can degrade performance in the merged model.</li> <li><a href="https://hf.co/papers/2311.03099" rel="nofollow">DARE</a> - Drop And REscale is a method that can be used to prepare for other model merging methods like TIES. It works by randomly dropping parameters according to a drop rate and rescaling the remaining parameters. 
This helps to reduce the number of redundant and potentially interfering parameters among multiple models.</li></ul> <p data-svelte-h="svelte-1281rh">Models are merged with the <a href="/docs/peft/pr_3206/en/package_reference/lora#peft.LoraModel.add_weighted_adapter">add_weighted_adapter()</a> method, and the specific model merging method is specified in the <code>combination_type</code> parameter.</p> <h2 class="relative group"><a id="merge-method" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#merge-method"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Merge method</span></h2> <p data-svelte-h="svelte-gz5qim">With TIES and DARE, merging is enabled by setting <code>combination_type</code> to the merge method and <code>density</code> to the fraction of weights to keep from the individual models. 
For example, let’s merge three finetuned <a href="https://huggingface.co/TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T" rel="nofollow">TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T</a> models: <a href="https://huggingface.co/smangrul/tinyllama_lora_norobots" rel="nofollow">tinyllama_lora_norobots</a>, <a href="https://huggingface.co/smangrul/tinyllama_lora_sql" rel="nofollow">tinyllama_lora_sql</a>, and <a href="https://huggingface.co/smangrul/tinyllama_lora_adcopy" rel="nofollow">tinyllama_lora_adcopy</a>.</p> <blockquote class="tip"><p data-svelte-h="svelte-1x85egj">When you’re attempting to merge fully trained models with TIES, you should be aware of any special tokens each model may have added to the embedding layer which are not a part of the original checkpoint’s vocabulary. This may cause an issue because each model may have added a special token to the same embedding position. If this is the case, you should use the <a href="https://huggingface.co/docs/transformers/main/en/main_classes/model#transformers.PreTrainedModel.resize_token_embeddings" rel="nofollow">resize_token_embeddings</a> method to avoid merging the special tokens at the same embedding index.</p> <br> <p data-svelte-h="svelte-1bgcfcq">This shouldn’t be an issue if you’re only merging LoRA adapters trained from the same base model.</p></blockquote> <p data-svelte-h="svelte-2519aw">Load a base model and use the <a href="/docs/peft/pr_3206/en/package_reference/peft_model#peft.PeftModel.load_adapter">load_adapter()</a> method to load and assign each adapter a name:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" 
height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> peft <span class="hljs-keyword">import</span> PeftConfig, PeftModel
<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForCausalLM, AutoTokenizer
<span class="hljs-keyword">import</span> torch
config = PeftConfig.from_pretrained(<span class="hljs-string">&quot;smangrul/tinyllama_lora_norobots&quot;</span>)
model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path, load_in_4bit=<span class="hljs-literal">True</span>, device_map=<span class="hljs-string">&quot;auto&quot;</span>).<span class="hljs-built_in">eval</span>()
tokenizer = AutoTokenizer.from_pretrained(<span class="hljs-string">&quot;smangrul/tinyllama_lora_norobots&quot;</span>)
model.config.vocab_size = <span class="hljs-number">32005</span>
model.resize_token_embeddings(<span class="hljs-number">32005</span>)
model = PeftModel.from_pretrained(model, <span class="hljs-string">&quot;smangrul/tinyllama_lora_norobots&quot;</span>, adapter_name=<span class="hljs-string">&quot;norobots&quot;</span>)
_ = model.load_adapter(<span class="hljs-string">&quot;smangrul/tinyllama_lora_sql&quot;</span>, adapter_name=<span class="hljs-string">&quot;sql&quot;</span>)
_ = model.load_adapter(<span class="hljs-string">&quot;smangrul/tinyllama_lora_adcopy&quot;</span>, adapter_name=<span class="hljs-string">&quot;adcopy&quot;</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1jtxz8v">Set the adapters, weights, <code>adapter_name</code>, <code>combination_type</code>, and <code>density</code> with the <a href="/docs/peft/pr_3206/en/package_reference/lora#peft.LoraModel.add_weighted_adapter">add_weighted_adapter()</a> method.</p> <div class="flex space-x-2 items-center my-1.5 mr-8 h-7 !pl-0 -mx-3 md:mx-0"><div class="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd border-gray-800 bg-black dark:bg-gray-700 text-white">TIES </div><div class="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd text-gray-500 cursor-pointer opacity-90 hover:text-gray-700 dark:hover:text-gray-200 hover:shadow-sm">DARE </div></div> <div class="language-select"><p data-svelte-h="svelte-11ncbcn">Weight values greater than <code>1.0</code> typically produce better results because they preserve the correct scale. 
A good default starting value for the weights is to set all values to <code>1.0</code>.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->adapters = [<span class="hljs-string">&quot;norobots&quot;</span>, <span class="hljs-string">&quot;adcopy&quot;</span>, <span class="hljs-string">&quot;sql&quot;</span>]
weights = [<span class="hljs-number">2.0</span>, <span class="hljs-number">1.0</span>, <span class="hljs-number">1.0</span>]
adapter_name = <span class="hljs-string">&quot;merge&quot;</span>
density = <span class="hljs-number">0.2</span>
model.add_weighted_adapter(adapters, weights, adapter_name, combination_type=<span class="hljs-string">&quot;ties&quot;</span>, density=density)<!-- HTML_TAG_END --></pre></div> </div> <p data-svelte-h="svelte-ovph0u">Set the newly merged model as the active model with the <a href="/docs/peft/pr_3206/en/package_reference/tuners#peft.tuners.tuners_utils.BaseTuner.set_adapter">set_adapter()</a> method.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->model.set_adapter(<span class="hljs-string">&quot;merge&quot;</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-12c7cn9">Now you can use the merged model as an instruction-tuned model to write ad copy or SQL queries!</p> <div class="flex space-x-2 items-center my-1.5 mr-8 h-7 !pl-0 -mx-3 md:mx-0"><div class="flex items-center border rounded-lg px-1.5 py-1 
leading-none select-none text-smd border-gray-800 bg-black dark:bg-gray-700 text-white">instruct </div><div class="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd text-gray-500 cursor-pointer opacity-90 hover:text-gray-700 dark:hover:text-gray-200 hover:shadow-sm">ad copy </div><div class="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd text-gray-500 cursor-pointer opacity-90 hover:text-gray-700 dark:hover:text-gray-200 hover:shadow-sm">SQL </div></div> <div class="language-select"><div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->device = torch.accelerator.current_accelerator().<span class="hljs-built_in">type</span> <span class="hljs-keyword">if</span> <span class="hljs-built_in">hasattr</span>(torch, <span class="hljs-string">&quot;accelerator&quot;</span>) <span class="hljs-keyword">else</span> <span class="hljs-string">&quot;cuda&quot;</span>
messages = [
{<span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;user&quot;</span>, <span class="hljs-string">&quot;content&quot;</span>: <span class="hljs-string">&quot;Write an essay about Generative AI.&quot;</span>},
]
text = tokenizer.apply_chat_template(messages, add_generation_prompt=<span class="hljs-literal">True</span>, tokenize=<span class="hljs-literal">False</span>)
inputs = tokenizer(text, return_tensors=<span class="hljs-string">&quot;pt&quot;</span>)
inputs = {k: v.to(device) <span class="hljs-keyword">for</span> k, v <span class="hljs-keyword">in</span> inputs.items()}
outputs = model.generate(**inputs, max_new_tokens=<span class="hljs-number">256</span>, do_sample=<span class="hljs-literal">True</span>, top_p=<span class="hljs-number">0.95</span>, temperature=<span class="hljs-number">0.2</span>, repetition_penalty=<span class="hljs-number">1.2</span>, eos_token_id=tokenizer.eos_token_id)
<span class="hljs-built_in">print</span>(tokenizer.decode(outputs[<span class="hljs-number">0</span>]))<!-- HTML_TAG_END --></pre></div> </div> <h2 class="relative group"><a id="merging-ia³-models" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#merging-ia³-models"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Merging (IA)³ Models</span></h2> <p data-svelte-h="svelte-r9tv7c">The (IA)³ models facilitate linear merging of adapters. To merge adapters in an (IA)³ model, utilize the <code>add_weighted_adapter</code> method from the <code>IA3Model</code> class. This method is analogous to the <code>add_weighted_adapter</code> method used in <code>LoraModel</code>, with the key difference being the absence of the <code>combination_type</code> parameter. 
For example, to merge three (IA)³ adapters into a PEFT model, you would proceed as follows:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->adapters = [<span class="hljs-string">&quot;adapter1&quot;</span>, <span class="hljs-string">&quot;adapter2&quot;</span>, <span class="hljs-string">&quot;adapter3&quot;</span>]
weights = [<span class="hljs-number">0.4</span>, <span class="hljs-number">0.3</span>, <span class="hljs-number">0.3</span>]
adapter_name = <span class="hljs-string">&quot;merge&quot;</span>
model.add_weighted_adapter(adapters, weights, adapter_name)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-dmzjk6">It is recommended that the weights sum to 1.0 to preserve the scale of the model. The merged model can then be set as the active model using the <code>set_adapter</code> method:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->model.set_adapter(<span class="hljs-string">&quot;merge&quot;</span>)<!-- HTML_TAG_END --></pre></div> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/peft/blob/main/docs/source/developer_guides/model_merging.md" target="_blank"><svg class="mr-1" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" 
viewBox="0 0 32 32"><path d="M31,16l-7,7l-1.41-1.41L28.17,16l-5.58-5.59L24,9l7,7z"></path><path d="M1,16l7-7l1.41,1.41L3.83,16l5.58,5.59L8,23l-7-7z"></path><path d="M12.419,25.484L17.639,6.552l1.932,0.518L14.351,26.002z"></path></svg> <span data-svelte-h="svelte-zjs2n5"><span class="underline">Update</span> on GitHub</span></a> <p></p>
<script>
{
	// Publish the path configuration under the build-specific global name.
	// Deliberately an undeclared assignment so it lands on the global object;
	// presumably the SvelteKit runtime loaded below reads it — confirm against the kit bundle.
	__sveltekit_1pyekj7 = {
		assets: "/docs/peft/pr_3206/en",
		base: "/docs/peft/pr_3206/en",
		env: {}
	};

	// document.currentScript is only set while this script runs synchronously,
	// so the mount target is captured before any asynchronous work starts.
	const element = document.currentScript.parentElement;

	// One (empty) data slot per entry in node_ids.
	const data = [null, null];
	const startOptions = {
		node_ids: [0, 14],
		data: data,
		form: null,
		error: null
	};

	// Kick off both entry-module downloads, then start the app once both have resolved.
	const kitModule = import("/docs/peft/pr_3206/en/_app/immutable/entry/start.06e1cdbd.js");
	const appModule = import("/docs/peft/pr_3206/en/_app/immutable/entry/app.cb2a6689.js");

	Promise.all([kitModule, appModule]).then(function (loaded) {
		const kit = loaded[0];
		const app = loaded[1];
		kit.start(app, element, startOptions);
	});
}
</script>

Xet Storage Details

Size:
26.8 kB
·
Xet hash:
4e98c6db2f65085a7effd17aadb1e31b92be1d456f22cce214ce653b043dff99

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.