Buckets:

download
raw
40.5 kB
import{s as fs,n as us,o as gs}from"../chunks/scheduler.78382b47.js";import{S as Us,i as ws,e as y,s as a,c as i,h as Js,a as M,d as s,b as l,f as be,g as r,j as d,k as je,l as u,m as n,n as o,t as p,o as m,p as c}from"../chunks/index.6dd35eb6.js";import{C as hs,H as f,E as Ts}from"../chunks/MermaidChart.svelte_svelte_type_style_lang.db10b59f.js";import{D as Be}from"../chunks/Docstring.892c143d.js";import{C as b}from"../chunks/CodeBlock.e9241c92.js";function bs(Et){let g,Ve,ke,We,j,Ge,C,Fe,v,Ht='Orthogonal Subspace Fine-tuning (<a href="https://huggingface.co/papers/2504.07097" rel="nofollow">OSF</a>) is a PEFT method designed for continual learning that constrains parameter updates to be orthogonal to previously important directions. This approach enables full fine-tuning while preventing catastrophic forgetting without requiring additional parameters or storing previous gradients.',Se,$,qt="The abstract from the paper is:",Re,Z,At="<em>Continual learning in large language models (LLMs) is prone to catastrophic forgetting, where adapting to new tasks significantly degrades performance on previously learned ones. Existing methods typically rely on low-rank, parameter-efficient updates that limit the model’s expressivity and introduce additional parameters per task, leading to scalability issues. To address these limitations, we propose a novel continual full fine-tuning approach leveraging adaptive singular value decomposition (SVD). Our method dynamically identifies task-specific low-rank parameter subspaces and constrains updates to be orthogonal to critical directions associated with prior tasks, thus effectively minimizing interference without additional parameter overhead or storing previous task gradients. We evaluate our approach extensively on standard continual learning benchmarks using both encoder-decoder (T5-Large) and decoder-only (LLaMA-2 7B) models, spanning diverse tasks including classification, generation, and reasoning. 
Empirically, our method achieves state-of-the-art results, up to 7% higher average accuracy than recent baselines like O-LoRA, and notably maintains the model’s general linguistic capabilities, instruction-following accuracy, and safety throughout the continual learning process by reducing forgetting to near-negligible levels. Our adaptive SVD framework effectively balances model plasticity and knowledge retention, providing a practical, theoretically grounded, and computationally scalable solution for continual learning scenarios in large language models.</em>",Xe,I,Ne,k,Ot="OSF decomposes each weight matrix into high-rank (frozen) and low-rank (trainable) components using SVD:",ze,_,Qe,B,Lt="Where:",Ye,V,Dt="<li><code>U_high, S_high, V_high</code>: Preserve important directions from previous tasks (frozen)</li> <li><code>U_low, S_low, V_low</code>: Allow adaptation to new tasks (trainable)</li>",xe,W,Pt="During training, gradients are projected to be orthogonal to the high-rank subspace, ensuring updates don’t interfere with previously learned knowledge.",Ee,G,He,F,qe,S,Ae,R,Oe,X,Kt="You can specify target modules in several ways:",Le,N,De,z,Pe,Q,es="Control the preserved/trainable subspaces:",Ke,Y,et,x,ts="Note: OSF’s <code>effective_rank</code> is the preserved (frozen) rank, not the trainable rank. The trainable rank equals <code>min(weight.shape) - effective_rank</code>. This differs from LoRA’s <code>r</code>, which directly specifies the trainable rank.",tt,E,st,H,nt,q,ss="OSF is specifically designed for learning tasks sequentially. Between tasks, recompute the SVD so the preserved subspace reflects the latest weights. 
One simple way is to re-wrap the updated base model with OSF again:",at,A,lt,O,it,L,ns="When training on a known sequence of n tasks, one effective strategy is to progressively allocate model capacity to balance learning new tasks while preserving previous knowledge:",rt,D,as="<li><strong>Task 1</strong>: Use full capacity (train everything)</li> <li><strong>Task 2</strong>: Freeze 1/n of model capacity, train remaining (n-1)/n capacity</li> <li><strong>Task 3</strong>: Freeze 2/n of model capacity, train remaining (n-2)/n capacity</li> <li><strong>Task n</strong>: Freeze (n-1)/n of model capacity, use 1/n capacity for final task</li>",ot,P,ls="This approach ensures each task gets adequate learning capacity while progressively preserving more knowledge from previous tasks.",pt,K,mt,ee,ct,te,is="<li><strong>Effective Rank Selection</strong>: Start with <code>effective_rank=None</code> (auto sets rank to 50% of the smaller weight dimension per target module) and adjust based on task complexity</li> <li><strong>Learning Rate</strong>: Use smaller learning rates (1e-5 to 1e-4) compared to standard fine-tuning</li> <li><strong>Task Importance</strong>: Use <code>rank_pattern</code> to allocate more capacity to critical modules</li> <li><strong>Model Architecture</strong>: OSF works best with transformer architectures having clear attention and MLP separations</li> <li><strong>Capacity Planning</strong>: For known task sequences, use progressive budget allocation (1/n, 2/n, …, (n-1)/n freezing) to balance plasticity and stability</li>",yt,se,Mt,ne,rs="OSF modifies weights in-place and doesn’t add parameters, making it memory-efficient:",dt,ae,ft,le,ut,ie,gt,re,os="For models with non-standard architectures:",Ut,oe,wt,pe,Jt,me,ps="OSF can be combined with other techniques:",ht,ce,Tt,ye,bt,U,Me,Nt,Ce,ms="Configuration for Orthogonal Subspace Fine-tuning (OSF).",jt,de,Ct,w,fe,zt,ve,cs="A minimal tuner implementing Orthogonal Subspace 
Fine-tuning.",vt,ue,$t,ge,Zt,J,Ue,Qt,$e,ys="Perform an SVD of <code>weight</code> and split it into frozen and trainable parts.",It,h,we,Yt,Ze,Ms="Reconstruct a weight matrix from its SVD components.",kt,Je,_t,T,he,xt,Ie,ds="Project gradients of <code>U_low</code> and <code>V_low</code> to be orthogonal to the high rank space.",Bt,Te,Vt,_e,Wt;return j=new hs({props:{containerStyle:"float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"}}),C=new f({props:{title:"OSF (Orthogonal Subspace Fine-tuning)",local:"osf-orthogonal-subspace-fine-tuning",headingTag:"h1"}}),I=new f({props:{title:"How OSF Works",local:"how-osf-works",headingTag:"h2"}}),_=new b({props:{code:"VyUyMCUzRCUyMFVfaGlnaCUyMColMjBTX2hpZ2glMjAqJTIwVl9oaWdoJTVFVCUyMCUyQiUyMFVfbG93JTIwKiUyMFNfbG93JTIwKiUyMFZfbG93JTVFVA==",highlighted:'<span class="hljs-keyword">W</span> = U_high * S_high * V_high<span class="hljs-symbol">^T</span> + U_low * S_low * V_low<span class="hljs-symbol">^T</span>',wrap:!1}}),G=new f({props:{title:"Basic Usage",local:"basic-usage",headingTag:"h2"}}),F=new 
b({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwdHJhbnNmb3JtZXJzJTIwaW1wb3J0JTIwQXV0b01vZGVsRm9yQ2F1c2FsTE0lMkMlMjBBdXRvVG9rZW5pemVyJTBBZnJvbSUyMHBlZnQlMjBpbXBvcnQlMjBPU0ZDb25maWclMkMlMjBnZXRfcGVmdF9tb2RlbCUwQSUwQSUyMyUyMExvYWQlMjBiYXNlJTIwbW9kZWwlMEFtb2RlbCUyMCUzRCUyMEF1dG9Nb2RlbEZvckNhdXNhbExNLmZyb21fcHJldHJhaW5lZCglMjJncHQyJTIyKSUwQSUwQSUyMyUyMENvbmZpZ3VyZSUyME9TRiUwQWNvbmZpZyUyMCUzRCUyME9TRkNvbmZpZyglMEElMjAlMjAlMjAlMjB0YXJnZXRfbW9kdWxlcyUzRCU1QiUyMmNfYXR0biUyMiUyQyUyMCUyMmNfcHJvaiUyMiU1RCUyQyUyMCUyMCUyMyUyMFRhcmdldCUyMGF0dGVudGlvbiUyMGxheWVycyUwQSUyMCUyMCUyMCUyMGVmZmVjdGl2ZV9yYW5rJTNEOCUyQyUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMyUyMERlZmF1bHQlMjByYW5rJTIwZm9yJTIwZGVjb21wb3NpdGlvbiUwQSUyMCUyMCUyMCUyMHJhbmtfcGF0dGVybiUzRCU3QiUyMmNfYXR0biUyMiUzQSUyMDE2JTdEJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIzJTIwT3ZlcnJpZGUlMjByYW5rJTIwZm9yJTIwc3BlY2lmaWMlMjBtb2R1bGVzJTBBKSUwQSUwQSUyMyUyMEFwcGx5JTIwT1NGJTBBbW9kZWwlMjAlM0QlMjBnZXRfcGVmdF9tb2RlbChtb2RlbCUyQyUyMGNvbmZpZyklMEElMEElMjMlMjBUcmFpbiUyMGFzJTIwdXN1YWwlMEFvcHRpbWl6ZXIlMjAlM0QlMjB0b3JjaC5vcHRpbS5BZGFtVyhtb2RlbC5wYXJhbWV0ZXJzKCklMkMlMjBsciUzRDNlLTQpJTBBJTBBdG9rZW5pemVyJTIwJTNEJTIwQXV0b1Rva2VuaXplci5mcm9tX3ByZXRyYWluZWQoJTIyZ3B0MiUyMiklMEF0b2tlbml6ZXIucGFkX3Rva2VuJTIwJTNEJTIwdG9rZW5pemVyLmVvc190b2tlbiUwQSUwQWlucHV0cyUyMCUzRCUyMHRva2VuaXplciglMjJIZWxsbyUyMHdvcmxkJTIyJTJDJTIwcmV0dXJuX3RlbnNvcnMlM0QlMjJwdCUyMiUyQyUyMHBhZGRpbmclM0RUcnVlKSUwQWxvc3MlMjAlM0QlMjBtb2RlbCgqKmlucHV0cyUyQyUyMGxhYmVscyUzRGlucHV0cy5pbnB1dF9pZHMpLmxvc3MlMEFsb3NzLmJhY2t3YXJkKCklMEFvcHRpbWl6ZXIuc3RlcCgpJTBBb3B0aW1pemVyLnplcm9fZ3JhZCgp",highlighted:`<span class="hljs-keyword">import</span> torch
<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForCausalLM, AutoTokenizer
<span class="hljs-keyword">from</span> peft <span class="hljs-keyword">import</span> OSFConfig, get_peft_model
<span class="hljs-comment"># Load base model</span>
model = AutoModelForCausalLM.from_pretrained(<span class="hljs-string">&quot;gpt2&quot;</span>)
<span class="hljs-comment"># Configure OSF</span>
config = OSFConfig(
target_modules=[<span class="hljs-string">&quot;c_attn&quot;</span>, <span class="hljs-string">&quot;c_proj&quot;</span>], <span class="hljs-comment"># Target attention layers</span>
effective_rank=<span class="hljs-number">8</span>, <span class="hljs-comment"># Default rank for decomposition</span>
rank_pattern={<span class="hljs-string">&quot;c_attn&quot;</span>: <span class="hljs-number">16</span>} <span class="hljs-comment"># Override rank for specific modules</span>
)
<span class="hljs-comment"># Apply OSF</span>
model = get_peft_model(model, config)
<span class="hljs-comment"># Train as usual</span>
optimizer = torch.optim.AdamW(model.parameters(), lr=<span class="hljs-number">3e-4</span>)
tokenizer = AutoTokenizer.from_pretrained(<span class="hljs-string">&quot;gpt2&quot;</span>)
tokenizer.pad_token = tokenizer.eos_token
inputs = tokenizer(<span class="hljs-string">&quot;Hello world&quot;</span>, return_tensors=<span class="hljs-string">&quot;pt&quot;</span>, padding=<span class="hljs-literal">True</span>)
loss = model(**inputs, labels=inputs.input_ids).loss
loss.backward()
optimizer.step()
optimizer.zero_grad()`,wrap:!1}}),S=new f({props:{title:"Configuration Options",local:"configuration-options",headingTag:"h2"}}),R=new f({props:{title:"Target Modules",local:"target-modules",headingTag:"h3"}}),N=new b({props:{code:"JTIzJTIwU3BlY2lmaWMlMjBtb2R1bGUlMjBuYW1lcyUwQWNvbmZpZyUyMCUzRCUyME9TRkNvbmZpZyh0YXJnZXRfbW9kdWxlcyUzRCU1QiUyMnFfcHJvaiUyMiUyQyUyMCUyMmtfcHJvaiUyMiUyQyUyMCUyMnZfcHJvaiUyMiUyQyUyMCUyMm9fcHJvaiUyMiU1RCklMEElMEElMjMlMjBBbGwlMjBsaW5lYXIlMjBsYXllcnMlMEFjb25maWclMjAlM0QlMjBPU0ZDb25maWcodGFyZ2V0X21vZHVsZXMlM0QlMjJhbGwtbGluZWFyJTIyKSUwQSUwQSUyMyUyME1vZGVsLXNwZWNpZmljJTIwZGVmYXVsdHMlMjAoYXV0b21hdGljYWxseSUyMGRldGVjdGVkKSUwQWNvbmZpZyUyMCUzRCUyME9TRkNvbmZpZygpJTIwJTIwJTIzJTIwVXNlcyUyMG1vZGVsLWFwcHJvcHJpYXRlJTIwZGVmYXVsdHM=",highlighted:`<span class="hljs-comment"># Specific module names</span>
config = OSFConfig(target_modules=[<span class="hljs-string">&quot;q_proj&quot;</span>, <span class="hljs-string">&quot;k_proj&quot;</span>, <span class="hljs-string">&quot;v_proj&quot;</span>, <span class="hljs-string">&quot;o_proj&quot;</span>])
<span class="hljs-comment"># All linear layers</span>
config = OSFConfig(target_modules=<span class="hljs-string">&quot;all-linear&quot;</span>)
<span class="hljs-comment"># Model-specific defaults (automatically detected)</span>
config = OSFConfig() <span class="hljs-comment"># Uses model-appropriate defaults</span>`,wrap:!1}}),z=new f({props:{title:"Effective Rank Configuration",local:"effective-rank-configuration",headingTag:"h3"}}),Y=new b({props:{code:"JTIzJTIwR2xvYmFsJTIwcHJlc2VydmVkJTIwcmFuayUyMChhcHBsaWVzJTIwdG8lMjBhbGwlMjB0YXJnZXQlMjBtb2R1bGVzKSUwQWNvbmZpZyUyMCUzRCUyME9TRkNvbmZpZyhlZmZlY3RpdmVfcmFuayUzRDE2KSUyMCUyMCUyMyUyMHByZXNlcnZlcyUyMHRvcC0xNiUyMHNpbmd1bGFyJTIwZGlyZWN0aW9ucyUzQiUyMHRyYWlucyUyMHRoZSUyMHJlc3QlMEElMEElMjMlMjBBdXRvbWF0aWMlMjBwcmVzZXJ2ZWQlMjByYW5rJTIwKDUwJTI1JTIwb2YlMjB0aGUlMjBzbWFsbGVyJTIwbWF0cml4JTIwZGltZW5zaW9uJTIwcGVyJTIwdGFyZ2V0KSUwQWNvbmZpZyUyMCUzRCUyME9TRkNvbmZpZyhlZmZlY3RpdmVfcmFuayUzRE5vbmUpJTBBJTBBJTIzJTIwUGVyLW1vZHVsZSUyMHByZXNlcnZlZC1yYW5rJTIwb3ZlcnJpZGVzJTBBY29uZmlnJTIwJTNEJTIwT1NGQ29uZmlnKCUwQSUyMCUyMCUyMCUyMGVmZmVjdGl2ZV9yYW5rJTNEOCUyQyUwQSUyMCUyMCUyMCUyMHJhbmtfcGF0dGVybiUzRCU3QiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMnFfcHJvaiUyMiUzQSUyMDE2JTJDJTIwJTIwJTIwJTIwJTIwJTIwJTIzJTIwSGlnaGVyJTIwcmFuayUyMGZvciUyMHF1ZXJ5JTIwcHJvamVjdGlvbiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMmdhdGVfcHJvaiUyMiUzQSUyMDQlMjAlMjAlMjAlMjAlMjAlMjMlMjBMb3dlciUyMHJhbmslMjBmb3IlMjBnYXRlJTIwcHJvamVjdGlvbiUwQSUyMCUyMCUyMCUyMCU3RCUwQSklMEElMjAlMEElMjMlMjBGcmFjdGlvbmFsJTIwcHJlc2VydmVkJTIwcmFuayUyMGlzJTIwc3VwcG9ydGVkJTIwKGludGVycHJldGVkJTIwcGVyLXRhcmdldCUyMGFzJTIwZnJhY3Rpb24lMjAqJTIwbWluX2RpbSklMEFjb25maWclMjAlM0QlMjBPU0ZDb25maWcoZWZmZWN0aXZlX3JhbmslM0QwLjgpJTIwJTIwJTIzJTIwcHJlc2VydmUlMjA4MCUyNSUyMG9mJTIwbWluX2RpbSUzQiUyMHRyYWluJTIwcmVtYWluaW5nJTIwMjAlMjUlMEFjb25maWclMjAlM0QlMjBPU0ZDb25maWcocmFua19wYXR0ZXJuJTNEJTdCJTIycV9wcm9qJTIyJTNBJTIwMC41JTdEKSUyMCUyMCUyMyUyMHByZXNlcnZlJTIwNTAlMjUlMjBvbiUyMHFfcHJvaiUyQyUyMG90aGVycyUyMHVzZSUyMGdsb2JhbCUyRmRlZmF1bHQ=",highlighted:`<span class="hljs-comment"># Global preserved rank (applies to all target modules)</span>
config = OSFConfig(effective_rank=<span class="hljs-number">16</span>) <span class="hljs-comment"># preserves top-16 singular directions; trains the rest</span>
<span class="hljs-comment"># Automatic preserved rank (50% of the smaller matrix dimension per target)</span>
config = OSFConfig(effective_rank=<span class="hljs-literal">None</span>)
<span class="hljs-comment"># Per-module preserved-rank overrides</span>
config = OSFConfig(
effective_rank=<span class="hljs-number">8</span>,
rank_pattern={
<span class="hljs-string">&quot;q_proj&quot;</span>: <span class="hljs-number">16</span>, <span class="hljs-comment"># Higher rank for query projection</span>
<span class="hljs-string">&quot;gate_proj&quot;</span>: <span class="hljs-number">4</span> <span class="hljs-comment"># Lower rank for gate projection</span>
}
)
<span class="hljs-comment"># Fractional preserved rank is supported (interpreted per-target as fraction * min_dim)</span>
config = OSFConfig(effective_rank=<span class="hljs-number">0.8</span>) <span class="hljs-comment"># preserve 80% of min_dim; train remaining 20%</span>
config = OSFConfig(rank_pattern={<span class="hljs-string">&quot;q_proj&quot;</span>: <span class="hljs-number">0.5</span>}) <span class="hljs-comment"># preserve 50% on q_proj, others use global/default</span>`,wrap:!1}}),E=new f({props:{title:"Training Advice for Continual Learning",local:"training-advice-for-continual-learning",headingTag:"h2"}}),H=new f({props:{title:"Sequential Task Learning",local:"sequential-task-learning",headingTag:"h3"}}),A=new b({props:{code:"JTIzJTIwVGFzayUyMDElM0ElMjB0cmFpbiUyMG9uJTIwZG9tYWluJTIwQSUyMHdpdGglMjBpbml0aWFsJTIwcHJlc2VydmVkJTIwc3Vic3BhY2UlMEFyJTIwJTNEJTIwOCUyMCUyMCUyMyUyMGluaXRpYWwlMjBlZmZlY3RpdmUlMjByYW5rJTIwdG8lMjBwcmVzZXJ2ZSUwQW1vZGVsJTIwJTNEJTIwZ2V0X3BlZnRfbW9kZWwoYmFzZV9tb2RlbCUyQyUyME9TRkNvbmZpZyhlZmZlY3RpdmVfcmFuayUzRHIpKSUwQXRyYWluX3Rhc2sobW9kZWwlMkMlMjB0YXNrXzFfZGF0YSklMEElMEElMjMlMjBUYXNrJTIwMiUzQSUyMHJlY29tcHV0ZSUyMFNWRCUyMG9uJTIwdXBkYXRlZCUyMHdlaWdodHMlMjBhbmQlMjBpbmNyZWFzZSUyMHByZXNlcnZlZCUyMHN1YnNwYWNlJTBBYmFzZV9tb2RlbCUyMCUzRCUyMG1vZGVsLnVubG9hZCgpJTIwJTIwJTIzJTIwdW53cmFwJTIwYmFzZSUyMG1vZGVsJTIwd2l0aG91dCUyMGFzc3VtaW5nJTIwaW50ZXJuYWxzJTBBciUyMCUyQiUzRCUyMDQlMjAlMjAlMjMlMjBncm93JTIwcHJlc2VydmVkJTIwc3Vic3BhY2UlMjB0byUyMGluY2x1ZGUlMjBUYXNrJTIwMSUyMGtub3dsZWRnZSUwQW1vZGVsJTIwJTNEJTIwZ2V0X3BlZnRfbW9kZWwoYmFzZV9tb2RlbCUyQyUyME9TRkNvbmZpZyhlZmZlY3RpdmVfcmFuayUzRHIpKSUwQXRyYWluX3Rhc2sobW9kZWwlMkMlMjB0YXNrXzJfZGF0YSklMEElMEElMjMlMjBUYXNrJTIwMyUzQSUyMHJlY29tcHV0ZSUyMGFnYWluJTIwYW5kJTIwZXhwYW5kJTIwcHJlc2VydmVkJTIwc3Vic3BhY2UlMjBmdXJ0aGVyJTBBYmFzZV9tb2RlbCUyMCUzRCUyMG1vZGVsLnVubG9hZCgpJTBBciUyMCUyQiUzRCUyMDQlMEFtb2RlbCUyMCUzRCUyMGdldF9wZWZ0X21vZGVsKGJhc2VfbW9kZWwlMkMlMjBPU0ZDb25maWcoZWZmZWN0aXZlX3JhbmslM0RyKSklMEF0cmFpbl90YXNrKG1vZGVsJTJDJTIwdGFza18zX2RhdGEp",highlighted:`<span class="hljs-comment"># Task 1: train on domain A with initial preserved subspace</span>
r = <span class="hljs-number">8</span> <span class="hljs-comment"># initial effective rank to preserve</span>
model = get_peft_model(base_model, OSFConfig(effective_rank=r))
train_task(model, task_1_data)
<span class="hljs-comment"># Task 2: recompute SVD on updated weights and increase preserved subspace</span>
base_model = model.unload() <span class="hljs-comment"># unwrap base model without assuming internals</span>
r += <span class="hljs-number">4</span> <span class="hljs-comment"># grow preserved subspace to include Task 1 knowledge</span>
model = get_peft_model(base_model, OSFConfig(effective_rank=r))
train_task(model, task_2_data)
<span class="hljs-comment"># Task 3: recompute again and expand preserved subspace further</span>
base_model = model.unload()
r += <span class="hljs-number">4</span>
model = get_peft_model(base_model, OSFConfig(effective_rank=r))
train_task(model, task_3_data)`,wrap:!1}}),O=new f({props:{title:"Budget Allocation for Task Sequences",local:"budget-allocation-for-task-sequences",headingTag:"h3"}}),K=new b({props:{code:"JTIzJTIwRXhhbXBsZSUzQSUyMDQtdGFzayUyMHNlcXVlbmNlJTIwd2l0aCUyMHByb2dyZXNzaXZlJTIwYnVkZ2V0JTIwYWxsb2NhdGlvbiUwQW5fdGFza3MlMjAlM0QlMjA0JTBBbWF4X3ByZXNlcnZlZF9yYW5rJTIwJTNEJTIwNTEyJTIwJTIwJTIzJTIwVXBwZXIlMjBib3VuZCUyMGZvciUyMHByZXNlcnZlZCUyMHJhbmslMjBwZXIlMjB0YXJnZXQlMjAoaGV1cmlzdGljKSUwQSUwQWZvciUyMHRhc2tfaWQlMjBpbiUyMHJhbmdlKG5fdGFza3MpJTNBJTBBJTIwJTIwJTIwJTIwJTIzJTIwRnJlZXplJTIwaW5jcmVhc2VzJTIwb3ZlciUyMHRpbWUlM0IlMjB0cmFpbmFibGUlMjBjYXBhY2l0eSUyMHNocmlua3MlMEElMjAlMjAlMjAlMjBwcmVzZXJ2ZWRfZnJhY3Rpb24lMjAlM0QlMjAodGFza19pZCUyMCUyQiUyMDEpJTIwJTJGJTIwbl90YXNrcyUwQSUyMCUyMCUyMCUyMHByZXNlcnZlZF9yYW5rJTIwJTNEJTIwaW50KG1heF9wcmVzZXJ2ZWRfcmFuayUyMColMjBwcmVzZXJ2ZWRfZnJhY3Rpb24pJTBBJTBBJTIwJTIwJTIwJTIwY29uZmlnJTIwJTNEJTIwT1NGQ29uZmlnKCUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHRhcmdldF9tb2R1bGVzJTNEJTVCJTIycV9wcm9qJTIyJTJDJTIwJTIya19wcm9qJTIyJTJDJTIwJTIydl9wcm9qJTIyJTJDJTIwJTIyb19wcm9qJTIyJTVEJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwZWZmZWN0aXZlX3JhbmslM0RwcmVzZXJ2ZWRfcmFuayUyQyUwQSUyMCUyMCUyMCUyMCklMEElMEElMjAlMjAlMjAlMjBwcmludCglMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBmJTIyVGFzayUyMCU3QnRhc2tfaWQlMjAlMkIlMjAxJTdEJTNBJTIwUHJlc2VydmluZyUyMHJhbmslMjAlN0JwcmVzZXJ2ZWRfcmFuayU3RCUyMCUyMiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGYlMjIoJTdCcHJlc2VydmVkX2ZyYWN0aW9uJTNBLjElMjUlN0QlMjBvZiUyMG1heF9wcmVzZXJ2ZWRfcmFuayUyMC0lMjAlN0JtYXhfcHJlc2VydmVkX3JhbmslN0QlMjBmcm96ZW4pJTNCJTIwdHJhaW5hYmxlJTIwcmFuayUyMCUzRCUyMG1pbl9kaW0lMjAtJTIwcHJlc2VydmVkX3JhbmslMjIlMEElMjAlMjAlMjAlMjApJTBBJTBBJTIwJTIwJTIwJTIwbW9kZWwlMjAlM0QlMjBnZXRfcGVmdF9tb2RlbChiYXNlX21vZGVsJTJDJTIwY29uZmlnKSUwQSUyMCUyMCUyMCUyMHRyYWluX3Rhc2sobW9kZWwlMkMlMjB0YXNrX2RhdGElNUJ0YXNrX2lkJTVEKQ==",highlighted:`<span class="hljs-comment"># Example: 4-task sequence with progressive budget allocation</span>
n_tasks = <span class="hljs-number">4</span>
max_preserved_rank = <span class="hljs-number">512</span> <span class="hljs-comment"># Upper bound for preserved rank per target (heuristic)</span>
<span class="hljs-keyword">for</span> task_id <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(n_tasks):
<span class="hljs-comment"># Freeze increases over time; trainable capacity shrinks</span>
preserved_fraction = (task_id + <span class="hljs-number">1</span>) / n_tasks
preserved_rank = <span class="hljs-built_in">int</span>(max_preserved_rank * preserved_fraction)
config = OSFConfig(
target_modules=[<span class="hljs-string">&quot;q_proj&quot;</span>, <span class="hljs-string">&quot;k_proj&quot;</span>, <span class="hljs-string">&quot;v_proj&quot;</span>, <span class="hljs-string">&quot;o_proj&quot;</span>],
effective_rank=preserved_rank,
)
<span class="hljs-built_in">print</span>(
<span class="hljs-string">f&quot;Task <span class="hljs-subst">{task_id + <span class="hljs-number">1</span>}</span>: Preserving rank <span class="hljs-subst">{preserved_rank}</span> &quot;</span>
<span class="hljs-string">f&quot;(<span class="hljs-subst">{preserved_fraction:<span class="hljs-number">.1</span>%}</span> of max_preserved_rank - <span class="hljs-subst">{max_preserved_rank}</span> frozen); trainable rank = min_dim - preserved_rank&quot;</span>
)
model = get_peft_model(base_model, config)
train_task(model, task_data[task_id])`,wrap:!1}}),ee=new f({props:{title:"Best Practices",local:"best-practices",headingTag:"h3"}}),se=new f({props:{title:"Memory Considerations",local:"memory-considerations",headingTag:"h3"}}),ae=new b({props:{code:"JTIzJTIwTWVtb3J5JTIwdXNhZ2UlMjByZW1haW5zJTIwY2xvc2UlMjB0byUyMGJhc2UlMjBtb2RlbCUwQXByaW50KGYlMjJCYXNlJTIwbW9kZWwlMjBwYXJhbWV0ZXJzJTNBJTIwJTdCYmFzZV9tb2RlbC5udW1fcGFyYW1ldGVycygpJTNBJTJDJTdEJTIyKSUwQXByaW50KGYlMjJPU0YlMjBtb2RlbCUyMHBhcmFtZXRlcnMlM0ElMjAlN0Jvc2ZfbW9kZWwubnVtX3BhcmFtZXRlcnMoKSUzQSUyQyU3RCUyMiklMjAlMjAlMjMlMjBTaW1pbGFyJTIwY291bnQ=",highlighted:`<span class="hljs-comment"># Memory usage remains close to base model</span>
<span class="hljs-built_in">print</span>(<span class="hljs-string">f&quot;Base model parameters: <span class="hljs-subst">{base_model.num_parameters():,}</span>&quot;</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">f&quot;OSF model parameters: <span class="hljs-subst">{osf_model.num_parameters():,}</span>&quot;</span>) <span class="hljs-comment"># Similar count</span>`,wrap:!1}}),le=new f({props:{title:"Advanced Usage",local:"advanced-usage",headingTag:"h2"}}),ie=new f({props:{title:"Custom Target Modules",local:"custom-target-modules",headingTag:"h3"}}),oe=new b({props:{code:"Y29uZmlnJTIwJTNEJTIwT1NGQ29uZmlnKCUwQSUyMCUyMCUyMCUyMHRhcmdldF9tb2R1bGVzJTNEJTVCJTIyZGVuc2UlMjIlMkMlMjAlMjJpbnRlcm1lZGlhdGUuZGVuc2UlMjIlNUQlMkMlMjAlMjAlMjMlMjBDdXN0b20lMjBsYXllciUyMG5hbWVzJTBBJTIwJTIwJTIwJTIwZWZmZWN0aXZlX3JhbmslM0QxMiUyQyUwQSUyMCUyMCUyMCUyMHJhbmtfcGF0dGVybiUzRCU3QiUyMmRlbnNlJTIyJTNBJTIwOCUyQyUyMCUyMmludGVybWVkaWF0ZS5kZW5zZSUyMiUzQSUyMDE2JTdEJTBBKQ==",highlighted:`config = OSFConfig(
target_modules=[<span class="hljs-string">&quot;dense&quot;</span>, <span class="hljs-string">&quot;intermediate.dense&quot;</span>], <span class="hljs-comment"># Custom layer names</span>
effective_rank=<span class="hljs-number">12</span>,
rank_pattern={<span class="hljs-string">&quot;dense&quot;</span>: <span class="hljs-number">8</span>, <span class="hljs-string">&quot;intermediate.dense&quot;</span>: <span class="hljs-number">16</span>}
)`,wrap:!1}}),pe=new f({props:{title:"Integration with Other Methods",local:"integration-with-other-methods",headingTag:"h3"}}),ce=new b({props:{code:"JTIzJTIwVXNlJTIwd2l0aCUyMGdyYWRpZW50JTIwY2hlY2twb2ludGluZyUyMGZvciUyMG1lbW9yeSUyMGVmZmljaWVuY3klMEFtb2RlbC5ncmFkaWVudF9jaGVja3BvaW50aW5nX2VuYWJsZSgpJTBBJTBBJTIzJTIwQXBwbHklMjB3ZWlnaHQlMjBkZWNheSUyMHNlbGVjdGl2ZWx5JTIwKHJlZ3VsYXJpemVzJTIwbG93LXJhbmslMjBmYWN0b3JzJTIwdG8lMjBsaW1pdCUyMGRyaWZ0JTJGb3ZlcmZpdHRpbmclMjBpbiUyMGNvbnRpbnVhbCUyMHVwZGF0ZXMlM0IlMjBrZWVwJTIwc21hbGwpJTBBb3B0aW1pemVyJTIwJTNEJTIwdG9yY2gub3B0aW0uQWRhbVcoJTVCJTBBJTIwJTIwJTIwJTIwJTdCJTIycGFyYW1zJTIyJTNBJTIwJTVCcCUyMGZvciUyMG4lMkMlMjBwJTIwaW4lMjBtb2RlbC5uYW1lZF9wYXJhbWV0ZXJzKCklMjBpZiUyMCUyMlVfbG93JTIyJTIwaW4lMjBuJTVEJTJDJTIwJTIyd2VpZ2h0X2RlY2F5JTIyJTNBJTIwMC4wMSU3RCUyQyUwQSUyMCUyMCUyMCUyMCU3QiUyMnBhcmFtcyUyMiUzQSUyMCU1QnAlMjBmb3IlMjBuJTJDJTIwcCUyMGluJTIwbW9kZWwubmFtZWRfcGFyYW1ldGVycygpJTIwaWYlMjAlMjJTX2xvdyUyMiUyMGluJTIwbiU1RCUyQyUyMCUyMndlaWdodF9kZWNheSUyMiUzQSUyMDAuMDAxJTdEJTJDJTBBJTIwJTIwJTIwJTIwJTdCJTIycGFyYW1zJTIyJTNBJTIwJTVCcCUyMGZvciUyMG4lMkMlMjBwJTIwaW4lMjBtb2RlbC5uYW1lZF9wYXJhbWV0ZXJzKCklMjBpZiUyMCUyMlZfbG93JTIyJTIwaW4lMjBuJTVEJTJDJTIwJTIyd2VpZ2h0X2RlY2F5JTIyJTNBJTIwMC4wMSU3RCUyQyUwQSU1RCUyQyUyMGxyJTNEMWUtNCk=",highlighted:`<span class="hljs-comment"># Use with gradient checkpointing for memory efficiency</span>
model.gradient_checkpointing_enable()
<span class="hljs-comment"># Apply weight decay selectively (regularizes low-rank factors to limit drift/overfitting in continual updates; keep small)</span>
optimizer = torch.optim.AdamW([
{<span class="hljs-string">&quot;params&quot;</span>: [p <span class="hljs-keyword">for</span> n, p <span class="hljs-keyword">in</span> model.named_parameters() <span class="hljs-keyword">if</span> <span class="hljs-string">&quot;U_low&quot;</span> <span class="hljs-keyword">in</span> n], <span class="hljs-string">&quot;weight_decay&quot;</span>: <span class="hljs-number">0.01</span>},
{<span class="hljs-string">&quot;params&quot;</span>: [p <span class="hljs-keyword">for</span> n, p <span class="hljs-keyword">in</span> model.named_parameters() <span class="hljs-keyword">if</span> <span class="hljs-string">&quot;S_low&quot;</span> <span class="hljs-keyword">in</span> n], <span class="hljs-string">&quot;weight_decay&quot;</span>: <span class="hljs-number">0.001</span>},
{<span class="hljs-string">&quot;params&quot;</span>: [p <span class="hljs-keyword">for</span> n, p <span class="hljs-keyword">in</span> model.named_parameters() <span class="hljs-keyword">if</span> <span class="hljs-string">&quot;V_low&quot;</span> <span class="hljs-keyword">in</span> n], <span class="hljs-string">&quot;weight_decay&quot;</span>: <span class="hljs-number">0.01</span>},
], lr=<span class="hljs-number">1e-4</span>)`,wrap:!1}}),ye=new f({props:{title:"OSFConfig",local:"peft.OSFConfig",headingTag:"h2"}}),Me=new Be({props:{name:"class peft.OSFConfig",anchor:"peft.OSFConfig",parameters:[{name:"task_type",val:": Optional[Union[str, TaskType]] = None"},{name:"peft_type",val:": Optional[Union[str, PeftType]] = None"},{name:"auto_mapping",val:": Optional[dict] = None"},{name:"peft_version",val:": Optional[str] = None"},{name:"base_model_name_or_path",val:": Optional[str] = None"},{name:"revision",val:": Optional[str] = None"},{name:"inference_mode",val:": bool = False"},{name:"effective_rank",val:": Optional[Union[int, float]] = None"},{name:"target_modules",val:": Optional[Union[list[str], str]] = None"},{name:"rank_pattern",val:": Optional[dict[str, Union[int, float]]] = None"},{name:"init_weights",val:": Optional[bool] = None"},{name:"modules_to_save",val:": Optional[list[str]] = None"},{name:"target_svd_config",val:": Optional[dict[str, int]] = None"}],parametersDescription:[{anchor:"peft.OSFConfig.effective_rank",description:`<strong>effective_rank</strong> (<em>int</em> or <em>float</em>, <em>optional</em>) &#x2014;
Preserved SVD rank (&#x201C;high&#x201D; subspace). The top-<code>effective_rank</code> singular directions are frozen and
retained across tasks; the remaining dimensions form the trainable low-rank subspace. If <em>None</em>, defaults
to 50% of the smaller weight dimension per target module. Note: This differs from LoRA&#x2019;s <em>r</em> (trainable
rank). In OSF, the trainable rank is <em>min(weight.shape) - effective_rank</em>.`,name:"effective_rank"},{anchor:"peft.OSFConfig.target_modules",description:`<strong>target_modules</strong> (<em>Union[list[str], str]</em>, <em>optional</em>) &#x2014;
The names of the modules to apply OSF to. Can be a list of module names or <em>&#x201C;all-linear&#x201D;</em>.`,name:"target_modules"},{anchor:"peft.OSFConfig.rank_pattern",description:`<strong>rank_pattern</strong> (<em>dict[str, int|float]</em>, <em>optional</em>) &#x2014;
A dictionary of regex patterns to override <em>effective_rank</em> for specific modules. Values can be absolute
integers or fractions in (0, 1], interpreted as a fraction of the smaller matrix dimension per target.`,name:"rank_pattern"}],source:"https://github.com/huggingface/peft/blob/vr_3206/src/peft/tuners/osf/config.py#L11"}}),de=new f({props:{title:"OSFModel",local:"peft.OSFModel",headingTag:"h2"}}),fe=new Be({props:{name:"class peft.OSFModel",anchor:"peft.OSFModel",parameters:[{name:"model",val:""},{name:"config",val:""},{name:"adapter_name",val:""},{name:"low_cpu_mem_usage",val:": bool = False"},{name:"state_dict",val:": dict[str, torch.Tensor] | None = None"}],source:"https://github.com/huggingface/peft/blob/vr_3206/src/peft/tuners/osf/model.py#L14"}}),ue=new f({props:{title:"Utility Functions",local:"utility-functions",headingTag:"h2"}}),ge=new f({props:{title:"Weight Decomposition",local:"peft.tuners.osf.utils.decompose_weight_matrix",headingTag:"h3"}}),Ue=new Be({props:{name:"peft.tuners.osf.utils.decompose_weight_matrix",anchor:"peft.tuners.osf.utils.decompose_weight_matrix",parameters:[{name:"weight",val:": torch.Tensor"},{name:"top_k",val:": int"}],source:"https://github.com/huggingface/peft/blob/vr_3206/src/peft/tuners/osf/utils.py#L42"}}),we=new Be({props:{name:"peft.tuners.osf.utils.reconstruct_weight_matrix",anchor:"peft.tuners.osf.utils.reconstruct_weight_matrix",parameters:[{name:"svd_dict",val:": dict[str, torch.Tensor]"}],source:"https://github.com/huggingface/peft/blob/vr_3206/src/peft/tuners/osf/utils.py#L62"}}),Je=new f({props:{title:"Gradient Projection",local:"peft.tuners.osf.utils.project_gradient_to_orthogonal_space",headingTag:"h3"}}),he=new Be({props:{name:"peft.tuners.osf.utils.project_gradient_to_orthogonal_space",anchor:"peft.tuners.osf.utils.project_gradient_to_orthogonal_space",parameters:[{name:"svd_dict",val:": dict[str, Any]"}],source:"https://github.com/huggingface/peft/blob/vr_3206/src/peft/tuners/osf/utils.py#L84"}}),Te=new 
Ts({props:{source:"https://github.com/huggingface/peft/blob/main/docs/source/package_reference/osf.md"}}),{c(){g=y("meta"),Ve=a(),ke=y("p"),We=a(),i(j.$$.fragment),Ge=a(),i(C.$$.fragment),Fe=a(),v=y("p"),v.innerHTML=Ht,Se=a(),$=y("p"),$.textContent=qt,Re=a(),Z=y("p"),Z.innerHTML=At,Xe=a(),i(I.$$.fragment),Ne=a(),k=y("p"),k.textContent=Ot,ze=a(),i(_.$$.fragment),Qe=a(),B=y("p"),B.textContent=Lt,Ye=a(),V=y("ul"),V.innerHTML=Dt,xe=a(),W=y("p"),W.textContent=Pt,Ee=a(),i(G.$$.fragment),He=a(),i(F.$$.fragment),qe=a(),i(S.$$.fragment),Ae=a(),i(R.$$.fragment),Oe=a(),X=y("p"),X.textContent=Kt,Le=a(),i(N.$$.fragment),De=a(),i(z.$$.fragment),Pe=a(),Q=y("p"),Q.textContent=es,Ke=a(),i(Y.$$.fragment),et=a(),x=y("p"),x.innerHTML=ts,tt=a(),i(E.$$.fragment),st=a(),i(H.$$.fragment),nt=a(),q=y("p"),q.textContent=ss,at=a(),i(A.$$.fragment),lt=a(),i(O.$$.fragment),it=a(),L=y("p"),L.textContent=ns,rt=a(),D=y("ul"),D.innerHTML=as,ot=a(),P=y("p"),P.textContent=ls,pt=a(),i(K.$$.fragment),mt=a(),i(ee.$$.fragment),ct=a(),te=y("ol"),te.innerHTML=is,yt=a(),i(se.$$.fragment),Mt=a(),ne=y("p"),ne.textContent=rs,dt=a(),i(ae.$$.fragment),ft=a(),i(le.$$.fragment),ut=a(),i(ie.$$.fragment),gt=a(),re=y("p"),re.textContent=os,Ut=a(),i(oe.$$.fragment),wt=a(),i(pe.$$.fragment),Jt=a(),me=y("p"),me.textContent=ps,ht=a(),i(ce.$$.fragment),Tt=a(),i(ye.$$.fragment),bt=a(),U=y("div"),i(Me.$$.fragment),Nt=a(),Ce=y("p"),Ce.textContent=ms,jt=a(),i(de.$$.fragment),Ct=a(),w=y("div"),i(fe.$$.fragment),zt=a(),ve=y("p"),ve.textContent=cs,vt=a(),i(ue.$$.fragment),$t=a(),i(ge.$$.fragment),Zt=a(),J=y("div"),i(Ue.$$.fragment),Qt=a(),$e=y("p"),$e.innerHTML=ys,It=a(),h=y("div"),i(we.$$.fragment),Yt=a(),Ze=y("p"),Ze.textContent=Ms,kt=a(),i(Je.$$.fragment),_t=a(),T=y("div"),i(he.$$.fragment),xt=a(),Ie=y("p"),Ie.innerHTML=ds,Bt=a(),i(Te.$$.fragment),Vt=a(),_e=y("p"),this.h()},l(e){const 
t=Js("svelte-u9bgzb",document.head);g=M(t,"META",{name:!0,content:!0}),t.forEach(s),Ve=l(e),ke=M(e,"P",{}),be(ke).forEach(s),We=l(e),r(j.$$.fragment,e),Ge=l(e),r(C.$$.fragment,e),Fe=l(e),v=M(e,"P",{"data-svelte-h":!0}),d(v)!=="svelte-1qr3fyd"&&(v.innerHTML=Ht),Se=l(e),$=M(e,"P",{"data-svelte-h":!0}),d($)!=="svelte-1cwsb16"&&($.textContent=qt),Re=l(e),Z=M(e,"P",{"data-svelte-h":!0}),d(Z)!=="svelte-uh1yaa"&&(Z.innerHTML=At),Xe=l(e),r(I.$$.fragment,e),Ne=l(e),k=M(e,"P",{"data-svelte-h":!0}),d(k)!=="svelte-18j28xc"&&(k.textContent=Ot),ze=l(e),r(_.$$.fragment,e),Qe=l(e),B=M(e,"P",{"data-svelte-h":!0}),d(B)!=="svelte-ubj59h"&&(B.textContent=Lt),Ye=l(e),V=M(e,"UL",{"data-svelte-h":!0}),d(V)!=="svelte-um6twe"&&(V.innerHTML=Dt),xe=l(e),W=M(e,"P",{"data-svelte-h":!0}),d(W)!=="svelte-1hog9yq"&&(W.textContent=Pt),Ee=l(e),r(G.$$.fragment,e),He=l(e),r(F.$$.fragment,e),qe=l(e),r(S.$$.fragment,e),Ae=l(e),r(R.$$.fragment,e),Oe=l(e),X=M(e,"P",{"data-svelte-h":!0}),d(X)!=="svelte-pwz5xj"&&(X.textContent=Kt),Le=l(e),r(N.$$.fragment,e),De=l(e),r(z.$$.fragment,e),Pe=l(e),Q=M(e,"P",{"data-svelte-h":!0}),d(Q)!=="svelte-1wx06z0"&&(Q.textContent=es),Ke=l(e),r(Y.$$.fragment,e),et=l(e),x=M(e,"P",{"data-svelte-h":!0}),d(x)!=="svelte-1cb9asi"&&(x.innerHTML=ts),tt=l(e),r(E.$$.fragment,e),st=l(e),r(H.$$.fragment,e),nt=l(e),q=M(e,"P",{"data-svelte-h":!0}),d(q)!=="svelte-1r3eaym"&&(q.textContent=ss),at=l(e),r(A.$$.fragment,e),lt=l(e),r(O.$$.fragment,e),it=l(e),L=M(e,"P",{"data-svelte-h":!0}),d(L)!=="svelte-1grqwqq"&&(L.textContent=ns),rt=l(e),D=M(e,"UL",{"data-svelte-h":!0}),d(D)!=="svelte-l0bpqy"&&(D.innerHTML=as),ot=l(e),P=M(e,"P",{"data-svelte-h":!0}),d(P)!=="svelte-1jg7192"&&(P.textContent=ls),pt=l(e),r(K.$$.fragment,e),mt=l(e),r(ee.$$.fragment,e),ct=l(e),te=M(e,"OL",{"data-svelte-h":!0}),d(te)!=="svelte-1nv49ca"&&(te.innerHTML=is),yt=l(e),r(se.$$.fragment,e),Mt=l(e),ne=M(e,"P",{"data-svelte-h":!0}),d(ne)!=="svelte-1i5cxzh"&&(ne.textContent=rs),dt=l(e),r(ae.$$.fragment,e),ft=l(e),r(le.$$.fragmen
t,e),ut=l(e),r(ie.$$.fragment,e),gt=l(e),re=M(e,"P",{"data-svelte-h":!0}),d(re)!=="svelte-jv3vjq"&&(re.textContent=os),Ut=l(e),r(oe.$$.fragment,e),wt=l(e),r(pe.$$.fragment,e),Jt=l(e),me=M(e,"P",{"data-svelte-h":!0}),d(me)!=="svelte-1upwa29"&&(me.textContent=ps),ht=l(e),r(ce.$$.fragment,e),Tt=l(e),r(ye.$$.fragment,e),bt=l(e),U=M(e,"DIV",{class:!0});var Gt=be(U);r(Me.$$.fragment,Gt),Nt=l(Gt),Ce=M(Gt,"P",{"data-svelte-h":!0}),d(Ce)!=="svelte-6pybv"&&(Ce.textContent=ms),Gt.forEach(s),jt=l(e),r(de.$$.fragment,e),Ct=l(e),w=M(e,"DIV",{class:!0});var Ft=be(w);r(fe.$$.fragment,Ft),zt=l(Ft),ve=M(Ft,"P",{"data-svelte-h":!0}),d(ve)!=="svelte-di4amu"&&(ve.textContent=cs),Ft.forEach(s),vt=l(e),r(ue.$$.fragment,e),$t=l(e),r(ge.$$.fragment,e),Zt=l(e),J=M(e,"DIV",{class:!0});var St=be(J);r(Ue.$$.fragment,St),Qt=l(St),$e=M(St,"P",{"data-svelte-h":!0}),d($e)!=="svelte-gg62hi"&&($e.innerHTML=ys),St.forEach(s),It=l(e),h=M(e,"DIV",{class:!0});var Rt=be(h);r(we.$$.fragment,Rt),Yt=l(Rt),Ze=M(Rt,"P",{"data-svelte-h":!0}),d(Ze)!=="svelte-1msv5nl"&&(Ze.textContent=Ms),Rt.forEach(s),kt=l(e),r(Je.$$.fragment,e),_t=l(e),T=M(e,"DIV",{class:!0});var Xt=be(T);r(he.$$.fragment,Xt),xt=l(Xt),Ie=M(Xt,"P",{"data-svelte-h":!0}),d(Ie)!=="svelte-1most7g"&&(Ie.innerHTML=ds),Xt.forEach(s),Bt=l(e),r(Te.$$.fragment,e),Vt=l(e),_e=M(e,"P",{}),be(_e).forEach(s),this.h()},h(){je(g,"name","hf:doc:metadata"),je(g,"content",js),je(U,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),je(w,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),je(J,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),je(h,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),je(T,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 
mt-8")},m(e,t){u(document.head,g),n(e,Ve,t),n(e,ke,t),n(e,We,t),o(j,e,t),n(e,Ge,t),o(C,e,t),n(e,Fe,t),n(e,v,t),n(e,Se,t),n(e,$,t),n(e,Re,t),n(e,Z,t),n(e,Xe,t),o(I,e,t),n(e,Ne,t),n(e,k,t),n(e,ze,t),o(_,e,t),n(e,Qe,t),n(e,B,t),n(e,Ye,t),n(e,V,t),n(e,xe,t),n(e,W,t),n(e,Ee,t),o(G,e,t),n(e,He,t),o(F,e,t),n(e,qe,t),o(S,e,t),n(e,Ae,t),o(R,e,t),n(e,Oe,t),n(e,X,t),n(e,Le,t),o(N,e,t),n(e,De,t),o(z,e,t),n(e,Pe,t),n(e,Q,t),n(e,Ke,t),o(Y,e,t),n(e,et,t),n(e,x,t),n(e,tt,t),o(E,e,t),n(e,st,t),o(H,e,t),n(e,nt,t),n(e,q,t),n(e,at,t),o(A,e,t),n(e,lt,t),o(O,e,t),n(e,it,t),n(e,L,t),n(e,rt,t),n(e,D,t),n(e,ot,t),n(e,P,t),n(e,pt,t),o(K,e,t),n(e,mt,t),o(ee,e,t),n(e,ct,t),n(e,te,t),n(e,yt,t),o(se,e,t),n(e,Mt,t),n(e,ne,t),n(e,dt,t),o(ae,e,t),n(e,ft,t),o(le,e,t),n(e,ut,t),o(ie,e,t),n(e,gt,t),n(e,re,t),n(e,Ut,t),o(oe,e,t),n(e,wt,t),o(pe,e,t),n(e,Jt,t),n(e,me,t),n(e,ht,t),o(ce,e,t),n(e,Tt,t),o(ye,e,t),n(e,bt,t),n(e,U,t),o(Me,U,null),u(U,Nt),u(U,Ce),n(e,jt,t),o(de,e,t),n(e,Ct,t),n(e,w,t),o(fe,w,null),u(w,zt),u(w,ve),n(e,vt,t),o(ue,e,t),n(e,$t,t),o(ge,e,t),n(e,Zt,t),n(e,J,t),o(Ue,J,null),u(J,Qt),u(J,$e),n(e,It,t),n(e,h,t),o(we,h,null),u(h,Yt),u(h,Ze),n(e,kt,t),o(Je,e,t),n(e,_t,t),n(e,T,t),o(he,T,null),u(T,xt),u(T,Ie),n(e,Bt,t),o(Te,e,t),n(e,Vt,t),n(e,_e,t),Wt=!0},p:us,i(e){Wt||(p(j.$$.fragment,e),p(C.$$.fragment,e),p(I.$$.fragment,e),p(_.$$.fragment,e),p(G.$$.fragment,e),p(F.$$.fragment,e),p(S.$$.fragment,e),p(R.$$.fragment,e),p(N.$$.fragment,e),p(z.$$.fragment,e),p(Y.$$.fragment,e),p(E.$$.fragment,e),p(H.$$.fragment,e),p(A.$$.fragment,e),p(O.$$.fragment,e),p(K.$$.fragment,e),p(ee.$$.fragment,e),p(se.$$.fragment,e),p(ae.$$.fragment,e),p(le.$$.fragment,e),p(ie.$$.fragment,e),p(oe.$$.fragment,e),p(pe.$$.fragment,e),p(ce.$$.fragment,e),p(ye.$$.fragment,e),p(Me.$$.fragment,e),p(de.$$.fragment,e),p(fe.$$.fragment,e),p(ue.$$.fragment,e),p(ge.$$.fragment,e),p(Ue.$$.fragment,e),p(we.$$.fragment,e),p(Je.$$.fragment,e),p(he.$$.fragment,e),p(Te.$$.fragment,e),Wt=!0)},o(e){m(j.$$.fragment,e),m(C.$$.fragment,e)
,m(I.$$.fragment,e),m(_.$$.fragment,e),m(G.$$.fragment,e),m(F.$$.fragment,e),m(S.$$.fragment,e),m(R.$$.fragment,e),m(N.$$.fragment,e),m(z.$$.fragment,e),m(Y.$$.fragment,e),m(E.$$.fragment,e),m(H.$$.fragment,e),m(A.$$.fragment,e),m(O.$$.fragment,e),m(K.$$.fragment,e),m(ee.$$.fragment,e),m(se.$$.fragment,e),m(ae.$$.fragment,e),m(le.$$.fragment,e),m(ie.$$.fragment,e),m(oe.$$.fragment,e),m(pe.$$.fragment,e),m(ce.$$.fragment,e),m(ye.$$.fragment,e),m(Me.$$.fragment,e),m(de.$$.fragment,e),m(fe.$$.fragment,e),m(ue.$$.fragment,e),m(ge.$$.fragment,e),m(Ue.$$.fragment,e),m(we.$$.fragment,e),m(Je.$$.fragment,e),m(he.$$.fragment,e),m(Te.$$.fragment,e),Wt=!1},d(e){e&&(s(Ve),s(ke),s(We),s(Ge),s(Fe),s(v),s(Se),s($),s(Re),s(Z),s(Xe),s(Ne),s(k),s(ze),s(Qe),s(B),s(Ye),s(V),s(xe),s(W),s(Ee),s(He),s(qe),s(Ae),s(Oe),s(X),s(Le),s(De),s(Pe),s(Q),s(Ke),s(et),s(x),s(tt),s(st),s(nt),s(q),s(at),s(lt),s(it),s(L),s(rt),s(D),s(ot),s(P),s(pt),s(mt),s(ct),s(te),s(yt),s(Mt),s(ne),s(dt),s(ft),s(ut),s(gt),s(re),s(Ut),s(wt),s(Jt),s(me),s(ht),s(Tt),s(bt),s(U),s(jt),s(Ct),s(w),s(vt),s($t),s(Zt),s(J),s(It),s(h),s(kt),s(_t),s(T),s(Bt),s(Vt),s(_e)),s(g),c(j,e),c(C,e),c(I,e),c(_,e),c(G,e),c(F,e),c(S,e),c(R,e),c(N,e),c(z,e),c(Y,e),c(E,e),c(H,e),c(A,e),c(O,e),c(K,e),c(ee,e),c(se,e),c(ae,e),c(le,e),c(ie,e),c(oe,e),c(pe,e),c(ce,e),c(ye,e),c(Me),c(de,e),c(fe),c(ue,e),c(ge,e),c(Ue),c(we),c(Je,e),c(he),c(Te,e)}}}const js='{"title":"OSF (Orthogonal Subspace Fine-tuning)","local":"osf-orthogonal-subspace-fine-tuning","sections":[{"title":"How OSF Works","local":"how-osf-works","sections":[],"depth":2},{"title":"Basic Usage","local":"basic-usage","sections":[],"depth":2},{"title":"Configuration Options","local":"configuration-options","sections":[{"title":"Target Modules","local":"target-modules","sections":[],"depth":3},{"title":"Effective Rank Configuration","local":"effective-rank-configuration","sections":[],"depth":3}],"depth":2},{"title":"Training Advice for Continual 
Learning","local":"training-advice-for-continual-learning","sections":[{"title":"Sequential Task Learning","local":"sequential-task-learning","sections":[],"depth":3},{"title":"Budget Allocation for Task Sequences","local":"budget-allocation-for-task-sequences","sections":[],"depth":3},{"title":"Best Practices","local":"best-practices","sections":[],"depth":3},{"title":"Memory Considerations","local":"memory-considerations","sections":[],"depth":3}],"depth":2},{"title":"Advanced Usage","local":"advanced-usage","sections":[{"title":"Custom Target Modules","local":"custom-target-modules","sections":[],"depth":3},{"title":"Integration with Other Methods","local":"integration-with-other-methods","sections":[],"depth":3}],"depth":2},{"title":"OSFConfig","local":"peft.OSFConfig","sections":[],"depth":2},{"title":"OSFModel","local":"peft.OSFModel","sections":[],"depth":2},{"title":"Utility Functions","local":"utility-functions","sections":[{"title":"Weight Decomposition","local":"peft.tuners.osf.utils.decompose_weight_matrix","sections":[],"depth":3},{"title":"Gradient Projection","local":"peft.tuners.osf.utils.project_gradient_to_orthogonal_space","sections":[],"depth":3}],"depth":2}],"depth":1}';/**
 * The string literal that ends just above is the value of `js`. The fragment
 * function `bs` writes it to the `content` attribute of the element it creates
 * with `name` set to "hf:doc:metadata". The text is JSON describing the page's
 * title / local-anchor / sections hierarchy.
 */
/**
 * Passed as the third argument to `ws` in the `_s` constructor below.
 *
 * Hands a callback to `gs` (imported as `o` from the scheduler chunk --
 * presumably Svelte's `onMount`; confirm against that chunk). The callback
 * builds a `URLSearchParams` from `window.location.search` and calls
 * `.get("fw")`, but the value is discarded, so as written the lookup has no
 * visible effect. The parameter `Et` is never referenced in the body.
 *
 * @returns {Array} Always a new empty array: the comma expression evaluates
 *   `gs(...)` first and then yields `[]`.
 */function Cs(Et){return gs(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}/**
 * Class exported from this module under the name `component`.
 *
 * Extends `Us` (imported as `S` from the index chunk -- presumably Svelte's
 * `SvelteComponent`; confirm). The constructor calls `super()` and then
 * `ws(this, g, Cs, bs, fs, {})`, where:
 *   - `g` is the constructor's own argument, forwarded unchanged;
 *   - `Cs` and `bs` are the functions defined in this file;
 *   - `fs` is imported as `s` from the scheduler chunk;
 *   - the final argument is an empty object literal.
 * NOTE(review): `ws` is imported as `i` from the index chunk and looks like
 * Svelte's internal `init` -- verify before relying on argument meanings.
 */class _s extends Us{constructor(g){super(),ws(this,g,Cs,bs,fs,{})}}/* The only export statement in the visible part of the file: `_s` is exposed as `component`. */export{_s as component};

Xet Storage Details

Size:
40.5 kB
·
Xet hash:
7cca5f755ccfc4af959fec8e038a1dcc4ebe7f35974cb5c5f3bc486613f2533a

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.