Buckets:
| import{s as Fe,o as Je,n as Pe}from"../chunks/scheduler.78382b47.js";import{S as qe,i as Ze,e as r,s as a,c as y,h as je,a as l,d as n,b as o,f as pe,g as _,j as m,k as me,l as b,m as s,n as T,t as w,o as $,p as M}from"../chunks/index.6dd35eb6.js";import{C as He,H as be,E as Ie}from"../chunks/MermaidChart.svelte_svelte_type_style_lang.184d38d3.js";import{D as ke}from"../chunks/Docstring.e49bb367.js";import{C as Ne}from"../chunks/CodeBlock.7e3c9fac.js";import{E as Re}from"../chunks/ExampleCodeBlock.dc1b216e.js";function Se(O){let f,x="Example:",u,d,c;return d=new Ne({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Nb2RlbEZvclNlcTJTZXFMTSUwQWZyb20lMjBwZWZ0JTIwaW1wb3J0JTIwQmVmdE1vZGVsJTJDJTIwQmVmdENvbmZpZyUwQSUwQWNvbmZpZyUyMCUzRCUyMEJlZnRDb25maWcoJTBBJTIwJTIwJTIwJTIwcGVmdF90eXBlJTNEJTIyQmVmdCUyMiUyQyUwQSUyMCUyMCUyMCUyMHRhc2tfdHlwZSUzRCUyMlNFUV8yX1NFUV9MTSUyMiUyQyUwQSUyMCUyMCUyMCUyMHRhcmdldF9tb2R1bGVzJTNEJTVCJTIydiUyMiU1RCUyQyUwQSklMEElMEFtb2RlbCUyMCUzRCUyMEF1dG9Nb2RlbEZvclNlcTJTZXFMTS5mcm9tX3ByZXRyYWluZWQoJTIydDUtYmFzZSUyMiklMEFiZWZ0X21vZGVsJTIwJTNEJTIwQmVmdE1vZGVsKG1vZGVsJTJDJTIwY29uZmlnJTJDJTIwYWRhcHRlcl9uYW1lJTNEJTIyZGVmYXVsdCUyMik=",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForSeq2SeqLM | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> peft <span class="hljs-keyword">import</span> BeftModel, BeftConfig | |
| <span class="hljs-meta">>>> </span>config = BeftConfig( | |
| <span class="hljs-meta">... </span> peft_type=<span class="hljs-string">"Beft"</span>, | |
| <span class="hljs-meta">... </span> task_type=<span class="hljs-string">"SEQ_2_SEQ_LM"</span>, | |
| <span class="hljs-meta">... </span> target_modules=[<span class="hljs-string">"v"</span>], | |
| <span class="hljs-meta">... </span>) | |
| <span class="hljs-meta">>>> </span>model = AutoModelForSeq2SeqLM.from_pretrained(<span class="hljs-string">"t5-base"</span>) | |
| <span class="hljs-meta">>>> </span>beft_model = BeftModel(model, config, adapter_name=<span class="hljs-string">"default"</span>)`,wrap:!1}}),{c(){f=r("p"),f.textContent=x,u=a(),y(d.$$.fragment)},l(i){f=l(i,"P",{"data-svelte-h":!0}),m(f)!=="svelte-11lpom8"&&(f.textContent=x),u=o(i),_(d.$$.fragment,i)},m(i,g){s(i,f,g),s(i,u,g),T(d,i,g),c=!0},p:Pe,i(i){c||(w(d.$$.fragment,i),c=!0)},o(i){$(d.$$.fragment,i),c=!1},d(i){i&&(n(f),n(u)),M(d,i)}}}function Ve(O){let f,x,u,d,c,i,g,G,B,ve='<a href="https://arxiv.org/abs/2509.15974" rel="nofollow">BEFT</a> is a parameter efficient fine-tuning algorithm (PEFT) that only fine-tunes the added bias terms of value projections from pretrained transformer models. BEFT demonstrates that fine-tuning the added bias terms of value projections from pretrained transformers generally leads to a higher downstream performance in low-data regimes than fine-tuning the added bias terms of query/key projections.',z,U,ye="BEFT currently has the following tradeoffs:",A,E,_e="Pros:",X,L,Te="<li>BEFT requires far fewer parameters than LoRA, while maintaining competitive or superior performance across tasks in low-data regimes.</li>",Y,k,we="Cons:",K,F,$e="<li>In high-data regimes, BEFT may show limited effectiveness compared to LoRA and full-parameters fine-tuning.</li>",ee,J,Me="If your use case belongs to the high-data regime, consider other PEFT methods such as LoRA.",te,P,Ce="The abstract from the paper is:",ne,q,xe="<em>Fine-tuning the bias terms of large language models (LLMs) has the potential to achieve unprecedented parameter efficiency while maintaining competitive performance, particularly in low-data regimes. However, the link between fine-tuning different bias terms (i.e., <strong>b</strong><sub>q</sub>, <strong>b</strong><sub>k</sub>, and <strong>b</strong><sub>v</sub> in the query, key, or value projections) and downstream performance remains largely unclear to date. In this paper, we investigate the link between fine-tuning <strong>b</strong><sub>q</sub>, <strong>b</strong><sub>k</sub>, and <strong>b</strong><sub>v</sub> with the performance of the downstream task. Our key finding is that directly fine-tuning <strong>b</strong><sub>v</sub> generally leads to higher downstream performance in low-data regimes, in comparison to <strong>b</strong><sub>q</sub> and <strong>b</strong><sub>k</sub>. We extensively evaluate this unique property across a wide range of LLMs spanning encoder-only and decoder-only architectures up to 6.7B parameters (including bias-free LLMs). Our results provide strong evidence for the effectiveness of directly fine-tuning <strong>b</strong><sub>v</sub> across various downstream tasks</em>.",se,Z,ae,v,j,de,S,Be='This is the configuration class to store the configuration of a <a href="/docs/peft/pr_3205/en/package_reference/beft#peft.BeftModel">BeftModel</a>.',oe,H,ie,p,I,ce,V,Ue=`Creates a Infused Adapter by only fine-tuning the added bias terms of value projections from a pretrained | |
| transformers model in low-training-data regimes (BEFT). The method is described in detail in | |
| <a href="https://arxiv.org/abs/2509.15974" rel="nofollow">https://arxiv.org/abs/2509.15974</a>`,ge,C,ue,Q,Ee="<strong>Attributes</strong>:",he,W,Le='<li><strong>model</strong> (<a href="https://huggingface.co/docs/transformers/main/en/main_classes/model#transformers.PreTrainedModel" rel="nofollow">PreTrainedModel</a>) — The model to be adapted.</li> <li><strong>peft_config</strong> (<a href="/docs/peft/pr_3205/en/package_reference/beft#peft.BeftConfig">BeftConfig</a>): The configuration of the (BEFT) model.</li>',re,N,le,D,fe;return c=new He({props:{containerStyle:"float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"}}),g=new be({props:{title:"BEFT: Bias-Efficient Fine-Tuning of Language Models in Low-Data Regimes",local:"beft-bias-efficient-fine-tuning-of-language-models-in-low-data-regimes",headingTag:"h1"}}),Z=new be({props:{title:"BeftConfig",local:"peft.BeftConfig",headingTag:"h2"}}),j=new ke({props:{name:"class peft.BeftConfig",anchor:"peft.BeftConfig",parameters:[{name:"task_type",val:": Optional[Union[str, TaskType]] = None"},{name:"peft_type",val:": Optional[Union[str, PeftType]] = None"},{name:"auto_mapping",val:": Optional[dict] = None"},{name:"peft_version",val:": Optional[str] = None"},{name:"base_model_name_or_path",val:": Optional[str] = None"},{name:"revision",val:": Optional[str] = None"},{name:"inference_mode",val:": bool = False"},{name:"target_modules",val:": Optional[Union[list[str], str]] = None"},{name:"modules_to_save",val:": Optional[list[str]] = None"},{name:"init_weights",val:": bool = True"}],parametersDescription:[{anchor:"peft.BeftConfig.target_modules",description:`<strong>target_modules</strong> (<code>Optional[Union[List[str], str]]</code>) — | |
| The names of the modules to apply the adapter to. If this is specified, only the modules with the specified | |
| names will be replaced. When passing a string, a regex match will be performed. When passing a list of | |
| strings, either an exact match will be performed or it is checked if the name of the module ends with any | |
| of the passed strings. If this is not specified, modules will be chosen according to the model | |
| architecture. If the architecture is not known, an error will be raised — in this case, you should specify | |
| the target modules manually.`,name:"target_modules"},{anchor:"peft.BeftConfig.modules_to_save",description:`<strong>modules_to_save</strong> (<code>Optional[List[str]]</code>) — | |
| List of modules apart from BEFT layers to be set as trainable and saved in the final checkpoint.`,name:"modules_to_save"},{anchor:"peft.BeftConfig.init_weights",description:`<strong>init_weights</strong> (<code>bool</code>) — | |
| Whether to initialize the vectors in the BEFT layers, defaults to <code>True</code>. Setting this to <code>False</code> is | |
| discouraged.`,name:"init_weights"}],source:"https://github.com/huggingface/peft/blob/vr_3205/src/peft/tuners/beft/config.py#L25"}}),H=new be({props:{title:"BeftModel",local:"peft.BeftModel",headingTag:"h2"}}),I=new ke({props:{name:"class peft.BeftModel",anchor:"peft.BeftModel",parameters:[{name:"model",val:""},{name:"peft_config",val:": Union[PeftConfig, dict[str, PeftConfig]]"},{name:"adapter_name",val:": str"},{name:"low_cpu_mem_usage",val:": bool = False"},{name:"state_dict",val:": Optional[dict[str, torch.Tensor]] = None"}],parametersDescription:[{anchor:"peft.BeftModel.model",description:'<strong>model</strong> (<a href="https://huggingface.co/docs/transformers/main/en/main_classes/model#transformers.PreTrainedModel" rel="nofollow">PreTrainedModel</a>) — The model to be adapted.',name:"model"},{anchor:"peft.BeftModel.config",description:'<strong>config</strong> (<a href="/docs/peft/pr_3205/en/package_reference/beft#peft.BeftConfig">BeftConfig</a>) — The configuration of the (BEFT) model.',name:"config"},{anchor:"peft.BeftModel.adapter_name",description:"<strong>adapter_name</strong> (<code>str</code>) — The name of the adapter, defaults to <code>"default"</code>.",name:"adapter_name"},{anchor:"peft.BeftModel.low_cpu_mem_usage",description:`<strong>low_cpu_mem_usage</strong> (<code>bool</code>, <code>optional</code>, defaults to <code>False</code>) — | |
| Create empty adapter weights on meta device. Useful to speed up the loading process.`,name:"low_cpu_mem_usage"}],source:"https://github.com/huggingface/peft/blob/vr_3205/src/peft/tuners/beft/model.py#L26",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>The (BEFT) model.</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><code>torch.nn.Module</code></p> | |
| `}}),C=new Re({props:{anchor:"peft.BeftModel.example",$$slots:{default:[Se]},$$scope:{ctx:O}}}),N=new Ie({props:{source:"https://github.com/huggingface/peft/blob/main/docs/source/package_reference/beft.md"}}),{c(){f=r("meta"),x=a(),u=r("p"),d=a(),y(c.$$.fragment),i=a(),y(g.$$.fragment),G=a(),B=r("p"),B.innerHTML=ve,z=a(),U=r("p"),U.textContent=ye,A=a(),E=r("p"),E.textContent=_e,X=a(),L=r("ul"),L.innerHTML=Te,Y=a(),k=r("p"),k.textContent=we,K=a(),F=r("ul"),F.innerHTML=$e,ee=a(),J=r("p"),J.textContent=Me,te=a(),P=r("p"),P.textContent=Ce,ne=a(),q=r("p"),q.innerHTML=xe,se=a(),y(Z.$$.fragment),ae=a(),v=r("div"),y(j.$$.fragment),de=a(),S=r("p"),S.innerHTML=Be,oe=a(),y(H.$$.fragment),ie=a(),p=r("div"),y(I.$$.fragment),ce=a(),V=r("p"),V.innerHTML=Ue,ge=a(),y(C.$$.fragment),ue=a(),Q=r("p"),Q.innerHTML=Ee,he=a(),W=r("ul"),W.innerHTML=Le,re=a(),y(N.$$.fragment),le=a(),D=r("p"),this.h()},l(e){const t=je("svelte-u9bgzb",document.head);f=l(t,"META",{name:!0,content:!0}),t.forEach(n),x=o(e),u=l(e,"P",{}),pe(u).forEach(n),d=o(e),_(c.$$.fragment,e),i=o(e),_(g.$$.fragment,e),G=o(e),B=l(e,"P",{"data-svelte-h":!0}),m(B)!=="svelte-1wxvl8g"&&(B.innerHTML=ve),z=o(e),U=l(e,"P",{"data-svelte-h":!0}),m(U)!=="svelte-1cwa87l"&&(U.textContent=ye),A=o(e),E=l(e,"P",{"data-svelte-h":!0}),m(E)!=="svelte-nbbe2"&&(E.textContent=_e),X=o(e),L=l(e,"UL",{"data-svelte-h":!0}),m(L)!=="svelte-tbuek5"&&(L.innerHTML=Te),Y=o(e),k=l(e,"P",{"data-svelte-h":!0}),m(k)!=="svelte-domxx"&&(k.textContent=we),K=o(e),F=l(e,"UL",{"data-svelte-h":!0}),m(F)!=="svelte-1yqqe77"&&(F.innerHTML=$e),ee=o(e),J=l(e,"P",{"data-svelte-h":!0}),m(J)!=="svelte-11wyhay"&&(J.textContent=Me),te=o(e),P=l(e,"P",{"data-svelte-h":!0}),m(P)!=="svelte-1cwsb16"&&(P.textContent=Ce),ne=o(e),q=l(e,"P",{"data-svelte-h":!0}),m(q)!=="svelte-skiy9o"&&(q.innerHTML=xe),se=o(e),_(Z.$$.fragment,e),ae=o(e),v=l(e,"DIV",{class:!0});var R=pe(v);_(j.$$.fragment,R),de=o(R),S=l(R,"P",{"data-svelte-h":!0}),m(S)!=="svelte-mu2mlm"&&(S.innerHTML=Be),R.forEach(n),oe=o(e),_(H.$$.fragment,e),ie=o(e),p=l(e,"DIV",{class:!0});var h=pe(p);_(I.$$.fragment,h),ce=o(h),V=l(h,"P",{"data-svelte-h":!0}),m(V)!=="svelte-102xgix"&&(V.innerHTML=Ue),ge=o(h),_(C.$$.fragment,h),ue=o(h),Q=l(h,"P",{"data-svelte-h":!0}),m(Q)!=="svelte-1xx6nm4"&&(Q.innerHTML=Ee),he=o(h),W=l(h,"UL",{"data-svelte-h":!0}),m(W)!=="svelte-s7cd7"&&(W.innerHTML=Le),h.forEach(n),re=o(e),_(N.$$.fragment,e),le=o(e),D=l(e,"P",{}),pe(D).forEach(n),this.h()},h(){me(f,"name","hf:doc:metadata"),me(f,"content",Qe),me(v,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),me(p,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8")},m(e,t){b(document.head,f),s(e,x,t),s(e,u,t),s(e,d,t),T(c,e,t),s(e,i,t),T(g,e,t),s(e,G,t),s(e,B,t),s(e,z,t),s(e,U,t),s(e,A,t),s(e,E,t),s(e,X,t),s(e,L,t),s(e,Y,t),s(e,k,t),s(e,K,t),s(e,F,t),s(e,ee,t),s(e,J,t),s(e,te,t),s(e,P,t),s(e,ne,t),s(e,q,t),s(e,se,t),T(Z,e,t),s(e,ae,t),s(e,v,t),T(j,v,null),b(v,de),b(v,S),s(e,oe,t),T(H,e,t),s(e,ie,t),s(e,p,t),T(I,p,null),b(p,ce),b(p,V),b(p,ge),T(C,p,null),b(p,ue),b(p,Q),b(p,he),b(p,W),s(e,re,t),T(N,e,t),s(e,le,t),s(e,D,t),fe=!0},p(e,[t]){const R={};t&2&&(R.$$scope={dirty:t,ctx:e}),C.$set(R)},i(e){fe||(w(c.$$.fragment,e),w(g.$$.fragment,e),w(Z.$$.fragment,e),w(j.$$.fragment,e),w(H.$$.fragment,e),w(I.$$.fragment,e),w(C.$$.fragment,e),w(N.$$.fragment,e),fe=!0)},o(e){$(c.$$.fragment,e),$(g.$$.fragment,e),$(Z.$$.fragment,e),$(j.$$.fragment,e),$(H.$$.fragment,e),$(I.$$.fragment,e),$(C.$$.fragment,e),$(N.$$.fragment,e),fe=!1},d(e){e&&(n(x),n(u),n(d),n(i),n(G),n(B),n(z),n(U),n(A),n(E),n(X),n(L),n(Y),n(k),n(K),n(F),n(ee),n(J),n(te),n(P),n(ne),n(q),n(se),n(ae),n(v),n(oe),n(ie),n(p),n(re),n(le),n(D)),n(f),M(c,e),M(g,e),M(Z,e),M(j),M(H,e),M(I),M(C),M(N,e)}}}const Qe='{"title":"BEFT: Bias-Efficient Fine-Tuning of Language Models in Low-Data Regimes","local":"beft-bias-efficient-fine-tuning-of-language-models-in-low-data-regimes","sections":[{"title":"BeftConfig","local":"peft.BeftConfig","sections":[],"depth":2},{"title":"BeftModel","local":"peft.BeftModel","sections":[],"depth":2}],"depth":1}';function We(O){return Je(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class Ye extends qe{constructor(f){super(),Ze(this,f,We,Ve,Fe,{})}}export{Ye as component}; | |
Xet Storage Details
- Size:
- 14.2 kB
- Xet hash:
- 0338cc3138e1baa4c23737a744fcb198081f35b10f748e11244eac20f458e229
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.