Buckets:

hf-doc-build
/

doc-dev

hf-doc-build/doc-dev/peft/pr_3205/en/_app/immutable/nodes/24.5a2dfbe3.js

HuggingFaceDocBuilder's picture

HuggingFaceDocBuilder

about 1 month ago

14.2 kB

	// Minified, compiled Svelte page chunk for the PEFT documentation page
	// "BEFT: Bias-Efficient Fine-Tuning of Language Models in Low-Data Regimes".
	// This is build output: the single-letter names come from the minifier and the
	// helpers are imported from sibling chunks (scheduler / index / component chunks).
	// Repairs relative to the scraped text: the `||` operators were restored (they had
	// been escaped as `\|\|`), the inner quotes of the `adapter_name` description are
	// now `&quot;` entities so the string literal is well formed, and hard line wraps
	// that had split the `xe` and `Qe` string literals were rejoined.
	import{s as Fe,o as Je,n as Pe}from"../chunks/scheduler.78382b47.js";import{S as qe,i as Ze,e as r,s as a,c as y,h as je,a as l,d as n,b as o,f as pe,g as _,j as m,k as me,l as b,m as s,n as T,t as w,o as $,p as M}from"../chunks/index.6dd35eb6.js";import{C as He,H as be,E as Ie}from"../chunks/MermaidChart.svelte_svelte_type_style_lang.184d38d3.js";import{D as ke}from"../chunks/Docstring.e49bb367.js";import{C as Ne}from"../chunks/CodeBlock.7e3c9fac.js";import{E as Re}from"../chunks/ExampleCodeBlock.dc1b216e.js";
	// Se: fragment factory passed to `Re` as its default slot (see `$$slots` in Ve).
	// It builds a <p> whose text is "Example:" followed by an `Ne` instance that
	// receives the example as `code` (base64 of a URL-encoded string), `highlighted`
	// (pre-rendered markup) and `wrap: false`. Returns the fragment object with the
	// c/l/m/p/i/o/d methods; `p` is the imported `Pe`.
	// NOTE(review): the continuation lines of `highlighted` start with a tab and use a
	// single space after the `... ` prompt, while the `code` payload decodes to
	// unindented lines with four-space argument indentation. Presumably these are
	// scrape artifacts; confirm against the real build output before normalizing.
	function Se(O){let f,x="Example:",u,d,c;return d=new Ne({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Nb2RlbEZvclNlcTJTZXFMTSUwQWZyb20lMjBwZWZ0JTIwaW1wb3J0JTIwQmVmdE1vZGVsJTJDJTIwQmVmdENvbmZpZyUwQSUwQWNvbmZpZyUyMCUzRCUyMEJlZnRDb25maWcoJTBBJTIwJTIwJTIwJTIwcGVmdF90eXBlJTNEJTIyQmVmdCUyMiUyQyUwQSUyMCUyMCUyMCUyMHRhc2tfdHlwZSUzRCUyMlNFUV8yX1NFUV9MTSUyMiUyQyUwQSUyMCUyMCUyMCUyMHRhcmdldF9tb2R1bGVzJTNEJTVCJTIydiUyMiU1RCUyQyUwQSklMEElMEFtb2RlbCUyMCUzRCUyMEF1dG9Nb2RlbEZvclNlcTJTZXFMTS5mcm9tX3ByZXRyYWluZWQoJTIydDUtYmFzZSUyMiklMEFiZWZ0X21vZGVsJTIwJTNEJTIwQmVmdE1vZGVsKG1vZGVsJTJDJTIwY29uZmlnJTJDJTIwYWRhcHRlcl9uYW1lJTNEJTIyZGVmYXVsdCUyMik=",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForSeq2SeqLM
	<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> peft <span class="hljs-keyword">import</span> BeftModel, BeftConfig

	<span class="hljs-meta">>>> </span>config = BeftConfig(
	<span class="hljs-meta">... </span> peft_type=<span class="hljs-string">"Beft"</span>,
	<span class="hljs-meta">... </span> task_type=<span class="hljs-string">"SEQ_2_SEQ_LM"</span>,
	<span class="hljs-meta">... </span> target_modules=[<span class="hljs-string">"v"</span>],
	<span class="hljs-meta">... </span>)

	<span class="hljs-meta">>>> </span>model = AutoModelForSeq2SeqLM.from_pretrained(<span class="hljs-string">"t5-base"</span>)
	<span class="hljs-meta">>>> </span>beft_model = BeftModel(model, config, adapter_name=<span class="hljs-string">"default"</span>)`,wrap:!1}}),{c(){f=r("p"),f.textContent=x,u=a(),y(d.$$.fragment)},l(i){f=l(i,"P",{"data-svelte-h":!0}),m(f)!=="svelte-11lpom8"&&(f.textContent=x),u=o(i),_(d.$$.fragment,i)},m(i,g){s(i,f,g),s(i,u,g),T(d,i,g),c=!0},p:Pe,i(i){c||(w(d.$$.fragment,i),c=!0)},o(i){$(d.$$.fragment,i),c=!1},d(i){i&&(n(f),n(u)),M(d,i)}}}
	// Ve: fragment factory for the whole page. It instantiates the imported components
	// (`He`, three `be` headings, two `ke` docstring blocks for peft.BeftConfig and
	// peft.BeftModel, the `Re` example block whose default slot is `Se`, and `Ie` with
	// the markdown source URL) and creates the static <p>/<ul>/<div> nodes from the
	// HTML/text string constants declared at the top of the function.
	// `h()` sets the <meta name="hf:doc:metadata"> content to `Qe` and the class on the
	// two docstring <div>s; `m()` appends that <meta> to document.head and inserts the
	// remaining nodes into the target; `p()` only forwards `$$scope` changes (dirty
	// bit 2) to the `Re` instance.
	function Ve(O){let f,x,u,d,c,i,g,G,B,ve='<a href="https://arxiv.org/abs/2509.15974" rel="nofollow">BEFT</a> is a parameter efficient fine-tuning algorithm (PEFT) that only fine-tunes the added bias terms of value projections from pretrained transformer models. BEFT demonstrates that fine-tuning the added bias terms of value projections from pretrained transformers generally leads to a higher downstream performance in low-data regimes than fine-tuning the added bias terms of query/key projections.',z,U,ye="BEFT currently has the following tradeoffs:",A,E,_e="Pros:",X,L,Te="<li>BEFT requires far fewer parameters than LoRA, while maintaining competitive or superior performance across tasks in low-data regimes.</li>",Y,k,we="Cons:",K,F,$e="<li>In high-data regimes, BEFT may show limited effectiveness compared to LoRA and full-parameters fine-tuning.</li>",ee,J,Me="If your use case belongs to the high-data regime, consider other PEFT methods such as LoRA.",te,P,Ce="The abstract from the paper is:",ne,q,xe="<em>Fine-tuning the bias terms of large language models (LLMs) has the potential to achieve unprecedented parameter efficiency while maintaining competitive performance, particularly in low-data regimes. However, the link between fine-tuning different bias terms (i.e., <strong>b</strong><sub>q</sub>, <strong>b</strong><sub>k</sub>, and <strong>b</strong><sub>v</sub> in the query, key, or value projections) and downstream performance remains largely unclear to date. In this paper, we investigate the link between fine-tuning <strong>b</strong><sub>q</sub>, <strong>b</strong><sub>k</sub>, and <strong>b</strong><sub>v</sub> with the performance of the downstream task. Our key finding is that directly fine-tuning <strong>b</strong><sub>v</sub> generally leads to higher downstream performance in low-data regimes, in comparison to <strong>b</strong><sub>q</sub> and <strong>b</strong><sub>k</sub>. We extensively evaluate this unique property across a wide range of LLMs spanning encoder-only and decoder-only architectures up to 6.7B parameters (including bias-free LLMs). Our results provide strong evidence for the effectiveness of directly fine-tuning <strong>b</strong><sub>v</sub> across various downstream tasks</em>.",se,Z,ae,v,j,de,S,Be='This is the configuration class to store the configuration of a <a href="/docs/peft/pr_3205/en/package_reference/beft#peft.BeftModel">BeftModel</a>.',oe,H,ie,p,I,ce,V,Ue=`Creates a Infused Adapter by only fine-tuning the added bias terms of value projections from a pretrained
	transformers model in low-training-data regimes (BEFT). The method is described in detail in
	<a href="https://arxiv.org/abs/2509.15974" rel="nofollow">https://arxiv.org/abs/2509.15974</a>`,ge,C,ue,Q,Ee="<strong>Attributes</strong>:",he,W,Le='<li><strong>model</strong> (<a href="https://huggingface.co/docs/transformers/main/en/main_classes/model#transformers.PreTrainedModel" rel="nofollow">PreTrainedModel</a>) — The model to be adapted.</li> <li><strong>peft_config</strong> (<a href="/docs/peft/pr_3205/en/package_reference/beft#peft.BeftConfig">BeftConfig</a>): The configuration of the (BEFT) model.</li>',re,N,le,D,fe;return c=new He({props:{containerStyle:"float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"}}),g=new be({props:{title:"BEFT: Bias-Efficient Fine-Tuning of Language Models in Low-Data Regimes",local:"beft-bias-efficient-fine-tuning-of-language-models-in-low-data-regimes",headingTag:"h1"}}),Z=new be({props:{title:"BeftConfig",local:"peft.BeftConfig",headingTag:"h2"}}),j=new ke({props:{name:"class peft.BeftConfig",anchor:"peft.BeftConfig",parameters:[{name:"task_type",val:": Optional[Union[str, TaskType]] = None"},{name:"peft_type",val:": Optional[Union[str, PeftType]] = None"},{name:"auto_mapping",val:": Optional[dict] = None"},{name:"peft_version",val:": Optional[str] = None"},{name:"base_model_name_or_path",val:": Optional[str] = None"},{name:"revision",val:": Optional[str] = None"},{name:"inference_mode",val:": bool = False"},{name:"target_modules",val:": Optional[Union[list[str], str]] = None"},{name:"modules_to_save",val:": Optional[list[str]] = None"},{name:"init_weights",val:": bool = True"}],parametersDescription:[{anchor:"peft.BeftConfig.target_modules",description:`<strong>target_modules</strong> (<code>Optional[Union[List[str], str]]</code>) —
	The names of the modules to apply the adapter to. If this is specified, only the modules with the specified
	names will be replaced. When passing a string, a regex match will be performed. When passing a list of
	strings, either an exact match will be performed or it is checked if the name of the module ends with any
	of the passed strings. If this is not specified, modules will be chosen according to the model
	architecture. If the architecture is not known, an error will be raised — in this case, you should specify
	the target modules manually.`,name:"target_modules"},{anchor:"peft.BeftConfig.modules_to_save",description:`<strong>modules_to_save</strong> (<code>Optional[List[str]]</code>) —
	List of modules apart from BEFT layers to be set as trainable and saved in the final checkpoint.`,name:"modules_to_save"},{anchor:"peft.BeftConfig.init_weights",description:`<strong>init_weights</strong> (<code>bool</code>) —
	Whether to initialize the vectors in the BEFT layers, defaults to <code>True</code>. Setting this to <code>False</code> is
	discouraged.`,name:"init_weights"}],source:"https://github.com/huggingface/peft/blob/vr_3205/src/peft/tuners/beft/config.py#L25"}}),H=new be({props:{title:"BeftModel",local:"peft.BeftModel",headingTag:"h2"}}),I=new ke({props:{name:"class peft.BeftModel",anchor:"peft.BeftModel",parameters:[{name:"model",val:""},{name:"peft_config",val:": Union[PeftConfig, dict[str, PeftConfig]]"},{name:"adapter_name",val:": str"},{name:"low_cpu_mem_usage",val:": bool = False"},{name:"state_dict",val:": Optional[dict[str, torch.Tensor]] = None"}],parametersDescription:[{anchor:"peft.BeftModel.model",description:'<strong>model</strong> (<a href="https://huggingface.co/docs/transformers/main/en/main_classes/model#transformers.PreTrainedModel" rel="nofollow">PreTrainedModel</a>) — The model to be adapted.',name:"model"},{anchor:"peft.BeftModel.config",description:'<strong>config</strong> (<a href="/docs/peft/pr_3205/en/package_reference/beft#peft.BeftConfig">BeftConfig</a>) — The configuration of the (BEFT) model.',name:"config"},{anchor:"peft.BeftModel.adapter_name",description:"<strong>adapter_name</strong> (<code>str</code>) — The name of the adapter, defaults to <code>&quot;default&quot;</code>.",name:"adapter_name"},{anchor:"peft.BeftModel.low_cpu_mem_usage",description:`<strong>low_cpu_mem_usage</strong> (<code>bool</code>, <code>optional</code>, defaults to <code>False</code>) —
	Create empty adapter weights on meta device. Useful to speed up the loading process.`,name:"low_cpu_mem_usage"}],source:"https://github.com/huggingface/peft/blob/vr_3205/src/peft/tuners/beft/model.py#L26",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script>


	<p>The (BEFT) model.</p>
	`,returnType:`<script context="module">export const metadata = 'undefined';<\/script>


	<p><code>torch.nn.Module</code></p>
	`}}),C=new Re({props:{anchor:"peft.BeftModel.example",$$slots:{default:[Se]},$$scope:{ctx:O}}}),N=new Ie({props:{source:"https://github.com/huggingface/peft/blob/main/docs/source/package_reference/beft.md"}}),{c(){f=r("meta"),x=a(),u=r("p"),d=a(),y(c.$$.fragment),i=a(),y(g.$$.fragment),G=a(),B=r("p"),B.innerHTML=ve,z=a(),U=r("p"),U.textContent=ye,A=a(),E=r("p"),E.textContent=_e,X=a(),L=r("ul"),L.innerHTML=Te,Y=a(),k=r("p"),k.textContent=we,K=a(),F=r("ul"),F.innerHTML=$e,ee=a(),J=r("p"),J.textContent=Me,te=a(),P=r("p"),P.textContent=Ce,ne=a(),q=r("p"),q.innerHTML=xe,se=a(),y(Z.$$.fragment),ae=a(),v=r("div"),y(j.$$.fragment),de=a(),S=r("p"),S.innerHTML=Be,oe=a(),y(H.$$.fragment),ie=a(),p=r("div"),y(I.$$.fragment),ce=a(),V=r("p"),V.innerHTML=Ue,ge=a(),y(C.$$.fragment),ue=a(),Q=r("p"),Q.innerHTML=Ee,he=a(),W=r("ul"),W.innerHTML=Le,re=a(),y(N.$$.fragment),le=a(),D=r("p"),this.h()},l(e){const t=je("svelte-u9bgzb",document.head);f=l(t,"META",{name:!0,content:!0}),t.forEach(n),x=o(e),u=l(e,"P",{}),pe(u).forEach(n),d=o(e),_(c.$$.fragment,e),i=o(e),_(g.$$.fragment,e),G=o(e),B=l(e,"P",{"data-svelte-h":!0}),m(B)!=="svelte-1wxvl8g"&&(B.innerHTML=ve),z=o(e),U=l(e,"P",{"data-svelte-h":!0}),m(U)!=="svelte-1cwa87l"&&(U.textContent=ye),A=o(e),E=l(e,"P",{"data-svelte-h":!0}),m(E)!=="svelte-nbbe2"&&(E.textContent=_e),X=o(e),L=l(e,"UL",{"data-svelte-h":!0}),m(L)!=="svelte-tbuek5"&&(L.innerHTML=Te),Y=o(e),k=l(e,"P",{"data-svelte-h":!0}),m(k)!=="svelte-domxx"&&(k.textContent=we),K=o(e),F=l(e,"UL",{"data-svelte-h":!0}),m(F)!=="svelte-1yqqe77"&&(F.innerHTML=$e),ee=o(e),J=l(e,"P",{"data-svelte-h":!0}),m(J)!=="svelte-11wyhay"&&(J.textContent=Me),te=o(e),P=l(e,"P",{"data-svelte-h":!0}),m(P)!=="svelte-1cwsb16"&&(P.textContent=Ce),ne=o(e),q=l(e,"P",{"data-svelte-h":!0}),m(q)!=="svelte-skiy9o"&&(q.innerHTML=xe),se=o(e),_(Z.$$.fragment,e),ae=o(e),v=l(e,"DIV",{class:!0});var R=pe(v);_(j.$$.fragment,R),de=o(R),S=l(R,"P",{"data-svelte-h":!0}),m(S)!=="svelte-mu2mlm"&&(S.innerHTML=Be),R.forEach(n),oe=o(e),_(H.$$.fragment,e),ie=o(e),p=l(e,"DIV",{class:!0});var h=pe(p);_(I.$$.fragment,h),ce=o(h),V=l(h,"P",{"data-svelte-h":!0}),m(V)!=="svelte-102xgix"&&(V.innerHTML=Ue),ge=o(h),_(C.$$.fragment,h),ue=o(h),Q=l(h,"P",{"data-svelte-h":!0}),m(Q)!=="svelte-1xx6nm4"&&(Q.innerHTML=Ee),he=o(h),W=l(h,"UL",{"data-svelte-h":!0}),m(W)!=="svelte-s7cd7"&&(W.innerHTML=Le),h.forEach(n),re=o(e),_(N.$$.fragment,e),le=o(e),D=l(e,"P",{}),pe(D).forEach(n),this.h()},h(){me(f,"name","hf:doc:metadata"),me(f,"content",Qe),me(v,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),me(p,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8")},m(e,t){b(document.head,f),s(e,x,t),s(e,u,t),s(e,d,t),T(c,e,t),s(e,i,t),T(g,e,t),s(e,G,t),s(e,B,t),s(e,z,t),s(e,U,t),s(e,A,t),s(e,E,t),s(e,X,t),s(e,L,t),s(e,Y,t),s(e,k,t),s(e,K,t),s(e,F,t),s(e,ee,t),s(e,J,t),s(e,te,t),s(e,P,t),s(e,ne,t),s(e,q,t),s(e,se,t),T(Z,e,t),s(e,ae,t),s(e,v,t),T(j,v,null),b(v,de),b(v,S),s(e,oe,t),T(H,e,t),s(e,ie,t),s(e,p,t),T(I,p,null),b(p,ce),b(p,V),b(p,ge),T(C,p,null),b(p,ue),b(p,Q),b(p,he),b(p,W),s(e,re,t),T(N,e,t),s(e,le,t),s(e,D,t),fe=!0},p(e,[t]){const R={};t&2&&(R.$$scope={dirty:t,ctx:e}),C.$set(R)},i(e){fe||(w(c.$$.fragment,e),w(g.$$.fragment,e),w(Z.$$.fragment,e),w(j.$$.fragment,e),w(H.$$.fragment,e),w(I.$$.fragment,e),w(C.$$.fragment,e),w(N.$$.fragment,e),fe=!0)},o(e){$(c.$$.fragment,e),$(g.$$.fragment,e),$(Z.$$.fragment,e),$(j.$$.fragment,e),$(H.$$.fragment,e),$(I.$$.fragment,e),$(C.$$.fragment,e),$(N.$$.fragment,e),fe=!1},d(e){e&&(n(x),n(u),n(d),n(i),n(G),n(B),n(z),n(U),n(A),n(E),n(X),n(L),n(Y),n(k),n(K),n(F),n(ee),n(J),n(te),n(P),n(ne),n(q),n(se),n(ae),n(v),n(oe),n(ie),n(p),n(re),n(le),n(D)),n(f),M(c,e),M(g,e),M(Z,e),M(j),M(H,e),M(I),M(C),M(N,e)}}}
	// Qe: JSON text describing the page outline (title, anchor slug, and the two
	// depth-2 sections). Ve writes it into the `content` attribute of the
	// <meta name="hf:doc:metadata"> element. It must stay on one physical line: a raw
	// line break inside a quoted string literal is a syntax error.
	const Qe='{"title":"BEFT: Bias-Efficient Fine-Tuning of Language Models in Low-Data Regimes","local":"beft-bias-efficient-fine-tuning-of-language-models-in-low-data-regimes","sections":[{"title":"BeftConfig","local":"peft.BeftConfig","sections":[],"depth":2},{"title":"BeftModel","local":"peft.BeftModel","sections":[],"depth":2}],"depth":1}';
	// We: instance function handed to `Ze`. It registers a callback through `Je` that
	// reads the `fw` query parameter from the current URL and discards the value, then
	// returns an empty context array.
	function We(O){return Je(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}
	// Ye: the page component. Its constructor delegates to `Ze` with the instance
	// function `We`, the fragment factory `Ve`, the imported `Fe`, and an empty props
	// map. It is exported under the name `component`.
	class Ye extends qe{constructor(f){super(),Ze(this,f,We,Ve,Fe,{})}}export{Ye as component};

Xet Storage Details

Size: 14.2 kB
Xet hash: 0338cc3138e1baa4c23737a744fcb198081f35b10f748e11244eac20f458e229

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.