Buckets:

hf-doc-build
/

doc-dev

hf-doc-build/doc-dev / peft /pr_3207 /en /_app /immutable /nodes /37.f43d101d.js

HuggingFaceDocBuilder's picture

HuggingFaceDocBuilder

about 1 month ago

15.6 kB

	import{s as Te,o as ye,n as Ie}from"../chunks/scheduler.78382b47.js";import{S as $e,i as xe,e as d,s as o,c as _,h as Me,a as p,d as a,b as s,f as H,g as w,j as $,k as V,l as m,m as n,n as b,t as v,o as T,p as y}from"../chunks/index.6dd35eb6.js";import{C as Ae,H as me,E as Je}from"../chunks/MermaidChart.svelte_svelte_type_style_lang.d25d6883.js";import{D as ce}from"../chunks/Docstring.a245c00c.js";import{C as Ce}from"../chunks/CodeBlock.147ab5db.js";import{E as Ue}from"../chunks/ExampleCodeBlock.29fc6d51.js";function ke(O){let i,A="Example:",u,c,f;return c=new Ce({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Nb2RlbEZvclNlcTJTZXFMTSUyQyUyMGlhM0NvbmZpZyUwQWZyb20lMjBwZWZ0JTIwaW1wb3J0JTIwSUEzTW9kZWwlMkMlMjBJQTNDb25maWclMEElMEFjb25maWclMjAlM0QlMjBJQTNDb25maWcoJTBBJTIwJTIwJTIwJTIwcGVmdF90eXBlJTNEJTIySUEzJTIyJTJDJTBBJTIwJTIwJTIwJTIwdGFza190eXBlJTNEJTIyU0VRXzJfU0VRX0xNJTIyJTJDJTBBJTIwJTIwJTIwJTIwdGFyZ2V0X21vZHVsZXMlM0QlNUIlMjJrJTIyJTJDJTIwJTIydiUyMiUyQyUyMCUyMncwJTIyJTVEJTJDJTBBJTIwJTIwJTIwJTIwZmVlZGZvcndhcmRfbW9kdWxlcyUzRCU1QiUyMncwJTIyJTVEJTJDJTBBKSUwQSUwQW1vZGVsJTIwJTNEJTIwQXV0b01vZGVsRm9yU2VxMlNlcUxNLmZyb21fcHJldHJhaW5lZCglMjJ0NS1iYXNlJTIyKSUwQWlhM19tb2RlbCUyMCUzRCUyMElBM01vZGVsKGNvbmZpZyUyQyUyMG1vZGVsKQ==",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForSeq2SeqLM, ia3Config
	<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> peft <span class="hljs-keyword">import</span> IA3Model, IA3Config

	<span class="hljs-meta">>>> </span>config = IA3Config(
	<span class="hljs-meta">... </span> peft_type=<span class="hljs-string">"IA3"</span>,
	<span class="hljs-meta">... </span> task_type=<span class="hljs-string">"SEQ_2_SEQ_LM"</span>,
	<span class="hljs-meta">... </span> target_modules=[<span class="hljs-string">"k"</span>, <span class="hljs-string">"v"</span>, <span class="hljs-string">"w0"</span>],
	<span class="hljs-meta">... </span> feedforward_modules=[<span class="hljs-string">"w0"</span>],
	<span class="hljs-meta">... </span>)

	<span class="hljs-meta">>>> </span>model = AutoModelForSeq2SeqLM.from_pretrained(<span class="hljs-string">"t5-base"</span>)
	<span class="hljs-meta">>>> </span>ia3_model = IA3Model(config, model)`,wrap:!1}}),{c(){i=d("p"),i.textContent=A,u=o(),_(c.$$.fragment)},l(r){i=p(r,"P",{"data-svelte-h":!0}),$(i)!=="svelte-11lpom8"&&(i.textContent=A),u=s(r),w(c.$$.fragment,r)},m(r,h){n(r,i,h),n(r,u,h),b(c,r,h),f=!0},p:Ie,i(r){f\|\|(v(c.$$.fragment,r),f=!0)},o(r){T(c.$$.fragment,r),f=!1},d(r){r&&(a(i),a(u)),y(c,r)}}}function Le(O){let i,A,u,c,f,r,h,Q,J,fe='Infused Adapter by Inhibiting and Amplifying Inner Activations, or <a href="https://hf.co/papers/2205.05638" rel="nofollow">IA3</a>, is a method that adds three learned vectors to rescale the keys and values of the self-attention and encoder-decoder attention layers, and the intermediate activation of the position-wise feed-forward network.',G,C,ge="The abstract from the paper is:",z,U,he="<em>Few-shot in-context learning (ICL) enables pre-trained language models to perform a previously-unseen task without any gradient-based training by feeding a small number of training examples as part of the input. ICL incurs substantial computational, memory, and storage costs because it involves processing all of the training examples every time a prediction is made. Parameter-efficient fine-tuning (PEFT) (e.g. adapter modules, prompt tuning, sparse update methods, etc.) offers an alternative paradigm where a small set of parameters are trained to enable a model to perform the new task. In this paper, we rigorously compare few-shot ICL and PEFT and demonstrate that the latter offers better accuracy as well as dramatically lower computational costs. Along the way, we introduce a new PEFT method called (IA)^3 that scales activations by learned vectors, attaining stronger performance while only introducing a relatively tiny amount of new parameters. We also propose a simple recipe based on the T0 model called T-Few that can be applied to new tasks without task-specific tuning or modifications. We validate the effectiveness of T-Few on completely unseen tasks by applying it to the RAFT benchmark, attaining super-human performance for the first time and outperforming the state-of-the-art by 6% absolute. All of the code used in our experiments is publicly available</em>.",R,k,X,I,L,oe,q,ue='This is the configuration class to store the configuration of a <a href="/docs/peft/pr_3207/en/package_reference/ia3#peft.IA3Model">IA3Model</a>.',K,E,Y,l,j,se,W,_e=`Creates a Infused Adapter by Inhibiting and Amplifying Inner Activations ((IA)^3) model from a pretrained
	transformers model. The method is described in detail in <a href="https://huggingface.co/papers/2205.05638" rel="nofollow">https://huggingface.co/papers/2205.05638</a>`,re,x,ie,B,we="<strong>Attributes</strong>:",le,F,be='<li><strong>model</strong> (<a href="https://huggingface.co/docs/transformers/main/en/main_classes/model#transformers.PreTrainedModel" rel="nofollow">PreTrainedModel</a>) — The model to be adapted.</li> <li><strong>peft_config</strong> (<code>ia3Config</code>): The configuration of the (IA)^3 model.</li>',de,M,N,pe,S,ve="This method adds a new adapter by merging the given adapters with the given weights.",ee,P,te,D,ae;return f=new Ae({props:{containerStyle:"float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"}}),h=new me({props:{title:"IA3",local:"ia3",headingTag:"h1"}}),k=new me({props:{title:"IA3Config",local:"peft.IA3Config",headingTag:"h2"}}),L=new ce({props:{name:"class peft.IA3Config",anchor:"peft.IA3Config",parameters:[{name:"task_type",val:": Optional[Union[str, TaskType]] = None"},{name:"peft_type",val:": Optional[Union[str, PeftType]] = None"},{name:"auto_mapping",val:": Optional[dict] = None"},{name:"peft_version",val:": Optional[str] = None"},{name:"base_model_name_or_path",val:": Optional[str] = None"},{name:"revision",val:": Optional[str] = None"},{name:"inference_mode",val:": bool = False"},{name:"target_modules",val:": Optional[Union[list[str], str]] = None"},{name:"exclude_modules",val:": Optional[Union[list[str], str]] = None"},{name:"feedforward_modules",val:": Optional[Union[list[str], str]] = None"},{name:"fan_in_fan_out",val:": bool = False"},{name:"modules_to_save",val:": Optional[list[str]] = None"},{name:"init_ia3_weights",val:": bool = True"}],parametersDescription:[{anchor:"peft.IA3Config.target_modules",description:`<strong>target_modules</strong> (<code>Optional[Union[List[str], str]]</code>) —
	The names of the modules to apply the adapter to. If this is specified, only the modules with the specified
	names will be replaced. When passing a string, a regex match will be performed. When passing a list of
	strings, either an exact match will be performed or it is checked if the name of the module ends with any
	of the passed strings. If this is specified as ‘all-linear’, then all linear/Conv1D modules are chosen,
	excluding the output layer. If this is not specified, modules will be chosen according to the model
	architecture. If the architecture is not known, an error will be raised — in this case, you should specify
	the target modules manually.`,name:"target_modules"},{anchor:"peft.IA3Config.exclude_modules",description:`<strong>exclude_modules</strong> (<code>Optional[Union[List[str], str]]</code>) —
	The names of the modules to not apply the adapter. When passing a string, a regex match will be performed.
	When passing a list of strings, either an exact match will be performed or it is checked if the name of the
	module ends with any of the passed strings.`,name:"exclude_modules"},{anchor:"peft.IA3Config.feedforward_modules",description:`<strong>feedforward_modules</strong> (<code>Optional[Union[List[str], str]]</code>) —
	The names of the modules to be treated as feedforward modules, as in the original paper. These modules will
	have (IA)³ vectors multiplied to the input, instead of the output. <code>feedforward_modules</code> must be a name or
	a subset of names present in <code>target_modules</code>.`,name:"feedforward_modules"},{anchor:"peft.IA3Config.fan_in_fan_out",description:`<strong>fan_in_fan_out</strong> (<code>bool</code>) —
	Set this to True if the layer to replace stores weight like (fan_in, fan_out). For example, gpt-2 uses
	<code>Conv1D</code> which stores weights like (fan_in, fan_out) and hence this should be set to <code>True</code>.`,name:"fan_in_fan_out"},{anchor:"peft.IA3Config.modules_to_save",description:`<strong>modules_to_save</strong> (<code>Optional[List[str]]</code>) —
	List of modules apart from (IA)³ layers to be set as trainable and saved in the final checkpoint.`,name:"modules_to_save"},{anchor:"peft.IA3Config.init_ia3_weights",description:`<strong>init_ia3_weights</strong> (<code>bool</code>) —
	Whether to initialize the vectors in the (IA)³ layers, defaults to <code>True</code>. Setting this to <code>False</code> is
	discouraged.`,name:"init_ia3_weights"}],source:"https://github.com/huggingface/peft/blob/vr_3207/src/peft/tuners/ia3/config.py#L25"}}),E=new me({props:{title:"IA3Model",local:"peft.IA3Model",headingTag:"h2"}}),j=new ce({props:{name:"class peft.IA3Model",anchor:"peft.IA3Model",parameters:[{name:"model",val:""},{name:"peft_config",val:": Union[PeftConfig, dict[str, PeftConfig]]"},{name:"adapter_name",val:": str"},{name:"low_cpu_mem_usage",val:": bool = False"},{name:"state_dict",val:": Optional[dict[str, torch.Tensor]] = None"}],parametersDescription:[{anchor:"peft.IA3Model.model",description:'<strong>model</strong> (<a href="https://huggingface.co/docs/transformers/main/en/main_classes/model#transformers.PreTrainedModel" rel="nofollow">PreTrainedModel</a>) — The model to be adapted.',name:"model"},{anchor:"peft.IA3Model.config",description:'<strong>config</strong> (<a href="/docs/peft/pr_3207/en/package_reference/ia3#peft.IA3Config">IA3Config</a>) — The configuration of the (IA)^3 model.',name:"config"},{anchor:"peft.IA3Model.adapter_name",description:"<strong>adapter_name</strong> (<code>str</code>) — The name of the adapter, defaults to <code>"default"</code>.",name:"adapter_name"},{anchor:"peft.IA3Model.low_cpu_mem_usage",description:`<strong>low_cpu_mem_usage</strong> (<code>bool</code>, <code>optional</code>, defaults to <code>False</code>) —
	Create empty adapter weights on meta device. Useful to speed up the loading process.`,name:"low_cpu_mem_usage"}],source:"https://github.com/huggingface/peft/blob/vr_3207/src/peft/tuners/ia3/model.py#L36",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script>


	<p>The (IA)^3 model.</p>
	`,returnType:`<script context="module">export const metadata = 'undefined';<\/script>


	<p><code>torch.nn.Module</code></p>
	`}}),x=new Ue({props:{anchor:"peft.IA3Model.example",$$slots:{default:[ke]},$$scope:{ctx:O}}}),N=new ce({props:{name:"add_weighted_adapter",anchor:"peft.IA3Model.add_weighted_adapter",parameters:[{name:"adapters",val:": list[str]"},{name:"weights",val:": list[float]"},{name:"adapter_name",val:": str"}],parametersDescription:[{anchor:"peft.IA3Model.add_weighted_adapter.adapters",description:`<strong>adapters</strong> (<code>list</code>) —
	List of adapter names to be merged.`,name:"adapters"},{anchor:"peft.IA3Model.add_weighted_adapter.weights",description:`<strong>weights</strong> (<code>list</code>) —
	List of weights for each adapter.`,name:"weights"},{anchor:"peft.IA3Model.add_weighted_adapter.adapter_name",description:`<strong>adapter_name</strong> (<code>str</code>) —
	Name of the new adapter.`,name:"adapter_name"}],source:"https://github.com/huggingface/peft/blob/vr_3207/src/peft/tuners/ia3/model.py#L273"}}),P=new Je({props:{source:"https://github.com/huggingface/peft/blob/main/docs/source/package_reference/ia3.md"}}),{c(){i=d("meta"),A=o(),u=d("p"),c=o(),_(f.$$.fragment),r=o(),_(h.$$.fragment),Q=o(),J=d("p"),J.innerHTML=fe,G=o(),C=d("p"),C.textContent=ge,z=o(),U=d("p"),U.innerHTML=he,R=o(),_(k.$$.fragment),X=o(),I=d("div"),_(L.$$.fragment),oe=o(),q=d("p"),q.innerHTML=ue,K=o(),_(E.$$.fragment),Y=o(),l=d("div"),_(j.$$.fragment),se=o(),W=d("p"),W.innerHTML=_e,re=o(),_(x.$$.fragment),ie=o(),B=d("p"),B.innerHTML=we,le=o(),F=d("ul"),F.innerHTML=be,de=o(),M=d("div"),_(N.$$.fragment),pe=o(),S=d("p"),S.textContent=ve,ee=o(),_(P.$$.fragment),te=o(),D=d("p"),this.h()},l(e){const t=Me("svelte-u9bgzb",document.head);i=p(t,"META",{name:!0,content:!0}),t.forEach(a),A=s(e),u=p(e,"P",{}),H(u).forEach(a),c=s(e),w(f.$$.fragment,e),r=s(e),w(h.$$.fragment,e),Q=s(e),J=p(e,"P",{"data-svelte-h":!0}),$(J)!=="svelte-1bb1wl3"&&(J.innerHTML=fe),G=s(e),C=p(e,"P",{"data-svelte-h":!0}),$(C)!=="svelte-1cwsb16"&&(C.textContent=ge),z=s(e),U=p(e,"P",{"data-svelte-h":!0}),$(U)!=="svelte-6ze8zr"&&(U.innerHTML=he),R=s(e),w(k.$$.fragment,e),X=s(e),I=p(e,"DIV",{class:!0});var Z=H(I);w(L.$$.fragment,Z),oe=s(Z),q=p(Z,"P",{"data-svelte-h":!0}),$(q)!=="svelte-1kdabp6"&&(q.innerHTML=ue),Z.forEach(a),K=s(e),w(E.$$.fragment,e),Y=s(e),l=p(e,"DIV",{class:!0});var g=H(l);w(j.$$.fragment,g),se=s(g),W=p(g,"P",{"data-svelte-h":!0}),$(W)!=="svelte-87vt46"&&(W.innerHTML=_e),re=s(g),w(x.$$.fragment,g),ie=s(g),B=p(g,"P",{"data-svelte-h":!0}),$(B)!=="svelte-1xx6nm4"&&(B.innerHTML=we),le=s(g),F=p(g,"UL",{"data-svelte-h":!0}),$(F)!=="svelte-1cvf3ph"&&(F.innerHTML=be),de=s(g),M=p(g,"DIV",{class:!0});var ne=H(M);w(N.$$.fragment,ne),pe=s(ne),S=p(ne,"P",{"data-svelte-h":!0}),$(S)!=="svelte-lg9b7q"&&(S.textContent=ve),ne.forEach(a),g.forEach(a),ee=s(e),w(P.$$.fragment,e),te=s(e),D=p(e,"P",{}),H(D).forEach(a),this.h()},h(){V(i,"name","hf:doc:metadata"),V(i,"content",Ee),V(I,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),V(M,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),V(l,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8")},m(e,t){m(document.head,i),n(e,A,t),n(e,u,t),n(e,c,t),b(f,e,t),n(e,r,t),b(h,e,t),n(e,Q,t),n(e,J,t),n(e,G,t),n(e,C,t),n(e,z,t),n(e,U,t),n(e,R,t),b(k,e,t),n(e,X,t),n(e,I,t),b(L,I,null),m(I,oe),m(I,q),n(e,K,t),b(E,e,t),n(e,Y,t),n(e,l,t),b(j,l,null),m(l,se),m(l,W),m(l,re),b(x,l,null),m(l,ie),m(l,B),m(l,le),m(l,F),m(l,de),m(l,M),b(N,M,null),m(M,pe),m(M,S),n(e,ee,t),b(P,e,t),n(e,te,t),n(e,D,t),ae=!0},p(e,[t]){const Z={};t&2&&(Z.$$scope={dirty:t,ctx:e}),x.$set(Z)},i(e){ae\|\|(v(f.$$.fragment,e),v(h.$$.fragment,e),v(k.$$.fragment,e),v(L.$$.fragment,e),v(E.$$.fragment,e),v(j.$$.fragment,e),v(x.$$.fragment,e),v(N.$$.fragment,e),v(P.$$.fragment,e),ae=!0)},o(e){T(f.$$.fragment,e),T(h.$$.fragment,e),T(k.$$.fragment,e),T(L.$$.fragment,e),T(E.$$.fragment,e),T(j.$$.fragment,e),T(x.$$.fragment,e),T(N.$$.fragment,e),T(P.$$.fragment,e),ae=!1},d(e){e&&(a(A),a(u),a(c),a(r),a(Q),a(J),a(G),a(C),a(z),a(U),a(R),a(X),a(I),a(K),a(Y),a(l),a(ee),a(te),a(D)),a(i),y(f,e),y(h,e),y(k,e),y(L),y(E,e),y(j),y(x),y(N),y(P,e)}}}const Ee='{"title":"IA3","local":"ia3","sections":[{"title":"IA3Config","local":"peft.IA3Config","sections":[],"depth":2},{"title":"IA3Model","local":"peft.IA3Model","sections":[],"depth":2}],"depth":1}';function je(O){return ye(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class Fe extends $e{constructor(i){super(),xe(this,i,je,Le,Te,{})}}export{Fe as component};

Xet Storage Details

Size:: 15.6 kB
Xet hash:: 8d3f01b0730df3177ddc236e38feee8c4e3ce69cd47190e1f143b7a916f69d50

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.