Buckets:
| import{s as Te,o as ye,n as Ie}from"../chunks/scheduler.78382b47.js";import{S as $e,i as xe,e as d,s as o,c as _,h as Me,a as p,d as a,b as s,f as H,g as w,j as $,k as V,l as m,m as n,n as b,t as v,o as T,p as y}from"../chunks/index.6dd35eb6.js";import{C as Ae,H as me,E as Je}from"../chunks/MermaidChart.svelte_svelte_type_style_lang.d25d6883.js";import{D as ce}from"../chunks/Docstring.a245c00c.js";import{C as Ce}from"../chunks/CodeBlock.147ab5db.js";import{E as Ue}from"../chunks/ExampleCodeBlock.29fc6d51.js";function ke(O){let i,A="Example:",u,c,f;return c=new Ce({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Nb2RlbEZvclNlcTJTZXFMTSUyQyUyMGlhM0NvbmZpZyUwQWZyb20lMjBwZWZ0JTIwaW1wb3J0JTIwSUEzTW9kZWwlMkMlMjBJQTNDb25maWclMEElMEFjb25maWclMjAlM0QlMjBJQTNDb25maWcoJTBBJTIwJTIwJTIwJTIwcGVmdF90eXBlJTNEJTIySUEzJTIyJTJDJTBBJTIwJTIwJTIwJTIwdGFza190eXBlJTNEJTIyU0VRXzJfU0VRX0xNJTIyJTJDJTBBJTIwJTIwJTIwJTIwdGFyZ2V0X21vZHVsZXMlM0QlNUIlMjJrJTIyJTJDJTIwJTIydiUyMiUyQyUyMCUyMncwJTIyJTVEJTJDJTBBJTIwJTIwJTIwJTIwZmVlZGZvcndhcmRfbW9kdWxlcyUzRCU1QiUyMncwJTIyJTVEJTJDJTBBKSUwQSUwQW1vZGVsJTIwJTNEJTIwQXV0b01vZGVsRm9yU2VxMlNlcUxNLmZyb21fcHJldHJhaW5lZCglMjJ0NS1iYXNlJTIyKSUwQWlhM19tb2RlbCUyMCUzRCUyMElBM01vZGVsKGNvbmZpZyUyQyUyMG1vZGVsKQ==",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForSeq2SeqLM, ia3Config | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> peft <span class="hljs-keyword">import</span> IA3Model, IA3Config | |
| <span class="hljs-meta">>>> </span>config = IA3Config( | |
| <span class="hljs-meta">... </span> peft_type=<span class="hljs-string">"IA3"</span>, | |
| <span class="hljs-meta">... </span> task_type=<span class="hljs-string">"SEQ_2_SEQ_LM"</span>, | |
| <span class="hljs-meta">... </span> target_modules=[<span class="hljs-string">"k"</span>, <span class="hljs-string">"v"</span>, <span class="hljs-string">"w0"</span>], | |
| <span class="hljs-meta">... </span> feedforward_modules=[<span class="hljs-string">"w0"</span>], | |
| <span class="hljs-meta">... </span>) | |
| <span class="hljs-meta">>>> </span>model = AutoModelForSeq2SeqLM.from_pretrained(<span class="hljs-string">"t5-base"</span>) | |
| <span class="hljs-meta">>>> </span>ia3_model = IA3Model(config, model)`,wrap:!1}}),{c(){i=d("p"),i.textContent=A,u=o(),_(c.$$.fragment)},l(r){i=p(r,"P",{"data-svelte-h":!0}),$(i)!=="svelte-11lpom8"&&(i.textContent=A),u=s(r),w(c.$$.fragment,r)},m(r,h){n(r,i,h),n(r,u,h),b(c,r,h),f=!0},p:Ie,i(r){f||(v(c.$$.fragment,r),f=!0)},o(r){T(c.$$.fragment,r),f=!1},d(r){r&&(a(i),a(u)),y(c,r)}}}function Le(O){let i,A,u,c,f,r,h,Q,J,fe='Infused Adapter by Inhibiting and Amplifying Inner Activations, or <a href="https://hf.co/papers/2205.05638" rel="nofollow">IA3</a>, is a method that adds three learned vectors to rescale the keys and values of the self-attention and encoder-decoder attention layers, and the intermediate activation of the position-wise feed-forward network.',G,C,ge="The abstract from the paper is:",z,U,he="<em>Few-shot in-context learning (ICL) enables pre-trained language models to perform a previously-unseen task without any gradient-based training by feeding a small number of training examples as part of the input. ICL incurs substantial computational, memory, and storage costs because it involves processing all of the training examples every time a prediction is made. Parameter-efficient fine-tuning (PEFT) (e.g. adapter modules, prompt tuning, sparse update methods, etc.) offers an alternative paradigm where a small set of parameters are trained to enable a model to perform the new task. In this paper, we rigorously compare few-shot ICL and PEFT and demonstrate that the latter offers better accuracy as well as dramatically lower computational costs. Along the way, we introduce a new PEFT method called (IA)^3 that scales activations by learned vectors, attaining stronger performance while only introducing a relatively tiny amount of new parameters. We also propose a simple recipe based on the T0 model called T-Few that can be applied to new tasks without task-specific tuning or modifications. We validate the effectiveness of T-Few on completely unseen tasks by applying it to the RAFT benchmark, attaining super-human performance for the first time and outperforming the state-of-the-art by 6% absolute. All of the code used in our experiments is publicly available</em>.",R,k,X,I,L,oe,q,ue='This is the configuration class to store the configuration of a <a href="/docs/peft/pr_3207/en/package_reference/ia3#peft.IA3Model">IA3Model</a>.',K,E,Y,l,j,se,W,_e=`Creates a Infused Adapter by Inhibiting and Amplifying Inner Activations ((IA)^3) model from a pretrained | |
| transformers model. The method is described in detail in <a href="https://huggingface.co/papers/2205.05638" rel="nofollow">https://huggingface.co/papers/2205.05638</a>`,re,x,ie,B,we="<strong>Attributes</strong>:",le,F,be='<li><strong>model</strong> (<a href="https://huggingface.co/docs/transformers/main/en/main_classes/model#transformers.PreTrainedModel" rel="nofollow">PreTrainedModel</a>) — The model to be adapted.</li> <li><strong>peft_config</strong> (<code>ia3Config</code>): The configuration of the (IA)^3 model.</li>',de,M,N,pe,S,ve="This method adds a new adapter by merging the given adapters with the given weights.",ee,P,te,D,ae;return f=new Ae({props:{containerStyle:"float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"}}),h=new me({props:{title:"IA3",local:"ia3",headingTag:"h1"}}),k=new me({props:{title:"IA3Config",local:"peft.IA3Config",headingTag:"h2"}}),L=new ce({props:{name:"class peft.IA3Config",anchor:"peft.IA3Config",parameters:[{name:"task_type",val:": Optional[Union[str, TaskType]] = None"},{name:"peft_type",val:": Optional[Union[str, PeftType]] = None"},{name:"auto_mapping",val:": Optional[dict] = None"},{name:"peft_version",val:": Optional[str] = None"},{name:"base_model_name_or_path",val:": Optional[str] = None"},{name:"revision",val:": Optional[str] = None"},{name:"inference_mode",val:": bool = False"},{name:"target_modules",val:": Optional[Union[list[str], str]] = None"},{name:"exclude_modules",val:": Optional[Union[list[str], str]] = None"},{name:"feedforward_modules",val:": Optional[Union[list[str], str]] = None"},{name:"fan_in_fan_out",val:": bool = False"},{name:"modules_to_save",val:": Optional[list[str]] = None"},{name:"init_ia3_weights",val:": bool = True"}],parametersDescription:[{anchor:"peft.IA3Config.target_modules",description:`<strong>target_modules</strong> (<code>Optional[Union[List[str], str]]</code>) — | |
| The names of the modules to apply the adapter to. If this is specified, only the modules with the specified | |
| names will be replaced. When passing a string, a regex match will be performed. When passing a list of | |
| strings, either an exact match will be performed or it is checked if the name of the module ends with any | |
| of the passed strings. If this is specified as ‘all-linear’, then all linear/Conv1D modules are chosen, | |
| excluding the output layer. If this is not specified, modules will be chosen according to the model | |
| architecture. If the architecture is not known, an error will be raised — in this case, you should specify | |
| the target modules manually.`,name:"target_modules"},{anchor:"peft.IA3Config.exclude_modules",description:`<strong>exclude_modules</strong> (<code>Optional[Union[List[str], str]]</code>) — | |
| The names of the modules to not apply the adapter. When passing a string, a regex match will be performed. | |
| When passing a list of strings, either an exact match will be performed or it is checked if the name of the | |
| module ends with any of the passed strings.`,name:"exclude_modules"},{anchor:"peft.IA3Config.feedforward_modules",description:`<strong>feedforward_modules</strong> (<code>Optional[Union[List[str], str]]</code>) — | |
| The names of the modules to be treated as feedforward modules, as in the original paper. These modules will | |
| have (IA)³ vectors multiplied to the input, instead of the output. <code>feedforward_modules</code> must be a name or | |
| a subset of names present in <code>target_modules</code>.`,name:"feedforward_modules"},{anchor:"peft.IA3Config.fan_in_fan_out",description:`<strong>fan_in_fan_out</strong> (<code>bool</code>) — | |
| Set this to True if the layer to replace stores weight like (fan_in, fan_out). For example, gpt-2 uses | |
| <code>Conv1D</code> which stores weights like (fan_in, fan_out) and hence this should be set to <code>True</code>.`,name:"fan_in_fan_out"},{anchor:"peft.IA3Config.modules_to_save",description:`<strong>modules_to_save</strong> (<code>Optional[List[str]]</code>) — | |
| List of modules apart from (IA)³ layers to be set as trainable and saved in the final checkpoint.`,name:"modules_to_save"},{anchor:"peft.IA3Config.init_ia3_weights",description:`<strong>init_ia3_weights</strong> (<code>bool</code>) — | |
| Whether to initialize the vectors in the (IA)³ layers, defaults to <code>True</code>. Setting this to <code>False</code> is | |
| discouraged.`,name:"init_ia3_weights"}],source:"https://github.com/huggingface/peft/blob/vr_3207/src/peft/tuners/ia3/config.py#L25"}}),E=new me({props:{title:"IA3Model",local:"peft.IA3Model",headingTag:"h2"}}),j=new ce({props:{name:"class peft.IA3Model",anchor:"peft.IA3Model",parameters:[{name:"model",val:""},{name:"peft_config",val:": Union[PeftConfig, dict[str, PeftConfig]]"},{name:"adapter_name",val:": str"},{name:"low_cpu_mem_usage",val:": bool = False"},{name:"state_dict",val:": Optional[dict[str, torch.Tensor]] = None"}],parametersDescription:[{anchor:"peft.IA3Model.model",description:'<strong>model</strong> (<a href="https://huggingface.co/docs/transformers/main/en/main_classes/model#transformers.PreTrainedModel" rel="nofollow">PreTrainedModel</a>) — The model to be adapted.',name:"model"},{anchor:"peft.IA3Model.config",description:'<strong>config</strong> (<a href="/docs/peft/pr_3207/en/package_reference/ia3#peft.IA3Config">IA3Config</a>) — The configuration of the (IA)^3 model.',name:"config"},{anchor:"peft.IA3Model.adapter_name",description:"<strong>adapter_name</strong> (<code>str</code>) — The name of the adapter, defaults to <code>"default"</code>.",name:"adapter_name"},{anchor:"peft.IA3Model.low_cpu_mem_usage",description:`<strong>low_cpu_mem_usage</strong> (<code>bool</code>, <code>optional</code>, defaults to <code>False</code>) — | |
| Create empty adapter weights on meta device. Useful to speed up the loading process.`,name:"low_cpu_mem_usage"}],source:"https://github.com/huggingface/peft/blob/vr_3207/src/peft/tuners/ia3/model.py#L36",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>The (IA)^3 model.</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><code>torch.nn.Module</code></p> | |
| `}}),x=new Ue({props:{anchor:"peft.IA3Model.example",$$slots:{default:[ke]},$$scope:{ctx:O}}}),N=new ce({props:{name:"add_weighted_adapter",anchor:"peft.IA3Model.add_weighted_adapter",parameters:[{name:"adapters",val:": list[str]"},{name:"weights",val:": list[float]"},{name:"adapter_name",val:": str"}],parametersDescription:[{anchor:"peft.IA3Model.add_weighted_adapter.adapters",description:`<strong>adapters</strong> (<code>list</code>) — | |
| List of adapter names to be merged.`,name:"adapters"},{anchor:"peft.IA3Model.add_weighted_adapter.weights",description:`<strong>weights</strong> (<code>list</code>) — | |
| List of weights for each adapter.`,name:"weights"},{anchor:"peft.IA3Model.add_weighted_adapter.adapter_name",description:`<strong>adapter_name</strong> (<code>str</code>) — | |
| Name of the new adapter.`,name:"adapter_name"}],source:"https://github.com/huggingface/peft/blob/vr_3207/src/peft/tuners/ia3/model.py#L273"}}),P=new Je({props:{source:"https://github.com/huggingface/peft/blob/main/docs/source/package_reference/ia3.md"}}),{c(){i=d("meta"),A=o(),u=d("p"),c=o(),_(f.$$.fragment),r=o(),_(h.$$.fragment),Q=o(),J=d("p"),J.innerHTML=fe,G=o(),C=d("p"),C.textContent=ge,z=o(),U=d("p"),U.innerHTML=he,R=o(),_(k.$$.fragment),X=o(),I=d("div"),_(L.$$.fragment),oe=o(),q=d("p"),q.innerHTML=ue,K=o(),_(E.$$.fragment),Y=o(),l=d("div"),_(j.$$.fragment),se=o(),W=d("p"),W.innerHTML=_e,re=o(),_(x.$$.fragment),ie=o(),B=d("p"),B.innerHTML=we,le=o(),F=d("ul"),F.innerHTML=be,de=o(),M=d("div"),_(N.$$.fragment),pe=o(),S=d("p"),S.textContent=ve,ee=o(),_(P.$$.fragment),te=o(),D=d("p"),this.h()},l(e){const t=Me("svelte-u9bgzb",document.head);i=p(t,"META",{name:!0,content:!0}),t.forEach(a),A=s(e),u=p(e,"P",{}),H(u).forEach(a),c=s(e),w(f.$$.fragment,e),r=s(e),w(h.$$.fragment,e),Q=s(e),J=p(e,"P",{"data-svelte-h":!0}),$(J)!=="svelte-1bb1wl3"&&(J.innerHTML=fe),G=s(e),C=p(e,"P",{"data-svelte-h":!0}),$(C)!=="svelte-1cwsb16"&&(C.textContent=ge),z=s(e),U=p(e,"P",{"data-svelte-h":!0}),$(U)!=="svelte-6ze8zr"&&(U.innerHTML=he),R=s(e),w(k.$$.fragment,e),X=s(e),I=p(e,"DIV",{class:!0});var Z=H(I);w(L.$$.fragment,Z),oe=s(Z),q=p(Z,"P",{"data-svelte-h":!0}),$(q)!=="svelte-1kdabp6"&&(q.innerHTML=ue),Z.forEach(a),K=s(e),w(E.$$.fragment,e),Y=s(e),l=p(e,"DIV",{class:!0});var g=H(l);w(j.$$.fragment,g),se=s(g),W=p(g,"P",{"data-svelte-h":!0}),$(W)!=="svelte-87vt46"&&(W.innerHTML=_e),re=s(g),w(x.$$.fragment,g),ie=s(g),B=p(g,"P",{"data-svelte-h":!0}),$(B)!=="svelte-1xx6nm4"&&(B.innerHTML=we),le=s(g),F=p(g,"UL",{"data-svelte-h":!0}),$(F)!=="svelte-1cvf3ph"&&(F.innerHTML=be),de=s(g),M=p(g,"DIV",{class:!0});var ne=H(M);w(N.$$.fragment,ne),pe=s(ne),S=p(ne,"P",{"data-svelte-h":!0}),$(S)!=="svelte-lg9b7q"&&(S.textContent=ve),ne.forEach(a),g.forEach(a),ee=s(e),w(P.$$.fragment,e),te=s(e),D=p(e,"P",{}),H(D).forEach(a),this.h()},h(){V(i,"name","hf:doc:metadata"),V(i,"content",Ee),V(I,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),V(M,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),V(l,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8")},m(e,t){m(document.head,i),n(e,A,t),n(e,u,t),n(e,c,t),b(f,e,t),n(e,r,t),b(h,e,t),n(e,Q,t),n(e,J,t),n(e,G,t),n(e,C,t),n(e,z,t),n(e,U,t),n(e,R,t),b(k,e,t),n(e,X,t),n(e,I,t),b(L,I,null),m(I,oe),m(I,q),n(e,K,t),b(E,e,t),n(e,Y,t),n(e,l,t),b(j,l,null),m(l,se),m(l,W),m(l,re),b(x,l,null),m(l,ie),m(l,B),m(l,le),m(l,F),m(l,de),m(l,M),b(N,M,null),m(M,pe),m(M,S),n(e,ee,t),b(P,e,t),n(e,te,t),n(e,D,t),ae=!0},p(e,[t]){const Z={};t&2&&(Z.$$scope={dirty:t,ctx:e}),x.$set(Z)},i(e){ae||(v(f.$$.fragment,e),v(h.$$.fragment,e),v(k.$$.fragment,e),v(L.$$.fragment,e),v(E.$$.fragment,e),v(j.$$.fragment,e),v(x.$$.fragment,e),v(N.$$.fragment,e),v(P.$$.fragment,e),ae=!0)},o(e){T(f.$$.fragment,e),T(h.$$.fragment,e),T(k.$$.fragment,e),T(L.$$.fragment,e),T(E.$$.fragment,e),T(j.$$.fragment,e),T(x.$$.fragment,e),T(N.$$.fragment,e),T(P.$$.fragment,e),ae=!1},d(e){e&&(a(A),a(u),a(c),a(r),a(Q),a(J),a(G),a(C),a(z),a(U),a(R),a(X),a(I),a(K),a(Y),a(l),a(ee),a(te),a(D)),a(i),y(f,e),y(h,e),y(k,e),y(L),y(E,e),y(j),y(x),y(N),y(P,e)}}}const Ee='{"title":"IA3","local":"ia3","sections":[{"title":"IA3Config","local":"peft.IA3Config","sections":[],"depth":2},{"title":"IA3Model","local":"peft.IA3Model","sections":[],"depth":2}],"depth":1}';function je(O){return ye(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class Fe extends $e{constructor(i){super(),xe(this,i,je,Le,Te,{})}}export{Fe as component}; | |
Xet Storage Details
- Size:
- 15.6 kB
- Xet hash:
- 8d3f01b0730df3177ddc236e38feee8c4e3ce69cd47190e1f143b7a916f69d50
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.