Buckets:

rtrm's picture
download
raw
16.5 kB
/**
 * Compiled page module for the bitsandbytes "4-bit quantization" reference page
 * (sections: Linear4bit, LinearFP4, LinearNF4, Params4bit — see the metadata
 * string `ie` below).
 *
 * NOTE(review): this looks like build output of a Svelte compiler, not
 * hand-written code. All helpers are imported under minified aliases, so the
 * roles mentioned in comments (create / claim / mount / destroy, ...) are
 * inferred from how the helpers are called here — confirm against
 * ../chunks/index and ../chunks/scheduler before relying on them. Lasting
 * content fixes belong in docs/source/reference/nn/linear4bit.mdx and the
 * Python docstrings, not in this file.
 */
import{s as Ht,o as St,n as At}from"../chunks/scheduler.852ec091.js";
import{S as Kt,i as Ot,g as i,s,r as p,A as te,h as o,f as e,c as r,j as N,u as b,x,k as F,y as a,a as l,v as c,d as f,t as _,w as h}from"../chunks/index.28275fd3.js";
import{D as J}from"../chunks/Docstring.ee6c313e.js";
import{C as ee}from"../chunks/CodeBlock.c3366071.js";
import{E as ne}from"../chunks/ExampleCodeBlock.00f06ed4.js";
import{H as ot,E as ae}from"../chunks/EditOnGithub.582011f0.js";

/**
 * Fragment factory for the default slot of the Linear4bit example block:
 * an "Example:" paragraph followed by one code-block component.
 *
 * @param {*} lt - context value supplied by the caller; not read in this fragment.
 * @returns {object} fragment object exposing c/l/m/p/i/o/d methods.
 */
function se(lt){let m,I="Example:",M,v,$;
// `code` is the base64 of the URL-encoded snippet; `highlighted` is the same
// snippet as highlight.js markup. Its blank lines and 4-space indentation
// mirror what `code` decodes to, so the rendered and the copyable text agree.
return v=new ee({props:{code:"aW1wb3J0JTIwdG9yY2glMEFpbXBvcnQlMjB0b3JjaC5ubiUyMGFzJTIwbm4lMEElMEFpbXBvcnQlMjBiaXRzYW5kYnl0ZXMlMjBhcyUyMGJuYiUwQWZyb20lMjBibmIubm4lMjBpbXBvcnQlMjBMaW5lYXI0Yml0JTBBJTBBZnAxNl9tb2RlbCUyMCUzRCUyMG5uLlNlcXVlbnRpYWwoJTBBJTIwJTIwJTIwJTIwbm4uTGluZWFyKDY0JTJDJTIwNjQpJTJDJTBBJTIwJTIwJTIwJTIwbm4uTGluZWFyKDY0JTJDJTIwNjQpJTBBKSUwQSUwQXF1YW50aXplZF9tb2RlbCUyMCUzRCUyMG5uLlNlcXVlbnRpYWwoJTBBJTIwJTIwJTIwJTIwTGluZWFyNGJpdCg2NCUyQyUyMDY0KSUyQyUwQSUyMCUyMCUyMCUyMExpbmVhcjRiaXQoNjQlMkMlMjA2NCklMEEpJTBBJTBBcXVhbnRpemVkX21vZGVsLmxvYWRfc3RhdGVfZGljdChmcDE2X21vZGVsLnN0YXRlX2RpY3QoKSklMEFxdWFudGl6ZWRfbW9kZWwlMjAlM0QlMjBxdWFudGl6ZWRfbW9kZWwudG8oMCklMjAlMjMlMjBRdWFudGl6YXRpb24lMjBoYXBwZW5zJTIwaGVyZQ==",highlighted:`<span class="hljs-keyword">import</span> torch
<span class="hljs-keyword">import</span> torch.nn <span class="hljs-keyword">as</span> nn

<span class="hljs-keyword">import</span> bitsandbytes <span class="hljs-keyword">as</span> bnb
<span class="hljs-keyword">from</span> bnb.nn <span class="hljs-keyword">import</span> Linear4bit

fp16_model = nn.Sequential(
    nn.Linear(<span class="hljs-number">64</span>, <span class="hljs-number">64</span>),
    nn.Linear(<span class="hljs-number">64</span>, <span class="hljs-number">64</span>)
)

quantized_model = nn.Sequential(
    Linear4bit(<span class="hljs-number">64</span>, <span class="hljs-number">64</span>),
    Linear4bit(<span class="hljs-number">64</span>, <span class="hljs-number">64</span>)
)

quantized_model.load_state_dict(fp16_model.state_dict())
quantized_model = quantized_model.to(<span class="hljs-number">0</span>) <span class="hljs-comment"># Quantization happens here</span>`,wrap:!1}}),{
// c: builds the <p> and the code-block fragment from scratch.
c(){m=i("p"),m.textContent=I,M=s(),p(v.$$.fragment)},
// l: same structure, but taken from existing nodes `d`; the text is only
// rewritten when the node's recorded hash differs from "svelte-11lpom8".
l(d){m=o(d,"P",{"data-svelte-h":!0}),x(m)!=="svelte-11lpom8"&&(m.textContent=I),M=r(d),b(v.$$.fragment,d)},
// m: inserts the nodes into target `d` before anchor `L`; `$` records that
// the child component is currently shown.
m(d,L){l(d,m,L),l(d,M,L),c(v,d,L),$=!0},
// p: nothing in this fragment depends on changing state.
p:At,
i(d){$||(f(v.$$.fragment,d),$=!0)},
o(d){_(v.$$.fragment,d),$=!1},
// d: removes the nodes (only when `d` is truthy) and tears down the code block.
d(d){d&&(e(m),e(M)),h(v,d)}}}

/**
 * Fragment factory for the whole page: a metadata <meta> tag in document.head,
 * the page heading, and one documented section each for Linear4bit, LinearFP4,
 * LinearNF4 and Params4bit, followed by the "edit on GitHub" component.
 *
 * @param {*} lt - context value; forwarded as `$$scope.ctx` to the example block.
 * @returns {object} fragment object exposing c/l/h/m/p/i/o/d methods.
 */
function re(lt){
// Single-purpose locals: DOM nodes, whitespace separators, child components
// and the static HTML/text snippets they display. Strings whose nodes are
// checked against a "svelte-…" hash in l() are left verbatim on purpose.
let m,I,M,v,$,d,L,
kt='<a href="https://hf.co/papers/2305.14314" rel="nofollow">QLoRA</a> is a finetuning method that quantizes a model to 4-bits and adds a set of low-rank adaptation (LoRA) weights to the model and tuning them through the quantized weights. This method also introduces a new data type, 4-bit NormalFloat (<code>LinearNF4</code>) in addition to the standard Float4 data type (<code>LinearFP4</code>). <code>LinearNF4</code> is a quantization data type for normally distributed data and can improve performance.',
dt,z,mt,y,B,Lt,Z,
Et=`This class is the base module for the 4-bit quantization algorithm presented in <a href="https://arxiv.org/abs/2305.14314" rel="nofollow">QLoRA</a>.
QLoRA 4-bit linear layers uses blockwise k-bit quantization under the hood, with the possibility of selecting various
compute datatypes such as FP4 and NF4.`,
Tt,H,
Rt=`In order to quantize a linear layer one should first load the original fp16 / bf16 weights into
the Linear4bit module, then call <code>quantized_module.to(&quot;cuda&quot;)</code> to quantize the fp16 / bf16 weights.`,
xt,q,Mt,j,k,Nt,S,
Wt="Initialize Linear4bit class.",
ut,E,pt,T,R,Ft,A,
Qt="Implements the FP4 data type.",
Ct,K,W,bt,Q,ct,u,D,qt,O,
Dt="Implements the NF4 data type.",
jt,tt,
Ut=`Constructs a quantization data type where each bin has equal area under a standard normal distribution N(0, 1) that
is normalized into the range [-1, 1].`,
Pt,et,
Gt='For more information read the paper: QLoRA: Efficient Finetuning of Quantized LLMs (<a href="https://arxiv.org/abs/2305.14314" rel="nofollow">https://arxiv.org/abs/2305.14314</a>)',
Jt,nt,
Xt=`Implementation of the NF4 data type in bitsandbytes can be found in the <code>create_normal_map</code> function in
the <code>functional.py</code> file: <a href="https://github.com/TimDettmers/bitsandbytes/blob/main/bitsandbytes/functional.py#L236" rel="nofollow">https://github.com/TimDettmers/bitsandbytes/blob/main/bitsandbytes/functional.py#L236</a>.`,
It,at,U,ft,G,_t,C,X,zt,P,V,Bt,st,
Vt="Initialize self. See help(type(self)) for accurate signature.",
ht,Y,yt,it,gt;
// Child components are instantiated eagerly, in page order, as part of the
// returned comma expression (the first one must stay on the `return` line).
return $=new ot({props:{title:"4-bit quantization",local:"4-bit-quantization",headingTag:"h1"}}),
// --- Linear4bit ---
z=new ot({props:{title:"Linear4bit",local:"bitsandbytes.nn.Linear4bit",headingTag:"h2"}}),
B=new J({props:{name:"class bitsandbytes.nn.Linear4bit",anchor:"bitsandbytes.nn.Linear4bit",parameters:[{name:"input_features",val:""},{name:"output_features",val:""},{name:"bias",val:" = True"},{name:"compute_dtype",val:" = None"},{name:"compress_statistics",val:" = True"},{name:"quant_type",val:" = 'fp4'"},{name:"quant_storage",val:" = torch.uint8"},{name:"device",val:" = None"}],source:"https://github.com/bitsandbytes-foundation/bitsandbytes/blob/vr_1544/bitsandbytes/nn/modules.py#L365"}}),
q=new ne({props:{anchor:"bitsandbytes.nn.Linear4bit.example",$$slots:{default:[se]},$$scope:{ctx:lt}}}),
// NOTE(review): the descriptions below (and the FP4/NF4 copies) label the
// feature counts as `str` although the text says "Number of ..." and the
// example passes 64 — looks like an upstream docstring typo; fix it in the
// Python docstring so the page is regenerated consistently.
k=new J({props:{name:"__init__",anchor:"bitsandbytes.nn.Linear4bit.__init__",parameters:[{name:"input_features",val:""},{name:"output_features",val:""},{name:"bias",val:" = True"},{name:"compute_dtype",val:" = None"},{name:"compress_statistics",val:" = True"},{name:"quant_type",val:" = 'fp4'"},{name:"quant_storage",val:" = torch.uint8"},{name:"device",val:" = None"}],parametersDescription:[{anchor:"bitsandbytes.nn.Linear4bit.__init__.input_features",description:`<strong>input_features</strong> (<code>str</code>) &#x2014;
Number of input features of the linear layer.`,name:"input_features"},{anchor:"bitsandbytes.nn.Linear4bit.__init__.output_features",description:`<strong>output_features</strong> (<code>str</code>) &#x2014;
Number of output features of the linear layer.`,name:"output_features"},{anchor:"bitsandbytes.nn.Linear4bit.__init__.bias",description:`<strong>bias</strong> (<code>bool</code>, defaults to <code>True</code>) &#x2014;
Whether the linear class uses the bias term as well.`,name:"bias"}],source:"https://github.com/bitsandbytes-foundation/bitsandbytes/blob/vr_1544/bitsandbytes/nn/modules.py#L398"}}),
// --- LinearFP4 ---
E=new ot({props:{title:"LinearFP4",local:"bitsandbytes.nn.LinearFP4",headingTag:"h2"}}),
R=new J({props:{name:"class bitsandbytes.nn.LinearFP4",anchor:"bitsandbytes.nn.LinearFP4",parameters:[{name:"input_features",val:""},{name:"output_features",val:""},{name:"bias",val:" = True"},{name:"compute_dtype",val:" = None"},{name:"compress_statistics",val:" = True"},{name:"quant_storage",val:" = torch.uint8"},{name:"device",val:" = None"}],source:"https://github.com/bitsandbytes-foundation/bitsandbytes/blob/vr_1544/bitsandbytes/nn/modules.py#L486"}}),
W=new J({props:{name:"__init__",anchor:"bitsandbytes.nn.LinearFP4.__init__",parameters:[{name:"input_features",val:""},{name:"output_features",val:""},{name:"bias",val:" = True"},{name:"compute_dtype",val:" = None"},{name:"compress_statistics",val:" = True"},{name:"quant_storage",val:" = torch.uint8"},{name:"device",val:" = None"}],parametersDescription:[{anchor:"bitsandbytes.nn.LinearFP4.__init__.input_features",description:`<strong>input_features</strong> (<code>str</code>) &#x2014;
Number of input features of the linear layer.`,name:"input_features"},{anchor:"bitsandbytes.nn.LinearFP4.__init__.output_features",description:`<strong>output_features</strong> (<code>str</code>) &#x2014;
Number of output features of the linear layer.`,name:"output_features"},{anchor:"bitsandbytes.nn.LinearFP4.__init__.bias",description:`<strong>bias</strong> (<code>bool</code>, defaults to <code>True</code>) &#x2014;
Whether the linear class uses the bias term as well.`,name:"bias"}],source:"https://github.com/bitsandbytes-foundation/bitsandbytes/blob/vr_1544/bitsandbytes/nn/modules.py#L491"}}),
// --- LinearNF4 ---
Q=new ot({props:{title:"LinearNF4",local:"bitsandbytes.nn.LinearNF4",headingTag:"h2"}}),
D=new J({props:{name:"class bitsandbytes.nn.LinearNF4",anchor:"bitsandbytes.nn.LinearNF4",parameters:[{name:"input_features",val:""},{name:"output_features",val:""},{name:"bias",val:" = True"},{name:"compute_dtype",val:" = None"},{name:"compress_statistics",val:" = True"},{name:"quant_storage",val:" = torch.uint8"},{name:"device",val:" = None"}],source:"https://github.com/bitsandbytes-foundation/bitsandbytes/blob/vr_1544/bitsandbytes/nn/modules.py#L522"}}),
U=new J({props:{name:"__init__",anchor:"bitsandbytes.nn.LinearNF4.__init__",parameters:[{name:"input_features",val:""},{name:"output_features",val:""},{name:"bias",val:" = True"},{name:"compute_dtype",val:" = None"},{name:"compress_statistics",val:" = True"},{name:"quant_storage",val:" = torch.uint8"},{name:"device",val:" = None"}],parametersDescription:[{anchor:"bitsandbytes.nn.LinearNF4.__init__.input_features",description:`<strong>input_features</strong> (<code>str</code>) &#x2014;
Number of input features of the linear layer.`,name:"input_features"},{anchor:"bitsandbytes.nn.LinearNF4.__init__.output_features",description:`<strong>output_features</strong> (<code>str</code>) &#x2014;
Number of output features of the linear layer.`,name:"output_features"},{anchor:"bitsandbytes.nn.LinearNF4.__init__.bias",description:`<strong>bias</strong> (<code>bool</code>, defaults to <code>True</code>) &#x2014;
Whether the linear class uses the bias term as well.`,name:"bias"}],source:"https://github.com/bitsandbytes-foundation/bitsandbytes/blob/vr_1544/bitsandbytes/nn/modules.py#L534"}}),
// --- Params4bit ---
G=new ot({props:{title:"Params4bit",local:"bitsandbytes.nn.Params4bit",headingTag:"h2"}}),
X=new J({props:{name:"class bitsandbytes.nn.Params4bit",anchor:"bitsandbytes.nn.Params4bit",parameters:[{name:"data",val:": typing.Optional[torch.Tensor] = None"},{name:"requires_grad",val:" = False"},{name:"quant_state",val:": typing.Optional[bitsandbytes.functional.QuantState] = None"},{name:"blocksize",val:": int = 64"},{name:"compress_statistics",val:": bool = True"},{name:"quant_type",val:": str = 'fp4'"},{name:"quant_storage",val:": dtype = torch.uint8"},{name:"module",val:": typing.Optional[ForwardRef('Linear4bit')] = None"},{name:"bnb_quantized",val:": bool = False"}],source:"https://github.com/bitsandbytes-foundation/bitsandbytes/blob/vr_1544/bitsandbytes/nn/modules.py#L209"}}),
V=new J({props:{name:"__init__",anchor:"bitsandbytes.nn.Params4bit.__init__",parameters:[{name:"*args",val:""},{name:"**kwargs",val:""}]}}),
// --- footer ---
Y=new ae({props:{source:"https://github.com/bitsandbytes-foundation/bitsandbytes/blob/main/docs/source/reference/nn/linear4bit.mdx"}}),{
// c: builds every element, separator and child fragment, then applies the
// attributes via h().
c(){m=i("meta"),I=s(),M=i("p"),v=s(),p($.$$.fragment),d=s(),L=i("p"),L.innerHTML=kt,dt=s(),p(z.$$.fragment),mt=s(),y=i("div"),p(B.$$.fragment),Lt=s(),Z=i("p"),Z.innerHTML=Et,Tt=s(),H=i("p"),H.innerHTML=Rt,xt=s(),p(q.$$.fragment),Mt=s(),j=i("div"),p(k.$$.fragment),Nt=s(),S=i("p"),S.textContent=Wt,ut=s(),p(E.$$.fragment),pt=s(),T=i("div"),p(R.$$.fragment),Ft=s(),A=i("p"),A.textContent=Qt,Ct=s(),K=i("div"),p(W.$$.fragment),bt=s(),p(Q.$$.fragment),ct=s(),u=i("div"),p(D.$$.fragment),qt=s(),O=i("p"),O.textContent=Dt,jt=s(),tt=i("p"),tt.textContent=Ut,Pt=s(),et=i("p"),et.innerHTML=Gt,Jt=s(),nt=i("p"),nt.innerHTML=Xt,It=s(),at=i("div"),p(U.$$.fragment),ft=s(),p(G.$$.fragment),_t=s(),C=i("div"),p(X.$$.fragment),zt=s(),P=i("div"),p(V.$$.fragment),Bt=s(),st=i("p"),st.textContent=Vt,ht=s(),p(Y.$$.fragment),yt=s(),it=i("p"),this.h()},
// l: same tree, but taken from existing nodes `t` (and from document.head for
// the meta tag). Static text/HTML is only rewritten when the node's recorded
// hash differs from the expected "svelte-…" value, which is why the strings
// above must stay in sync with those hashes.
l(t){const n=te("svelte-u9bgzb",document.head);m=o(n,"META",{name:!0,content:!0}),n.forEach(e),I=r(t),M=o(t,"P",{}),N(M).forEach(e),v=r(t),b($.$$.fragment,t),d=r(t),L=o(t,"P",{"data-svelte-h":!0}),x(L)!=="svelte-j7lmsi"&&(L.innerHTML=kt),dt=r(t),b(z.$$.fragment,t),mt=r(t),y=o(t,"DIV",{class:!0});var g=N(y);b(B.$$.fragment,g),Lt=r(g),Z=o(g,"P",{"data-svelte-h":!0}),x(Z)!=="svelte-b56pxf"&&(Z.innerHTML=Et),Tt=r(g),H=o(g,"P",{"data-svelte-h":!0}),x(H)!=="svelte-yrth4q"&&(H.innerHTML=Rt),xt=r(g),b(q.$$.fragment,g),Mt=r(g),j=o(g,"DIV",{class:!0});var vt=N(j);b(k.$$.fragment,vt),Nt=r(vt),S=o(vt,"P",{"data-svelte-h":!0}),x(S)!=="svelte-9bim4q"&&(S.textContent=Wt),vt.forEach(e),g.forEach(e),ut=r(t),b(E.$$.fragment,t),pt=r(t),T=o(t,"DIV",{class:!0});var rt=N(T);b(R.$$.fragment,rt),Ft=r(rt),A=o(rt,"P",{"data-svelte-h":!0}),x(A)!=="svelte-1wlk05v"&&(A.textContent=Qt),Ct=r(rt),K=o(rt,"DIV",{class:!0});var Yt=N(K);b(W.$$.fragment,Yt),Yt.forEach(e),rt.forEach(e),bt=r(t),b(Q.$$.fragment,t),ct=r(t),u=o(t,"DIV",{class:!0});var w=N(u);b(D.$$.fragment,w),qt=r(w),O=o(w,"P",{"data-svelte-h":!0}),x(O)!=="svelte-5d8bnh"&&(O.textContent=Dt),jt=r(w),tt=o(w,"P",{"data-svelte-h":!0}),x(tt)!=="svelte-4zan40"&&(tt.textContent=Ut),Pt=r(w),et=o(w,"P",{"data-svelte-h":!0}),x(et)!=="svelte-1ditbm8"&&(et.innerHTML=Gt),Jt=r(w),nt=o(w,"P",{"data-svelte-h":!0}),x(nt)!=="svelte-weczdq"&&(nt.innerHTML=Xt),It=r(w),at=o(w,"DIV",{class:!0});var Zt=N(at);b(U.$$.fragment,Zt),Zt.forEach(e),w.forEach(e),ft=r(t),b(G.$$.fragment,t),_t=r(t),C=o(t,"DIV",{class:!0});var $t=N(C);b(X.$$.fragment,$t),zt=r($t),P=o($t,"DIV",{class:!0});var wt=N(P);b(V.$$.fragment,wt),Bt=r(wt),st=o(wt,"P",{"data-svelte-h":!0}),x(st)!=="svelte-gef1cn"&&(st.textContent=Vt),wt.forEach(e),$t.forEach(e),ht=r(t),b(Y.$$.fragment,t),yt=r(t),it=o(t,"P",{}),N(it).forEach(e),this.h()},
// h: sets the meta tag's name/content and gives all eight docstring wrappers
// the same class string. `ie` is declared further down the module; that is
// fine because h() only runs after the module has finished evaluating.
h(){F(m,"name","hf:doc:metadata"),F(m,"content",ie),F(j,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),F(y,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),F(K,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),F(T,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),F(at,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),F(u,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),F(P,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),F(C,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8")},
// m: attaches the meta tag to document.head and everything else to target `t`
// before anchor `n`, nesting each __init__ docstring inside its class wrapper.
m(t,n){a(document.head,m),l(t,I,n),l(t,M,n),l(t,v,n),c($,t,n),l(t,d,n),l(t,L,n),l(t,dt,n),c(z,t,n),l(t,mt,n),l(t,y,n),c(B,y,null),a(y,Lt),a(y,Z),a(y,Tt),a(y,H),a(y,xt),c(q,y,null),a(y,Mt),a(y,j),c(k,j,null),a(j,Nt),a(j,S),l(t,ut,n),c(E,t,n),l(t,pt,n),l(t,T,n),c(R,T,null),a(T,Ft),a(T,A),a(T,Ct),a(T,K),c(W,K,null),l(t,bt,n),c(Q,t,n),l(t,ct,n),l(t,u,n),c(D,u,null),a(u,qt),a(u,O),a(u,jt),a(u,tt),a(u,Pt),a(u,et),a(u,Jt),a(u,nt),a(u,It),a(u,at),c(U,at,null),l(t,ft,n),c(G,t,n),l(t,_t,n),l(t,C,n),c(X,C,null),a(C,zt),a(C,P),c(V,P,null),a(P,Bt),a(P,st),l(t,ht,n),c(Y,t,n),l(t,yt,n),l(t,it,n),gt=!0},
// p: the only dynamic part is the example block's slot scope, refreshed when
// bit 2 of the dirty mask is set.
p(t,[n]){const g={};n&2&&(g.$$scope={dirty:n,ctx:t}),q.$set(g)},
// i / o: forward to every child fragment; `gt` guards against repeating i().
i(t){gt||(f($.$$.fragment,t),f(z.$$.fragment,t),f(B.$$.fragment,t),f(q.$$.fragment,t),f(k.$$.fragment,t),f(E.$$.fragment,t),f(R.$$.fragment,t),f(W.$$.fragment,t),f(Q.$$.fragment,t),f(D.$$.fragment,t),f(U.$$.fragment,t),f(G.$$.fragment,t),f(X.$$.fragment,t),f(V.$$.fragment,t),f(Y.$$.fragment,t),gt=!0)},
o(t){_($.$$.fragment,t),_(z.$$.fragment,t),_(B.$$.fragment,t),_(q.$$.fragment,t),_(k.$$.fragment,t),_(E.$$.fragment,t),_(R.$$.fragment,t),_(W.$$.fragment,t),_(Q.$$.fragment,t),_(D.$$.fragment,t),_(U.$$.fragment,t),_(G.$$.fragment,t),_(X.$$.fragment,t),_(V.$$.fragment,t),_(Y.$$.fragment,t),gt=!1},
// d: removes the top-level nodes only when `t` is truthy, always removes the
// head meta tag, then tears down every child component.
d(t){t&&(e(I),e(M),e(v),e(d),e(L),e(dt),e(mt),e(y),e(ut),e(pt),e(T),e(bt),e(ct),e(u),e(ft),e(_t),e(C),e(ht),e(yt),e(it)),e(m),h($,t),h(z,t),h(B),h(q),h(k),h(E,t),h(R),h(W),h(Q,t),h(D),h(U),h(G,t),h(X),h(V),h(Y,t)}}}

// JSON table of contents written into <meta name="hf:doc:metadata"> by re().h().
const ie='{"title":"4-bit quantization","local":"4-bit-quantization","sections":[{"title":"Linear4bit","local":"bitsandbytes.nn.Linear4bit","sections":[],"depth":2},{"title":"LinearFP4","local":"bitsandbytes.nn.LinearFP4","sections":[],"depth":2},{"title":"LinearNF4","local":"bitsandbytes.nn.LinearNF4","sections":[],"depth":2},{"title":"Params4bit","local":"bitsandbytes.nn.Params4bit","sections":[],"depth":2}],"depth":1}';

/**
 * Instance setup passed to the component initialiser in `ce`.
 *
 * Registers a callback through `St` that reads the `fw` query parameter from
 * the current URL and discards the value, then returns an empty array (the
 * comma expression's last operand).
 * NOTE(review): the discarded lookup looks like a leftover of an unused
 * variable in the page source — confirm before removing it there.
 *
 * @param {*} lt - unused.
 * @returns {Array} always a fresh empty array.
 */
function oe(lt){return St(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}

/**
 * Page component: wires the setup function `oe` and the fragment factory `re`
 * into the imported base class `Kt` through the initialiser `Ot`, with `Ht`
 * as the comparison helper and no declared props (`{}`).
 */
class ce extends Kt{constructor(m){super(),Ot(this,m,oe,re,Ht,{})}}

export{ce as component};

Xet Storage Details

Size:
16.5 kB
·
Xet hash:
8e51017b5348d840e5e0d003fc003306970c35b90a13b65bd7ce8508ce9b2534

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.