Buckets:

rtrm's picture
download
raw
9.7 kB
/**
 * Compiled page module for the bitsandbytes "8-bit quantization" reference page
 * (sections: Linear8bitLt, Int8Params). This is minified build output: the
 * short helper names below are aliases of exports from hashed chunk files, so
 * their exact semantics are not visible in this file.
 *
 * NOTE(review): the c/l/m/p/i/o/d method names and the `data-svelte-h` checks
 * look like Svelte's create/claim/mount/update/intro/outro/destroy fragment
 * hooks — confirm against the Svelte version that produced the chunks.
 */
import{s as ce,o as fe,n as he}from"../chunks/scheduler.852ec091.js";import{S as _e,i as $e,g as d,s as l,r as _,A as ge,h as u,f as n,c as r,j as P,u as $,x as Z,k as F,y as m,a as i,v as g,d as v,t as w,w as y}from"../chunks/index.28275fd3.js";import{D as ee}from"../chunks/Docstring.395987e7.js";import{C as ve}from"../chunks/CodeBlock.c3366071.js";import{E as we}from"../chunks/ExampleCodeBlock.452ced84.js";import{H as oe,E as ye}from"../chunks/EditOnGithub.582011f0.js";
/**
 * Fragment factory used as the default slot of the Linear8bitLt example block.
 * It creates a <p> whose text is "Example:" followed by a `ve` (CodeBlock chunk)
 * instance configured with `code`, `highlighted` and `wrap: false`.
 *
 * `code` is base64 text; decoded, it is a URI-encoded copy of the Python snippet.
 * NOTE(review): the decoded `code` has blank lines and 4-space indentation that
 * the `highlighted` markup lacks — verify whether that whitespace was lost
 * somewhere upstream of this file.
 *
 * @param {*} U - context argument; not read by this function.
 * @returns {object} fragment object exposing c, l, m, p, i, o and d.
 */
function Me(U){let s,x="Example:",h,b,c;return b=new ve({props:{code:"aW1wb3J0JTIwdG9yY2glMEFpbXBvcnQlMjB0b3JjaC5ubiUyMGFzJTIwbm4lMEElMEFpbXBvcnQlMjBiaXRzYW5kYnl0ZXMlMjBhcyUyMGJuYiUwQWZyb20lMjBibmIubm4lMjBpbXBvcnQlMjBMaW5lYXI4Yml0THQlMEElMEFmcDE2X21vZGVsJTIwJTNEJTIwbm4uU2VxdWVudGlhbCglMEElMjAlMjAlMjAlMjBubi5MaW5lYXIoNjQlMkMlMjA2NCklMkMlMEElMjAlMjAlMjAlMjBubi5MaW5lYXIoNjQlMkMlMjA2NCklMEEpJTBBJTBBaW50OF9tb2RlbCUyMCUzRCUyMG5uLlNlcXVlbnRpYWwoJTBBJTIwJTIwJTIwJTIwTGluZWFyOGJpdEx0KDY0JTJDJTIwNjQlMkMlMjBoYXNfZnAxNl93ZWlnaHRzJTNERmFsc2UpJTJDJTBBJTIwJTIwJTIwJTIwTGluZWFyOGJpdEx0KDY0JTJDJTIwNjQlMkMlMjBoYXNfZnAxNl93ZWlnaHRzJTNERmFsc2UpJTBBKSUwQSUwQWludDhfbW9kZWwubG9hZF9zdGF0ZV9kaWN0KGZwMTZfbW9kZWwuc3RhdGVfZGljdCgpKSUwQWludDhfbW9kZWwlMjAlM0QlMjBpbnQ4X21vZGVsLnRvKDApJTIwJTIzJTIwUXVhbnRpemF0aW9uJTIwaGFwcGVucyUyMGhlcmU=",highlighted:`<span class="hljs-keyword">import</span> torch
<span class="hljs-keyword">import</span> torch.nn <span class="hljs-keyword">as</span> nn
<span class="hljs-keyword">import</span> bitsandbytes <span class="hljs-keyword">as</span> bnb
<span class="hljs-keyword">from</span> bnb.nn <span class="hljs-keyword">import</span> Linear8bitLt
fp16_model = nn.Sequential(
nn.Linear(<span class="hljs-number">64</span>, <span class="hljs-number">64</span>),
nn.Linear(<span class="hljs-number">64</span>, <span class="hljs-number">64</span>)
)
int8_model = nn.Sequential(
Linear8bitLt(<span class="hljs-number">64</span>, <span class="hljs-number">64</span>, has_fp16_weights=<span class="hljs-literal">False</span>),
Linear8bitLt(<span class="hljs-number">64</span>, <span class="hljs-number">64</span>, has_fp16_weights=<span class="hljs-literal">False</span>)
)
int8_model.load_state_dict(fp16_model.state_dict())
int8_model = int8_model.to(<span class="hljs-number">0</span>) <span class="hljs-comment"># Quantization happens here</span>`,wrap:!1}}),{c(){s=d("p"),s.textContent=x,h=l(),_(b.$$.fragment)},l(a){s=u(a,"P",{"data-svelte-h":!0}),Z(s)!=="svelte-11lpom8"&&(s.textContent=x),h=r(a),$(b.$$.fragment,a)},m(a,f){i(a,s,f),i(a,h,f),g(b,a,f),c=!0},p:he,i(a){c||(v(b.$$.fragment,a),c=!0)},o(a){w(b.$$.fragment,a),c=!1},d(a){a&&(n(s),n(h)),y(b,a)}}}
/**
 * Fragment factory for the whole page. In order, it builds: the
 * `hf:doc:metadata` <meta> element (appended to document.head), the
 * "8-bit quantization" h1 heading, the LLM.int8() intro paragraph, the
 * Linear8bitLt heading with its class docstring, the example block (whose
 * default slot is `Me`), the Linear8bitLt `__init__` docstring, the Int8Params
 * heading with its class and `__init__` docstrings, and the edit-on-GitHub
 * component.
 *
 * The `p` hook only forwards a new `$$scope` to the example block, and only
 * when bit 2 of the dirty mask is set.
 *
 * @param {*} U - context value, passed through as `$$scope.ctx` of the example block.
 * @returns {object} fragment object exposing c, l, h, m, p, i, o and d.
 */
function Te(U){let s,x,h,b,c,a,f,me='<a href="https://hf.co/papers/2208.07339" rel="nofollow">LLM.int8()</a> is a quantization method that doesn’t degrade performance which makes large model inference more accessible. The key is to extract the outliers from the inputs and weights and multiply them in 16-bit. All other values are multiplied in 8-bit and quantized to Int8 before being dequantized back to 16-bits. The outputs from the 16-bit and 8-bit multiplication are combined to produce the final output.',A,I,Q,o,J,te,W,pe=`This class is the base module for the <a href="https://arxiv.org/abs/2208.07339" rel="nofollow">LLM.int8()</a> algorithm.
To read more about it, have a look at the paper.`,ne,z,de=`In order to quantize a linear layer one should first load the original fp16 / bf16 weights into
the Linear8bitLt module, then call <code>int8_module.to(&quot;cuda&quot;)</code> to quantize the fp16 weights.`,ae,T,se,L,k,le,D,ue="Initialize Linear8bitLt class.",X,E,V,M,B,re,j,C,ie,G,be="Initialize self. See help(type(self)) for accurate signature.",Y,N,R,q,H;return c=new oe({props:{title:"8-bit quantization",local:"8-bit-quantization",headingTag:"h1"}}),I=new oe({props:{title:"Linear8bitLt",local:"bitsandbytes.nn.Linear8bitLt",headingTag:"h2"}}),J=new ee({props:{name:"class bitsandbytes.nn.Linear8bitLt",anchor:"bitsandbytes.nn.Linear8bitLt",parameters:[{name:"input_features",val:": int"},{name:"output_features",val:": int"},{name:"bias",val:" = True"},{name:"has_fp16_weights",val:" = True"},{name:"memory_efficient_backward",val:" = False"},{name:"threshold",val:" = 0.0"},{name:"index",val:" = None"},{name:"device",val:" = None"}],source:"https://github.com/TimDettmers/bitsandbytes/blob/vr_1255/src/bitsandbytes/nn/modules.py#L663"}}),T=new we({props:{anchor:"bitsandbytes.nn.Linear8bitLt.example",$$slots:{default:[Me]},$$scope:{ctx:U}}}),k=new ee({props:{name:"__init__",anchor:"bitsandbytes.nn.Linear8bitLt.__init__",parameters:[{name:"input_features",val:": int"},{name:"output_features",val:": int"},{name:"bias",val:" = True"},{name:"has_fp16_weights",val:" = True"},{name:"memory_efficient_backward",val:" = False"},{name:"threshold",val:" = 0.0"},{name:"index",val:" = None"},{name:"device",val:" = None"}],parametersDescription:[{anchor:"bitsandbytes.nn.Linear8bitLt.__init__.input_features",description:`<strong>input_features</strong> (<code>int</code>) &#x2014;
Number of input features of the linear layer.`,name:"input_features"},{anchor:"bitsandbytes.nn.Linear8bitLt.__init__.output_features",description:`<strong>output_features</strong> (<code>int</code>) &#x2014;
Number of output features of the linear layer.`,name:"output_features"},{anchor:"bitsandbytes.nn.Linear8bitLt.__init__.bias",description:`<strong>bias</strong> (<code>bool</code>, defaults to <code>True</code>) &#x2014;
Whether the linear class uses the bias term as well.`,name:"bias"}],source:"https://github.com/TimDettmers/bitsandbytes/blob/vr_1255/src/bitsandbytes/nn/modules.py#L695"}}),E=new oe({props:{title:"Int8Params",local:"bitsandbytes.nn.Int8Params",headingTag:"h2"}}),B=new ee({props:{name:"class bitsandbytes.nn.Int8Params",anchor:"bitsandbytes.nn.Int8Params",parameters:[{name:"data",val:" = None"},{name:"requires_grad",val:" = True"},{name:"has_fp16_weights",val:" = False"},{name:"CB",val:" = None"},{name:"SCB",val:" = None"}],source:"https://github.com/TimDettmers/bitsandbytes/blob/vr_1255/src/bitsandbytes/nn/modules.py#L561"}}),C=new ee({props:{name:"__init__",anchor:"bitsandbytes.nn.Int8Params.__init__",parameters:[{name:"*args",val:""},{name:"**kwargs",val:""}]}}),N=new ye({props:{source:"https://github.com/TimDettmers/bitsandbytes/blob/main/docs/source/reference/nn/linear8bit.mdx"}}),{c(){s=d("meta"),x=l(),h=d("p"),b=l(),_(c.$$.fragment),a=l(),f=d("p"),f.innerHTML=me,A=l(),_(I.$$.fragment),Q=l(),o=d("div"),_(J.$$.fragment),te=l(),W=d("p"),W.innerHTML=pe,ne=l(),z=d("p"),z.innerHTML=de,ae=l(),_(T.$$.fragment),se=l(),L=d("div"),_(k.$$.fragment),le=l(),D=d("p"),D.textContent=ue,X=l(),_(E.$$.fragment),V=l(),M=d("div"),_(B.$$.fragment),re=l(),j=d("div"),_(C.$$.fragment),ie=l(),G=d("p"),G.textContent=be,Y=l(),_(N.$$.fragment),R=l(),q=d("p"),this.h()},l(e){const t=ge("svelte-u9bgzb",document.head);s=u(t,"META",{name:!0,content:!0}),t.forEach(n),x=r(e),h=u(e,"P",{}),P(h).forEach(n),b=r(e),$(c.$$.fragment,e),a=r(e),f=u(e,"P",{"data-svelte-h":!0}),Z(f)!=="svelte-6ius2o"&&(f.innerHTML=me),A=r(e),$(I.$$.fragment,e),Q=r(e),o=u(e,"DIV",{class:!0});var p=P(o);$(J.$$.fragment,p),te=r(p),W=u(p,"P",{"data-svelte-h":!0}),Z(W)!=="svelte-xo5v9s"&&(W.innerHTML=pe),ne=r(p),z=u(p,"P",{"data-svelte-h":!0}),Z(z)!=="svelte-13qed9e"&&(z.innerHTML=de),ae=r(p),$(T.$$.fragment,p),se=r(p),L=u(p,"DIV",{class:!0});var S=P(L);$(k.$$.fragment,S),le=r(S),D=u(S,"P",{"data-svelte-h":!0}),Z(D)!=="svelte-p49gdy"&&(D.textContent=ue),S.forEach(n),p.forEach(n),X=r(e),$(E.$$.fragment,e),V=r(e),M=u(e,"DIV",{class:!0});var K=P(M);$(B.$$.fragment,K),re=r(K),j=u(K,"DIV",{class:!0});var O=P(j);$(C.$$.fragment,O),ie=r(O),G=u(O,"P",{"data-svelte-h":!0}),Z(G)!=="svelte-gef1cn"&&(G.textContent=be),O.forEach(n),K.forEach(n),Y=r(e),$(N.$$.fragment,e),R=r(e),q=u(e,"P",{}),P(q).forEach(n),this.h()},h(){F(s,"name","hf:doc:metadata"),F(s,"content",Le),F(L,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),F(o,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),F(j,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),F(M,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8")},m(e,t){m(document.head,s),i(e,x,t),i(e,h,t),i(e,b,t),g(c,e,t),i(e,a,t),i(e,f,t),i(e,A,t),g(I,e,t),i(e,Q,t),i(e,o,t),g(J,o,null),m(o,te),m(o,W),m(o,ne),m(o,z),m(o,ae),g(T,o,null),m(o,se),m(o,L),g(k,L,null),m(L,le),m(L,D),i(e,X,t),g(E,e,t),i(e,V,t),i(e,M,t),g(B,M,null),m(M,re),m(M,j),g(C,j,null),m(j,ie),m(j,G),i(e,Y,t),g(N,e,t),i(e,R,t),i(e,q,t),H=!0},p(e,[t]){const p={};t&2&&(p.$$scope={dirty:t,ctx:e}),T.$set(p)},i(e){H||(v(c.$$.fragment,e),v(I.$$.fragment,e),v(J.$$.fragment,e),v(T.$$.fragment,e),v(k.$$.fragment,e),v(E.$$.fragment,e),v(B.$$.fragment,e),v(C.$$.fragment,e),v(N.$$.fragment,e),H=!0)},o(e){w(c.$$.fragment,e),w(I.$$.fragment,e),w(J.$$.fragment,e),w(T.$$.fragment,e),w(k.$$.fragment,e),w(E.$$.fragment,e),w(B.$$.fragment,e),w(C.$$.fragment,e),w(N.$$.fragment,e),H=!1},d(e){e&&(n(x),n(h),n(b),n(a),n(f),n(A),n(Q),n(o),n(X),n(V),n(M),n(Y),n(R),n(q)),n(s),y(c,e),y(I,e),y(J),y(T),y(k),y(E,e),y(B),y(C),y(N,e)}}}
// JSON-encoded table of contents for this page; Te's h() hook passes it as the
// `content` value of the meta element named "hf:doc:metadata".
// The literal must stay on a single physical line: a raw line break inside a
// single-quoted string is a SyntaxError.
const Le='{"title":"8-bit quantization","local":"8-bit-quantization","sections":[{"title":"Linear8bitLt","local":"bitsandbytes.nn.Linear8bitLt","sections":[],"depth":2},{"title":"Int8Params","local":"bitsandbytes.nn.Int8Params","sections":[],"depth":2}],"depth":1}';
/**
 * Instance function handed to `$e` by the component constructor.
 * Registers a callback through `fe` that reads the `fw` query parameter from
 * window.location.search; the value read is discarded.
 *
 * @param {*} U - unused.
 * @returns {Array} always an empty array.
 */
function je(U){return fe(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}
/**
 * Page component. The constructor forwards its options object `s` to `$e`
 * together with `je`, `Te`, `ce` and an empty object.
 */
class Ce extends _e{constructor(s){super(),$e(this,s,je,Te,ce,{})}}
export{Ce as component};

Xet Storage Details

Size:
9.7 kB
·
Xet hash:
aa74fbc28b8bb853926f03f0690482ad3716f1e0824f74d94da1fa8592c5223c

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.