Buckets:

rtrm's picture
download
raw
6.35 kB
// Generated page module for the Korean "EETQ" quantization documentation page.
// NOTE(review): the helper aliases imported from ../chunks/* look like Svelte
// runtime internals (element/space/claim/insert/detach, etc.) — their exact
// semantics are not visible in this file; confirm against the chunk sources.
import { s as D, n as O, o as tt } from "../chunks/scheduler.9bc65507.js";
import {
  S as et,
  i as at,
  g as i,
  s as l,
  r as T,
  A as st,
  h as o,
  f as a,
  c as n,
  j as B,
  u as y,
  x as v,
  k as K,
  y as lt,
  a as s,
  v as $,
  d as g,
  t as U,
  w as J,
} from "../chunks/index.707bf1b6.js";
import { C as H } from "../chunks/CodeBlock.54a9f38d.js";
import { H as nt, E as pt } from "../chunks/EditOnGithub.922df6ba.js";

/**
 * Fragment factory for the page: builds the static paragraphs, the heading,
 * four code blocks and the "edit on GitHub" component, and returns an object
 * with the lifecycle methods `c`, `l`, `h`, `m`, `p`, `i`, `o`, `d`.
 *
 * @param {*} I - unused by this fragment.
 * @returns {object} the fragment object described above.
 */
function it(I) {
  // Static page content. These strings are assigned verbatim to innerHTML /
  // textContent below, so they must not be altered.
  const introHtml = '<a href="https://github.com/NetEase-FuXi/EETQ" rel="nofollow">EETQ</a> 라이브러리는 NVIDIA GPU에 대해 int8 채널별(per-channel) 가중치 전용 양자화(weight-only quantization)을 지원합니다. 고성능 GEMM 및 GEMV 커널은 FasterTransformer 및 TensorRT-LLM에서 가져왔습니다. 교정(calibration) 데이터셋이 필요 없으며, 모델을 사전에 양자화할 필요도 없습니다. 또한, 채널별 양자화(per-channel quantization) 덕분에 정확도 저하가 미미합니다.';
  const releaseHtml = '<a href="https://github.com/NetEase-FuXi/EETQ/releases" rel="nofollow">릴리스 페이지</a>에서 eetq를 설치했는지 확인하세요.';
  const sourceHtml = '또는 소스 코드 <a href="https://github.com/NetEase-FuXi/EETQ" rel="nofollow">https://github.com/NetEase-FuXi/EETQ</a> 에서 설치할 수 있습니다. EETQ는 CUDA 기능이 8.9 이하이고 7.0 이상이어야 합니다.';
  const quantizeText = "비양자화 모델은 “from_pretrained”를 통해 양자화할 수 있습니다.";
  const saveText = "양자화된 모델은 “save_pretrained”를 통해 저장할 수 있으며, “from_pretrained”를 통해 다시 사용할 수 있습니다.";

  // Nodes produced by `i(...)` / `l()` in `c`, or by `o(...)` / `n(...)` in `l`.
  // The `gapN` nodes come from `l()` / `n(...)` and sit between the other nodes.
  let metaTag;
  let gap0;
  let topPara;
  let gap1;
  let gap2;
  let introPara;
  let gap3;
  let releasePara;
  let gap4;
  let gap5;
  let sourcePara;
  let gap6;
  let gap7;
  let quantizePara;
  let gap8;
  let gap9;
  let savePara;
  let gap10;
  let gap11;
  let gap12;
  let bottomPara;
  // Set to true by `m` and `i`, reset to false by `o`; guards `i` against re-running.
  let active;

  // Child components, constructed in the same order as the original.
  const heading = new nt({
    props: { title: "EETQ", local: "eetq", headingTag: "h1" },
  });
  const pipInstallBlock = new H({
    props: {
      code: "cGlwJTIwaW5zdGFsbCUyMC0tbm8tY2FjaGUtZGlyJTIwaHR0cHMlM0ElMkYlMkZnaXRodWIuY29tJTJGTmV0RWFzZS1GdVhpJTJGRUVUUSUyRnJlbGVhc2VzJTJGZG93bmxvYWQlMkZ2MS4wLjAlMkZFRVRRLTEuMC4wJTJCY3UxMjElMkJ0b3JjaDIuMS4yLWNwMzEwLWNwMzEwLWxpbnV4X3g4Nl82NC53aGw=",
      highlighted: 'pip install --no-cache-dir https:<span class="hljs-regexp">//gi</span>thub.com<span class="hljs-regexp">/NetEase-FuXi/</span>EETQ<span class="hljs-regexp">/releases/</span>download<span class="hljs-regexp">/v1.0.0/</span>EETQ-<span class="hljs-number">1.0</span>.<span class="hljs-number">0</span>+cu121+torch2.<span class="hljs-number">1.2</span>-cp310-cp310-linux_x86_64.whl',
      wrap: false,
    },
  });
  // The template literals below are multi-line on purpose: continuation lines
  // must stay flush-left or the rendered snippet gains leading whitespace.
  const gitCloneBlock = new H({
    props: {
      code: "Z2l0JTIwY2xvbmUlMjBodHRwcyUzQSUyRiUyRmdpdGh1Yi5jb20lMkZOZXRFYXNlLUZ1WGklMkZFRVRRLmdpdCUwQWNkJTIwRUVUUSUyRiUwQWdpdCUyMHN1Ym1vZHVsZSUyMHVwZGF0ZSUyMC0taW5pdCUyMC0tcmVjdXJzaXZlJTBBcGlwJTIwaW5zdGFsbCUyMC4=",
      highlighted: `git clone https:<span class="hljs-regexp">//gi</span>thub.com<span class="hljs-regexp">/NetEase-FuXi/</span>EETQ.git
cd EETQ/
git submodule update --init --recursive
pip install .`,
      wrap: false,
    },
  });
  const quantizeBlock = new H({
    props: {
      code: "ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Nb2RlbEZvckNhdXNhbExNJTJDJTIwRWV0cUNvbmZpZyUwQXBhdGglMjAlM0QlMjAlMjIlMkZwYXRoJTJGdG8lMkZtb2RlbCUyMi4lMEFxdWFudGl6YXRpb25fY29uZmlnJTIwJTNEJTIwRWV0cUNvbmZpZyglMjJpbnQ4JTIyKSUwQW1vZGVsJTIwJTNEJTIwQXV0b01vZGVsRm9yQ2F1c2FsTE0uZnJvbV9wcmV0cmFpbmVkKHBhdGglMkMlMjBkZXZpY2VfbWFwJTNEJTIyYXV0byUyMiUyQyUyMHF1YW50aXphdGlvbl9jb25maWclM0RxdWFudGl6YXRpb25fY29uZmlnKQ==",
      highlighted: `<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForCausalLM, EetqConfig
path = <span class="hljs-string">&quot;/path/to/model&quot;</span>.
quantization_config = EetqConfig(<span class="hljs-string">&quot;int8&quot;</span>)
model = AutoModelForCausalLM.from_pretrained(path, device_map=<span class="hljs-string">&quot;auto&quot;</span>, quantization_config=quantization_config)`,
      wrap: false,
    },
  });
  const saveBlock = new H({
    props: {
      code: "cXVhbnRfcGF0aCUyMCUzRCUyMCUyMiUyRnBhdGglMkZ0byUyRnNhdmUlMkZxdWFudGl6ZWQlMkZtb2RlbCUyMiUwQW1vZGVsLnNhdmVfcHJldHJhaW5lZChxdWFudF9wYXRoKSUwQW1vZGVsJTIwJTNEJTIwQXV0b01vZGVsRm9yQ2F1c2FsTE0uZnJvbV9wcmV0cmFpbmVkKHF1YW50X3BhdGglMkMlMjBkZXZpY2VfbWFwJTNEJTIyYXV0byUyMik=",
      highlighted: `quant_path = <span class="hljs-string">&quot;/path/to/save/quantized/model&quot;</span>
model.save_pretrained(quant_path)
model = AutoModelForCausalLM.from_pretrained(quant_path, device_map=<span class="hljs-string">&quot;auto&quot;</span>)`,
      wrap: false,
    },
  });
  const editLink = new pt({
    props: {
      source: "https://github.com/huggingface/transformers/blob/main/docs/source/ko/quantization/eetq.md",
    },
  });

  // All child components, in document order, for the i/o loops below.
  const children = [
    heading,
    pipInstallBlock,
    gitCloneBlock,
    quantizeBlock,
    saveBlock,
    editLink,
  ];

  return {
    // Builds every node from scratch and fills in the static content.
    c() {
      metaTag = i("meta");
      gap0 = l();
      topPara = i("p");
      gap1 = l();
      T(heading.$$.fragment);
      gap2 = l();
      introPara = i("p");
      introPara.innerHTML = introHtml;
      gap3 = l();
      releasePara = i("p");
      releasePara.innerHTML = releaseHtml;
      gap4 = l();
      T(pipInstallBlock.$$.fragment);
      gap5 = l();
      sourcePara = i("p");
      sourcePara.innerHTML = sourceHtml;
      gap6 = l();
      T(gitCloneBlock.$$.fragment);
      gap7 = l();
      quantizePara = i("p");
      quantizePara.textContent = quantizeText;
      gap8 = l();
      T(quantizeBlock.$$.fragment);
      gap9 = l();
      savePara = i("p");
      savePara.textContent = saveText;
      gap10 = l();
      T(saveBlock.$$.fragment);
      gap11 = l();
      T(editLink.$$.fragment);
      gap12 = l();
      bottomPara = i("p");
      this.h();
    },

    // Same node sequence as `c`, but obtained from the existing `nodes` list.
    // Static content is only rewritten when `v(el)` differs from the expected
    // "svelte-…" hash. NOTE(review): presumably the hydration path — confirm.
    l(nodes) {
      const headNodes = st("svelte-u9bgzb", document.head);
      metaTag = o(headNodes, "META", { name: true, content: true });
      headNodes.forEach(a);
      gap0 = n(nodes);
      topPara = o(nodes, "P", {});
      B(topPara).forEach(a);
      gap1 = n(nodes);
      y(heading.$$.fragment, nodes);
      gap2 = n(nodes);
      introPara = o(nodes, "P", { "data-svelte-h": true });
      if (v(introPara) !== "svelte-1p9a8o5") {
        introPara.innerHTML = introHtml;
      }
      gap3 = n(nodes);
      releasePara = o(nodes, "P", { "data-svelte-h": true });
      if (v(releasePara) !== "svelte-1lo055k") {
        releasePara.innerHTML = releaseHtml;
      }
      gap4 = n(nodes);
      y(pipInstallBlock.$$.fragment, nodes);
      gap5 = n(nodes);
      sourcePara = o(nodes, "P", { "data-svelte-h": true });
      if (v(sourcePara) !== "svelte-16p304f") {
        sourcePara.innerHTML = sourceHtml;
      }
      gap6 = n(nodes);
      y(gitCloneBlock.$$.fragment, nodes);
      gap7 = n(nodes);
      quantizePara = o(nodes, "P", { "data-svelte-h": true });
      if (v(quantizePara) !== "svelte-vyc9f4") {
        quantizePara.textContent = quantizeText;
      }
      gap8 = n(nodes);
      y(quantizeBlock.$$.fragment, nodes);
      gap9 = n(nodes);
      savePara = o(nodes, "P", { "data-svelte-h": true });
      if (v(savePara) !== "svelte-jnm5pv") {
        savePara.textContent = saveText;
      }
      gap10 = n(nodes);
      y(saveBlock.$$.fragment, nodes);
      gap11 = n(nodes);
      y(editLink.$$.fragment, nodes);
      gap12 = n(nodes);
      bottomPara = o(nodes, "P", {});
      B(bottomPara).forEach(a);
      this.h();
    },

    // Sets the meta tag's name and its content (the `ot` JSON string).
    h() {
      K(metaTag, "name", "hf:doc:metadata");
      K(metaTag, "content", ot);
    },

    // Places the meta tag under document.head and everything else, in
    // document order, into `target` relative to `anchor`.
    m(target, anchor) {
      lt(document.head, metaTag);
      s(target, gap0, anchor);
      s(target, topPara, anchor);
      s(target, gap1, anchor);
      $(heading, target, anchor);
      s(target, gap2, anchor);
      s(target, introPara, anchor);
      s(target, gap3, anchor);
      s(target, releasePara, anchor);
      s(target, gap4, anchor);
      $(pipInstallBlock, target, anchor);
      s(target, gap5, anchor);
      s(target, sourcePara, anchor);
      s(target, gap6, anchor);
      $(gitCloneBlock, target, anchor);
      s(target, gap7, anchor);
      s(target, quantizePara, anchor);
      s(target, gap8, anchor);
      $(quantizeBlock, target, anchor);
      s(target, gap9, anchor);
      s(target, savePara, anchor);
      s(target, gap10, anchor);
      $(saveBlock, target, anchor);
      s(target, gap11, anchor);
      $(editLink, target, anchor);
      s(target, gap12, anchor);
      s(target, bottomPara, anchor);
      active = true;
    },

    // The page has no reactive state, so the update hook is the imported `O`.
    p: O,

    // Runs `g` on each child fragment once, then marks the fragment active.
    i(local) {
      if (active) {
        return;
      }
      for (const child of children) {
        g(child.$$.fragment, local);
      }
      active = true;
    },

    // Runs `U` on each child fragment and clears the active flag.
    o(local) {
      for (const child of children) {
        U(child.$$.fragment, local);
      }
      active = false;
    },

    // Tears down: body nodes only when `detaching` is truthy; the meta tag and
    // the child components always.
    d(detaching) {
      if (detaching) {
        const bodyNodes = [
          gap0,
          topPara,
          gap1,
          gap2,
          introPara,
          gap3,
          releasePara,
          gap4,
          gap5,
          sourcePara,
          gap6,
          gap7,
          quantizePara,
          gap8,
          gap9,
          savePara,
          gap10,
          gap11,
          gap12,
          bottomPara,
        ];
        for (const node of bodyNodes) {
          a(node);
        }
      }
      a(metaTag);
      for (const child of children) {
        J(child, detaching);
      }
    },
  };
}

// JSON metadata written to the "hf:doc:metadata" meta tag's content attribute.
const ot = '{"title":"EETQ","local":"eetq","sections":[],"depth":1}';

/**
 * Instance function passed to `at` by the component constructor.
 * Registers a callback with `tt` that reads the "fw" query parameter and
 * discards the result, then returns an empty array.
 *
 * @param {*} I - unused.
 * @returns {Array} always an empty array.
 */
function mt(I) {
  tt(() => {
    new URLSearchParams(window.location.search).get("fw");
  });
  return [];
}

/** Page component: wires `mt` and `it` together through `at`. */
class ht extends et {
  constructor(options) {
    super();
    at(this, options, mt, it, D, {});
  }
}

export { ht as component };

Xet Storage Details

Size:
6.35 kB
·
Xet hash:
79b3d16064f1bf66d07bfe9ddbf1704d15ff7ec97af1d2a244691cc6a92470b7

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.