Buckets:
| import{s as ol,o as rl,n as ml}from"../chunks/scheduler.56730f09.js";import{S as fl,i as ul,g as a,s,r,A as dl,h as p,f as l,c as i,j as il,u as f,x as m,k as al,y as bl,a as n,v as u,d,t as b,w as c}from"../chunks/index.1f144517.js";import{T as pl}from"../chunks/Tip.41e845e5.js";import{C as g}from"../chunks/CodeBlock.738eeccb.js";import{H as $,E as cl}from"../chunks/EditOnGithub.854793f1.js";function Ml(ve){let o,y="이 기능은 다중 GPU 설정에서도 사용할 수 있습니다.";return{c(){o=a("p"),o.textContent=y},l(M){o=p(M,"P",{"data-svelte-h":!0}),m(o)!=="svelte-1wakscf"&&(o.textContent=y)},m(M,T){n(M,o,T)},p:ml,d(M){M&&l(o)}}}function $l(ve){let o,y="이 기능은 다중 GPU 설정에서도 사용할 수 있습니다.";return{c(){o=a("p"),o.textContent=y},l(M){o=p(M,"P",{"data-svelte-h":!0}),m(o)!=="svelte-1wakscf"&&(o.textContent=y)},m(M,T){n(M,o,T)},p:ml,d(M){M&&l(o)}}}function yl(ve){let o,y,M,T,_,Ce,C,wt='이 가이드 외에도, <a href="perf_train_gpu_one">단일 GPU에서의 훈련 가이드</a>와 <a href="perf_infer_cpu">CPU에서의 추론 가이드</a>에서도 관련 정보를 찾을 수 있습니다.',he,h,we,w,Jt='PyTorch 네이티브 <a href="https://pytorch.org/blog/a-better-transformer-for-fast-transformer-encoder-inference/" rel="nofollow"><code>nn.MultiHeadAttention</code></a> 어텐션 패스트패스인 BetterTransformer는 <a href="https://huggingface.co/docs/optimum/bettertransformer/overview" rel="nofollow">🤗 Optimum 라이브러리</a>의 통합을 통해 Transformers와 함께 사용할 수 있습니다.',Je,J,xt='PyTorch의 어텐션 패스트패스는 커널 퓨전과 <a href="https://pytorch.org/docs/stable/nested.html" rel="nofollow">중첩된 텐서</a>의 사용을 통해 추론 속도를 높일 수 있습니다. 자세한 벤치마크는 <a href="https://medium.com/pytorch/bettertransformer-out-of-the-box-performance-for-huggingface-transformers-3fbe27d50ab2" rel="nofollow">이 블로그 글</a>에서 확인할 수 있습니다.',xe,x,Gt='<a href="https://github.com/huggingface/optimum" rel="nofollow"><code>optimum</code></a> 패키지를 설치한 후에는 추론 중 Better Transformer를 사용할 수 있도록 <code>to_bettertransformer()</code>를 호출하여 관련 내부 모듈을 대체합니다:',Ge,G,We,W,Wt="<code>reverse_bettertransformer()</code> 메소드는 정규화된 transformers 모델링을 사용하기 위해 모델을 저장하기 전 원래의 모델링으로 돌아갈 수 있도록 해줍니다:",Ze,Z,Fe,F,Zt='PyTorch 2.0부터는 어텐션 패스트패스가 인코더와 디코더 모두에서 지원됩니다. 지원되는 아키텍처 목록은 <a href="https://huggingface.co/docs/optimum/bettertransformer/overview#supported-models" rel="nofollow">여기</a>에서 확인할 수 있습니다.',je,j,Re,R,Ft="<code>bitsandbytes</code>를 설치하면 GPU에서 손쉽게 모델을 압축할 수 있습니다. FP4 양자화를 사용하면 원래의 전체 정밀도 버전과 비교하여 모델 크기를 최대 8배 줄일 수 있습니다. 아래에서 시작하는 방법을 확인하세요.",Pe,U,ke,P,Ve,k,jt=`<li><p>최신 <code>bitsandbytes</code> 라이브러리 | |
| <code>pip install bitsandbytes>=0.39.0</code></p></li> <li><p>최신 <code>accelerate</code>를 소스에서 설치 | |
| <code>pip install git+https://github.com/huggingface/accelerate.git</code></p></li> <li><p>최신 <code>transformers</code>를 소스에서 설치 | |
| <code>pip install git+https://github.com/huggingface/transformers.git</code></p></li>`,Xe,V,Ie,X,Rt="다음 코드를 실행하여 단일 GPU에서 빠르게 FP4 모델을 실행할 수 있습니다.",Le,I,He,L,Pt="<code>device_map</code>은 선택 사항입니다. 그러나 <code>device_map = 'auto'</code>로 설정하는 것이 사용 가능한 리소스를 효율적으로 디스패치하기 때문에 추론에 있어 권장됩니다.",Be,H,Ee,B,kt="다중 GPU에서 혼합 4비트 모델을 가져오는 방법은 단일 GPU 설정과 동일합니다(동일한 명령어 사용):",qe,E,Ye,q,Vt="하지만 <code>accelerate</code>를 사용하여 각 GPU에 할당할 GPU RAM을 제어할 수 있습니다. 다음과 같이 <code>max_memory</code> 인수를 사용하세요:",Qe,Y,Ne,Q,Xt="이 예에서는 첫 번째 GPU가 600MB의 메모리를 사용하고 두 번째 GPU가 1GB를 사용합니다.",ze,N,Ae,z,It='이 방법의 더 고급 사용법에 대해서는 <a href="main_classes/quantization">양자화</a> 문서 페이지를 참조하세요.',Se,A,Ke,v,Oe,S,Lt=`<a href="https://arxiv.org/abs/2208.07339" rel="nofollow"><code>LLM.int8() : 8-bit Matrix Multiplication for Transformers at Scale</code></a> 논문에서 우리는 몇 줄의 코드로 Hub의 모든 모델에 대한 Hugging Face 통합을 지원합니다. | |
| 이 방법은 <code>float16</code> 및 <code>bfloat16</code> 가중치에 대해 <code>nn.Linear</code> 크기를 2배로 줄이고, <code>float32</code> 가중치에 대해 4배로 줄입니다. 이는 절반 정밀도에서 이상치를 처리함으로써 품질에 거의 영향을 미치지 않습니다.`,De,K,Ht='<img src="https://cdn-uploads.huggingface.co/production/uploads/1659861207959-62441d1d9fdefb55a0b7d12c.png" alt="HFxbitsandbytes.png"/>',et,O,Bt=`Int8 혼합 정밀도 행렬 분해는 행렬 곱셈을 두 개의 스트림으로 분리합니다: (1) fp16로 곱해지는 체계적인 특이값 이상치 스트림 행렬(0.01%) 및 (2) int8 행렬 곱셈의 일반적인 스트림(99.9%). 이 방법을 사용하면 매우 큰 모델에 대해 예측 저하 없이 int8 추론이 가능합니다. | |
| 이 방법에 대한 자세한 내용은 <a href="https://arxiv.org/abs/2208.07339" rel="nofollow">논문</a>이나 <a href="https://huggingface.co/blog/hf-bitsandbytes-integration" rel="nofollow">통합에 관한 블로그 글</a>에서 확인할 수 있습니다.`,tt,D,Et='<img src="https://cdn-uploads.huggingface.co/production/uploads/1660567469965-62441d1d9fdefb55a0b7d12c.gif" alt="MixedInt8.gif"/>',lt,ee,qt=`커널은 GPU 전용으로 컴파일되어 있기 때문에 혼합 8비트 모델을 실행하려면 GPU가 필요합니다. 이 기능을 사용하기 전에 모델의 1/4(또는 모델 가중치가 절반 정밀도인 경우 절반)을 저장할 충분한 GPU 메모리가 있는지 확인하세요. | |
| 이 모듈을 사용하는 데 도움이 되는 몇 가지 참고 사항이 아래에 나와 있습니다. 또는 <a href="#colab-demos">Google colab</a>에서 데모를 따라할 수도 있습니다.`,nt,te,st,le,Yt=`<li><code>bitsandbytes<0.37.0</code>을 사용하는 경우, 8비트 텐서 코어(Turing, Ampere 또는 이후 아키텍처 - 예: T4, RTX20s RTX30s, A40-A100)를 지원하는 NVIDIA GPU에서 실행하는지 확인하세요. <code>bitsandbytes>=0.37.0</code>을 사용하는 경우, 모든 GPU가 지원됩니다.</li> <li>올바른 버전의 <code>bitsandbytes</code>를 다음 명령으로 설치하세요: | |
| <code>pip install bitsandbytes>=0.31.5</code></li> <li><code>accelerate</code>를 설치하세요 | |
| <code>pip install accelerate>=0.12.0</code></li>`,it,ne,at,se,Qt="필요한 라이브러리를 설치한 후 혼합 8비트 모델을 가져오는 방법은 다음과 같습니다:",pt,ie,mt,ae,Nt="텍스트 생성의 경우:",ot,pe,zt="<li><code>pipeline()</code> 함수 대신 모델의 <code>generate()</code> 메소드를 사용하는 것을 권장합니다. <code>pipeline()</code> 함수로는 추론이 가능하지만, 혼합 8비트 모델에 최적화되지 않았기 때문에 <code>generate()</code> 메소드를 사용하는 것보다 느릴 수 있습니다. 또한, nucleus 샘플링과 같은 일부 샘플링 전략은 혼합 8비트 모델에 대해 <code>pipeline()</code> 함수에서 지원되지 않습니다.</li> <li>입력을 모델과 동일한 GPU에 배치하는 것이 좋습니다.</li>",rt,me,At="다음은 간단한 예입니다:",ft,oe,ut,re,dt,fe,St="다중 GPU에서 혼합 8비트 모델을 로드하는 방법은 단일 GPU 설정과 동일합니다(동일한 명령어 사용):",bt,ue,ct,de,Kt="하지만 <code>accelerate</code>를 사용하여 각 GPU에 할당할 GPU RAM을 제어할 수 있습니다. 다음과 같이 <code>max_memory</code> 인수를 사용하세요:",Mt,be,$t,ce,Ot="이 예시에서는 첫 번째 GPU가 1GB의 메모리를 사용하고 두 번째 GPU가 2GB를 사용합니다.",yt,Me,gt,$e,Dt=`이 방법을 사용하면 이전에 Google Colab에서 추론할 수 없었던 모델에 대해 추론할 수 있습니다. | |
| Google Colab에서 8비트 양자화를 사용하여 T5-11b(42GB in fp32)를 실행하는 데모를 확인하세요:`,Tt,ye,el='<a href="https://colab.research.google.com/drive/1YORPWx4okIHXnjW7MSAidXN29mPVNT7F?usp=sharing" rel="nofollow"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab: T5-11b demo"/></a>',Ut,ge,tl="또는 BLOOM-3B에 대한 데모를 확인하세요:",vt,Te,ll='<a href="https://colab.research.google.com/drive/1qOjXfQIAULfKvZqwCen8-MoWKGdSatZ4?usp=sharing" rel="nofollow"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab: BLOOM-3b demo"/></a>',_t,Ue,Ct,_e,ht;return _=new $({props:{title:"단일 GPU에서 효율적인 추론",local:"efficient-inference-on-a-single-gpu",headingTag:"h1"}}),h=new $({props:{title:"Better Transformer: PyTorch 네이티브 Transformer 패스트패스",local:"better-transformer-pytorchnative-transformer-fastpath",headingTag:"h2"}}),G=new g({props:{code:"bW9kZWwlMjAlM0QlMjBtb2RlbC50b19iZXR0ZXJ0cmFuc2Zvcm1lcigp",highlighted:"model = model.to_bettertransformer()",wrap:!1}}),Z=new g({props:{code:"bW9kZWwlMjAlM0QlMjBtb2RlbC5yZXZlcnNlX2JldHRlcnRyYW5zZm9ybWVyKCklMEFtb2RlbC5zYXZlX3ByZXRyYWluZWQoJTIyc2F2ZWRfbW9kZWwlMjIp",highlighted:`model = model.reverse_bettertransformer() | |
| model.save_pretrained(<span class="hljs-string">"saved_model"</span>)`,wrap:!1}}),j=new $({props:{title:"FP4 혼합 정밀도 추론을 위한 bitsandbytes 통합",local:"bitsandbytes-integration-for-fp4-mixedprecision-inference",headingTag:"h2"}}),U=new pl({props:{$$slots:{default:[Ml]},$$scope:{ctx:ve}}}),P=new $({props:{title:"요구 사항",local:"requirements-for-fp4-mixedprecision-inference",headingTag:"h3"}}),V=new $({props:{title:"FP4 모델 실행 - 단일 GPU 설정 - 빠른 시작",local:"running-fp4-models-single-gpu-setup-quickstart",headingTag:"h3"}}),I=new g({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Nb2RlbEZvckNhdXNhbExNJTBBJTBBbW9kZWxfbmFtZSUyMCUzRCUyMCUyMmJpZ3NjaWVuY2UlMkZibG9vbS0yYjUlMjIlMEFtb2RlbF80Yml0JTIwJTNEJTIwQXV0b01vZGVsRm9yQ2F1c2FsTE0uZnJvbV9wcmV0cmFpbmVkKG1vZGVsX25hbWUlMkMlMjBkZXZpY2VfbWFwJTNEJTIyYXV0byUyMiUyQyUyMGxvYWRfaW5fNGJpdCUzRFRydWUp",highlighted:`<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForCausalLM | |
| model_name = <span class="hljs-string">"bigscience/bloom-2b5"</span> | |
| model_4bit = AutoModelForCausalLM.from_pretrained(model_name, device_map=<span class="hljs-string">"auto"</span>, load_in_4bit=<span class="hljs-literal">True</span>)`,wrap:!1}}),H=new $({props:{title:"FP4 모델 실행 - 다중 GPU 설정",local:"running-fp4-models-multi-gpu-setup",headingTag:"h3"}}),E=new g({props:{code:"bW9kZWxfbmFtZSUyMCUzRCUyMCUyMmJpZ3NjaWVuY2UlMkZibG9vbS0yYjUlMjIlMEFtb2RlbF80Yml0JTIwJTNEJTIwQXV0b01vZGVsRm9yQ2F1c2FsTE0uZnJvbV9wcmV0cmFpbmVkKG1vZGVsX25hbWUlMkMlMjBkZXZpY2VfbWFwJTNEJTIyYXV0byUyMiUyQyUyMGxvYWRfaW5fNGJpdCUzRFRydWUp",highlighted:`model_name = <span class="hljs-string">"bigscience/bloom-2b5"</span> | |
| model_4bit = AutoModelForCausalLM.from_pretrained(model_name, device_map=<span class="hljs-string">"auto"</span>, load_in_4bit=<span class="hljs-literal">True</span>)`,wrap:!1}}),Y=new g({props:{code:"bWF4X21lbW9yeV9tYXBwaW5nJTIwJTNEJTIwJTdCMCUzQSUyMCUyMjYwME1CJTIyJTJDJTIwMSUzQSUyMCUyMjFHQiUyMiU3RCUwQW1vZGVsX25hbWUlMjAlM0QlMjAlMjJiaWdzY2llbmNlJTJGYmxvb20tM2IlMjIlMEFtb2RlbF80Yml0JTIwJTNEJTIwQXV0b01vZGVsRm9yQ2F1c2FsTE0uZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMG1vZGVsX25hbWUlMkMlMjBkZXZpY2VfbWFwJTNEJTIyYXV0byUyMiUyQyUyMGxvYWRfaW5fNGJpdCUzRFRydWUlMkMlMjBtYXhfbWVtb3J5JTNEbWF4X21lbW9yeV9tYXBwaW5nJTBBKQ==",highlighted:`max_memory_mapping = {<span class="hljs-number">0</span>: <span class="hljs-string">"600MB"</span>, <span class="hljs-number">1</span>: <span class="hljs-string">"1GB"</span>} | |
| model_name = <span class="hljs-string">"bigscience/bloom-3b"</span> | |
| model_4bit = AutoModelForCausalLM.from_pretrained( | |
| model_name, device_map=<span class="hljs-string">"auto"</span>, load_in_4bit=<span class="hljs-literal">True</span>, max_memory=max_memory_mapping | |
| )`,wrap:!1}}),N=new $({props:{title:"고급 사용법",local:"advanced-usage",headingTag:"h3"}}),A=new $({props:{title:"Int8 혼합 정밀도 행렬 분해를 위한 bitsandbytes 통합",local:"bitsandbytes-integration-for-int8-mixedprecision-matrix-decomposition",headingTag:"h2"}}),v=new pl({props:{$$slots:{default:[$l]},$$scope:{ctx:ve}}}),te=new $({props:{title:"요구 사항",local:"requirements-for-int8-mixedprecision-matrix-decomposition",headingTag:"h3"}}),ne=new $({props:{title:"혼합 Int8 모델 실행 - 단일 GPU 설정",local:"running-mixedint8-models-single-gpu-setup",headingTag:"h3"}}),ie=new g({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Nb2RlbEZvckNhdXNhbExNJTJDJTIwQml0c0FuZEJ5dGVzQ29uZmlnJTBBJTBBbW9kZWxfbmFtZSUyMCUzRCUyMCUyMmJpZ3NjaWVuY2UlMkZibG9vbS0yYjUlMjIlMEFtb2RlbF84Yml0JTIwJTNEJTIwQXV0b01vZGVsRm9yQ2F1c2FsTE0uZnJvbV9wcmV0cmFpbmVkKG1vZGVsX25hbWUlMkMlMjBxdWFudGl6YXRpb25fY29uZmlnJTNEQml0c0FuZEJ5dGVzQ29uZmlnKGxvYWRfaW5fOGJpdCUzRFRydWUpKQ==",highlighted:`<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForCausalLM, BitsAndBytesConfig | |
| model_name = <span class="hljs-string">"bigscience/bloom-2b5"</span> | |
| model_8bit = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=BitsAndBytesConfig(load_in_8bit=<span class="hljs-literal">True</span>))`,wrap:!1}}),oe=new g({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Nb2RlbEZvckNhdXNhbExNJTJDJTIwQXV0b1Rva2VuaXplciUyQyUyMEJpdHNBbmRCeXRlc0NvbmZpZyUwQSUwQW1vZGVsX25hbWUlMjAlM0QlMjAlMjJiaWdzY2llbmNlJTJGYmxvb20tMmI1JTIyJTBBdG9rZW5pemVyJTIwJTNEJTIwQXV0b1Rva2VuaXplci5mcm9tX3ByZXRyYWluZWQobW9kZWxfbmFtZSklMEFtb2RlbF84Yml0JTIwJTNEJTIwQXV0b01vZGVsRm9yQ2F1c2FsTE0uZnJvbV9wcmV0cmFpbmVkKG1vZGVsX25hbWUlMkMlMjBxdWFudGl6YXRpb25fY29uZmlnJTNEQml0c0FuZEJ5dGVzQ29uZmlnKGxvYWRfaW5fOGJpdCUzRFRydWUpKSUwQSUwQXByb21wdCUyMCUzRCUyMCUyMkhlbGxvJTJDJTIwbXklMjBsbGFtYSUyMGlzJTIwY3V0ZSUyMiUwQWlucHV0cyUyMCUzRCUyMHRva2VuaXplcihwcm9tcHQlMkMlMjByZXR1cm5fdGVuc29ycyUzRCUyMnB0JTIyKS50byglMjJjdWRhJTIyKSUwQWdlbmVyYXRlZF9pZHMlMjAlM0QlMjBtb2RlbC5nZW5lcmF0ZSgqKmlucHV0cyklMEFvdXRwdXRzJTIwJTNEJTIwdG9rZW5pemVyLmJhdGNoX2RlY29kZShnZW5lcmF0ZWRfaWRzJTJDJTIwc2tpcF9zcGVjaWFsX3Rva2VucyUzRFRydWUp",highlighted:`<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig | |
| model_name = <span class="hljs-string">"bigscience/bloom-2b5"</span> | |
| tokenizer = AutoTokenizer.from_pretrained(model_name) | |
| model_8bit = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=BitsAndBytesConfig(load_in_8bit=<span class="hljs-literal">True</span>)) | |
| prompt = <span class="hljs-string">"Hello, my llama is cute"</span> | |
| inputs = tokenizer(prompt, return_tensors=<span class="hljs-string">"pt"</span>).to(<span class="hljs-string">"cuda"</span>) | |
| generated_ids = model.generate(**inputs) | |
| outputs = tokenizer.batch_decode(generated_ids, skip_special_tokens=<span class="hljs-literal">True</span>)`,wrap:!1}}),re=new $({props:{title:"혼합 Int8 모델 실행 - 다중 GPU 설정",local:"running-mixedint8-models-multi-gpu-setup",headingTag:"h3"}}),ue=new g({props:{code:"bW9kZWxfbmFtZSUyMCUzRCUyMCUyMmJpZ3NjaWVuY2UlMkZibG9vbS0yYjUlMjIlMEFtb2RlbF84Yml0JTIwJTNEJTIwQXV0b01vZGVsRm9yQ2F1c2FsTE0uZnJvbV9wcmV0cmFpbmVkKG1vZGVsX25hbWUlMkMlMjBxdWFudGl6YXRpb25fY29uZmlnJTNEQml0c0FuZEJ5dGVzQ29uZmlnKGxvYWRfaW5fOGJpdCUzRFRydWUpKQ==",highlighted:`model_name = <span class="hljs-string">"bigscience/bloom-2b5"</span> | |
| model_8bit = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=BitsAndBytesConfig(load_in_8bit=<span class="hljs-literal">True</span>))`,wrap:!1}}),be=new g({props:{code:"bWF4X21lbW9yeV9tYXBwaW5nJTIwJTNEJTIwJTdCMCUzQSUyMCUyMjFHQiUyMiUyQyUyMDElM0ElMjAlMjIyR0IlMjIlN0QlMEFtb2RlbF9uYW1lJTIwJTNEJTIwJTIyYmlnc2NpZW5jZSUyRmJsb29tLTNiJTIyJTBBbW9kZWxfOGJpdCUyMCUzRCUyMEF1dG9Nb2RlbEZvckNhdXNhbExNLmZyb21fcHJldHJhaW5lZCglMEElMjAlMjAlMjAlMjBtb2RlbF9uYW1lJTJDJTIwZGV2aWNlX21hcCUzRCUyMmF1dG8lMjIlMkMlMjBsb2FkX2luXzhiaXQlM0RUcnVlJTJDJTIwbWF4X21lbW9yeSUzRG1heF9tZW1vcnlfbWFwcGluZyUwQSk=",highlighted:`max_memory_mapping = {<span class="hljs-number">0</span>: <span class="hljs-string">"1GB"</span>, <span class="hljs-number">1</span>: <span class="hljs-string">"2GB"</span>} | |
| model_name = <span class="hljs-string">"bigscience/bloom-3b"</span> | |
| model_8bit = AutoModelForCausalLM.from_pretrained( | |
| model_name, device_map=<span class="hljs-string">"auto"</span>, load_in_8bit=<span class="hljs-literal">True</span>, max_memory=max_memory_mapping | |
| )`,wrap:!1}}),Me=new $({props:{title:"Colab 데모",local:"colab-demos",headingTag:"h3"}}),Ue=new cl({props:{source:"https://github.com/huggingface/transformers/blob/main/docs/source/ko/perf_infer_gpu_one.md"}}),{c(){o=a("meta"),y=s(),M=a("p"),T=s(),r(_.$$.fragment),Ce=s(),C=a("p"),C.innerHTML=wt,he=s(),r(h.$$.fragment),we=s(),w=a("p"),w.innerHTML=Jt,Je=s(),J=a("p"),J.innerHTML=xt,xe=s(),x=a("p"),x.innerHTML=Gt,Ge=s(),r(G.$$.fragment),We=s(),W=a("p"),W.innerHTML=Wt,Ze=s(),r(Z.$$.fragment),Fe=s(),F=a("p"),F.innerHTML=Zt,je=s(),r(j.$$.fragment),Re=s(),R=a("p"),R.innerHTML=Ft,Pe=s(),r(U.$$.fragment),ke=s(),r(P.$$.fragment),Ve=s(),k=a("ul"),k.innerHTML=jt,Xe=s(),r(V.$$.fragment),Ie=s(),X=a("p"),X.textContent=Rt,Le=s(),r(I.$$.fragment),He=s(),L=a("p"),L.innerHTML=Pt,Be=s(),r(H.$$.fragment),Ee=s(),B=a("p"),B.textContent=kt,qe=s(),r(E.$$.fragment),Ye=s(),q=a("p"),q.innerHTML=Vt,Qe=s(),r(Y.$$.fragment),Ne=s(),Q=a("p"),Q.textContent=Xt,ze=s(),r(N.$$.fragment),Ae=s(),z=a("p"),z.innerHTML=It,Se=s(),r(A.$$.fragment),Ke=s(),r(v.$$.fragment),Oe=s(),S=a("p"),S.innerHTML=Lt,De=s(),K=a("p"),K.innerHTML=Ht,et=s(),O=a("p"),O.innerHTML=Bt,tt=s(),D=a("p"),D.innerHTML=Et,lt=s(),ee=a("p"),ee.innerHTML=qt,nt=s(),r(te.$$.fragment),st=s(),le=a("ul"),le.innerHTML=Yt,it=s(),r(ne.$$.fragment),at=s(),se=a("p"),se.textContent=Qt,pt=s(),r(ie.$$.fragment),mt=s(),ae=a("p"),ae.textContent=Nt,ot=s(),pe=a("ul"),pe.innerHTML=zt,rt=s(),me=a("p"),me.textContent=At,ft=s(),r(oe.$$.fragment),ut=s(),r(re.$$.fragment),dt=s(),fe=a("p"),fe.textContent=St,bt=s(),r(ue.$$.fragment),ct=s(),de=a("p"),de.innerHTML=Kt,Mt=s(),r(be.$$.fragment),$t=s(),ce=a("p"),ce.textContent=Ot,yt=s(),r(Me.$$.fragment),gt=s(),$e=a("p"),$e.textContent=Dt,Tt=s(),ye=a("p"),ye.innerHTML=el,Ut=s(),ge=a("p"),ge.textContent=tl,vt=s(),Te=a("p"),Te.innerHTML=ll,_t=s(),r(Ue.$$.fragment),Ct=s(),_e=a("p"),this.h()},l(e){const t=dl("svelte-u9bgzb",document.head);o=p(t,"META",{name:!0,content:!0}),t.forEach(l),y=i(e),M=p(e,"P",{}),il(M).forEach(l),T=i(e),f(_.$$.fragment,e),Ce=i(e),C=p(e,"P",{"data-svelte-h":!0}),m(C)!=="svelte-1m7ldpj"&&(C.innerHTML=wt),he=i(e),f(h.$$.fragment,e),we=i(e),w=p(e,"P",{"data-svelte-h":!0}),m(w)!=="svelte-ee52o1"&&(w.innerHTML=Jt),Je=i(e),J=p(e,"P",{"data-svelte-h":!0}),m(J)!=="svelte-8k8zjr"&&(J.innerHTML=xt),xe=i(e),x=p(e,"P",{"data-svelte-h":!0}),m(x)!=="svelte-1rnbe8f"&&(x.innerHTML=Gt),Ge=i(e),f(G.$$.fragment,e),We=i(e),W=p(e,"P",{"data-svelte-h":!0}),m(W)!=="svelte-1u5t675"&&(W.innerHTML=Wt),Ze=i(e),f(Z.$$.fragment,e),Fe=i(e),F=p(e,"P",{"data-svelte-h":!0}),m(F)!=="svelte-17375w3"&&(F.innerHTML=Zt),je=i(e),f(j.$$.fragment,e),Re=i(e),R=p(e,"P",{"data-svelte-h":!0}),m(R)!=="svelte-sxo64f"&&(R.innerHTML=Ft),Pe=i(e),f(U.$$.fragment,e),ke=i(e),f(P.$$.fragment,e),Ve=i(e),k=p(e,"UL",{"data-svelte-h":!0}),m(k)!=="svelte-dwg1v5"&&(k.innerHTML=jt),Xe=i(e),f(V.$$.fragment,e),Ie=i(e),X=p(e,"P",{"data-svelte-h":!0}),m(X)!=="svelte-60dj5"&&(X.textContent=Rt),Le=i(e),f(I.$$.fragment,e),He=i(e),L=p(e,"P",{"data-svelte-h":!0}),m(L)!=="svelte-1xe6z7w"&&(L.innerHTML=Pt),Be=i(e),f(H.$$.fragment,e),Ee=i(e),B=p(e,"P",{"data-svelte-h":!0}),m(B)!=="svelte-11nsoog"&&(B.textContent=kt),qe=i(e),f(E.$$.fragment,e),Ye=i(e),q=p(e,"P",{"data-svelte-h":!0}),m(q)!=="svelte-7qj9fn"&&(q.innerHTML=Vt),Qe=i(e),f(Y.$$.fragment,e),Ne=i(e),Q=p(e,"P",{"data-svelte-h":!0}),m(Q)!=="svelte-1wq62m5"&&(Q.textContent=Xt),ze=i(e),f(N.$$.fragment,e),Ae=i(e),z=p(e,"P",{"data-svelte-h":!0}),m(z)!=="svelte-19t36l7"&&(z.innerHTML=It),Se=i(e),f(A.$$.fragment,e),Ke=i(e),f(v.$$.fragment,e),Oe=i(e),S=p(e,"P",{"data-svelte-h":!0}),m(S)!=="svelte-1ynbgg5"&&(S.innerHTML=Lt),De=i(e),K=p(e,"P",{"data-svelte-h":!0}),m(K)!=="svelte-1tsrdqi"&&(K.innerHTML=Ht),et=i(e),O=p(e,"P",{"data-svelte-h":!0}),m(O)!=="svelte-sz8751"&&(O.innerHTML=Bt),tt=i(e),D=p(e,"P",{"data-svelte-h":!0}),m(D)!=="svelte-y2mgdg"&&(D.innerHTML=Et),lt=i(e),ee=p(e,"P",{"data-svelte-h":!0}),m(ee)!=="svelte-ewmhxc"&&(ee.innerHTML=qt),nt=i(e),f(te.$$.fragment,e),st=i(e),le=p(e,"UL",{"data-svelte-h":!0}),m(le)!=="svelte-r9nrya"&&(le.innerHTML=Yt),it=i(e),f(ne.$$.fragment,e),at=i(e),se=p(e,"P",{"data-svelte-h":!0}),m(se)!=="svelte-1trjkap"&&(se.textContent=Qt),pt=i(e),f(ie.$$.fragment,e),mt=i(e),ae=p(e,"P",{"data-svelte-h":!0}),m(ae)!=="svelte-bekc1k"&&(ae.textContent=Nt),ot=i(e),pe=p(e,"UL",{"data-svelte-h":!0}),m(pe)!=="svelte-daelpd"&&(pe.innerHTML=zt),rt=i(e),me=p(e,"P",{"data-svelte-h":!0}),m(me)!=="svelte-1md3fzz"&&(me.textContent=At),ft=i(e),f(oe.$$.fragment,e),ut=i(e),f(re.$$.fragment,e),dt=i(e),fe=p(e,"P",{"data-svelte-h":!0}),m(fe)!=="svelte-bnkuc8"&&(fe.textContent=St),bt=i(e),f(ue.$$.fragment,e),ct=i(e),de=p(e,"P",{"data-svelte-h":!0}),m(de)!=="svelte-7qj9fn"&&(de.innerHTML=Kt),Mt=i(e),f(be.$$.fragment,e),$t=i(e),ce=p(e,"P",{"data-svelte-h":!0}),m(ce)!=="svelte-1bub4cv"&&(ce.textContent=Ot),yt=i(e),f(Me.$$.fragment,e),gt=i(e),$e=p(e,"P",{"data-svelte-h":!0}),m($e)!=="svelte-1at89fq"&&($e.textContent=Dt),Tt=i(e),ye=p(e,"P",{"data-svelte-h":!0}),m(ye)!=="svelte-1yb5ek4"&&(ye.innerHTML=el),Ut=i(e),ge=p(e,"P",{"data-svelte-h":!0}),m(ge)!=="svelte-u2br1q"&&(ge.textContent=tl),vt=i(e),Te=p(e,"P",{"data-svelte-h":!0}),m(Te)!=="svelte-6z7881"&&(Te.innerHTML=ll),_t=i(e),f(Ue.$$.fragment,e),Ct=i(e),_e=p(e,"P",{}),il(_e).forEach(l),this.h()},h(){al(o,"name","hf:doc:metadata"),al(o,"content",gl)},m(e,t){bl(document.head,o),n(e,y,t),n(e,M,t),n(e,T,t),u(_,e,t),n(e,Ce,t),n(e,C,t),n(e,he,t),u(h,e,t),n(e,we,t),n(e,w,t),n(e,Je,t),n(e,J,t),n(e,xe,t),n(e,x,t),n(e,Ge,t),u(G,e,t),n(e,We,t),n(e,W,t),n(e,Ze,t),u(Z,e,t),n(e,Fe,t),n(e,F,t),n(e,je,t),u(j,e,t),n(e,Re,t),n(e,R,t),n(e,Pe,t),u(U,e,t),n(e,ke,t),u(P,e,t),n(e,Ve,t),n(e,k,t),n(e,Xe,t),u(V,e,t),n(e,Ie,t),n(e,X,t),n(e,Le,t),u(I,e,t),n(e,He,t),n(e,L,t),n(e,Be,t),u(H,e,t),n(e,Ee,t),n(e,B,t),n(e,qe,t),u(E,e,t),n(e,Ye,t),n(e,q,t),n(e,Qe,t),u(Y,e,t),n(e,Ne,t),n(e,Q,t),n(e,ze,t),u(N,e,t),n(e,Ae,t),n(e,z,t),n(e,Se,t),u(A,e,t),n(e,Ke,t),u(v,e,t),n(e,Oe,t),n(e,S,t),n(e,De,t),n(e,K,t),n(e,et,t),n(e,O,t),n(e,tt,t),n(e,D,t),n(e,lt,t),n(e,ee,t),n(e,nt,t),u(te,e,t),n(e,st,t),n(e,le,t),n(e,it,t),u(ne,e,t),n(e,at,t),n(e,se,t),n(e,pt,t),u(ie,e,t),n(e,mt,t),n(e,ae,t),n(e,ot,t),n(e,pe,t),n(e,rt,t),n(e,me,t),n(e,ft,t),u(oe,e,t),n(e,ut,t),u(re,e,t),n(e,dt,t),n(e,fe,t),n(e,bt,t),u(ue,e,t),n(e,ct,t),n(e,de,t),n(e,Mt,t),u(be,e,t),n(e,$t,t),n(e,ce,t),n(e,yt,t),u(Me,e,t),n(e,gt,t),n(e,$e,t),n(e,Tt,t),n(e,ye,t),n(e,Ut,t),n(e,ge,t),n(e,vt,t),n(e,Te,t),n(e,_t,t),u(Ue,e,t),n(e,Ct,t),n(e,_e,t),ht=!0},p(e,[t]){const nl={};t&2&&(nl.$$scope={dirty:t,ctx:e}),U.$set(nl);const sl={};t&2&&(sl.$$scope={dirty:t,ctx:e}),v.$set(sl)},i(e){ht||(d(_.$$.fragment,e),d(h.$$.fragment,e),d(G.$$.fragment,e),d(Z.$$.fragment,e),d(j.$$.fragment,e),d(U.$$.fragment,e),d(P.$$.fragment,e),d(V.$$.fragment,e),d(I.$$.fragment,e),d(H.$$.fragment,e),d(E.$$.fragment,e),d(Y.$$.fragment,e),d(N.$$.fragment,e),d(A.$$.fragment,e),d(v.$$.fragment,e),d(te.$$.fragment,e),d(ne.$$.fragment,e),d(ie.$$.fragment,e),d(oe.$$.fragment,e),d(re.$$.fragment,e),d(ue.$$.fragment,e),d(be.$$.fragment,e),d(Me.$$.fragment,e),d(Ue.$$.fragment,e),ht=!0)},o(e){b(_.$$.fragment,e),b(h.$$.fragment,e),b(G.$$.fragment,e),b(Z.$$.fragment,e),b(j.$$.fragment,e),b(U.$$.fragment,e),b(P.$$.fragment,e),b(V.$$.fragment,e),b(I.$$.fragment,e),b(H.$$.fragment,e),b(E.$$.fragment,e),b(Y.$$.fragment,e),b(N.$$.fragment,e),b(A.$$.fragment,e),b(v.$$.fragment,e),b(te.$$.fragment,e),b(ne.$$.fragment,e),b(ie.$$.fragment,e),b(oe.$$.fragment,e),b(re.$$.fragment,e),b(ue.$$.fragment,e),b(be.$$.fragment,e),b(Me.$$.fragment,e),b(Ue.$$.fragment,e),ht=!1},d(e){e&&(l(y),l(M),l(T),l(Ce),l(C),l(he),l(we),l(w),l(Je),l(J),l(xe),l(x),l(Ge),l(We),l(W),l(Ze),l(Fe),l(F),l(je),l(Re),l(R),l(Pe),l(ke),l(Ve),l(k),l(Xe),l(Ie),l(X),l(Le),l(He),l(L),l(Be),l(Ee),l(B),l(qe),l(Ye),l(q),l(Qe),l(Ne),l(Q),l(ze),l(Ae),l(z),l(Se),l(Ke),l(Oe),l(S),l(De),l(K),l(et),l(O),l(tt),l(D),l(lt),l(ee),l(nt),l(st),l(le),l(it),l(at),l(se),l(pt),l(mt),l(ae),l(ot),l(pe),l(rt),l(me),l(ft),l(ut),l(dt),l(fe),l(bt),l(ct),l(de),l(Mt),l($t),l(ce),l(yt),l(gt),l($e),l(Tt),l(ye),l(Ut),l(ge),l(vt),l(Te),l(_t),l(Ct),l(_e)),l(o),c(_,e),c(h,e),c(G,e),c(Z,e),c(j,e),c(U,e),c(P,e),c(V,e),c(I,e),c(H,e),c(E,e),c(Y,e),c(N,e),c(A,e),c(v,e),c(te,e),c(ne,e),c(ie,e),c(oe,e),c(re,e),c(ue,e),c(be,e),c(Me,e),c(Ue,e)}}}const gl='{"title":"단일 GPU에서 효율적인 추론","local":"efficient-inference-on-a-single-gpu","sections":[{"title":"Better Transformer: PyTorch 네이티브 Transformer 패스트패스","local":"better-transformer-pytorchnative-transformer-fastpath","sections":[],"depth":2},{"title":"FP4 혼합 정밀도 추론을 위한 bitsandbytes 통합","local":"bitsandbytes-integration-for-fp4-mixedprecision-inference","sections":[{"title":"요구 사항","local":"requirements-for-fp4-mixedprecision-inference","sections":[],"depth":3},{"title":"FP4 모델 실행 - 단일 GPU 설정 - 빠른 시작","local":"running-fp4-models-single-gpu-setup-quickstart","sections":[],"depth":3},{"title":"FP4 모델 실행 - 다중 GPU 설정","local":"running-fp4-models-multi-gpu-setup","sections":[],"depth":3},{"title":"고급 사용법","local":"advanced-usage","sections":[],"depth":3}],"depth":2},{"title":"Int8 혼합 정밀도 행렬 분해를 위한 bitsandbytes 통합","local":"bitsandbytes-integration-for-int8-mixedprecision-matrix-decomposition","sections":[{"title":"요구 사항","local":"requirements-for-int8-mixedprecision-matrix-decomposition","sections":[],"depth":3},{"title":"혼합 Int8 모델 실행 - 단일 GPU 설정","local":"running-mixedint8-models-single-gpu-setup","sections":[],"depth":3},{"title":"혼합 Int8 모델 실행 - 다중 GPU 설정","local":"running-mixedint8-models-multi-gpu-setup","sections":[],"depth":3},{"title":"Colab 데모","local":"colab-demos","sections":[],"depth":3}],"depth":2}],"depth":1}';function Tl(ve){return rl(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class wl extends fl{constructor(o){super(),ul(this,o,Tl,yl,ol,{})}}export{wl as component}; | |
Xet Storage Details
- Size:
- 27.4 kB
- Xet hash:
- 75b52f35abd02ffbe9a2beee97812f66b1a72b18d8406d0972007112a8d5ed0a
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.