Buckets:
| import{s as Se,n as Xe,o as ge}from"../chunks/scheduler.23542ac5.js";import{S as xe,i as $e,e as p,s as n,c as U,q as ke,h as Ye,a as i,d as e,b as a,f as Dt,g as m,j as M,r as ve,k as h,l as Ae,m as s,n as J,t as y,o,p as c}from"../chunks/index.9b1f405b.js";import{C as He,H as r,E as Fe}from"../chunks/MermaidChart.svelte_svelte_type_style_lang.2268ef01.js";import{C as w}from"../chunks/CodeBlock.6add2a12.js";function ze(Lt){let T,Wl,El,Nl,b,_l,C,kl,I,qt=`메모리 또는 속도에 대해 🤗 Diffusers <em>추론</em>을 최적화하기 위한 몇 가지 기술과 아이디어를 제시합니다. | |
| 일반적으로, memory-efficient attention을 위해 <a href="https://github.com/facebookresearch/xformers" rel="nofollow">xFormers</a> 사용을 추천하기 때문에, 추천하는 <a href="xformers">설치 방법</a>을 보고 설치해 보세요.`,vl,Z,Ot="다음 설정이 성능과 메모리에 미치는 영향에 대해 설명합니다.",Sl,Q,Pt="<thead><tr><th></th> <th>지연시간</th> <th>속도 향상</th></tr></thead> <tbody><tr><td>별도 설정 없음</td> <td>9.50s</td> <td>x1</td></tr> <tr><td>cuDNN auto-tuner</td> <td>9.37s</td> <td>x1.01</td></tr> <tr><td>fp16</td> <td>3.61s</td> <td>x2.63</td></tr> <tr><td>Channels Last 메모리 형식</td> <td>3.30s</td> <td>x2.88</td></tr> <tr><td>traced UNet</td> <td>3.21s</td> <td>x2.96</td></tr> <tr><td>memory-efficient attention</td> <td>2.63s</td> <td>x3.61</td></tr></tbody>",Xl,B,Kt='NVIDIA TITAN RTX에서 50 DDIM 스텝의 "a photo of an astronaut riding a horse on mars" 프롬프트로 512x512 크기의 단일 이미지를 생성하였습니다.',gl,V,xl,G,le='<a href="https://developer.nvidia.com/cudnn" rel="nofollow">NVIDIA cuDNN</a>은 컨볼루션을 계산하는 많은 알고리즘을 지원합니다. Autotuner는 짧은 벤치마크를 실행하고 주어진 입력 크기에 대해 주어진 하드웨어에서 최고의 성능을 가진 커널을 선택합니다.',$l,E,te="<strong>컨볼루션 네트워크</strong>를 활용하고 있기 때문에 (다른 유형들은 현재 지원되지 않음), 다음 설정을 통해 추론 전에 cuDNN autotuner를 활성화할 수 있습니다:",Yl,R,Al,W,Hl,N,ee=`Ampere 및 이후 CUDA 장치에서 행렬곱 및 컨볼루션은 TensorFloat32(TF32) 모드를 사용하여 더 빠르지만 약간 덜 정확할 수 있습니다. | |
| 기본적으로 PyTorch는 컨볼루션에 대해 TF32 모드를 활성화하지만 행렬 곱셈은 활성화하지 않습니다. | |
| 네트워크에 완전한 float32 정밀도가 필요한 경우가 아니면 행렬 곱셈에 대해서도 이 설정을 활성화하는 것이 좋습니다. | |
| 이는 일반적으로 무시할 수 있는 수치의 정확도 손실이 있지만, 계산 속도를 크게 높일 수 있습니다. | |
| 그것에 대해 <a href="https://huggingface.co/docs/transformers/v4.18.0/en/performance#tf32" rel="nofollow">여기</a>서 더 읽을 수 있습니다. | |
| 추론하기 전에 다음을 추가하기만 하면 됩니다:`,Fl,_,zl,k,Dl,v,se=`더 많은 GPU 메모리를 절약하고 더 빠른 속도를 얻기 위해 모델 가중치를 반정밀도(half precision)로 직접 불러오고 실행할 수 있습니다. | |
| 여기에는 <code>fp16</code>이라는 브랜치에 저장된 float16 버전의 가중치를 불러오고, 그 때 <code>float16</code> 유형을 사용하도록 PyTorch에 지시하는 작업이 포함됩니다.`,Ll,S,ql,u,ne='<p>어떤 파이프라인에서도 <a href="https://pytorch.org/docs/stable/amp.html#torch.autocast" rel="nofollow"><code>torch.autocast</code></a> 를 사용하는 것은 검은색 이미지를 생성할 수 있고, 순수한 float16 정밀도를 사용하는 것보다 항상 느리기 때문에 사용하지 않는 것이 좋습니다.</p>',Ol,X,Pl,g,ae="추가 메모리 절약을 위해, 한 번에 모두 계산하는 대신 단계적으로 계산을 수행하는 슬라이스 버전의 어텐션(attention)을 사용할 수 있습니다.",Kl,d,pe=`<p>Attention slicing은 모델이 하나 이상의 어텐션 헤드를 사용하는 한, 배치 크기가 1인 경우에도 유용합니다. | |
| 하나 이상의 어텐션 헤드가 있는 경우 <em>QK^T</em> 어텐션 매트릭스는 상당한 양의 메모리를 절약할 수 있는 각 헤드에 대해 순차적으로 계산될 수 있습니다.</p>`,lt,x,ie="각 헤드에 대해 순차적으로 어텐션 계산을 수행하려면, 다음과 같이 추론 전에 파이프라인에서 <code>enable_attention_slicing()</code>를 호출하면 됩니다:",tt,$,et,Y,Me="추론 시간이 약 10% 느려지는 약간의 성능 저하가 있지만 이 방법을 사용하면 3.2GB 정도의 작은 VRAM으로도 Stable Diffusion을 사용할 수 있습니다!",st,A,nt,H,Ue="제한된 VRAM에서 대규모 이미지 배치를 디코딩하거나 32개 이상의 이미지가 포함된 배치를 활성화하기 위해, 배치의 latent 이미지를 한 번에 하나씩 디코딩하는 슬라이스 VAE 디코드를 사용할 수 있습니다.",at,F,me="이를 <code>enable_attention_slicing()</code> 또는 <code>enable_xformers_memory_efficient_attention()</code>과 결합하여 메모리 사용을 추가로 최소화할 수 있습니다.",pt,z,Je="VAE 디코드를 한 번에 하나씩 수행하려면 추론 전에 파이프라인에서 <code>enable_vae_slicing()</code>을 호출합니다. 예를 들어:",it,D,Mt,L,ye="다중 이미지 배치에서 VAE 디코드가 약간의 성능 향상이 이루어집니다. 단일 이미지 배치에서는 성능 영향은 없습니다.",Ut,Vl,mt,q,oe="추가 메모리 절약을 위해 가중치를 CPU로 오프로드하고 순방향 전달을 수행할 때만 GPU로 로드할 수 있습니다.",Jt,O,ce="CPU 오프로딩을 수행하려면 <code>enable_sequential_cpu_offload()</code>를 호출하기만 하면 됩니다:",yt,P,ot,K,we="그러면 메모리 소비를 3GB 미만으로 줄일 수 있습니다.",ct,ll,Te="참고로 이 방법은 전체 모델이 아닌 서브모듈 수준에서 작동합니다. 이는 메모리 소비를 최소화하는 가장 좋은 방법이지만 프로세스의 반복적 특성으로 인해 추론 속도가 훨씬 느립니다. 파이프라인의 UNet 구성 요소는 여러 번 실행됩니다(‘num_inference_steps’ 만큼). 매번 UNet의 서로 다른 서브모듈이 순차적으로 온로드된 다음 필요에 따라 오프로드되므로 메모리 이동 횟수가 많습니다.",wt,f,re='<p>또 다른 최적화 방법인 <a href="#model_offloading">모델 오프로딩</a>을 사용하는 것을 고려하십시오. 이는 훨씬 빠르지만 메모리 절약이 크지는 않습니다.</p>',Tt,tl,ue="또한 ttention slicing과 연결해서 최소 메모리(< 2GB)로도 동작할 수 있습니다.",rt,el,ut,sl,de='<strong>참고</strong>: ‘enable_sequential_cpu_offload()‘를 사용할 때, 미리 파이프라인을 CUDA로 이동하지 <strong>않는</strong> 것이 중요합니다.그렇지 않으면 메모리 소비의 이득이 최소화됩니다. 더 많은 정보를 위해 <a href="https://github.com/huggingface/diffusers/issues/1934" rel="nofollow">이 이슈</a>를 보세요.',dt,Gl,ft,nl,fe='<a href="#sequential_offloading">순차적 CPU 오프로딩</a>은 이전 섹션에서 설명한 것처럼 많은 메모리를 보존하지만 필요에 따라 서브모듈을 GPU로 이동하고 새 모듈이 실행될 때 즉시 CPU로 반환되기 때문에 추론 속도가 느려집니다.',jt,al,je="전체 모델 오프로딩은 각 모델의 구성 요소인 <em>modules</em>을 처리하는 대신, 전체 모델을 GPU로 이동하는 대안입니다. 
이로 인해 추론 시간에 미치는 영향은 미미하지만(파이프라인을 ‘cuda’로 이동하는 것과 비교하여) 여전히 약간의 메모리를 절약할 수 있습니다.",ht,pl,he=`이 시나리오에서는 파이프라인의 주요 구성 요소 중 하나만(일반적으로 텍스트 인코더, unet 및 vae) GPU에 있고, 나머지는 CPU에서 대기할 것입니다. | |
| 여러 반복을 위해 실행되는 UNet과 같은 구성 요소는 더 이상 필요하지 않을 때까지 GPU에 남아 있습니다.`,bt,il,be="이 기능은 아래와 같이 파이프라인에서 <code>enable_model_cpu_offload()</code>를 호출하여 활성화할 수 있습니다.",Ct,Ml,It,Ul,Ce="이는 추가적인 메모리 절약을 위한 attention slicing과도 호환됩니다.",Zt,ml,Qt,j,Ie="<p>이 기능을 사용하려면 ‘accelerate’ 버전 0.17.0 이상이 필요합니다.</p>",Bt,Jl,Vt,yl,Ze=`Channels Last 메모리 형식은 차원 순서를 보존하는 메모리에서 NCHW 텐서 배열을 대체하는 방법입니다. | |
| Channels Last 텐서는 채널이 가장 조밀한 차원이 되는 방식으로 정렬됩니다(일명 픽셀당 이미지를 저장). | |
| 현재 모든 연산자 Channels Last 형식을 지원하는 것은 아니라 성능이 저하될 수 있으므로, 사용해보고 모델에 잘 작동하는지 확인하는 것이 좋습니다.`,Gt,ol,Qe="예를 들어 파이프라인의 UNet 모델이 channels Last 형식을 사용하도록 설정하려면 다음을 사용할 수 있습니다:",Et,cl,Rt,wl,Wt,Tl,Be="추적은 모델을 통해 예제 입력 텐서를 통해 실행되는데, 해당 입력이 모델의 레이어를 통과할 때 호출되는 작업을 캡처하여 실행 파일 또는 ‘ScriptFunction’이 반환되도록 하고, 이는 just-in-time 컴파일로 최적화됩니다.",Nt,rl,Ve="UNet 모델을 추적하기 위해 다음을 사용할 수 있습니다:",_t,ul,kt,dl,Ge="그 다음, 파이프라인의 <code>unet</code> 특성을 다음과 같이 추적된 모델로 바꿀 수 있습니다.",vt,fl,St,jl,Xt,hl,Ee=`어텐션 블록의 대역폭을 최적화하는 최근 작업으로 GPU 메모리 사용량이 크게 향상되고 향상되었습니다. | |
| @tridao의 가장 최근의 플래시 어텐션: <a href="https://github.com/HazyResearch/flash-attention" rel="nofollow">code</a>, <a href="https://huggingface.co/papers/2205.14135" rel="nofollow">paper</a>.`,gt,bl,Re="배치 크기 1(프롬프트 1개)의 512x512 크기로 추론을 실행할 때 몇 가지 Nvidia GPU에서 얻은 속도 향상은 다음과 같습니다:",xt,Cl,We="<thead><tr><th>GPU</th> <th>기준 어텐션 FP16</th> <th>메모리 효율적인 어텐션 FP16</th></tr></thead> <tbody><tr><td>NVIDIA Tesla T4</td> <td>3.5it/s</td> <td>5.5it/s</td></tr> <tr><td>NVIDIA 3060 RTX</td> <td>4.6it/s</td> <td>7.8it/s</td></tr> <tr><td>NVIDIA A10G</td> <td>8.88it/s</td> <td>15.6it/s</td></tr> <tr><td>NVIDIA RTX A6000</td> <td>11.7it/s</td> <td>21.09it/s</td></tr> <tr><td>NVIDIA TITAN RTX</td> <td>12.51it/s</td> <td>18.22it/s</td></tr> <tr><td>A100-SXM4-40GB</td> <td>18.6it/s</td> <td>29.it/s</td></tr> <tr><td>A100-SXM-80GB</td> <td>18.7it/s</td> <td>29.5it/s</td></tr></tbody>",$t,Il,Ne="이를 활용하려면 다음을 만족해야 합니다:",Yt,Zl,_e='<li>PyTorch > 1.12</li> <li>Cuda 사용 가능</li> <li><a href="xformers">xformers 라이브러리를 설치함</a></li>',At,Ql,Ht,Bl,Ft,Rl,zt;return b=new He({props:{containerStyle:"float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"}}),C=new r({props:{title:"메모리와 속도",local:"메모리와-속도",headingTag:"h1"}}),V=new r({props:{title:"cuDNN auto-tuner 활성화하기",local:"cudnn-auto-tuner-활성화하기",headingTag:"h2"}}),R=new w({props:{code:"aW1wb3J0JTIwdG9yY2glMEElMEF0b3JjaC5iYWNrZW5kcy5jdWRubi5iZW5jaG1hcmslMjAlM0QlMjBUcnVl",highlighted:`<span class="hljs-keyword">import</span> torch | |
| torch.backends.cudnn.benchmark = <span class="hljs-literal">True</span>`,wrap:!1}}),W=new r({props:{title:"fp32 대신 tf32 사용하기 (Ampere 및 이후 CUDA 장치들에서)",local:"fp32-대신-tf32-사용하기-ampere-및-이후-cuda-장치들에서",headingTag:"h3"}}),_=new w({props:{code:"aW1wb3J0JTIwdG9yY2glMEElMEF0b3JjaC5iYWNrZW5kcy5jdWRhLm1hdG11bC5hbGxvd190ZjMyJTIwJTNEJTIwVHJ1ZQ==",highlighted:`<span class="hljs-keyword">import</span> torch | |
| torch.backends.cuda.matmul.allow_tf32 = <span class="hljs-literal">True</span>`,wrap:!1}}),k=new r({props:{title:"반정밀도 가중치",local:"반정밀도-가중치",headingTag:"h2"}}),S=new w({props:{code:"cGlwZSUyMCUzRCUyMFN0YWJsZURpZmZ1c2lvblBpcGVsaW5lLmZyb21fcHJldHJhaW5lZCglMEElMjAlMjAlMjAlMjAlMjJzdGFibGUtZGlmZnVzaW9uLXYxLTUlMkZzdGFibGUtZGlmZnVzaW9uLXYxLTUlMjIlMkMlMEElMEElMjAlMjAlMjAlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmZsb2F0MTYlMkMlMEEpJTBBcGlwZSUyMCUzRCUyMHBpcGUudG8oJTIyY3VkYSUyMiklMEElMEFwcm9tcHQlMjAlM0QlMjAlMjJhJTIwcGhvdG8lMjBvZiUyMGFuJTIwYXN0cm9uYXV0JTIwcmlkaW5nJTIwYSUyMGhvcnNlJTIwb24lMjBtYXJzJTIyJTBBaW1hZ2UlMjAlM0QlMjBwaXBlKHByb21wdCkuaW1hZ2VzJTVCMCU1RA==",highlighted:`pipe = StableDiffusionPipeline.from_pretrained( | |
| <span class="hljs-string">"stable-diffusion-v1-5/stable-diffusion-v1-5"</span>, | |
| torch_dtype=torch.float16, | |
| ) | |
| pipe = pipe.to(<span class="hljs-string">"cuda"</span>) | |
| prompt = <span class="hljs-string">"a photo of an astronaut riding a horse on mars"</span> | |
| image = pipe(prompt).images[<span class="hljs-number">0</span>]`,wrap:!1}}),X=new r({props:{title:"추가 메모리 절약을 위한 슬라이스 어텐션",local:"추가-메모리-절약을-위한-슬라이스-어텐션",headingTag:"h2"}}),$=new w({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwU3RhYmxlRGlmZnVzaW9uUGlwZWxpbmUlMEElMEFwaXBlJTIwJTNEJTIwU3RhYmxlRGlmZnVzaW9uUGlwZWxpbmUuZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMCUyMnN0YWJsZS1kaWZmdXNpb24tdjEtNSUyRnN0YWJsZS1kaWZmdXNpb24tdjEtNSUyMiUyQyUwQSUwQSUyMCUyMCUyMCUyMHRvcmNoX2R0eXBlJTNEdG9yY2guZmxvYXQxNiUyQyUwQSklMEFwaXBlJTIwJTNEJTIwcGlwZS50byglMjJjdWRhJTIyKSUwQSUwQXByb21wdCUyMCUzRCUyMCUyMmElMjBwaG90byUyMG9mJTIwYW4lMjBhc3Ryb25hdXQlMjByaWRpbmclMjBhJTIwaG9yc2UlMjBvbiUyMG1hcnMlMjIlMEFwaXBlLmVuYWJsZV9hdHRlbnRpb25fc2xpY2luZygpJTBBaW1hZ2UlMjAlM0QlMjBwaXBlKHByb21wdCkuaW1hZ2VzJTVCMCU1RA==",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionPipeline | |
| pipe = StableDiffusionPipeline.from_pretrained( | |
| <span class="hljs-string">"stable-diffusion-v1-5/stable-diffusion-v1-5"</span>, | |
| torch_dtype=torch.float16, | |
| ) | |
| pipe = pipe.to(<span class="hljs-string">"cuda"</span>) | |
| prompt = <span class="hljs-string">"a photo of an astronaut riding a horse on mars"</span> | |
| pipe.enable_attention_slicing() | |
| image = pipe(prompt).images[<span class="hljs-number">0</span>]`,wrap:!1}}),A=new r({props:{title:"더 큰 배치를 위한 sliced VAE 디코드",local:"더-큰-배치를-위한-sliced-vae-디코드",headingTag:"h2"}}),D=new w({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwU3RhYmxlRGlmZnVzaW9uUGlwZWxpbmUlMEElMEFwaXBlJTIwJTNEJTIwU3RhYmxlRGlmZnVzaW9uUGlwZWxpbmUuZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMCUyMnN0YWJsZS1kaWZmdXNpb24tdjEtNSUyRnN0YWJsZS1kaWZmdXNpb24tdjEtNSUyMiUyQyUwQSUwQSUyMCUyMCUyMCUyMHRvcmNoX2R0eXBlJTNEdG9yY2guZmxvYXQxNiUyQyUwQSklMEFwaXBlJTIwJTNEJTIwcGlwZS50byglMjJjdWRhJTIyKSUwQSUwQXByb21wdCUyMCUzRCUyMCUyMmElMjBwaG90byUyMG9mJTIwYW4lMjBhc3Ryb25hdXQlMjByaWRpbmclMjBhJTIwaG9yc2UlMjBvbiUyMG1hcnMlMjIlMEFwaXBlLmVuYWJsZV92YWVfc2xpY2luZygpJTBBaW1hZ2VzJTIwJTNEJTIwcGlwZSglNUJwcm9tcHQlNUQlMjAqJTIwMzIpLmltYWdlcw==",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionPipeline | |
| pipe = StableDiffusionPipeline.from_pretrained( | |
| <span class="hljs-string">"stable-diffusion-v1-5/stable-diffusion-v1-5"</span>, | |
| torch_dtype=torch.float16, | |
| ) | |
| pipe = pipe.to(<span class="hljs-string">"cuda"</span>) | |
| prompt = <span class="hljs-string">"a photo of an astronaut riding a horse on mars"</span> | |
| pipe.enable_vae_slicing() | |
| images = pipe([prompt] * <span class="hljs-number">32</span>).images`,wrap:!1}}),P=new w({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwU3RhYmxlRGlmZnVzaW9uUGlwZWxpbmUlMEElMEFwaXBlJTIwJTNEJTIwU3RhYmxlRGlmZnVzaW9uUGlwZWxpbmUuZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMCUyMnN0YWJsZS1kaWZmdXNpb24tdjEtNSUyRnN0YWJsZS1kaWZmdXNpb24tdjEtNSUyMiUyQyUwQSUwQSUyMCUyMCUyMCUyMHRvcmNoX2R0eXBlJTNEdG9yY2guZmxvYXQxNiUyQyUwQSklMEElMEFwcm9tcHQlMjAlM0QlMjAlMjJhJTIwcGhvdG8lMjBvZiUyMGFuJTIwYXN0cm9uYXV0JTIwcmlkaW5nJTIwYSUyMGhvcnNlJTIwb24lMjBtYXJzJTIyJTBBcGlwZS5lbmFibGVfc2VxdWVudGlhbF9jcHVfb2ZmbG9hZCgpJTBBaW1hZ2UlMjAlM0QlMjBwaXBlKHByb21wdCkuaW1hZ2VzJTVCMCU1RA==",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionPipeline | |
| pipe = StableDiffusionPipeline.from_pretrained( | |
| <span class="hljs-string">"stable-diffusion-v1-5/stable-diffusion-v1-5"</span>, | |
| torch_dtype=torch.float16, | |
| ) | |
| prompt = <span class="hljs-string">"a photo of an astronaut riding a horse on mars"</span> | |
| pipe.enable_sequential_cpu_offload() | |
| image = pipe(prompt).images[<span class="hljs-number">0</span>]`,wrap:!1}}),el=new w({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwU3RhYmxlRGlmZnVzaW9uUGlwZWxpbmUlMEElMEFwaXBlJTIwJTNEJTIwU3RhYmxlRGlmZnVzaW9uUGlwZWxpbmUuZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMCUyMnN0YWJsZS1kaWZmdXNpb24tdjEtNSUyRnN0YWJsZS1kaWZmdXNpb24tdjEtNSUyMiUyQyUwQSUwQSUyMCUyMCUyMCUyMHRvcmNoX2R0eXBlJTNEdG9yY2guZmxvYXQxNiUyQyUwQSklMEElMEFwcm9tcHQlMjAlM0QlMjAlMjJhJTIwcGhvdG8lMjBvZiUyMGFuJTIwYXN0cm9uYXV0JTIwcmlkaW5nJTIwYSUyMGhvcnNlJTIwb24lMjBtYXJzJTIyJTBBcGlwZS5lbmFibGVfc2VxdWVudGlhbF9jcHVfb2ZmbG9hZCgpJTBBcGlwZS5lbmFibGVfYXR0ZW50aW9uX3NsaWNpbmcoMSklMEElMEFpbWFnZSUyMCUzRCUyMHBpcGUocHJvbXB0KS5pbWFnZXMlNUIwJTVE",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionPipeline | |
| pipe = StableDiffusionPipeline.from_pretrained( | |
| <span class="hljs-string">"stable-diffusion-v1-5/stable-diffusion-v1-5"</span>, | |
| torch_dtype=torch.float16, | |
| ) | |
| prompt = <span class="hljs-string">"a photo of an astronaut riding a horse on mars"</span> | |
| pipe.enable_sequential_cpu_offload() | |
| pipe.enable_attention_slicing(<span class="hljs-number">1</span>) | |
| image = pipe(prompt).images[<span class="hljs-number">0</span>]`,wrap:!1}}),Ml=new w({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwU3RhYmxlRGlmZnVzaW9uUGlwZWxpbmUlMEElMEFwaXBlJTIwJTNEJTIwU3RhYmxlRGlmZnVzaW9uUGlwZWxpbmUuZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMCUyMnN0YWJsZS1kaWZmdXNpb24tdjEtNSUyRnN0YWJsZS1kaWZmdXNpb24tdjEtNSUyMiUyQyUwQSUyMCUyMCUyMCUyMHRvcmNoX2R0eXBlJTNEdG9yY2guZmxvYXQxNiUyQyUwQSklMEElMEFwcm9tcHQlMjAlM0QlMjAlMjJhJTIwcGhvdG8lMjBvZiUyMGFuJTIwYXN0cm9uYXV0JTIwcmlkaW5nJTIwYSUyMGhvcnNlJTIwb24lMjBtYXJzJTIyJTBBcGlwZS5lbmFibGVfbW9kZWxfY3B1X29mZmxvYWQoKSUwQWltYWdlJTIwJTNEJTIwcGlwZShwcm9tcHQpLmltYWdlcyU1QjAlNUQ=",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionPipeline | |
| pipe = StableDiffusionPipeline.from_pretrained( | |
| <span class="hljs-string">"stable-diffusion-v1-5/stable-diffusion-v1-5"</span>, | |
| torch_dtype=torch.float16, | |
| ) | |
| prompt = <span class="hljs-string">"a photo of an astronaut riding a horse on mars"</span> | |
| pipe.enable_model_cpu_offload() | |
| image = pipe(prompt).images[<span class="hljs-number">0</span>]`,wrap:!1}}),ml=new w({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwU3RhYmxlRGlmZnVzaW9uUGlwZWxpbmUlMEElMEFwaXBlJTIwJTNEJTIwU3RhYmxlRGlmZnVzaW9uUGlwZWxpbmUuZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMCUyMnN0YWJsZS1kaWZmdXNpb24tdjEtNSUyRnN0YWJsZS1kaWZmdXNpb24tdjEtNSUyMiUyQyUwQSUyMCUyMCUyMCUyMHRvcmNoX2R0eXBlJTNEdG9yY2guZmxvYXQxNiUyQyUwQSklMEElMEFwcm9tcHQlMjAlM0QlMjAlMjJhJTIwcGhvdG8lMjBvZiUyMGFuJTIwYXN0cm9uYXV0JTIwcmlkaW5nJTIwYSUyMGhvcnNlJTIwb24lMjBtYXJzJTIyJTBBcGlwZS5lbmFibGVfbW9kZWxfY3B1X29mZmxvYWQoKSUwQXBpcGUuZW5hYmxlX2F0dGVudGlvbl9zbGljaW5nKDEpJTBBJTBBaW1hZ2UlMjAlM0QlMjBwaXBlKHByb21wdCkuaW1hZ2VzJTVCMCU1RA==",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionPipeline | |
| pipe = StableDiffusionPipeline.from_pretrained( | |
| <span class="hljs-string">"stable-diffusion-v1-5/stable-diffusion-v1-5"</span>, | |
| torch_dtype=torch.float16, | |
| ) | |
| prompt = <span class="hljs-string">"a photo of an astronaut riding a horse on mars"</span> | |
| pipe.enable_model_cpu_offload() | |
| pipe.enable_attention_slicing(<span class="hljs-number">1</span>) | |
| image = pipe(prompt).images[<span class="hljs-number">0</span>]`,wrap:!1}}),Jl=new r({props:{title:"Channels Last 메모리 형식 사용하기",local:"channels-last-메모리-형식-사용하기",headingTag:"h2"}}),cl=new w({props:{code:"cHJpbnQocGlwZS51bmV0LmNvbnZfb3V0LnN0YXRlX2RpY3QoKSU1QiUyMndlaWdodCUyMiU1RC5zdHJpZGUoKSklMjAlMjAlMjMlMjAoMjg4MCUyQyUyMDklMkMlMjAzJTJDJTIwMSklMEFwaXBlLnVuZXQudG8obWVtb3J5X2Zvcm1hdCUzRHRvcmNoLmNoYW5uZWxzX2xhc3QpJTIwJTIwJTIzJTIwaW4tcGxhY2UlMjAlRUMlOTclQjAlRUMlODIlQjAlMEElMjMlMjAyJUVCJUIyJTg4JUVDJUE3JUI4JTIwJUVDJUIwJUE4JUVDJTlCJTkwJUVDJTk3JTkwJUVDJTg0JTlDJTIwJUVDJThBJUE0JUVEJThBJUI4JUVCJTlEJUJDJUVDJTlEJUI0JUVCJTkzJTlDJTIwMSVFQyU5RCU4NCUyMCVFQSVCMCU4MCVFQyVBNyU4MCVFQiU4QSU5NCUyMCgyODgwJTJDJTIwMSUyQyUyMDk2MCUyQyUyMDMyMCklRUIlQTElOUMlMkMlMjAlRUMlOTclQjAlRUMlODIlQjAlRUMlOUQlQjQlMjAlRUMlOUUlOTElRUIlOEYlOTklRUQlOTUlQTglRUMlOUQlODQlMjAlRUMlQTYlOUQlRUIlQUElODUlRUQlOTUlQTklRUIlOEIlODglRUIlOEIlQTQuJTBBcHJpbnQocGlwZS51bmV0LmNvbnZfb3V0LnN0YXRlX2RpY3QoKSU1QiUyMndlaWdodCUyMiU1RC5zdHJpZGUoKSk=",highlighted:`<span class="hljs-built_in">print</span>(pipe.unet.conv_out.state_dict()[<span class="hljs-string">"weight"</span>].stride()) <span class="hljs-comment"># (2880, 9, 3, 1)</span> | |
| pipe.unet.to(memory_format=torch.channels_last) <span class="hljs-comment"># in-place 연산</span> | |
| <span class="hljs-comment"># 2번째 차원에서 스트라이드 1을 가지는 (2880, 1, 960, 320)로, 연산이 작동함을 증명합니다.</span> | |
| <span class="hljs-built_in">print</span>(pipe.unet.conv_out.state_dict()[<span class="hljs-string">"weight"</span>].stride())`,wrap:!1}}),wl=new r({props:{title:"추적(tracing)",local:"추적tracing",headingTag:"h2"}}),ul=new w({props:{code:"aW1wb3J0JTIwdGltZSUwQWltcG9ydCUyMHRvcmNoJTBBZnJvbSUyMGRpZmZ1c2VycyUyMGltcG9ydCUyMFN0YWJsZURpZmZ1c2lvblBpcGVsaW5lJTBBaW1wb3J0JTIwZnVuY3Rvb2xzJTBBJTBBJTIzJTIwdG9yY2glMjAlRUElQjglQjAlRUMlOUElQjglRUElQjglQjAlMjAlRUIlQjklODQlRUQlOTklOUMlRUMlODQlQjElRUQlOTklOTQlMEF0b3JjaC5zZXRfZ3JhZF9lbmFibGVkKEZhbHNlKSUwQSUwQSUyMyUyMCVFQiVCMyU4MCVFQyU4OCU5OCUyMCVFQyU4NCVBNCVFQyVBMCU5NSUwQW5fZXhwZXJpbWVudHMlMjAlM0QlMjAyJTBBdW5ldF9ydW5zX3Blcl9leHBlcmltZW50JTIwJTNEJTIwNTAlMEElMEElMEElMjMlMjAlRUMlOUUlODUlRUIlQTAlQTUlMjAlRUIlQjYlODglRUIlOUYlQUMlRUMlOTglQTQlRUElQjglQjAlMEFkZWYlMjBnZW5lcmF0ZV9pbnB1dHMoKSUzQSUwQSUyMCUyMCUyMCUyMHNhbXBsZSUyMCUzRCUyMHRvcmNoLnJhbmRuKCgyJTJDJTIwNCUyQyUyMDY0JTJDJTIwNjQpJTJDJTIwZGV2aWNlJTNEJTIyY3VkYSUyMiUyQyUyMGR0eXBlJTNEdG9yY2guZmxvYXQxNiklMEElMjAlMjAlMjAlMjB0aW1lc3RlcCUyMCUzRCUyMHRvcmNoLnJhbmQoMSUyQyUyMGRldmljZSUzRCUyMmN1ZGElMjIlMkMlMjBkdHlwZSUzRHRvcmNoLmZsb2F0MTYpJTIwKiUyMDk5OSUwQSUyMCUyMCUyMCUyMGVuY29kZXJfaGlkZGVuX3N0YXRlcyUyMCUzRCUyMHRvcmNoLnJhbmRuKCgyJTJDJTIwNzclMkMlMjA3NjgpJTJDJTIwZGV2aWNlJTNEJTIyY3VkYSUyMiUyQyUyMGR0eXBlJTNEdG9yY2guZmxvYXQxNiklMEElMjAlMjAlMjAlMjByZXR1cm4lMjBzYW1wbGUlMkMlMjB0aW1lc3RlcCUyQyUyMGVuY29kZXJfaGlkZGVuX3N0YXRlcyUwQSUwQSUwQXBpcGUlMjAlM0QlMjBTdGFibGVEaWZmdXNpb25QaXBlbGluZS5mcm9tX3ByZXRyYWluZWQoJTBBJTIwJTIwJTIwJTIwJTIyc3RhYmxlLWRpZmZ1c2lvbi12MS01JTJGc3RhYmxlLWRpZmZ1c2lvbi12MS01JTIyJTJDJTBBJTIwJTIwJTIwJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5mbG9hdDE2JTJDJTBBKS50byglMjJjdWRhJTIyKSUwQXVuZXQlMjAlM0QlMjBwaXBlLnVuZXQlMEF1bmV0LmV2YWwoKSUwQXVuZXQudG8obWVtb3J5X2Zvcm1hdCUzRHRvcmNoLmNoYW5uZWxzX2xhc3QpJTIwJTIwJTIzJTIwQ2hhbm5lbHMlMjBMYXN0JTIwJUVCJUE5JTk0JUVCJUFBJUE4JUVCJUE2JUFDJTIwJUVEJTk4JTk1JUVDJThCJTlEJTIwJUVDJTgyJUFDJUVDJTlBJUE5JTBBdW5ldC5mb3J3YXJkJTIwJTNEJTIwZnVuY3Rvb2xzLnBhcnRpYWwodW5ldC5mb3J3YXJkJTJDJTIwcmV0dXJuX2RpY3QlM0R
GYWxzZSklMjAlMjAlMjMlMjByZXR1cm5fZGljdCUzREZhbHNlJUVDJTlEJTg0JTIwJUVBJUI4JUIwJUVCJUIzJUI4JUVBJUIwJTkyJUVDJTlDJUJDJUVCJUExJTlDJTIwJUVDJTg0JUE0JUVDJUEwJTk1JTBBJTBBJTIzJTIwJUVDJTlCJThDJUVCJUIwJThEJUVDJTk3JTg1JTBBZm9yJTIwXyUyMGluJTIwcmFuZ2UoMyklM0ElMEElMjAlMjAlMjAlMjB3aXRoJTIwdG9yY2guaW5mZXJlbmNlX21vZGUoKSUzQSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGlucHV0cyUyMCUzRCUyMGdlbmVyYXRlX2lucHV0cygpJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwb3JpZ19vdXRwdXQlMjAlM0QlMjB1bmV0KCppbnB1dHMpJTBBJTBBJTIzJTIwJUVDJUI2JTk0JUVDJUEwJTgxJTBBcHJpbnQoJTIydHJhY2luZy4uJTIyKSUwQXVuZXRfdHJhY2VkJTIwJTNEJTIwdG9yY2guaml0LnRyYWNlKHVuZXQlMkMlMjBpbnB1dHMpJTBBdW5ldF90cmFjZWQuZXZhbCgpJTBBcHJpbnQoJTIyZG9uZSUyMHRyYWNpbmclMjIpJTBBJTBBJTBBJTIzJTIwJUVDJTlCJThDJUVCJUIwJThEJUVDJTk3JTg1JTIwJUVCJUIwJThGJTIwJUVBJUI3JUI4JUVCJTlFJTk4JUVEJTk0JTg0JTIwJUVDJUI1JTlDJUVDJUEwJTgxJUVEJTk5JTk0JTBBZm9yJTIwXyUyMGluJTIwcmFuZ2UoNSklM0ElMEElMjAlMjAlMjAlMjB3aXRoJTIwdG9yY2guaW5mZXJlbmNlX21vZGUoKSUzQSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGlucHV0cyUyMCUzRCUyMGdlbmVyYXRlX2lucHV0cygpJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwb3JpZ19vdXRwdXQlMjAlM0QlMjB1bmV0X3RyYWNlZCgqaW5wdXRzKSUwQSUwQSUwQSUyMyUyMCVFQiVCMiVBNCVFQyVCOSU5OCVFQiVBNyU4OCVFRCU4MiVCOSUwQXdpdGglMjB0b3JjaC5pbmZlcmVuY2VfbW9kZSgpJTNBJTBBJTIwJTIwJTIwJTIwZm9yJTIwXyUyMGluJTIwcmFuZ2Uobl9leHBlcmltZW50cyklM0ElMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjB0b3JjaC5jdWRhLnN5bmNocm9uaXplKCklMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBzdGFydF90aW1lJTIwJTNEJTIwdGltZS50aW1lKCklMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBmb3IlMjBfJTIwaW4lMjByYW5nZSh1bmV0X3J1bnNfcGVyX2V4cGVyaW1lbnQpJTNBJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwb3JpZ19vdXRwdXQlMjAlM0QlMjB1bmV0X3RyYWNlZCgqaW5wdXRzKSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHRvcmNoLmN1ZGEuc3luY2hyb25pemUoKSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHByaW50KGYlMjJ1bmV0JTIwdHJhY2VkJTIwaW5mZXJlbmNlJTIwdG9vayUyMCU3QnRpbWUudGltZSgpJTIwLSUyMHN0YXJ0X3RpbWUlM0EuMmYlN0QlMjBzZWNvbmRzJTIyKSUwQSUyMCUyMCUyMCUyMGZvciUyMF8lMjBpbiUyMHJhbmdlKG5fZXhwZXJpbWVudHMpJTNBJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwdG9yY2guY3VkYS5zeW5
jaHJvbml6ZSgpJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwc3RhcnRfdGltZSUyMCUzRCUyMHRpbWUudGltZSgpJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwZm9yJTIwXyUyMGluJTIwcmFuZ2UodW5ldF9ydW5zX3Blcl9leHBlcmltZW50KSUzQSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMG9yaWdfb3V0cHV0JTIwJTNEJTIwdW5ldCgqaW5wdXRzKSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHRvcmNoLmN1ZGEuc3luY2hyb25pemUoKSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHByaW50KGYlMjJ1bmV0JTIwaW5mZXJlbmNlJTIwdG9vayUyMCU3QnRpbWUudGltZSgpJTIwLSUyMHN0YXJ0X3RpbWUlM0EuMmYlN0QlMjBzZWNvbmRzJTIyKSUwQSUwQSUyMyUyMCVFQiVBQSVBOCVFQiU4RCVCOCUyMCVFQyVBMCU4MCVFQyU5RSVBNSUwQXVuZXRfdHJhY2VkLnNhdmUoJTIydW5ldF90cmFjZWQucHQlMjIp",highlighted:`<span class="hljs-keyword">import</span> time | |
| <span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionPipeline | |
| <span class="hljs-keyword">import</span> functools | |
| <span class="hljs-comment"># torch 기울기 비활성화</span> | |
| torch.set_grad_enabled(<span class="hljs-literal">False</span>) | |
| <span class="hljs-comment"># 변수 설정</span> | |
| n_experiments = <span class="hljs-number">2</span> | |
| unet_runs_per_experiment = <span class="hljs-number">50</span> | |
| <span class="hljs-comment"># 입력 불러오기</span> | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">generate_inputs</span>(): | |
| sample = torch.randn((<span class="hljs-number">2</span>, <span class="hljs-number">4</span>, <span class="hljs-number">64</span>, <span class="hljs-number">64</span>), device=<span class="hljs-string">"cuda"</span>, dtype=torch.float16) | |
| timestep = torch.rand(<span class="hljs-number">1</span>, device=<span class="hljs-string">"cuda"</span>, dtype=torch.float16) * <span class="hljs-number">999</span> | |
| encoder_hidden_states = torch.randn((<span class="hljs-number">2</span>, <span class="hljs-number">77</span>, <span class="hljs-number">768</span>), device=<span class="hljs-string">"cuda"</span>, dtype=torch.float16) | |
| <span class="hljs-keyword">return</span> sample, timestep, encoder_hidden_states | |
| pipe = StableDiffusionPipeline.from_pretrained( | |
| <span class="hljs-string">"stable-diffusion-v1-5/stable-diffusion-v1-5"</span>, | |
| torch_dtype=torch.float16, | |
| ).to(<span class="hljs-string">"cuda"</span>) | |
| unet = pipe.unet | |
| unet.<span class="hljs-built_in">eval</span>() | |
| unet.to(memory_format=torch.channels_last) <span class="hljs-comment"># Channels Last 메모리 형식 사용</span> | |
| unet.forward = functools.partial(unet.forward, return_dict=<span class="hljs-literal">False</span>) <span class="hljs-comment"># return_dict=False을 기본값으로 설정</span> | |
| <span class="hljs-comment"># 워밍업</span> | |
| <span class="hljs-keyword">for</span> _ <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(<span class="hljs-number">3</span>): | |
| <span class="hljs-keyword">with</span> torch.inference_mode(): | |
| inputs = generate_inputs() | |
| orig_output = unet(*inputs) | |
| <span class="hljs-comment"># 추적</span> | |
| <span class="hljs-built_in">print</span>(<span class="hljs-string">"tracing.."</span>) | |
| unet_traced = torch.jit.trace(unet, inputs) | |
| unet_traced.<span class="hljs-built_in">eval</span>() | |
| <span class="hljs-built_in">print</span>(<span class="hljs-string">"done tracing"</span>) | |
| <span class="hljs-comment"># 워밍업 및 그래프 최적화</span> | |
| <span class="hljs-keyword">for</span> _ <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(<span class="hljs-number">5</span>): | |
| <span class="hljs-keyword">with</span> torch.inference_mode(): | |
| inputs = generate_inputs() | |
| orig_output = unet_traced(*inputs) | |
| <span class="hljs-comment"># 벤치마킹</span> | |
| <span class="hljs-keyword">with</span> torch.inference_mode(): | |
| <span class="hljs-keyword">for</span> _ <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(n_experiments): | |
| torch.cuda.synchronize() | |
| start_time = time.time() | |
| <span class="hljs-keyword">for</span> _ <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(unet_runs_per_experiment): | |
| orig_output = unet_traced(*inputs) | |
| torch.cuda.synchronize() | |
| <span class="hljs-built_in">print</span>(<span class="hljs-string">f"unet traced inference took <span class="hljs-subst">{time.time() - start_time:<span class="hljs-number">.2</span>f}</span> seconds"</span>) | |
| <span class="hljs-keyword">for</span> _ <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(n_experiments): | |
| torch.cuda.synchronize() | |
| start_time = time.time() | |
| <span class="hljs-keyword">for</span> _ <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(unet_runs_per_experiment): | |
| orig_output = unet(*inputs) | |
| torch.cuda.synchronize() | |
| <span class="hljs-built_in">print</span>(<span class="hljs-string">f"unet inference took <span class="hljs-subst">{time.time() - start_time:<span class="hljs-number">.2</span>f}</span> seconds"</span>) | |
| <span class="hljs-comment"># 모델 저장</span> | |
| unet_traced.save(<span class="hljs-string">"unet_traced.pt"</span>)`,wrap:!1}}),fl=new w({props:{code:"ZnJvbSUyMGRpZmZ1c2VycyUyMGltcG9ydCUyMFN0YWJsZURpZmZ1c2lvblBpcGVsaW5lJTBBaW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGF0YWNsYXNzZXMlMjBpbXBvcnQlMjBkYXRhY2xhc3MlMEElMEElMEElNDBkYXRhY2xhc3MlMEFjbGFzcyUyMFVOZXQyRENvbmRpdGlvbk91dHB1dCUzQSUwQSUyMCUyMCUyMCUyMHNhbXBsZSUzQSUyMHRvcmNoLlRlbnNvciUwQSUwQSUwQXBpcGUlMjAlM0QlMjBTdGFibGVEaWZmdXNpb25QaXBlbGluZS5mcm9tX3ByZXRyYWluZWQoJTBBJTIwJTIwJTIwJTIwJTIyc3RhYmxlLWRpZmZ1c2lvbi12MS01JTJGc3RhYmxlLWRpZmZ1c2lvbi12MS01JTIyJTJDJTBBJTIwJTIwJTIwJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5mbG9hdDE2JTJDJTBBKS50byglMjJjdWRhJTIyKSUwQSUwQSUyMyUyMGppdHRlZCUyMHVuZXQlMjAlRUMlODIlQUMlRUMlOUElQTklMEF1bmV0X3RyYWNlZCUyMCUzRCUyMHRvcmNoLmppdC5sb2FkKCUyMnVuZXRfdHJhY2VkLnB0JTIyKSUwQSUwQSUwQSUyMyUyMHBpcGUudW5ldCUyMCVFQyU4MiVBRCVFQyVBMCU5QyUwQWNsYXNzJTIwVHJhY2VkVU5ldCh0b3JjaC5ubi5Nb2R1bGUpJTNBJTBBJTIwJTIwJTIwJTIwZGVmJTIwX19pbml0X18oc2VsZiklM0ElMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBzdXBlcigpLl9faW5pdF9fKCklMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBzZWxmLmluX2NoYW5uZWxzJTIwJTNEJTIwcGlwZS51bmV0LmNvbmZpZy5pbl9jaGFubmVscyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHNlbGYuZGV2aWNlJTIwJTNEJTIwcGlwZS51bmV0LmRldmljZSUwQSUwQSUyMCUyMCUyMCUyMGRlZiUyMGZvcndhcmQoc2VsZiUyQyUyMGxhdGVudF9tb2RlbF9pbnB1dCUyQyUyMHQlMkMlMjBlbmNvZGVyX2hpZGRlbl9zdGF0ZXMpJTNBJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwc2FtcGxlJTIwJTNEJTIwdW5ldF90cmFjZWQobGF0ZW50X21vZGVsX2lucHV0JTJDJTIwdCUyQyUyMGVuY29kZXJfaGlkZGVuX3N0YXRlcyklNUIwJTVEJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwcmV0dXJuJTIwVU5ldDJEQ29uZGl0aW9uT3V0cHV0KHNhbXBsZSUzRHNhbXBsZSklMEElMEElMEFwaXBlLnVuZXQlMjAlM0QlMjBUcmFjZWRVTmV0KCklMEElMEF3aXRoJTIwdG9yY2guaW5mZXJlbmNlX21vZGUoKSUzQSUwQSUyMCUyMCUyMCUyMGltYWdlJTIwJTNEJTIwcGlwZSglNUJwcm9tcHQlNUQlMjAqJTIwMSUyQyUyMG51bV9pbmZlcmVuY2Vfc3RlcHMlM0Q1MCkuaW1hZ2VzJTVCMCU1RA==",highlighted:`<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionPipeline | |
| <span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> dataclasses <span class="hljs-keyword">import</span> dataclass | |
| <span class="hljs-meta">@dataclass</span> | |
| <span class="hljs-keyword">class</span> <span class="hljs-title class_">UNet2DConditionOutput</span>: | |
| sample: torch.Tensor | |
| pipe = StableDiffusionPipeline.from_pretrained( | |
| <span class="hljs-string">"stable-diffusion-v1-5/stable-diffusion-v1-5"</span>, | |
| torch_dtype=torch.float16, | |
| ).to(<span class="hljs-string">"cuda"</span>) | |
| <span class="hljs-comment"># jitted unet 사용</span> | |
| unet_traced = torch.jit.load(<span class="hljs-string">"unet_traced.pt"</span>) | |
| <span class="hljs-comment"># pipe.unet 삭제</span> | |
| <span class="hljs-keyword">class</span> <span class="hljs-title class_">TracedUNet</span>(torch.nn.Module): | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">__init__</span>(<span class="hljs-params">self</span>): | |
| <span class="hljs-built_in">super</span>().__init__() | |
| self.in_channels = pipe.unet.config.in_channels | |
| self.device = pipe.unet.device | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">forward</span>(<span class="hljs-params">self, latent_model_input, t, encoder_hidden_states</span>): | |
| sample = unet_traced(latent_model_input, t, encoder_hidden_states)[<span class="hljs-number">0</span>] | |
| <span class="hljs-keyword">return</span> UNet2DConditionOutput(sample=sample) | |
| pipe.unet = TracedUNet() | |
| <span class="hljs-keyword">with</span> torch.inference_mode(): | |
| image = pipe([prompt] * <span class="hljs-number">1</span>, num_inference_steps=<span class="hljs-number">50</span>).images[<span class="hljs-number">0</span>]`,wrap:!1}}),jl=new r({props:{title:"Memory-efficient attention",local:"memory-efficient-attention",headingTag:"h2"}}),Ql=new w({props:{code:"ZnJvbSUyMGRpZmZ1c2VycyUyMGltcG9ydCUyMFN0YWJsZURpZmZ1c2lvblBpcGVsaW5lJTBBaW1wb3J0JTIwdG9yY2glMEElMEFwaXBlJTIwJTNEJTIwU3RhYmxlRGlmZnVzaW9uUGlwZWxpbmUuZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMCUyMnN0YWJsZS1kaWZmdXNpb24tdjEtNSUyRnN0YWJsZS1kaWZmdXNpb24tdjEtNSUyMiUyQyUwQSUyMCUyMCUyMCUyMHRvcmNoX2R0eXBlJTNEdG9yY2guZmxvYXQxNiUyQyUwQSkudG8oJTIyY3VkYSUyMiklMEElMEFwaXBlLmVuYWJsZV94Zm9ybWVyc19tZW1vcnlfZWZmaWNpZW50X2F0dGVudGlvbigpJTBBJTBBd2l0aCUyMHRvcmNoLmluZmVyZW5jZV9tb2RlKCklM0ElMEElMjAlMjAlMjAlMjBzYW1wbGUlMjAlM0QlMjBwaXBlKCUyMmElMjBzbWFsbCUyMGNhdCUyMiklMEElMEElMjMlMjAlRUMlODQlQTAlRUQlODMlOUQlM0ElMjAlRUMlOUQlQjQlRUIlQTUlQkMlMjAlRUIlQjklODQlRUQlOTklOUMlRUMlODQlQjElRUQlOTklOTQlMjAlRUQlOTUlOTglRUElQjglQjAlMjAlRUMlOUMlODQlRUQlOTUlQjQlMjAlRUIlOEIlQTQlRUMlOUQlOEMlRUMlOUQlODQlMjAlRUMlODIlQUMlRUMlOUElQTklRUQlOTUlQTAlMjAlRUMlODglOTglMjAlRUMlOUUlODglRUMlOEElQjUlRUIlOEIlODglRUIlOEIlQTQuJTBBJTIzJTIwcGlwZS5kaXNhYmxlX3hmb3JtZXJzX21lbW9yeV9lZmZpY2llbnRfYXR0ZW50aW9uKCk=",highlighted:`<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionPipeline | |
| <span class="hljs-keyword">import</span> torch | |
| pipe = StableDiffusionPipeline.from_pretrained( | |
| <span class="hljs-string">"stable-diffusion-v1-5/stable-diffusion-v1-5"</span>, | |
| torch_dtype=torch.float16, | |
| ).to(<span class="hljs-string">"cuda"</span>) | |
| pipe.enable_xformers_memory_efficient_attention() | |
| <span class="hljs-keyword">with</span> torch.inference_mode(): | |
| sample = pipe(<span class="hljs-string">"a small cat"</span>) | |
| <span class="hljs-comment"># 선택: 이를 비활성화 하기 위해 다음을 사용할 수 있습니다.</span> | |
| <span class="hljs-comment"># pipe.disable_xformers_memory_efficient_attention()</span>`,wrap:!1}}),Bl=new Fe({props:{source:"https://github.com/huggingface/diffusers/blob/main/docs/source/ko/optimization/fp16.md"}}),{c(){T=p("meta"),Wl=n(),El=p("p"),Nl=n(),U(b.$$.fragment),_l=n(),U(C.$$.fragment),kl=n(),I=p("p"),I.innerHTML=qt,vl=n(),Z=p("p"),Z.textContent=Ot,Sl=n(),Q=p("table"),Q.innerHTML=Pt,Xl=n(),B=p("em"),B.textContent=Kt,gl=n(),U(V.$$.fragment),xl=n(),G=p("p"),G.innerHTML=le,$l=n(),E=p("p"),E.innerHTML=te,Yl=n(),U(R.$$.fragment),Al=n(),U(W.$$.fragment),Hl=n(),N=p("p"),N.innerHTML=ee,Fl=n(),U(_.$$.fragment),zl=n(),U(k.$$.fragment),Dl=n(),v=p("p"),v.innerHTML=se,Ll=n(),U(S.$$.fragment),ql=n(),u=p("blockquote"),u.innerHTML=ne,Ol=n(),U(X.$$.fragment),Pl=n(),g=p("p"),g.textContent=ae,Kl=n(),d=p("blockquote"),d.innerHTML=pe,lt=n(),x=p("p"),x.innerHTML=ie,tt=n(),U($.$$.fragment),et=n(),Y=p("p"),Y.textContent=Me,st=n(),U(A.$$.fragment),nt=n(),H=p("p"),H.textContent=Ue,at=n(),F=p("p"),F.innerHTML=me,pt=n(),z=p("p"),z.innerHTML=Je,it=n(),U(D.$$.fragment),Mt=n(),L=p("p"),L.textContent=ye,Ut=n(),Vl=p("a"),mt=ke(` | |
| ## 메모리 절약을 위해 가속 기능을 사용하여 CPU로 오프로딩 | |
| `),q=p("p"),q.textContent=oe,Jt=n(),O=p("p"),O.innerHTML=ce,yt=n(),U(P.$$.fragment),ot=n(),K=p("p"),K.textContent=we,ct=n(),ll=p("p"),ll.textContent=Te,wt=n(),f=p("blockquote"),f.innerHTML=re,Tt=n(),tl=p("p"),tl.textContent=ue,rt=n(),U(el.$$.fragment),ut=n(),sl=p("p"),sl.innerHTML=de,dt=n(),Gl=p("a"),ft=ke(` | |
| ## 빠른 추론과 메모리 메모리 절약을 위한 모델 오프로딩 | |
| `),nl=p("p"),nl.innerHTML=fe,jt=n(),al=p("p"),al.innerHTML=je,ht=n(),pl=p("p"),pl.textContent=he,bt=n(),il=p("p"),il.innerHTML=be,Ct=n(),U(Ml.$$.fragment),It=n(),Ul=p("p"),Ul.textContent=Ce,Zt=n(),U(ml.$$.fragment),Qt=n(),j=p("blockquote"),j.innerHTML=Ie,Bt=n(),U(Jl.$$.fragment),Vt=n(),yl=p("p"),yl.textContent=Ze,Gt=n(),ol=p("p"),ol.textContent=Qe,Et=n(),U(cl.$$.fragment),Rt=n(),U(wl.$$.fragment),Wt=n(),Tl=p("p"),Tl.textContent=Be,Nt=n(),rl=p("p"),rl.textContent=Ve,_t=n(),U(ul.$$.fragment),kt=n(),dl=p("p"),dl.innerHTML=Ge,vt=n(),U(fl.$$.fragment),St=n(),U(jl.$$.fragment),Xt=n(),hl=p("p"),hl.innerHTML=Ee,gt=n(),bl=p("p"),bl.textContent=Re,xt=n(),Cl=p("table"),Cl.innerHTML=We,$t=n(),Il=p("p"),Il.textContent=Ne,Yt=n(),Zl=p("ul"),Zl.innerHTML=_e,At=n(),U(Ql.$$.fragment),Ht=n(),U(Bl.$$.fragment),Ft=n(),Rl=p("p"),this.h()},l(l){const t=Ye("svelte-u9bgzb",document.head);T=i(t,"META",{name:!0,content:!0}),t.forEach(e),Wl=a(l),El=i(l,"P",{}),Dt(El).forEach(e),Nl=a(l),m(b.$$.fragment,l),_l=a(l),m(C.$$.fragment,l),kl=a(l),I=i(l,"P",{"data-svelte-h":!0}),M(I)!=="svelte-m9figs"&&(I.innerHTML=qt),vl=a(l),Z=i(l,"P",{"data-svelte-h":!0}),M(Z)!=="svelte-12e7d13"&&(Z.textContent=Ot),Sl=a(l),Q=i(l,"TABLE",{"data-svelte-h":!0}),M(Q)!=="svelte-1sy2nlq"&&(Q.innerHTML=Pt),Xl=a(l),B=i(l,"EM",{"data-svelte-h":!0}),M(B)!=="svelte-1iy2bqt"&&(B.textContent=Kt),gl=a(l),m(V.$$.fragment,l),xl=a(l),G=i(l,"P",{"data-svelte-h":!0}),M(G)!=="svelte-1q6vy6i"&&(G.innerHTML=le),$l=a(l),E=i(l,"P",{"data-svelte-h":!0}),M(E)!=="svelte-1xwtu1p"&&(E.innerHTML=te),Yl=a(l),m(R.$$.fragment,l),Al=a(l),m(W.$$.fragment,l),Hl=a(l),N=i(l,"P",{"data-svelte-h":!0}),M(N)!=="svelte-1gm9l4i"&&(N.innerHTML=ee),Fl=a(l),m(_.$$.fragment,l),zl=a(l),m(k.$$.fragment,l),Dl=a(l),v=i(l,"P",{"data-svelte-h":!0}),M(v)!=="svelte-1y9jw5r"&&(v.innerHTML=se),Ll=a(l),m(S.$$.fragment,l),ql=a(l),u=i(l,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),M(u)!=="svelte-1o2dyhe"&&(u.innerHTML=ne),Ol=a(l),m(X.$$.fragment,l),Pl=a(l),g=i(l,"P",{"data-s
velte-h":!0}),M(g)!=="svelte-97drxa"&&(g.textContent=ae),Kl=a(l),d=i(l,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),M(d)!=="svelte-8hlvqr"&&(d.innerHTML=pe),lt=a(l),x=i(l,"P",{"data-svelte-h":!0}),M(x)!=="svelte-19j5lzh"&&(x.innerHTML=ie),tt=a(l),m($.$$.fragment,l),et=a(l),Y=i(l,"P",{"data-svelte-h":!0}),M(Y)!=="svelte-1809mre"&&(Y.textContent=Me),st=a(l),m(A.$$.fragment,l),nt=a(l),H=i(l,"P",{"data-svelte-h":!0}),M(H)!=="svelte-1klv9ve"&&(H.textContent=Ue),at=a(l),F=i(l,"P",{"data-svelte-h":!0}),M(F)!=="svelte-1bo4p0c"&&(F.innerHTML=me),pt=a(l),z=i(l,"P",{"data-svelte-h":!0}),M(z)!=="svelte-j8mqed"&&(z.innerHTML=Je),it=a(l),m(D.$$.fragment,l),Mt=a(l),L=i(l,"P",{"data-svelte-h":!0}),M(L)!=="svelte-1l99s96"&&(L.textContent=ye),Ut=a(l),Vl=i(l,"A",{name:!0}),Dt(Vl).forEach(e),mt=ve(l,` | |
| ## 메모리 절약을 위해 가속 기능을 사용하여 CPU로 오프로딩 | |
| `),q=i(l,"P",{"data-svelte-h":!0}),M(q)!=="svelte-131mxrq"&&(q.textContent=oe),Jt=a(l),O=i(l,"P",{"data-svelte-h":!0}),M(O)!=="svelte-f0b7n3"&&(O.innerHTML=ce),yt=a(l),m(P.$$.fragment,l),ot=a(l),K=i(l,"P",{"data-svelte-h":!0}),M(K)!=="svelte-tablwz"&&(K.textContent=we),ct=a(l),ll=i(l,"P",{"data-svelte-h":!0}),M(ll)!=="svelte-1tbver6"&&(ll.textContent=Te),wt=a(l),f=i(l,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),M(f)!=="svelte-y7x6e7"&&(f.innerHTML=re),Tt=a(l),tl=i(l,"P",{"data-svelte-h":!0}),M(tl)!=="svelte-1hfwhk5"&&(tl.textContent=ue),rt=a(l),m(el.$$.fragment,l),ut=a(l),sl=i(l,"P",{"data-svelte-h":!0}),M(sl)!=="svelte-ax3hx7"&&(sl.innerHTML=de),dt=a(l),Gl=i(l,"A",{name:!0}),Dt(Gl).forEach(e),ft=ve(l,` | |
| ## 빠른 추론과 메모리 메모리 절약을 위한 모델 오프로딩 | |
| `),nl=i(l,"P",{"data-svelte-h":!0}),M(nl)!=="svelte-7dwxx7"&&(nl.innerHTML=fe),jt=a(l),al=i(l,"P",{"data-svelte-h":!0}),M(al)!=="svelte-1llz1y7"&&(al.innerHTML=je),ht=a(l),pl=i(l,"P",{"data-svelte-h":!0}),M(pl)!=="svelte-1hmauk"&&(pl.textContent=he),bt=a(l),il=i(l,"P",{"data-svelte-h":!0}),M(il)!=="svelte-1gllwmo"&&(il.innerHTML=be),Ct=a(l),m(Ml.$$.fragment,l),It=a(l),Ul=i(l,"P",{"data-svelte-h":!0}),M(Ul)!=="svelte-5q5hse"&&(Ul.textContent=Ce),Zt=a(l),m(ml.$$.fragment,l),Qt=a(l),j=i(l,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),M(j)!=="svelte-1e9waf1"&&(j.innerHTML=Ie),Bt=a(l),m(Jl.$$.fragment,l),Vt=a(l),yl=i(l,"P",{"data-svelte-h":!0}),M(yl)!=="svelte-1rrl8zz"&&(yl.textContent=Ze),Gt=a(l),ol=i(l,"P",{"data-svelte-h":!0}),M(ol)!=="svelte-1c0oa55"&&(ol.textContent=Qe),Et=a(l),m(cl.$$.fragment,l),Rt=a(l),m(wl.$$.fragment,l),Wt=a(l),Tl=i(l,"P",{"data-svelte-h":!0}),M(Tl)!=="svelte-1jkjfr1"&&(Tl.textContent=Be),Nt=a(l),rl=i(l,"P",{"data-svelte-h":!0}),M(rl)!=="svelte-mqbplb"&&(rl.textContent=Ve),_t=a(l),m(ul.$$.fragment,l),kt=a(l),dl=i(l,"P",{"data-svelte-h":!0}),M(dl)!=="svelte-1ewkmr2"&&(dl.innerHTML=Ge),vt=a(l),m(fl.$$.fragment,l),St=a(l),m(jl.$$.fragment,l),Xt=a(l),hl=i(l,"P",{"data-svelte-h":!0}),M(hl)!=="svelte-1ele8ui"&&(hl.innerHTML=Ee),gt=a(l),bl=i(l,"P",{"data-svelte-h":!0}),M(bl)!=="svelte-1aa24j0"&&(bl.textContent=Re),xt=a(l),Cl=i(l,"TABLE",{"data-svelte-h":!0}),M(Cl)!=="svelte-13acbqe"&&(Cl.innerHTML=We),$t=a(l),Il=i(l,"P",{"data-svelte-h":!0}),M(Il)!=="svelte-j9132p"&&(Il.textContent=Ne),Yt=a(l),Zl=i(l,"UL",{"data-svelte-h":!0}),M(Zl)!=="svelte-gqxwyg"&&(Zl.innerHTML=_e),At=a(l),m(Ql.$$.fragment,l),Ht=a(l),m(Bl.$$.fragment,l),Ft=a(l),Rl=i(l,"P",{}),Dt(Rl).forEach(e),this.h()},h(){h(T,"name","hf:doc:metadata"),h(T,"content",De),h(u,"class","warning"),h(d,"class","tip"),h(Vl,"name","sequential_offloading"),h(f,"class","tip"),h(Gl,"name","model_offloading"),h(j,"class","tip")},m(l,t){Ae(document.head,T),s(l,Wl,t),s(l,El,t),s(l,Nl,t),J(b,l,t),s(l,_l,t),J(C,
l,t),s(l,kl,t),s(l,I,t),s(l,vl,t),s(l,Z,t),s(l,Sl,t),s(l,Q,t),s(l,Xl,t),s(l,B,t),s(l,gl,t),J(V,l,t),s(l,xl,t),s(l,G,t),s(l,$l,t),s(l,E,t),s(l,Yl,t),J(R,l,t),s(l,Al,t),J(W,l,t),s(l,Hl,t),s(l,N,t),s(l,Fl,t),J(_,l,t),s(l,zl,t),J(k,l,t),s(l,Dl,t),s(l,v,t),s(l,Ll,t),J(S,l,t),s(l,ql,t),s(l,u,t),s(l,Ol,t),J(X,l,t),s(l,Pl,t),s(l,g,t),s(l,Kl,t),s(l,d,t),s(l,lt,t),s(l,x,t),s(l,tt,t),J($,l,t),s(l,et,t),s(l,Y,t),s(l,st,t),J(A,l,t),s(l,nt,t),s(l,H,t),s(l,at,t),s(l,F,t),s(l,pt,t),s(l,z,t),s(l,it,t),J(D,l,t),s(l,Mt,t),s(l,L,t),s(l,Ut,t),s(l,Vl,t),s(l,mt,t),s(l,q,t),s(l,Jt,t),s(l,O,t),s(l,yt,t),J(P,l,t),s(l,ot,t),s(l,K,t),s(l,ct,t),s(l,ll,t),s(l,wt,t),s(l,f,t),s(l,Tt,t),s(l,tl,t),s(l,rt,t),J(el,l,t),s(l,ut,t),s(l,sl,t),s(l,dt,t),s(l,Gl,t),s(l,ft,t),s(l,nl,t),s(l,jt,t),s(l,al,t),s(l,ht,t),s(l,pl,t),s(l,bt,t),s(l,il,t),s(l,Ct,t),J(Ml,l,t),s(l,It,t),s(l,Ul,t),s(l,Zt,t),J(ml,l,t),s(l,Qt,t),s(l,j,t),s(l,Bt,t),J(Jl,l,t),s(l,Vt,t),s(l,yl,t),s(l,Gt,t),s(l,ol,t),s(l,Et,t),J(cl,l,t),s(l,Rt,t),J(wl,l,t),s(l,Wt,t),s(l,Tl,t),s(l,Nt,t),s(l,rl,t),s(l,_t,t),J(ul,l,t),s(l,kt,t),s(l,dl,t),s(l,vt,t),J(fl,l,t),s(l,St,t),J(jl,l,t),s(l,Xt,t),s(l,hl,t),s(l,gt,t),s(l,bl,t),s(l,xt,t),s(l,Cl,t),s(l,$t,t),s(l,Il,t),s(l,Yt,t),s(l,Zl,t),s(l,At,t),J(Ql,l,t),s(l,Ht,t),J(Bl,l,t),s(l,Ft,t),s(l,Rl,t),zt=!0},p:Xe,i(l){zt||(y(b.$$.fragment,l),y(C.$$.fragment,l),y(V.$$.fragment,l),y(R.$$.fragment,l),y(W.$$.fragment,l),y(_.$$.fragment,l),y(k.$$.fragment,l),y(S.$$.fragment,l),y(X.$$.fragment,l),y($.$$.fragment,l),y(A.$$.fragment,l),y(D.$$.fragment,l),y(P.$$.fragment,l),y(el.$$.fragment,l),y(Ml.$$.fragment,l),y(ml.$$.fragment,l),y(Jl.$$.fragment,l),y(cl.$$.fragment,l),y(wl.$$.fragment,l),y(ul.$$.fragment,l),y(fl.$$.fragment,l),y(jl.$$.fragment,l),y(Ql.$$.fragment,l),y(Bl.$$.fragment,l),zt=!0)},o(l){o(b.$$.fragment,l),o(C.$$.fragment,l),o(V.$$.fragment,l),o(R.$$.fragment,l),o(W.$$.fragment,l),o(_.$$.fragment,l),o(k.$$.fragment,l),o(S.$$.fragment,l),o(X.$$.fragment,l),o($.$$.fragment,l),o(A.$$.fragment,l),o(D.$$.fragment,l
),o(P.$$.fragment,l),o(el.$$.fragment,l),o(Ml.$$.fragment,l),o(ml.$$.fragment,l),o(Jl.$$.fragment,l),o(cl.$$.fragment,l),o(wl.$$.fragment,l),o(ul.$$.fragment,l),o(fl.$$.fragment,l),o(jl.$$.fragment,l),o(Ql.$$.fragment,l),o(Bl.$$.fragment,l),zt=!1},d(l){l&&(e(Wl),e(El),e(Nl),e(_l),e(kl),e(I),e(vl),e(Z),e(Sl),e(Q),e(Xl),e(B),e(gl),e(xl),e(G),e($l),e(E),e(Yl),e(Al),e(Hl),e(N),e(Fl),e(zl),e(Dl),e(v),e(Ll),e(ql),e(u),e(Ol),e(Pl),e(g),e(Kl),e(d),e(lt),e(x),e(tt),e(et),e(Y),e(st),e(nt),e(H),e(at),e(F),e(pt),e(z),e(it),e(Mt),e(L),e(Ut),e(Vl),e(mt),e(q),e(Jt),e(O),e(yt),e(ot),e(K),e(ct),e(ll),e(wt),e(f),e(Tt),e(tl),e(rt),e(ut),e(sl),e(dt),e(Gl),e(ft),e(nl),e(jt),e(al),e(ht),e(pl),e(bt),e(il),e(Ct),e(It),e(Ul),e(Zt),e(Qt),e(j),e(Bt),e(Vt),e(yl),e(Gt),e(ol),e(Et),e(Rt),e(Wt),e(Tl),e(Nt),e(rl),e(_t),e(kt),e(dl),e(vt),e(St),e(Xt),e(hl),e(gt),e(bl),e(xt),e(Cl),e($t),e(Il),e(Yt),e(Zl),e(At),e(Ht),e(Ft),e(Rl)),e(T),c(b,l),c(C,l),c(V,l),c(R,l),c(W,l),c(_,l),c(k,l),c(S,l),c(X,l),c($,l),c(A,l),c(D,l),c(P,l),c(el,l),c(Ml,l),c(ml,l),c(Jl,l),c(cl,l),c(wl,l),c(ul,l),c(fl,l),c(jl,l),c(Ql,l),c(Bl,l)}}}/* De: JSON text describing this page's heading outline (title, local anchor id, nested sections, depth). It is the value handed to h(T,"content",De) for the node that is given the name "hf:doc:metadata". */const De='{"title":"메모리와 속도","local":"메모리와-속도","sections":[{"title":"cuDNN auto-tuner 활성화하기","local":"cudnn-auto-tuner-활성화하기","sections":[{"title":"fp32 대신 tf32 사용하기 (Ampere 및 이후 CUDA 장치들에서)","local":"fp32-대신-tf32-사용하기-ampere-및-이후-cuda-장치들에서","sections":[],"depth":3}],"depth":2},{"title":"반정밀도 가중치","local":"반정밀도-가중치","sections":[],"depth":2},{"title":"추가 메모리 절약을 위한 슬라이스 어텐션","local":"추가-메모리-절약을-위한-슬라이스-어텐션","sections":[],"depth":2},{"title":"더 큰 배치를 위한 sliced VAE 디코드","local":"더-큰-배치를-위한-sliced-vae-디코드","sections":[],"depth":2},{"title":"Channels Last 메모리 형식 사용하기","local":"channels-last-메모리-형식-사용하기","sections":[],"depth":2},{"title":"추적(tracing)","local":"추적tracing","sections":[],"depth":2},{"title":"Memory-efficient attention","local":"memory-efficient-attention","sections":[],"depth":2}],"depth":1}';/* Le: passed as the third argument to $e in the ls constructor (presumably the Svelte instance function - TODO confirm against the index chunk). It hands ge a callback that reads the "fw" query parameter from window.location.search and discards the value, then returns an empty array. The Lt parameter is unused. */function Le(Lt){return ge(()=>{new 
URLSearchParams(window.location.search).get("fw")}),[]}/* ls: component class for this page, exported under the name "component". Its constructor calls super() and then $e(this,T,Le,ze,Se,{}); xe and $e are imported from the index chunk (presumably Svelte's SvelteComponent and init - TODO confirm). */class ls extends xe{constructor(T){super(),$e(this,T,Le,ze,Se,{})}}export{ls as component}; | |
Xet Storage Details
- Size:
- 48.6 kB
- Xet hash:
- d8cb76afe9264582fc71502b3235a7c7478113641f0424850d1a2f5b15c65417
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.