Buckets:
| import{s as We,o as ge,n as ve}from"../chunks/scheduler.6e0d5ff7.js";import{S as Se,i as ke,g as p,s as n,r as J,m as Gl,E as Xe,h as M,f as e,c as a,j as zt,u as c,x as i,n as El,k as gl,y as Ne,a as s,v as y,d as o,t as w,w as r}from"../chunks/index.d7c1b260.js";import{T as Dt}from"../chunks/Tip.c000e27b.js";import{C as d}from"../chunks/CodeBlock.09a08494.js";import{H as h}from"../chunks/Heading.30a009b0.js";function Fe(f){let m;return{c(){m=Gl("어떤 파이프라인에서도 [`torch.autocast`](https://pytorch.org/docs/stable/amp.html#torch.autocast) 를 사용하는 것은 검은색 이미지를 생성할 수 있고, 순수한 float16 정밀도를 사용하는 것보다 항상 느리기 때문에 사용하지 않는 것이 좋습니다.")},l(U){m=El(U,"어떤 파이프라인에서도 [`torch.autocast`](https://pytorch.org/docs/stable/amp.html#torch.autocast) 를 사용하는 것은 검은색 이미지를 생성할 수 있고, 순수한 float16 정밀도를 사용하는 것보다 항상 느리기 때문에 사용하지 않는 것이 좋습니다.")},m(U,u){s(U,m,u)},d(U){U&&e(m)}}}function Ae(f){let m;return{c(){m=Gl(`Attention slicing은 모델이 하나 이상의 어텐션 헤드를 사용하는 한, 배치 크기가 1인 경우에도 유용합니다. | |
| 하나 이상의 어텐션 헤드가 있는 경우 *QK^T* 어텐션 매트릭스는 상당한 양의 메모리를 절약할 수 있는 각 헤드에 대해 순차적으로 계산될 수 있습니다.`)},l(U){m=El(U,`Attention slicing은 모델이 하나 이상의 어텐션 헤드를 사용하는 한, 배치 크기가 1인 경우에도 유용합니다. | |
| 하나 이상의 어텐션 헤드가 있는 경우 *QK^T* 어텐션 매트릭스는 상당한 양의 메모리를 절약할 수 있는 각 헤드에 대해 순차적으로 계산될 수 있습니다.`)},m(U,u){s(U,m,u)},d(U){U&&e(m)}}}function xe(f){let m,U,u="모델 오프로딩",j;return{c(){m=Gl("또 다른 최적화 방법인 "),U=p("a"),U.textContent=u,j=Gl("을 사용하는 것을 고려하십시오. 이는 훨씬 빠르지만 메모리 절약이 크지는 않습니다."),this.h()},l(T){m=El(T,"또 다른 최적화 방법인 "),U=M(T,"A",{href:!0,"data-svelte-h":!0}),i(U)!=="svelte-zbpoyt"&&(U.textContent=u),j=El(T,"을 사용하는 것을 고려하십시오. 이는 훨씬 빠르지만 메모리 절약이 크지는 않습니다."),this.h()},h(){gl(U,"href","#model_offloading")},m(T,C){s(T,m,C),s(T,U,C),s(T,j,C)},p:ve,d(T){T&&(e(m),e(U),e(j))}}}function Ye(f){let m;return{c(){m=Gl("이 기능을 사용하려면 'accelerate' 버전 0.17.0 이상이 필요합니다.")},l(U){m=El(U,"이 기능을 사용하려면 'accelerate' 버전 0.17.0 이상이 필요합니다.")},m(U,u){s(U,m,u)},d(U){U&&e(m)}}}function He(f){let m,U,u,j,T,C,R,Lt=`메모리 또는 속도에 대해 🤗 Diffusers <em>추론</em>을 최적화하기 위한 몇 가지 기술과 아이디어를 제시합니다. | |
| 일반적으로, memory-efficient attention을 위해 <a href="https://github.com/facebookresearch/xformers" rel="nofollow">xFormers</a> 사용을 추천하기 때문에, 추천하는 <a href="xformers">설치 방법</a>을 보고 설치해 보세요.`,vl,Q,qt="다음 설정이 성능과 메모리에 미치는 영향에 대해 설명합니다.",Sl,V,Ot="<thead><tr><th></th> <th>지연시간</th> <th>속도 향상</th></tr></thead> <tbody><tr><td>별도 설정 없음</td> <td>9.50s</td> <td>x1</td></tr> <tr><td>cuDNN auto-tuner</td> <td>9.37s</td> <td>x1.01</td></tr> <tr><td>fp16</td> <td>3.61s</td> <td>x2.63</td></tr> <tr><td>Channels Last 메모리 형식</td> <td>3.30s</td> <td>x2.88</td></tr> <tr><td>traced UNet</td> <td>3.21s</td> <td>x2.96</td></tr> <tr><td>memory-efficient attention</td> <td>2.63s</td> <td>x3.61</td></tr></tbody>",kl,G,Pt='NVIDIA TITAN RTX에서 50 DDIM 스텝의 "a photo of an astronaut riding a horse on mars" 프롬프트로 512x512 크기의 단일 이미지를 생성하였습니다.',Xl,E,Nl,_,Kt='<a href="https://developer.nvidia.com/cudnn" rel="nofollow">NVIDIA cuDNN</a>은 컨볼루션을 계산하는 많은 알고리즘을 지원합니다. Autotuner는 짧은 벤치마크를 실행하고 주어진 입력 크기에 대해 주어진 하드웨어에서 최고의 성능을 가진 커널을 선택합니다.',Fl,$,le="<strong>컨볼루션 네트워크</strong>를 활용하고 있기 때문에 (다른 유형들은 현재 지원되지 않음), 다음 설정을 통해 추론 전에 cuDNN autotuner를 활성화할 수 있습니다:",Al,W,xl,g,Yl,v,te=`Ampere 및 이후 CUDA 장치에서 행렬곱 및 컨볼루션은 TensorFloat32(TF32) 모드를 사용하여 더 빠르지만 약간 덜 정확할 수 있습니다. | |
| 기본적으로 PyTorch는 컨볼루션에 대해 TF32 모드를 활성화하지만 행렬 곱셈은 활성화하지 않습니다. | |
| 네트워크에 완전한 float32 정밀도가 필요한 경우가 아니면 행렬 곱셈에 대해서도 이 설정을 활성화하는 것이 좋습니다. | |
| 이는 일반적으로 무시할 수 있는 수치의 정확도 손실이 있지만, 계산 속도를 크게 높일 수 있습니다. | |
| 그것에 대해 <a href="https://huggingface.co/docs/transformers/v4.18.0/en/performance#tf32" rel="nofollow">여기</a>서 더 읽을 수 있습니다. | |
| 추론하기 전에 다음을 추가하기만 하면 됩니다:`,Hl,S,zl,k,Dl,X,ee=`더 많은 GPU 메모리를 절약하고 더 빠른 속도를 얻기 위해 모델 가중치를 반정밀도(half precision)로 직접 불러오고 실행할 수 있습니다. | |
| 여기에는 <code>fp16</code>이라는 브랜치에 저장된 float16 버전의 가중치를 불러오고, 그 때 <code>float16</code> 유형을 사용하도록 PyTorch에 지시하는 작업이 포함됩니다.`,Ll,N,ql,b,Ol,F,Pl,A,se="추가 메모리 절약을 위해, 한 번에 모두 계산하는 대신 단계적으로 계산을 수행하는 슬라이스 버전의 어텐션(attention)을 사용할 수 있습니다.",Kl,I,lt,x,ne="각 헤드에 대해 순차적으로 어텐션 계산을 수행하려면, 다음과 같이 추론 전에 파이프라인에서 <code>enable_attention_slicing()</code>를 호출하면 됩니다:",tt,Y,et,H,ae="추론 시간이 약 10% 느려지는 약간의 성능 저하가 있지만 이 방법을 사용하면 3.2GB 정도의 작은 VRAM으로도 Stable Diffusion을 사용할 수 있습니다!",st,z,nt,D,pe="제한된 VRAM에서 대규모 이미지 배치를 디코딩하거나 32개 이상의 이미지가 포함된 배치를 활성화하기 위해, 배치의 latent 이미지를 한 번에 하나씩 디코딩하는 슬라이스 VAE 디코드를 사용할 수 있습니다.",at,L,Me="이를 <code>enable_attention_slicing()</code> 또는 <code>enable_xformers_memory_efficient_attention()</code>과 결합하여 메모리 사용을 추가로 최소화할 수 있습니다.",pt,q,ie="VAE 디코드를 한 번에 하나씩 수행하려면 추론 전에 파이프라인에서 <code>enable_vae_slicing()</code>을 호출합니다. 예를 들어:",Mt,O,it,P,Ue="다중 이미지 배치에서 VAE 디코드가 약간의 성능 향상이 이루어집니다. 단일 이미지 배치에서는 성능 영향은 없습니다.",Ut,_l,mt,K,me="추가 메모리 절약을 위해 가중치를 CPU로 오프로드하고 순방향 전달을 수행할 때만 GPU로 로드할 수 있습니다.",Jt,ll,Je="CPU 오프로딩을 수행하려면 <code>enable_sequential_cpu_offload()</code>를 호출하기만 하면 됩니다:",ct,tl,yt,el,ce="그러면 메모리 소비를 3GB 미만으로 줄일 수 있습니다.",ot,sl,ye="참고로 이 방법은 전체 모델이 아닌 서브모듈 수준에서 작동합니다. 이는 메모리 소비를 최소화하는 가장 좋은 방법이지만 프로세스의 반복적 특성으로 인해 추론 속도가 훨씬 느립니다. 파이프라인의 UNet 구성 요소는 여러 번 실행됩니다(‘num_inference_steps’ 만큼). 매번 UNet의 서로 다른 서브모듈이 순차적으로 온로드된 다음 필요에 따라 오프로드되므로 메모리 이동 횟수가 많습니다.",wt,Z,rt,nl,oe="또한 ttention slicing과 연결해서 최소 메모리(< 2GB)로도 동작할 수 있습니다.",Tt,al,ut,pl,we='<strong>참고</strong>: ‘enable_sequential_cpu_offload()‘를 사용할 때, 미리 파이프라인을 CUDA로 이동하지 <strong>않는</strong> 것이 중요합니다.그렇지 않으면 메모리 소비의 이득이 최소화됩니다. 더 많은 정보를 위해 <a href="https://github.com/huggingface/diffusers/issues/1934" rel="nofollow">이 이슈</a>를 보세요.',dt,$l,ft,Ml,re='<a href="#sequential_offloading">순차적 CPU 오프로딩</a>은 이전 섹션에서 설명한 것처럼 많은 메모리를 보존하지만 필요에 따라 서브모듈을 GPU로 이동하고 새 모듈이 실행될 때 즉시 CPU로 반환되기 때문에 추론 속도가 느려집니다.',jt,il,Te="전체 모델 오프로딩은 각 모델의 구성 요소인 <em>modules</em>을 처리하는 대신, 전체 모델을 GPU로 이동하는 대안입니다. 이로 인해 추론 시간에 미치는 영향은 미미하지만(파이프라인을 ‘cuda’로 이동하는 것과 비교하여) 여전히 약간의 메모리를 절약할 수 있습니다.",ht,Ul,ue=`이 시나리오에서는 파이프라인의 주요 구성 요소 중 하나만(일반적으로 텍스트 인코더, unet 및 vae) GPU에 있고, 나머지는 CPU에서 대기할 것입니다. | |
| 여러 반복을 위해 실행되는 UNet과 같은 구성 요소는 더 이상 필요하지 않을 때까지 GPU에 남아 있습니다.`,Ct,ml,de="이 기능은 아래와 같이 파이프라인에서 <code>enable_model_cpu_offload()</code>를 호출하여 활성화할 수 있습니다.",bt,Jl,It,cl,fe="이는 추가적인 메모리 절약을 위한 attention slicing과도 호환됩니다.",Zt,yl,Bt,B,Rt,ol,Qt,wl,je=`Channels Last 메모리 형식은 차원 순서를 보존하는 메모리에서 NCHW 텐서 배열을 대체하는 방법입니다. | |
| Channels Last 텐서는 채널이 가장 조밀한 차원이 되는 방식으로 정렬됩니다(일명 픽셀당 이미지를 저장). | |
| 현재 모든 연산자 Channels Last 형식을 지원하는 것은 아니라 성능이 저하될 수 있으므로, 사용해보고 모델에 잘 작동하는지 확인하는 것이 좋습니다.`,Vt,rl,he="예를 들어 파이프라인의 UNet 모델이 channels Last 형식을 사용하도록 설정하려면 다음을 사용할 수 있습니다:",Gt,Tl,Et,ul,_t,dl,Ce="추적은 모델을 통해 예제 입력 텐서를 통해 실행되는데, 해당 입력이 모델의 레이어를 통과할 때 호출되는 작업을 캡처하여 실행 파일 또는 ‘ScriptFunction’이 반환되도록 하고, 이는 just-in-time 컴파일로 최적화됩니다.",$t,fl,be="UNet 모델을 추적하기 위해 다음을 사용할 수 있습니다:",Wt,jl,gt,hl,Ie="그 다음, 파이프라인의 <code>unet</code> 특성을 다음과 같이 추적된 모델로 바꿀 수 있습니다.",vt,Cl,St,bl,kt,Il,Ze=`어텐션 블록의 대역폭을 최적화하는 최근 작업으로 GPU 메모리 사용량이 크게 향상되고 향상되었습니다. | |
| @tridao의 가장 최근의 플래시 어텐션: <a href="https://github.com/HazyResearch/flash-attention" rel="nofollow">code</a>, <a href="https://arxiv.org/pdf/2205.14135.pdf" rel="nofollow">paper</a>.`,Xt,Zl,Be="배치 크기 1(프롬프트 1개)의 512x512 크기로 추론을 실행할 때 몇 가지 Nvidia GPU에서 얻은 속도 향상은 다음과 같습니다:",Nt,Bl,Re="<thead><tr><th>GPU</th> <th>기준 어텐션 FP16</th> <th>메모리 효율적인 어텐션 FP16</th></tr></thead> <tbody><tr><td>NVIDIA Tesla T4</td> <td>3.5it/s</td> <td>5.5it/s</td></tr> <tr><td>NVIDIA 3060 RTX</td> <td>4.6it/s</td> <td>7.8it/s</td></tr> <tr><td>NVIDIA A10G</td> <td>8.88it/s</td> <td>15.6it/s</td></tr> <tr><td>NVIDIA RTX A6000</td> <td>11.7it/s</td> <td>21.09it/s</td></tr> <tr><td>NVIDIA TITAN RTX</td> <td>12.51it/s</td> <td>18.22it/s</td></tr> <tr><td>A100-SXM4-40GB</td> <td>18.6it/s</td> <td>29.it/s</td></tr> <tr><td>A100-SXM-80GB</td> <td>18.7it/s</td> <td>29.5it/s</td></tr></tbody>",Ft,Rl,Qe="이를 활용하려면 다음을 만족해야 합니다:",At,Ql,Ve='<li>PyTorch > 1.12</li> <li>Cuda 사용 가능</li> <li><a href="xformers">xformers 라이브러리를 설치함</a></li>',xt,Vl,Yt,Wl,Ht;return T=new h({props:{title:"메모리와 속도",local:"메모리와-속도",headingTag:"h1"}}),E=new h({props:{title:"cuDNN auto-tuner 활성화하기",local:"cudnn-auto-tuner-활성화하기",headingTag:"h2"}}),W=new d({props:{code:"aW1wb3J0JTIwdG9yY2glMEElMEF0b3JjaC5iYWNrZW5kcy5jdWRubi5iZW5jaG1hcmslMjAlM0QlMjBUcnVl",highlighted:`<span class="hljs-keyword">import</span> torch | |
| torch.backends.cudnn.benchmark = <span class="hljs-literal">True</span>`,wrap:!1}}),g=new h({props:{title:"fp32 대신 tf32 사용하기 (Ampere 및 이후 CUDA 장치들에서)",local:"fp32-대신-tf32-사용하기-ampere-및-이후-cuda-장치들에서",headingTag:"h3"}}),S=new d({props:{code:"aW1wb3J0JTIwdG9yY2glMEElMEF0b3JjaC5iYWNrZW5kcy5jdWRhLm1hdG11bC5hbGxvd190ZjMyJTIwJTNEJTIwVHJ1ZQ==",highlighted:`<span class="hljs-keyword">import</span> torch | |
| torch.backends.cuda.matmul.allow_tf32 = <span class="hljs-literal">True</span>`,wrap:!1}}),k=new h({props:{title:"반정밀도 가중치",local:"반정밀도-가중치",headingTag:"h2"}}),N=new d({props:{code:"cGlwZSUyMCUzRCUyMFN0YWJsZURpZmZ1c2lvblBpcGVsaW5lLmZyb21fcHJldHJhaW5lZCglMEElMjAlMjAlMjAlMjAlMjJydW53YXltbCUyRnN0YWJsZS1kaWZmdXNpb24tdjEtNSUyMiUyQyUwQSUyMCUyMCUyMCUyMCUwQSUyMCUyMCUyMCUyMHRvcmNoX2R0eXBlJTNEdG9yY2guZmxvYXQxNiUyQyUwQSklMEFwaXBlJTIwJTNEJTIwcGlwZS50byglMjJjdWRhJTIyKSUwQSUwQXByb21wdCUyMCUzRCUyMCUyMmElMjBwaG90byUyMG9mJTIwYW4lMjBhc3Ryb25hdXQlMjByaWRpbmclMjBhJTIwaG9yc2UlMjBvbiUyMG1hcnMlMjIlMEFpbWFnZSUyMCUzRCUyMHBpcGUocHJvbXB0KS5pbWFnZXMlNUIwJTVE",highlighted:`pipe = StableDiffusionPipeline.from_pretrained( | |
| <span class="hljs-string">"runwayml/stable-diffusion-v1-5"</span>, | |
| torch_dtype=torch.float16, | |
| ) | |
| pipe = pipe.to(<span class="hljs-string">"cuda"</span>) | |
| prompt = <span class="hljs-string">"a photo of an astronaut riding a horse on mars"</span> | |
| image = pipe(prompt).images[<span class="hljs-number">0</span>]`,wrap:!1}}),b=new Dt({props:{warning:!0,$$slots:{default:[Fe]},$$scope:{ctx:f}}}),F=new h({props:{title:"추가 메모리 절약을 위한 슬라이스 어텐션",local:"추가-메모리-절약을-위한-슬라이스-어텐션",headingTag:"h2"}}),I=new Dt({props:{$$slots:{default:[Ae]},$$scope:{ctx:f}}}),Y=new d({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwU3RhYmxlRGlmZnVzaW9uUGlwZWxpbmUlMEElMEFwaXBlJTIwJTNEJTIwU3RhYmxlRGlmZnVzaW9uUGlwZWxpbmUuZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMCUyMnJ1bndheW1sJTJGc3RhYmxlLWRpZmZ1c2lvbi12MS01JTIyJTJDJTBBJTIwJTIwJTIwJTIwJTBBJTIwJTIwJTIwJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5mbG9hdDE2JTJDJTBBKSUwQXBpcGUlMjAlM0QlMjBwaXBlLnRvKCUyMmN1ZGElMjIpJTBBJTBBcHJvbXB0JTIwJTNEJTIwJTIyYSUyMHBob3RvJTIwb2YlMjBhbiUyMGFzdHJvbmF1dCUyMHJpZGluZyUyMGElMjBob3JzZSUyMG9uJTIwbWFycyUyMiUwQXBpcGUuZW5hYmxlX2F0dGVudGlvbl9zbGljaW5nKCklMEFpbWFnZSUyMCUzRCUyMHBpcGUocHJvbXB0KS5pbWFnZXMlNUIwJTVE",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionPipeline | |
| pipe = StableDiffusionPipeline.from_pretrained( | |
| <span class="hljs-string">"runwayml/stable-diffusion-v1-5"</span>, | |
| torch_dtype=torch.float16, | |
| ) | |
| pipe = pipe.to(<span class="hljs-string">"cuda"</span>) | |
| prompt = <span class="hljs-string">"a photo of an astronaut riding a horse on mars"</span> | |
| pipe.enable_attention_slicing() | |
| image = pipe(prompt).images[<span class="hljs-number">0</span>]`,wrap:!1}}),z=new h({props:{title:"더 큰 배치를 위한 sliced VAE 디코드",local:"더-큰-배치를-위한-sliced-vae-디코드",headingTag:"h2"}}),O=new d({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwU3RhYmxlRGlmZnVzaW9uUGlwZWxpbmUlMEElMEFwaXBlJTIwJTNEJTIwU3RhYmxlRGlmZnVzaW9uUGlwZWxpbmUuZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMCUyMnJ1bndheW1sJTJGc3RhYmxlLWRpZmZ1c2lvbi12MS01JTIyJTJDJTBBJTIwJTIwJTIwJTIwJTBBJTIwJTIwJTIwJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5mbG9hdDE2JTJDJTBBKSUwQXBpcGUlMjAlM0QlMjBwaXBlLnRvKCUyMmN1ZGElMjIpJTBBJTBBcHJvbXB0JTIwJTNEJTIwJTIyYSUyMHBob3RvJTIwb2YlMjBhbiUyMGFzdHJvbmF1dCUyMHJpZGluZyUyMGElMjBob3JzZSUyMG9uJTIwbWFycyUyMiUwQXBpcGUuZW5hYmxlX3ZhZV9zbGljaW5nKCklMEFpbWFnZXMlMjAlM0QlMjBwaXBlKCU1QnByb21wdCU1RCUyMColMjAzMikuaW1hZ2Vz",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionPipeline | |
| pipe = StableDiffusionPipeline.from_pretrained( | |
| <span class="hljs-string">"runwayml/stable-diffusion-v1-5"</span>, | |
| torch_dtype=torch.float16, | |
| ) | |
| pipe = pipe.to(<span class="hljs-string">"cuda"</span>) | |
| prompt = <span class="hljs-string">"a photo of an astronaut riding a horse on mars"</span> | |
| pipe.enable_vae_slicing() | |
| images = pipe([prompt] * <span class="hljs-number">32</span>).images`,wrap:!1}}),tl=new d({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwU3RhYmxlRGlmZnVzaW9uUGlwZWxpbmUlMEElMEFwaXBlJTIwJTNEJTIwU3RhYmxlRGlmZnVzaW9uUGlwZWxpbmUuZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMCUyMnJ1bndheW1sJTJGc3RhYmxlLWRpZmZ1c2lvbi12MS01JTIyJTJDJTBBJTIwJTIwJTIwJTIwJTBBJTIwJTIwJTIwJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5mbG9hdDE2JTJDJTBBKSUwQSUwQXByb21wdCUyMCUzRCUyMCUyMmElMjBwaG90byUyMG9mJTIwYW4lMjBhc3Ryb25hdXQlMjByaWRpbmclMjBhJTIwaG9yc2UlMjBvbiUyMG1hcnMlMjIlMEFwaXBlLmVuYWJsZV9zZXF1ZW50aWFsX2NwdV9vZmZsb2FkKCklMEFpbWFnZSUyMCUzRCUyMHBpcGUocHJvbXB0KS5pbWFnZXMlNUIwJTVE",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionPipeline | |
| pipe = StableDiffusionPipeline.from_pretrained( | |
| <span class="hljs-string">"runwayml/stable-diffusion-v1-5"</span>, | |
| torch_dtype=torch.float16, | |
| ) | |
| prompt = <span class="hljs-string">"a photo of an astronaut riding a horse on mars"</span> | |
| pipe.enable_sequential_cpu_offload() | |
| image = pipe(prompt).images[<span class="hljs-number">0</span>]`,wrap:!1}}),Z=new Dt({props:{$$slots:{default:[xe]},$$scope:{ctx:f}}}),al=new d({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwU3RhYmxlRGlmZnVzaW9uUGlwZWxpbmUlMEElMEFwaXBlJTIwJTNEJTIwU3RhYmxlRGlmZnVzaW9uUGlwZWxpbmUuZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMCUyMnJ1bndheW1sJTJGc3RhYmxlLWRpZmZ1c2lvbi12MS01JTIyJTJDJTBBJTIwJTIwJTIwJTIwJTBBJTIwJTIwJTIwJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5mbG9hdDE2JTJDJTBBKSUwQSUwQXByb21wdCUyMCUzRCUyMCUyMmElMjBwaG90byUyMG9mJTIwYW4lMjBhc3Ryb25hdXQlMjByaWRpbmclMjBhJTIwaG9yc2UlMjBvbiUyMG1hcnMlMjIlMEFwaXBlLmVuYWJsZV9zZXF1ZW50aWFsX2NwdV9vZmZsb2FkKCklMEFwaXBlLmVuYWJsZV9hdHRlbnRpb25fc2xpY2luZygxKSUwQSUwQWltYWdlJTIwJTNEJTIwcGlwZShwcm9tcHQpLmltYWdlcyU1QjAlNUQ=",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionPipeline | |
| pipe = StableDiffusionPipeline.from_pretrained( | |
| <span class="hljs-string">"runwayml/stable-diffusion-v1-5"</span>, | |
| torch_dtype=torch.float16, | |
| ) | |
| prompt = <span class="hljs-string">"a photo of an astronaut riding a horse on mars"</span> | |
| pipe.enable_sequential_cpu_offload() | |
| pipe.enable_attention_slicing(<span class="hljs-number">1</span>) | |
| image = pipe(prompt).images[<span class="hljs-number">0</span>]`,wrap:!1}}),Jl=new d({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwU3RhYmxlRGlmZnVzaW9uUGlwZWxpbmUlMEElMEFwaXBlJTIwJTNEJTIwU3RhYmxlRGlmZnVzaW9uUGlwZWxpbmUuZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMCUyMnJ1bndheW1sJTJGc3RhYmxlLWRpZmZ1c2lvbi12MS01JTIyJTJDJTIwJTIwJTBBJTIwJTIwJTIwJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5mbG9hdDE2JTJDJTBBKSUwQSUwQXByb21wdCUyMCUzRCUyMCUyMmElMjBwaG90byUyMG9mJTIwYW4lMjBhc3Ryb25hdXQlMjByaWRpbmclMjBhJTIwaG9yc2UlMjBvbiUyMG1hcnMlMjIlMEFwaXBlLmVuYWJsZV9tb2RlbF9jcHVfb2ZmbG9hZCgpJTBBaW1hZ2UlMjAlM0QlMjBwaXBlKHByb21wdCkuaW1hZ2VzJTVCMCU1RA==",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionPipeline | |
| pipe = StableDiffusionPipeline.from_pretrained( | |
| <span class="hljs-string">"runwayml/stable-diffusion-v1-5"</span>, | |
| torch_dtype=torch.float16, | |
| ) | |
| prompt = <span class="hljs-string">"a photo of an astronaut riding a horse on mars"</span> | |
| pipe.enable_model_cpu_offload() | |
| image = pipe(prompt).images[<span class="hljs-number">0</span>]`,wrap:!1}}),yl=new d({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwU3RhYmxlRGlmZnVzaW9uUGlwZWxpbmUlMEElMEFwaXBlJTIwJTNEJTIwU3RhYmxlRGlmZnVzaW9uUGlwZWxpbmUuZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMCUyMnJ1bndheW1sJTJGc3RhYmxlLWRpZmZ1c2lvbi12MS01JTIyJTJDJTBBJTIwJTIwJTIwJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5mbG9hdDE2JTJDJTBBKSUwQSUwQXByb21wdCUyMCUzRCUyMCUyMmElMjBwaG90byUyMG9mJTIwYW4lMjBhc3Ryb25hdXQlMjByaWRpbmclMjBhJTIwaG9yc2UlMjBvbiUyMG1hcnMlMjIlMEFwaXBlLmVuYWJsZV9tb2RlbF9jcHVfb2ZmbG9hZCgpJTBBcGlwZS5lbmFibGVfYXR0ZW50aW9uX3NsaWNpbmcoMSklMEElMEFpbWFnZSUyMCUzRCUyMHBpcGUocHJvbXB0KS5pbWFnZXMlNUIwJTVE",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionPipeline | |
| pipe = StableDiffusionPipeline.from_pretrained( | |
| <span class="hljs-string">"runwayml/stable-diffusion-v1-5"</span>, | |
| torch_dtype=torch.float16, | |
| ) | |
| prompt = <span class="hljs-string">"a photo of an astronaut riding a horse on mars"</span> | |
| pipe.enable_model_cpu_offload() | |
| pipe.enable_attention_slicing(<span class="hljs-number">1</span>) | |
| image = pipe(prompt).images[<span class="hljs-number">0</span>]`,wrap:!1}}),B=new Dt({props:{$$slots:{default:[Ye]},$$scope:{ctx:f}}}),ol=new h({props:{title:"Channels Last 메모리 형식 사용하기",local:"channels-last-메모리-형식-사용하기",headingTag:"h2"}}),Tl=new d({props:{code:"cHJpbnQocGlwZS51bmV0LmNvbnZfb3V0LnN0YXRlX2RpY3QoKSU1QiUyMndlaWdodCUyMiU1RC5zdHJpZGUoKSklMjAlMjAlMjMlMjAoMjg4MCUyQyUyMDklMkMlMjAzJTJDJTIwMSklMEFwaXBlLnVuZXQudG8obWVtb3J5X2Zvcm1hdCUzRHRvcmNoLmNoYW5uZWxzX2xhc3QpJTIwJTIwJTIzJTIwaW4tcGxhY2UlMjAlRUMlOTclQjAlRUMlODIlQjAlMEElMjMlMjAyJUVCJUIyJTg4JUVDJUE3JUI4JTIwJUVDJUIwJUE4JUVDJTlCJTkwJUVDJTk3JTkwJUVDJTg0JTlDJTIwJUVDJThBJUE0JUVEJThBJUI4JUVCJTlEJUJDJUVDJTlEJUI0JUVCJTkzJTlDJTIwMSVFQyU5RCU4NCUyMCVFQSVCMCU4MCVFQyVBNyU4MCVFQiU4QSU5NCUyMCgyODgwJTJDJTIwMSUyQyUyMDk2MCUyQyUyMDMyMCklRUIlQTElOUMlMkMlMjAlRUMlOTclQjAlRUMlODIlQjAlRUMlOUQlQjQlMjAlRUMlOUUlOTElRUIlOEYlOTklRUQlOTUlQTglRUMlOUQlODQlMjAlRUMlQTYlOUQlRUIlQUElODUlRUQlOTUlQTklRUIlOEIlODglRUIlOEIlQTQuJTBBcHJpbnQocGlwZS51bmV0LmNvbnZfb3V0LnN0YXRlX2RpY3QoKSU1QiUyMndlaWdodCUyMiU1RC5zdHJpZGUoKSk=",highlighted:`<span class="hljs-built_in">print</span>(pipe.unet.conv_out.state_dict()[<span class="hljs-string">"weight"</span>].stride()) <span class="hljs-comment"># (2880, 9, 3, 1)</span> | |
| pipe.unet.to(memory_format=torch.channels_last) <span class="hljs-comment"># in-place 연산</span> | |
| <span class="hljs-comment"># 2번째 차원에서 스트라이드 1을 가지는 (2880, 1, 960, 320)로, 연산이 작동함을 증명합니다.</span> | |
| <span class="hljs-built_in">print</span>(pipe.unet.conv_out.state_dict()[<span class="hljs-string">"weight"</span>].stride())`,wrap:!1}}),ul=new h({props:{title:"추적(tracing)",local:"추적tracing",headingTag:"h2"}}),jl=new d({props:{code:"aW1wb3J0JTIwdGltZSUwQWltcG9ydCUyMHRvcmNoJTBBZnJvbSUyMGRpZmZ1c2VycyUyMGltcG9ydCUyMFN0YWJsZURpZmZ1c2lvblBpcGVsaW5lJTBBaW1wb3J0JTIwZnVuY3Rvb2xzJTBBJTBBJTIzJTIwdG9yY2glMjAlRUElQjglQjAlRUMlOUElQjglRUElQjglQjAlMjAlRUIlQjklODQlRUQlOTklOUMlRUMlODQlQjElRUQlOTklOTQlMEF0b3JjaC5zZXRfZ3JhZF9lbmFibGVkKEZhbHNlKSUwQSUwQSUyMyUyMCVFQiVCMyU4MCVFQyU4OCU5OCUyMCVFQyU4NCVBNCVFQyVBMCU5NSUwQW5fZXhwZXJpbWVudHMlMjAlM0QlMjAyJTBBdW5ldF9ydW5zX3Blcl9leHBlcmltZW50JTIwJTNEJTIwNTAlMEElMEElMEElMjMlMjAlRUMlOUUlODUlRUIlQTAlQTUlMjAlRUIlQjYlODglRUIlOUYlQUMlRUMlOTglQTQlRUElQjglQjAlMEFkZWYlMjBnZW5lcmF0ZV9pbnB1dHMoKSUzQSUwQSUyMCUyMCUyMCUyMHNhbXBsZSUyMCUzRCUyMHRvcmNoLnJhbmRuKCgyJTJDJTIwNCUyQyUyMDY0JTJDJTIwNjQpJTJDJTIwZGV2aWNlJTNEJTIyY3VkYSUyMiUyQyUyMGR0eXBlJTNEdG9yY2guZmxvYXQxNiklMEElMjAlMjAlMjAlMjB0aW1lc3RlcCUyMCUzRCUyMHRvcmNoLnJhbmQoMSUyQyUyMGRldmljZSUzRCUyMmN1ZGElMjIlMkMlMjBkdHlwZSUzRHRvcmNoLmZsb2F0MTYpJTIwKiUyMDk5OSUwQSUyMCUyMCUyMCUyMGVuY29kZXJfaGlkZGVuX3N0YXRlcyUyMCUzRCUyMHRvcmNoLnJhbmRuKCgyJTJDJTIwNzclMkMlMjA3NjgpJTJDJTIwZGV2aWNlJTNEJTIyY3VkYSUyMiUyQyUyMGR0eXBlJTNEdG9yY2guZmxvYXQxNiklMEElMjAlMjAlMjAlMjByZXR1cm4lMjBzYW1wbGUlMkMlMjB0aW1lc3RlcCUyQyUyMGVuY29kZXJfaGlkZGVuX3N0YXRlcyUwQSUwQSUwQXBpcGUlMjAlM0QlMjBTdGFibGVEaWZmdXNpb25QaXBlbGluZS5mcm9tX3ByZXRyYWluZWQoJTBBJTIwJTIwJTIwJTIwJTIycnVud2F5bWwlMkZzdGFibGUtZGlmZnVzaW9uLXYxLTUlMjIlMkMlMEElMjAlMjAlMjAlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmZsb2F0MTYlMkMlMEEpLnRvKCUyMmN1ZGElMjIpJTBBdW5ldCUyMCUzRCUyMHBpcGUudW5ldCUwQXVuZXQuZXZhbCgpJTBBdW5ldC50byhtZW1vcnlfZm9ybWF0JTNEdG9yY2guY2hhbm5lbHNfbGFzdCklMjAlMjAlMjMlMjBDaGFubmVscyUyMExhc3QlMjAlRUIlQTklOTQlRUIlQUElQTglRUIlQTYlQUMlMjAlRUQlOTglOTUlRUMlOEIlOUQlMjAlRUMlODIlQUMlRUMlOUElQTklMEF1bmV0LmZvcndhcmQlMjAlM0QlMjBmdW5jdG9vbHMucGFydGlhbCh1bmV0LmZvcndhcmQlMkMlMjByZXR1cm5fZGljdCUzREZhbHNlKSUyMCUyMCUyMyUyMHJldHVybl9kaWN0JTNERmFsc2UlRUMlOUQlODQlMjAlRUElQjglQjAlRUIlQjMlQjglRUElQjAlOTIlRUMlOUMlQkMlRUIlQTElOUMlMjAlRUMlODQlQTQlRUMlQTAlOTUlMEElMEElMjMlMjAlRUMlOUIlOEMlRUIlQjAlOEQlRUMlOTclODUlMEFmb3IlMjBfJTIwaW4lMjByYW5nZSgzKSUzQSUwQSUyMCUyMCUyMCUyMHdpdGglMjB0b3JjaC5pbmZlcmVuY2VfbW9kZSgpJTNBJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwaW5wdXRzJTIwJTNEJTIwZ2VuZXJhdGVfaW5wdXRzKCklMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBvcmlnX291dHB1dCUyMCUzRCUyMHVuZXQoKmlucHV0cyklMEElMEElMjMlMjAlRUMlQjYlOTQlRUMlQTAlODElMEFwcmludCglMjJ0cmFjaW5nLi4lMjIpJTBBdW5ldF90cmFjZWQlMjAlM0QlMjB0b3JjaC5qaXQudHJhY2UodW5ldCUyQyUyMGlucHV0cyklMEF1bmV0X3RyYWNlZC5ldmFsKCklMEFwcmludCglMjJkb25lJTIwdHJhY2luZyUyMiklMEElMEElMEElMjMlMjAlRUMlOUIlOEMlRUIlQjAlOEQlRUMlOTclODUlMjAlRUIlQjAlOEYlMjAlRUElQjclQjglRUIlOUUlOTglRUQlOTQlODQlMjAlRUMlQjUlOUMlRUMlQTAlODElRUQlOTklOTQlMEFmb3IlMjBfJTIwaW4lMjByYW5nZSg1KSUzQSUwQSUyMCUyMCUyMCUyMHdpdGglMjB0b3JjaC5pbmZlcmVuY2VfbW9kZSgpJTNBJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwaW5wdXRzJTIwJTNEJTIwZ2VuZXJhdGVfaW5wdXRzKCklMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBvcmlnX291dHB1dCUyMCUzRCUyMHVuZXRfdHJhY2VkKCppbnB1dHMpJTBBJTBBJTBBJTIzJTIwJUVCJUIyJUE0JUVDJUI5JTk4JUVCJUE3JTg4JUVEJTgyJUI5JTBBd2l0aCUyMHRvcmNoLmluZmVyZW5jZV9tb2RlKCklM0ElMEElMjAlMjAlMjAlMjBmb3IlMjBfJTIwaW4lMjByYW5nZShuX2V4cGVyaW1lbnRzKSUzQSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHRvcmNoLmN1ZGEuc3luY2hyb25pemUoKSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHN0YXJ0X3RpbWUlMjAlM0QlMjB0aW1lLnRpbWUoKSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGZvciUyMF8lMjBpbiUyMHJhbmdlKHVuZXRfcnVuc19wZXJfZXhwZXJpbWVudCklM0ElMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBvcmlnX291dHB1dCUyMCUzRCUyMHVuZXRfdHJhY2VkKCppbnB1dHMpJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwdG9yY2guY3VkYS5zeW5jaHJvbml6ZSgpJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwcHJpbnQoZiUyMnVuZXQlMjB0cmFjZWQlMjBpbmZlcmVuY2UlMjB0b29rJTIwJTdCdGltZS50aW1lKCklMjAtJTIwc3RhcnRfdGltZSUzQS4yZiU3RCUyMHNlY29uZHMlMjIpJTBBJTIwJTIwJTIwJTIwZm9yJTIwXyUyMGluJTIwcmFuZ2Uobl9leHBlcmltZW50cyklM0ElMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjB0b3JjaC5jdWRhLnN5bmNocm9uaXplKCklMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBzdGFydF90aW1lJTIwJTNEJTIwdGltZS50aW1lKCklMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBmb3IlMjBfJTIwaW4lMjByYW5nZSh1bmV0X3J1bnNfcGVyX2V4cGVyaW1lbnQpJTNBJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwb3JpZ19vdXRwdXQlMjAlM0QlMjB1bmV0KCppbnB1dHMpJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwdG9yY2guY3VkYS5zeW5jaHJvbml6ZSgpJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwcHJpbnQoZiUyMnVuZXQlMjBpbmZlcmVuY2UlMjB0b29rJTIwJTdCdGltZS50aW1lKCklMjAtJTIwc3RhcnRfdGltZSUzQS4yZiU3RCUyMHNlY29uZHMlMjIpJTBBJTBBJTIzJTIwJUVCJUFBJUE4JUVCJThEJUI4JTIwJUVDJUEwJTgwJUVDJTlFJUE1JTBBdW5ldF90cmFjZWQuc2F2ZSglMjJ1bmV0X3RyYWNlZC5wdCUyMik=",highlighted:`<span class="hljs-keyword">import</span> time | |
| <span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionPipeline | |
| <span class="hljs-keyword">import</span> functools | |
| <span class="hljs-comment"># torch 기울기 비활성화</span> | |
| torch.set_grad_enabled(<span class="hljs-literal">False</span>) | |
| <span class="hljs-comment"># 변수 설정</span> | |
| n_experiments = <span class="hljs-number">2</span> | |
| unet_runs_per_experiment = <span class="hljs-number">50</span> | |
| <span class="hljs-comment"># 입력 불러오기</span> | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">generate_inputs</span>(): | |
| sample = torch.randn((<span class="hljs-number">2</span>, <span class="hljs-number">4</span>, <span class="hljs-number">64</span>, <span class="hljs-number">64</span>), device=<span class="hljs-string">"cuda"</span>, dtype=torch.float16) | |
| timestep = torch.rand(<span class="hljs-number">1</span>, device=<span class="hljs-string">"cuda"</span>, dtype=torch.float16) * <span class="hljs-number">999</span> | |
| encoder_hidden_states = torch.randn((<span class="hljs-number">2</span>, <span class="hljs-number">77</span>, <span class="hljs-number">768</span>), device=<span class="hljs-string">"cuda"</span>, dtype=torch.float16) | |
| <span class="hljs-keyword">return</span> sample, timestep, encoder_hidden_states | |
| pipe = StableDiffusionPipeline.from_pretrained( | |
| <span class="hljs-string">"runwayml/stable-diffusion-v1-5"</span>, | |
| torch_dtype=torch.float16, | |
| ).to(<span class="hljs-string">"cuda"</span>) | |
| unet = pipe.unet | |
| unet.<span class="hljs-built_in">eval</span>() | |
| unet.to(memory_format=torch.channels_last) <span class="hljs-comment"># Channels Last 메모리 형식 사용</span> | |
| unet.forward = functools.partial(unet.forward, return_dict=<span class="hljs-literal">False</span>) <span class="hljs-comment"># return_dict=False을 기본값으로 설정</span> | |
| <span class="hljs-comment"># 워밍업</span> | |
| <span class="hljs-keyword">for</span> _ <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(<span class="hljs-number">3</span>): | |
| <span class="hljs-keyword">with</span> torch.inference_mode(): | |
| inputs = generate_inputs() | |
| orig_output = unet(*inputs) | |
| <span class="hljs-comment"># 추적</span> | |
| <span class="hljs-built_in">print</span>(<span class="hljs-string">"tracing.."</span>) | |
| unet_traced = torch.jit.trace(unet, inputs) | |
| unet_traced.<span class="hljs-built_in">eval</span>() | |
| <span class="hljs-built_in">print</span>(<span class="hljs-string">"done tracing"</span>) | |
| <span class="hljs-comment"># 워밍업 및 그래프 최적화</span> | |
| <span class="hljs-keyword">for</span> _ <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(<span class="hljs-number">5</span>): | |
| <span class="hljs-keyword">with</span> torch.inference_mode(): | |
| inputs = generate_inputs() | |
| orig_output = unet_traced(*inputs) | |
| <span class="hljs-comment"># 벤치마킹</span> | |
| <span class="hljs-keyword">with</span> torch.inference_mode(): | |
| <span class="hljs-keyword">for</span> _ <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(n_experiments): | |
| torch.cuda.synchronize() | |
| start_time = time.time() | |
| <span class="hljs-keyword">for</span> _ <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(unet_runs_per_experiment): | |
| orig_output = unet_traced(*inputs) | |
| torch.cuda.synchronize() | |
| <span class="hljs-built_in">print</span>(<span class="hljs-string">f"unet traced inference took <span class="hljs-subst">{time.time() - start_time:<span class="hljs-number">.2</span>f}</span> seconds"</span>) | |
| <span class="hljs-keyword">for</span> _ <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(n_experiments): | |
| torch.cuda.synchronize() | |
| start_time = time.time() | |
| <span class="hljs-keyword">for</span> _ <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(unet_runs_per_experiment): | |
| orig_output = unet(*inputs) | |
| torch.cuda.synchronize() | |
| <span class="hljs-built_in">print</span>(<span class="hljs-string">f"unet inference took <span class="hljs-subst">{time.time() - start_time:<span class="hljs-number">.2</span>f}</span> seconds"</span>) | |
| <span class="hljs-comment"># 모델 저장</span> | |
| unet_traced.save(<span class="hljs-string">"unet_traced.pt"</span>)`,wrap:!1}}),Cl=new d({props:{code:"ZnJvbSUyMGRpZmZ1c2VycyUyMGltcG9ydCUyMFN0YWJsZURpZmZ1c2lvblBpcGVsaW5lJTBBaW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGF0YWNsYXNzZXMlMjBpbXBvcnQlMjBkYXRhY2xhc3MlMEElMEElMEElNDBkYXRhY2xhc3MlMEFjbGFzcyUyMFVOZXQyRENvbmRpdGlvbk91dHB1dCUzQSUwQSUyMCUyMCUyMCUyMHNhbXBsZSUzQSUyMHRvcmNoLkZsb2F0VGVuc29yJTBBJTBBJTBBcGlwZSUyMCUzRCUyMFN0YWJsZURpZmZ1c2lvblBpcGVsaW5lLmZyb21fcHJldHJhaW5lZCglMEElMjAlMjAlMjAlMjAlMjJydW53YXltbCUyRnN0YWJsZS1kaWZmdXNpb24tdjEtNSUyMiUyQyUwQSUyMCUyMCUyMCUyMHRvcmNoX2R0eXBlJTNEdG9yY2guZmxvYXQxNiUyQyUwQSkudG8oJTIyY3VkYSUyMiklMEElMEElMjMlMjBqaXR0ZWQlMjB1bmV0JTIwJUVDJTgyJUFDJUVDJTlBJUE5JTBBdW5ldF90cmFjZWQlMjAlM0QlMjB0b3JjaC5qaXQubG9hZCglMjJ1bmV0X3RyYWNlZC5wdCUyMiklMEElMEElMEElMjMlMjBwaXBlLnVuZXQlMjAlRUMlODIlQUQlRUMlQTAlOUMlMEFjbGFzcyUyMFRyYWNlZFVOZXQodG9yY2gubm4uTW9kdWxlKSUzQSUwQSUyMCUyMCUyMCUyMGRlZiUyMF9faW5pdF9fKHNlbGYpJTNBJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwc3VwZXIoKS5fX2luaXRfXygpJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwc2VsZi5pbl9jaGFubmVscyUyMCUzRCUyMHBpcGUudW5ldC5pbl9jaGFubmVscyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHNlbGYuZGV2aWNlJTIwJTNEJTIwcGlwZS51bmV0LmRldmljZSUwQSUwQSUyMCUyMCUyMCUyMGRlZiUyMGZvcndhcmQoc2VsZiUyQyUyMGxhdGVudF9tb2RlbF9pbnB1dCUyQyUyMHQlMkMlMjBlbmNvZGVyX2hpZGRlbl9zdGF0ZXMpJTNBJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwc2FtcGxlJTIwJTNEJTIwdW5ldF90cmFjZWQobGF0ZW50X21vZGVsX2lucHV0JTJDJTIwdCUyQyUyMGVuY29kZXJfaGlkZGVuX3N0YXRlcyklNUIwJTVEJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwcmV0dXJuJTIwVU5ldDJEQ29uZGl0aW9uT3V0cHV0KHNhbXBsZSUzRHNhbXBsZSklMEElMEElMEFwaXBlLnVuZXQlMjAlM0QlMjBUcmFjZWRVTmV0KCklMEElMEF3aXRoJTIwdG9yY2guaW5mZXJlbmNlX21vZGUoKSUzQSUwQSUyMCUyMCUyMCUyMGltYWdlJTIwJTNEJTIwcGlwZSglNUJwcm9tcHQlNUQlMjAqJTIwMSUyQyUyMG51bV9pbmZlcmVuY2Vfc3RlcHMlM0Q1MCkuaW1hZ2VzJTVCMCU1RA==",highlighted:`<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionPipeline | |
| <span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> dataclasses <span class="hljs-keyword">import</span> dataclass | |
| <span class="hljs-meta">@dataclass</span> | |
| <span class="hljs-keyword">class</span> <span class="hljs-title class_">UNet2DConditionOutput</span>: | |
| sample: torch.FloatTensor | |
| pipe = StableDiffusionPipeline.from_pretrained( | |
| <span class="hljs-string">"runwayml/stable-diffusion-v1-5"</span>, | |
| torch_dtype=torch.float16, | |
| ).to(<span class="hljs-string">"cuda"</span>) | |
| <span class="hljs-comment"># jitted unet 사용</span> | |
| unet_traced = torch.jit.load(<span class="hljs-string">"unet_traced.pt"</span>) | |
| <span class="hljs-comment"># pipe.unet 삭제</span> | |
| <span class="hljs-keyword">class</span> <span class="hljs-title class_">TracedUNet</span>(torch.nn.Module): | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">__init__</span>(<span class="hljs-params">self</span>): | |
| <span class="hljs-built_in">super</span>().__init__() | |
| self.in_channels = pipe.unet.in_channels | |
| self.device = pipe.unet.device | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">forward</span>(<span class="hljs-params">self, latent_model_input, t, encoder_hidden_states</span>): | |
| sample = unet_traced(latent_model_input, t, encoder_hidden_states)[<span class="hljs-number">0</span>] | |
| <span class="hljs-keyword">return</span> UNet2DConditionOutput(sample=sample) | |
| pipe.unet = TracedUNet() | |
| <span class="hljs-keyword">with</span> torch.inference_mode(): | |
| image = pipe([prompt] * <span class="hljs-number">1</span>, num_inference_steps=<span class="hljs-number">50</span>).images[<span class="hljs-number">0</span>]`,wrap:!1}}),bl=new h({props:{title:"Memory-efficient attention",local:"memory-efficient-attention",headingTag:"h2"}}),Vl=new d({props:{code:"ZnJvbSUyMGRpZmZ1c2VycyUyMGltcG9ydCUyMFN0YWJsZURpZmZ1c2lvblBpcGVsaW5lJTBBaW1wb3J0JTIwdG9yY2glMEElMEFwaXBlJTIwJTNEJTIwU3RhYmxlRGlmZnVzaW9uUGlwZWxpbmUuZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMCUyMnJ1bndheW1sJTJGc3RhYmxlLWRpZmZ1c2lvbi12MS01JTIyJTJDJTBBJTIwJTIwJTIwJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5mbG9hdDE2JTJDJTBBKS50byglMjJjdWRhJTIyKSUwQSUwQXBpcGUuZW5hYmxlX3hmb3JtZXJzX21lbW9yeV9lZmZpY2llbnRfYXR0ZW50aW9uKCklMEElMEF3aXRoJTIwdG9yY2guaW5mZXJlbmNlX21vZGUoKSUzQSUwQSUyMCUyMCUyMCUyMHNhbXBsZSUyMCUzRCUyMHBpcGUoJTIyYSUyMHNtYWxsJTIwY2F0JTIyKSUwQSUwQSUyMyUyMCVFQyU4NCVBMCVFRCU4MyU5RCUzQSUyMCVFQyU5RCVCNCVFQiVBNSVCQyUyMCVFQiVCOSU4NCVFRCU5OSU5QyVFQyU4NCVCMSVFRCU5OSU5NCUyMCVFRCU5NSU5OCVFQSVCOCVCMCUyMCVFQyU5QyU4NCVFRCU5NSVCNCUyMCVFQiU4QiVBNCVFQyU5RCU4QyVFQyU5RCU4NCUyMCVFQyU4MiVBQyVFQyU5QSVBOSVFRCU5NSVBMCUyMCVFQyU4OCU5OCUyMCVFQyU5RSU4OCVFQyU4QSVCNSVFQiU4QiU4OCVFQiU4QiVBNC4lMEElMjMlMjBwaXBlLmRpc2FibGVfeGZvcm1lcnNfbWVtb3J5X2VmZmljaWVudF9hdHRlbnRpb24oKQ==",highlighted:`<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionPipeline | |
| <span class="hljs-keyword">import</span> torch | |
| pipe = StableDiffusionPipeline.from_pretrained( | |
| <span class="hljs-string">"runwayml/stable-diffusion-v1-5"</span>, | |
| torch_dtype=torch.float16, | |
| ).to(<span class="hljs-string">"cuda"</span>) | |
| pipe.enable_xformers_memory_efficient_attention() | |
| <span class="hljs-keyword">with</span> torch.inference_mode(): | |
| sample = pipe(<span class="hljs-string">"a small cat"</span>) | |
| <span class="hljs-comment"># 선택: 이를 비활성화 하기 위해 다음을 사용할 수 있습니다.</span> | |
| <span class="hljs-comment"># pipe.disable_xformers_memory_efficient_attention()</span>`,wrap:!1}}),{c(){m=p("meta"),U=n(),u=p("p"),j=n(),J(T.$$.fragment),C=n(),R=p("p"),R.innerHTML=Lt,vl=n(),Q=p("p"),Q.textContent=qt,Sl=n(),V=p("table"),V.innerHTML=Ot,kl=n(),G=p("em"),G.textContent=Pt,Xl=n(),J(E.$$.fragment),Nl=n(),_=p("p"),_.innerHTML=Kt,Fl=n(),$=p("p"),$.innerHTML=le,Al=n(),J(W.$$.fragment),xl=n(),J(g.$$.fragment),Yl=n(),v=p("p"),v.innerHTML=te,Hl=n(),J(S.$$.fragment),zl=n(),J(k.$$.fragment),Dl=n(),X=p("p"),X.innerHTML=ee,Ll=n(),J(N.$$.fragment),ql=n(),J(b.$$.fragment),Ol=n(),J(F.$$.fragment),Pl=n(),A=p("p"),A.textContent=se,Kl=n(),J(I.$$.fragment),lt=n(),x=p("p"),x.innerHTML=ne,tt=n(),J(Y.$$.fragment),et=n(),H=p("p"),H.textContent=ae,st=n(),J(z.$$.fragment),nt=n(),D=p("p"),D.textContent=pe,at=n(),L=p("p"),L.innerHTML=Me,pt=n(),q=p("p"),q.innerHTML=ie,Mt=n(),J(O.$$.fragment),it=n(),P=p("p"),P.textContent=Ue,Ut=n(),_l=p("a"),mt=Gl(` | |
| ## 메모리 절약을 위해 가속 기능을 사용하여 CPU로 오프로딩 | |
| `),K=p("p"),K.textContent=me,Jt=n(),ll=p("p"),ll.innerHTML=Je,ct=n(),J(tl.$$.fragment),yt=n(),el=p("p"),el.textContent=ce,ot=n(),sl=p("p"),sl.textContent=ye,wt=n(),J(Z.$$.fragment),rt=n(),nl=p("p"),nl.textContent=oe,Tt=n(),J(al.$$.fragment),ut=n(),pl=p("p"),pl.innerHTML=we,dt=n(),$l=p("a"),ft=Gl(` | |
| ## 빠른 추론과 메모리 메모리 절약을 위한 모델 오프로딩 | |
| `),Ml=p("p"),Ml.innerHTML=re,jt=n(),il=p("p"),il.innerHTML=Te,ht=n(),Ul=p("p"),Ul.textContent=ue,Ct=n(),ml=p("p"),ml.innerHTML=de,bt=n(),J(Jl.$$.fragment),It=n(),cl=p("p"),cl.textContent=fe,Zt=n(),J(yl.$$.fragment),Bt=n(),J(B.$$.fragment),Rt=n(),J(ol.$$.fragment),Qt=n(),wl=p("p"),wl.textContent=je,Vt=n(),rl=p("p"),rl.textContent=he,Gt=n(),J(Tl.$$.fragment),Et=n(),J(ul.$$.fragment),_t=n(),dl=p("p"),dl.textContent=Ce,$t=n(),fl=p("p"),fl.textContent=be,Wt=n(),J(jl.$$.fragment),gt=n(),hl=p("p"),hl.innerHTML=Ie,vt=n(),J(Cl.$$.fragment),St=n(),J(bl.$$.fragment),kt=n(),Il=p("p"),Il.innerHTML=Ze,Xt=n(),Zl=p("p"),Zl.textContent=Be,Nt=n(),Bl=p("table"),Bl.innerHTML=Re,Ft=n(),Rl=p("p"),Rl.textContent=Qe,At=n(),Ql=p("ul"),Ql.innerHTML=Ve,xt=n(),J(Vl.$$.fragment),Yt=n(),Wl=p("p"),this.h()},l(l){const t=Xe("svelte-u9bgzb",document.head);m=M(t,"META",{name:!0,content:!0}),t.forEach(e),U=a(l),u=M(l,"P",{}),zt(u).forEach(e),j=a(l),c(T.$$.fragment,l),C=a(l),R=M(l,"P",{"data-svelte-h":!0}),i(R)!=="svelte-m9figs"&&(R.innerHTML=Lt),vl=a(l),Q=M(l,"P",{"data-svelte-h":!0}),i(Q)!=="svelte-12e7d13"&&(Q.textContent=qt),Sl=a(l),V=M(l,"TABLE",{"data-svelte-h":!0}),i(V)!=="svelte-1sy2nlq"&&(V.innerHTML=Ot),kl=a(l),G=M(l,"EM",{"data-svelte-h":!0}),i(G)!=="svelte-oqvaw5"&&(G.textContent=Pt),Xl=a(l),c(E.$$.fragment,l),Nl=a(l),_=M(l,"P",{"data-svelte-h":!0}),i(_)!=="svelte-1q6vy6i"&&(_.innerHTML=Kt),Fl=a(l),$=M(l,"P",{"data-svelte-h":!0}),i($)!=="svelte-1xwtu1p"&&($.innerHTML=le),Al=a(l),c(W.$$.fragment,l),xl=a(l),c(g.$$.fragment,l),Yl=a(l),v=M(l,"P",{"data-svelte-h":!0}),i(v)!=="svelte-1gm9l4i"&&(v.innerHTML=te),Hl=a(l),c(S.$$.fragment,l),zl=a(l),c(k.$$.fragment,l),Dl=a(l),X=M(l,"P",{"data-svelte-h":!0}),i(X)!=="svelte-1y9jw5r"&&(X.innerHTML=ee),Ll=a(l),c(N.$$.fragment,l),ql=a(l),c(b.$$.fragment,l),Ol=a(l),c(F.$$.fragment,l),Pl=a(l),A=M(l,"P",{"data-svelte-h":!0}),i(A)!=="svelte-97drxa"&&(A.textContent=se),Kl=a(l),c(I.$$.fragment,l),lt=a(l),x=M(l,"P",{"data-svelte-h":!0}),i(x)!=="svelte-19j5lzh"&&(x.innerHTML=ne),tt=a(l),c(Y.$$.fragment,l),et=a(l),H=M(l,"P",{"data-svelte-h":!0}),i(H)!=="svelte-1809mre"&&(H.textContent=ae),st=a(l),c(z.$$.fragment,l),nt=a(l),D=M(l,"P",{"data-svelte-h":!0}),i(D)!=="svelte-1klv9ve"&&(D.textContent=pe),at=a(l),L=M(l,"P",{"data-svelte-h":!0}),i(L)!=="svelte-1bo4p0c"&&(L.innerHTML=Me),pt=a(l),q=M(l,"P",{"data-svelte-h":!0}),i(q)!=="svelte-j8mqed"&&(q.innerHTML=ie),Mt=a(l),c(O.$$.fragment,l),it=a(l),P=M(l,"P",{"data-svelte-h":!0}),i(P)!=="svelte-1l99s96"&&(P.textContent=Ue),Ut=a(l),_l=M(l,"A",{name:!0}),zt(_l).forEach(e),mt=El(l,` | |
| ## 메모리 절약을 위해 가속 기능을 사용하여 CPU로 오프로딩 | |
| `),K=M(l,"P",{"data-svelte-h":!0}),i(K)!=="svelte-131mxrq"&&(K.textContent=me),Jt=a(l),ll=M(l,"P",{"data-svelte-h":!0}),i(ll)!=="svelte-f0b7n3"&&(ll.innerHTML=Je),ct=a(l),c(tl.$$.fragment,l),yt=a(l),el=M(l,"P",{"data-svelte-h":!0}),i(el)!=="svelte-tablwz"&&(el.textContent=ce),ot=a(l),sl=M(l,"P",{"data-svelte-h":!0}),i(sl)!=="svelte-1tbver6"&&(sl.textContent=ye),wt=a(l),c(Z.$$.fragment,l),rt=a(l),nl=M(l,"P",{"data-svelte-h":!0}),i(nl)!=="svelte-59z5kh"&&(nl.textContent=oe),Tt=a(l),c(al.$$.fragment,l),ut=a(l),pl=M(l,"P",{"data-svelte-h":!0}),i(pl)!=="svelte-ax3hx7"&&(pl.innerHTML=we),dt=a(l),$l=M(l,"A",{name:!0}),zt($l).forEach(e),ft=El(l,` | |
| ## 빠른 추론과 메모리 메모리 절약을 위한 모델 오프로딩 | |
| `),Ml=M(l,"P",{"data-svelte-h":!0}),i(Ml)!=="svelte-7dwxx7"&&(Ml.innerHTML=re),jt=a(l),il=M(l,"P",{"data-svelte-h":!0}),i(il)!=="svelte-1llz1y7"&&(il.innerHTML=Te),ht=a(l),Ul=M(l,"P",{"data-svelte-h":!0}),i(Ul)!=="svelte-1hmauk"&&(Ul.textContent=ue),Ct=a(l),ml=M(l,"P",{"data-svelte-h":!0}),i(ml)!=="svelte-1gllwmo"&&(ml.innerHTML=de),bt=a(l),c(Jl.$$.fragment,l),It=a(l),cl=M(l,"P",{"data-svelte-h":!0}),i(cl)!=="svelte-5q5hse"&&(cl.textContent=fe),Zt=a(l),c(yl.$$.fragment,l),Bt=a(l),c(B.$$.fragment,l),Rt=a(l),c(ol.$$.fragment,l),Qt=a(l),wl=M(l,"P",{"data-svelte-h":!0}),i(wl)!=="svelte-1rrl8zz"&&(wl.textContent=je),Vt=a(l),rl=M(l,"P",{"data-svelte-h":!0}),i(rl)!=="svelte-1c0oa55"&&(rl.textContent=he),Gt=a(l),c(Tl.$$.fragment,l),Et=a(l),c(ul.$$.fragment,l),_t=a(l),dl=M(l,"P",{"data-svelte-h":!0}),i(dl)!=="svelte-1jkjfr1"&&(dl.textContent=Ce),$t=a(l),fl=M(l,"P",{"data-svelte-h":!0}),i(fl)!=="svelte-mqbplb"&&(fl.textContent=be),Wt=a(l),c(jl.$$.fragment,l),gt=a(l),hl=M(l,"P",{"data-svelte-h":!0}),i(hl)!=="svelte-1ewkmr2"&&(hl.innerHTML=Ie),vt=a(l),c(Cl.$$.fragment,l),St=a(l),c(bl.$$.fragment,l),kt=a(l),Il=M(l,"P",{"data-svelte-h":!0}),i(Il)!=="svelte-1lkmvkn"&&(Il.innerHTML=Ze),Xt=a(l),Zl=M(l,"P",{"data-svelte-h":!0}),i(Zl)!=="svelte-1aa24j0"&&(Zl.textContent=Be),Nt=a(l),Bl=M(l,"TABLE",{"data-svelte-h":!0}),i(Bl)!=="svelte-13acbqe"&&(Bl.innerHTML=Re),Ft=a(l),Rl=M(l,"P",{"data-svelte-h":!0}),i(Rl)!=="svelte-1h75gf9"&&(Rl.textContent=Qe),At=a(l),Ql=M(l,"UL",{"data-svelte-h":!0}),i(Ql)!=="svelte-gqxwyg"&&(Ql.innerHTML=Ve),xt=a(l),c(Vl.$$.fragment,l),Yt=a(l),Wl=M(l,"P",{}),zt(Wl).forEach(e),this.h()},h(){gl(m,"name","hf:doc:metadata"),gl(m,"content",ze),gl(_l,"name","sequential_offloading"),gl($l,"name","model_offloading")},m(l,t){Ne(document.head,m),s(l,U,t),s(l,u,t),s(l,j,t),y(T,l,t),s(l,C,t),s(l,R,t),s(l,vl,t),s(l,Q,t),s(l,Sl,t),s(l,V,t),s(l,kl,t),s(l,G,t),s(l,Xl,t),y(E,l,t),s(l,Nl,t),s(l,_,t),s(l,Fl,t),s(l,$,t),s(l,Al,t),y(W,l,t),s(l,xl,t),y(g,l,t),s(l,Yl,t),s(l,v,t),s(l,Hl,t),y(S,l,t),s(l,zl,t),y(k,l,t),s(l,Dl,t),s(l,X,t),s(l,Ll,t),y(N,l,t),s(l,ql,t),y(b,l,t),s(l,Ol,t),y(F,l,t),s(l,Pl,t),s(l,A,t),s(l,Kl,t),y(I,l,t),s(l,lt,t),s(l,x,t),s(l,tt,t),y(Y,l,t),s(l,et,t),s(l,H,t),s(l,st,t),y(z,l,t),s(l,nt,t),s(l,D,t),s(l,at,t),s(l,L,t),s(l,pt,t),s(l,q,t),s(l,Mt,t),y(O,l,t),s(l,it,t),s(l,P,t),s(l,Ut,t),s(l,_l,t),s(l,mt,t),s(l,K,t),s(l,Jt,t),s(l,ll,t),s(l,ct,t),y(tl,l,t),s(l,yt,t),s(l,el,t),s(l,ot,t),s(l,sl,t),s(l,wt,t),y(Z,l,t),s(l,rt,t),s(l,nl,t),s(l,Tt,t),y(al,l,t),s(l,ut,t),s(l,pl,t),s(l,dt,t),s(l,$l,t),s(l,ft,t),s(l,Ml,t),s(l,jt,t),s(l,il,t),s(l,ht,t),s(l,Ul,t),s(l,Ct,t),s(l,ml,t),s(l,bt,t),y(Jl,l,t),s(l,It,t),s(l,cl,t),s(l,Zt,t),y(yl,l,t),s(l,Bt,t),y(B,l,t),s(l,Rt,t),y(ol,l,t),s(l,Qt,t),s(l,wl,t),s(l,Vt,t),s(l,rl,t),s(l,Gt,t),y(Tl,l,t),s(l,Et,t),y(ul,l,t),s(l,_t,t),s(l,dl,t),s(l,$t,t),s(l,fl,t),s(l,Wt,t),y(jl,l,t),s(l,gt,t),s(l,hl,t),s(l,vt,t),y(Cl,l,t),s(l,St,t),y(bl,l,t),s(l,kt,t),s(l,Il,t),s(l,Xt,t),s(l,Zl,t),s(l,Nt,t),s(l,Bl,t),s(l,Ft,t),s(l,Rl,t),s(l,At,t),s(l,Ql,t),s(l,xt,t),y(Vl,l,t),s(l,Yt,t),s(l,Wl,t),Ht=!0},p(l,[t]){const Ge={};t&2&&(Ge.$$scope={dirty:t,ctx:l}),b.$set(Ge);const Ee={};t&2&&(Ee.$$scope={dirty:t,ctx:l}),I.$set(Ee);const _e={};t&2&&(_e.$$scope={dirty:t,ctx:l}),Z.$set(_e);const $e={};t&2&&($e.$$scope={dirty:t,ctx:l}),B.$set($e)},i(l){Ht||(o(T.$$.fragment,l),o(E.$$.fragment,l),o(W.$$.fragment,l),o(g.$$.fragment,l),o(S.$$.fragment,l),o(k.$$.fragment,l),o(N.$$.fragment,l),o(b.$$.fragment,l),o(F.$$.fragment,l),o(I.$$.fragment,l),o(Y.$$.fragment,l),o(z.$$.fragment,l),o(O.$$.fragment,l),o(tl.$$.fragment,l),o(Z.$$.fragment,l),o(al.$$.fragment,l),o(Jl.$$.fragment,l),o(yl.$$.fragment,l),o(B.$$.fragment,l),o(ol.$$.fragment,l),o(Tl.$$.fragment,l),o(ul.$$.fragment,l),o(jl.$$.fragment,l),o(Cl.$$.fragment,l),o(bl.$$.fragment,l),o(Vl.$$.fragment,l),Ht=!0)},o(l){w(T.$$.fragment,l),w(E.$$.fragment,l),w(W.$$.fragment,l),w(g.$$.fragment,l),w(S.$$.fragment,l),w(k.$$.fragment,l),w(N.$$.fragment,l),w(b.$$.fragment,l),w(F.$$.fragment,l),w(I.$$.fragment,l),w(Y.$$.fragment,l),w(z.$$.fragment,l),w(O.$$.fragment,l),w(tl.$$.fragment,l),w(Z.$$.fragment,l),w(al.$$.fragment,l),w(Jl.$$.fragment,l),w(yl.$$.fragment,l),w(B.$$.fragment,l),w(ol.$$.fragment,l),w(Tl.$$.fragment,l),w(ul.$$.fragment,l),w(jl.$$.fragment,l),w(Cl.$$.fragment,l),w(bl.$$.fragment,l),w(Vl.$$.fragment,l),Ht=!1},d(l){l&&(e(U),e(u),e(j),e(C),e(R),e(vl),e(Q),e(Sl),e(V),e(kl),e(G),e(Xl),e(Nl),e(_),e(Fl),e($),e(Al),e(xl),e(Yl),e(v),e(Hl),e(zl),e(Dl),e(X),e(Ll),e(ql),e(Ol),e(Pl),e(A),e(Kl),e(lt),e(x),e(tt),e(et),e(H),e(st),e(nt),e(D),e(at),e(L),e(pt),e(q),e(Mt),e(it),e(P),e(Ut),e(_l),e(mt),e(K),e(Jt),e(ll),e(ct),e(yt),e(el),e(ot),e(sl),e(wt),e(rt),e(nl),e(Tt),e(ut),e(pl),e(dt),e($l),e(ft),e(Ml),e(jt),e(il),e(ht),e(Ul),e(Ct),e(ml),e(bt),e(It),e(cl),e(Zt),e(Bt),e(Rt),e(Qt),e(wl),e(Vt),e(rl),e(Gt),e(Et),e(_t),e(dl),e($t),e(fl),e(Wt),e(gt),e(hl),e(vt),e(St),e(kt),e(Il),e(Xt),e(Zl),e(Nt),e(Bl),e(Ft),e(Rl),e(At),e(Ql),e(xt),e(Yt),e(Wl)),e(m),r(T,l),r(E,l),r(W,l),r(g,l),r(S,l),r(k,l),r(N,l),r(b,l),r(F,l),r(I,l),r(Y,l),r(z,l),r(O,l),r(tl,l),r(Z,l),r(al,l),r(Jl,l),r(yl,l),r(B,l),r(ol,l),r(Tl,l),r(ul,l),r(jl,l),r(Cl,l),r(bl,l),r(Vl,l)}}}const ze='{"title":"메모리와 속도","local":"메모리와-속도","sections":[{"title":"cuDNN auto-tuner 활성화하기","local":"cudnn-auto-tuner-활성화하기","sections":[{"title":"fp32 대신 tf32 사용하기 (Ampere 및 이후 CUDA 장치들에서)","local":"fp32-대신-tf32-사용하기-ampere-및-이후-cuda-장치들에서","sections":[],"depth":3}],"depth":2},{"title":"반정밀도 가중치","local":"반정밀도-가중치","sections":[],"depth":2},{"title":"추가 메모리 절약을 위한 슬라이스 어텐션","local":"추가-메모리-절약을-위한-슬라이스-어텐션","sections":[],"depth":2},{"title":"더 큰 배치를 위한 sliced VAE 디코드","local":"더-큰-배치를-위한-sliced-vae-디코드","sections":[],"depth":2},{"title":"Channels Last 메모리 형식 사용하기","local":"channels-last-메모리-형식-사용하기","sections":[],"depth":2},{"title":"추적(tracing)","local":"추적tracing","sections":[],"depth":2},{"title":"Memory-efficient attention","local":"memory-efficient-attention","sections":[],"depth":2}],"depth":1}';function De(f){return ge(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class ls extends Se{constructor(m){super(),ke(this,m,De,He,We,{})}}export{ls as component}; | |
Xet Storage Details
- Size:
- 49.5 kB
- Xet hash:
- 157ea530fb3c37219e3229cd1d73eb8e14de2f5c3b07d417c3183c861c7fb739
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.