Buckets:
/**
 * Compiled Svelte page chunk for the Chinese "缓存" (caching) guide whose
 * source is docs/source/zh/optimization/cache.md (see the `source` prop below).
 *
 * The single-letter imports are runtime helpers from the bundled chunks; their
 * exact semantics are defined outside this file. NOTE(review): judging by how
 * they are used they look like the usual Svelte internals (element/space
 * creation, claim-for-hydration, insert, detach, component mount/destroy) —
 * confirm against ../chunks/index.*.js before relying on that.
 */
import { s as bt, n as ut, o as Mt } from "../chunks/scheduler.e4ff9b64.js";
import {
  S as dt, i as ht, e as p, s as n, c as Z, h as wt, a as i, d as l, b as s,
  f as ct, g, j as o, k as O, l as Jt, m as a, n as $, t as X, o as k, p as W,
} from "../chunks/index.09f1bca0.js";
import { C as yt, H as tt, E as _t } from "../chunks/MermaidChart.svelte_svelte_type_style_lang.03f148a7.js";
import { C as ft } from "../chunks/CodeBlock.84d3c589.js";

/**
 * Builds the page fragment: creates the child components and returns the
 * lifecycle object (`c` create, `l` claim, `h` attributes, `m` mount,
 * `p` update, `i`/`o` intro/outro, `d` destroy).
 *
 * @param {*} et - context argument supplied by the runtime; unused here.
 * @returns {object} fragment lifecycle object consumed by `ht`.
 */
function Tt(et) {
  // DOM nodes / child components, assigned lazily by c() or l().
  let m, x, B, Y, c, N, f, R, b, v, u, F, M, I, d, Q, r, E, h, V, w, H, J, P, y, S, _, z, T, A, j, L, C, D, U, q, G;
  // True while the child components are transitioned in.
  let K;

  // Static page copy (user-facing text: must stay in the page's language).
  const lt = "缓存通过存储和重用不同层的中间输出(如注意力层和前馈层)来加速推理,而不是在每个推理步骤执行整个计算。它显著提高了生成速度,但以更多内存为代价,并且不需要额外的训练。";
  const at = "本指南向您展示如何在 Diffusers 中使用支持的缓存方法。";
  const nt = '<a href="https://huggingface.co/papers/2408.12588" rel="nofollow">金字塔注意力广播 (PAB)</a> 基于这样一种观察:在生成过程的连续时间步之间,注意力输出差异不大。注意力差异在交叉注意力层中最小,并且通常在一个较长的时间步范围内被缓存。其次是时间注意力和空间注意力层。';
  const st = "<p>并非所有视频模型都有三种类型的注意力(交叉、时间和空间)!</p>";
  const pt = "PAB 可以与其他技术(如序列并行性和无分类器引导并行性(数据并行性))结合,实现近乎实时的视频生成。";
  const it = "设置并传递一个 <code>PyramidAttentionBroadcastConfig</code> 到管道的变换器以启用它。<code>spatial_attention_block_skip_range</code> 控制跳过空间注意力块中注意力计算的频率,<code>spatial_attention_timestep_skip_range</code> 是要跳过的时间步范围。注意选择一个合适的范围,因为较小的间隔可能导致推理速度变慢,而较大的间隔可能导致生成质量降低。";
  // NOTE(review): the "#pyramid-attention-broadcast" anchor does not match any
  // `local` id declared in this module ("金字塔注意力广播") — verify the link target.
  const mt = '<a href="https://huggingface.co/papers/2410.19355" rel="nofollow">FasterCache</a> 缓存并重用注意力特征,类似于 <a href="#pyramid-attention-broadcast">PAB</a>,因为每个连续时间步的输出差异很小。';
  // Multi-line template literal: the line break is part of the text content.
  const ot = `此方法在使用无分类器引导进行采样时(在大多数基础模型中常见),也可能选择跳过无条件分支预测,并且
如果连续时间步之间的预测潜在输出存在显著冗余,则从条件分支预测中估计它。`;
  const rt = "设置并将 <code>FasterCacheConfig</code> 传递给管道的 transformer 以启用它。";

  // Code samples. `code` is base64 of the percent-encoded plain source and
  // `highlighted` is the same source with highlight.js markup; the two must
  // describe identical text, including blank lines and 4-space indents.
  // NOTE(review): both samples reference `pipe.current_timestep` although only
  // `pipeline` is defined. Fixing that changes the payload length, so it should
  // be corrected in the docs source and the chunk regenerated.
  const pabCode = "aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwQ29nVmlkZW9YUGlwZWxpbmUlMkMlMjBQeXJhbWlkQXR0ZW50aW9uQnJvYWRjYXN0Q29uZmlnJTBBJTBBcGlwZWxpbmUlMjAlM0QlMjBDb2dWaWRlb1hQaXBlbGluZS5mcm9tX3ByZXRyYWluZWQoJTIyVEhVRE0lMkZDb2dWaWRlb1gtNWIlMjIlMkMlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmJmbG9hdDE2KSUwQXBpcGVsaW5lLnRvKCUyMmN1ZGElMjIpJTBBJTBBY29uZmlnJTIwJTNEJTIwUHlyYW1pZEF0dGVudGlvbkJyb2FkY2FzdENvbmZpZyglMEElMjAlMjAlMjAlMjBzcGF0aWFsX2F0dGVudGlvbl9ibG9ja19za2lwX3JhbmdlJTNEMiUyQyUwQSUyMCUyMCUyMCUyMHNwYXRpYWxfYXR0ZW50aW9uX3RpbWVzdGVwX3NraXBfcmFuZ2UlM0QoMTAwJTJDJTIwODAwKSUyQyUwQSUyMCUyMCUyMCUyMGN1cnJlbnRfdGltZXN0ZXBfY2FsbGJhY2slM0RsYW1iZGElM0ElMjBwaXBlLmN1cnJlbnRfdGltZXN0ZXAlMkMlMEEpJTBBcGlwZWxpbmUudHJhbnNmb3JtZXIuZW5hYmxlX2NhY2hlKGNvbmZpZyk=";
  const pabHighlighted = `<span class="hljs-keyword">import</span> torch
<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> CogVideoXPipeline, PyramidAttentionBroadcastConfig

pipeline = CogVideoXPipeline.from_pretrained(<span class="hljs-string">"THUDM/CogVideoX-5b"</span>, torch_dtype=torch.bfloat16)
pipeline.to(<span class="hljs-string">"cuda"</span>)

config = PyramidAttentionBroadcastConfig(
    spatial_attention_block_skip_range=<span class="hljs-number">2</span>,
    spatial_attention_timestep_skip_range=(<span class="hljs-number">100</span>, <span class="hljs-number">800</span>),
    current_timestep_callback=<span class="hljs-keyword">lambda</span>: pipe.current_timestep,
)
pipeline.transformer.enable_cache(config)`;

  // Fix: the sample previously assigned to `pipe line=` (invalid Python, and
  // `pipeline` was then undefined on the next line). Both the base64 payload
  // and the highlighted markup now read `pipeline = ...`.
  const fasterCacheCode = "aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwQ29nVmlkZW9YUGlwZWxpbmUlMkMlMjBGYXN0ZXJDYWNoZUNvbmZpZyUwQSUwQXBpcGVsaW5lJTIwJTNEJTIwQ29nVmlkZW9YUGlwZWxpbmUuZnJvbV9wcmV0cmFpbmVkKCUyMlRIVURNJTJGQ29nVmlkZW9YLTViJTIyJTJDJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5iZmxvYXQxNiklMEFwaXBlbGluZS50byglMjJjdWRhJTIyKSUwQSUwQWNvbmZpZyUyMCUzRCUyMEZhc3RlckNhY2hlQ29uZmlnKCUwQSUyMCUyMCUyMCUyMHNwYXRpYWxfYXR0ZW50aW9uX2Jsb2NrX3NraXBfcmFuZ2UlM0QyJTJDJTBBJTIwJTIwJTIwJTIwc3BhdGlhbF9hdHRlbnRpb25fdGltZXN0ZXBfc2tpcF9yYW5nZSUzRCgtMSUyQyUyMDY4MSklMkMlMEElMjAlMjAlMjAlMjBjdXJyZW50X3RpbWVzdGVwX2NhbGxiYWNrJTNEbGFtYmRhJTNBJTIwcGlwZS5jdXJyZW50X3RpbWVzdGVwJTJDJTBBJTIwJTIwJTIwJTIwYXR0ZW50aW9uX3dlaWdodF9jYWxsYmFjayUzRGxhbWJkYSUyMF8lM0ElMjAwLjMlMkMlMEElMjAlMjAlMjAlMjB1bmNvbmRpdGlvbmFsX2JhdGNoX3NraXBfcmFuZ2UlM0Q1JTJDJTBBJTIwJTIwJTIwJTIwdW5jb25kaXRpb25hbF9iYXRjaF90aW1lc3RlcF9za2lwX3JhbmdlJTNEKC0xJTJDJTIwNzgxKSUyQyUwQSUyMCUyMCUyMCUyMHRlbnNvcl9mb3JtYXQlM0QlMjJCRkNIVyUyMiUyQyUwQSklMEFwaXBlbGluZS50cmFuc2Zvcm1lci5lbmFibGVfY2FjaGUoY29uZmlnKQ==";
  const fasterCacheHighlighted = `<span class="hljs-keyword">import</span> torch
<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> CogVideoXPipeline, FasterCacheConfig

pipeline = CogVideoXPipeline.from_pretrained(<span class="hljs-string">"THUDM/CogVideoX-5b"</span>, torch_dtype=torch.bfloat16)
pipeline.to(<span class="hljs-string">"cuda"</span>)

config = FasterCacheConfig(
    spatial_attention_block_skip_range=<span class="hljs-number">2</span>,
    spatial_attention_timestep_skip_range=(-<span class="hljs-number">1</span>, <span class="hljs-number">681</span>),
    current_timestep_callback=<span class="hljs-keyword">lambda</span>: pipe.current_timestep,
    attention_weight_callback=<span class="hljs-keyword">lambda</span> _: <span class="hljs-number">0.3</span>,
    unconditional_batch_skip_range=<span class="hljs-number">5</span>,
    unconditional_batch_timestep_skip_range=(-<span class="hljs-number">1</span>, <span class="hljs-number">781</span>),
    tensor_format=<span class="hljs-string">"BFCHW"</span>,
)
pipeline.transformer.enable_cache(config)`;

  // Child components, instantiated in document order.
  c = new yt({
    props: {
      containerStyle: "float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;",
    },
  });
  f = new tt({ props: { title: "缓存", local: "缓存", headingTag: "h1" } });
  M = new tt({ props: { title: "金字塔注意力广播", local: "金字塔注意力广播", headingTag: "h2" } });
  J = new ft({ props: { code: pabCode, highlighted: pabHighlighted, wrap: false } });
  y = new tt({ props: { title: "FasterCache", local: "fastercache", headingTag: "h2" } });
  C = new ft({ props: { code: fasterCacheCode, highlighted: fasterCacheHighlighted, wrap: false } });
  U = new _t({
    props: { source: "https://github.com/huggingface/diffusers/blob/main/docs/source/zh/optimization/cache.md" },
  });

  return {
    // Create every node from scratch.
    c() {
      m = p("meta");
      x = n();
      B = p("p");
      Y = n();
      Z(c.$$.fragment);
      N = n();
      Z(f.$$.fragment);
      R = n();
      b = p("p");
      b.textContent = lt;
      v = n();
      u = p("p");
      u.textContent = at;
      F = n();
      Z(M.$$.fragment);
      I = n();
      d = p("p");
      d.innerHTML = nt;
      Q = n();
      r = p("blockquote");
      r.innerHTML = st;
      E = n();
      h = p("p");
      h.textContent = pt;
      V = n();
      w = p("p");
      w.innerHTML = it;
      H = n();
      Z(J.$$.fragment);
      P = n();
      Z(y.$$.fragment);
      S = n();
      _ = p("p");
      _.innerHTML = mt;
      z = n();
      T = p("p");
      T.textContent = ot;
      A = n();
      j = p("p");
      j.innerHTML = rt;
      L = n();
      Z(C.$$.fragment);
      D = n();
      Z(U.$$.fragment);
      q = n();
      G = p("p");
      this.h();
    },

    // Claim nodes from the parent list `t`; static content is rewritten only
    // when the value returned by `o(node)` differs from the expected hash.
    l(t) {
      const e = wt("svelte-u9bgzb", document.head);
      m = i(e, "META", { name: true, content: true });
      e.forEach(l);
      x = s(t);
      B = i(t, "P", {});
      ct(B).forEach(l);
      Y = s(t);
      g(c.$$.fragment, t);
      N = s(t);
      g(f.$$.fragment, t);
      R = s(t);
      b = i(t, "P", { "data-svelte-h": true });
      if (o(b) !== "svelte-1f1nqg4") b.textContent = lt;
      v = s(t);
      u = i(t, "P", { "data-svelte-h": true });
      if (o(u) !== "svelte-1onzyg1") u.textContent = at;
      F = s(t);
      g(M.$$.fragment, t);
      I = s(t);
      d = i(t, "P", { "data-svelte-h": true });
      if (o(d) !== "svelte-3i3btm") d.innerHTML = nt;
      Q = s(t);
      r = i(t, "BLOCKQUOTE", { class: true, "data-svelte-h": true });
      if (o(r) !== "svelte-r7ebaz") r.innerHTML = st;
      E = s(t);
      h = i(t, "P", { "data-svelte-h": true });
      if (o(h) !== "svelte-1js4ud9") h.textContent = pt;
      V = s(t);
      w = i(t, "P", { "data-svelte-h": true });
      if (o(w) !== "svelte-jg5pkc") w.innerHTML = it;
      H = s(t);
      g(J.$$.fragment, t);
      P = s(t);
      g(y.$$.fragment, t);
      S = s(t);
      _ = i(t, "P", { "data-svelte-h": true });
      if (o(_) !== "svelte-1op45ku") _.innerHTML = mt;
      z = s(t);
      T = i(t, "P", { "data-svelte-h": true });
      if (o(T) !== "svelte-1dt33dj") T.textContent = ot;
      A = s(t);
      j = i(t, "P", { "data-svelte-h": true });
      if (o(j) !== "svelte-1emi3m1") j.innerHTML = rt;
      L = s(t);
      g(C.$$.fragment, t);
      D = s(t);
      g(U.$$.fragment, t);
      q = s(t);
      G = i(t, "P", {});
      ct(G).forEach(l);
      this.h();
    },

    // Apply attributes shared by the create and claim paths.
    h() {
      O(m, "name", "hf:doc:metadata");
      O(m, "content", jt);
      O(r, "class", "tip");
    },

    // Mount: the meta tag goes into <head>, everything else into target `t`
    // before anchor `e`, in document order.
    m(t, e) {
      Jt(document.head, m);
      a(t, x, e);
      a(t, B, e);
      a(t, Y, e);
      $(c, t, e);
      a(t, N, e);
      $(f, t, e);
      a(t, R, e);
      a(t, b, e);
      a(t, v, e);
      a(t, u, e);
      a(t, F, e);
      $(M, t, e);
      a(t, I, e);
      a(t, d, e);
      a(t, Q, e);
      a(t, r, e);
      a(t, E, e);
      a(t, h, e);
      a(t, V, e);
      a(t, w, e);
      a(t, H, e);
      $(J, t, e);
      a(t, P, e);
      $(y, t, e);
      a(t, S, e);
      a(t, _, e);
      a(t, z, e);
      a(t, T, e);
      a(t, A, e);
      a(t, j, e);
      a(t, L, e);
      $(C, t, e);
      a(t, D, e);
      $(U, t, e);
      a(t, q, e);
      a(t, G, e);
      K = true;
    },

    // The page has no reactive state, so update is the imported no-op helper.
    p: ut,

    // Transition child components in (once).
    i(t) {
      if (K) return;
      for (const child of [c, f, M, J, y, C, U]) X(child.$$.fragment, t);
      K = true;
    },

    // Transition child components out.
    o(t) {
      for (const child of [c, f, M, J, y, C, U]) k(child.$$.fragment, t);
      K = false;
    },

    // Destroy: detach own nodes only when `t` is truthy; the head meta tag and
    // the child components are always torn down.
    d(t) {
      if (t) {
        for (const node of [x, B, Y, N, R, b, v, u, F, I, d, Q, r, E, h, V, w, H, P, S, _, z, T, A, j, L, D, q, G]) {
          l(node);
        }
      }
      l(m);
      for (const child of [c, f, M, J, y, C, U]) W(child, t);
    },
  };
}

// Serialized table of contents written to the "hf:doc:metadata" meta tag.
const jt = '{"title":"缓存","local":"缓存","sections":[{"title":"金字塔注意力广播","local":"金字塔注意力广播","sections":[],"depth":2},{"title":"FasterCache","local":"fastercache","sections":[],"depth":2}],"depth":1}';

/**
 * Instance function: registers a callback with `Mt` that reads the `fw` query
 * parameter (the value is discarded) and exposes no instance context.
 *
 * @param {*} et - unused.
 * @returns {Array} empty context array.
 */
function Ct(et) {
  Mt(() => {
    new URLSearchParams(window.location.search).get("fw");
  });
  return [];
}

/** Page component; wires the instance function and fragment factory via `ht`. */
class Xt extends dt {
  constructor(m) {
    super();
    ht(this, m, Ct, Tt, bt, {});
  }
}

export { Xt as component };
Xet Storage Details
- Size:
- 9.67 kB
- Xet hash:
- acbc3b93b1ad8d3e6ec7913e3890f9119ea38c84f9cbf0a161198dfc6e9a9a33
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.