Buckets:

rtrm's picture
download
raw
11.9 kB
/**
 * Minified, generated page module for the "Token merging" documentation page
 * (see the `source` URL passed to the EditOnGithub component below).
 * NOTE(review): this looks like Svelte compiler output (`data-svelte-h`
 * markers, `$$.fragment`); prefer editing the markdown source and rebuilding
 * rather than hand-editing this file.
 *
 * Line breaks inside the '...' / "..." HTML strings are written as `\n`
 * escapes: a raw line terminator inside a quoted string literal is a
 * SyntaxError. Template literals (the `highlighted` props) may span lines.
 */
import{s as yt,n as wt,o as vt}from"../chunks/scheduler.8c3d61f6.js";
import{S as Ot,i as Jt,g as l,s as i,r as _,A as Ut,h as s,f as d,c as n,j as bt,u as Z,x as o,k as rt,y as $t,a as r,v as j,d as x,t as B,w as L}from"../chunks/index.da70eac4.js";
import{C as it}from"../chunks/CodeBlock.00a903b3.js";
import{H as Tt,E as Ht}from"../chunks/EditOnGithub.1e64e623.js";

// Fragment factory: declares the page's nodes and static HTML strings, instantiates
// the heading / code-block / edit-link components, and returns an object with the
// c, l, h, m, p, i, o and d methods defined below. The `nt` argument is unused.
function _t(nt){
  let a,k,C,G,f,P,m,
  lt='<a href="https://huggingface.co/papers/2303.17604" rel="nofollow">Token merging</a> (ToMe) merges redundant tokens/patches progressively in the forward pass of a Transformer-based network which can speed-up the inference latency of <a href="/docs/diffusers/pr_10101/en/api/pipelines/stable_diffusion/text2img#diffusers.StableDiffusionPipeline">StableDiffusionPipeline</a>.',
  I,u,
  st="Install ToMe from <code>pip</code>:",
  V,h,E,c,
  ot='You can use ToMe from the <a href="https://github.com/dbolya/tomesd" rel="nofollow"><code>tomesd</code></a> library with the <a href="https://github.com/dbolya/tomesd?tab=readme-ov-file#usage" rel="nofollow"><code>apply_patch</code></a> function:',
  z,M,A,g,
  at='The <code>apply_patch</code> function exposes a number of <a href="https://github.com/dbolya/tomesd#usage" rel="nofollow">arguments</a> to help strike a balance between pipeline inference speed and the quality of the generated tokens. The most important argument is <code>ratio</code> which controls the number of tokens that are merged during the forward pass.',
  W,b,
  pt='As reported in the <a href="https://huggingface.co/papers/2303.17604" rel="nofollow">paper</a>, ToMe can greatly preserve the quality of the generated images while boosting inference speed. \nBy increasing the <code>ratio</code>, you can speed-up inference even further, but at the cost of some degraded image quality.',
  F,T,
  ft='To test the quality of the generated images, we sampled a few prompts from <a href="https://parti.research.google/" rel="nofollow">Parti Prompts</a> and performed inference with the <a href="/docs/diffusers/pr_10101/en/api/pipelines/stable_diffusion/text2img#diffusers.StableDiffusionPipeline">StableDiffusionPipeline</a> with the following settings:',
  R,p,
  mt='<img src="https://huggingface.co/datasets/diffusers/docs-images/resolve/main/tome/tome_samples.png"/>',
  Y,y,
  ut='We didn’t notice any significant decrease in the quality of the generated samples, and you can check out the generated samples in this <a href="https://wandb.ai/sayakpaul/tomesd-results/runs/23j4bj3i?workspace=" rel="nofollow">WandB report</a>. If you’re interested in reproducing this experiment, use this <a href="https://gist.github.com/sayakpaul/8cac98d7f22399085a060992f411ecbd" rel="nofollow">script</a>.',
  N,w,q,v,
  ht='We also benchmarked the impact of <code>tomesd</code> on the <a href="/docs/diffusers/pr_10101/en/api/pipelines/stable_diffusion/text2img#diffusers.StableDiffusionPipeline">StableDiffusionPipeline</a> with <a href="https://huggingface.co/docs/diffusers/optimization/xformers" rel="nofollow">xFormers</a> enabled across several image resolutions. The results are obtained from A100 and V100 GPUs in the following development environment:',
  Q,O,D,J,
  ct='To reproduce this benchmark, feel free to use this <a href="https://gist.github.com/sayakpaul/27aec6bca7eb7b0e0aa4112205850335" rel="nofollow">script</a>. \nThe results are reported in seconds, and where applicable we report the speed-up percentage over the vanilla pipeline when using ToMe and ToMe + xFormers.',
  X,U,
  Mt="<thead><tr><th><strong>GPU</strong></th> <th><strong>Resolution</strong></th> <th><strong>Batch size</strong></th> <th><strong>Vanilla</strong></th> <th><strong>ToMe</strong></th> <th><strong>ToMe + xFormers</strong></th></tr></thead> <tbody><tr><td><strong>A100</strong></td> <td>512</td> <td>10</td> <td>6.88</td> <td>5.26 (+23.55%)</td> <td>4.69 (+31.83%)</td></tr> <tr><td></td> <td>768</td> <td>10</td> <td>OOM</td> <td>14.71</td> <td>11</td></tr> <tr><td></td> <td></td> <td>8</td> <td>OOM</td> <td>11.56</td> <td>8.84</td></tr> <tr><td></td> <td></td> <td>4</td> <td>OOM</td> <td>5.98</td> <td>4.66</td></tr> <tr><td></td> <td></td> <td>2</td> <td>4.99</td> <td>3.24 (+35.07%)</td> <td>2.1 (+37.88%)</td></tr> <tr><td></td> <td></td> <td>1</td> <td>3.29</td> <td>2.24 (+31.91%)</td> <td>2.03 (+38.3%)</td></tr> <tr><td></td> <td>1024</td> <td>10</td> <td>OOM</td> <td>OOM</td> <td>OOM</td></tr> <tr><td></td> <td></td> <td>8</td> <td>OOM</td> <td>OOM</td> <td>OOM</td></tr> <tr><td></td> <td></td> <td>4</td> <td>OOM</td> <td>12.51</td> <td>9.09</td></tr> <tr><td></td> <td></td> <td>2</td> <td>OOM</td> <td>6.52</td> <td>4.96</td></tr> <tr><td></td> <td></td> <td>1</td> <td>6.4</td> <td>3.61 (+43.59%)</td> <td>2.81 (+56.09%)</td></tr> <tr><td><strong>V100</strong></td> <td>512</td> <td>10</td> <td>OOM</td> <td>10.03</td> <td>9.29</td></tr> <tr><td></td> <td></td> <td>8</td> <td>OOM</td> <td>8.05</td> <td>7.47</td></tr> <tr><td></td> <td></td> <td>4</td> <td>5.7</td> <td>4.3 (+24.56%)</td> <td>3.98 (+30.18%)</td></tr> <tr><td></td> <td></td> <td>2</td> <td>3.14</td> <td>2.43 (+22.61%)</td> <td>2.27 (+27.71%)</td></tr> <tr><td></td> <td></td> <td>1</td> <td>1.88</td> <td>1.57 (+16.49%)</td> <td>1.57 (+16.49%)</td></tr> <tr><td></td> <td>768</td> <td>10</td> <td>OOM</td> <td>OOM</td> <td>23.67</td></tr> <tr><td></td> \n<td></td> <td>8</td> <td>OOM</td> <td>OOM</td> <td>18.81</td></tr> <tr><td></td> <td></td> <td>4</td> <td>OOM</td> <td>11.81</td> <td>9.7</td></tr> <tr><td></td> <td></td> <td>2</td> <td>OOM</td> <td>6.27</td> <td>5.2</td></tr> <tr><td></td> <td></td> <td>1</td> <td>5.43</td> <td>3.38 (+37.75%)</td> <td>2.82 (+48.07%)</td></tr> <tr><td></td> <td>1024</td> <td>10</td> <td>OOM</td> <td>OOM</td> <td>OOM</td></tr> <tr><td></td> <td></td> <td>8</td> <td>OOM</td> <td>OOM</td> <td>OOM</td></tr> <tr><td></td> <td></td> <td>4</td> <td>OOM</td> <td>OOM</td> <td>19.35</td></tr> <tr><td></td> <td></td> <td>2</td> <td>OOM</td> <td>13</td> <td>10.78</td></tr> <tr><td></td> <td></td> <td>1</td> <td>OOM</td> <td>6.66</td> <td>5.54</td></tr></tbody>",
  K,$,
  gt='As seen in the tables above, the speed-up from <code>tomesd</code> becomes more pronounced for larger image resolutions. It is also interesting to note that with <code>tomesd</code>, it is possible to run the pipeline on a higher resolution like 1024x1024. \nYou may be able to speed-up inference even more with <a href="torch2.0"><code>torch.compile</code></a>.',
  tt,H,et,S,dt;
  // Child components: f / w are the h1 / h2 headings, h / M / O are code blocks
  // (`code` holds a base64 payload, `highlighted` the displayed markup), and H is
  // the edit-on-GitHub link. No line break may follow `return` (ASI).
  return f=new Tt({props:{title:"Token merging",local:"token-merging",headingTag:"h1"}}),
  h=new it({props:{code:"cGlwJTIwaW5zdGFsbCUyMHRvbWVzZA==",highlighted:"pip install tomesd",wrap:!1}}),
  M=new it({props:{code:"JTIwJTIwZnJvbSUyMGRpZmZ1c2VycyUyMGltcG9ydCUyMFN0YWJsZURpZmZ1c2lvblBpcGVsaW5lJTBBJTIwJTIwaW1wb3J0JTIwdG9yY2glMEElMjAlMjBpbXBvcnQlMjB0b21lc2QlMEElMEElMjAlMjBwaXBlbGluZSUyMCUzRCUyMFN0YWJsZURpZmZ1c2lvblBpcGVsaW5lLmZyb21fcHJldHJhaW5lZCglMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJzdGFibGUtZGlmZnVzaW9uLXYxLTUlMkZzdGFibGUtZGlmZnVzaW9uLXYxLTUlMjIlMkMlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmZsb2F0MTYlMkMlMjB1c2Vfc2FmZXRlbnNvcnMlM0RUcnVlJTJDJTBBJTIwJTIwKS50byglMjJjdWRhJTIyKSUwQSUyQiUyMHRvbWVzZC5hcHBseV9wYXRjaChwaXBlbGluZSUyQyUyMHJhdGlvJTNEMC41KSUwQSUwQSUyMCUyMGltYWdlJTIwJTNEJTIwcGlwZWxpbmUoJTIyYSUyMHBob3RvJTIwb2YlMjBhbiUyMGFzdHJvbmF1dCUyMHJpZGluZyUyMGElMjBob3JzZSUyMG9uJTIwbWFycyUyMikuaW1hZ2VzJTVCMCU1RA==",highlighted:` from diffusers import StableDiffusionPipeline
import torch
import tomesd
pipeline = StableDiffusionPipeline.from_pretrained(
&quot;stable-diffusion-v1-5/stable-diffusion-v1-5&quot;, torch_dtype=torch.float16, use_safetensors=True,
).to(&quot;cuda&quot;)
<span class="hljs-addition">+ tomesd.apply_patch(pipeline, ratio=0.5)</span>
image = pipeline(&quot;a photo of an astronaut riding a horse on mars&quot;).images[0]`,wrap:!1}}),
  w=new Tt({props:{title:"Benchmarks",local:"benchmarks",headingTag:"h2"}}),
  O=new it({props:{code:"LSUyMCU2MGRpZmZ1c2VycyU2MCUyMHZlcnNpb24lM0ElMjAwLjE1LjElMEEtJTIwUHl0aG9uJTIwdmVyc2lvbiUzQSUyMDMuOC4xNiUwQS0lMjBQeVRvcmNoJTIwdmVyc2lvbiUyMChHUFUlM0YpJTNBJTIwMS4xMy4xJTJCY3UxMTYlMjAoVHJ1ZSklMEEtJTIwSHVnZ2luZ2ZhY2VfaHViJTIwdmVyc2lvbiUzQSUyMDAuMTMuMiUwQS0lMjBUcmFuc2Zvcm1lcnMlMjB2ZXJzaW9uJTNBJTIwNC4yNy4yJTBBLSUyMEFjY2VsZXJhdGUlMjB2ZXJzaW9uJTNBJTIwMC4xOC4wJTBBLSUyMHhGb3JtZXJzJTIwdmVyc2lvbiUzQSUyMDAuMC4xNiUwQS0lMjB0b21lc2QlMjB2ZXJzaW9uJTNBJTIwMC4xLjI=",highlighted:`- \`diffusers\` version: 0.15.1
- Python version: 3.8.16
- PyTorch version (GPU?): 1.13.1+cu116 (True)
- Huggingface_hub version: 0.13.2
- Transformers version: 4.27.2
- Accelerate version: 0.18.0
- xFormers version: 0.0.16
- tomesd version: 0.1.2`,wrap:!1}}),
  H=new Ht({props:{source:"https://github.com/huggingface/diffusers/blob/main/docs/source/en/optimization/tome.md"}}),
  {
    // c: builds every node from scratch with the imported helpers `l` (called with a
    // tag name) and `i` (no arguments), assigns the static HTML strings to innerHTML,
    // then calls h() to set attributes.
    c(){a=l("meta"),k=i(),C=l("p"),G=i(),_(f.$$.fragment),P=i(),m=l("p"),m.innerHTML=lt,I=i(),u=l("p"),u.innerHTML=st,V=i(),_(h.$$.fragment),E=i(),c=l("p"),c.innerHTML=ot,z=i(),_(M.$$.fragment),A=i(),g=l("p"),g.innerHTML=at,W=i(),b=l("p"),b.innerHTML=pt,F=i(),T=l("p"),T.innerHTML=ft,R=i(),p=l("div"),p.innerHTML=mt,Y=i(),y=l("p"),y.innerHTML=ut,N=i(),_(w.$$.fragment),q=i(),v=l("p"),v.innerHTML=ht,Q=i(),_(O.$$.fragment),D=i(),J=l("p"),J.innerHTML=ct,X=i(),U=l("table"),U.innerHTML=Mt,K=i(),$=l("p"),$.innerHTML=gt,tt=i(),_(H.$$.fragment),et=i(),S=l("p"),this.h()},
    // l: same node sequence as c(), but obtained from the node collection `t` via the
    // imported helpers `s` / `n`; innerHTML is only (re)assigned when `o(node)` differs
    // from the hard-coded "svelte-…" string. Presumably the hydration path — confirm
    // against the helper definitions in ../chunks/index.
    l(t){const e=Ut("svelte-u9bgzb",document.head);a=s(e,"META",{name:!0,content:!0}),e.forEach(d),k=n(t),C=s(t,"P",{}),bt(C).forEach(d),G=n(t),Z(f.$$.fragment,t),P=n(t),m=s(t,"P",{"data-svelte-h":!0}),o(m)!=="svelte-10o8r46"&&(m.innerHTML=lt),I=n(t),u=s(t,"P",{"data-svelte-h":!0}),o(u)!=="svelte-l5huoo"&&(u.innerHTML=st),V=n(t),Z(h.$$.fragment,t),E=n(t),c=s(t,"P",{"data-svelte-h":!0}),o(c)!=="svelte-1q1myt5"&&(c.innerHTML=ot),z=n(t),Z(M.$$.fragment,t),A=n(t),g=s(t,"P",{"data-svelte-h":!0}),o(g)!=="svelte-dcsdl5"&&(g.innerHTML=at),W=n(t),b=s(t,"P",{"data-svelte-h":!0}),o(b)!=="svelte-rxgnrl"&&(b.innerHTML=pt),F=n(t),T=s(t,"P",{"data-svelte-h":!0}),o(T)!=="svelte-hr96eo"&&(T.innerHTML=ft),R=n(t),p=s(t,"DIV",{class:!0,"data-svelte-h":!0}),o(p)!=="svelte-ng3g1s"&&(p.innerHTML=mt),Y=n(t),y=s(t,"P",{"data-svelte-h":!0}),o(y)!=="svelte-1skh0rp"&&(y.innerHTML=ut),N=n(t),Z(w.$$.fragment,t),q=n(t),v=s(t,"P",{"data-svelte-h":!0}),o(v)!=="svelte-ua4b9e"&&(v.innerHTML=ht),Q=n(t),Z(O.$$.fragment,t),D=n(t),J=s(t,"P",{"data-svelte-h":!0}),o(J)!=="svelte-5yzaqq"&&(J.innerHTML=ct),X=n(t),U=s(t,"TABLE",{"data-svelte-h":!0}),o(U)!=="svelte-1dvc6a"&&(U.innerHTML=Mt),K=n(t),$=s(t,"P",{"data-svelte-h":!0}),o($)!=="svelte-v0a950"&&($.innerHTML=gt),tt=n(t),Z(H.$$.fragment,t),et=n(t),S=s(t,"P",{}),bt(S).forEach(d),this.h()},
    // h: sets the <meta> name/content (content is the JSON string `Zt`) and the
    // class of the image wrapper <div>.
    h(){rt(a,"name","hf:doc:metadata"),rt(a,"content",Zt),rt(p,"class","flex justify-center")},
    // m: places the <meta> under document.head and passes every other node/component,
    // in document order, to the imported helpers `r` / `j` with target `t` and anchor `e`.
    m(t,e){$t(document.head,a),r(t,k,e),r(t,C,e),r(t,G,e),j(f,t,e),r(t,P,e),r(t,m,e),r(t,I,e),r(t,u,e),r(t,V,e),j(h,t,e),r(t,E,e),r(t,c,e),r(t,z,e),j(M,t,e),r(t,A,e),r(t,g,e),r(t,W,e),r(t,b,e),r(t,F,e),r(t,T,e),r(t,R,e),r(t,p,e),r(t,Y,e),r(t,y,e),r(t,N,e),j(w,t,e),r(t,q,e),r(t,v,e),r(t,Q,e),j(O,t,e),r(t,D,e),r(t,J,e),r(t,X,e),r(t,U,e),r(t,K,e),r(t,$,e),r(t,tt,e),j(H,t,e),r(t,et,e),r(t,S,e),dt=!0},
    // p: the imported `wt` is used as the update hook (the page has no dynamic bindings
    // in this fragment).
    p:wt,
    // i / o: forward to helpers `x` / `B` for each child component; the `dt` flag keeps
    // i() from running twice in a row.
    i(t){dt||(x(f.$$.fragment,t),x(h.$$.fragment,t),x(M.$$.fragment,t),x(w.$$.fragment,t),x(O.$$.fragment,t),x(H.$$.fragment,t),dt=!0)},
    o(t){B(f.$$.fragment,t),B(h.$$.fragment,t),B(M.$$.fragment,t),B(w.$$.fragment,t),B(O.$$.fragment,t),B(H.$$.fragment,t),dt=!1},
    // d: when `t` is truthy passes each body node to helper `d`; always passes the
    // <meta> to `d` and each child component to `L`.
    d(t){t&&(d(k),d(C),d(G),d(P),d(m),d(I),d(u),d(V),d(E),d(c),d(z),d(A),d(g),d(W),d(b),d(F),d(T),d(R),d(p),d(Y),d(y),d(N),d(q),d(v),d(Q),d(D),d(J),d(X),d(U),d(K),d($),d(tt),d(et),d(S)),d(a),L(f,t),L(h,t),L(M,t),L(w,t),L(O,t),L(H,t)}
  }
}

// Page metadata (title / anchor / section tree) serialized as JSON; written into the
// `hf:doc:metadata` <meta> tag by h() above.
const Zt='{"title":"Token merging","local":"token-merging","sections":[{"title":"Benchmarks","local":"benchmarks","sections":[],"depth":2}],"depth":1}';

// Instance function: registers a callback through the imported `vt` that reads the
// `fw` query parameter and discards the value, then returns an empty array.
function jt(nt){return vt(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}

// Component class: wires jt and _t into the imported base class `Ot` via `Jt`.
class St extends Ot{constructor(a){super(),Jt(this,a,jt,_t,yt,{})}}

export{St as component};

Xet Storage Details

Size:
11.9 kB
·
Xet hash:
f0144d4124646416509b3979d44ace7d07cd1f2d69f0c00604b66b114700a8a3

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.