Buckets:

rtrm's picture
download
raw
11.9 kB
// Compiled Svelte page module for the Diffusers "Token merging" documentation
// page. This is build output: identifiers are minifier-assigned, and the
// helpers imported below come from sibling chunk files that are not visible
// here. Notes on what each helper does are inferred from how it is called in
// this file and should be confirmed against the chunks.
import { s as wt, n as yt, o as vt } from "../chunks/scheduler.182ea377.js";
import {
  S as Jt,
  i as Ot,
  g as l,
  s as r,
  r as j,
  A as $t,
  h as s,
  f as d,
  c as n,
  j as Tt,
  u as H,
  x as o,
  k as it,
  y as Ut,
  a as i,
  v as x,
  d as _,
  t as Z,
  w as L,
} from "../chunks/index.abf12888.js";
import { C as rt } from "../chunks/CodeBlock.57fe6e13.js";
import { H as bt, E as Bt } from "../chunks/EditOnGithub.9b8e78e4.js";

/**
 * Fragment factory for the page.
 *
 * Builds the child components (two headings, three code blocks, one
 * "edit on GitHub" footer) and returns an object of lifecycle hooks:
 * `c` (create), `l` (claim from existing DOM), `h` (set attributes),
 * `m` (mount), `p` (update, a no-op here), `i`/`o` (transition in/out of the
 * child components) and `d` (destroy).
 *
 * @param {*} nt - Unused by this fragment.
 * @returns {object} The hook object described above.
 */
function jt(nt) {
  // Handles for the DOM nodes created or claimed by the hooks below.
  let a, I, k, P, S, m, E, u, G, A, h, V, N, g, W, T, z, b, X, p, Y, w, q, F, v, Q, R, O, D, $, K, U, tt, et, C;
  // True while the child components have been transitioned in.
  let dt;

  // Pre-rendered markup for the static paragraphs, in page order. Each literal
  // is kept on a single physical line: a raw line break inside a quoted string
  // is a syntax error.
  const lt = '<a href="https://huggingface.co/papers/2303.17604" rel="nofollow">Token merging</a> (ToMe) merges redundant tokens/patches progressively in the forward pass of a Transformer-based network which can speed-up the inference latency of <a href="/docs/diffusers/v0.28.2/en/api/pipelines/stable_diffusion/text2img#diffusers.StableDiffusionPipeline">StableDiffusionPipeline</a>.';
  const st = "Install ToMe from <code>pip</code>:";
  const ot = 'You can use ToMe from the <a href="https://github.com/dbolya/tomesd" rel="nofollow"><code>tomesd</code></a> library with the <a href="https://github.com/dbolya/tomesd?tab=readme-ov-file#usage" rel="nofollow"><code>apply_patch</code></a> function:';
  const at = 'The <code>apply_patch</code> function exposes a number of <a href="https://github.com/dbolya/tomesd#usage" rel="nofollow">arguments</a> to help strike a balance between pipeline inference speed and the quality of the generated tokens. The most important argument is <code>ratio</code> which controls the number of tokens that are merged during the forward pass.';
  const pt = 'As reported in the <a href="https://huggingface.co/papers/2303.17604" rel="nofollow">paper</a>, ToMe can greatly preserve the quality of the generated images while boosting inference speed. By increasing the <code>ratio</code>, you can speed-up inference even further, but at the cost of some degraded image quality.';
  const ft = 'To test the quality of the generated images, we sampled a few prompts from <a href="https://parti.research.google/" rel="nofollow">Parti Prompts</a> and performed inference with the <a href="/docs/diffusers/v0.28.2/en/api/pipelines/stable_diffusion/text2img#diffusers.StableDiffusionPipeline">StableDiffusionPipeline</a> with the following settings:';
  const mt = '<img src="https://huggingface.co/datasets/diffusers/docs-images/resolve/main/tome/tome_samples.png"/>';
  const ut = 'We didn’t notice any significant decrease in the quality of the generated samples, and you can check out the generated samples in this <a href="https://wandb.ai/sayakpaul/tomesd-results/runs/23j4bj3i?workspace=" rel="nofollow">WandB report</a>. If you’re interested in reproducing this experiment, use this <a href="https://gist.github.com/sayakpaul/8cac98d7f22399085a060992f411ecbd" rel="nofollow">script</a>.';
  const ct = 'We also benchmarked the impact of <code>tomesd</code> on the <a href="/docs/diffusers/v0.28.2/en/api/pipelines/stable_diffusion/text2img#diffusers.StableDiffusionPipeline">StableDiffusionPipeline</a> with <a href="https://huggingface.co/docs/diffusers/optimization/xformers" rel="nofollow">xFormers</a> enabled across several image resolutions. The results are obtained from A100 and V100 GPUs in the following development environment:';
  const ht = 'To reproduce this benchmark, feel free to use this <a href="https://gist.github.com/sayakpaul/27aec6bca7eb7b0e0aa4112205850335" rel="nofollow">script</a>. The results are reported in seconds, and where applicable we report the speed-up percentage over the vanilla pipeline when using ToMe and ToMe + xFormers.';
  // Benchmark table: one array entry per table row, joined by the single space
  // that separates rows in the rendered markup.
  const Mt = [
    "<thead><tr><th><strong>GPU</strong></th> <th><strong>Resolution</strong></th> <th><strong>Batch size</strong></th> <th><strong>Vanilla</strong></th> <th><strong>ToMe</strong></th> <th><strong>ToMe + xFormers</strong></th></tr></thead>",
    "<tbody><tr><td><strong>A100</strong></td> <td>512</td> <td>10</td> <td>6.88</td> <td>5.26 (+23.55%)</td> <td>4.69 (+31.83%)</td></tr>",
    "<tr><td></td> <td>768</td> <td>10</td> <td>OOM</td> <td>14.71</td> <td>11</td></tr>",
    "<tr><td></td> <td></td> <td>8</td> <td>OOM</td> <td>11.56</td> <td>8.84</td></tr>",
    "<tr><td></td> <td></td> <td>4</td> <td>OOM</td> <td>5.98</td> <td>4.66</td></tr>",
    "<tr><td></td> <td></td> <td>2</td> <td>4.99</td> <td>3.24 (+35.07%)</td> <td>2.1 (+37.88%)</td></tr>",
    "<tr><td></td> <td></td> <td>1</td> <td>3.29</td> <td>2.24 (+31.91%)</td> <td>2.03 (+38.3%)</td></tr>",
    "<tr><td></td> <td>1024</td> <td>10</td> <td>OOM</td> <td>OOM</td> <td>OOM</td></tr>",
    "<tr><td></td> <td></td> <td>8</td> <td>OOM</td> <td>OOM</td> <td>OOM</td></tr>",
    "<tr><td></td> <td></td> <td>4</td> <td>OOM</td> <td>12.51</td> <td>9.09</td></tr>",
    "<tr><td></td> <td></td> <td>2</td> <td>OOM</td> <td>6.52</td> <td>4.96</td></tr>",
    "<tr><td></td> <td></td> <td>1</td> <td>6.4</td> <td>3.61 (+43.59%)</td> <td>2.81 (+56.09%)</td></tr>",
    "<tr><td><strong>V100</strong></td> <td>512</td> <td>10</td> <td>OOM</td> <td>10.03</td> <td>9.29</td></tr>",
    "<tr><td></td> <td></td> <td>8</td> <td>OOM</td> <td>8.05</td> <td>7.47</td></tr>",
    "<tr><td></td> <td></td> <td>4</td> <td>5.7</td> <td>4.3 (+24.56%)</td> <td>3.98 (+30.18%)</td></tr>",
    "<tr><td></td> <td></td> <td>2</td> <td>3.14</td> <td>2.43 (+22.61%)</td> <td>2.27 (+27.71%)</td></tr>",
    "<tr><td></td> <td></td> <td>1</td> <td>1.88</td> <td>1.57 (+16.49%)</td> <td>1.57 (+16.49%)</td></tr>",
    "<tr><td></td> <td>768</td> <td>10</td> <td>OOM</td> <td>OOM</td> <td>23.67</td></tr>",
    "<tr><td></td> <td></td> <td>8</td> <td>OOM</td> <td>OOM</td> <td>18.81</td></tr>",
    "<tr><td></td> <td></td> <td>4</td> <td>OOM</td> <td>11.81</td> <td>9.7</td></tr>",
    "<tr><td></td> <td></td> <td>2</td> <td>OOM</td> <td>6.27</td> <td>5.2</td></tr>",
    "<tr><td></td> <td></td> <td>1</td> <td>5.43</td> <td>3.38 (+37.75%)</td> <td>2.82 (+48.07%)</td></tr>",
    "<tr><td></td> <td>1024</td> <td>10</td> <td>OOM</td> <td>OOM</td> <td>OOM</td></tr>",
    "<tr><td></td> <td></td> <td>8</td> <td>OOM</td> <td>OOM</td> <td>OOM</td></tr>",
    "<tr><td></td> <td></td> <td>4</td> <td>OOM</td> <td>OOM</td> <td>19.35</td></tr>",
    "<tr><td></td> <td></td> <td>2</td> <td>OOM</td> <td>13</td> <td>10.78</td></tr>",
    "<tr><td></td> <td></td> <td>1</td> <td>OOM</td> <td>6.66</td> <td>5.54</td></tr></tbody>",
  ].join(" ");
  const gt = 'As seen in the tables above, the speed-up from <code>tomesd</code> becomes more pronounced for larger image resolutions. It is also interesting to note that with <code>tomesd</code>, it is possible to run the pipeline on a higher resolution like 1024x1024. You may be able to speed-up inference even more with <a href="torch2.0"><code>torch.compile</code></a>.';

  // Child components, constructed in page order. Each `code` prop is the
  // base64 form of the URI-encoded snippet text; `highlighted` is the markup
  // shown for that same snippet.
  const f = new bt({
    props: { title: "Token merging", local: "token-merging", headingTag: "h1" },
  });
  const c = new rt({
    props: {
      code: "cGlwJTIwaW5zdGFsbCUyMHRvbWVzZA==",
      highlighted: "pip install tomesd",
      wrap: false,
    },
  });
  const M = new rt({
    props: {
      code: "JTIwJTIwZnJvbSUyMGRpZmZ1c2VycyUyMGltcG9ydCUyMFN0YWJsZURpZmZ1c2lvblBpcGVsaW5lJTBBJTIwJTIwaW1wb3J0JTIwdG9yY2glMEElMjAlMjBpbXBvcnQlMjB0b21lc2QlMEElMEElMjAlMjBwaXBlbGluZSUyMCUzRCUyMFN0YWJsZURpZmZ1c2lvblBpcGVsaW5lLmZyb21fcHJldHJhaW5lZCglMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJydW53YXltbCUyRnN0YWJsZS1kaWZmdXNpb24tdjEtNSUyMiUyQyUyMHRvcmNoX2R0eXBlJTNEdG9yY2guZmxvYXQxNiUyQyUyMHVzZV9zYWZldGVuc29ycyUzRFRydWUlMkMlMEElMjAlMjApLnRvKCUyMmN1ZGElMjIpJTBBJTJCJTIwdG9tZXNkLmFwcGx5X3BhdGNoKHBpcGVsaW5lJTJDJTIwcmF0aW8lM0QwLjUpJTBBJTBBJTIwJTIwaW1hZ2UlMjAlM0QlMjBwaXBlbGluZSglMjJhJTIwcGhvdG8lMjBvZiUyMGFuJTIwYXN0cm9uYXV0JTIwcmlkaW5nJTIwYSUyMGhvcnNlJTIwb24lMjBtYXJzJTIyKS5pbWFnZXMlNUIwJTVE",
      // Diff-style snippet. The lines are joined explicitly so that the
      // two-space context indentation and the blank lines match the text
      // carried in `code` above.
      highlighted: [
        "  from diffusers import StableDiffusionPipeline",
        "  import torch",
        "  import tomesd",
        "",
        "  pipeline = StableDiffusionPipeline.from_pretrained(",
        "        &quot;runwayml/stable-diffusion-v1-5&quot;, torch_dtype=torch.float16, use_safetensors=True,",
        "  ).to(&quot;cuda&quot;)",
        '<span class="hljs-addition">+ tomesd.apply_patch(pipeline, ratio=0.5)</span>',
        "",
        "  image = pipeline(&quot;a photo of an astronaut riding a horse on mars&quot;).images[0]",
      ].join("\n"),
      wrap: false,
    },
  });
  const y = new bt({
    props: { title: "Benchmarks", local: "benchmarks", headingTag: "h2" },
  });
  const J = new rt({
    props: {
      code: "LSUyMCU2MGRpZmZ1c2VycyU2MCUyMHZlcnNpb24lM0ElMjAwLjE1LjElMEEtJTIwUHl0aG9uJTIwdmVyc2lvbiUzQSUyMDMuOC4xNiUwQS0lMjBQeVRvcmNoJTIwdmVyc2lvbiUyMChHUFUlM0YpJTNBJTIwMS4xMy4xJTJCY3UxMTYlMjAoVHJ1ZSklMEEtJTIwSHVnZ2luZ2ZhY2VfaHViJTIwdmVyc2lvbiUzQSUyMDAuMTMuMiUwQS0lMjBUcmFuc2Zvcm1lcnMlMjB2ZXJzaW9uJTNBJTIwNC4yNy4yJTBBLSUyMEFjY2VsZXJhdGUlMjB2ZXJzaW9uJTNBJTIwMC4xOC4wJTBBLSUyMHhGb3JtZXJzJTIwdmVyc2lvbiUzQSUyMDAuMC4xNiUwQS0lMjB0b21lc2QlMjB2ZXJzaW9uJTNBJTIwMC4xLjI=",
      highlighted: [
        "- `diffusers` version: 0.15.1",
        "- Python version: 3.8.16",
        "- PyTorch version (GPU?): 1.13.1+cu116 (True)",
        "- Huggingface_hub version: 0.13.2",
        "- Transformers version: 4.27.2",
        "- Accelerate version: 0.18.0",
        "- xFormers version: 0.0.16",
        "- tomesd version: 0.1.2",
      ].join("\n"),
      wrap: false,
    },
  });
  const B = new Bt({
    props: {
      source: "https://github.com/huggingface/diffusers/blob/main/docs/source/en/optimization/tome.md",
    },
  });

  // The six child components in page order, for the hooks that treat them
  // uniformly.
  const children = [f, c, M, y, J, B];

  // Create a `tag` element and fill it with pre-rendered markup.
  const createStatic = (tag, html) => {
    const node = l(tag);
    node.innerHTML = html;
    return node;
  };

  return {
    // Create: build every node from scratch, in document order.
    c() {
      a = l("meta");
      I = r();
      k = l("p");
      P = r();
      j(f.$$.fragment);
      S = r();
      m = createStatic("p", lt);
      E = r();
      u = createStatic("p", st);
      G = r();
      j(c.$$.fragment);
      A = r();
      h = createStatic("p", ot);
      V = r();
      j(M.$$.fragment);
      N = r();
      g = createStatic("p", at);
      W = r();
      T = createStatic("p", pt);
      z = r();
      b = createStatic("p", ft);
      X = r();
      p = createStatic("div", mt);
      Y = r();
      w = createStatic("p", ut);
      q = r();
      j(y.$$.fragment);
      F = r();
      v = createStatic("p", ct);
      Q = r();
      j(J.$$.fragment);
      R = r();
      O = createStatic("p", ht);
      D = r();
      $ = createStatic("table", Mt);
      K = r();
      U = createStatic("p", gt);
      tt = r();
      j(B.$$.fragment);
      et = r();
      C = l("p");
      this.h();
    },

    // Claim: adopt nodes from `t` instead of creating them (presumably the
    // server-rendered DOM during hydration; confirm against the runtime chunk).
    l(t) {
      // Claim a static element and rewrite its markup only when the value
      // returned by `o(node)` differs from the expected hash.
      const claimStatic = (tag, attrs, hash, html) => {
        const node = s(t, tag, attrs);
        if (o(node) !== hash) {
          node.innerHTML = html;
        }
        return node;
      };

      const e = $t("svelte-u9bgzb", document.head);
      a = s(e, "META", { name: true, content: true });
      e.forEach(d);
      I = n(t);
      k = s(t, "P", {});
      Tt(k).forEach(d);
      P = n(t);
      H(f.$$.fragment, t);
      S = n(t);
      m = claimStatic("P", { "data-svelte-h": true }, "svelte-4h3qpa", lt);
      E = n(t);
      u = claimStatic("P", { "data-svelte-h": true }, "svelte-l5huoo", st);
      G = n(t);
      H(c.$$.fragment, t);
      A = n(t);
      h = claimStatic("P", { "data-svelte-h": true }, "svelte-1q1myt5", ot);
      V = n(t);
      H(M.$$.fragment, t);
      N = n(t);
      g = claimStatic("P", { "data-svelte-h": true }, "svelte-dcsdl5", at);
      W = n(t);
      T = claimStatic("P", { "data-svelte-h": true }, "svelte-rxgnrl", pt);
      z = n(t);
      b = claimStatic("P", { "data-svelte-h": true }, "svelte-zsei7e", ft);
      X = n(t);
      p = claimStatic("DIV", { class: true, "data-svelte-h": true }, "svelte-ng3g1s", mt);
      Y = n(t);
      w = claimStatic("P", { "data-svelte-h": true }, "svelte-1skh0rp", ut);
      q = n(t);
      H(y.$$.fragment, t);
      F = n(t);
      v = claimStatic("P", { "data-svelte-h": true }, "svelte-1pt9cuq", ct);
      Q = n(t);
      H(J.$$.fragment, t);
      R = n(t);
      O = claimStatic("P", { "data-svelte-h": true }, "svelte-5yzaqq", ht);
      D = n(t);
      $ = claimStatic("TABLE", { "data-svelte-h": true }, "svelte-1dvc6a", Mt);
      K = n(t);
      U = claimStatic("P", { "data-svelte-h": true }, "svelte-v0a950", gt);
      tt = n(t);
      H(B.$$.fragment, t);
      et = n(t);
      C = s(t, "P", {});
      Tt(C).forEach(d);
      this.h();
    },

    // Set attributes on the <meta> tag and the image wrapper; called at the
    // end of both `c` and `l`.
    h() {
      it(a, "name", "hf:doc:metadata");
      it(a, "content", Ht);
      it(p, "class", "flex justify-center");
    },

    // Mount: the <meta> tag goes into document.head; everything else is
    // passed to `i`/`x` with `t` and `e` (presumably insert target and anchor).
    m(t, e) {
      Ut(document.head, a);
      i(t, I, e);
      i(t, k, e);
      i(t, P, e);
      x(f, t, e);
      i(t, S, e);
      i(t, m, e);
      i(t, E, e);
      i(t, u, e);
      i(t, G, e);
      x(c, t, e);
      i(t, A, e);
      i(t, h, e);
      i(t, V, e);
      x(M, t, e);
      i(t, N, e);
      i(t, g, e);
      i(t, W, e);
      i(t, T, e);
      i(t, z, e);
      i(t, b, e);
      i(t, X, e);
      i(t, p, e);
      i(t, Y, e);
      i(t, w, e);
      i(t, q, e);
      x(y, t, e);
      i(t, F, e);
      i(t, v, e);
      i(t, Q, e);
      x(J, t, e);
      i(t, R, e);
      i(t, O, e);
      i(t, D, e);
      i(t, $, e);
      i(t, K, e);
      i(t, U, e);
      i(t, tt, e);
      x(B, t, e);
      i(t, et, e);
      i(t, C, e);
      dt = true;
    },

    // Update: the imported `yt` is used as-is (presumably a no-op, since the
    // page has no reactive state).
    p: yt,

    // Transition the child components in, once.
    i(t) {
      if (dt) {
        return;
      }
      for (const child of children) {
        _(child.$$.fragment, t);
      }
      dt = true;
    },

    // Transition the child components out.
    o(t) {
      for (const child of children) {
        Z(child.$$.fragment, t);
      }
      dt = false;
    },

    // Destroy: body nodes are only passed to `d` when `t` is truthy; the
    // <meta> tag and the child components are always torn down.
    d(t) {
      if (t) {
        const bodyNodes = [
          I, k, P, S, m, E, u, G, A, h, V, N, g, W, T, z, b,
          X, p, Y, w, q, F, v, Q, R, O, D, $, K, U, tt, et, C,
        ];
        for (const node of bodyNodes) {
          d(node);
        }
      }
      d(a);
      for (const child of children) {
        L(child, t);
      }
    },
  };
}

// JSON written into the `hf:doc:metadata` <meta> tag: the page's heading tree.
const Ht = '{"title":"Token merging","local":"token-merging","sections":[{"title":"Benchmarks","local":"benchmarks","sections":[],"depth":2}],"depth":1}';

/**
 * Instance function for the page component.
 *
 * Registers a callback through `vt` (presumably an on-mount hook) that reads
 * the `fw` query parameter and discards the value, then returns an empty
 * array.
 *
 * @param {*} nt - Unused.
 * @returns {Array} Always an empty array.
 */
function xt(nt) {
  vt(() => {
    // The looked-up value is intentionally unused; kept to match the build output.
    new URLSearchParams(window.location.search).get("fw");
  });
  return [];
}

/** Page component: wires the instance function and fragment factory into the base class. */
class Ct extends Jt {
  constructor(a) {
    super();
    Ot(this, a, xt, jt, wt, {});
  }
}

export { Ct as component };

Xet Storage Details

Size:
11.9 kB
·
Xet hash:
a5769d37c8a8ee406216e1a8a9c7dd38d7835e32b0eb0eca4a7dfb79bfaf746b

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.