Buckets:

rtrm's picture
download
raw
11.6 kB
/**
 * Compiled page module for the "Token merging" documentation page.
 *
 * This file is compiler output: the single-letter identifiers are kept as
 * emitted so it stays diffable against sibling chunks. The helpers imported
 * from ../chunks are opaque from here; comments about their role are
 * assumptions based on how they are called in this file.
 * NOTE(review): the c/l/h/m/p/i/o/d method names look like Svelte's fragment
 * lifecycle (create / claim / hydrate / mount / update / intro / outro /
 * destroy) - confirm against ../chunks/index.
 *
 * Fix applied: several quoted string literals contained raw line breaks
 * (always directly after a space), which is a syntax error in JavaScript.
 * The halves are joined again without adding or removing any character.
 */
import { s as Tt, n as wt, o as yt } from "../chunks/scheduler.182ea377.js";
import {
  S as bt,
  i as vt,
  g as i,
  s as l,
  r as H,
  A as Jt,
  h as s,
  f as d,
  c as n,
  j as Mt,
  u as x,
  x as o,
  k as et,
  y as Ot,
  a as r,
  v as _,
  d as Z,
  t as L,
  w as k,
} from "../chunks/index.abf12888.js";
import { C as dt } from "../chunks/CodeBlock.57fe6e13.js";
import { H as gt } from "../chunks/Heading.16916d63.js";

/**
 * Builds the fragment object for the page.
 *
 * @param {*} rt - Context argument supplied by the caller; not read here.
 * @returns {object} Object exposing the c, l, h, m, p, i, o and d methods.
 */
function Ut(rt) {
  // Node references, assigned in c() or l(). Every second one receives the
  // result of l()/n(), presumably whitespace separators between elements.
  let a, C, $, I, P, m, S, c, G, E, u, A, V, g, N, T, W, w, z, p, X, y, Y, q, v, F, Q, O, R, U, D, B, K, j;
  // Set to true at the end of m()/i() and to false at the end of o().
  let tt;

  // Static HTML assigned through innerHTML below. It is authored content
  // baked in at build time, not user input.
  const lt = '<a href="https://huggingface.co/papers/2303.17604" rel="nofollow">Token merging</a> (ToMe) merges redundant tokens/patches progressively in the forward pass of a Transformer-based network which can speed-up the inference latency of <a href="/docs/diffusers/v0.26.2/en/api/pipelines/stable_diffusion/text2img#diffusers.StableDiffusionPipeline">StableDiffusionPipeline</a>.';
  const nt = "Install ToMe from <code>pip</code>:";
  const it = 'You can use ToMe from the <a href="https://github.com/dbolya/tomesd" rel="nofollow"><code>tomesd</code></a> library with the <a href="https://github.com/dbolya/tomesd?tab=readme-ov-file#usage" rel="nofollow"><code>apply_patch</code></a> function:';
  const st = 'The <code>apply_patch</code> function exposes a number of <a href="https://github.com/dbolya/tomesd#usage" rel="nofollow">arguments</a> to help strike a balance between pipeline inference speed and the quality of the generated tokens. The most important argument is <code>ratio</code> which controls the number of tokens that are merged during the forward pass.';
  const ot = 'As reported in the <a href="https://huggingface.co/papers/2303.17604" rel="nofollow">paper</a>, ToMe can greatly preserve the quality of the generated images while boosting inference speed. By increasing the <code>ratio</code>, you can speed-up inference even further, but at the cost of some degraded image quality.';
  const at = 'To test the quality of the generated images, we sampled a few prompts from <a href="https://parti.research.google/" rel="nofollow">Parti Prompts</a> and performed inference with the <a href="/docs/diffusers/v0.26.2/en/api/pipelines/stable_diffusion/text2img#diffusers.StableDiffusionPipeline">StableDiffusionPipeline</a> with the following settings:';
  const pt = '<img src="https://huggingface.co/datasets/diffusers/docs-images/resolve/main/tome/tome_samples.png"/>';
  const ft = 'We didn’t notice any significant decrease in the quality of the generated samples, and you can check out the generated samples in this <a href="https://wandb.ai/sayakpaul/tomesd-results/runs/23j4bj3i?workspace=" rel="nofollow">WandB report</a>. If you’re interested in reproducing this experiment, use this <a href="https://gist.github.com/sayakpaul/8cac98d7f22399085a060992f411ecbd" rel="nofollow">script</a>.';
  const mt = 'We also benchmarked the impact of <code>tomesd</code> on the <a href="/docs/diffusers/v0.26.2/en/api/pipelines/stable_diffusion/text2img#diffusers.StableDiffusionPipeline">StableDiffusionPipeline</a> with <a href="https://huggingface.co/docs/diffusers/optimization/xformers" rel="nofollow">xFormers</a> enabled across several image resolutions. The results are obtained from A100 and V100 GPUs in the following development environment:';
  const ct = 'To reproduce this benchmark, feel free to use this <a href="https://gist.github.com/sayakpaul/27aec6bca7eb7b0e0aa4112205850335" rel="nofollow">script</a>. The results are reported in seconds, and where applicable we report the speed-up percentage over the vanilla pipeline when using ToMe and ToMe + xFormers.';
  const ht = "<thead><tr><th><strong>GPU</strong></th> <th><strong>Resolution</strong></th> <th><strong>Batch size</strong></th> <th><strong>Vanilla</strong></th> <th><strong>ToMe</strong></th> <th><strong>ToMe + xFormers</strong></th></tr></thead> <tbody><tr><td><strong>A100</strong></td> <td>512</td> <td>10</td> <td>6.88</td> <td>5.26 (+23.55%)</td> <td>4.69 (+31.83%)</td></tr> <tr><td></td> <td>768</td> <td>10</td> <td>OOM</td> <td>14.71</td> <td>11</td></tr> <tr><td></td> <td></td> <td>8</td> <td>OOM</td> <td>11.56</td> <td>8.84</td></tr> <tr><td></td> <td></td> <td>4</td> <td>OOM</td> <td>5.98</td> <td>4.66</td></tr> <tr><td></td> <td></td> <td>2</td> <td>4.99</td> <td>3.24 (+35.07%)</td> <td>2.1 (+37.88%)</td></tr> <tr><td></td> <td></td> <td>1</td> <td>3.29</td> <td>2.24 (+31.91%)</td> <td>2.03 (+38.3%)</td></tr> <tr><td></td> <td>1024</td> <td>10</td> <td>OOM</td> <td>OOM</td> <td>OOM</td></tr> <tr><td></td> <td></td> <td>8</td> <td>OOM</td> <td>OOM</td> <td>OOM</td></tr> <tr><td></td> <td></td> <td>4</td> <td>OOM</td> <td>12.51</td> <td>9.09</td></tr> <tr><td></td> <td></td> <td>2</td> <td>OOM</td> <td>6.52</td> <td>4.96</td></tr> <tr><td></td> <td></td> <td>1</td> <td>6.4</td> <td>3.61 (+43.59%)</td> <td>2.81 (+56.09%)</td></tr> <tr><td><strong>V100</strong></td> <td>512</td> <td>10</td> <td>OOM</td> <td>10.03</td> <td>9.29</td></tr> <tr><td></td> <td></td> <td>8</td> <td>OOM</td> <td>8.05</td> <td>7.47</td></tr> <tr><td></td> <td></td> <td>4</td> <td>5.7</td> <td>4.3 (+24.56%)</td> <td>3.98 (+30.18%)</td></tr> <tr><td></td> <td></td> <td>2</td> <td>3.14</td> <td>2.43 (+22.61%)</td> <td>2.27 (+27.71%)</td></tr> <tr><td></td> <td></td> <td>1</td> <td>1.88</td> <td>1.57 (+16.49%)</td> <td>1.57 (+16.49%)</td></tr> <tr><td></td> <td>768</td> <td>10</td> <td>OOM</td> <td>OOM</td> <td>23.67</td></tr> <tr><td></td> <td></td> <td>8</td> <td>OOM</td> <td>OOM</td> <td>18.81</td></tr> <tr><td></td> <td></td> <td>4</td> <td>OOM</td> <td>11.81</td> <td>9.7</td></tr> <tr><td></td> <td></td> <td>2</td> <td>OOM</td> <td>6.27</td> <td>5.2</td></tr> <tr><td></td> <td></td> <td>1</td> <td>5.43</td> <td>3.38 (+37.75%)</td> <td>2.82 (+48.07%)</td></tr> <tr><td></td> <td>1024</td> <td>10</td> <td>OOM</td> <td>OOM</td> <td>OOM</td></tr> <tr><td></td> <td></td> <td>8</td> <td>OOM</td> <td>OOM</td> <td>OOM</td></tr> <tr><td></td> <td></td> <td>4</td> <td>OOM</td> <td>OOM</td> <td>19.35</td></tr> <tr><td></td> <td></td> <td>2</td> <td>OOM</td> <td>13</td> <td>10.78</td></tr> <tr><td></td> <td></td> <td>1</td> <td>OOM</td> <td>6.66</td> <td>5.54</td></tr></tbody>";
  const ut = 'As seen in the tables above, the speed-up from <code>tomesd</code> becomes more pronounced for larger image resolutions. It is also interesting to note that with <code>tomesd</code>, it is possible to run the pipeline on a higher resolution like 1024x1024. You may be able to speed-up inference even more with <a href="torch2.0"><code>torch.compile</code></a>.';

  // Child components, created in the same order as the original output.
  // The `code` props look like base64-encoded payloads - TODO confirm how
  // CodeBlock consumes them.
  const f = new gt({ props: { title: "Token merging", local: "token-merging", headingTag: "h1" } });
  const h = new dt({ props: { code: "cGlwJTIwaW5zdGFsbCUyMHRvbWVzZA==", highlighted: "pip install tomesd", wrap: !1 } });
  // The template literals below contain real newlines; their continuation
  // lines must stay at column 0 so the string content is unchanged.
  const M = new dt({ props: { code: "JTIwJTIwZnJvbSUyMGRpZmZ1c2VycyUyMGltcG9ydCUyMFN0YWJsZURpZmZ1c2lvblBpcGVsaW5lJTBBJTIwJTIwaW1wb3J0JTIwdG9yY2glMEElMjAlMjBpbXBvcnQlMjB0b21lc2QlMEElMEElMjAlMjBwaXBlbGluZSUyMCUzRCUyMFN0YWJsZURpZmZ1c2lvblBpcGVsaW5lLmZyb21fcHJldHJhaW5lZCglMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJydW53YXltbCUyRnN0YWJsZS1kaWZmdXNpb24tdjEtNSUyMiUyQyUyMHRvcmNoX2R0eXBlJTNEdG9yY2guZmxvYXQxNiUyQyUyMHVzZV9zYWZldGVuc29ycyUzRFRydWUlMkMlMEElMjAlMjApLnRvKCUyMmN1ZGElMjIpJTBBJTJCJTIwdG9tZXNkLmFwcGx5X3BhdGNoKHBpcGVsaW5lJTJDJTIwcmF0aW8lM0QwLjUpJTBBJTBBJTIwJTIwaW1hZ2UlMjAlM0QlMjBwaXBlbGluZSglMjJhJTIwcGhvdG8lMjBvZiUyMGFuJTIwYXN0cm9uYXV0JTIwcmlkaW5nJTIwYSUyMGhvcnNlJTIwb24lMjBtYXJzJTIyKS5pbWFnZXMlNUIwJTVE", highlighted: ` from diffusers import StableDiffusionPipeline
import torch
import tomesd
pipeline = StableDiffusionPipeline.from_pretrained(
&quot;runwayml/stable-diffusion-v1-5&quot;, torch_dtype=torch.float16, use_safetensors=True,
).to(&quot;cuda&quot;)
<span class="hljs-addition">+ tomesd.apply_patch(pipeline, ratio=0.5)</span>
image = pipeline(&quot;a photo of an astronaut riding a horse on mars&quot;).images[0]`, wrap: !1 } });
  const b = new gt({ props: { title: "Benchmarks", local: "benchmarks", headingTag: "h2" } });
  const J = new dt({ props: { code: "LSUyMCU2MGRpZmZ1c2VycyU2MCUyMHZlcnNpb24lM0ElMjAwLjE1LjElMEEtJTIwUHl0aG9uJTIwdmVyc2lvbiUzQSUyMDMuOC4xNiUwQS0lMjBQeVRvcmNoJTIwdmVyc2lvbiUyMChHUFUlM0YpJTNBJTIwMS4xMy4xJTJCY3UxMTYlMjAoVHJ1ZSklMEEtJTIwSHVnZ2luZ2ZhY2VfaHViJTIwdmVyc2lvbiUzQSUyMDAuMTMuMiUwQS0lMjBUcmFuc2Zvcm1lcnMlMjB2ZXJzaW9uJTNBJTIwNC4yNy4yJTBBLSUyMEFjY2VsZXJhdGUlMjB2ZXJzaW9uJTNBJTIwMC4xOC4wJTBBLSUyMHhGb3JtZXJzJTIwdmVyc2lvbiUzQSUyMDAuMC4xNiUwQS0lMjB0b21lc2QlMjB2ZXJzaW9uJTNBJTIwMC4xLjI=", highlighted: `- \`diffusers\` version: 0.15.1
- Python version: 3.8.16
- PyTorch version (GPU?): 1.13.1+cu116 (True)
- Huggingface_hub version: 0.13.2
- Transformers version: 4.27.2
- Accelerate version: 0.18.0
- xFormers version: 0.0.16
- tomesd version: 0.1.2`, wrap: !1 } });

  return {
    // Creates every node from scratch, in document order, then sets attributes.
    c() {
      a = i("meta");
      C = l();
      $ = i("p");
      I = l();
      H(f.$$.fragment);
      P = l();
      m = i("p");
      m.innerHTML = lt;
      S = l();
      c = i("p");
      c.innerHTML = nt;
      G = l();
      H(h.$$.fragment);
      E = l();
      u = i("p");
      u.innerHTML = it;
      A = l();
      H(M.$$.fragment);
      V = l();
      g = i("p");
      g.innerHTML = st;
      N = l();
      T = i("p");
      T.innerHTML = ot;
      W = l();
      w = i("p");
      w.innerHTML = at;
      z = l();
      p = i("div");
      p.innerHTML = pt;
      X = l();
      y = i("p");
      y.innerHTML = ft;
      Y = l();
      H(b.$$.fragment);
      q = l();
      v = i("p");
      v.innerHTML = mt;
      F = l();
      H(J.$$.fragment);
      Q = l();
      O = i("p");
      O.innerHTML = ct;
      R = l();
      U = i("table");
      U.innerHTML = ht;
      D = l();
      B = i("p");
      B.innerHTML = ut;
      K = l();
      j = i("p");
      this.h();
    },

    // Same node sequence as c(), but obtained through the s()/n() helpers from
    // the nodes passed in as `t` (presumably pre-rendered markup - verify).
    // innerHTML is rewritten only when o(node) differs from the expected
    // "svelte-..." marker string.
    l(t) {
      const e = Jt("svelte-u9bgzb", document.head);
      a = s(e, "META", { name: !0, content: !0 });
      e.forEach(d);
      C = n(t);
      $ = s(t, "P", {});
      Mt($).forEach(d);
      I = n(t);
      x(f.$$.fragment, t);
      P = n(t);
      m = s(t, "P", { "data-svelte-h": !0 });
      if (o(m) !== "svelte-dgg61o") m.innerHTML = lt;
      S = n(t);
      c = s(t, "P", { "data-svelte-h": !0 });
      if (o(c) !== "svelte-l5huoo") c.innerHTML = nt;
      G = n(t);
      x(h.$$.fragment, t);
      E = n(t);
      u = s(t, "P", { "data-svelte-h": !0 });
      if (o(u) !== "svelte-1q1myt5") u.innerHTML = it;
      A = n(t);
      x(M.$$.fragment, t);
      V = n(t);
      g = s(t, "P", { "data-svelte-h": !0 });
      if (o(g) !== "svelte-dcsdl5") g.innerHTML = st;
      N = n(t);
      T = s(t, "P", { "data-svelte-h": !0 });
      if (o(T) !== "svelte-rxgnrl") T.innerHTML = ot;
      W = n(t);
      w = s(t, "P", { "data-svelte-h": !0 });
      if (o(w) !== "svelte-1tywpms") w.innerHTML = at;
      z = n(t);
      p = s(t, "DIV", { class: !0, "data-svelte-h": !0 });
      if (o(p) !== "svelte-ng3g1s") p.innerHTML = pt;
      X = n(t);
      y = s(t, "P", { "data-svelte-h": !0 });
      if (o(y) !== "svelte-1skh0rp") y.innerHTML = ft;
      Y = n(t);
      x(b.$$.fragment, t);
      q = n(t);
      v = s(t, "P", { "data-svelte-h": !0 });
      if (o(v) !== "svelte-9mm4z4") v.innerHTML = mt;
      F = n(t);
      x(J.$$.fragment, t);
      Q = n(t);
      O = s(t, "P", { "data-svelte-h": !0 });
      if (o(O) !== "svelte-5yzaqq") O.innerHTML = ct;
      R = n(t);
      U = s(t, "TABLE", { "data-svelte-h": !0 });
      if (o(U) !== "svelte-1dvc6a") U.innerHTML = ht;
      D = n(t);
      B = s(t, "P", { "data-svelte-h": !0 });
      if (o(B) !== "svelte-v0a950") B.innerHTML = ut;
      K = n(t);
      j = s(t, "P", {});
      Mt(j).forEach(d);
      this.h();
    },

    // Sets the attributes shared by the c() and l() paths.
    h() {
      et(a, "name", "hf:doc:metadata");
      et(a, "content", Bt);
      // The scraped source had a line break inside this literal.
      et(p, "class", "flex justify-center");
    },

    // Inserts the <meta> into document.head and every other node into `t`
    // (with anchor `e`), in document order.
    m(t, e) {
      Ot(document.head, a);
      r(t, C, e);
      r(t, $, e);
      r(t, I, e);
      _(f, t, e);
      r(t, P, e);
      r(t, m, e);
      r(t, S, e);
      r(t, c, e);
      r(t, G, e);
      _(h, t, e);
      r(t, E, e);
      r(t, u, e);
      r(t, A, e);
      _(M, t, e);
      r(t, V, e);
      r(t, g, e);
      r(t, N, e);
      r(t, T, e);
      r(t, W, e);
      r(t, w, e);
      r(t, z, e);
      r(t, p, e);
      r(t, X, e);
      r(t, y, e);
      r(t, Y, e);
      _(b, t, e);
      r(t, q, e);
      r(t, v, e);
      r(t, F, e);
      _(J, t, e);
      r(t, Q, e);
      r(t, O, e);
      r(t, R, e);
      r(t, U, e);
      r(t, D, e);
      r(t, B, e);
      r(t, K, e);
      r(t, j, e);
      tt = !0;
    },

    // The page has no reactive state, so the update hook is the imported
    // `wt` helper (presumably a no-op - verify in ../chunks/scheduler).
    p: wt,

    // Calls Z on each child component fragment once, guarded by `tt`.
    i(t) {
      if (tt) return;
      Z(f.$$.fragment, t);
      Z(h.$$.fragment, t);
      Z(M.$$.fragment, t);
      Z(b.$$.fragment, t);
      Z(J.$$.fragment, t);
      tt = !0;
    },

    // Calls L on each child component fragment and clears the `tt` guard.
    o(t) {
      L(f.$$.fragment, t);
      L(h.$$.fragment, t);
      L(M.$$.fragment, t);
      L(b.$$.fragment, t);
      L(J.$$.fragment, t);
      tt = !1;
    },

    // Teardown: body nodes are passed to d() only when `t` is truthy; the
    // <meta> node and the child components are always released.
    d(t) {
      if (t) {
        d(C);
        d($);
        d(I);
        d(P);
        d(m);
        d(S);
        d(c);
        d(G);
        d(E);
        d(u);
        d(A);
        d(V);
        d(g);
        d(N);
        d(T);
        d(W);
        d(w);
        d(z);
        d(p);
        d(X);
        d(y);
        d(Y);
        d(q);
        d(v);
        d(F);
        d(Q);
        d(O);
        d(R);
        d(U);
        d(D);
        d(B);
        d(K);
        d(j);
      }
      d(a);
      k(f, t);
      k(h, t);
      k(M, t);
      k(b, t);
      k(J, t);
    },
  };
}

// JSON string written to the content attribute of the hf:doc:metadata <meta> tag.
const Bt = '{"title":"Token merging","local":"token-merging","sections":[{"title":"Benchmarks","local":"benchmarks","sections":[],"depth":2}],"depth":1}';

/**
 * Instance-setup function handed to `vt` by the component constructor.
 *
 * @param {*} rt - Unused.
 * @returns {Array} Always an empty array.
 */
function $t(rt) {
  yt(() => {
    // The "fw" query parameter is read and the result discarded; kept
    // exactly as the compiler emitted it.
    new URLSearchParams(window.location.search).get("fw");
  });
  return [];
}

/** Page component; wires `$t`, `Ut` and `Tt` into the imported base class. */
class Zt extends bt {
  constructor(a) {
    super();
    vt(this, a, $t, Ut, Tt, {});
  }
}

export { Zt as component };

Xet Storage Details

Size:
11.6 kB
·
Xet hash:
213888f162547839edfdf13b17ac9b55fc1a7129e11fc0efc3c3ff377158d212

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.