Buckets:
| import{s as ve,o as ke,n as Ge}from"../chunks/scheduler.182ea377.js";import{S as Ie,i as He,g as r,s as n,r as d,A as We,h as p,f as s,c as a,j as Ze,u,x as o,k as Ce,y as ze,a as l,v as f,d as m,t as h,w}from"../chunks/index.abf12888.js";import{T as Pe}from"../chunks/Tip.230e2334.js";import{C as S}from"../chunks/CodeBlock.57fe6e13.js";import{H as fe}from"../chunks/Heading.16916d63.js";function Ee(x){let i,M='To learn more, take a look at the <a href="https://huggingface.co/docs/accelerate/en/usage_guides/distributed_inference#distributed-inference-with-accelerate" rel="nofollow">Distributed Inference with 🤗 Accelerate</a> guide.';return{c(){i=r("p"),i.innerHTML=M},l(c){i=p(c,"P",{"data-svelte-h":!0}),o(i)!=="svelte-c8rg1s"&&(i.innerHTML=M)},m(c,B){l(c,i,B)},p:Ge,d(c){c&&s(i)}}}function Re(x){let i,M,c,B,J,A,T,me='On distributed setups, you can run inference across multiple GPUs with 🤗 <a href="https://huggingface.co/docs/accelerate/index" rel="nofollow">Accelerate</a> or <a href="https://pytorch.org/tutorials/beginner/dist_overview.html" rel="nofollow">PyTorch Distributed</a>, which is useful for generating with multiple prompts in parallel.',V,b,he="This guide will show you how to use 🤗 Accelerate and PyTorch Distributed for distributed inference.",Y,U,L,j,we='🤗 <a href="https://huggingface.co/docs/accelerate/index" rel="nofollow">Accelerate</a> is a library designed to make it easy to train or run inference across distributed setups. It simplifies the process of setting up the distributed environment, allowing you to focus on your PyTorch code.',D,g,ye='To begin, create a Python file and initialize an <a href="https://huggingface.co/docs/accelerate/v0.28.0/en/package_reference/state#accelerate.PartialState" rel="nofollow">accelerate.PartialState</a> to create a distributed environment; your setup is automatically detected so you don’t need to explicitly define the <code>rank</code> or <code>world_size</code>. Move the <a href="/docs/diffusers/v0.27.0/en/api/pipelines/overview#diffusers.DiffusionPipeline">DiffusionPipeline</a> to <code>distributed_state.device</code> to assign a GPU to each process.',Q,_,Me='Now use the <a href="https://huggingface.co/docs/accelerate/v0.28.0/en/package_reference/state#accelerate.PartialState.split_between_processes" rel="nofollow">split_between_processes</a> utility as a context manager to automatically distribute the prompts between the number of processes.',F,$,q,Z,Je="Use the <code>--num_processes</code> argument to specify the number of GPUs to use, and call <code>accelerate launch</code> to run the script:",K,C,O,y,ee,v,te,k,Te='PyTorch supports <a href="https://pytorch.org/docs/stable/generated/torch.nn.parallel.DistributedDataParallel.html" rel="nofollow"><code>DistributedDataParallel</code></a> which enables data parallelism.',se,G,be='To start, create a Python file and import <code>torch.distributed</code> and <code>torch.multiprocessing</code> to set up the distributed process group and to spawn the processes for inference on each GPU. You should also initialize a <a href="/docs/diffusers/v0.27.0/en/api/pipelines/overview#diffusers.DiffusionPipeline">DiffusionPipeline</a>:',le,I,ne,H,Ue='You’ll want to create a function to run inference; <a href="https://pytorch.org/docs/stable/distributed.html?highlight=init_process_group#torch.distributed.init_process_group" rel="nofollow"><code>init_process_group</code></a> handles creating a distributed environment with the type of backend to use, the <code>rank</code> of the current process, and the <code>world_size</code> or the number of processes participating. If you’re running inference in parallel over 2 GPUs, then the <code>world_size</code> is 2.',ae,W,je='Move the <a href="/docs/diffusers/v0.27.0/en/api/pipelines/overview#diffusers.DiffusionPipeline">DiffusionPipeline</a> to <code>rank</code> and use <code>get_rank</code> to assign a GPU to each process, where each process handles a different prompt:',ie,z,re,P,ge='To run the distributed inference, call <a href="https://pytorch.org/docs/stable/multiprocessing.html#torch.multiprocessing.spawn" rel="nofollow"><code>mp.spawn</code></a> to run the <code>run_inference</code> function on the number of GPUs defined in <code>world_size</code>:',pe,E,oe,R,_e="Once you’ve completed the inference script, use the <code>--nproc_per_node</code> argument to specify the number of GPUs to use and call <code>torchrun</code> to run the script:",ce,N,de,X,ue;return J=new fe({props:{title:"Distributed inference with multiple GPUs",local:"distributed-inference-with-multiple-gpus",headingTag:"h1"}}),U=new fe({props:{title:"🤗 Accelerate",local:"-accelerate",headingTag:"h2"}}),$=new S({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwYWNjZWxlcmF0ZSUyMGltcG9ydCUyMFBhcnRpYWxTdGF0ZSUwQWZyb20lMjBkaWZmdXNlcnMlMjBpbXBvcnQlMjBEaWZmdXNpb25QaXBlbGluZSUwQSUwQXBpcGVsaW5lJTIwJTNEJTIwRGlmZnVzaW9uUGlwZWxpbmUuZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMCUyMnJ1bndheW1sJTJGc3RhYmxlLWRpZmZ1c2lvbi12MS01JTIyJTJDJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5mbG9hdDE2JTJDJTIwdXNlX3NhZmV0ZW5zb3JzJTNEVHJ1ZSUwQSklMEFkaXN0cmlidXRlZF9zdGF0ZSUyMCUzRCUyMFBhcnRpYWxTdGF0ZSgpJTBBcGlwZWxpbmUudG8oZGlzdHJpYnV0ZWRfc3RhdGUuZGV2aWNlKSUwQSUwQXdpdGglMjBkaXN0cmlidXRlZF9zdGF0ZS5zcGxpdF9iZXR3ZWVuX3Byb2Nlc3NlcyglNUIlMjJhJTIwZG9nJTIyJTJDJTIwJTIyYSUyMGNhdCUyMiU1RCklMjBhcyUyMHByb21wdCUzQSUwQSUyMCUyMCUyMCUyMHJlc3VsdCUyMCUzRCUyMHBpcGVsaW5lKHByb21wdCkuaW1hZ2VzJTVCMCU1RCUwQSUyMCUyMCUyMCUyMHJlc3VsdC5zYXZlKGYlMjJyZXN1bHRfJTdCZGlzdHJpYnV0ZWRfc3RhdGUucHJvY2Vzc19pbmRleCU3RC5wbmclMjIp",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> accelerate <span class="hljs-keyword">import</span> PartialState | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> DiffusionPipeline | |
| pipeline = DiffusionPipeline.from_pretrained( | |
| <span class="hljs-string">"runwayml/stable-diffusion-v1-5"</span>, torch_dtype=torch.float16, use_safetensors=<span class="hljs-literal">True</span> | |
| ) | |
| distributed_state = PartialState() | |
| pipeline.to(distributed_state.device) | |
| <span class="hljs-keyword">with</span> distributed_state.split_between_processes([<span class="hljs-string">"a dog"</span>, <span class="hljs-string">"a cat"</span>]) <span class="hljs-keyword">as</span> prompt: | |
| result = pipeline(prompt).images[<span class="hljs-number">0</span>] | |
| result.save(<span class="hljs-string">f"result_<span class="hljs-subst">{distributed_state.process_index}</span>.png"</span>)`,wrap:!1}}),C=new S({props:{code:"YWNjZWxlcmF0ZSUyMGxhdW5jaCUyMHJ1bl9kaXN0cmlidXRlZC5weSUyMC0tbnVtX3Byb2Nlc3NlcyUzRDI=",highlighted:"accelerate launch run_distributed.py --num_processes=2",wrap:!1}}),y=new Pe({props:{$$slots:{default:[Ee]},$$scope:{ctx:x}}}),v=new fe({props:{title:"PyTorch Distributed",local:"pytorch-distributed",headingTag:"h2"}}),I=new S({props:{code:"aW1wb3J0JTIwdG9yY2glMEFpbXBvcnQlMjB0b3JjaC5kaXN0cmlidXRlZCUyMGFzJTIwZGlzdCUwQWltcG9ydCUyMHRvcmNoLm11bHRpcHJvY2Vzc2luZyUyMGFzJTIwbXAlMEElMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwRGlmZnVzaW9uUGlwZWxpbmUlMEElMEFzZCUyMCUzRCUyMERpZmZ1c2lvblBpcGVsaW5lLmZyb21fcHJldHJhaW5lZCglMEElMjAlMjAlMjAlMjAlMjJydW53YXltbCUyRnN0YWJsZS1kaWZmdXNpb24tdjEtNSUyMiUyQyUyMHRvcmNoX2R0eXBlJTNEdG9yY2guZmxvYXQxNiUyQyUyMHVzZV9zYWZldGVuc29ycyUzRFRydWUlMEEp",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">import</span> torch.distributed <span class="hljs-keyword">as</span> dist | |
| <span class="hljs-keyword">import</span> torch.multiprocessing <span class="hljs-keyword">as</span> mp | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> DiffusionPipeline | |
| sd = DiffusionPipeline.from_pretrained( | |
| <span class="hljs-string">"runwayml/stable-diffusion-v1-5"</span>, torch_dtype=torch.float16, use_safetensors=<span class="hljs-literal">True</span> | |
| )`,wrap:!1}}),z=new S({props:{code:"ZGVmJTIwcnVuX2luZmVyZW5jZShyYW5rJTJDJTIwd29ybGRfc2l6ZSklM0ElMEElMjAlMjAlMjAlMjBkaXN0LmluaXRfcHJvY2Vzc19ncm91cCglMjJuY2NsJTIyJTJDJTIwcmFuayUzRHJhbmslMkMlMjB3b3JsZF9zaXplJTNEd29ybGRfc2l6ZSklMEElMEElMjAlMjAlMjAlMjBzZC50byhyYW5rKSUwQSUwQSUyMCUyMCUyMCUyMGlmJTIwdG9yY2guZGlzdHJpYnV0ZWQuZ2V0X3JhbmsoKSUyMCUzRCUzRCUyMDAlM0ElMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBwcm9tcHQlMjAlM0QlMjAlMjJhJTIwZG9nJTIyJTBBJTIwJTIwJTIwJTIwZWxpZiUyMHRvcmNoLmRpc3RyaWJ1dGVkLmdldF9yYW5rKCklMjAlM0QlM0QlMjAxJTNBJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwcHJvbXB0JTIwJTNEJTIwJTIyYSUyMGNhdCUyMiUwQSUwQSUyMCUyMCUyMCUyMGltYWdlJTIwJTNEJTIwc2QocHJvbXB0KS5pbWFnZXMlNUIwJTVEJTBBJTIwJTIwJTIwJTIwaW1hZ2Uuc2F2ZShmJTIyLiUyRiU3QidfJy5qb2luKHByb21wdCklN0QucG5nJTIyKQ==",highlighted:`<span class="hljs-keyword">def</span> <span class="hljs-title function_">run_inference</span>(<span class="hljs-params">rank, world_size</span>): | |
| dist.init_process_group(<span class="hljs-string">"nccl"</span>, rank=rank, world_size=world_size) | |
| sd.to(rank) | |
| <span class="hljs-keyword">if</span> torch.distributed.get_rank() == <span class="hljs-number">0</span>: | |
| prompt = <span class="hljs-string">"a dog"</span> | |
| <span class="hljs-keyword">elif</span> torch.distributed.get_rank() == <span class="hljs-number">1</span>: | |
| prompt = <span class="hljs-string">"a cat"</span> | |
| image = sd(prompt).images[<span class="hljs-number">0</span>] | |
| image.save(<span class="hljs-string">f"./<span class="hljs-subst">{<span class="hljs-string">'_'</span>.join(prompt)}</span>.png"</span>)`,wrap:!1}}),E=new S({props:{code:"ZGVmJTIwbWFpbigpJTNBJTBBJTIwJTIwJTIwJTIwd29ybGRfc2l6ZSUyMCUzRCUyMDIlMEElMjAlMjAlMjAlMjBtcC5zcGF3bihydW5faW5mZXJlbmNlJTJDJTIwYXJncyUzRCh3b3JsZF9zaXplJTJDKSUyQyUyMG5wcm9jcyUzRHdvcmxkX3NpemUlMkMlMjBqb2luJTNEVHJ1ZSklMEElMEElMEFpZiUyMF9fbmFtZV9fJTIwJTNEJTNEJTIwJTIyX19tYWluX18lMjIlM0ElMEElMjAlMjAlMjAlMjBtYWluKCk=",highlighted:`<span class="hljs-keyword">def</span> <span class="hljs-title function_">main</span>(): | |
| world_size = <span class="hljs-number">2</span> | |
| mp.spawn(run_inference, args=(world_size,), nprocs=world_size, join=<span class="hljs-literal">True</span>) | |
| <span class="hljs-keyword">if</span> __name__ == <span class="hljs-string">"__main__"</span>: | |
| main()`,wrap:!1}}),N=new S({props:{code:"dG9yY2hydW4lMjBydW5fZGlzdHJpYnV0ZWQucHklMjAtLW5wcm9jX3Blcl9ub2RlJTNEMg==",highlighted:"torchrun run_distributed.py --nproc_per_node=2",wrap:!1}}),{c(){i=r("meta"),M=n(),c=r("p"),B=n(),d(J.$$.fragment),A=n(),T=r("p"),T.innerHTML=me,V=n(),b=r("p"),b.textContent=he,Y=n(),d(U.$$.fragment),L=n(),j=r("p"),j.innerHTML=we,D=n(),g=r("p"),g.innerHTML=ye,Q=n(),_=r("p"),_.innerHTML=Me,F=n(),d($.$$.fragment),q=n(),Z=r("p"),Z.innerHTML=Je,K=n(),d(C.$$.fragment),O=n(),d(y.$$.fragment),ee=n(),d(v.$$.fragment),te=n(),k=r("p"),k.innerHTML=Te,se=n(),G=r("p"),G.innerHTML=be,le=n(),d(I.$$.fragment),ne=n(),H=r("p"),H.innerHTML=Ue,ae=n(),W=r("p"),W.innerHTML=je,ie=n(),d(z.$$.fragment),re=n(),P=r("p"),P.innerHTML=ge,pe=n(),d(E.$$.fragment),oe=n(),R=r("p"),R.innerHTML=_e,ce=n(),d(N.$$.fragment),de=n(),X=r("p"),this.h()},l(e){const t=We("svelte-u9bgzb",document.head);i=p(t,"META",{name:!0,content:!0}),t.forEach(s),M=a(e),c=p(e,"P",{}),Ze(c).forEach(s),B=a(e),u(J.$$.fragment,e),A=a(e),T=p(e,"P",{"data-svelte-h":!0}),o(T)!=="svelte-a7bv7i"&&(T.innerHTML=me),V=a(e),b=p(e,"P",{"data-svelte-h":!0}),o(b)!=="svelte-1qu3csy"&&(b.textContent=he),Y=a(e),u(U.$$.fragment,e),L=a(e),j=p(e,"P",{"data-svelte-h":!0}),o(j)!=="svelte-13uq1g2"&&(j.innerHTML=we),D=a(e),g=p(e,"P",{"data-svelte-h":!0}),o(g)!=="svelte-dflnah"&&(g.innerHTML=ye),Q=a(e),_=p(e,"P",{"data-svelte-h":!0}),o(_)!=="svelte-1sjkx0a"&&(_.innerHTML=Me),F=a(e),u($.$$.fragment,e),q=a(e),Z=p(e,"P",{"data-svelte-h":!0}),o(Z)!=="svelte-1ohh8as"&&(Z.innerHTML=Je),K=a(e),u(C.$$.fragment,e),O=a(e),u(y.$$.fragment,e),ee=a(e),u(v.$$.fragment,e),te=a(e),k=p(e,"P",{"data-svelte-h":!0}),o(k)!=="svelte-jtiddl"&&(k.innerHTML=Te),se=a(e),G=p(e,"P",{"data-svelte-h":!0}),o(G)!=="svelte-1sgzedr"&&(G.innerHTML=be),le=a(e),u(I.$$.fragment,e),ne=a(e),H=p(e,"P",{"data-svelte-h":!0}),o(H)!=="svelte-qkdvuf"&&(H.innerHTML=Ue),ae=a(e),W=p(e,"P",{"data-svelte-h":!0}),o(W)!=="svelte-gq9f0m"&&(W.innerHTML=je),ie=a(e),u(z.$$.fragment,e),re=a(e),P=p(e,"P",{"data-svelte-h":!0}),o(P)!=="svelte-1ecd3vq"&&(P.innerHTML=ge),pe=a(e),u(E.$$.fragment,e),oe=a(e),R=p(e,"P",{"data-svelte-h":!0}),o(R)!=="svelte-ykaora"&&(R.innerHTML=_e),ce=a(e),u(N.$$.fragment,e),de=a(e),X=p(e,"P",{}),Ze(X).forEach(s),this.h()},h(){Ce(i,"name","hf:doc:metadata"),Ce(i,"content",Ne)},m(e,t){ze(document.head,i),l(e,M,t),l(e,c,t),l(e,B,t),f(J,e,t),l(e,A,t),l(e,T,t),l(e,V,t),l(e,b,t),l(e,Y,t),f(U,e,t),l(e,L,t),l(e,j,t),l(e,D,t),l(e,g,t),l(e,Q,t),l(e,_,t),l(e,F,t),f($,e,t),l(e,q,t),l(e,Z,t),l(e,K,t),f(C,e,t),l(e,O,t),f(y,e,t),l(e,ee,t),f(v,e,t),l(e,te,t),l(e,k,t),l(e,se,t),l(e,G,t),l(e,le,t),f(I,e,t),l(e,ne,t),l(e,H,t),l(e,ae,t),l(e,W,t),l(e,ie,t),f(z,e,t),l(e,re,t),l(e,P,t),l(e,pe,t),f(E,e,t),l(e,oe,t),l(e,R,t),l(e,ce,t),f(N,e,t),l(e,de,t),l(e,X,t),ue=!0},p(e,[t]){const $e={};t&2&&($e.$$scope={dirty:t,ctx:e}),y.$set($e)},i(e){ue||(m(J.$$.fragment,e),m(U.$$.fragment,e),m($.$$.fragment,e),m(C.$$.fragment,e),m(y.$$.fragment,e),m(v.$$.fragment,e),m(I.$$.fragment,e),m(z.$$.fragment,e),m(E.$$.fragment,e),m(N.$$.fragment,e),ue=!0)},o(e){h(J.$$.fragment,e),h(U.$$.fragment,e),h($.$$.fragment,e),h(C.$$.fragment,e),h(y.$$.fragment,e),h(v.$$.fragment,e),h(I.$$.fragment,e),h(z.$$.fragment,e),h(E.$$.fragment,e),h(N.$$.fragment,e),ue=!1},d(e){e&&(s(M),s(c),s(B),s(A),s(T),s(V),s(b),s(Y),s(L),s(j),s(D),s(g),s(Q),s(_),s(F),s(q),s(Z),s(K),s(O),s(ee),s(te),s(k),s(se),s(G),s(le),s(ne),s(H),s(ae),s(W),s(ie),s(re),s(P),s(pe),s(oe),s(R),s(ce),s(de),s(X)),s(i),w(J,e),w(U,e),w($,e),w(C,e),w(y,e),w(v,e),w(I,e),w(z,e),w(E,e),w(N,e)}}}const Ne='{"title":"Distributed inference with multiple GPUs","local":"distributed-inference-with-multiple-gpus","sections":[{"title":"🤗 Accelerate","local":"-accelerate","sections":[],"depth":2},{"title":"PyTorch Distributed","local":"pytorch-distributed","sections":[],"depth":2}],"depth":1}';function Be(x){return ke(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class Ye extends Ie{constructor(i){super(),He(this,i,Be,Re,ve,{})}}export{Ye as component}; | |
Xet Storage Details
- Size:
- 14.3 kB
- Xet hash:
- acf358b6da8614b290cf6ae07dbbbce90fe6f13bf147141fb2a0e9f312a82de2
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.