Buckets:

rtrm's picture
download
raw
31 kB
import{s as It,o as kt,n as tt}from"../chunks/scheduler.8c3d61f6.js";import{S as Bt,i as Gt,g as o,s as a,r as d,A as Ct,h as p,f as l,c as n,j as $t,u as m,x as c,k as vt,y as Wt,a as s,v as f,d as u,t as M,w as h}from"../chunks/index.da70eac4.js";import{T as Oe}from"../chunks/Tip.1d9b8c37.js";import{C as J}from"../chunks/CodeBlock.00a903b3.js";import{H as et,E as Et}from"../chunks/EditOnGithub.1e64e623.js";function Xt(b){let i,y='Refer to this minimal example <a href="https://gist.github.com/sayakpaul/cfaebd221820d7b43fae638b4dfa01ba" rel="nofollow">script</a> for running inference across multiple GPUs. To learn more, take a look at the <a href="https://huggingface.co/docs/accelerate/en/usage_guides/distributed_inference#distributed-inference-with-accelerate" rel="nofollow">Distributed Inference with 🤗 Accelerate</a> guide.';return{c(){i=o("p"),i.innerHTML=y},l(r){i=p(r,"P",{"data-svelte-h":!0}),c(i)!=="svelte-gctvmu"&&(i.innerHTML=y)},m(r,w){s(r,i,w)},p:tt,d(r){r&&l(i)}}}function Rt(b){let i,y='You can use <code>device_map</code> within a <a href="/docs/diffusers/pr_9580/en/api/pipelines/overview#diffusers.DiffusionPipeline">DiffusionPipeline</a> to distribute its model-level components on multiple devices. Refer to the <a href="../tutorials/inference_with_big_models#device-placement">Device placement</a> guide to learn more.';return{c(){i=o("p"),i.innerHTML=y},l(r){i=p(r,"P",{"data-svelte-h":!0}),c(i)!=="svelte-z8x0dk"&&(i.innerHTML=y)},m(r,w){s(r,i,w)},p:tt,d(r){r&&l(i)}}}function Nt(b){let i,y="<strong>Only</strong> load the text encoders for this step! The diffusion transformer and VAE are loaded in a later step to preserve memory.";return{c(){i=o("p"),i.innerHTML=y},l(r){i=p(r,"P",{"data-svelte-h":!0}),c(i)!=="svelte-a3ie7d"&&(i.innerHTML=y)},m(r,w){s(r,i,w)},p:tt,d(r){r&&l(i)}}}function Ht(b){let i,y="At any point, you can try <code>print(pipeline.hf_device_map)</code> to see how the various models are distributed across devices. This is useful for tracking the device placement of the models.";return{c(){i=o("p"),i.innerHTML=y},l(r){i=p(r,"P",{"data-svelte-h":!0}),c(i)!=="svelte-11wg8kr"&&(i.innerHTML=y)},m(r,w){s(r,i,w)},p:tt,d(r){r&&l(i)}}}function xt(b){let i,y,r,w,Z,ce,_,lt='On distributed setups, you can run inference across multiple GPUs with 🤗 <a href="https://huggingface.co/docs/accelerate/index" rel="nofollow">Accelerate</a> or <a href="https://pytorch.org/tutorials/beginner/dist_overview.html" rel="nofollow">PyTorch Distributed</a>, which is useful for generating with multiple prompts in parallel.',de,$,st="This guide will show you how to use 🤗 Accelerate and PyTorch Distributed for distributed inference.",me,v,fe,I,at='🤗 <a href="https://huggingface.co/docs/accelerate/index" rel="nofollow">Accelerate</a> is a library designed to make it easy to train or run inference across distributed setups. It simplifies the process of setting up the distributed environment, allowing you to focus on your PyTorch code.',ue,k,nt='To begin, create a Python file and initialize an <a href="https://huggingface.co/docs/accelerate/main/en/package_reference/state#accelerate.PartialState" rel="nofollow">accelerate.PartialState</a> to create a distributed environment; your setup is automatically detected so you don’t need to explicitly define the <code>rank</code> or <code>world_size</code>. Move the <a href="/docs/diffusers/pr_9580/en/api/pipelines/overview#diffusers.DiffusionPipeline">DiffusionPipeline</a> to <code>distributed_state.device</code> to assign a GPU to each process.',Me,B,it='Now use the <a href="https://huggingface.co/docs/accelerate/main/en/package_reference/state#accelerate.PartialState.split_between_processes" rel="nofollow">split_between_processes</a> utility as a context manager to automatically distribute the prompts between the number of processes.',he,G,ye,C,rt="Use the <code>--num_processes</code> argument to specify the number of GPUs to use, and call <code>accelerate launch</code> to run the script:",we,W,Je,j,be,E,je,X,ot='PyTorch supports <a href="https://pytorch.org/docs/stable/generated/torch.nn.parallel.DistributedDataParallel.html" rel="nofollow"><code>DistributedDataParallel</code></a> which enables data parallelism.',Te,R,pt='To start, create a Python file and import <code>torch.distributed</code> and <code>torch.multiprocessing</code> to set up the distributed process group and to spawn the processes for inference on each GPU. You should also initialize a <a href="/docs/diffusers/pr_9580/en/api/pipelines/overview#diffusers.DiffusionPipeline">DiffusionPipeline</a>:',Ue,N,ge,H,ct='You’ll want to create a function to run inference; <a href="https://pytorch.org/docs/stable/distributed.html?highlight=init_process_group#torch.distributed.init_process_group" rel="nofollow"><code>init_process_group</code></a> handles creating a distributed environment with the type of backend to use, the <code>rank</code> of the current process, and the <code>world_size</code> or the number of processes participating. If you’re running inference in parallel over 2 GPUs, then the <code>world_size</code> is 2.',Ze,x,dt='Move the <a href="/docs/diffusers/pr_9580/en/api/pipelines/overview#diffusers.DiffusionPipeline">DiffusionPipeline</a> to <code>rank</code> and use <code>get_rank</code> to assign a GPU to each process, where each process handles a different prompt:',_e,V,$e,A,mt='To run the distributed inference, call <a href="https://pytorch.org/docs/stable/multiprocessing.html#torch.multiprocessing.spawn" rel="nofollow"><code>mp.spawn</code></a> to run the <code>run_inference</code> function on the number of GPUs defined in <code>world_size</code>:',ve,Y,Ie,Q,ft="Once you’ve completed the inference script, use the <code>--nproc_per_node</code> argument to specify the number of GPUs to use and call <code>torchrun</code> to run the script:",ke,z,Be,T,Ge,F,Ce,S,ut='Modern diffusion systems such as <a href="../api/pipelines/flux">Flux</a> are very large and have multiple models. For example, <a href="https://hf.co/black-forest-labs/FLUX.1-dev" rel="nofollow">Flux.1-Dev</a> is made up of two text encoders - <a href="https://hf.co/google/t5-v1_1-xxl" rel="nofollow">T5-XXL</a> and <a href="https://hf.co/openai/clip-vit-large-patch14" rel="nofollow">CLIP-L</a> - a <a href="../api/models/flux_transformer">diffusion transformer</a>, and a <a href="../api/models/autoencoderkl">VAE</a>. With a model this size, it can be challenging to run inference on consumer GPUs.',We,P,Mt="Model sharding is a technique that distributes models across GPUs when the models don’t fit on a single GPU. The example below assumes two 16GB GPUs are available for inference.",Ee,L,ht="Start by computing the text embeddings with the text encoders. Keep the text encoders on two GPUs by setting <code>device_map=&quot;balanced&quot;</code>. The <code>balanced</code> strategy evenly distributes the model on all available GPUs. Use the <code>max_memory</code> parameter to allocate the maximum amount of memory for each text encoder on each GPU.",Xe,U,Re,q,Ne,D,yt="Once the text embeddings are computed, remove them from the GPU to make space for the diffusion transformer.",He,K,xe,O,wt='Load the diffusion transformer next which has 12.5B parameters. This time, set <code>device_map=&quot;auto&quot;</code> to automatically distribute the model across two 16GB GPUs. The <code>auto</code> strategy is backed by <a href="https://hf.co/docs/accelerate/index" rel="nofollow">Accelerate</a> and available as a part of the <a href="https://hf.co/docs/accelerate/concept_guides/big_model_inference" rel="nofollow">Big Model Inference</a> feature. It starts by distributing a model across the fastest device first (GPU) before moving to slower devices like the CPU and hard drive if needed. The trade-off of storing model parameters on slower devices is slower inference latency.',Ve,ee,Ae,g,Ye,te,Jt="Add the transformer model to the pipeline for denoising, but set the other model-level components like the text encoders and VAE to <code>None</code> because you don’t need them yet.",Qe,le,ze,se,bt="Remove the pipeline and transformer from memory as they’re no longer needed.",Fe,ae,Se,ne,jt="Finally, decode the latents with the VAE into an image. The VAE is typically small enough to be loaded on a single GPU.",Pe,ie,Le,re,Tt="By selectively loading and unloading the models you need at a given stage and sharding the largest models across multiple GPUs, it is possible to run inference with large models on consumer GPUs.",qe,oe,De,pe,Ke;return Z=new et({props:{title:"Distributed inference",local:"distributed-inference",headingTag:"h1"}}),v=new et({props:{title:"🤗 Accelerate",local:"-accelerate",headingTag:"h2"}}),G=new J({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwYWNjZWxlcmF0ZSUyMGltcG9ydCUyMFBhcnRpYWxTdGF0ZSUwQWZyb20lMjBkaWZmdXNlcnMlMjBpbXBvcnQlMjBEaWZmdXNpb25QaXBlbGluZSUwQSUwQXBpcGVsaW5lJTIwJTNEJTIwRGlmZnVzaW9uUGlwZWxpbmUuZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMCUyMnN0YWJsZS1kaWZmdXNpb24tdjEtNSUyRnN0YWJsZS1kaWZmdXNpb24tdjEtNSUyMiUyQyUyMHRvcmNoX2R0eXBlJTNEdG9yY2guZmxvYXQxNiUyQyUyMHVzZV9zYWZldGVuc29ycyUzRFRydWUlMEEpJTBBZGlzdHJpYnV0ZWRfc3RhdGUlMjAlM0QlMjBQYXJ0aWFsU3RhdGUoKSUwQXBpcGVsaW5lLnRvKGRpc3RyaWJ1dGVkX3N0YXRlLmRldmljZSklMEElMEF3aXRoJTIwZGlzdHJpYnV0ZWRfc3RhdGUuc3BsaXRfYmV0d2Vlbl9wcm9jZXNzZXMoJTVCJTIyYSUyMGRvZyUyMiUyQyUyMCUyMmElMjBjYXQlMjIlNUQpJTIwYXMlMjBwcm9tcHQlM0ElMEElMjAlMjAlMjAlMjByZXN1bHQlMjAlM0QlMjBwaXBlbGluZShwcm9tcHQpLmltYWdlcyU1QjAlNUQlMEElMjAlMjAlMjAlMjByZXN1bHQuc2F2ZShmJTIycmVzdWx0XyU3QmRpc3RyaWJ1dGVkX3N0YXRlLnByb2Nlc3NfaW5kZXglN0QucG5nJTIyKQ==",highlighted:`<span class="hljs-keyword">import</span> torch
<span class="hljs-keyword">from</span> accelerate <span class="hljs-keyword">import</span> PartialState
<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> DiffusionPipeline
pipeline = DiffusionPipeline.from_pretrained(
<span class="hljs-string">&quot;stable-diffusion-v1-5/stable-diffusion-v1-5&quot;</span>, torch_dtype=torch.float16, use_safetensors=<span class="hljs-literal">True</span>
)
distributed_state = PartialState()
pipeline.to(distributed_state.device)
<span class="hljs-keyword">with</span> distributed_state.split_between_processes([<span class="hljs-string">&quot;a dog&quot;</span>, <span class="hljs-string">&quot;a cat&quot;</span>]) <span class="hljs-keyword">as</span> prompt:
result = pipeline(prompt).images[<span class="hljs-number">0</span>]
result.save(<span class="hljs-string">f&quot;result_<span class="hljs-subst">{distributed_state.process_index}</span>.png&quot;</span>)`,wrap:!1}}),W=new J({props:{code:"YWNjZWxlcmF0ZSUyMGxhdW5jaCUyMHJ1bl9kaXN0cmlidXRlZC5weSUyMC0tbnVtX3Byb2Nlc3NlcyUzRDI=",highlighted:"accelerate launch run_distributed.py --num_processes=2",wrap:!1}}),j=new Oe({props:{$$slots:{default:[Xt]},$$scope:{ctx:b}}}),E=new et({props:{title:"PyTorch Distributed",local:"pytorch-distributed",headingTag:"h2"}}),N=new J({props:{code:"aW1wb3J0JTIwdG9yY2glMEFpbXBvcnQlMjB0b3JjaC5kaXN0cmlidXRlZCUyMGFzJTIwZGlzdCUwQWltcG9ydCUyMHRvcmNoLm11bHRpcHJvY2Vzc2luZyUyMGFzJTIwbXAlMEElMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwRGlmZnVzaW9uUGlwZWxpbmUlMEElMEFzZCUyMCUzRCUyMERpZmZ1c2lvblBpcGVsaW5lLmZyb21fcHJldHJhaW5lZCglMEElMjAlMjAlMjAlMjAlMjJzdGFibGUtZGlmZnVzaW9uLXYxLTUlMkZzdGFibGUtZGlmZnVzaW9uLXYxLTUlMjIlMkMlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmZsb2F0MTYlMkMlMjB1c2Vfc2FmZXRlbnNvcnMlM0RUcnVlJTBBKQ==",highlighted:`<span class="hljs-keyword">import</span> torch
<span class="hljs-keyword">import</span> torch.distributed <span class="hljs-keyword">as</span> dist
<span class="hljs-keyword">import</span> torch.multiprocessing <span class="hljs-keyword">as</span> mp
<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> DiffusionPipeline
sd = DiffusionPipeline.from_pretrained(
<span class="hljs-string">&quot;stable-diffusion-v1-5/stable-diffusion-v1-5&quot;</span>, torch_dtype=torch.float16, use_safetensors=<span class="hljs-literal">True</span>
)`,wrap:!1}}),V=new J({props:{code:"ZGVmJTIwcnVuX2luZmVyZW5jZShyYW5rJTJDJTIwd29ybGRfc2l6ZSklM0ElMEElMjAlMjAlMjAlMjBkaXN0LmluaXRfcHJvY2Vzc19ncm91cCglMjJuY2NsJTIyJTJDJTIwcmFuayUzRHJhbmslMkMlMjB3b3JsZF9zaXplJTNEd29ybGRfc2l6ZSklMEElMEElMjAlMjAlMjAlMjBzZC50byhyYW5rKSUwQSUwQSUyMCUyMCUyMCUyMGlmJTIwdG9yY2guZGlzdHJpYnV0ZWQuZ2V0X3JhbmsoKSUyMCUzRCUzRCUyMDAlM0ElMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBwcm9tcHQlMjAlM0QlMjAlMjJhJTIwZG9nJTIyJTBBJTIwJTIwJTIwJTIwZWxpZiUyMHRvcmNoLmRpc3RyaWJ1dGVkLmdldF9yYW5rKCklMjAlM0QlM0QlMjAxJTNBJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwcHJvbXB0JTIwJTNEJTIwJTIyYSUyMGNhdCUyMiUwQSUwQSUyMCUyMCUyMCUyMGltYWdlJTIwJTNEJTIwc2QocHJvbXB0KS5pbWFnZXMlNUIwJTVEJTBBJTIwJTIwJTIwJTIwaW1hZ2Uuc2F2ZShmJTIyLiUyRiU3QidfJy5qb2luKHByb21wdCklN0QucG5nJTIyKQ==",highlighted:`<span class="hljs-keyword">def</span> <span class="hljs-title function_">run_inference</span>(<span class="hljs-params">rank, world_size</span>):
dist.init_process_group(<span class="hljs-string">&quot;nccl&quot;</span>, rank=rank, world_size=world_size)
sd.to(rank)
<span class="hljs-keyword">if</span> torch.distributed.get_rank() == <span class="hljs-number">0</span>:
prompt = <span class="hljs-string">&quot;a dog&quot;</span>
<span class="hljs-keyword">elif</span> torch.distributed.get_rank() == <span class="hljs-number">1</span>:
prompt = <span class="hljs-string">&quot;a cat&quot;</span>
image = sd(prompt).images[<span class="hljs-number">0</span>]
image.save(<span class="hljs-string">f&quot;./<span class="hljs-subst">{<span class="hljs-string">&#x27;_&#x27;</span>.join(prompt)}</span>.png&quot;</span>)`,wrap:!1}}),Y=new J({props:{code:"ZGVmJTIwbWFpbigpJTNBJTBBJTIwJTIwJTIwJTIwd29ybGRfc2l6ZSUyMCUzRCUyMDIlMEElMjAlMjAlMjAlMjBtcC5zcGF3bihydW5faW5mZXJlbmNlJTJDJTIwYXJncyUzRCh3b3JsZF9zaXplJTJDKSUyQyUyMG5wcm9jcyUzRHdvcmxkX3NpemUlMkMlMjBqb2luJTNEVHJ1ZSklMEElMEElMEFpZiUyMF9fbmFtZV9fJTIwJTNEJTNEJTIwJTIyX19tYWluX18lMjIlM0ElMEElMjAlMjAlMjAlMjBtYWluKCk=",highlighted:`<span class="hljs-keyword">def</span> <span class="hljs-title function_">main</span>():
world_size = <span class="hljs-number">2</span>
mp.spawn(run_inference, args=(world_size,), nprocs=world_size, join=<span class="hljs-literal">True</span>)
<span class="hljs-keyword">if</span> __name__ == <span class="hljs-string">&quot;__main__&quot;</span>:
main()`,wrap:!1}}),z=new J({props:{code:"dG9yY2hydW4lMjBydW5fZGlzdHJpYnV0ZWQucHklMjAtLW5wcm9jX3Blcl9ub2RlJTNEMg==",highlighted:"torchrun run_distributed.py --nproc_per_node=2",wrap:!1}}),T=new Oe({props:{warning:!1,$$slots:{default:[Rt]},$$scope:{ctx:b}}}),F=new et({props:{title:"Model sharding",local:"model-sharding",headingTag:"h2"}}),U=new Oe({props:{warning:!1,$$slots:{default:[Nt]},$$scope:{ctx:b}}}),q=new J({props:{code:"ZnJvbSUyMGRpZmZ1c2VycyUyMGltcG9ydCUyMEZsdXhQaXBlbGluZSUwQWltcG9ydCUyMHRvcmNoJTBBJTBBcHJvbXB0JTIwJTNEJTIwJTIyYSUyMHBob3RvJTIwb2YlMjBhJTIwZG9nJTIwd2l0aCUyMGNhdC1saWtlJTIwbG9vayUyMiUwQSUwQXBpcGVsaW5lJTIwJTNEJTIwRmx1eFBpcGVsaW5lLmZyb21fcHJldHJhaW5lZCglMEElMjAlMjAlMjAlMjAlMjJibGFjay1mb3Jlc3QtbGFicyUyRkZMVVguMS1kZXYlMjIlMkMlMEElMjAlMjAlMjAlMjB0cmFuc2Zvcm1lciUzRE5vbmUlMkMlMEElMjAlMjAlMjAlMjB2YWUlM0ROb25lJTJDJTBBJTIwJTIwJTIwJTIwZGV2aWNlX21hcCUzRCUyMmJhbGFuY2VkJTIyJTJDJTBBJTIwJTIwJTIwJTIwbWF4X21lbW9yeSUzRCU3QjAlM0ElMjAlMjIxNkdCJTIyJTJDJTIwMSUzQSUyMCUyMjE2R0IlMjIlN0QlMkMlMEElMjAlMjAlMjAlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmJmbG9hdDE2JTBBKSUwQXdpdGglMjB0b3JjaC5ub19ncmFkKCklM0ElMEElMjAlMjAlMjAlMjBwcmludCglMjJFbmNvZGluZyUyMHByb21wdHMuJTIyKSUwQSUyMCUyMCUyMCUyMHByb21wdF9lbWJlZHMlMkMlMjBwb29sZWRfcHJvbXB0X2VtYmVkcyUyQyUyMHRleHRfaWRzJTIwJTNEJTIwcGlwZWxpbmUuZW5jb2RlX3Byb21wdCglMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBwcm9tcHQlM0Rwcm9tcHQlMkMlMjBwcm9tcHRfMiUzRE5vbmUlMkMlMjBtYXhfc2VxdWVuY2VfbGVuZ3RoJTNENTEyJTBBJTIwJTIwJTIwJTIwKQ==",highlighted:`<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> FluxPipeline
<span class="hljs-keyword">import</span> torch
prompt = <span class="hljs-string">&quot;a photo of a dog with cat-like look&quot;</span>
pipeline = FluxPipeline.from_pretrained(
<span class="hljs-string">&quot;black-forest-labs/FLUX.1-dev&quot;</span>,
transformer=<span class="hljs-literal">None</span>,
vae=<span class="hljs-literal">None</span>,
device_map=<span class="hljs-string">&quot;balanced&quot;</span>,
max_memory={<span class="hljs-number">0</span>: <span class="hljs-string">&quot;16GB&quot;</span>, <span class="hljs-number">1</span>: <span class="hljs-string">&quot;16GB&quot;</span>},
torch_dtype=torch.bfloat16
)
<span class="hljs-keyword">with</span> torch.no_grad():
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot;Encoding prompts.&quot;</span>)
prompt_embeds, pooled_prompt_embeds, text_ids = pipeline.encode_prompt(
prompt=prompt, prompt_2=<span class="hljs-literal">None</span>, max_sequence_length=<span class="hljs-number">512</span>
)`,wrap:!1}}),K=new J({props:{code:"aW1wb3J0JTIwZ2MlMjAlMEElMEFkZWYlMjBmbHVzaCgpJTNBJTBBJTIwJTIwJTIwJTIwZ2MuY29sbGVjdCgpJTBBJTIwJTIwJTIwJTIwdG9yY2guY3VkYS5lbXB0eV9jYWNoZSgpJTBBJTIwJTIwJTIwJTIwdG9yY2guY3VkYS5yZXNldF9tYXhfbWVtb3J5X2FsbG9jYXRlZCgpJTBBJTIwJTIwJTIwJTIwdG9yY2guY3VkYS5yZXNldF9wZWFrX21lbW9yeV9zdGF0cygpJTBBJTBBZGVsJTIwcGlwZWxpbmUudGV4dF9lbmNvZGVyJTBBZGVsJTIwcGlwZWxpbmUudGV4dF9lbmNvZGVyXzIlMEFkZWwlMjBwaXBlbGluZS50b2tlbml6ZXIlMEFkZWwlMjBwaXBlbGluZS50b2tlbml6ZXJfMiUwQWRlbCUyMHBpcGVsaW5lJTBBJTBBZmx1c2goKQ==",highlighted:`<span class="hljs-keyword">import</span> gc
<span class="hljs-keyword">def</span> <span class="hljs-title function_">flush</span>():
gc.collect()
torch.cuda.empty_cache()
torch.cuda.reset_max_memory_allocated()
torch.cuda.reset_peak_memory_stats()
<span class="hljs-keyword">del</span> pipeline.text_encoder
<span class="hljs-keyword">del</span> pipeline.text_encoder_2
<span class="hljs-keyword">del</span> pipeline.tokenizer
<span class="hljs-keyword">del</span> pipeline.tokenizer_2
<span class="hljs-keyword">del</span> pipeline
flush()`,wrap:!1}}),ee=new J({props:{code:"ZnJvbSUyMGRpZmZ1c2VycyUyMGltcG9ydCUyMEZsdXhUcmFuc2Zvcm1lcjJETW9kZWwlMEFpbXBvcnQlMjB0b3JjaCUyMCUwQSUwQXRyYW5zZm9ybWVyJTIwJTNEJTIwRmx1eFRyYW5zZm9ybWVyMkRNb2RlbC5mcm9tX3ByZXRyYWluZWQoJTBBJTIwJTIwJTIwJTIwJTIyYmxhY2stZm9yZXN0LWxhYnMlMkZGTFVYLjEtZGV2JTIyJTJDJTIwJTBBJTIwJTIwJTIwJTIwc3ViZm9sZGVyJTNEJTIydHJhbnNmb3JtZXIlMjIlMkMlMEElMjAlMjAlMjAlMjBkZXZpY2VfbWFwJTNEJTIyYXV0byUyMiUyQyUwQSUyMCUyMCUyMCUyMHRvcmNoX2R0eXBlJTNEdG9yY2guYmZsb2F0MTYlMEEp",highlighted:`<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> FluxTransformer2DModel
<span class="hljs-keyword">import</span> torch
transformer = FluxTransformer2DModel.from_pretrained(
<span class="hljs-string">&quot;black-forest-labs/FLUX.1-dev&quot;</span>,
subfolder=<span class="hljs-string">&quot;transformer&quot;</span>,
device_map=<span class="hljs-string">&quot;auto&quot;</span>,
torch_dtype=torch.bfloat16
)`,wrap:!1}}),g=new Oe({props:{warning:!1,$$slots:{default:[Ht]},$$scope:{ctx:b}}}),le=new J({props:{code:"cGlwZWxpbmUlMjAlM0QlMjBGbHV4UGlwZWxpbmUuZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMCUyMmJsYWNrLWZvcmVzdC1sYWJzJTJGRkxVWC4xLWRldiUyMiUyQyUyMCUyQyUwQSUyMCUyMCUyMCUyMHRleHRfZW5jb2RlciUzRE5vbmUlMkMlMEElMjAlMjAlMjAlMjB0ZXh0X2VuY29kZXJfMiUzRE5vbmUlMkMlMEElMjAlMjAlMjAlMjB0b2tlbml6ZXIlM0ROb25lJTJDJTBBJTIwJTIwJTIwJTIwdG9rZW5pemVyXzIlM0ROb25lJTJDJTBBJTIwJTIwJTIwJTIwdmFlJTNETm9uZSUyQyUwQSUyMCUyMCUyMCUyMHRyYW5zZm9ybWVyJTNEdHJhbnNmb3JtZXIlMkMlMEElMjAlMjAlMjAlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmJmbG9hdDE2JTBBKSUwQSUwQXByaW50KCUyMlJ1bm5pbmclMjBkZW5vaXNpbmcuJTIyKSUwQWhlaWdodCUyQyUyMHdpZHRoJTIwJTNEJTIwNzY4JTJDJTIwMTM2MCUwQWxhdGVudHMlMjAlM0QlMjBwaXBlbGluZSglMEElMjAlMjAlMjAlMjBwcm9tcHRfZW1iZWRzJTNEcHJvbXB0X2VtYmVkcyUyQyUwQSUyMCUyMCUyMCUyMHBvb2xlZF9wcm9tcHRfZW1iZWRzJTNEcG9vbGVkX3Byb21wdF9lbWJlZHMlMkMlMEElMjAlMjAlMjAlMjBudW1faW5mZXJlbmNlX3N0ZXBzJTNENTAlMkMlMEElMjAlMjAlMjAlMjBndWlkYW5jZV9zY2FsZSUzRDMuNSUyQyUwQSUyMCUyMCUyMCUyMGhlaWdodCUzRGhlaWdodCUyQyUwQSUyMCUyMCUyMCUyMHdpZHRoJTNEd2lkdGglMkMlMEElMjAlMjAlMjAlMjBvdXRwdXRfdHlwZSUzRCUyMmxhdGVudCUyMiUyQyUwQSkuaW1hZ2Vz",highlighted:`pipeline = FluxPipeline.from_pretrained(
<span class="hljs-string">&quot;black-forest-labs/FLUX.1-dev&quot;</span>, ,
text_encoder=<span class="hljs-literal">None</span>,
text_encoder_2=<span class="hljs-literal">None</span>,
tokenizer=<span class="hljs-literal">None</span>,
tokenizer_2=<span class="hljs-literal">None</span>,
vae=<span class="hljs-literal">None</span>,
transformer=transformer,
torch_dtype=torch.bfloat16
)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot;Running denoising.&quot;</span>)
height, width = <span class="hljs-number">768</span>, <span class="hljs-number">1360</span>
latents = pipeline(
prompt_embeds=prompt_embeds,
pooled_prompt_embeds=pooled_prompt_embeds,
num_inference_steps=<span class="hljs-number">50</span>,
guidance_scale=<span class="hljs-number">3.5</span>,
height=height,
width=width,
output_type=<span class="hljs-string">&quot;latent&quot;</span>,
).images`,wrap:!1}}),ae=new J({props:{code:"ZGVsJTIwcGlwZWxpbmUudHJhbnNmb3JtZXIlMEFkZWwlMjBwaXBlbGluZSUwQSUwQWZsdXNoKCk=",highlighted:`<span class="hljs-keyword">del</span> pipeline.transformer
<span class="hljs-keyword">del</span> pipeline
flush()`,wrap:!1}}),ie=new J({props:{code:"ZnJvbSUyMGRpZmZ1c2VycyUyMGltcG9ydCUyMEF1dG9lbmNvZGVyS0wlMEFmcm9tJTIwZGlmZnVzZXJzLmltYWdlX3Byb2Nlc3NvciUyMGltcG9ydCUyMFZhZUltYWdlUHJvY2Vzc29yJTBBaW1wb3J0JTIwdG9yY2glMjAlMEElMEF2YWUlMjAlM0QlMjBBdXRvZW5jb2RlcktMLmZyb21fcHJldHJhaW5lZChja3B0X2lkJTJDJTIwc3ViZm9sZGVyJTNEJTIydmFlJTIyJTJDJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5iZmxvYXQxNikudG8oJTIyY3VkYSUyMiklMEF2YWVfc2NhbGVfZmFjdG9yJTIwJTNEJTIwMiUyMCoqJTIwKGxlbih2YWUuY29uZmlnLmJsb2NrX291dF9jaGFubmVscykpJTBBaW1hZ2VfcHJvY2Vzc29yJTIwJTNEJTIwVmFlSW1hZ2VQcm9jZXNzb3IodmFlX3NjYWxlX2ZhY3RvciUzRHZhZV9zY2FsZV9mYWN0b3IpJTBBJTBBd2l0aCUyMHRvcmNoLm5vX2dyYWQoKSUzQSUwQSUyMCUyMCUyMCUyMHByaW50KCUyMlJ1bm5pbmclMjBkZWNvZGluZy4lMjIpJTBBJTIwJTIwJTIwJTIwbGF0ZW50cyUyMCUzRCUyMEZsdXhQaXBlbGluZS5fdW5wYWNrX2xhdGVudHMobGF0ZW50cyUyQyUyMGhlaWdodCUyQyUyMHdpZHRoJTJDJTIwdmFlX3NjYWxlX2ZhY3RvciklMEElMjAlMjAlMjAlMjBsYXRlbnRzJTIwJTNEJTIwKGxhdGVudHMlMjAlMkYlMjB2YWUuY29uZmlnLnNjYWxpbmdfZmFjdG9yKSUyMCUyQiUyMHZhZS5jb25maWcuc2hpZnRfZmFjdG9yJTBBJTBBJTIwJTIwJTIwJTIwaW1hZ2UlMjAlM0QlMjB2YWUuZGVjb2RlKGxhdGVudHMlMkMlMjByZXR1cm5fZGljdCUzREZhbHNlKSU1QjAlNUQlMEElMjAlMjAlMjAlMjBpbWFnZSUyMCUzRCUyMGltYWdlX3Byb2Nlc3Nvci5wb3N0cHJvY2VzcyhpbWFnZSUyQyUyMG91dHB1dF90eXBlJTNEJTIycGlsJTIyKSUwQSUyMCUyMCUyMCUyMGltYWdlJTVCMCU1RC5zYXZlKCUyMnNwbGl0X3RyYW5zZm9ybWVyLnBuZyUyMik=",highlighted:`<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> AutoencoderKL
<span class="hljs-keyword">from</span> diffusers.image_processor <span class="hljs-keyword">import</span> VaeImageProcessor
<span class="hljs-keyword">import</span> torch
vae = AutoencoderKL.from_pretrained(ckpt_id, subfolder=<span class="hljs-string">&quot;vae&quot;</span>, torch_dtype=torch.bfloat16).to(<span class="hljs-string">&quot;cuda&quot;</span>)
vae_scale_factor = <span class="hljs-number">2</span> ** (<span class="hljs-built_in">len</span>(vae.config.block_out_channels))
image_processor = VaeImageProcessor(vae_scale_factor=vae_scale_factor)
<span class="hljs-keyword">with</span> torch.no_grad():
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot;Running decoding.&quot;</span>)
latents = FluxPipeline._unpack_latents(latents, height, width, vae_scale_factor)
latents = (latents / vae.config.scaling_factor) + vae.config.shift_factor
image = vae.decode(latents, return_dict=<span class="hljs-literal">False</span>)[<span class="hljs-number">0</span>]
image = image_processor.postprocess(image, output_type=<span class="hljs-string">&quot;pil&quot;</span>)
image[<span class="hljs-number">0</span>].save(<span class="hljs-string">&quot;split_transformer.png&quot;</span>)`,wrap:!1}}),oe=new Et({props:{source:"https://github.com/huggingface/diffusers/blob/main/docs/source/en/training/distributed_inference.md"}}),{c(){i=o("meta"),y=a(),r=o("p"),w=a(),d(Z.$$.fragment),ce=a(),_=o("p"),_.innerHTML=lt,de=a(),$=o("p"),$.textContent=st,me=a(),d(v.$$.fragment),fe=a(),I=o("p"),I.innerHTML=at,ue=a(),k=o("p"),k.innerHTML=nt,Me=a(),B=o("p"),B.innerHTML=it,he=a(),d(G.$$.fragment),ye=a(),C=o("p"),C.innerHTML=rt,we=a(),d(W.$$.fragment),Je=a(),d(j.$$.fragment),be=a(),d(E.$$.fragment),je=a(),X=o("p"),X.innerHTML=ot,Te=a(),R=o("p"),R.innerHTML=pt,Ue=a(),d(N.$$.fragment),ge=a(),H=o("p"),H.innerHTML=ct,Ze=a(),x=o("p"),x.innerHTML=dt,_e=a(),d(V.$$.fragment),$e=a(),A=o("p"),A.innerHTML=mt,ve=a(),d(Y.$$.fragment),Ie=a(),Q=o("p"),Q.innerHTML=ft,ke=a(),d(z.$$.fragment),Be=a(),d(T.$$.fragment),Ge=a(),d(F.$$.fragment),Ce=a(),S=o("p"),S.innerHTML=ut,We=a(),P=o("p"),P.textContent=Mt,Ee=a(),L=o("p"),L.innerHTML=ht,Xe=a(),d(U.$$.fragment),Re=a(),d(q.$$.fragment),Ne=a(),D=o("p"),D.textContent=yt,He=a(),d(K.$$.fragment),xe=a(),O=o("p"),O.innerHTML=wt,Ve=a(),d(ee.$$.fragment),Ae=a(),d(g.$$.fragment),Ye=a(),te=o("p"),te.innerHTML=Jt,Qe=a(),d(le.$$.fragment),ze=a(),se=o("p"),se.textContent=bt,Fe=a(),d(ae.$$.fragment),Se=a(),ne=o("p"),ne.textContent=jt,Pe=a(),d(ie.$$.fragment),Le=a(),re=o("p"),re.textContent=Tt,qe=a(),d(oe.$$.fragment),De=a(),pe=o("p"),this.h()},l(e){const t=Ct("svelte-u9bgzb",document.head);i=p(t,"META",{name:!0,content:!0}),t.forEach(l),y=n(e),r=p(e,"P",{}),$t(r).forEach(l),w=n(e),m(Z.$$.fragment,e),ce=n(e),_=p(e,"P",{"data-svelte-h":!0}),c(_)!=="svelte-a7bv7i"&&(_.innerHTML=lt),de=n(e),$=p(e,"P",{"data-svelte-h":!0}),c($)!=="svelte-1qu3csy"&&($.textContent=st),me=n(e),m(v.$$.fragment,e),fe=n(e),I=p(e,"P",{"data-svelte-h":!0}),c(I)!=="svelte-13uq1g2"&&(I.innerHTML=at),ue=n(e),k=p(e,"P",{"data-svelte-h":!0}),c(k)!=="svelte-owbmy"&&(k.innerHTML=nt),Me=n(e),B=p(e,"P",{"data-svelte-h":!0}),c(B)!=="svelte-1iuwz8b"&&(B.innerHTML=it),he=n(e),m(G.$$.fragment,e),ye=n(e),C=p(e,"P",{"data-svelte-h":!0}),c(C)!=="svelte-1ohh8as"&&(C.innerHTML=rt),we=n(e),m(W.$$.fragment,e),Je=n(e),m(j.$$.fragment,e),be=n(e),m(E.$$.fragment,e),je=n(e),X=p(e,"P",{"data-svelte-h":!0}),c(X)!=="svelte-jtiddl"&&(X.innerHTML=ot),Te=n(e),R=p(e,"P",{"data-svelte-h":!0}),c(R)!=="svelte-wiuanb"&&(R.innerHTML=pt),Ue=n(e),m(N.$$.fragment,e),ge=n(e),H=p(e,"P",{"data-svelte-h":!0}),c(H)!=="svelte-qkdvuf"&&(H.innerHTML=ct),Ze=n(e),x=p(e,"P",{"data-svelte-h":!0}),c(x)!=="svelte-1y624kq"&&(x.innerHTML=dt),_e=n(e),m(V.$$.fragment,e),$e=n(e),A=p(e,"P",{"data-svelte-h":!0}),c(A)!=="svelte-1ecd3vq"&&(A.innerHTML=mt),ve=n(e),m(Y.$$.fragment,e),Ie=n(e),Q=p(e,"P",{"data-svelte-h":!0}),c(Q)!=="svelte-ykaora"&&(Q.innerHTML=ft),ke=n(e),m(z.$$.fragment,e),Be=n(e),m(T.$$.fragment,e),Ge=n(e),m(F.$$.fragment,e),Ce=n(e),S=p(e,"P",{"data-svelte-h":!0}),c(S)!=="svelte-z43e17"&&(S.innerHTML=ut),We=n(e),P=p(e,"P",{"data-svelte-h":!0}),c(P)!=="svelte-11atf5q"&&(P.textContent=Mt),Ee=n(e),L=p(e,"P",{"data-svelte-h":!0}),c(L)!=="svelte-1nunyf2"&&(L.innerHTML=ht),Xe=n(e),m(U.$$.fragment,e),Re=n(e),m(q.$$.fragment,e),Ne=n(e),D=p(e,"P",{"data-svelte-h":!0}),c(D)!=="svelte-b7hlro"&&(D.textContent=yt),He=n(e),m(K.$$.fragment,e),xe=n(e),O=p(e,"P",{"data-svelte-h":!0}),c(O)!=="svelte-5mgron"&&(O.innerHTML=wt),Ve=n(e),m(ee.$$.fragment,e),Ae=n(e),m(g.$$.fragment,e),Ye=n(e),te=p(e,"P",{"data-svelte-h":!0}),c(te)!=="svelte-124kckl"&&(te.innerHTML=Jt),Qe=n(e),m(le.$$.fragment,e),ze=n(e),se=p(e,"P",{"data-svelte-h":!0}),c(se)!=="svelte-5fatj5"&&(se.textContent=bt),Fe=n(e),m(ae.$$.fragment,e),Se=n(e),ne=p(e,"P",{"data-svelte-h":!0}),c(ne)!=="svelte-1qec9oi"&&(ne.textContent=jt),Pe=n(e),m(ie.$$.fragment,e),Le=n(e),re=p(e,"P",{"data-svelte-h":!0}),c(re)!=="svelte-3tqui3"&&(re.textContent=Tt),qe=n(e),m(oe.$$.fragment,e),De=n(e),pe=p(e,"P",{}),$t(pe).forEach(l),this.h()},h(){vt(i,"name","hf:doc:metadata"),vt(i,"content",Vt)},m(e,t){Wt(document.head,i),s(e,y,t),s(e,r,t),s(e,w,t),f(Z,e,t),s(e,ce,t),s(e,_,t),s(e,de,t),s(e,$,t),s(e,me,t),f(v,e,t),s(e,fe,t),s(e,I,t),s(e,ue,t),s(e,k,t),s(e,Me,t),s(e,B,t),s(e,he,t),f(G,e,t),s(e,ye,t),s(e,C,t),s(e,we,t),f(W,e,t),s(e,Je,t),f(j,e,t),s(e,be,t),f(E,e,t),s(e,je,t),s(e,X,t),s(e,Te,t),s(e,R,t),s(e,Ue,t),f(N,e,t),s(e,ge,t),s(e,H,t),s(e,Ze,t),s(e,x,t),s(e,_e,t),f(V,e,t),s(e,$e,t),s(e,A,t),s(e,ve,t),f(Y,e,t),s(e,Ie,t),s(e,Q,t),s(e,ke,t),f(z,e,t),s(e,Be,t),f(T,e,t),s(e,Ge,t),f(F,e,t),s(e,Ce,t),s(e,S,t),s(e,We,t),s(e,P,t),s(e,Ee,t),s(e,L,t),s(e,Xe,t),f(U,e,t),s(e,Re,t),f(q,e,t),s(e,Ne,t),s(e,D,t),s(e,He,t),f(K,e,t),s(e,xe,t),s(e,O,t),s(e,Ve,t),f(ee,e,t),s(e,Ae,t),f(g,e,t),s(e,Ye,t),s(e,te,t),s(e,Qe,t),f(le,e,t),s(e,ze,t),s(e,se,t),s(e,Fe,t),f(ae,e,t),s(e,Se,t),s(e,ne,t),s(e,Pe,t),f(ie,e,t),s(e,Le,t),s(e,re,t),s(e,qe,t),f(oe,e,t),s(e,De,t),s(e,pe,t),Ke=!0},p(e,[t]){const Ut={};t&2&&(Ut.$$scope={dirty:t,ctx:e}),j.$set(Ut);const gt={};t&2&&(gt.$$scope={dirty:t,ctx:e}),T.$set(gt);const Zt={};t&2&&(Zt.$$scope={dirty:t,ctx:e}),U.$set(Zt);const _t={};t&2&&(_t.$$scope={dirty:t,ctx:e}),g.$set(_t)},i(e){Ke||(u(Z.$$.fragment,e),u(v.$$.fragment,e),u(G.$$.fragment,e),u(W.$$.fragment,e),u(j.$$.fragment,e),u(E.$$.fragment,e),u(N.$$.fragment,e),u(V.$$.fragment,e),u(Y.$$.fragment,e),u(z.$$.fragment,e),u(T.$$.fragment,e),u(F.$$.fragment,e),u(U.$$.fragment,e),u(q.$$.fragment,e),u(K.$$.fragment,e),u(ee.$$.fragment,e),u(g.$$.fragment,e),u(le.$$.fragment,e),u(ae.$$.fragment,e),u(ie.$$.fragment,e),u(oe.$$.fragment,e),Ke=!0)},o(e){M(Z.$$.fragment,e),M(v.$$.fragment,e),M(G.$$.fragment,e),M(W.$$.fragment,e),M(j.$$.fragment,e),M(E.$$.fragment,e),M(N.$$.fragment,e),M(V.$$.fragment,e),M(Y.$$.fragment,e),M(z.$$.fragment,e),M(T.$$.fragment,e),M(F.$$.fragment,e),M(U.$$.fragment,e),M(q.$$.fragment,e),M(K.$$.fragment,e),M(ee.$$.fragment,e),M(g.$$.fragment,e),M(le.$$.fragment,e),M(ae.$$.fragment,e),M(ie.$$.fragment,e),M(oe.$$.fragment,e),Ke=!1},d(e){e&&(l(y),l(r),l(w),l(ce),l(_),l(de),l($),l(me),l(fe),l(I),l(ue),l(k),l(Me),l(B),l(he),l(ye),l(C),l(we),l(Je),l(be),l(je),l(X),l(Te),l(R),l(Ue),l(ge),l(H),l(Ze),l(x),l(_e),l($e),l(A),l(ve),l(Ie),l(Q),l(ke),l(Be),l(Ge),l(Ce),l(S),l(We),l(P),l(Ee),l(L),l(Xe),l(Re),l(Ne),l(D),l(He),l(xe),l(O),l(Ve),l(Ae),l(Ye),l(te),l(Qe),l(ze),l(se),l(Fe),l(Se),l(ne),l(Pe),l(Le),l(re),l(qe),l(De),l(pe)),l(i),h(Z,e),h(v,e),h(G,e),h(W,e),h(j,e),h(E,e),h(N,e),h(V,e),h(Y,e),h(z,e),h(T,e),h(F,e),h(U,e),h(q,e),h(K,e),h(ee,e),h(g,e),h(le,e),h(ae,e),h(ie,e),h(oe,e)}}}const Vt='{"title":"Distributed inference","local":"distributed-inference","sections":[{"title":"🤗 Accelerate","local":"-accelerate","sections":[],"depth":2},{"title":"PyTorch Distributed","local":"pytorch-distributed","sections":[],"depth":2},{"title":"Model sharding","local":"model-sharding","sections":[],"depth":2}],"depth":1}';function At(b){return kt(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class Pt extends Bt{constructor(i){super(),Gt(this,i,At,xt,It,{})}}export{Pt as component};

Xet Storage Details

Size:
31 kB
·
Xet hash:
0ff9fb72d99764b112f4ad803293708cb9b096ff1f7ab8e1bced229039ac9180

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.