Buckets:
| import{s as Cl,o as Bl,n as ll}from"../chunks/scheduler.5c93273d.js";import{S as gl,i as kl,g as c,s as a,r as M,A as vl,h as r,f as t,c as n,j as Gl,u as m,x as o,k as Il,y as Wl,a as s,v as d,d as f,t as y,w}from"../chunks/index.e43dd92b.js";import{T as Oe}from"../chunks/Tip.1cbfe904.js";import{C as j}from"../chunks/CodeBlock.6896320e.js";import{H as el,E as El}from"../chunks/getInferenceSnippets.22672bbf.js";function Vl(h){let p,u='参考这个最小示例 <a href="https://gist.github.com/sayakpaul/cfaebd221820d7b43fae638b4dfa01ba" rel="nofollow">脚本</a> 以在多个 GPU 上运行推理。要了解更多信息,请查看 <a href="https://huggingface.co/docs/accelerate/en/usage_guides/distributed_inference#distributed-inference-with-accelerate" rel="nofollow">使用 🤗 Accelerate 进行分布式推理</a> 指南。';return{c(){p=c("p"),p.innerHTML=u},l(i){p=r(i,"P",{"data-svelte-h":!0}),o(p)!=="svelte-1wqzskn"&&(p.innerHTML=u)},m(i,J){s(i,p,J)},p:ll,d(i){i&&t(p)}}}function Rl(h){let p,u='您可以在 <code>DiffusionPipeline</code> 中使用 <code>device_map</code> 将其模型级组件分布在多个设备上。请参考 <a href="../tutorials/inference_with_big_models#device-placement">设备放置</a> 指南了解更多信息。';return{c(){p=c("p"),p.innerHTML=u},l(i){p=r(i,"P",{"data-svelte-h":!0}),o(p)!=="svelte-i2nti4"&&(p.innerHTML=u)},m(i,J){s(i,p,J)},p:ll,d(i){i&&t(p)}}}function Nl(h){let p,u="<strong>仅</strong> 在此步骤加载文本编码器!扩散变换器和VAE在后续步骤中加载以节省内存。";return{c(){p=c("p"),p.innerHTML=u},l(i){p=r(i,"P",{"data-svelte-h":!0}),o(p)!=="svelte-1cfscz8"&&(p.innerHTML=u)},m(i,J){s(i,p,J)},p:ll,d(i){i&&t(p)}}}function Hl(h){let p,u="在任何时候,您可以尝试 <code>print(pipeline.hf_device_map)</code> 来查看各种模型如何在设备上分布。这对于跟踪模型的设备放置很有用。您也可以尝试 <code>print(transformer.hf_device_map)</code> 来查看变换器模型如何在设备上分片。";return{c(){p=c("p"),p.innerHTML=u},l(i){p=r(i,"P",{"data-svelte-h":!0}),o(p)!=="svelte-1aiohbf"&&(p.innerHTML=u)},m(i,J){s(i,p,J)},p:ll,d(i){i&&t(p)}}}function Xl(h){let p,u,i,J,_,oe,$,tl='在分布式设置中,您可以使用 🤗 <a href="https://huggingface.co/docs/accelerate/index" rel="nofollow">Accelerate</a> 或 <a href="https://pytorch.org/tutorials/beginner/dist_overview.html" rel="nofollow">PyTorch Distributed</a> 在多个 GPU 上运行推理,这对于并行生成多个提示非常有用。',Me,G,sl="本指南将向您展示如何使用 🤗 Accelerate 和 PyTorch Distributed 进行分布式推理。",me,I,de,C,al='🤗 <a href="https://huggingface.co/docs/accelerate/index" rel="nofollow">Accelerate</a> 是一个旨在简化在分布式设置中训练或运行推理的库。它简化了设置分布式环境的过程,让您可以专注于您的 PyTorch 代码。',fe,B,nl="首先,创建一个 Python 文件并初始化一个 <code>accelerate.PartialState</code> 来创建分布式环境;您的设置会自动检测,因此您无需明确定义 <code>rank</code> 或 <code>world_size</code>。将 <code>DiffusionPipeline</code> 移动到 <code>distributed_state.device</code> 以为每个进程分配一个 GPU。",ye,g,pl="现在使用 <code>split_between_processes</code> 实用程序作为上下文管理器,自动在进程数之间分发提示。",we,k,ue,v,il="使用 <code>--num_processes</code> 参数指定要使用的 GPU 数量,并调用 <code>accelerate launch</code> 来运行脚本:",Je,W,je,T,he,E,Te,V,cl=`PyTorch 支持 <a href="https://pytorch.org/docs/stable/generated/torch.nn.parallel.DistributedDataParallel.html" rel="nofollow"><code>DistributedDataParallel</code></a>,它启用了数据 | |
| 并行性。`,be,R,rl="首先,创建一个 Python 文件并导入 <code>torch.distributed</code> 和 <code>torch.multiprocessing</code> 来设置分布式进程组,并为每个 GPU 上的推理生成进程。您还应该初始化一个 <code>DiffusionPipeline</code>:",Ue,N,Ze,H,ol='您需要创建一个函数来运行推理;<a href="https://pytorch.org/docs/stable/distributed.html?highlight=init_process_group#torch.distributed.init_process_group" rel="nofollow"><code>init_process_group</code></a> 处理创建一个分布式环境,指定要使用的后端类型、当前进程的 <code>rank</code> 以及参与进程的数量 <code>world_size</code>。如果您在 2 个 GPU 上并行运行推理,那么 <code>world_size</code> 就是 2。',_e,X,Ml="将 <code>DiffusionPipeline</code> 移动到 <code>rank</code>,并使用 <code>get_rank</code> 为每个进程分配一个 GPU,其中每个进程处理不同的提示:",$e,A,Ge,F,ml='要运行分布式推理,调用 <a href="https://pytorch.org/docs/stable/multiprocessing.html#torch.multiprocessing.spawn" rel="nofollow"><code>mp.spawn</code></a> 在 <code>world_size</code> 定义的 GPU 数量上运行 <code>run_inference</code> 函数:',Ie,x,Ce,z,dl="完成推理脚本后,使用 <code>--nproc_per_node</code> 参数指定要使用的 GPU 数量,并调用 <code>torchrun</code> 来运行脚本:",Be,Q,ge,b,ke,Y,ve,S,fl='现代扩散系统,如 <a href="../api/pipelines/flux">Flux</a>,非常大且包含多个模型。例如,<a href="https://hf.co/black-forest-labs/FLUX.1-dev" rel="nofollow">Flux.1-Dev</a> 由两个文本编码器 - <a href="https://hf.co/google/t5-v1_1-xxl" rel="nofollow">T5-XXL</a> 和 <a href="https://hf.co/openai/clip-vit-large-patch14" rel="nofollow">CLIP-L</a> - 一个 <a href="../api/models/flux_transformer">扩散变换器</a>,以及一个 <a href="../api/models/autoencoderkl">VAE</a> 组成。对于如此大的模型,在消费级 GPU 上运行推理可能具有挑战性。',We,P,yl="模型分片是一种技术,当模型无法容纳在单个 GPU 上时,将模型分布在多个 GPU 上。下面的示例假设有两个 16GB GPU 可用于推理。",Ee,L,wl="开始使用文本编码器计算文本嵌入。通过设置 <code>device_map="balanced"</code> 将文本编码器保持在两个GPU上。<code>balanced</code> 策略将模型均匀分布在所有可用GPU上。使用 <code>max_memory</code> 参数为每个GPU上的每个文本编码器分配最大内存量。",Ve,U,Re,q,Ne,D,ul="一旦文本嵌入计算完成,从GPU中移除它们以为扩散变换器腾出空间。",He,K,Xe,O,Jl='接下来加载扩散变换器,它有125亿参数。这次,设置 <code>device_map="auto"</code> 以自动将模型分布在两个16GB GPU上。<code>auto</code> 策略由 <a href="https://hf.co/docs/accelerate/index" rel="nofollow">Accelerate</a> 支持,并作为 <a href="https://hf.co/docs/accelerate/concept_guides/big_model_inference" rel="nofollow">大模型推理</a> 功能的一部分可用。它首先将模型分布在最快的设备(GPU)上,然后在需要时移动到较慢的设备如CPU和硬盘。将模型参数存储在较慢设备上的权衡是推理延迟较慢。',Ae,ee,Fe,Z,xe,le,jl="将变换器模型添加到管道中以进行去噪,但将其他模型级组件如文本编码器和VAE设置为 <code>None</code>,因为您还不需要它们。",ze,te,Qe,se,hl="从内存中移除管道和变换器,因为它们不再需要。",Ye,ae,Se,ne,Tl="最后,使用变分自编码器(VAE)将潜在表示解码为图像。VAE通常足够小,可以在单个GPU上加载。",Pe,pe,Le,ie,bl="通过选择性加载和卸载在特定阶段所需的模型,并将最大模型分片到多个GPU上,可以在消费级GPU上运行大型模型的推理。",qe,ce,De,re,Ke;return _=new el({props:{title:"分布式推理",local:"分布式推理",headingTag:"h1"}}),I=new el({props:{title:"🤗 Accelerate",local:"-accelerate",headingTag:"h2"}}),k=new j({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwYWNjZWxlcmF0ZSUyMGltcG9ydCUyMFBhcnRpYWxTdGF0ZSUwQWZyb20lMjBkaWZmdXNlcnMlMjBpbXBvcnQlMjBEaWZmdXNpb25QaXBlbGluZSUwQSUwQXBpcGVsaW5lJTIwJTNEJTIwRGlmZnVzaW9uUGlwZWxpbmUuZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMCUyMnN0YWJsZS1kaWZmdXNpb24tdjEtNSUyRnN0YWJsZS1kaWZmdXNpb24tdjEtNSUyMiUyQyUyMHRvcmNoX2R0eXBlJTNEdG9yY2guZmxvYXQxNiUyQyUyMHVzZV9zYWZldGVuc29ycyUzRFRydWUlMEEpJTBBZGlzdHJpYnV0ZWRfc3RhdGUlMjAlM0QlMjBQYXJ0aWFsU3RhdGUoKSUwQXBpcGVsaW5lLnRvKGRpc3RyaWJ1dGVkX3N0YXRlLmRldmljZSklMEElMEF3aXRoJTIwZGlzdHJpYnV0ZWRfc3RhdGUuc3BsaXRfYmV0d2Vlbl9wcm9jZXNzZXMoJTVCJTIyYSUyMGRvZyUyMiUyQyUyMCUyMmElMjBjYXQlMjIlNUQpJTIwYXMlMjBwcm9tcHQlM0ElMEElMjAlMjAlMjAlMjByZXN1bHQlMjAlM0QlMjBwaXBlbGluZShwcm9tcHQpLmltYWdlcyU1QjAlNUQlMEElMjAlMjAlMjAlMjByZXN1bHQuc2F2ZShmJTIycmVzdWx0XyU3QmRpc3RyaWJ1dGVkX3N0YXRlLnByb2Nlc3NfaW5kZXglN0QucG5nJTIyKQ==",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> accelerate <span class="hljs-keyword">import</span> PartialState | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> DiffusionPipeline | |
| pipeline = DiffusionPipeline.from_pretrained( | |
| <span class="hljs-string">"stable-diffusion-v1-5/stable-diffusion-v1-5"</span>, torch_dtype=torch.float16, use_safetensors=<span class="hljs-literal">True</span> | |
| ) | |
| distributed_state = PartialState() | |
| pipeline.to(distributed_state.device) | |
| <span class="hljs-keyword">with</span> distributed_state.split_between_processes([<span class="hljs-string">"a dog"</span>, <span class="hljs-string">"a cat"</span>]) <span class="hljs-keyword">as</span> prompt: | |
| result = pipeline(prompt).images[<span class="hljs-number">0</span>] | |
| result.save(<span class="hljs-string">f"result_<span class="hljs-subst">{distributed_state.process_index}</span>.png"</span>)`,wrap:!1}}),W=new j({props:{code:"YWNjZWxlcmF0ZSUyMGxhdW5jaCUyMHJ1bl9kaXN0cmlidXRlZC5weSUyMC0tbnVtX3Byb2Nlc3NlcyUzRDI=",highlighted:"accelerate launch run_distributed.py --num_processes=2",wrap:!1}}),T=new Oe({props:{$$slots:{default:[Vl]},$$scope:{ctx:h}}}),E=new el({props:{title:"PyTorch Distributed",local:"pytorch-distributed",headingTag:"h2"}}),N=new j({props:{code:"aW1wb3J0JTIwdG9yY2glMEFpbXBvcnQlMjB0b3JjaC5kaXN0cmlidXRlZCUyMGFzJTIwZGlzdCUwQWltcG9ydCUyMHRvcmNoLm11bHRpcHJvY2Vzc2luZyUyMGFzJTIwbXAlMEElMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwRGlmZnVzaW9uUGlwZWxpbmUlMEElMEFzZCUyMCUzRCUyMERpZmZ1c2lvblBpcGVsaW5lLmZyb21fcHJldHJhaW5lZCglMEElMjAlMjAlMjAlMjAlMjJzdGFibGUtZGlmZnVzaW9uLXYxLTUlMkZzdGFibGUtZGlmZnVzaW9uLXYxLTUlMjIlMkMlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmZsb2F0MTYlMkMlMjB1c2Vfc2FmZXRlbnNvcnMlM0RUcnVlJTBBKQ==",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">import</span> torch.distributed <span class="hljs-keyword">as</span> dist | |
| <span class="hljs-keyword">import</span> torch.multiprocessing <span class="hljs-keyword">as</span> mp | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> DiffusionPipeline | |
| sd = DiffusionPipeline.from_pretrained( | |
| <span class="hljs-string">"stable-diffusion-v1-5/stable-diffusion-v1-5"</span>, torch_dtype=torch.float16, use_safetensors=<span class="hljs-literal">True</span> | |
| )`,wrap:!1}}),A=new j({props:{code:"ZGVmJTIwcnVuX2luZmVyZW5jZShyYW5rJTJDJTIwd29ybGRfc2l6ZSklM0ElMEElMjAlMjAlMjAlMjBkaXN0LmluaXRfcHJvY2Vzc19ncm91cCglMjJuY2NsJTIyJTJDJTIwcmFuayUzRHJhbmslMkMlMjB3b3JsZF9zaXplJTNEd29ybGRfc2l6ZSklMEElMEElMjAlMjAlMjAlMjBzZC50byhyYW5rKSUwQSUwQSUyMCUyMCUyMCUyMGlmJTIwdG9yY2guZGlzdHJpYnV0ZWQuZ2V0X3JhbmsoKSUyMCUzRCUzRCUyMDAlM0ElMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBwcm9tcHQlMjAlM0QlMjAlMjJhJTIwZG9nJTIyJTBBJTIwJTIwJTIwJTIwZWxpZiUyMHRvcmNoLmRpc3RyaWJ1dGVkLmdldF9yYW5rKCklMjAlM0QlM0QlMjAxJTNBJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwcHJvbXB0JTIwJTNEJTIwJTIyYSUyMGNhdCUyMiUwQSUwQSUyMCUyMCUyMCUyMGltYWdlJTIwJTNEJTIwc2QocHJvbXB0KS5pbWFnZXMlNUIwJTVEJTBBJTIwJTIwJTIwJTIwaW1hZ2Uuc2F2ZShmJTIyLiUyRiU3QidfJy5qb2luKHByb21wdCklN0QucG5nJTIyKQ==",highlighted:`<span class="hljs-keyword">def</span> <span class="hljs-title function_">run_inference</span>(<span class="hljs-params">rank, world_size</span>): | |
| dist.init_process_group(<span class="hljs-string">"nccl"</span>, rank=rank, world_size=world_size) | |
| sd.to(rank) | |
| <span class="hljs-keyword">if</span> torch.distributed.get_rank() == <span class="hljs-number">0</span>: | |
| prompt = <span class="hljs-string">"a dog"</span> | |
| <span class="hljs-keyword">elif</span> torch.distributed.get_rank() == <span class="hljs-number">1</span>: | |
| prompt = <span class="hljs-string">"a cat"</span> | |
| image = sd(prompt).images[<span class="hljs-number">0</span>] | |
| image.save(<span class="hljs-string">f"./<span class="hljs-subst">{<span class="hljs-string">'_'</span>.join(prompt)}</span>.png"</span>)`,wrap:!1}}),x=new j({props:{code:"ZGVmJTIwbWFpbigpJTNBJTBBJTIwJTIwJTIwJTIwd29ybGRfc2l6ZSUyMCUzRCUyMDIlMEElMjAlMjAlMjAlMjBtcC5zcGF3bihydW5faW5mZXJlbmNlJTJDJTIwYXJncyUzRCh3b3JsZF9zaXplJTJDKSUyQyUyMG5wcm9jcyUzRHdvcmxkX3NpemUlMkMlMjBqb2luJTNEVHJ1ZSklMEElMEElMEFpZiUyMF9fbmFtZV9fJTIwJTNEJTNEJTIwJTIyX19tYWluX18lMjIlM0ElMEElMjAlMjAlMjAlMjBtYWluKCk=",highlighted:`<span class="hljs-keyword">def</span> <span class="hljs-title function_">main</span>(): | |
| world_size = <span class="hljs-number">2</span> | |
| mp.spawn(run_inference, args=(world_size,), nprocs=world_size, join=<span class="hljs-literal">True</span>) | |
| <span class="hljs-keyword">if</span> __name__ == <span class="hljs-string">"__main__"</span>: | |
| main()`,wrap:!1}}),Q=new j({props:{code:"dG9yY2hydW4lMjBydW5fZGlzdHJpYnV0ZWQucHklMjAtLW5wcm9jX3Blcl9ub2RlJTNEMg==",highlighted:"torchrun run_distributed.py --nproc_per_node=2",wrap:!1}}),b=new Oe({props:{warning:!1,$$slots:{default:[Rl]},$$scope:{ctx:h}}}),Y=new el({props:{title:"模型分片",local:"模型分片",headingTag:"h2"}}),U=new Oe({props:{warning:!1,$$slots:{default:[Nl]},$$scope:{ctx:h}}}),q=new j({props:{code:"ZnJvbSUyMGRpZmZ1c2VycyUyMGltcG9ydCUyMEZsdXhQaXBlbGluZSUwQWltcG9ydCUyMHRvcmNoJTBBJTBBcHJvbXB0JTIwJTNEJTIwJTIyYSUyMHBob3RvJTIwb2YlMjBhJTIwZG9nJTIwd2l0aCUyMGNhdC1saWtlJTIwbG9vayUyMiUwQSUwQXBpcGVsaW5lJTIwJTNEJTIwRmx1eFBpcGVsaW5lLmZyb21fcHJldHJhaW5lZCglMEElMjAlMjAlMjAlMjAlMjJibGFjay1mb3Jlc3QtbGFicyUyRkZMVVguMS1kZXYlMjIlMkMlMEElMjAlMjAlMjAlMjB0cmFuc2Zvcm1lciUzRE5vbmUlMkMlMEElMjAlMjAlMjAlMjB2YWUlM0ROb25lJTJDJTBBJTIwJTIwJTIwJTIwZGV2aWNlX21hcCUzRCUyMmJhbGFuY2VkJTIyJTJDJTBBJTIwJTIwJTIwJTIwbWF4X21lbW9yeSUzRCU3QjAlM0ElMjAlMjIxNkdCJTIyJTJDJTIwMSUzQSUyMCUyMjE2R0IlMjIlN0QlMkMlMEElMjAlMjAlMjAlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmJmbG9hdDE2JTBBKSUwQXdpdGglMjB0b3JjaC5ub19ncmFkKCklM0ElMEElMjAlMjAlMjAlMjBwcmludCglMjJFbmNvZGluZyUyMHByb21wdHMuJTIyKSUwQSUyMCUyMCUyMCUyMHByb21wdF9lbWJlZHMlMkMlMjBwb29sZWRfcHJvbXB0X2VtYmVkcyUyQyUyMHRleHRfaWRzJTIwJTNEJTIwcGlwZWxpbmUuZW5jb2RlX3Byb21wdCglMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBwcm9tcHQlM0Rwcm9tcHQlMkMlMjBwcm9tcHRfMiUzRE5vbmUlMkMlMjBtYXhfc2VxdWVuY2VfbGVuZ3RoJTNENTEyJTBBJTIwJTIwJTIwJTIwKQ==",highlighted:`<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> FluxPipeline | |
| <span class="hljs-keyword">import</span> torch | |
| prompt = <span class="hljs-string">"a photo of a dog with cat-like look"</span> | |
| pipeline = FluxPipeline.from_pretrained( | |
| <span class="hljs-string">"black-forest-labs/FLUX.1-dev"</span>, | |
| transformer=<span class="hljs-literal">None</span>, | |
| vae=<span class="hljs-literal">None</span>, | |
| device_map=<span class="hljs-string">"balanced"</span>, | |
| max_memory={<span class="hljs-number">0</span>: <span class="hljs-string">"16GB"</span>, <span class="hljs-number">1</span>: <span class="hljs-string">"16GB"</span>}, | |
| torch_dtype=torch.bfloat16 | |
| ) | |
| <span class="hljs-keyword">with</span> torch.no_grad(): | |
| <span class="hljs-built_in">print</span>(<span class="hljs-string">"Encoding prompts."</span>) | |
| prompt_embeds, pooled_prompt_embeds, text_ids = pipeline.encode_prompt( | |
| prompt=prompt, prompt_2=<span class="hljs-literal">None</span>, max_sequence_length=<span class="hljs-number">512</span> | |
| )`,wrap:!1}}),K=new j({props:{code:"aW1wb3J0JTIwZ2MlMjAlMEElMEFkZWYlMjBmbHVzaCgpJTNBJTBBJTIwJTIwJTIwJTIwZ2MuY29sbGVjdCgpJTBBJTIwJTIwJTIwJTIwdG9yY2guY3VkYS5lbXB0eV9jYWNoZSgpJTBBJTIwJTIwJTIwJTIwdG9yY2guY3VkYS5yZXNldF9tYXhfbWVtb3J5X2FsbG9jYXRlZCgpJTBBJTIwJTIwJTIwJTIwdG9yY2guY3VkYS5yZXNldF9wZWFrX21lbW9yeV9zdGF0cygpJTBBJTBBZGVsJTIwcGlwZWxpbmUudGV4dF9lbmNvZGVyJTBBZGVsJTIwcGlwZWxpbmUudGV4dF9lbmNvZGVyXzIlMEFkZWwlMjBwaXBlbGluZS50b2tlbml6ZXIlMEFkZWwlMjBwaXBlbGluZS50b2tlbml6ZXJfMiUwQWRlbCUyMHBpcGVsaW5lJTBBJTBBZmx1c2goKQ==",highlighted:`<span class="hljs-keyword">import</span> gc | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">flush</span>(): | |
| gc.collect() | |
| torch.cuda.empty_cache() | |
| torch.cuda.reset_max_memory_allocated() | |
| torch.cuda.reset_peak_memory_stats() | |
| <span class="hljs-keyword">del</span> pipeline.text_encoder | |
| <span class="hljs-keyword">del</span> pipeline.text_encoder_2 | |
| <span class="hljs-keyword">del</span> pipeline.tokenizer | |
| <span class="hljs-keyword">del</span> pipeline.tokenizer_2 | |
| <span class="hljs-keyword">del</span> pipeline | |
| flush()`,wrap:!1}}),ee=new j({props:{code:"ZnJvbSUyMGRpZmZ1c2VycyUyMGltcG9ydCUyMEF1dG9Nb2RlbCUwQWltcG9ydCUyMHRvcmNoJTIwJTBBJTBBdHJhbnNmb3JtZXIlMjAlM0QlMjBBdXRvTW9kZWwuZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMCUyMmJsYWNrLWZvcmVzdC1sYWJzJTJGRkxVWC4xLWRldiUyMiUyQyUyMCUwQSUyMCUyMCUyMCUyMHN1YmZvbGRlciUzRCUyMnRyYW5zZm9ybWVyJTIyJTJDJTBBJTIwJTIwJTIwJTIwZGV2aWNlX21hcCUzRCUyMmF1dG8lMjIlMkMlMEElMjAlMjAlMjAlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmJmbG9hdDE2JTBBKQ==",highlighted:`<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> AutoModel | |
| <span class="hljs-keyword">import</span> torch | |
| transformer = AutoModel.from_pretrained( | |
| <span class="hljs-string">"black-forest-labs/FLUX.1-dev"</span>, | |
| subfolder=<span class="hljs-string">"transformer"</span>, | |
| device_map=<span class="hljs-string">"auto"</span>, | |
| torch_dtype=torch.bfloat16 | |
| )`,wrap:!1}}),Z=new Oe({props:{warning:!1,$$slots:{default:[Hl]},$$scope:{ctx:h}}}),te=new j({props:{code:"cGlwZWxpbmUlMjAlM0QlMjBGbHV4UGlwZWxpbmUuZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMCUyMmJsYWNrLWZvcmVzdC1sYWJzJTJGRkxVWC4xLWRldiUyMiUyQyUwQSUyMCUyMCUyMCUyMHRleHRfZW5jb2RlciUzRE5vbmUlMkMlMEElMjAlMjAlMjAlMjB0ZXh0X2VuY29kZXJfMiUzRE5vbmUlMkMlMEElMjAlMjAlMjAlMjB0b2tlbml6ZXIlM0ROb25lJTJDJTBBJTIwJTIwJTIwJTIwdG9rZW5pemVyXzIlM0ROb25lJTJDJTBBJTIwJTIwJTIwJTIwdmFlJTNETm9uZSUyQyUwQSUyMCUyMCUyMCUyMHRyYW5zZm9ybWVyJTNEdHJhbnNmb3JtZXIlMkMlMEElMjAlMjAlMjAlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmJmbG9hdDE2JTBBKSUwQSUwQXByaW50KCUyMlJ1bm5pbmclMjBkZW5vaXNpbmcuJTIyKSUwQWhlaWdodCUyQyUyMHdpZHRoJTIwJTNEJTIwNzY4JTJDJTIwMTM2MCUwQWxhdGVudHMlMjAlM0QlMjBwaXBlbGluZSglMEElMjAlMjAlMjAlMEElMjAlMjAlMjAlMjAlMjAlMEFwcm9tcHRfZW1iZWRzJTNEcHJvbXB0X2VtYmVkcyUyQyUwQXBvb2xlZF9wcm9tcHRfZW1iZWRzJTNEcG9vbGVkX3Byb21wdF9lbWJlZHMlMkMlMEFudW1faW5mZXJlbmNlX3N0ZXBzJTNENTAlMkMlMEFndWlkYW5jZV9zY2FsZSUzRDMuNSUyQyUwQWhlaWdodCUzRGhlaWdodCUyQyUwQXdpZHRoJTNEd2lkdGglMkMlMEFvdXRwdXRfdHlwZSUzRCUyMmxhdGVudCUyMiUyQyUwQSkuaW1hZ2Vz",highlighted:`pipeline = FluxPipeline.from_pretrained( | |
| <span class="hljs-string">"black-forest-labs/FLUX.1-dev"</span>, | |
| text_encoder=<span class="hljs-literal">None</span>, | |
| text_encoder_2=<span class="hljs-literal">None</span>, | |
| tokenizer=<span class="hljs-literal">None</span>, | |
| tokenizer_2=<span class="hljs-literal">None</span>, | |
| vae=<span class="hljs-literal">None</span>, | |
| transformer=transformer, | |
| torch_dtype=torch.bfloat16 | |
| ) | |
| <span class="hljs-built_in">print</span>(<span class="hljs-string">"Running denoising."</span>) | |
| height, width = <span class="hljs-number">768</span>, <span class="hljs-number">1360</span> | |
| latents = pipeline( | |
| prompt_embeds=prompt_embeds, | |
| pooled_prompt_embeds=pooled_prompt_embeds, | |
| num_inference_steps=<span class="hljs-number">50</span>, | |
| guidance_scale=<span class="hljs-number">3.5</span>, | |
| height=height, | |
| width=width, | |
| output_type=<span class="hljs-string">"latent"</span>, | |
| ).images`,wrap:!1}}),ae=new j({props:{code:"ZGVsJTIwcGlwZWxpbmUudHJhbnNmb3JtZXIlMEFkZWwlMjBwaXBlbGluZSUwQSUwQWZsdXNoKCk=",highlighted:`<span class="hljs-keyword">del</span> pipeline.transformer | |
| <span class="hljs-keyword">del</span> pipeline | |
| flush()`,wrap:!1}}),pe=new j({props:{code:"ZnJvbSUyMGRpZmZ1c2VycyUyMGltcG9ydCUyMEF1dG9lbmNvZGVyS0wlMEFmcm9tJTIwZGlmZnVzZXJzLmltYWdlX3Byb2Nlc3NvciUyMGltcG9ydCUyMFZhZUltYWdlUHJvY2Vzc29yJTBBaW1wb3J0JTIwdG9yY2glMjAlMEElMEF2YWUlMjAlM0QlMjBBdXRvZW5jb2RlcktMLmZyb21fcHJldHJhaW5lZChja3B0X2lkJTJDJTIwc3ViZm9sZGVyJTNEJTIydmFlJTIyJTJDJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5iZmxvYXQxNikudG8oJTIyY3VkYSUyMiklMEF2YWVfc2NhbGVfZmFjdG9yJTIwJTNEJTIwMiUyMCoqJTIwKGxlbih2YWUuY29uZmlnLmJsb2NrX291dF9jaGFubmVscykpJTBBaW1hZ2VfcHJvY2Vzc29yJTIwJTNEJTIwVmFlSW1hZ2VQcm9jZXNzb3IodmFlX3NjYWxlX2ZhY3RvciUzRHZhZV9zY2FsZV9mYWN0b3IpJTBBJTBBd2l0aCUyMHRvcmNoLm5vX2dyYWQoKSUzQSUwQSUyMCUyMCUyMCUyMHByaW50KCUyMiVFOCVCRiU5MCVFOCVBMSU4QyVFOCVBNyVBMyVFNyVBMCU4MSVFNCVCOCVBRCVFMyU4MCU4MiUyMiklMEElMjAlMjAlMjAlMjBsYXRlbnRzJTIwJTNEJTIwRmx1eFBpcGVsaW5lLl91bnBhY2tfbGF0ZW50cyhsYXRlbnRzJTJDJTIwaGVpZ2h0JTJDJTIwd2lkdGglMkMlMjB2YWVfc2NhbGVfZmFjdG9yKSUwQSUyMCUyMCUyMCUyMGxhdGVudHMlMjAlM0QlMjAobGF0ZW50cyUyMCUyRiUyMHZhZS5jb25maWcuc2NhbGluZ19mYWN0b3IpJTIwJTJCJTIwdmFlLmNvbmZpZy5zaGlmdF9mYWN0b3IlMEElMEElMjAlMjAlMjAlMjBpbWFnZSUyMCUzRCUyMHZhZS5kZWNvZGUobGF0ZW50cyUyQyUyMHJldHVybl9kaWN0JTNERmFsc2UpJTVCMCU1RCUwQSUyMCUyMCUyMCUyMGltYWdlJTIwJTNEJTIwaW1hZ2VfcHJvY2Vzc29yLnBvc3Rwcm9jZXNzKGltYWdlJTJDJTIwb3V0cHV0X3R5cGUlM0QlMjJwaWwlMjIpJTBBJTIwJTIwJTIwJTIwaW1hZ2UlNUIwJTVELnNhdmUoJTIyc3BsaXRfdHJhbnNmb3JtZXIucG5nJTIyKQ==",highlighted:`<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> AutoencoderKL | |
| <span class="hljs-keyword">from</span> diffusers.image_processor <span class="hljs-keyword">import</span> VaeImageProcessor | |
| <span class="hljs-keyword">import</span> torch | |
| vae = AutoencoderKL.from_pretrained(ckpt_id, subfolder=<span class="hljs-string">"vae"</span>, torch_dtype=torch.bfloat16).to(<span class="hljs-string">"cuda"</span>) | |
| vae_scale_factor = <span class="hljs-number">2</span> ** (<span class="hljs-built_in">len</span>(vae.config.block_out_channels)) | |
| image_processor = VaeImageProcessor(vae_scale_factor=vae_scale_factor) | |
| <span class="hljs-keyword">with</span> torch.no_grad(): | |
| <span class="hljs-built_in">print</span>(<span class="hljs-string">"运行解码中。"</span>) | |
| latents = FluxPipeline._unpack_latents(latents, height, width, vae_scale_factor) | |
| latents = (latents / vae.config.scaling_factor) + vae.config.shift_factor | |
| image = vae.decode(latents, return_dict=<span class="hljs-literal">False</span>)[<span class="hljs-number">0</span>] | |
| image = image_processor.postprocess(image, output_type=<span class="hljs-string">"pil"</span>) | |
| image[<span class="hljs-number">0</span>].save(<span class="hljs-string">"split_transformer.png"</span>)`,wrap:!1}}),ce=new El({props:{source:"https://github.com/huggingface/diffusers/blob/main/docs/source/zh/training/distributed_inference.md"}}),{c(){p=c("meta"),u=a(),i=c("p"),J=a(),M(_.$$.fragment),oe=a(),$=c("p"),$.innerHTML=tl,Me=a(),G=c("p"),G.textContent=sl,me=a(),M(I.$$.fragment),de=a(),C=c("p"),C.innerHTML=al,fe=a(),B=c("p"),B.innerHTML=nl,ye=a(),g=c("p"),g.innerHTML=pl,we=a(),M(k.$$.fragment),ue=a(),v=c("p"),v.innerHTML=il,Je=a(),M(W.$$.fragment),je=a(),M(T.$$.fragment),he=a(),M(E.$$.fragment),Te=a(),V=c("p"),V.innerHTML=cl,be=a(),R=c("p"),R.innerHTML=rl,Ue=a(),M(N.$$.fragment),Ze=a(),H=c("p"),H.innerHTML=ol,_e=a(),X=c("p"),X.innerHTML=Ml,$e=a(),M(A.$$.fragment),Ge=a(),F=c("p"),F.innerHTML=ml,Ie=a(),M(x.$$.fragment),Ce=a(),z=c("p"),z.innerHTML=dl,Be=a(),M(Q.$$.fragment),ge=a(),M(b.$$.fragment),ke=a(),M(Y.$$.fragment),ve=a(),S=c("p"),S.innerHTML=fl,We=a(),P=c("p"),P.textContent=yl,Ee=a(),L=c("p"),L.innerHTML=wl,Ve=a(),M(U.$$.fragment),Re=a(),M(q.$$.fragment),Ne=a(),D=c("p"),D.textContent=ul,He=a(),M(K.$$.fragment),Xe=a(),O=c("p"),O.innerHTML=Jl,Ae=a(),M(ee.$$.fragment),Fe=a(),M(Z.$$.fragment),xe=a(),le=c("p"),le.innerHTML=jl,ze=a(),M(te.$$.fragment),Qe=a(),se=c("p"),se.textContent=hl,Ye=a(),M(ae.$$.fragment),Se=a(),ne=c("p"),ne.textContent=Tl,Pe=a(),M(pe.$$.fragment),Le=a(),ie=c("p"),ie.textContent=bl,qe=a(),M(ce.$$.fragment),De=a(),re=c("p"),this.h()},l(e){const l=vl("svelte-u9bgzb",document.head);p=r(l,"META",{name:!0,content:!0}),l.forEach(t),u=n(e),i=r(e,"P",{}),Gl(i).forEach(t),J=n(e),m(_.$$.fragment,e),oe=n(e),$=r(e,"P",{"data-svelte-h":!0}),o($)!=="svelte-164t5yx"&&($.innerHTML=tl),Me=n(e),G=r(e,"P",{"data-svelte-h":!0}),o(G)!=="svelte-qajcxl"&&(G.textContent=sl),me=n(e),m(I.$$.fragment,e),de=n(e),C=r(e,"P",{"data-svelte-h":!0}),o(C)!=="svelte-10vyc10"&&(C.innerHTML=al),fe=n(e),B=r(e,"P",{"data-svelte-h":!0}),o(B)!=="svelte-k495sb"&&(B.innerHTML=nl),ye=n(e),g=r(e,"P",{"data-svelte-h":!0}),o(g)!=="svelte-1224dwq"&&(g.innerHTML=pl),we=n(e),m(k.$$.fragment,e),ue=n(e),v=r(e,"P",{"data-svelte-h":!0}),o(v)!=="svelte-1eq5z11"&&(v.innerHTML=il),Je=n(e),m(W.$$.fragment,e),je=n(e),m(T.$$.fragment,e),he=n(e),m(E.$$.fragment,e),Te=n(e),V=r(e,"P",{"data-svelte-h":!0}),o(V)!=="svelte-7gn49b"&&(V.innerHTML=cl),be=n(e),R=r(e,"P",{"data-svelte-h":!0}),o(R)!=="svelte-1vhpcpe"&&(R.innerHTML=rl),Ue=n(e),m(N.$$.fragment,e),Ze=n(e),H=r(e,"P",{"data-svelte-h":!0}),o(H)!=="svelte-109hrfe"&&(H.innerHTML=ol),_e=n(e),X=r(e,"P",{"data-svelte-h":!0}),o(X)!=="svelte-1qcsr3i"&&(X.innerHTML=Ml),$e=n(e),m(A.$$.fragment,e),Ge=n(e),F=r(e,"P",{"data-svelte-h":!0}),o(F)!=="svelte-1yf0k86"&&(F.innerHTML=ml),Ie=n(e),m(x.$$.fragment,e),Ce=n(e),z=r(e,"P",{"data-svelte-h":!0}),o(z)!=="svelte-1ebzc0p"&&(z.innerHTML=dl),Be=n(e),m(Q.$$.fragment,e),ge=n(e),m(b.$$.fragment,e),ke=n(e),m(Y.$$.fragment,e),ve=n(e),S=r(e,"P",{"data-svelte-h":!0}),o(S)!=="svelte-yu6o1n"&&(S.innerHTML=fl),We=n(e),P=r(e,"P",{"data-svelte-h":!0}),o(P)!=="svelte-1wtcutz"&&(P.textContent=yl),Ee=n(e),L=r(e,"P",{"data-svelte-h":!0}),o(L)!=="svelte-vjsvbo"&&(L.innerHTML=wl),Ve=n(e),m(U.$$.fragment,e),Re=n(e),m(q.$$.fragment,e),Ne=n(e),D=r(e,"P",{"data-svelte-h":!0}),o(D)!=="svelte-101ikwk"&&(D.textContent=ul),He=n(e),m(K.$$.fragment,e),Xe=n(e),O=r(e,"P",{"data-svelte-h":!0}),o(O)!=="svelte-i97tc3"&&(O.innerHTML=Jl),Ae=n(e),m(ee.$$.fragment,e),Fe=n(e),m(Z.$$.fragment,e),xe=n(e),le=r(e,"P",{"data-svelte-h":!0}),o(le)!=="svelte-1lgsyuc"&&(le.innerHTML=jl),ze=n(e),m(te.$$.fragment,e),Qe=n(e),se=r(e,"P",{"data-svelte-h":!0}),o(se)!=="svelte-x0nor"&&(se.textContent=hl),Ye=n(e),m(ae.$$.fragment,e),Se=n(e),ne=r(e,"P",{"data-svelte-h":!0}),o(ne)!=="svelte-1v7ewyt"&&(ne.textContent=Tl),Pe=n(e),m(pe.$$.fragment,e),Le=n(e),ie=r(e,"P",{"data-svelte-h":!0}),o(ie)!=="svelte-19rooo6"&&(ie.textContent=bl),qe=n(e),m(ce.$$.fragment,e),De=n(e),re=r(e,"P",{}),Gl(re).forEach(t),this.h()},h(){Il(p,"name","hf:doc:metadata"),Il(p,"content",Al)},m(e,l){Wl(document.head,p),s(e,u,l),s(e,i,l),s(e,J,l),d(_,e,l),s(e,oe,l),s(e,$,l),s(e,Me,l),s(e,G,l),s(e,me,l),d(I,e,l),s(e,de,l),s(e,C,l),s(e,fe,l),s(e,B,l),s(e,ye,l),s(e,g,l),s(e,we,l),d(k,e,l),s(e,ue,l),s(e,v,l),s(e,Je,l),d(W,e,l),s(e,je,l),d(T,e,l),s(e,he,l),d(E,e,l),s(e,Te,l),s(e,V,l),s(e,be,l),s(e,R,l),s(e,Ue,l),d(N,e,l),s(e,Ze,l),s(e,H,l),s(e,_e,l),s(e,X,l),s(e,$e,l),d(A,e,l),s(e,Ge,l),s(e,F,l),s(e,Ie,l),d(x,e,l),s(e,Ce,l),s(e,z,l),s(e,Be,l),d(Q,e,l),s(e,ge,l),d(b,e,l),s(e,ke,l),d(Y,e,l),s(e,ve,l),s(e,S,l),s(e,We,l),s(e,P,l),s(e,Ee,l),s(e,L,l),s(e,Ve,l),d(U,e,l),s(e,Re,l),d(q,e,l),s(e,Ne,l),s(e,D,l),s(e,He,l),d(K,e,l),s(e,Xe,l),s(e,O,l),s(e,Ae,l),d(ee,e,l),s(e,Fe,l),d(Z,e,l),s(e,xe,l),s(e,le,l),s(e,ze,l),d(te,e,l),s(e,Qe,l),s(e,se,l),s(e,Ye,l),d(ae,e,l),s(e,Se,l),s(e,ne,l),s(e,Pe,l),d(pe,e,l),s(e,Le,l),s(e,ie,l),s(e,qe,l),d(ce,e,l),s(e,De,l),s(e,re,l),Ke=!0},p(e,[l]){const Ul={};l&2&&(Ul.$$scope={dirty:l,ctx:e}),T.$set(Ul);const Zl={};l&2&&(Zl.$$scope={dirty:l,ctx:e}),b.$set(Zl);const _l={};l&2&&(_l.$$scope={dirty:l,ctx:e}),U.$set(_l);const $l={};l&2&&($l.$$scope={dirty:l,ctx:e}),Z.$set($l)},i(e){Ke||(f(_.$$.fragment,e),f(I.$$.fragment,e),f(k.$$.fragment,e),f(W.$$.fragment,e),f(T.$$.fragment,e),f(E.$$.fragment,e),f(N.$$.fragment,e),f(A.$$.fragment,e),f(x.$$.fragment,e),f(Q.$$.fragment,e),f(b.$$.fragment,e),f(Y.$$.fragment,e),f(U.$$.fragment,e),f(q.$$.fragment,e),f(K.$$.fragment,e),f(ee.$$.fragment,e),f(Z.$$.fragment,e),f(te.$$.fragment,e),f(ae.$$.fragment,e),f(pe.$$.fragment,e),f(ce.$$.fragment,e),Ke=!0)},o(e){y(_.$$.fragment,e),y(I.$$.fragment,e),y(k.$$.fragment,e),y(W.$$.fragment,e),y(T.$$.fragment,e),y(E.$$.fragment,e),y(N.$$.fragment,e),y(A.$$.fragment,e),y(x.$$.fragment,e),y(Q.$$.fragment,e),y(b.$$.fragment,e),y(Y.$$.fragment,e),y(U.$$.fragment,e),y(q.$$.fragment,e),y(K.$$.fragment,e),y(ee.$$.fragment,e),y(Z.$$.fragment,e),y(te.$$.fragment,e),y(ae.$$.fragment,e),y(pe.$$.fragment,e),y(ce.$$.fragment,e),Ke=!1},d(e){e&&(t(u),t(i),t(J),t(oe),t($),t(Me),t(G),t(me),t(de),t(C),t(fe),t(B),t(ye),t(g),t(we),t(ue),t(v),t(Je),t(je),t(he),t(Te),t(V),t(be),t(R),t(Ue),t(Ze),t(H),t(_e),t(X),t($e),t(Ge),t(F),t(Ie),t(Ce),t(z),t(Be),t(ge),t(ke),t(ve),t(S),t(We),t(P),t(Ee),t(L),t(Ve),t(Re),t(Ne),t(D),t(He),t(Xe),t(O),t(Ae),t(Fe),t(xe),t(le),t(ze),t(Qe),t(se),t(Ye),t(Se),t(ne),t(Pe),t(Le),t(ie),t(qe),t(De),t(re)),t(p),w(_,e),w(I,e),w(k,e),w(W,e),w(T,e),w(E,e),w(N,e),w(A,e),w(x,e),w(Q,e),w(b,e),w(Y,e),w(U,e),w(q,e),w(K,e),w(ee,e),w(Z,e),w(te,e),w(ae,e),w(pe,e),w(ce,e)}}}const Al='{"title":"分布式推理","local":"分布式推理","sections":[{"title":"🤗 Accelerate","local":"-accelerate","sections":[],"depth":2},{"title":"PyTorch Distributed","local":"pytorch-distributed","sections":[],"depth":2},{"title":"模型分片","local":"模型分片","sections":[],"depth":2}],"depth":1}';function Fl(h){return Bl(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class Pl extends gl{constructor(p){super(),kl(this,p,Fl,Xl,Cl,{})}}export{Pl as component}; | |
Xet Storage Details
- Size:
- 29.9 kB
- Xet hash:
- 2b4546d40b66a901d4b041946e912ce2ab43418e8b0a72ab758c7e5239ae36da
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.