Buckets:

rtrm's picture
download
raw
29.3 kB
import{s as Be,n as Ce,o as ve}from"../chunks/scheduler.e4ff9b64.js";import{S as ge,i as We,e as p,s as a,c as o,h as Ee,a as i,d as t,b as n,f as Ie,g as r,j as c,k as pl,l as $e,m as s,n as M,t as d,o as m,p as J}from"../chunks/index.09f1bca0.js";import{C as Re,H as ee,E as Xe}from"../chunks/MermaidChart.svelte_svelte_type_style_lang.300ddef9.js";import{C as w}from"../chunks/CodeBlock.f4955779.js";/**
 * Ne: compiled Svelte fragment factory for the distributed inference docs page (zh).
 * It declares the static paragraph markup as local strings, constructs the child
 * components (Re, the ee headings, the w code blocks and Xe) and returns an object
 * exposing the methods c, l, h, m, p, i, o and d, which build, claim, set attributes
 * on, insert, transition and remove the page nodes.
 * The parameter te does not appear to be referenced in the body.
 * NOTE(review): generated, minified output. The helper aliases imported from
 * ../chunks are opaque here, so the lifecycle roles above follow the usual Svelte
 * naming and should be confirmed against the runtime chunk. Content fixes belong in
 * the upstream markdown source rather than in this file.
 */function Ne(te){let y,ol,il,rl,b,Ml,h,dl,U,se='在分布式设置中,您可以使用 🤗 <a href="https://huggingface.co/docs/accelerate/index" rel="nofollow">Accelerate</a> 或 <a href="https://pytorch.org/tutorials/beginner/dist_overview.html" rel="nofollow">PyTorch Distributed</a> 在多个 GPU 上运行推理,这对于并行生成多个提示非常有用。',ml,Z,ae="本指南将向您展示如何使用 🤗 Accelerate 和 PyTorch Distributed 进行分布式推理。",Jl,_,wl,k,ne='🤗 <a href="https://huggingface.co/docs/accelerate/index" rel="nofollow">Accelerate</a> 是一个旨在简化在分布式设置中训练或运行推理的库。它简化了设置分布式环境的过程,让您可以专注于您的 PyTorch 代码。',yl,G,pe="首先,创建一个 Python 文件并初始化一个 <code>accelerate.PartialState</code> 来创建分布式环境;您的设置会自动检测,因此您无需明确定义 <code>rank</code> 或 <code>world_size</code>。将 <code>DiffusionPipeline</code> 移动到 <code>distributed_state.device</code> 以为每个进程分配一个 GPU。",ul,I,ie="现在使用 <code>split_between_processes</code> 实用程序作为上下文管理器,自动在进程数之间分发提示。",fl,B,jl,C,ce="使用 <code>--num_processes</code> 参数指定要使用的 GPU 数量,并调用 <code>accelerate launch</code> 来运行脚本:",Tl,v,bl,u,oe='<p>参考这个最小示例 <a href="https://gist.github.com/sayakpaul/cfaebd221820d7b43fae638b4dfa01ba" rel="nofollow">脚本</a> 以在多个 GPU 上运行推理。要了解更多信息,请查看 <a href="https://huggingface.co/docs/accelerate/en/usage_guides/distributed_inference#distributed-inference-with-accelerate" rel="nofollow">使用 🤗 Accelerate 进行分布式推理</a> 指南。</p>',hl,g,Ul,W,re=`PyTorch 支持 <a href="https://pytorch.org/docs/stable/generated/torch.nn.parallel.DistributedDataParallel.html" rel="nofollow"><code>DistributedDataParallel</code></a>,它启用了数据
并行性。`,Zl,E,Me="首先,创建一个 Python 文件并导入 <code>torch.distributed</code> 和 <code>torch.multiprocessing</code> 来设置分布式进程组,并为每个 GPU 上的推理生成进程。您还应该初始化一个 <code>DiffusionPipeline</code>:",_l,$,kl,R,de='您需要创建一个函数来运行推理;<a href="https://pytorch.org/docs/stable/distributed.html?highlight=init_process_group#torch.distributed.init_process_group" rel="nofollow"><code>init_process_group</code></a> 处理创建一个分布式环境,指定要使用的后端类型、当前进程的 <code>rank</code> 以及参与进程的数量 <code>world_size</code>。如果您在 2 个 GPU 上并行运行推理,那么 <code>world_size</code> 就是 2。',Gl,X,me="将 <code>DiffusionPipeline</code> 移动到 <code>rank</code>,并使用 <code>get_rank</code> 为每个进程分配一个 GPU,其中每个进程处理不同的提示:",Il,N,Bl,V,Je='要运行分布式推理,调用 <a href="https://pytorch.org/docs/stable/multiprocessing.html#torch.multiprocessing.spawn" rel="nofollow"><code>mp.spawn</code></a> 在 <code>world_size</code> 定义的 GPU 数量上运行 <code>run_inference</code> 函数:',Cl,H,vl,A,we="完成推理脚本后,使用 <code>--nproc_per_node</code> 参数指定要使用的 GPU 数量,并调用 <code>torchrun</code> 来运行脚本:",gl,x,Wl,f,ye='<p>您可以在 <code>DiffusionPipeline</code> 中使用 <code>device_map</code> 将其模型级组件分布在多个设备上。请参考 <a href="../tutorials/inference_with_big_models#device-placement">设备放置</a> 指南了解更多信息。</p>',El,z,$l,Q,ue='现代扩散系统,如 <a href="../api/pipelines/flux">Flux</a>,非常大且包含多个模型。例如,<a href="https://hf.co/black-forest-labs/FLUX.1-dev" rel="nofollow">Flux.1-Dev</a> 由两个文本编码器 - <a href="https://hf.co/google/t5-v1_1-xxl" rel="nofollow">T5-XXL</a> 和 <a href="https://hf.co/openai/clip-vit-large-patch14" rel="nofollow">CLIP-L</a> - 一个 <a href="../api/models/flux_transformer">扩散变换器</a>,以及一个 <a href="../api/models/autoencoderkl">VAE</a> 组成。对于如此大的模型,在消费级 GPU 上运行推理可能具有挑战性。',Rl,F,fe="模型分片是一种技术,当模型无法容纳在单个 GPU 上时,将模型分布在多个 GPU 上。下面的示例假设有两个 16GB GPU 可用于推理。",Xl,Y,je="开始使用文本编码器计算文本嵌入。通过设置 <code>device_map=&quot;balanced&quot;</code> 将文本编码器保持在两个GPU上。<code>balanced</code> 策略将模型均匀分布在所有可用GPU上。使用 <code>max_memory</code> 参数为每个GPU上的每个文本编码器分配最大内存量。",Nl,j,Te="<p><strong>仅</strong> 
在此步骤加载文本编码器!扩散变换器和VAE在后续步骤中加载以节省内存。</p>",Vl,S,Hl,L,be="一旦文本嵌入计算完成,从GPU中移除它们以为扩散变换器腾出空间。",Al,P,xl,q,he='接下来加载扩散变换器,它有125亿参数。这次,设置 <code>device_map=&quot;auto&quot;</code> 以自动将模型分布在两个16GB GPU上。<code>auto</code> 策略由 <a href="https://hf.co/docs/accelerate/index" rel="nofollow">Accelerate</a> 支持,并作为 <a href="https://hf.co/docs/accelerate/concept_guides/big_model_inference" rel="nofollow">大模型推理</a> 功能的一部分可用。它首先将模型分布在最快的设备(GPU)上,然后在需要时移动到较慢的设备如CPU和硬盘。将模型参数存储在较慢设备上的权衡是推理延迟较慢。',zl,D,Ql,T,Ue="<p>在任何时候,您可以尝试 <code>print(pipeline.hf_device_map)</code> 来查看各种模型如何在设备上分布。这对于跟踪模型的设备放置很有用。您也可以尝试 <code>print(transformer.hf_device_map)</code> 来查看变换器模型如何在设备上分片。</p>",Fl,K,Ze="将变换器模型添加到管道中以进行去噪,但将其他模型级组件如文本编码器和VAE设置为 <code>None</code>,因为您还不需要它们。",Yl,O,Sl,ll,_e="从内存中移除管道和变换器,因为它们不再需要。",Ll,el,Pl,tl,ke="最后,使用变分自编码器(VAE)将潜在表示解码为图像。VAE通常足够小,可以在单个GPU上加载。",ql,sl,Dl,al,Ge="通过选择性加载和卸载在特定阶段所需的模型,并将最大模型分片到多个GPU上,可以在消费级GPU上运行大型模型的推理。",Kl,nl,Ol,cl,le;return b=new Re({props:{containerStyle:"float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"}}),h=new ee({props:{title:"分布式推理",local:"分布式推理",headingTag:"h1"}}),_=new ee({props:{title:"🤗 Accelerate",local:"-accelerate",headingTag:"h2"}}),B=new 
w({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwYWNjZWxlcmF0ZSUyMGltcG9ydCUyMFBhcnRpYWxTdGF0ZSUwQWZyb20lMjBkaWZmdXNlcnMlMjBpbXBvcnQlMjBEaWZmdXNpb25QaXBlbGluZSUwQSUwQXBpcGVsaW5lJTIwJTNEJTIwRGlmZnVzaW9uUGlwZWxpbmUuZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMCUyMnN0YWJsZS1kaWZmdXNpb24tdjEtNSUyRnN0YWJsZS1kaWZmdXNpb24tdjEtNSUyMiUyQyUyMHRvcmNoX2R0eXBlJTNEdG9yY2guZmxvYXQxNiUyQyUyMHVzZV9zYWZldGVuc29ycyUzRFRydWUlMEEpJTBBZGlzdHJpYnV0ZWRfc3RhdGUlMjAlM0QlMjBQYXJ0aWFsU3RhdGUoKSUwQXBpcGVsaW5lLnRvKGRpc3RyaWJ1dGVkX3N0YXRlLmRldmljZSklMEElMEF3aXRoJTIwZGlzdHJpYnV0ZWRfc3RhdGUuc3BsaXRfYmV0d2Vlbl9wcm9jZXNzZXMoJTVCJTIyYSUyMGRvZyUyMiUyQyUyMCUyMmElMjBjYXQlMjIlNUQpJTIwYXMlMjBwcm9tcHQlM0ElMEElMjAlMjAlMjAlMjByZXN1bHQlMjAlM0QlMjBwaXBlbGluZShwcm9tcHQpLmltYWdlcyU1QjAlNUQlMEElMjAlMjAlMjAlMjByZXN1bHQuc2F2ZShmJTIycmVzdWx0XyU3QmRpc3RyaWJ1dGVkX3N0YXRlLnByb2Nlc3NfaW5kZXglN0QucG5nJTIyKQ==",highlighted:`<span class="hljs-keyword">import</span> torch
<span class="hljs-keyword">from</span> accelerate <span class="hljs-keyword">import</span> PartialState
<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> DiffusionPipeline
pipeline = DiffusionPipeline.from_pretrained(
<span class="hljs-string">&quot;stable-diffusion-v1-5/stable-diffusion-v1-5&quot;</span>, torch_dtype=torch.float16, use_safetensors=<span class="hljs-literal">True</span>
)
distributed_state = PartialState()
pipeline.to(distributed_state.device)
<span class="hljs-keyword">with</span> distributed_state.split_between_processes([<span class="hljs-string">&quot;a dog&quot;</span>, <span class="hljs-string">&quot;a cat&quot;</span>]) <span class="hljs-keyword">as</span> prompt:
result = pipeline(prompt).images[<span class="hljs-number">0</span>]
result.save(<span class="hljs-string">f&quot;result_<span class="hljs-subst">{distributed_state.process_index}</span>.png&quot;</span>)`,wrap:!1}}),v=new w({props:{code:"YWNjZWxlcmF0ZSUyMGxhdW5jaCUyMHJ1bl9kaXN0cmlidXRlZC5weSUyMC0tbnVtX3Byb2Nlc3NlcyUzRDI=",highlighted:"accelerate launch run_distributed.py --num_processes=2",wrap:!1}}),g=new ee({props:{title:"PyTorch Distributed",local:"pytorch-distributed",headingTag:"h2"}}),$=new w({props:{code:"aW1wb3J0JTIwdG9yY2glMEFpbXBvcnQlMjB0b3JjaC5kaXN0cmlidXRlZCUyMGFzJTIwZGlzdCUwQWltcG9ydCUyMHRvcmNoLm11bHRpcHJvY2Vzc2luZyUyMGFzJTIwbXAlMEElMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwRGlmZnVzaW9uUGlwZWxpbmUlMEElMEFzZCUyMCUzRCUyMERpZmZ1c2lvblBpcGVsaW5lLmZyb21fcHJldHJhaW5lZCglMEElMjAlMjAlMjAlMjAlMjJzdGFibGUtZGlmZnVzaW9uLXYxLTUlMkZzdGFibGUtZGlmZnVzaW9uLXYxLTUlMjIlMkMlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmZsb2F0MTYlMkMlMjB1c2Vfc2FmZXRlbnNvcnMlM0RUcnVlJTBBKQ==",highlighted:`<span class="hljs-keyword">import</span> torch
<span class="hljs-keyword">import</span> torch.distributed <span class="hljs-keyword">as</span> dist
<span class="hljs-keyword">import</span> torch.multiprocessing <span class="hljs-keyword">as</span> mp
<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> DiffusionPipeline
sd = DiffusionPipeline.from_pretrained(
<span class="hljs-string">&quot;stable-diffusion-v1-5/stable-diffusion-v1-5&quot;</span>, torch_dtype=torch.float16, use_safetensors=<span class="hljs-literal">True</span>
)`,wrap:!1}}),N=new w({props:{code:"ZGVmJTIwcnVuX2luZmVyZW5jZShyYW5rJTJDJTIwd29ybGRfc2l6ZSklM0ElMEElMjAlMjAlMjAlMjBkaXN0LmluaXRfcHJvY2Vzc19ncm91cCglMjJuY2NsJTIyJTJDJTIwcmFuayUzRHJhbmslMkMlMjB3b3JsZF9zaXplJTNEd29ybGRfc2l6ZSklMEElMEElMjAlMjAlMjAlMjBzZC50byhyYW5rKSUwQSUwQSUyMCUyMCUyMCUyMGlmJTIwdG9yY2guZGlzdHJpYnV0ZWQuZ2V0X3JhbmsoKSUyMCUzRCUzRCUyMDAlM0ElMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBwcm9tcHQlMjAlM0QlMjAlMjJhJTIwZG9nJTIyJTBBJTIwJTIwJTIwJTIwZWxpZiUyMHRvcmNoLmRpc3RyaWJ1dGVkLmdldF9yYW5rKCklMjAlM0QlM0QlMjAxJTNBJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwcHJvbXB0JTIwJTNEJTIwJTIyYSUyMGNhdCUyMiUwQSUwQSUyMCUyMCUyMCUyMGltYWdlJTIwJTNEJTIwc2QocHJvbXB0KS5pbWFnZXMlNUIwJTVEJTBBJTIwJTIwJTIwJTIwaW1hZ2Uuc2F2ZShmJTIyLiUyRiU3QidfJy5qb2luKHByb21wdCklN0QucG5nJTIyKQ==",highlighted:`<span class="hljs-keyword">def</span> <span class="hljs-title function_">run_inference</span>(<span class="hljs-params">rank, world_size</span>):
dist.init_process_group(<span class="hljs-string">&quot;nccl&quot;</span>, rank=rank, world_size=world_size)
sd.to(rank)
<span class="hljs-keyword">if</span> torch.distributed.get_rank() == <span class="hljs-number">0</span>:
prompt = <span class="hljs-string">&quot;a dog&quot;</span>
<span class="hljs-keyword">elif</span> torch.distributed.get_rank() == <span class="hljs-number">1</span>:
prompt = <span class="hljs-string">&quot;a cat&quot;</span>
image = sd(prompt).images[<span class="hljs-number">0</span>]
image.save(<span class="hljs-string">f&quot;./<span class="hljs-subst">{<span class="hljs-string">&#x27;_&#x27;</span>.join(prompt)}</span>.png&quot;</span>)`,wrap:!1}}),H=new w({props:{code:"ZGVmJTIwbWFpbigpJTNBJTBBJTIwJTIwJTIwJTIwd29ybGRfc2l6ZSUyMCUzRCUyMDIlMEElMjAlMjAlMjAlMjBtcC5zcGF3bihydW5faW5mZXJlbmNlJTJDJTIwYXJncyUzRCh3b3JsZF9zaXplJTJDKSUyQyUyMG5wcm9jcyUzRHdvcmxkX3NpemUlMkMlMjBqb2luJTNEVHJ1ZSklMEElMEElMEFpZiUyMF9fbmFtZV9fJTIwJTNEJTNEJTIwJTIyX19tYWluX18lMjIlM0ElMEElMjAlMjAlMjAlMjBtYWluKCk=",highlighted:`<span class="hljs-keyword">def</span> <span class="hljs-title function_">main</span>():
world_size = <span class="hljs-number">2</span>
mp.spawn(run_inference, args=(world_size,), nprocs=world_size, join=<span class="hljs-literal">True</span>)
<span class="hljs-keyword">if</span> __name__ == <span class="hljs-string">&quot;__main__&quot;</span>:
main()`,wrap:!1}}),x=new w({props:{code:"dG9yY2hydW4lMjBydW5fZGlzdHJpYnV0ZWQucHklMjAtLW5wcm9jX3Blcl9ub2RlJTNEMg==",highlighted:"torchrun run_distributed.py --nproc_per_node=2",wrap:!1}}),z=new ee({props:{title:"模型分片",local:"模型分片",headingTag:"h2"}}),S=new w({props:{code:"ZnJvbSUyMGRpZmZ1c2VycyUyMGltcG9ydCUyMEZsdXhQaXBlbGluZSUwQWltcG9ydCUyMHRvcmNoJTBBJTBBcHJvbXB0JTIwJTNEJTIwJTIyYSUyMHBob3RvJTIwb2YlMjBhJTIwZG9nJTIwd2l0aCUyMGNhdC1saWtlJTIwbG9vayUyMiUwQSUwQXBpcGVsaW5lJTIwJTNEJTIwRmx1eFBpcGVsaW5lLmZyb21fcHJldHJhaW5lZCglMEElMjAlMjAlMjAlMjAlMjJibGFjay1mb3Jlc3QtbGFicyUyRkZMVVguMS1kZXYlMjIlMkMlMEElMjAlMjAlMjAlMjB0cmFuc2Zvcm1lciUzRE5vbmUlMkMlMEElMjAlMjAlMjAlMjB2YWUlM0ROb25lJTJDJTBBJTIwJTIwJTIwJTIwZGV2aWNlX21hcCUzRCUyMmJhbGFuY2VkJTIyJTJDJTBBJTIwJTIwJTIwJTIwbWF4X21lbW9yeSUzRCU3QjAlM0ElMjAlMjIxNkdCJTIyJTJDJTIwMSUzQSUyMCUyMjE2R0IlMjIlN0QlMkMlMEElMjAlMjAlMjAlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmJmbG9hdDE2JTBBKSUwQXdpdGglMjB0b3JjaC5ub19ncmFkKCklM0ElMEElMjAlMjAlMjAlMjBwcmludCglMjJFbmNvZGluZyUyMHByb21wdHMuJTIyKSUwQSUyMCUyMCUyMCUyMHByb21wdF9lbWJlZHMlMkMlMjBwb29sZWRfcHJvbXB0X2VtYmVkcyUyQyUyMHRleHRfaWRzJTIwJTNEJTIwcGlwZWxpbmUuZW5jb2RlX3Byb21wdCglMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBwcm9tcHQlM0Rwcm9tcHQlMkMlMjBwcm9tcHRfMiUzRE5vbmUlMkMlMjBtYXhfc2VxdWVuY2VfbGVuZ3RoJTNENTEyJTBBJTIwJTIwJTIwJTIwKQ==",highlighted:`<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> FluxPipeline
<span class="hljs-keyword">import</span> torch
prompt = <span class="hljs-string">&quot;a photo of a dog with cat-like look&quot;</span>
pipeline = FluxPipeline.from_pretrained(
<span class="hljs-string">&quot;black-forest-labs/FLUX.1-dev&quot;</span>,
transformer=<span class="hljs-literal">None</span>,
vae=<span class="hljs-literal">None</span>,
device_map=<span class="hljs-string">&quot;balanced&quot;</span>,
max_memory={<span class="hljs-number">0</span>: <span class="hljs-string">&quot;16GB&quot;</span>, <span class="hljs-number">1</span>: <span class="hljs-string">&quot;16GB&quot;</span>},
torch_dtype=torch.bfloat16
)
<span class="hljs-keyword">with</span> torch.no_grad():
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot;Encoding prompts.&quot;</span>)
prompt_embeds, pooled_prompt_embeds, text_ids = pipeline.encode_prompt(
prompt=prompt, prompt_2=<span class="hljs-literal">None</span>, max_sequence_length=<span class="hljs-number">512</span>
)`,wrap:!1}}),P=new w({props:{code:"aW1wb3J0JTIwZ2MlMjAlMEElMEFkZWYlMjBmbHVzaCgpJTNBJTBBJTIwJTIwJTIwJTIwZ2MuY29sbGVjdCgpJTBBJTIwJTIwJTIwJTIwdG9yY2guY3VkYS5lbXB0eV9jYWNoZSgpJTBBJTIwJTIwJTIwJTIwdG9yY2guY3VkYS5yZXNldF9tYXhfbWVtb3J5X2FsbG9jYXRlZCgpJTBBJTIwJTIwJTIwJTIwdG9yY2guY3VkYS5yZXNldF9wZWFrX21lbW9yeV9zdGF0cygpJTBBJTBBZGVsJTIwcGlwZWxpbmUudGV4dF9lbmNvZGVyJTBBZGVsJTIwcGlwZWxpbmUudGV4dF9lbmNvZGVyXzIlMEFkZWwlMjBwaXBlbGluZS50b2tlbml6ZXIlMEFkZWwlMjBwaXBlbGluZS50b2tlbml6ZXJfMiUwQWRlbCUyMHBpcGVsaW5lJTBBJTBBZmx1c2goKQ==",highlighted:`<span class="hljs-keyword">import</span> gc
<span class="hljs-keyword">def</span> <span class="hljs-title function_">flush</span>():
gc.collect()
torch.cuda.empty_cache()
torch.cuda.reset_max_memory_allocated()
torch.cuda.reset_peak_memory_stats()
<span class="hljs-keyword">del</span> pipeline.text_encoder
<span class="hljs-keyword">del</span> pipeline.text_encoder_2
<span class="hljs-keyword">del</span> pipeline.tokenizer
<span class="hljs-keyword">del</span> pipeline.tokenizer_2
<span class="hljs-keyword">del</span> pipeline
flush()`,wrap:!1}}),D=new w({props:{code:"ZnJvbSUyMGRpZmZ1c2VycyUyMGltcG9ydCUyMEF1dG9Nb2RlbCUwQWltcG9ydCUyMHRvcmNoJTIwJTBBJTBBdHJhbnNmb3JtZXIlMjAlM0QlMjBBdXRvTW9kZWwuZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMCUyMmJsYWNrLWZvcmVzdC1sYWJzJTJGRkxVWC4xLWRldiUyMiUyQyUyMCUwQSUyMCUyMCUyMCUyMHN1YmZvbGRlciUzRCUyMnRyYW5zZm9ybWVyJTIyJTJDJTBBJTIwJTIwJTIwJTIwZGV2aWNlX21hcCUzRCUyMmF1dG8lMjIlMkMlMEElMjAlMjAlMjAlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmJmbG9hdDE2JTBBKQ==",highlighted:`<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> AutoModel
<span class="hljs-keyword">import</span> torch
transformer = AutoModel.from_pretrained(
<span class="hljs-string">&quot;black-forest-labs/FLUX.1-dev&quot;</span>,
subfolder=<span class="hljs-string">&quot;transformer&quot;</span>,
device_map=<span class="hljs-string">&quot;auto&quot;</span>,
torch_dtype=torch.bfloat16
)`,wrap:!1}}),O=new w({props:{code:"cGlwZWxpbmUlMjAlM0QlMjBGbHV4UGlwZWxpbmUuZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMCUyMmJsYWNrLWZvcmVzdC1sYWJzJTJGRkxVWC4xLWRldiUyMiUyQyUwQSUyMCUyMCUyMCUyMHRleHRfZW5jb2RlciUzRE5vbmUlMkMlMEElMjAlMjAlMjAlMjB0ZXh0X2VuY29kZXJfMiUzRE5vbmUlMkMlMEElMjAlMjAlMjAlMjB0b2tlbml6ZXIlM0ROb25lJTJDJTBBJTIwJTIwJTIwJTIwdG9rZW5pemVyXzIlM0ROb25lJTJDJTBBJTIwJTIwJTIwJTIwdmFlJTNETm9uZSUyQyUwQSUyMCUyMCUyMCUyMHRyYW5zZm9ybWVyJTNEdHJhbnNmb3JtZXIlMkMlMEElMjAlMjAlMjAlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmJmbG9hdDE2JTBBKSUwQSUwQXByaW50KCUyMlJ1bm5pbmclMjBkZW5vaXNpbmcuJTIyKSUwQWhlaWdodCUyQyUyMHdpZHRoJTIwJTNEJTIwNzY4JTJDJTIwMTM2MCUwQWxhdGVudHMlMjAlM0QlMjBwaXBlbGluZSglMEElMjAlMjAlMjAlMEElMjAlMjAlMjAlMjAlMjAlMEFwcm9tcHRfZW1iZWRzJTNEcHJvbXB0X2VtYmVkcyUyQyUwQXBvb2xlZF9wcm9tcHRfZW1iZWRzJTNEcG9vbGVkX3Byb21wdF9lbWJlZHMlMkMlMEFudW1faW5mZXJlbmNlX3N0ZXBzJTNENTAlMkMlMEFndWlkYW5jZV9zY2FsZSUzRDMuNSUyQyUwQWhlaWdodCUzRGhlaWdodCUyQyUwQXdpZHRoJTNEd2lkdGglMkMlMEFvdXRwdXRfdHlwZSUzRCUyMmxhdGVudCUyMiUyQyUwQSkuaW1hZ2Vz",highlighted:`pipeline = FluxPipeline.from_pretrained(
<span class="hljs-string">&quot;black-forest-labs/FLUX.1-dev&quot;</span>,
text_encoder=<span class="hljs-literal">None</span>,
text_encoder_2=<span class="hljs-literal">None</span>,
tokenizer=<span class="hljs-literal">None</span>,
tokenizer_2=<span class="hljs-literal">None</span>,
vae=<span class="hljs-literal">None</span>,
transformer=transformer,
torch_dtype=torch.bfloat16
)
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot;Running denoising.&quot;</span>)
height, width = <span class="hljs-number">768</span>, <span class="hljs-number">1360</span>
latents = pipeline(
prompt_embeds=prompt_embeds,
pooled_prompt_embeds=pooled_prompt_embeds,
num_inference_steps=<span class="hljs-number">50</span>,
guidance_scale=<span class="hljs-number">3.5</span>,
height=height,
width=width,
output_type=<span class="hljs-string">&quot;latent&quot;</span>,
).images`,wrap:!1}}),el=new w({props:{code:"ZGVsJTIwcGlwZWxpbmUudHJhbnNmb3JtZXIlMEFkZWwlMjBwaXBlbGluZSUwQSUwQWZsdXNoKCk=",highlighted:`<span class="hljs-keyword">del</span> pipeline.transformer
<span class="hljs-keyword">del</span> pipeline
flush()`,wrap:!1}}),sl=new w({props:{code:"ZnJvbSUyMGRpZmZ1c2VycyUyMGltcG9ydCUyMEF1dG9lbmNvZGVyS0wlMEFmcm9tJTIwZGlmZnVzZXJzLmltYWdlX3Byb2Nlc3NvciUyMGltcG9ydCUyMFZhZUltYWdlUHJvY2Vzc29yJTBBaW1wb3J0JTIwdG9yY2glMjAlMEElMEF2YWUlMjAlM0QlMjBBdXRvZW5jb2RlcktMLmZyb21fcHJldHJhaW5lZChja3B0X2lkJTJDJTIwc3ViZm9sZGVyJTNEJTIydmFlJTIyJTJDJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5iZmxvYXQxNikudG8oJTIyY3VkYSUyMiklMEF2YWVfc2NhbGVfZmFjdG9yJTIwJTNEJTIwMiUyMCoqJTIwKGxlbih2YWUuY29uZmlnLmJsb2NrX291dF9jaGFubmVscyklMjAtJTIwMSklMEFpbWFnZV9wcm9jZXNzb3IlMjAlM0QlMjBWYWVJbWFnZVByb2Nlc3Nvcih2YWVfc2NhbGVfZmFjdG9yJTNEdmFlX3NjYWxlX2ZhY3RvciklMEElMEF3aXRoJTIwdG9yY2gubm9fZ3JhZCgpJTNBJTBBJTIwJTIwJTIwJTIwcHJpbnQoJTIyJUU4JUJGJTkwJUU4JUExJThDJUU4JUE3JUEzJUU3JUEwJTgxJUU0JUI4JUFEJUUzJTgwJTgyJTIyKSUwQSUyMCUyMCUyMCUyMGxhdGVudHMlMjAlM0QlMjBGbHV4UGlwZWxpbmUuX3VucGFja19sYXRlbnRzKGxhdGVudHMlMkMlMjBoZWlnaHQlMkMlMjB3aWR0aCUyQyUyMHZhZV9zY2FsZV9mYWN0b3IpJTBBJTIwJTIwJTIwJTIwbGF0ZW50cyUyMCUzRCUyMChsYXRlbnRzJTIwJTJGJTIwdmFlLmNvbmZpZy5zY2FsaW5nX2ZhY3RvciklMjAlMkIlMjB2YWUuY29uZmlnLnNoaWZ0X2ZhY3RvciUwQSUwQSUyMCUyMCUyMCUyMGltYWdlJTIwJTNEJTIwdmFlLmRlY29kZShsYXRlbnRzJTJDJTIwcmV0dXJuX2RpY3QlM0RGYWxzZSklNUIwJTVEJTBBJTIwJTIwJTIwJTIwaW1hZ2UlMjAlM0QlMjBpbWFnZV9wcm9jZXNzb3IucG9zdHByb2Nlc3MoaW1hZ2UlMkMlMjBvdXRwdXRfdHlwZSUzRCUyMnBpbCUyMiklMEElMjAlMjAlMjAlMjBpbWFnZSU1QjAlNUQuc2F2ZSglMjJzcGxpdF90cmFuc2Zvcm1lci5wbmclMjIp",highlighted:`<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> AutoencoderKL
<span class="hljs-keyword">from</span> diffusers.image_processor <span class="hljs-keyword">import</span> VaeImageProcessor
<span class="hljs-keyword">import</span> torch
vae = AutoencoderKL.from_pretrained(ckpt_id, subfolder=<span class="hljs-string">&quot;vae&quot;</span>, torch_dtype=torch.bfloat16).to(<span class="hljs-string">&quot;cuda&quot;</span>)
vae_scale_factor = <span class="hljs-number">2</span> ** (<span class="hljs-built_in">len</span>(vae.config.block_out_channels) - <span class="hljs-number">1</span>)
image_processor = VaeImageProcessor(vae_scale_factor=vae_scale_factor)
<span class="hljs-keyword">with</span> torch.no_grad():
<span class="hljs-built_in">print</span>(<span class="hljs-string">&quot;运行解码中。&quot;</span>)
latents = FluxPipeline._unpack_latents(latents, height, width, vae_scale_factor)
latents = (latents / vae.config.scaling_factor) + vae.config.shift_factor
image = vae.decode(latents, return_dict=<span class="hljs-literal">False</span>)[<span class="hljs-number">0</span>]
image = image_processor.postprocess(image, output_type=<span class="hljs-string">&quot;pil&quot;</span>)
image[<span class="hljs-number">0</span>].save(<span class="hljs-string">&quot;split_transformer.png&quot;</span>)`,wrap:!1}}),nl=new Xe({props:{source:"https://github.com/huggingface/diffusers/blob/main/docs/source/zh/training/distributed_inference.md"}}),{c(){y=p("meta"),ol=a(),il=p("p"),rl=a(),o(b.$$.fragment),Ml=a(),o(h.$$.fragment),dl=a(),U=p("p"),U.innerHTML=se,ml=a(),Z=p("p"),Z.textContent=ae,Jl=a(),o(_.$$.fragment),wl=a(),k=p("p"),k.innerHTML=ne,yl=a(),G=p("p"),G.innerHTML=pe,ul=a(),I=p("p"),I.innerHTML=ie,fl=a(),o(B.$$.fragment),jl=a(),C=p("p"),C.innerHTML=ce,Tl=a(),o(v.$$.fragment),bl=a(),u=p("blockquote"),u.innerHTML=oe,hl=a(),o(g.$$.fragment),Ul=a(),W=p("p"),W.innerHTML=re,Zl=a(),E=p("p"),E.innerHTML=Me,_l=a(),o($.$$.fragment),kl=a(),R=p("p"),R.innerHTML=de,Gl=a(),X=p("p"),X.innerHTML=me,Il=a(),o(N.$$.fragment),Bl=a(),V=p("p"),V.innerHTML=Je,Cl=a(),o(H.$$.fragment),vl=a(),A=p("p"),A.innerHTML=we,gl=a(),o(x.$$.fragment),Wl=a(),f=p("blockquote"),f.innerHTML=ye,El=a(),o(z.$$.fragment),$l=a(),Q=p("p"),Q.innerHTML=ue,Rl=a(),F=p("p"),F.textContent=fe,Xl=a(),Y=p("p"),Y.innerHTML=je,Nl=a(),j=p("blockquote"),j.innerHTML=Te,Vl=a(),o(S.$$.fragment),Hl=a(),L=p("p"),L.textContent=be,Al=a(),o(P.$$.fragment),xl=a(),q=p("p"),q.innerHTML=he,zl=a(),o(D.$$.fragment),Ql=a(),T=p("blockquote"),T.innerHTML=Ue,Fl=a(),K=p("p"),K.innerHTML=Ze,Yl=a(),o(O.$$.fragment),Sl=a(),ll=p("p"),ll.textContent=_e,Ll=a(),o(el.$$.fragment),Pl=a(),tl=p("p"),tl.textContent=ke,ql=a(),o(sl.$$.fragment),Dl=a(),al=p("p"),al.textContent=Ge,Kl=a(),o(nl.$$.fragment),Ol=a(),cl=p("p"),this.h()},l(l){const 
e=Ee("svelte-u9bgzb",document.head);y=i(e,"META",{name:!0,content:!0}),e.forEach(t),ol=n(l),il=i(l,"P",{}),Ie(il).forEach(t),rl=n(l),r(b.$$.fragment,l),Ml=n(l),r(h.$$.fragment,l),dl=n(l),U=i(l,"P",{"data-svelte-h":!0}),c(U)!=="svelte-164t5yx"&&(U.innerHTML=se),ml=n(l),Z=i(l,"P",{"data-svelte-h":!0}),c(Z)!=="svelte-qajcxl"&&(Z.textContent=ae),Jl=n(l),r(_.$$.fragment,l),wl=n(l),k=i(l,"P",{"data-svelte-h":!0}),c(k)!=="svelte-10vyc10"&&(k.innerHTML=ne),yl=n(l),G=i(l,"P",{"data-svelte-h":!0}),c(G)!=="svelte-k495sb"&&(G.innerHTML=pe),ul=n(l),I=i(l,"P",{"data-svelte-h":!0}),c(I)!=="svelte-1224dwq"&&(I.innerHTML=ie),fl=n(l),r(B.$$.fragment,l),jl=n(l),C=i(l,"P",{"data-svelte-h":!0}),c(C)!=="svelte-1eq5z11"&&(C.innerHTML=ce),Tl=n(l),r(v.$$.fragment,l),bl=n(l),u=i(l,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),c(u)!=="svelte-87d30k"&&(u.innerHTML=oe),hl=n(l),r(g.$$.fragment,l),Ul=n(l),W=i(l,"P",{"data-svelte-h":!0}),c(W)!=="svelte-7gn49b"&&(W.innerHTML=re),Zl=n(l),E=i(l,"P",{"data-svelte-h":!0}),c(E)!=="svelte-1vhpcpe"&&(E.innerHTML=Me),_l=n(l),r($.$$.fragment,l),kl=n(l),R=i(l,"P",{"data-svelte-h":!0}),c(R)!=="svelte-109hrfe"&&(R.innerHTML=de),Gl=n(l),X=i(l,"P",{"data-svelte-h":!0}),c(X)!=="svelte-1qcsr3i"&&(X.innerHTML=me),Il=n(l),r(N.$$.fragment,l),Bl=n(l),V=i(l,"P",{"data-svelte-h":!0}),c(V)!=="svelte-1yf0k86"&&(V.innerHTML=Je),Cl=n(l),r(H.$$.fragment,l),vl=n(l),A=i(l,"P",{"data-svelte-h":!0}),c(A)!=="svelte-1ebzc0p"&&(A.innerHTML=we),gl=n(l),r(x.$$.fragment,l),Wl=n(l),f=i(l,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),c(f)!=="svelte-zkfe1n"&&(f.innerHTML=ye),El=n(l),r(z.$$.fragment,l),$l=n(l),Q=i(l,"P",{"data-svelte-h":!0}),c(Q)!=="svelte-yu6o1n"&&(Q.innerHTML=ue),Rl=n(l),F=i(l,"P",{"data-svelte-h":!0}),c(F)!=="svelte-1wtcutz"&&(F.textContent=fe),Xl=n(l),Y=i(l,"P",{"data-svelte-h":!0}),c(Y)!=="svelte-vjsvbo"&&(Y.innerHTML=je),Nl=n(l),j=i(l,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),c(j)!=="svelte-zyi0qz"&&(j.innerHTML=Te),Vl=n(l),r(S.$$.fragment,l),Hl=n(l),L=i(l,"P",{"data-
svelte-h":!0}),c(L)!=="svelte-101ikwk"&&(L.textContent=be),Al=n(l),r(P.$$.fragment,l),xl=n(l),q=i(l,"P",{"data-svelte-h":!0}),c(q)!=="svelte-i97tc3"&&(q.innerHTML=he),zl=n(l),r(D.$$.fragment,l),Ql=n(l),T=i(l,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),c(T)!=="svelte-1l0lzn4"&&(T.innerHTML=Ue),Fl=n(l),K=i(l,"P",{"data-svelte-h":!0}),c(K)!=="svelte-1lgsyuc"&&(K.innerHTML=Ze),Yl=n(l),r(O.$$.fragment,l),Sl=n(l),ll=i(l,"P",{"data-svelte-h":!0}),c(ll)!=="svelte-x0nor"&&(ll.textContent=_e),Ll=n(l),r(el.$$.fragment,l),Pl=n(l),tl=i(l,"P",{"data-svelte-h":!0}),c(tl)!=="svelte-1v7ewyt"&&(tl.textContent=ke),ql=n(l),r(sl.$$.fragment,l),Dl=n(l),al=i(l,"P",{"data-svelte-h":!0}),c(al)!=="svelte-19rooo6"&&(al.textContent=Ge),Kl=n(l),r(nl.$$.fragment,l),Ol=n(l),cl=i(l,"P",{}),Ie(cl).forEach(t),this.h()},h(){pl(y,"name","hf:doc:metadata"),pl(y,"content",Ve),pl(u,"class","tip"),pl(f,"class","tip"),pl(j,"class","tip"),pl(T,"class","tip")},m(l,e){$e(document.head,y),s(l,ol,e),s(l,il,e),s(l,rl,e),M(b,l,e),s(l,Ml,e),M(h,l,e),s(l,dl,e),s(l,U,e),s(l,ml,e),s(l,Z,e),s(l,Jl,e),M(_,l,e),s(l,wl,e),s(l,k,e),s(l,yl,e),s(l,G,e),s(l,ul,e),s(l,I,e),s(l,fl,e),M(B,l,e),s(l,jl,e),s(l,C,e),s(l,Tl,e),M(v,l,e),s(l,bl,e),s(l,u,e),s(l,hl,e),M(g,l,e),s(l,Ul,e),s(l,W,e),s(l,Zl,e),s(l,E,e),s(l,_l,e),M($,l,e),s(l,kl,e),s(l,R,e),s(l,Gl,e),s(l,X,e),s(l,Il,e),M(N,l,e),s(l,Bl,e),s(l,V,e),s(l,Cl,e),M(H,l,e),s(l,vl,e),s(l,A,e),s(l,gl,e),M(x,l,e),s(l,Wl,e),s(l,f,e),s(l,El,e),M(z,l,e),s(l,$l,e),s(l,Q,e),s(l,Rl,e),s(l,F,e),s(l,Xl,e),s(l,Y,e),s(l,Nl,e),s(l,j,e),s(l,Vl,e),M(S,l,e),s(l,Hl,e),s(l,L,e),s(l,Al,e),M(P,l,e),s(l,xl,e),s(l,q,e),s(l,zl,e),M(D,l,e),s(l,Ql,e),s(l,T,e),s(l,Fl,e),s(l,K,e),s(l,Yl,e),M(O,l,e),s(l,Sl,e),s(l,ll,e),s(l,Ll,e),M(el,l,e),s(l,Pl,e),s(l,tl,e),s(l,ql,e),M(sl,l,e),s(l,Dl,e),s(l,al,e),s(l,Kl,e),M(nl,l,e),s(l,Ol,e),s(l,cl,e),le=!0},p:Ce,i(l){le||(d(b.$$.fragment,l),d(h.$$.fragment,l),d(_.$$.fragment,l),d(B.$$.fragment,l),d(v.$$.fragment,l),d(g.$$.fragment,l),d($.$$.fragment,l),d(N.$$.fragment,l),d(H.
$$.fragment,l),d(x.$$.fragment,l),d(z.$$.fragment,l),d(S.$$.fragment,l),d(P.$$.fragment,l),d(D.$$.fragment,l),d(O.$$.fragment,l),d(el.$$.fragment,l),d(sl.$$.fragment,l),d(nl.$$.fragment,l),le=!0)},o(l){m(b.$$.fragment,l),m(h.$$.fragment,l),m(_.$$.fragment,l),m(B.$$.fragment,l),m(v.$$.fragment,l),m(g.$$.fragment,l),m($.$$.fragment,l),m(N.$$.fragment,l),m(H.$$.fragment,l),m(x.$$.fragment,l),m(z.$$.fragment,l),m(S.$$.fragment,l),m(P.$$.fragment,l),m(D.$$.fragment,l),m(O.$$.fragment,l),m(el.$$.fragment,l),m(sl.$$.fragment,l),m(nl.$$.fragment,l),le=!1},d(l){l&&(t(ol),t(il),t(rl),t(Ml),t(dl),t(U),t(ml),t(Z),t(Jl),t(wl),t(k),t(yl),t(G),t(ul),t(I),t(fl),t(jl),t(C),t(Tl),t(bl),t(u),t(hl),t(Ul),t(W),t(Zl),t(E),t(_l),t(kl),t(R),t(Gl),t(X),t(Il),t(Bl),t(V),t(Cl),t(vl),t(A),t(gl),t(Wl),t(f),t(El),t($l),t(Q),t(Rl),t(F),t(Xl),t(Y),t(Nl),t(j),t(Vl),t(Hl),t(L),t(Al),t(xl),t(q),t(zl),t(Ql),t(T),t(Fl),t(K),t(Yl),t(Sl),t(ll),t(Ll),t(Pl),t(tl),t(ql),t(Dl),t(al),t(Kl),t(Ol),t(cl)),t(y),J(b,l),J(h,l),J(_,l),J(B,l),J(v,l),J(g,l),J($,l),J(N,l),J(H,l),J(x,l),J(z,l),J(S,l),J(P,l),J(D,l),J(O,l),J(el,l),J(sl,l),J(nl,l)}}}/**
 * Ve: page metadata serialized as a JSON string (page title, local anchor and the
 * list of depth-2 sections). Ne passes it as the content value of the meta element
 * whose name is hf:doc:metadata.
 */const Ve='{"title":"分布式推理","local":"分布式推理","sections":[{"title":"🤗 Accelerate","local":"-accelerate","sections":[],"depth":2},{"title":"PyTorch Distributed","local":"pytorch-distributed","sections":[],"depth":2},{"title":"模型分片","local":"模型分片","sections":[],"depth":2}],"depth":1}';/**
 * He: instance function handed to We by the Fe constructor. It registers a callback
 * through ve that reads the fw query parameter from window.location.search and
 * discards the value, then returns an empty array. The parameter te is not used.
 * NOTE(review): ve is presumably the Svelte onMount hook, and the discarded read
 * looks like leftover generated code; confirm before relying on either.
 */function He(te){return ve(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}/**
 * Fe: the page component class, exported under the name component. It extends ge and
 * its constructor forwards the options object y to We together with He, Ne, Be and an
 * empty props map.
 * NOTE(review): ge, We and Be are minified aliases from the runtime chunks, presumably
 * the Svelte component base class, its init routine and the equality helper; confirm.
 */class Fe extends ge{constructor(y){super(),We(this,y,He,Ne,Be,{})}}export{Fe as component};

Xet Storage Details

Size:
29.3 kB
·
Xet hash:
816586dbb793d2bbc28f7dec38f33d7508606831c76e08d526001bf3d68561d5

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.