Buckets:
| import{s as Il,o as Wl,n as Gl}from"../chunks/scheduler.e4ff9b64.js";import{S as _l,i as Xl,e as J,s as i,c as d,h as gl,a as r,d as s,b as m,f as Zl,g as b,j as U,k as Ml,l as Vl,m as e,n as f,t as w,o as h,p as j}from"../chunks/index.09f1bca0.js";import{C as vl,H as cl,E as Rl}from"../chunks/MermaidChart.svelte_svelte_type_style_lang.bbad1169.js";import{C as ml}from"../chunks/CodeBlock.f8309f3f.js";import{H as Cl,a as Bl}from"../chunks/HfOption.44827c7f.js";function Yl(Z){let n,u='<a href="./memory#model-offloading">模型 CPU 卸载</a> 将单个管道组件(如 transformer 模型)在需要计算时移动到 GPU。否则,它会被卸载到 CPU。',c,M,a;return M=new ml({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwRGlmZnVzaW9uUGlwZWxpbmUlMEFmcm9tJTIwZGlmZnVzZXJzLnF1YW50aXplcnMlMjBpbXBvcnQlMjBQaXBlbGluZVF1YW50aXphdGlvbkNvbmZpZyUwQSUwQXRvcmNoLl9keW5hbW8uY29uZmlnLmNhY2hlX3NpemVfbGltaXQlMjAlM0QlMjAxMDAwJTBBdG9yY2guX2R5bmFtby5jb25maWcuY2FwdHVyZV9keW5hbWljX291dHB1dF9zaGFwZV9vcHMlMjAlM0QlMjBUcnVlJTBBJTBBJTIzJTIwJUU5JTg3JThGJUU1JThDJTk2JTBBcGlwZWxpbmVfcXVhbnRfY29uZmlnJTIwJTNEJTIwUGlwZWxpbmVRdWFudGl6YXRpb25Db25maWcoJTBBJTIwJTIwJTIwJTIwcXVhbnRfYmFja2VuZCUzRCUyMmJpdHNhbmRieXRlc180Yml0JTIyJTJDJTBBJTIwJTIwJTIwJTIwcXVhbnRfa3dhcmdzJTNEJTdCJTIybG9hZF9pbl80Yml0JTIyJTNBJTIwVHJ1ZSUyQyUyMCUyMmJuYl80Yml0X3F1YW50X3R5cGUlMjIlM0ElMjAlMjJuZjQlMjIlMkMlMjAlMjJibmJfNGJpdF9jb21wdXRlX2R0eXBlJTIyJTNBJTIwdG9yY2guYmZsb2F0MTYlN0QlMkMlMEElMjAlMjAlMjAlMjBjb21wb25lbnRzX3RvX3F1YW50aXplJTNEJTVCJTIydHJhbnNmb3JtZXIlMjIlMkMlMjAlMjJ0ZXh0X2VuY29kZXJfMiUyMiU1RCUyQyUwQSklMEFwaXBlbGluZSUyMCUzRCUyMERpZmZ1c2lvblBpcGVsaW5lLmZyb21fcHJldHJhaW5lZCglMEElMjAlMjAlMjAlMjAlMjJibGFjay1mb3Jlc3QtbGFicyUyRkZMVVguMS1kZXYlMjIlMkMlMEElMjAlMjAlMjAlMjBxdWFudGl6YXRpb25fY29uZmlnJTNEcGlwZWxpbmVfcXVhbnRfY29uZmlnJTJDJTBBJTIwJTIwJTIwJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5iZmxvYXQxNiUyQyUwQSkudG8oJTIyY3VkYSUyMiklMEElMEElMjMlMjAlRTYlQTglQTElRTUlOUUlOEIlMjBDUFUlMjAlRTUlOEQlQjglRTglQkQlQkQlMEFwaXBlbGluZS5lbmFibGVfbW9kZWxfY3B1X29mZmxvYWQoKSUwQSUwQSUyMyUyMCVFNyVCQyU5NiVFOCVBRiU5MSUwQXBpcGVsaW5lLnRyYW5zZm9ybWVyLmNvbXBpbGUoKSUwQXBpcGVsaW5lKCUwQSUyMCUyMCUyMCUyMCUyMmNpbmVtYXRpYyUyMGZpbG0lMjBzdGlsbCUyMG9mJTIwYSUyMGNhdCUyMHNpcHBpbmclMjBhJTIwbWFyZ2FyaXRhJTIwaW4lMjBhJTIwcG9vbCUyMGluJTIwUGFsbSUyMFNwcmluZ3MlMkMlMjBDYWxpZm9ybmlhJTJDJTIwaGlnaGx5JTIwZGV0YWlsZWQlMkMlMjBoaWdoJTIwYnVkZ2V0JTIwaG9sbHl3b29kJTIwbW92aWUlMkMlMjBjaW5lbWFzY29wZSUyQyUyMG1vb2R5JTJDJTIwZXBpYyUyQyUyMGdvcmdlb3VzJTJDJTIwZmlsbSUyMGdyYWluJTIyJTBBKS5pbWFnZXMlNUIwJTVE",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> DiffusionPipeline | |
| <span class="hljs-keyword">from</span> diffusers.quantizers <span class="hljs-keyword">import</span> PipelineQuantizationConfig | |
| torch._dynamo.config.cache_size_limit = <span class="hljs-number">1000</span> | |
| torch._dynamo.config.capture_dynamic_output_shape_ops = <span class="hljs-literal">True</span> | |
| <span class="hljs-comment"># 量化</span> | |
| pipeline_quant_config = PipelineQuantizationConfig( | |
| quant_backend=<span class="hljs-string">"bitsandbytes_4bit"</span>, | |
| quant_kwargs={<span class="hljs-string">"load_in_4bit"</span>: <span class="hljs-literal">True</span>, <span class="hljs-string">"bnb_4bit_quant_type"</span>: <span class="hljs-string">"nf4"</span>, <span class="hljs-string">"bnb_4bit_compute_dtype"</span>: torch.bfloat16}, | |
| components_to_quantize=[<span class="hljs-string">"transformer"</span>, <span class="hljs-string">"text_encoder_2"</span>], | |
| ) | |
| pipeline = DiffusionPipeline.from_pretrained( | |
| <span class="hljs-string">"black-forest-labs/FLUX.1-dev"</span>, | |
| quantization_config=pipeline_quant_config, | |
| torch_dtype=torch.bfloat16, | |
| ).to(<span class="hljs-string">"cuda"</span>) | |
| <span class="hljs-comment"># 模型 CPU 卸载</span> | |
| pipeline.enable_model_cpu_offload() | |
| <span class="hljs-comment"># 编译</span> | |
| pipeline.transformer.<span class="hljs-built_in">compile</span>() | |
| pipeline( | |
| <span class="hljs-string">"cinematic film still of a cat sipping a margarita in a pool in Palm Springs, California, highly detailed, high budget hollywood movie, cinemascope, moody, epic, gorgeous, film grain"</span> | |
| ).images[<span class="hljs-number">0</span>]`,wrap:!1}}),{c(){n=J("p"),n.innerHTML=u,c=i(),d(M.$$.fragment)},l(p){n=r(p,"P",{"data-svelte-h":!0}),U(n)!=="svelte-4ay8nf"&&(n.innerHTML=u),c=m(p),b(M.$$.fragment,p)},m(p,y){e(p,n,y),e(p,c,y),f(M,p,y),a=!0},p:Gl,i(p){a||(w(M.$$.fragment,p),a=!0)},o(p){h(M.$$.fragment,p),a=!1},d(p){p&&(s(n),s(c)),j(M,p)}}}function Ql(Z){let n,u='<a href="./memory#group-offloading">组卸载</a> 将单个管道组件(如变换器模型)的内部层移动到 GPU 进行计算,并在不需要时将其卸载。同时,它使用 <a href="./memory#cuda-stream">CUDA 流</a> 功能来预取下一层以执行。',c,M,a="通过重叠计算和数据传输,它比模型 CPU 卸载更快,同时还能节省内存。",p,y,T;return y=new ml({props:{code:"JTIzJTIwcGlwJTIwaW5zdGFsbCUyMGZ0ZnklMEFpbXBvcnQlMjB0b3JjaCUwQWZyb20lMjBkaWZmdXNlcnMlMjBpbXBvcnQlMjBBdXRvTW9kZWwlMkMlMjBEaWZmdXNpb25QaXBlbGluZSUwQWZyb20lMjBkaWZmdXNlcnMuaG9va3MlMjBpbXBvcnQlMjBhcHBseV9ncm91cF9vZmZsb2FkaW5nJTBBZnJvbSUyMGRpZmZ1c2Vycy51dGlscyUyMGltcG9ydCUyMGV4cG9ydF90b192aWRlbyUwQWZyb20lMjBkaWZmdXNlcnMucXVhbnRpemVycyUyMGltcG9ydCUyMFBpcGVsaW5lUXVhbnRpemF0aW9uQ29uZmlnJTBBZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMFVNVDVFbmNvZGVyTW9kZWwlMEElMEF0b3JjaC5fZHluYW1vLmNvbmZpZy5jYWNoZV9zaXplX2xpbWl0JTIwJTNEJTIwMTAwMCUwQXRvcmNoLl9keW5hbW8uY29uZmlnLmNhcHR1cmVfZHluYW1pY19vdXRwdXRfc2hhcGVfb3BzJTIwJTNEJTIwVHJ1ZSUwQSUwQSUyMyUyMCVFOSU4NyU4RiVFNSU4QyU5NiUwQXBpcGVsaW5lX3F1YW50X2NvbmZpZyUyMCUzRCUyMFBpcGVsaW5lUXVhbnRpemF0aW9uQ29uZmlnKCUwQSUyMCUyMCUyMCUyMHF1YW50X2JhY2tlbmQlM0QlMjJiaXRzYW5kYnl0ZXNfNGJpdCUyMiUyQyUwQSUyMCUyMCUyMCUyMHF1YW50X2t3YXJncyUzRCU3QiUyMmxvYWRfaW5fNGJpdCUyMiUzQSUyMFRydWUlMkMlMjAlMjJibmJfNGJpdF9xdWFudF90eXBlJTIyJTNBJTIwJTIybmY0JTIyJTJDJTIwJTIyYm5iXzRiaXRfY29tcHV0ZV9kdHlwZSUyMiUzQSUyMHRvcmNoLmJmbG9hdDE2JTdEJTJDJTBBJTIwJTIwJTIwJTIwY29tcG9uZW50c190b19xdWFudGl6ZSUzRCU1QiUyMnRyYW5zZm9ybWVyJTIyJTJDJTIwJTIydGV4dF9lbmNvZGVyJTIyJTVEJTJDJTBBKSUwQSUwQXRleHRfZW5jb2RlciUyMCUzRCUyMFVNVDVFbmNvZGVyTW9kZWwuZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMCUyMldhbi1BSSUyRldhbjIuMS1UMlYtMTRCLURpZmZ1c2VycyUyMiUyQyUyMHN1YmZvbGRlciUzRCUyMnRleHRfZW5jb2RlciUyMiUyQyUyMHRvcmNoX2R0eXBlJTNEdG9yY2guYmZsb2F0MTYlMEEpJTBBcGlwZWxpbmUlMjAlM0QlMjBEaWZmdXNpb25QaXBlbGluZS5mcm9tX3ByZXRyYWluZWQoJTBBJTIwJTIwJTIwJTIwJTIyV2FuLUFJJTJGV2FuMi4xLVQyVi0xNEItRGlmZnVzZXJzJTIyJTJDJTBBJTIwJTIwJTIwJTIwcXVhbnRpemF0aW9uX2NvbmZpZyUzRHBpcGVsaW5lX3F1YW50X2NvbmZpZyUyQyUwQSUyMCUyMCUyMCUyMHRvcmNoX2R0eXBlJTNEdG9yY2guYmZsb2F0MTYlMkMlMEEpLnRvKCUyMmN1ZGElMjIpJTBBJTBBJTIzJTIwJUU3JUJCJTg0JUU1JThEJUI4JUU4JUJEJUJEJTBBb25sb2FkX2RldmljZSUyMCUzRCUyMHRvcmNoLmRldmljZSglMjJjdWRhJTIyKSUwQW9mZmxvYWRfZGV2aWNlJTIwJTNEJTIwdG9yY2guZGV2aWNlKCUyMmNwdSUyMiklMEElMEFwaXBlbGluZS50cmFuc2Zvcm1lci5lbmFibGVfZ3JvdXBfb2ZmbG9hZCglMEElMjAlMjAlMjAlMjBvbmxvYWRfZGV2aWNlJTNEb25sb2FkX2RldmljZSUyQyUwQSUyMCUyMCUyMCUyMG9mZmxvYWRfZGV2aWNlJTNEb2ZmbG9hZF9kZXZpY2UlMkMlMEElMjAlMjAlMjAlMjBvZmZsb2FkX3R5cGUlM0QlMjJsZWFmX2xldmVsJTIyJTJDJTBBJTIwJTIwJTIwJTIwdXNlX3N0cmVhbSUzRFRydWUlMkMlMEElMjAlMjAlMjAlMjBub25fYmxvY2tpbmclM0RUcnVlJTBBKSUwQXBpcGVsaW5lLnZhZS5lbmFibGVfZ3JvdXBfb2ZmbG9hZCglMEElMjAlMjAlMjAlMjBvbmxvYWRfZGV2aWNlJTNEb25sb2FkX2RldmljZSUyQyUwQSUyMCUyMCUyMCUyMG9mZmxvYWRfZGV2aWNlJTNEb2ZmbG9hZF9kZXZpY2UlMkMlMEElMjAlMjAlMjAlMjBvZmZsb2FkX3R5cGUlM0QlMjJsZWFmX2xldmVsJTIyJTJDJTBBJTIwJTIwJTIwJTIwdXNlX3N0cmVhbSUzRFRydWUlMkMlMEElMjAlMjAlMjAlMjBub25fYmxvY2tpbmclM0RUcnVlJTBBKSUwQWFwcGx5X2dyb3VwX29mZmxvYWRpbmcoJTBBJTIwJTIwJTIwJTIwcGlwZWxpbmUudGV4dF9lbmNvZGVyJTJDJTBBJTIwJTIwJTIwJTIwb25sb2FkX2RldmljZSUzRG9ubG9hZF9kZXZpY2UlMkMlMEElMjAlMjAlMjAlMjBvZmZsb2FkX3R5cGUlM0QlMjJsZWFmX2xldmVsJTIyJTJDJTBBJTIwJTIwJTIwJTIwdXNlX3N0cmVhbSUzRFRydWUlMkMlMEElMjAlMjAlMjAlMjBub25fYmxvY2tpbmclM0RUcnVlJTBBKSUwQSUwQSUyMyUyMCVFNyVCQyU5NiVFOCVBRiU5MSUwQXBpcGVsaW5lLnRyYW5zZm9ybWVyLmNvbXBpbGUoKSUwQSUwQXByb21wdCUyMCUzRCUyMCUyMiUyMiUyMiUwQVRoZSUyMGNhbWVyYSUyMHJ1c2hlcyUyMGZyb20lMjBmYXIlMjB0byUyMG5lYXIlMjBpbiUyMGElMjBsb3ctYW5nbGUlMjBzaG90JTJDJTIwJTBBcmV2ZWFsaW5nJTIwYSUyMHdoaXRlJTIwZmVycmV0JTIwb24lMjBhJTIwbG9nLiUyMEl0JTIwcGxheXMlMkMlMjBsZWFwcyUyMGludG8lMjB0aGUlMjB3YXRlciUyQyUyMGFuZCUyMGVtZXJnZXMlMkMlMjBhcyUyMHRoZSUyMGNhbWVyYSUyMHpvb21zJTIwaW4lMjAlMEFmb3IlMjBhJTIwY2xvc2UtdXAuJTIwV2F0ZXIlMjBzcGxhc2hlcyUyMGJlcnJ5JTIwYnVzaGVzJTIwbmVhcmJ5JTJDJTIwd2hpbGUlMjBtb3NzJTJDJTIwc25vdyUyQyUyMGFuZCUyMGxlYXZlcyUyMGJsYW5rZXQlMjB0aGUlMjBncm91bmQuJTIwJTBBQmlyY2glMjB0cmVlcyUyMGFuZCUyMGElMjBsaWdodCUyMGJsdWUlMjBza3klMjBmcmFtZSUyMHRoZSUyMHNjZW5lJTJDJTIwd2l0aCUyMGZlcm5zJTIwaW4lMjB0aGUlMjBmb3JlZ3JvdW5kLiUyMFNpZGUlMjBsaWdodGluZyUyMGNhc3RzJTIwZHluYW1pYyUyMCUwQXNoYWRvd3MlMjBhbmQlMjB3YXJtJTIwaGlnaGxpZ2h0cy4lMjBNZWRpdW0lMjBjb21wb3NpdGlvbiUyQyUyMGZyb250JTIwdmlldyUyQyUyMGxvdyUyMGFuZ2xlJTJDJTIwd2l0aCUyMGRlcHRoJTIwb2YlMjBmaWVsZC4lMEElMjIlMjIlMjIlMEFuZWdhdGl2ZV9wcm9tcHQlMjAlM0QlMjAlMjIlMjIlMjIlMEFCcmlnaHQlMjB0b25lcyUyQyUyMG92ZXJleHBvc2VkJTJDJTIwc3RhdGljJTJDJTIwYmx1cnJlZCUyMGRldGFpbHMlMkMlMjBzdWJ0aXRsZXMlMkMlMjBzdHlsZSUyQyUyMHdvcmtzJTJDJTIwcGFpbnRpbmdzJTJDJTIwaW1hZ2VzJTJDJTIwc3RhdGljJTJDJTIwb3ZlcmFsbCUyMGdyYXklMkMlMjB3b3JzdCUyMHF1YWxpdHklMkMlMjAlMEFsb3clMjBxdWFsaXR5JTJDJTIwSlBFRyUyMGNvbXByZXNzaW9uJTIwcmVzaWR1ZSUyQyUyMHVnbHklMkMlMjBpbmNvbXBsZXRlJTJDJTIwZXh0cmElMjBmaW5nZXJzJTJDJTIwcG9vcmx5JTIwZHJhd24lMjBoYW5kcyUyQyUyMHBvb3JseSUyMGRyYXduJTIwZmFjZXMlMkMlMjBkZWZvcm1lZCUyQyUyMGRpc2ZpZ3VyZWQlMkMlMjAlMEFtaXNzaGFwZW4lMjBsaW1icyUyQyUyMGZ1c2VkJTIwZmluZ2VycyUyQyUyMHN0aWxsJTIwcGljdHVyZSUyQyUyMG1lc3N5JTIwYmFja2dyb3VuZCUyQyUyMHRocmVlJTIwbGVncyUyQyUyMG1hbnklMjBwZW9wbGUlMjBpbiUyMHRoZSUyMGJhY2tncm91bmQlMkMlMjB3YWxraW5nJTIwYmFja3dhcmRzJTBBJTIyJTIyJTIyJTBBJTBBb3V0cHV0JTIwJTNEJTIwcGlwZWxpbmUoJTBBJTIwJTIwJTIwJTIwcHJvbXB0JTNEcHJvbXB0JTJDJTBBJTIwJTIwJTIwJTIwbmVnYXRpdmVfcHJvbXB0JTNEbmVnYXRpdmVfcHJvbXB0JTJDJTBBJTIwJTIwJTIwJTIwbnVtX2ZyYW1lcyUzRDgxJTJDJTBBJTIwJTIwJTIwJTIwZ3VpZGFuY2Vfc2NhbGUlM0Q1LjAlMkMlMEEpLmZyYW1lcyU1QjAlNUQlMEFleHBvcnRfdG9fdmlkZW8ob3V0cHV0JTJDJTIwJTIyb3V0cHV0Lm1wNCUyMiUyQyUyMGZwcyUzRDE2KQ==",highlighted:`<span class="hljs-comment"># pip install ftfy</span> | |
| <span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> AutoModel, DiffusionPipeline | |
| <span class="hljs-keyword">from</span> diffusers.hooks <span class="hljs-keyword">import</span> apply_group_offloading | |
| <span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> export_to_video | |
| <span class="hljs-keyword">from</span> diffusers.quantizers <span class="hljs-keyword">import</span> PipelineQuantizationConfig | |
| <span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> UMT5EncoderModel | |
| torch._dynamo.config.cache_size_limit = <span class="hljs-number">1000</span> | |
| torch._dynamo.config.capture_dynamic_output_shape_ops = <span class="hljs-literal">True</span> | |
| <span class="hljs-comment"># 量化</span> | |
| pipeline_quant_config = PipelineQuantizationConfig( | |
| quant_backend=<span class="hljs-string">"bitsandbytes_4bit"</span>, | |
| quant_kwargs={<span class="hljs-string">"load_in_4bit"</span>: <span class="hljs-literal">True</span>, <span class="hljs-string">"bnb_4bit_quant_type"</span>: <span class="hljs-string">"nf4"</span>, <span class="hljs-string">"bnb_4bit_compute_dtype"</span>: torch.bfloat16}, | |
| components_to_quantize=[<span class="hljs-string">"transformer"</span>, <span class="hljs-string">"text_encoder"</span>], | |
| ) | |
| text_encoder = UMT5EncoderModel.from_pretrained( | |
| <span class="hljs-string">"Wan-AI/Wan2.1-T2V-14B-Diffusers"</span>, subfolder=<span class="hljs-string">"text_encoder"</span>, torch_dtype=torch.bfloat16 | |
| ) | |
| pipeline = DiffusionPipeline.from_pretrained( | |
| <span class="hljs-string">"Wan-AI/Wan2.1-T2V-14B-Diffusers"</span>, | |
| quantization_config=pipeline_quant_config, | |
| torch_dtype=torch.bfloat16, | |
| ).to(<span class="hljs-string">"cuda"</span>) | |
| <span class="hljs-comment"># 组卸载</span> | |
| onload_device = torch.device(<span class="hljs-string">"cuda"</span>) | |
| offload_device = torch.device(<span class="hljs-string">"cpu"</span>) | |
| pipeline.transformer.enable_group_offload( | |
| onload_device=onload_device, | |
| offload_device=offload_device, | |
| offload_type=<span class="hljs-string">"leaf_level"</span>, | |
| use_stream=<span class="hljs-literal">True</span>, | |
| non_blocking=<span class="hljs-literal">True</span> | |
| ) | |
| pipeline.vae.enable_group_offload( | |
| onload_device=onload_device, | |
| offload_device=offload_device, | |
| offload_type=<span class="hljs-string">"leaf_level"</span>, | |
| use_stream=<span class="hljs-literal">True</span>, | |
| non_blocking=<span class="hljs-literal">True</span> | |
| ) | |
| apply_group_offloading( | |
| pipeline.text_encoder, | |
| onload_device=onload_device, | |
| offload_type=<span class="hljs-string">"leaf_level"</span>, | |
| use_stream=<span class="hljs-literal">True</span>, | |
| non_blocking=<span class="hljs-literal">True</span> | |
| ) | |
| <span class="hljs-comment"># 编译</span> | |
| pipeline.transformer.<span class="hljs-built_in">compile</span>() | |
| prompt = <span class="hljs-string">""" | |
| The camera rushes from far to near in a low-angle shot, | |
| revealing a white ferret on a log. It plays, leaps into the water, and emerges, as the camera zooms in | |
| for a close-up. Water splashes berry bushes nearby, while moss, snow, and leaves blanket the ground. | |
| Birch trees and a light blue sky frame the scene, with ferns in the foreground. Side lighting casts dynamic | |
| shadows and warm highlights. Medium composition, front view, low angle, with depth of field. | |
| """</span> | |
| negative_prompt = <span class="hljs-string">""" | |
| Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, | |
| low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, | |
| misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards | |
| """</span> | |
| output = pipeline( | |
| prompt=prompt, | |
| negative_prompt=negative_prompt, | |
| num_frames=<span class="hljs-number">81</span>, | |
| guidance_scale=<span class="hljs-number">5.0</span>, | |
| ).frames[<span class="hljs-number">0</span>] | |
| export_to_video(output, <span class="hljs-string">"output.mp4"</span>, fps=<span class="hljs-number">16</span>)`,wrap:!1}}),{c(){n=J("p"),n.innerHTML=u,c=i(),M=J("p"),M.textContent=a,p=i(),d(y.$$.fragment)},l(o){n=r(o,"P",{"data-svelte-h":!0}),U(n)!=="svelte-xfxsnk"&&(n.innerHTML=u),c=m(o),M=r(o,"P",{"data-svelte-h":!0}),U(M)!=="svelte-iejgsl"&&(M.textContent=a),p=m(o),b(y.$$.fragment,o)},m(o,B){e(o,n,B),e(o,c,B),e(o,M,B),e(o,p,B),f(y,o,B),T=!0},p:Gl,i(o){T||(w(y.$$.fragment,o),T=!0)},o(o){h(y.$$.fragment,o),T=!1},d(o){o&&(s(n),s(c),s(M),s(p)),j(y,o)}}}function Fl(Z){let n,u,c,M;return n=new Bl({props:{id:"offloading",option:"model CPU offloading",$$slots:{default:[Yl]},$$scope:{ctx:Z}}}),c=new Bl({props:{id:"offloading",option:"group offloading",$$slots:{default:[Ql]},$$scope:{ctx:Z}}}),{c(){d(n.$$.fragment),u=i(),d(c.$$.fragment)},l(a){b(n.$$.fragment,a),u=m(a),b(c.$$.fragment,a)},m(a,p){f(n,a,p),e(a,u,p),f(c,a,p),M=!0},p(a,p){const y={};p&2&&(y.$$scope={dirty:p,ctx:a}),n.$set(y);const T={};p&2&&(T.$$scope={dirty:p,ctx:a}),c.$set(T)},i(a){M||(w(n.$$.fragment,a),w(c.$$.fragment,a),M=!0)},o(a){h(n.$$.fragment,a),h(c.$$.fragment,a),M=!1},d(a){a&&s(u),j(n,a),j(c,a)}}}function kl(Z){let n,u,c,M,a,p,y,T,o,B='优化模型通常涉及<a href="./fp16">推理速度</a>和<a href="./memory">内存使用</a>之间的权衡。例如,虽然<a href="./cache">缓存</a>可以提高推理速度,但它也会增加内存消耗,因为它需要存储中间注意力层的输出。一种更平衡的优化策略结合了量化模型、<a href="./fp16#torchcompile">torch.compile</a> 和各种<a href="./memory#offloading">卸载方法</a>。',z,G,yl='<p>查看 <a href="./fp16#torchcompile">torch.compile</a> 指南以了解更多关于编译以及如何在此处应用的信息。例如,区域编译可以显著减少编译时间,而不会放弃任何加速。</p>',S,W,Jl='对于图像生成,结合量化和<a href="./memory#model-offloading">模型卸载</a>通常可以在质量、速度和内存之间提供最佳权衡。组卸载对于图像生成效果不佳,因为如果计算内核更快完成,通常不可能<em>完全</em>重叠数据传输。这会导致 CPU 和 GPU 之间的一些通信开销。',x,_,rl='对于视频生成,结合量化和<a href="./memory#group-offloading">组卸载</a>往往更好,因为视频模型更受计算限制。',H,X,ul="下表提供了优化策略组合及其对 Flux 延迟和内存使用的影响的比较。",A,g,Ul="<thead><tr><th>组合</th> <th>延迟 (s)</th> <th>内存使用 (GB)</th></tr></thead> <tbody><tr><td>量化</td> <td>32.602</td> <td>14.9453</td></tr> <tr><td>量化, torch.compile</td> <td>25.847</td> <td>14.9448</td></tr> <tr><td>量化, torch.compile, 模型 CPU 卸载</td> <td>32.312</td> <td>12.2369</td></tr></tbody>",D,V,Tl="这些结果是在 Flux 上使用 RTX 4090 进行基准测试的。transformer 和 text_encoder 组件已量化。如果您有兴趣评估自己的模型,请参考[基准测试脚本](https://gist.github.com/sayakpaul/0db9d8eeeb3d2a0e5ed7cf0d9ca19b7d)。",L,v,dl='本指南将向您展示如何使用 <a href="../quantization/bitsandbytes#torchcompile">bitsandbytes</a> 编译和卸载量化模型。确保您正在使用 <a href="https://pytorch.org/get-started/locally/" rel="nofollow">PyTorch nightly</a> 和最新版本的 bitsandbytes。',P,R,K,C,O,Y,bl='首先通过<a href="../quantization/overview">量化</a>模型来减少存储所需的内存,并<a href="./fp16#torchcompile">编译</a>它以加速推理。',ll,Q,fl='配置 <a href="https://docs.pytorch.org/docs/stable/torch.compiler_dynamo_overview.html" rel="nofollow">Dynamo</a> <code>capture_dynamic_output_shape_ops = True</code> 以在编译 bitsandbytes 模型时处理动态输出。',tl,F,sl,k,el,N,wl="除了量化和 torch.compile,如果您需要进一步减少内存使用,可以尝试卸载。卸载根据需要将各种层或模型组件从 CPU 移动到 GPU 进行计算。",al,q,hl='在卸载期间配置 <a href="https://docs.pytorch.org/docs/stable/torch.compiler_dynamo_overview.html" rel="nofollow">Dynamo</a> <code>cache_size_limit</code> 以避免过多的重新编译,并设置 <code>capture_dynamic_output_shape_ops = True</code> 以在编译 bitsandbytes 模型时处理动态输出。',nl,I,pl,$,ol,E,il;return a=new vl({props:{containerStyle:"float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"}}),y=new cl({props:{title:"编译和卸载量化模型",local:"编译和卸载量化模型",headingTag:"h1"}}),R=new ml({props:{code:"cGlwJTIwaW5zdGFsbCUyMC1VJTIwYml0c2FuZGJ5dGVz",highlighted:"pip install -U bitsandbytes",wrap:!1}}),C=new cl({props:{title:"量化和 torch.compile",local:"量化和-torchcompile",headingTag:"h2"}}),F=new ml({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwRGlmZnVzaW9uUGlwZWxpbmUlMEFmcm9tJTIwZGlmZnVzZXJzLnF1YW50aXplcnMlMjBpbXBvcnQlMjBQaXBlbGluZVF1YW50aXphdGlvbkNvbmZpZyUwQSUwQXRvcmNoLl9keW5hbW8uY29uZmlnLmNhcHR1cmVfZHluYW1pY19vdXRwdXRfc2hhcGVfb3BzJTIwJTNEJTIwVHJ1ZSUwQSUwQSUyMyUyMCVFOSU4NyU4RiVFNSU4QyU5NiUwQXBpcGVsaW5lX3F1YW50X2NvbmZpZyUyMCUzRCUyMFBpcGVsaW5lUXVhbnRpemF0aW9uQ29uZmlnKCUwQSUyMCUyMCUyMCUyMHF1YW50X2JhY2tlbmQlM0QlMjJiaXRzYW5kYnl0ZXNfNGJpdCUyMiUyQyUwQSUyMCUyMCUyMCUyMHF1YW50X2t3YXJncyUzRCU3QiUyMmxvYWRfaW5fNGJpdCUyMiUzQSUyMFRydWUlMkMlMjAlMjJibmJfNGJpdF9xdWFudF90eXBlJTIyJTNBJTIwJTIybmY0JTIyJTJDJTIwJTIyYm5iXzRiaXRfY29tcHV0ZV9kdHlwZSUyMiUzQSUyMHRvcmNoLmJmbG9hdDE2JTdEJTJDJTBBJTIwJTIwJTIwJTIwY29tcG9uZW50c190b19xdWFudGl6ZSUzRCU1QiUyMnRyYW5zZm9ybWVyJTIyJTJDJTIwJTIydGV4dF9lbmNvZGVyXzIlMjIlNUQlMkMlMEEpJTBBcGlwZWxpbmUlMjAlM0QlMjBEaWZmdXNpb25QaXBlbGluZS5mcm9tX3ByZXRyYWluZWQoJTBBJTIwJTIwJTIwJTIwJTIyYmxhY2stZm9yZXN0LWxhYnMlMkZGTFVYLjEtZGV2JTIyJTJDJTBBJTIwJTIwJTIwJTIwcXVhbnRpemF0aW9uX2NvbmZpZyUzRHBpcGVsaW5lX3F1YW50X2NvbmZpZyUyQyUwQSUyMCUyMCUyMCUyMHRvcmNoX2R0eXBlJTNEdG9yY2guYmZsb2F0MTYlMkMlMEEpLnRvKCUyMmN1ZGElMjIpJTBBJTBBJTIzJTIwJUU3JUJDJTk2JUU4JUFGJTkxJTBBcGlwZWxpbmUudHJhbnNmb3JtZXIudG8obWVtb3J5X2Zvcm1hdCUzRHRvcmNoLmNoYW5uZWxzX2xhc3QpJTBBcGlwZWxpbmUudHJhbnNmb3JtZXIuY29tcGlsZShtb2RlJTNEJTIybWF4LWF1dG90dW5lJTIyJTJDJTIwZnVsbGdyYXBoJTNEVHJ1ZSklMEFwaXBlbGluZSglMjIlMjIlMjIlMEElMjAlMjAlMjAlMjBjaW5lbWF0aWMlMjBmaWxtJTIwc3RpbGwlMjBvZiUyMGElMjBjYXQlMjBzaXBwaW5nJTIwYSUyMG1hcmdhcml0YSUyMGluJTIwYSUyMHBvb2wlMjBpbiUyMFBhbG0lMjBTcHJpbmdzJTJDJTIwQ2FsaWZvcm5pYSUwQSUyMCUyMCUyMCUyMGhpZ2hseSUyMGRldGFpbGVkJTJDJTIwaGlnaCUyMGJ1ZGdldCUyMGhvbGx5d29vZCUyMG1vdmllJTJDJTIwY2luZW1hc2NvcGUlMkMlMjBtb29keSUyQyUyMGVwaWMlMkMlMjBnb3JnZW91cyUyQyUyMGZpbG0lMjBncmFpbiUwQSUyMiUyMiUyMiUwQSkuaW1hZ2VzJTVCMCU1RA==",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> DiffusionPipeline | |
| <span class="hljs-keyword">from</span> diffusers.quantizers <span class="hljs-keyword">import</span> PipelineQuantizationConfig | |
| torch._dynamo.config.capture_dynamic_output_shape_ops = <span class="hljs-literal">True</span> | |
| <span class="hljs-comment"># 量化</span> | |
| pipeline_quant_config = PipelineQuantizationConfig( | |
| quant_backend=<span class="hljs-string">"bitsandbytes_4bit"</span>, | |
| quant_kwargs={<span class="hljs-string">"load_in_4bit"</span>: <span class="hljs-literal">True</span>, <span class="hljs-string">"bnb_4bit_quant_type"</span>: <span class="hljs-string">"nf4"</span>, <span class="hljs-string">"bnb_4bit_compute_dtype"</span>: torch.bfloat16}, | |
| components_to_quantize=[<span class="hljs-string">"transformer"</span>, <span class="hljs-string">"text_encoder_2"</span>], | |
| ) | |
| pipeline = DiffusionPipeline.from_pretrained( | |
| <span class="hljs-string">"black-forest-labs/FLUX.1-dev"</span>, | |
| quantization_config=pipeline_quant_config, | |
| torch_dtype=torch.bfloat16, | |
| ).to(<span class="hljs-string">"cuda"</span>) | |
| <span class="hljs-comment"># 编译</span> | |
| pipeline.transformer.to(memory_format=torch.channels_last) | |
| pipeline.transformer.<span class="hljs-built_in">compile</span>(mode=<span class="hljs-string">"max-autotune"</span>, fullgraph=<span class="hljs-literal">True</span>) | |
| pipeline(<span class="hljs-string">""" | |
| cinematic film still of a cat sipping a margarita in a pool in Palm Springs, California | |
| highly detailed, high budget hollywood movie, cinemascope, moody, epic, gorgeous, film grain | |
| """</span> | |
| ).images[<span class="hljs-number">0</span>]`,wrap:!1}}),k=new cl({props:{title:"量化、torch.compile 和卸载",local:"量化torchcompile-和卸载",headingTag:"h2"}}),I=new Cl({props:{id:"offloading",options:["model CPU offloading","group offloading"],$$slots:{default:[Fl]},$$scope:{ctx:Z}}}),$=new Rl({props:{source:"https://github.com/huggingface/diffusers/blob/main/docs/source/zh/optimization/speed-memory-optims.md"}}),{c(){n=J("meta"),u=i(),c=J("p"),M=i(),d(a.$$.fragment),p=i(),d(y.$$.fragment),T=i(),o=J("p"),o.innerHTML=B,z=i(),G=J("blockquote"),G.innerHTML=yl,S=i(),W=J("p"),W.innerHTML=Jl,x=i(),_=J("p"),_.innerHTML=rl,H=i(),X=J("p"),X.textContent=ul,A=i(),g=J("table"),g.innerHTML=Ul,D=i(),V=J("small"),V.textContent=Tl,L=i(),v=J("p"),v.innerHTML=dl,P=i(),d(R.$$.fragment),K=i(),d(C.$$.fragment),O=i(),Y=J("p"),Y.innerHTML=bl,ll=i(),Q=J("p"),Q.innerHTML=fl,tl=i(),d(F.$$.fragment),sl=i(),d(k.$$.fragment),el=i(),N=J("p"),N.textContent=wl,al=i(),q=J("p"),q.innerHTML=hl,nl=i(),d(I.$$.fragment),pl=i(),d($.$$.fragment),ol=i(),E=J("p"),this.h()},l(l){const t=gl("svelte-u9bgzb",document.head);n=r(t,"META",{name:!0,content:!0}),t.forEach(s),u=m(l),c=r(l,"P",{}),Zl(c).forEach(s),M=m(l),b(a.$$.fragment,l),p=m(l),b(y.$$.fragment,l),T=m(l),o=r(l,"P",{"data-svelte-h":!0}),U(o)!=="svelte-181wuip"&&(o.innerHTML=B),z=m(l),G=r(l,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),U(G)!=="svelte-179o7uv"&&(G.innerHTML=yl),S=m(l),W=r(l,"P",{"data-svelte-h":!0}),U(W)!=="svelte-15a734g"&&(W.innerHTML=Jl),x=m(l),_=r(l,"P",{"data-svelte-h":!0}),U(_)!=="svelte-12155we"&&(_.innerHTML=rl),H=m(l),X=r(l,"P",{"data-svelte-h":!0}),U(X)!=="svelte-zqzew3"&&(X.textContent=ul),A=m(l),g=r(l,"TABLE",{"data-svelte-h":!0}),U(g)!=="svelte-yb0epc"&&(g.innerHTML=Ul),D=m(l),V=r(l,"SMALL",{"data-svelte-h":!0}),U(V)!=="svelte-moa23m"&&(V.textContent=Tl),L=m(l),v=r(l,"P",{"data-svelte-h":!0}),U(v)!=="svelte-jsp2de"&&(v.innerHTML=dl),P=m(l),b(R.$$.fragment,l),K=m(l),b(C.$$.fragment,l),O=m(l),Y=r(l,"P",{"data-svelte-h":!0}),U(Y)!=="svelte-9fcnzw"&&(Y.innerHTML=bl),ll=m(l),Q=r(l,"P",{"data-svelte-h":!0}),U(Q)!=="svelte-5t95ee"&&(Q.innerHTML=fl),tl=m(l),b(F.$$.fragment,l),sl=m(l),b(k.$$.fragment,l),el=m(l),N=r(l,"P",{"data-svelte-h":!0}),U(N)!=="svelte-14h7pc0"&&(N.textContent=wl),al=m(l),q=r(l,"P",{"data-svelte-h":!0}),U(q)!=="svelte-gy5ew7"&&(q.innerHTML=hl),nl=m(l),b(I.$$.fragment,l),pl=m(l),b($.$$.fragment,l),ol=m(l),E=r(l,"P",{}),Zl(E).forEach(s),this.h()},h(){Ml(n,"name","hf:doc:metadata"),Ml(n,"content",Nl),Ml(G,"class","tip")},m(l,t){Vl(document.head,n),e(l,u,t),e(l,c,t),e(l,M,t),f(a,l,t),e(l,p,t),f(y,l,t),e(l,T,t),e(l,o,t),e(l,z,t),e(l,G,t),e(l,S,t),e(l,W,t),e(l,x,t),e(l,_,t),e(l,H,t),e(l,X,t),e(l,A,t),e(l,g,t),e(l,D,t),e(l,V,t),e(l,L,t),e(l,v,t),e(l,P,t),f(R,l,t),e(l,K,t),f(C,l,t),e(l,O,t),e(l,Y,t),e(l,ll,t),e(l,Q,t),e(l,tl,t),f(F,l,t),e(l,sl,t),f(k,l,t),e(l,el,t),e(l,N,t),e(l,al,t),e(l,q,t),e(l,nl,t),f(I,l,t),e(l,pl,t),f($,l,t),e(l,ol,t),e(l,E,t),il=!0},p(l,[t]){const jl={};t&2&&(jl.$$scope={dirty:t,ctx:l}),I.$set(jl)},i(l){il||(w(a.$$.fragment,l),w(y.$$.fragment,l),w(R.$$.fragment,l),w(C.$$.fragment,l),w(F.$$.fragment,l),w(k.$$.fragment,l),w(I.$$.fragment,l),w($.$$.fragment,l),il=!0)},o(l){h(a.$$.fragment,l),h(y.$$.fragment,l),h(R.$$.fragment,l),h(C.$$.fragment,l),h(F.$$.fragment,l),h(k.$$.fragment,l),h(I.$$.fragment,l),h($.$$.fragment,l),il=!1},d(l){l&&(s(u),s(c),s(M),s(p),s(T),s(o),s(z),s(G),s(S),s(W),s(x),s(_),s(H),s(X),s(A),s(g),s(D),s(V),s(L),s(v),s(P),s(K),s(O),s(Y),s(ll),s(Q),s(tl),s(sl),s(el),s(N),s(al),s(q),s(nl),s(pl),s(ol),s(E)),s(n),j(a,l),j(y,l),j(R,l),j(C,l),j(F,l),j(k,l),j(I,l),j($,l)}}}const Nl='{"title":"编译和卸载量化模型","local":"编译和卸载量化模型","sections":[{"title":"量化和 torch.compile","local":"量化和-torchcompile","sections":[],"depth":2},{"title":"量化、torch.compile 和卸载","local":"量化torchcompile-和卸载","sections":[],"depth":2}],"depth":1}';function ql(Z){return Wl(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class Hl extends _l{constructor(n){super(),Xl(this,n,ql,kl,Il,{})}}export{Hl as component}; | |
Xet Storage Details
- Size:
- 27.2 kB
- Xet hash:
- 38f4b0405548637949c08aa405c0300b3f07e0229702f3025223e5dc3db7328d
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.