Buckets:
| import{s as Bl,o as Gl,n as Zl}from"../chunks/scheduler.5c93273d.js";import{S as Il,i as Wl,g as J,s as i,r as d,A as _l,h as r,f as s,c as M,j as hl,u as b,x as U,k as il,y as Xl,a as e,v as f,d as w,t as h,w as j}from"../chunks/index.e43dd92b.js";import{C as pl}from"../chunks/CodeBlock.6896320e.js";import{H as Ml,E as gl}from"../chunks/getInferenceSnippets.161194d2.js";import{H as Vl,a as jl}from"../chunks/HfOption.d50154c3.js";function vl(B){let n,u='<a href="./memory#model-offloading">模型 CPU 卸载</a> 将单个管道组件(如 transformer 模型)在需要计算时移动到 GPU。否则,它会被卸载到 CPU。',m,p,a;return p=new pl({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwRGlmZnVzaW9uUGlwZWxpbmUlMEFmcm9tJTIwZGlmZnVzZXJzLnF1YW50aXplcnMlMjBpbXBvcnQlMjBQaXBlbGluZVF1YW50aXphdGlvbkNvbmZpZyUwQSUwQXRvcmNoLl9keW5hbW8uY29uZmlnLmNhY2hlX3NpemVfbGltaXQlMjAlM0QlMjAxMDAwJTBBdG9yY2guX2R5bmFtby5jb25maWcuY2FwdHVyZV9keW5hbWljX291dHB1dF9zaGFwZV9vcHMlMjAlM0QlMjBUcnVlJTBBJTBBJTIzJTIwJUU5JTg3JThGJUU1JThDJTk2JTBBcGlwZWxpbmVfcXVhbnRfY29uZmlnJTIwJTNEJTIwUGlwZWxpbmVRdWFudGl6YXRpb25Db25maWcoJTBBJTIwJTIwJTIwJTIwcXVhbnRfYmFja2VuZCUzRCUyMmJpdHNhbmRieXRlc180Yml0JTIyJTJDJTBBJTIwJTIwJTIwJTIwcXVhbnRfa3dhcmdzJTNEJTdCJTIybG9hZF9pbl80Yml0JTIyJTNBJTIwVHJ1ZSUyQyUyMCUyMmJuYl80Yml0X3F1YW50X3R5cGUlMjIlM0ElMjAlMjJuZjQlMjIlMkMlMjAlMjJibmJfNGJpdF9jb21wdXRlX2R0eXBlJTIyJTNBJTIwdG9yY2guYmZsb2F0MTYlN0QlMkMlMEElMjAlMjAlMjAlMjBjb21wb25lbnRzX3RvX3F1YW50aXplJTNEJTVCJTIydHJhbnNmb3JtZXIlMjIlMkMlMjAlMjJ0ZXh0X2VuY29kZXJfMiUyMiU1RCUyQyUwQSklMEFwaXBlbGluZSUyMCUzRCUyMERpZmZ1c2lvblBpcGVsaW5lLmZyb21fcHJldHJhaW5lZCglMEElMjAlMjAlMjAlMjAlMjJibGFjay1mb3Jlc3QtbGFicyUyRkZMVVguMS1kZXYlMjIlMkMlMEElMjAlMjAlMjAlMjBxdWFudGl6YXRpb25fY29uZmlnJTNEcGlwZWxpbmVfcXVhbnRfY29uZmlnJTJDJTBBJTIwJTIwJTIwJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5iZmxvYXQxNiUyQyUwQSkudG8oJTIyY3VkYSUyMiklMEElMEElMjMlMjAlRTYlQTglQTElRTUlOUUlOEIlMjBDUFUlMjAlRTUlOEQlQjglRTglQkQlQkQlMEFwaXBlbGluZS5lbmFibGVfbW9kZWxfY3B1X29mZmxvYWQoKSUwQSUwQSUyMyUyMCVFNyVCQyU5NiVFOCVBRiU5MSUwQXBpcGVsaW5lLnRyYW5zZm9ybWVyLmNvbXBpbGUoKSUwQXBpcGVsaW5lKCUwQSUyMCUyMCUyMCUyMCUyMmNpbmVtYXRpYyUyMGZpbG0lMjBzdGlsbCUyMG9mJTIwYSUyMGNhdCUyMHNpcHBpbmclMjBhJTIwbWFyZ2FyaXRhJTIwaW4lMjBhJTIwcG9vbCUyMGluJTIwUGFsbSUyMFNwcmluZ3MlMkMlMjBDYWxpZm9ybmlhJTJDJTIwaGlnaGx5JTIwZGV0YWlsZWQlMkMlMjBoaWdoJTIwYnVkZ2V0JTIwaG9sbHl3b29kJTIwbW92aWUlMkMlMjBjaW5lbWFzY29wZSUyQyUyMG1vb2R5JTJDJTIwZXBpYyUyQyUyMGdvcmdlb3VzJTJDJTIwZmlsbSUyMGdyYWluJTIyJTBBKS5pbWFnZXMlNUIwJTVE",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> DiffusionPipeline | |
| <span class="hljs-keyword">from</span> diffusers.quantizers <span class="hljs-keyword">import</span> PipelineQuantizationConfig | |
| torch._dynamo.config.cache_size_limit = <span class="hljs-number">1000</span> | |
| torch._dynamo.config.capture_dynamic_output_shape_ops = <span class="hljs-literal">True</span> | |
| <span class="hljs-comment"># 量化</span> | |
| pipeline_quant_config = PipelineQuantizationConfig( | |
| quant_backend=<span class="hljs-string">"bitsandbytes_4bit"</span>, | |
| quant_kwargs={<span class="hljs-string">"load_in_4bit"</span>: <span class="hljs-literal">True</span>, <span class="hljs-string">"bnb_4bit_quant_type"</span>: <span class="hljs-string">"nf4"</span>, <span class="hljs-string">"bnb_4bit_compute_dtype"</span>: torch.bfloat16}, | |
| components_to_quantize=[<span class="hljs-string">"transformer"</span>, <span class="hljs-string">"text_encoder_2"</span>], | |
| ) | |
| pipeline = DiffusionPipeline.from_pretrained( | |
| <span class="hljs-string">"black-forest-labs/FLUX.1-dev"</span>, | |
| quantization_config=pipeline_quant_config, | |
| torch_dtype=torch.bfloat16, | |
| ).to(<span class="hljs-string">"cuda"</span>) | |
| <span class="hljs-comment"># 模型 CPU 卸载</span> | |
| pipeline.enable_model_cpu_offload() | |
| <span class="hljs-comment"># 编译</span> | |
| pipeline.transformer.<span class="hljs-built_in">compile</span>() | |
| pipeline( | |
| <span class="hljs-string">"cinematic film still of a cat sipping a margarita in a pool in Palm Springs, California, highly detailed, high budget hollywood movie, cinemascope, moody, epic, gorgeous, film grain"</span> | |
| ).images[<span class="hljs-number">0</span>]`,wrap:!1}}),{c(){n=J("p"),n.innerHTML=u,m=i(),d(p.$$.fragment)},l(o){n=r(o,"P",{"data-svelte-h":!0}),U(n)!=="svelte-4ay8nf"&&(n.innerHTML=u),m=M(o),b(p.$$.fragment,o)},m(o,y){e(o,n,y),e(o,m,y),f(p,o,y),a=!0},p:Zl,i(o){a||(w(p.$$.fragment,o),a=!0)},o(o){h(p.$$.fragment,o),a=!1},d(o){o&&(s(n),s(m)),j(p,o)}}}function Rl(B){let n,u='<a href="./memory#group-offloading">组卸载</a> 将单个管道组件(如变换器模型)的内部层移动到 GPU 进行计算,并在不需要时将其卸载。同时,它使用 <a href="./memory#cuda-stream">CUDA 流</a> 功能来预取下一层以执行。',m,p,a="通过重叠计算和数据传输,它比模型 CPU 卸载更快,同时还能节省内存。",o,y,Z;return y=new pl({props:{code:"JTIzJTIwcGlwJTIwaW5zdGFsbCUyMGZ0ZnklMEFpbXBvcnQlMjB0b3JjaCUwQWZyb20lMjBkaWZmdXNlcnMlMjBpbXBvcnQlMjBBdXRvTW9kZWwlMkMlMjBEaWZmdXNpb25QaXBlbGluZSUwQWZyb20lMjBkaWZmdXNlcnMuaG9va3MlMjBpbXBvcnQlMjBhcHBseV9ncm91cF9vZmZsb2FkaW5nJTBBZnJvbSUyMGRpZmZ1c2Vycy51dGlscyUyMGltcG9ydCUyMGV4cG9ydF90b192aWRlbyUwQWZyb20lMjBkaWZmdXNlcnMucXVhbnRpemVycyUyMGltcG9ydCUyMFBpcGVsaW5lUXVhbnRpemF0aW9uQ29uZmlnJTBBZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMFVNVDVFbmNvZGVyTW9kZWwlMEElMEF0b3JjaC5fZHluYW1vLmNvbmZpZy5jYWNoZV9zaXplX2xpbWl0JTIwJTNEJTIwMTAwMCUwQXRvcmNoLl9keW5hbW8uY29uZmlnLmNhcHR1cmVfZHluYW1pY19vdXRwdXRfc2hhcGVfb3BzJTIwJTNEJTIwVHJ1ZSUwQSUwQSUyMyUyMCVFOSU4NyU4RiVFNSU4QyU5NiUwQXBpcGVsaW5lX3F1YW50X2NvbmZpZyUyMCUzRCUyMFBpcGVsaW5lUXVhbnRpemF0aW9uQ29uZmlnKCUwQSUyMCUyMCUyMCUyMHF1YW50X2JhY2tlbmQlM0QlMjJiaXRzYW5kYnl0ZXNfNGJpdCUyMiUyQyUwQSUyMCUyMCUyMCUyMHF1YW50X2t3YXJncyUzRCU3QiUyMmxvYWRfaW5fNGJpdCUyMiUzQSUyMFRydWUlMkMlMjAlMjJibmJfNGJpdF9xdWFudF90eXBlJTIyJTNBJTIwJTIybmY0JTIyJTJDJTIwJTIyYm5iXzRiaXRfY29tcHV0ZV9kdHlwZSUyMiUzQSUyMHRvcmNoLmJmbG9hdDE2JTdEJTJDJTBBJTIwJTIwJTIwJTIwY29tcG9uZW50c190b19xdWFudGl6ZSUzRCU1QiUyMnRyYW5zZm9ybWVyJTIyJTJDJTIwJTIydGV4dF9lbmNvZGVyJTIyJTVEJTJDJTBBKSUwQSUwQXRleHRfZW5jb2RlciUyMCUzRCUyMFVNVDVFbmNvZGVyTW9kZWwuZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMCUyMldhbi1BSSUyRldhbjIuMS1UMlYtMTRCLURpZmZ1c2VycyUyMiUyQyUyMHN1YmZvbGRlciUzRCUyMnRleHRfZW5jb2RlciUyMiUyQyUyMHRvcmNoX2R0eXBlJTNEdG9yY2guYmZsb2F0MTYlMEEpJTBBcGlwZWxpbmUlMjAlM0QlMjBEaWZmdXNpb25QaXBlbGluZS5mcm9tX3ByZXRyYWluZWQoJTBBJTIwJTIwJTIwJTIwJTIyV2FuLUFJJTJGV2FuMi4xLVQyVi0xNEItRGlmZnVzZXJzJTIyJTJDJTBBJTIwJTIwJTIwJTIwcXVhbnRpemF0aW9uX2NvbmZpZyUzRHBpcGVsaW5lX3F1YW50X2NvbmZpZyUyQyUwQSUyMCUyMCUyMCUyMHRvcmNoX2R0eXBlJTNEdG9yY2guYmZsb2F0MTYlMkMlMEEpLnRvKCUyMmN1ZGElMjIpJTBBJTBBJTIzJTIwJUU3JUJCJTg0JUU1JThEJUI4JUU4JUJEJUJEJTBBb25sb2FkX2RldmljZSUyMCUzRCUyMHRvcmNoLmRldmljZSglMjJjdWRhJTIyKSUwQW9mZmxvYWRfZGV2aWNlJTIwJTNEJTIwdG9yY2guZGV2aWNlKCUyMmNwdSUyMiklMEElMEFwaXBlbGluZS50cmFuc2Zvcm1lci5lbmFibGVfZ3JvdXBfb2ZmbG9hZCglMEElMjAlMjAlMjAlMjBvbmxvYWRfZGV2aWNlJTNEb25sb2FkX2RldmljZSUyQyUwQSUyMCUyMCUyMCUyMG9mZmxvYWRfZGV2aWNlJTNEb2ZmbG9hZF9kZXZpY2UlMkMlMEElMjAlMjAlMjAlMjBvZmZsb2FkX3R5cGUlM0QlMjJsZWFmX2xldmVsJTIyJTJDJTBBJTIwJTIwJTIwJTIwdXNlX3N0cmVhbSUzRFRydWUlMkMlMEElMjAlMjAlMjAlMjBub25fYmxvY2tpbmclM0RUcnVlJTBBKSUwQXBpcGVsaW5lLnZhZS5lbmFibGVfZ3JvdXBfb2ZmbG9hZCglMEElMjAlMjAlMjAlMjBvbmxvYWRfZGV2aWNlJTNEb25sb2FkX2RldmljZSUyQyUwQSUyMCUyMCUyMCUyMG9mZmxvYWRfZGV2aWNlJTNEb2ZmbG9hZF9kZXZpY2UlMkMlMEElMjAlMjAlMjAlMjBvZmZsb2FkX3R5cGUlM0QlMjJsZWFmX2xldmVsJTIyJTJDJTBBJTIwJTIwJTIwJTIwdXNlX3N0cmVhbSUzRFRydWUlMkMlMEElMjAlMjAlMjAlMjBub25fYmxvY2tpbmclM0RUcnVlJTBBKSUwQWFwcGx5X2dyb3VwX29mZmxvYWRpbmcoJTBBJTIwJTIwJTIwJTIwcGlwZWxpbmUudGV4dF9lbmNvZGVyJTJDJTBBJTIwJTIwJTIwJTIwb25sb2FkX2RldmljZSUzRG9ubG9hZF9kZXZpY2UlMkMlMEElMjAlMjAlMjAlMjBvZmZsb2FkX3R5cGUlM0QlMjJsZWFmX2xldmVsJTIyJTJDJTBBJTIwJTIwJTIwJTIwdXNlX3N0cmVhbSUzRFRydWUlMkMlMEElMjAlMjAlMjAlMjBub25fYmxvY2tpbmclM0RUcnVlJTBBKSUwQSUwQSUyMyUyMCVFNyVCQyU5NiVFOCVBRiU5MSUwQXBpcGVsaW5lLnRyYW5zZm9ybWVyLmNvbXBpbGUoKSUwQSUwQXByb21wdCUyMCUzRCUyMCUyMiUyMiUyMiUwQVRoZSUyMGNhbWVyYSUyMHJ1c2hlcyUyMGZyb20lMjBmYXIlMjB0byUyMG5lYXIlMjBpbiUyMGElMjBsb3ctYW5nbGUlMjBzaG90JTJDJTIwJTBBcmV2ZWFsaW5nJTIwYSUyMHdoaXRlJTIwZmVycmV0JTIwb24lMjBhJTIwbG9nLiUyMEl0JTIwcGxheXMlMkMlMjBsZWFwcyUyMGludG8lMjB0aGUlMjB3YXRlciUyQyUyMGFuZCUyMGVtZXJnZXMlMkMlMjBhcyUyMHRoZSUyMGNhbWVyYSUyMHpvb21zJTIwaW4lMjAlMEFmb3IlMjBhJTIwY2xvc2UtdXAuJTIwV2F0ZXIlMjBzcGxhc2hlcyUyMGJlcnJ5JTIwYnVzaGVzJTIwbmVhcmJ5JTJDJTIwd2hpbGUlMjBtb3NzJTJDJTIwc25vdyUyQyUyMGFuZCUyMGxlYXZlcyUyMGJsYW5rZXQlMjB0aGUlMjBncm91bmQuJTIwJTBBQmlyY2glMjB0cmVlcyUyMGFuZCUyMGElMjBsaWdodCUyMGJsdWUlMjBza3klMjBmcmFtZSUyMHRoZSUyMHNjZW5lJTJDJTIwd2l0aCUyMGZlcm5zJTIwaW4lMjB0aGUlMjBmb3JlZ3JvdW5kLiUyMFNpZGUlMjBsaWdodGluZyUyMGNhc3RzJTIwZHluYW1pYyUyMCUwQXNoYWRvd3MlMjBhbmQlMjB3YXJtJTIwaGlnaGxpZ2h0cy4lMjBNZWRpdW0lMjBjb21wb3NpdGlvbiUyQyUyMGZyb250JTIwdmlldyUyQyUyMGxvdyUyMGFuZ2xlJTJDJTIwd2l0aCUyMGRlcHRoJTIwb2YlMjBmaWVsZC4lMEElMjIlMjIlMjIlMEFuZWdhdGl2ZV9wcm9tcHQlMjAlM0QlMjAlMjIlMjIlMjIlMEFCcmlnaHQlMjB0b25lcyUyQyUyMG92ZXJleHBvc2VkJTJDJTIwc3RhdGljJTJDJTIwYmx1cnJlZCUyMGRldGFpbHMlMkMlMjBzdWJ0aXRsZXMlMkMlMjBzdHlsZSUyQyUyMHdvcmtzJTJDJTIwcGFpbnRpbmdzJTJDJTIwaW1hZ2VzJTJDJTIwc3RhdGljJTJDJTIwb3ZlcmFsbCUyMGdyYXklMkMlMjB3b3JzdCUyMHF1YWxpdHklMkMlMjAlMEFsb3clMjBxdWFsaXR5JTJDJTIwSlBFRyUyMGNvbXByZXNzaW9uJTIwcmVzaWR1ZSUyQyUyMHVnbHklMkMlMjBpbmNvbXBsZXRlJTJDJTIwZXh0cmElMjBmaW5nZXJzJTJDJTIwcG9vcmx5JTIwZHJhd24lMjBoYW5kcyUyQyUyMHBvb3JseSUyMGRyYXduJTIwZmFjZXMlMkMlMjBkZWZvcm1lZCUyQyUyMGRpc2ZpZ3VyZWQlMkMlMjAlMEFtaXNzaGFwZW4lMjBsaW1icyUyQyUyMGZ1c2VkJTIwZmluZ2VycyUyQyUyMHN0aWxsJTIwcGljdHVyZSUyQyUyMG1lc3N5JTIwYmFja2dyb3VuZCUyQyUyMHRocmVlJTIwbGVncyUyQyUyMG1hbnklMjBwZW9wbGUlMjBpbiUyMHRoZSUyMGJhY2tncm91bmQlMkMlMjB3YWxraW5nJTIwYmFja3dhcmRzJTBBJTIyJTIyJTIyJTBBJTBBb3V0cHV0JTIwJTNEJTIwcGlwZWxpbmUoJTBBJTIwJTIwJTIwJTIwcHJvbXB0JTNEcHJvbXB0JTJDJTBBJTIwJTIwJTIwJTIwbmVnYXRpdmVfcHJvbXB0JTNEbmVnYXRpdmVfcHJvbXB0JTJDJTBBJTIwJTIwJTIwJTIwbnVtX2ZyYW1lcyUzRDgxJTJDJTBBJTIwJTIwJTIwJTIwZ3VpZGFuY2Vfc2NhbGUlM0Q1LjAlMkMlMEEpLmZyYW1lcyU1QjAlNUQlMEFleHBvcnRfdG9fdmlkZW8ob3V0cHV0JTJDJTIwJTIyb3V0cHV0Lm1wNCUyMiUyQyUyMGZwcyUzRDE2KQ==",highlighted:`<span class="hljs-comment"># pip install ftfy</span> | |
| <span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> AutoModel, DiffusionPipeline | |
| <span class="hljs-keyword">from</span> diffusers.hooks <span class="hljs-keyword">import</span> apply_group_offloading | |
| <span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> export_to_video | |
| <span class="hljs-keyword">from</span> diffusers.quantizers <span class="hljs-keyword">import</span> PipelineQuantizationConfig | |
| <span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> UMT5EncoderModel | |
| torch._dynamo.config.cache_size_limit = <span class="hljs-number">1000</span> | |
| torch._dynamo.config.capture_dynamic_output_shape_ops = <span class="hljs-literal">True</span> | |
| <span class="hljs-comment"># 量化</span> | |
| pipeline_quant_config = PipelineQuantizationConfig( | |
| quant_backend=<span class="hljs-string">"bitsandbytes_4bit"</span>, | |
| quant_kwargs={<span class="hljs-string">"load_in_4bit"</span>: <span class="hljs-literal">True</span>, <span class="hljs-string">"bnb_4bit_quant_type"</span>: <span class="hljs-string">"nf4"</span>, <span class="hljs-string">"bnb_4bit_compute_dtype"</span>: torch.bfloat16}, | |
| components_to_quantize=[<span class="hljs-string">"transformer"</span>, <span class="hljs-string">"text_encoder"</span>], | |
| ) | |
| text_encoder = UMT5EncoderModel.from_pretrained( | |
| <span class="hljs-string">"Wan-AI/Wan2.1-T2V-14B-Diffusers"</span>, subfolder=<span class="hljs-string">"text_encoder"</span>, torch_dtype=torch.bfloat16 | |
| ) | |
| pipeline = DiffusionPipeline.from_pretrained( | |
| <span class="hljs-string">"Wan-AI/Wan2.1-T2V-14B-Diffusers"</span>, | |
| quantization_config=pipeline_quant_config, | |
| torch_dtype=torch.bfloat16, | |
| ).to(<span class="hljs-string">"cuda"</span>) | |
| <span class="hljs-comment"># 组卸载</span> | |
| onload_device = torch.device(<span class="hljs-string">"cuda"</span>) | |
| offload_device = torch.device(<span class="hljs-string">"cpu"</span>) | |
| pipeline.transformer.enable_group_offload( | |
| onload_device=onload_device, | |
| offload_device=offload_device, | |
| offload_type=<span class="hljs-string">"leaf_level"</span>, | |
| use_stream=<span class="hljs-literal">True</span>, | |
| non_blocking=<span class="hljs-literal">True</span> | |
| ) | |
| pipeline.vae.enable_group_offload( | |
| onload_device=onload_device, | |
| offload_device=offload_device, | |
| offload_type=<span class="hljs-string">"leaf_level"</span>, | |
| use_stream=<span class="hljs-literal">True</span>, | |
| non_blocking=<span class="hljs-literal">True</span> | |
| ) | |
| apply_group_offloading( | |
| pipeline.text_encoder, | |
| onload_device=onload_device, | |
| offload_type=<span class="hljs-string">"leaf_level"</span>, | |
| use_stream=<span class="hljs-literal">True</span>, | |
| non_blocking=<span class="hljs-literal">True</span> | |
| ) | |
| <span class="hljs-comment"># 编译</span> | |
| pipeline.transformer.<span class="hljs-built_in">compile</span>() | |
| prompt = <span class="hljs-string">""" | |
| The camera rushes from far to near in a low-angle shot, | |
| revealing a white ferret on a log. It plays, leaps into the water, and emerges, as the camera zooms in | |
| for a close-up. Water splashes berry bushes nearby, while moss, snow, and leaves blanket the ground. | |
| Birch trees and a light blue sky frame the scene, with ferns in the foreground. Side lighting casts dynamic | |
| shadows and warm highlights. Medium composition, front view, low angle, with depth of field. | |
| """</span> | |
| negative_prompt = <span class="hljs-string">""" | |
| Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, | |
| low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, | |
| misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards | |
| """</span> | |
| output = pipeline( | |
| prompt=prompt, | |
| negative_prompt=negative_prompt, | |
| num_frames=<span class="hljs-number">81</span>, | |
| guidance_scale=<span class="hljs-number">5.0</span>, | |
| ).frames[<span class="hljs-number">0</span>] | |
| export_to_video(output, <span class="hljs-string">"output.mp4"</span>, fps=<span class="hljs-number">16</span>)`,wrap:!1}}),{c(){n=J("p"),n.innerHTML=u,m=i(),p=J("p"),p.textContent=a,o=i(),d(y.$$.fragment)},l(c){n=r(c,"P",{"data-svelte-h":!0}),U(n)!=="svelte-xfxsnk"&&(n.innerHTML=u),m=M(c),p=r(c,"P",{"data-svelte-h":!0}),U(p)!=="svelte-iejgsl"&&(p.textContent=a),o=M(c),b(y.$$.fragment,c)},m(c,T){e(c,n,T),e(c,m,T),e(c,p,T),e(c,o,T),f(y,c,T),Z=!0},p:Zl,i(c){Z||(w(y.$$.fragment,c),Z=!0)},o(c){h(y.$$.fragment,c),Z=!1},d(c){c&&(s(n),s(m),s(p),s(o)),j(y,c)}}}function Cl(B){let n,u,m,p;return n=new jl({props:{id:"offloading",option:"model CPU offloading",$$slots:{default:[vl]},$$scope:{ctx:B}}}),m=new jl({props:{id:"offloading",option:"group offloading",$$slots:{default:[Rl]},$$scope:{ctx:B}}}),{c(){d(n.$$.fragment),u=i(),d(m.$$.fragment)},l(a){b(n.$$.fragment,a),u=M(a),b(m.$$.fragment,a)},m(a,o){f(n,a,o),e(a,u,o),f(m,a,o),p=!0},p(a,o){const y={};o&2&&(y.$$scope={dirty:o,ctx:a}),n.$set(y);const Z={};o&2&&(Z.$$scope={dirty:o,ctx:a}),m.$set(Z)},i(a){p||(w(n.$$.fragment,a),w(m.$$.fragment,a),p=!0)},o(a){h(n.$$.fragment,a),h(m.$$.fragment,a),p=!1},d(a){a&&s(u),j(n,a),j(m,a)}}}function Yl(B){let n,u,m,p,a,o,y,Z='优化模型通常涉及<a href="./fp16">推理速度</a>和<a href="./memory">内存使用</a>之间的权衡。例如,虽然<a href="./cache">缓存</a>可以提高推理速度,但它也会增加内存消耗,因为它需要存储中间注意力层的输出。一种更平衡的优化策略结合了量化模型、<a href="./fp16#torchcompile">torch.compile</a> 和各种<a href="./memory#offloading">卸载方法</a>。',c,T,ml='<p>查看 <a href="./fp16#torchcompile">torch.compile</a> 指南以了解更多关于编译以及如何在此处应用的信息。例如,区域编译可以显著减少编译时间,而不会放弃任何加速。</p>',z,I,cl='对于图像生成,结合量化和<a href="./memory#model-offloading">模型卸载</a>通常可以在质量、速度和内存之间提供最佳权衡。组卸载对于图像生成效果不佳,因为如果计算内核更快完成,通常不可能<em>完全</em>重叠数据传输。这会导致 CPU 和 GPU 之间的一些通信开销。',$,W,yl='对于视频生成,结合量化和<a href="./memory#group-offloading">组卸载</a>往往更好,因为视频模型更受计算限制。',S,_,Jl="下表提供了优化策略组合及其对 Flux 延迟和内存使用的影响的比较。",H,X,rl="<thead><tr><th>组合</th> <th>延迟 (s)</th> <th>内存使用 (GB)</th></tr></thead> <tbody><tr><td>量化</td> <td>32.602</td> <td>14.9453</td></tr> <tr><td>量化, torch.compile</td> <td>25.847</td> <td>14.9448</td></tr> <tr><td>量化, torch.compile, 模型 CPU 卸载</td> <td>32.312</td> <td>12.2369</td></tr></tbody>",x,g,ul="这些结果是在 Flux 上使用 RTX 4090 进行基准测试的。transformer 和 text_encoder 组件已量化。如果您有兴趣评估自己的模型,请参考[基准测试脚本](https://gist.github.com/sayakpaul/0db9d8eeeb3d2a0e5ed7cf0d9ca19b7d)。",A,V,Ul='本指南将向您展示如何使用 <a href="../quantization/bitsandbytes#torchcompile">bitsandbytes</a> 编译和卸载量化模型。确保您正在使用 <a href="https://pytorch.org/get-started/locally/" rel="nofollow">PyTorch nightly</a> 和最新版本的 bitsandbytes。',D,v,L,R,P,C,Tl='首先通过<a href="../quantization/overview">量化</a>模型来减少存储所需的内存,并<a href="./fp16#torchcompile">编译</a>它以加速推理。',K,Y,dl='配置 <a href="https://docs.pytorch.org/docs/stable/torch.compiler_dynamo_overview.html" rel="nofollow">Dynamo</a> <code>capture_dynamic_output_shape_ops = True</code> 以在编译 bitsandbytes 模型时处理动态输出。',O,Q,ll,F,tl,k,bl="除了量化和 torch.compile,如果您需要进一步减少内存使用,可以尝试卸载。卸载根据需要将各种层或模型组件从 CPU 移动到 GPU 进行计算。",sl,N,fl='在卸载期间配置 <a href="https://docs.pytorch.org/docs/stable/torch.compiler_dynamo_overview.html" rel="nofollow">Dynamo</a> <code>cache_size_limit</code> 以避免过多的重新编译,并设置 <code>capture_dynamic_output_shape_ops = True</code> 以在编译 bitsandbytes 模型时处理动态输出。',el,G,al,q,nl,E,ol;return a=new Ml({props:{title:"编译和卸载量化模型",local:"编译和卸载量化模型",headingTag:"h1"}}),v=new pl({props:{code:"cGlwJTIwaW5zdGFsbCUyMC1VJTIwYml0c2FuZGJ5dGVz",highlighted:"pip install -U bitsandbytes",wrap:!1}}),R=new Ml({props:{title:"量化和 torch.compile",local:"量化和-torchcompile",headingTag:"h2"}}),Q=new pl({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwRGlmZnVzaW9uUGlwZWxpbmUlMEFmcm9tJTIwZGlmZnVzZXJzLnF1YW50aXplcnMlMjBpbXBvcnQlMjBQaXBlbGluZVF1YW50aXphdGlvbkNvbmZpZyUwQSUwQXRvcmNoLl9keW5hbW8uY29uZmlnLmNhcHR1cmVfZHluYW1pY19vdXRwdXRfc2hhcGVfb3BzJTIwJTNEJTIwVHJ1ZSUwQSUwQSUyMyUyMCVFOSU4NyU4RiVFNSU4QyU5NiUwQXBpcGVsaW5lX3F1YW50X2NvbmZpZyUyMCUzRCUyMFBpcGVsaW5lUXVhbnRpemF0aW9uQ29uZmlnKCUwQSUyMCUyMCUyMCUyMHF1YW50X2JhY2tlbmQlM0QlMjJiaXRzYW5kYnl0ZXNfNGJpdCUyMiUyQyUwQSUyMCUyMCUyMCUyMHF1YW50X2t3YXJncyUzRCU3QiUyMmxvYWRfaW5fNGJpdCUyMiUzQSUyMFRydWUlMkMlMjAlMjJibmJfNGJpdF9xdWFudF90eXBlJTIyJTNBJTIwJTIybmY0JTIyJTJDJTIwJTIyYm5iXzRiaXRfY29tcHV0ZV9kdHlwZSUyMiUzQSUyMHRvcmNoLmJmbG9hdDE2JTdEJTJDJTBBJTIwJTIwJTIwJTIwY29tcG9uZW50c190b19xdWFudGl6ZSUzRCU1QiUyMnRyYW5zZm9ybWVyJTIyJTJDJTIwJTIydGV4dF9lbmNvZGVyXzIlMjIlNUQlMkMlMEEpJTBBcGlwZWxpbmUlMjAlM0QlMjBEaWZmdXNpb25QaXBlbGluZS5mcm9tX3ByZXRyYWluZWQoJTBBJTIwJTIwJTIwJTIwJTIyYmxhY2stZm9yZXN0LWxhYnMlMkZGTFVYLjEtZGV2JTIyJTJDJTBBJTIwJTIwJTIwJTIwcXVhbnRpemF0aW9uX2NvbmZpZyUzRHBpcGVsaW5lX3F1YW50X2NvbmZpZyUyQyUwQSUyMCUyMCUyMCUyMHRvcmNoX2R0eXBlJTNEdG9yY2guYmZsb2F0MTYlMkMlMEEpLnRvKCUyMmN1ZGElMjIpJTBBJTBBJTIzJTIwJUU3JUJDJTk2JUU4JUFGJTkxJTBBcGlwZWxpbmUudHJhbnNmb3JtZXIudG8obWVtb3J5X2Zvcm1hdCUzRHRvcmNoLmNoYW5uZWxzX2xhc3QpJTBBcGlwZWxpbmUudHJhbnNmb3JtZXIuY29tcGlsZShtb2RlJTNEJTIybWF4LWF1dG90dW5lJTIyJTJDJTIwZnVsbGdyYXBoJTNEVHJ1ZSklMEFwaXBlbGluZSglMjIlMjIlMjIlMEElMjAlMjAlMjAlMjBjaW5lbWF0aWMlMjBmaWxtJTIwc3RpbGwlMjBvZiUyMGElMjBjYXQlMjBzaXBwaW5nJTIwYSUyMG1hcmdhcml0YSUyMGluJTIwYSUyMHBvb2wlMjBpbiUyMFBhbG0lMjBTcHJpbmdzJTJDJTIwQ2FsaWZvcm5pYSUwQSUyMCUyMCUyMCUyMGhpZ2hseSUyMGRldGFpbGVkJTJDJTIwaGlnaCUyMGJ1ZGdldCUyMGhvbGx5d29vZCUyMG1vdmllJTJDJTIwY2luZW1hc2NvcGUlMkMlMjBtb29keSUyQyUyMGVwaWMlMkMlMjBnb3JnZW91cyUyQyUyMGZpbG0lMjBncmFpbiUwQSUyMiUyMiUyMiUwQSkuaW1hZ2VzJTVCMCU1RA==",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> DiffusionPipeline | |
| <span class="hljs-keyword">from</span> diffusers.quantizers <span class="hljs-keyword">import</span> PipelineQuantizationConfig | |
| torch._dynamo.config.capture_dynamic_output_shape_ops = <span class="hljs-literal">True</span> | |
| <span class="hljs-comment"># 量化</span> | |
| pipeline_quant_config = PipelineQuantizationConfig( | |
| quant_backend=<span class="hljs-string">"bitsandbytes_4bit"</span>, | |
| quant_kwargs={<span class="hljs-string">"load_in_4bit"</span>: <span class="hljs-literal">True</span>, <span class="hljs-string">"bnb_4bit_quant_type"</span>: <span class="hljs-string">"nf4"</span>, <span class="hljs-string">"bnb_4bit_compute_dtype"</span>: torch.bfloat16}, | |
| components_to_quantize=[<span class="hljs-string">"transformer"</span>, <span class="hljs-string">"text_encoder_2"</span>], | |
| ) | |
| pipeline = DiffusionPipeline.from_pretrained( | |
| <span class="hljs-string">"black-forest-labs/FLUX.1-dev"</span>, | |
| quantization_config=pipeline_quant_config, | |
| torch_dtype=torch.bfloat16, | |
| ).to(<span class="hljs-string">"cuda"</span>) | |
| <span class="hljs-comment"># 编译</span> | |
| pipeline.transformer.to(memory_format=torch.channels_last) | |
| pipeline.transformer.<span class="hljs-built_in">compile</span>(mode=<span class="hljs-string">"max-autotune"</span>, fullgraph=<span class="hljs-literal">True</span>) | |
| pipeline(<span class="hljs-string">""" | |
| cinematic film still of a cat sipping a margarita in a pool in Palm Springs, California | |
| highly detailed, high budget hollywood movie, cinemascope, moody, epic, gorgeous, film grain | |
| """</span> | |
| ).images[<span class="hljs-number">0</span>]`,wrap:!1}}),F=new Ml({props:{title:"量化、torch.compile 和卸载",local:"量化torchcompile-和卸载",headingTag:"h2"}}),G=new Vl({props:{id:"offloading",options:["model CPU offloading","group offloading"],$$slots:{default:[Cl]},$$scope:{ctx:B}}}),q=new gl({props:{source:"https://github.com/huggingface/diffusers/blob/main/docs/source/zh/optimization/speed-memory-optims.md"}}),{c(){n=J("meta"),u=i(),m=J("p"),p=i(),d(a.$$.fragment),o=i(),y=J("p"),y.innerHTML=Z,c=i(),T=J("blockquote"),T.innerHTML=ml,z=i(),I=J("p"),I.innerHTML=cl,$=i(),W=J("p"),W.innerHTML=yl,S=i(),_=J("p"),_.textContent=Jl,H=i(),X=J("table"),X.innerHTML=rl,x=i(),g=J("small"),g.textContent=ul,A=i(),V=J("p"),V.innerHTML=Ul,D=i(),d(v.$$.fragment),L=i(),d(R.$$.fragment),P=i(),C=J("p"),C.innerHTML=Tl,K=i(),Y=J("p"),Y.innerHTML=dl,O=i(),d(Q.$$.fragment),ll=i(),d(F.$$.fragment),tl=i(),k=J("p"),k.textContent=bl,sl=i(),N=J("p"),N.innerHTML=fl,el=i(),d(G.$$.fragment),al=i(),d(q.$$.fragment),nl=i(),E=J("p"),this.h()},l(l){const t=_l("svelte-u9bgzb",document.head);n=r(t,"META",{name:!0,content:!0}),t.forEach(s),u=M(l),m=r(l,"P",{}),hl(m).forEach(s),p=M(l),b(a.$$.fragment,l),o=M(l),y=r(l,"P",{"data-svelte-h":!0}),U(y)!=="svelte-181wuip"&&(y.innerHTML=Z),c=M(l),T=r(l,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),U(T)!=="svelte-179o7uv"&&(T.innerHTML=ml),z=M(l),I=r(l,"P",{"data-svelte-h":!0}),U(I)!=="svelte-15a734g"&&(I.innerHTML=cl),$=M(l),W=r(l,"P",{"data-svelte-h":!0}),U(W)!=="svelte-12155we"&&(W.innerHTML=yl),S=M(l),_=r(l,"P",{"data-svelte-h":!0}),U(_)!=="svelte-zqzew3"&&(_.textContent=Jl),H=M(l),X=r(l,"TABLE",{"data-svelte-h":!0}),U(X)!=="svelte-yb0epc"&&(X.innerHTML=rl),x=M(l),g=r(l,"SMALL",{"data-svelte-h":!0}),U(g)!=="svelte-moa23m"&&(g.textContent=ul),A=M(l),V=r(l,"P",{"data-svelte-h":!0}),U(V)!=="svelte-jsp2de"&&(V.innerHTML=Ul),D=M(l),b(v.$$.fragment,l),L=M(l),b(R.$$.fragment,l),P=M(l),C=r(l,"P",{"data-svelte-h":!0}),U(C)!=="svelte-9fcnzw"&&(C.innerHTML=Tl),K=M(l),Y=r(l,"P",{"data-svelte-h":!0}),U(Y)!=="svelte-5t95ee"&&(Y.innerHTML=dl),O=M(l),b(Q.$$.fragment,l),ll=M(l),b(F.$$.fragment,l),tl=M(l),k=r(l,"P",{"data-svelte-h":!0}),U(k)!=="svelte-14h7pc0"&&(k.textContent=bl),sl=M(l),N=r(l,"P",{"data-svelte-h":!0}),U(N)!=="svelte-gy5ew7"&&(N.innerHTML=fl),el=M(l),b(G.$$.fragment,l),al=M(l),b(q.$$.fragment,l),nl=M(l),E=r(l,"P",{}),hl(E).forEach(s),this.h()},h(){il(n,"name","hf:doc:metadata"),il(n,"content",Ql),il(T,"class","tip")},m(l,t){Xl(document.head,n),e(l,u,t),e(l,m,t),e(l,p,t),f(a,l,t),e(l,o,t),e(l,y,t),e(l,c,t),e(l,T,t),e(l,z,t),e(l,I,t),e(l,$,t),e(l,W,t),e(l,S,t),e(l,_,t),e(l,H,t),e(l,X,t),e(l,x,t),e(l,g,t),e(l,A,t),e(l,V,t),e(l,D,t),f(v,l,t),e(l,L,t),f(R,l,t),e(l,P,t),e(l,C,t),e(l,K,t),e(l,Y,t),e(l,O,t),f(Q,l,t),e(l,ll,t),f(F,l,t),e(l,tl,t),e(l,k,t),e(l,sl,t),e(l,N,t),e(l,el,t),f(G,l,t),e(l,al,t),f(q,l,t),e(l,nl,t),e(l,E,t),ol=!0},p(l,[t]){const wl={};t&2&&(wl.$$scope={dirty:t,ctx:l}),G.$set(wl)},i(l){ol||(w(a.$$.fragment,l),w(v.$$.fragment,l),w(R.$$.fragment,l),w(Q.$$.fragment,l),w(F.$$.fragment,l),w(G.$$.fragment,l),w(q.$$.fragment,l),ol=!0)},o(l){h(a.$$.fragment,l),h(v.$$.fragment,l),h(R.$$.fragment,l),h(Q.$$.fragment,l),h(F.$$.fragment,l),h(G.$$.fragment,l),h(q.$$.fragment,l),ol=!1},d(l){l&&(s(u),s(m),s(p),s(o),s(y),s(c),s(T),s(z),s(I),s($),s(W),s(S),s(_),s(H),s(X),s(x),s(g),s(A),s(V),s(D),s(L),s(P),s(C),s(K),s(Y),s(O),s(ll),s(tl),s(k),s(sl),s(N),s(el),s(al),s(nl),s(E)),s(n),j(a,l),j(v,l),j(R,l),j(Q,l),j(F,l),j(G,l),j(q,l)}}}const Ql='{"title":"编译和卸载量化模型","local":"编译和卸载量化模型","sections":[{"title":"量化和 torch.compile","local":"量化和-torchcompile","sections":[],"depth":2},{"title":"量化、torch.compile 和卸载","local":"量化torchcompile-和卸载","sections":[],"depth":2}],"depth":1}';function Fl(B){return Gl(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class $l extends Il{constructor(n){super(),Wl(this,n,Fl,Yl,Bl,{})}}export{$l as component}; | |
Xet Storage Details
- Size:
- 27 kB
- Xet hash:
- 4d590dcbfbc44fd74e45de9581e69af8849828bdf76a3f3eff8fd48ad9655d1c
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.