Buckets:
| import{s as Bl,o as Gl,n as ml}from"../chunks/scheduler.5c93273d.js";import{S as Il,i as Wl,g as r,s as m,r as d,A as _l,h as u,f as s,c as M,j as hl,u as f,x as U,k as jl,y as Xl,a,v as b,d as w,t as h,w as j}from"../chunks/index.e43dd92b.js";import{T as gl}from"../chunks/Tip.1cbfe904.js";import{C as ol}from"../chunks/CodeBlock.6896320e.js";import{H as il,E as Vl}from"../chunks/getInferenceSnippets.7d64e4c6.js";import{H as vl,a as Zl}from"../chunks/HfOption.d50154c3.js";function Rl(Z){let e,J='查看 <a href="./fp16#torchcompile">torch.compile</a> 指南以了解更多关于编译以及如何在此处应用的信息。例如,区域编译可以显著减少编译时间,而不会放弃任何加速。';return{c(){e=r("p"),e.innerHTML=J},l(o){e=u(o,"P",{"data-svelte-h":!0}),U(e)!=="svelte-1a3z1wk"&&(e.innerHTML=J)},m(o,i){a(o,e,i)},p:ml,d(o){o&&s(e)}}}function Cl(Z){let e,J='<a href="./memory#model-offloading">模型 CPU 卸载</a> 将单个管道组件(如 transformer 模型)在需要计算时移动到 GPU。否则,它会被卸载到 CPU。',o,i,n;return i=new ol({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwRGlmZnVzaW9uUGlwZWxpbmUlMEFmcm9tJTIwZGlmZnVzZXJzLnF1YW50aXplcnMlMjBpbXBvcnQlMjBQaXBlbGluZVF1YW50aXphdGlvbkNvbmZpZyUwQSUwQXRvcmNoLl9keW5hbW8uY29uZmlnLmNhY2hlX3NpemVfbGltaXQlMjAlM0QlMjAxMDAwJTBBdG9yY2guX2R5bmFtby5jb25maWcuY2FwdHVyZV9keW5hbWljX291dHB1dF9zaGFwZV9vcHMlMjAlM0QlMjBUcnVlJTBBJTBBJTIzJTIwJUU5JTg3JThGJUU1JThDJTk2JTBBcGlwZWxpbmVfcXVhbnRfY29uZmlnJTIwJTNEJTIwUGlwZWxpbmVRdWFudGl6YXRpb25Db25maWcoJTBBJTIwJTIwJTIwJTIwcXVhbnRfYmFja2VuZCUzRCUyMmJpdHNhbmRieXRlc180Yml0JTIyJTJDJTBBJTIwJTIwJTIwJTIwcXVhbnRfa3dhcmdzJTNEJTdCJTIybG9hZF9pbl80Yml0JTIyJTNBJTIwVHJ1ZSUyQyUyMCUyMmJuYl80Yml0X3F1YW50X3R5cGUlMjIlM0ElMjAlMjJuZjQlMjIlMkMlMjAlMjJibmJfNGJpdF9jb21wdXRlX2R0eXBlJTIyJTNBJTIwdG9yY2guYmZsb2F0MTYlN0QlMkMlMEElMjAlMjAlMjAlMjBjb21wb25lbnRzX3RvX3F1YW50aXplJTNEJTVCJTIydHJhbnNmb3JtZXIlMjIlMkMlMjAlMjJ0ZXh0X2VuY29kZXJfMiUyMiU1RCUyQyUwQSklMEFwaXBlbGluZSUyMCUzRCUyMERpZmZ1c2lvblBpcGVsaW5lLmZyb21fcHJldHJhaW5lZCglMEElMjAlMjAlMjAlMjAlMjJibGFjay1mb3Jlc3QtbGFicyUyRkZMVVguMS1kZXYlMjIlMkMlMEElMjAlMjAlMjAlMjBxdWFudGl6YXRpb25fY29uZmlnJTNEcGlwZWxpbmVfcXVhbnRfY29uZmlnJTJDJTBBJTIwJTIwJTIwJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5iZmxvYXQxNiUyQyUwQSkudG8oJTIyY3VkYSUyMiklMEElMEElMjMlMjAlRTYlQTglQTElRTUlOUUlOEIlMjBDUFUlMjAlRTUlOEQlQjglRTglQkQlQkQlMEFwaXBlbGluZS5lbmFibGVfbW9kZWxfY3B1X29mZmxvYWQoKSUwQSUwQSUyMyUyMCVFNyVCQyU5NiVFOCVBRiU5MSUwQXBpcGVsaW5lLnRyYW5zZm9ybWVyLmNvbXBpbGUoKSUwQXBpcGVsaW5lKCUwQSUyMCUyMCUyMCUyMCUyMmNpbmVtYXRpYyUyMGZpbG0lMjBzdGlsbCUyMG9mJTIwYSUyMGNhdCUyMHNpcHBpbmclMjBhJTIwbWFyZ2FyaXRhJTIwaW4lMjBhJTIwcG9vbCUyMGluJTIwUGFsbSUyMFNwcmluZ3MlMkMlMjBDYWxpZm9ybmlhJTJDJTIwaGlnaGx5JTIwZGV0YWlsZWQlMkMlMjBoaWdoJTIwYnVkZ2V0JTIwaG9sbHl3b29kJTIwbW92aWUlMkMlMjBjaW5lbWFzY29wZSUyQyUyMG1vb2R5JTJDJTIwZXBpYyUyQyUyMGdvcmdlb3VzJTJDJTIwZmlsbSUyMGdyYWluJTIyJTBBKS5pbWFnZXMlNUIwJTVE",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> DiffusionPipeline | |
| <span class="hljs-keyword">from</span> diffusers.quantizers <span class="hljs-keyword">import</span> PipelineQuantizationConfig | |
| torch._dynamo.config.cache_size_limit = <span class="hljs-number">1000</span> | |
| torch._dynamo.config.capture_dynamic_output_shape_ops = <span class="hljs-literal">True</span> | |
| <span class="hljs-comment"># 量化</span> | |
| pipeline_quant_config = PipelineQuantizationConfig( | |
| quant_backend=<span class="hljs-string">"bitsandbytes_4bit"</span>, | |
| quant_kwargs={<span class="hljs-string">"load_in_4bit"</span>: <span class="hljs-literal">True</span>, <span class="hljs-string">"bnb_4bit_quant_type"</span>: <span class="hljs-string">"nf4"</span>, <span class="hljs-string">"bnb_4bit_compute_dtype"</span>: torch.bfloat16}, | |
| components_to_quantize=[<span class="hljs-string">"transformer"</span>, <span class="hljs-string">"text_encoder_2"</span>], | |
| ) | |
| pipeline = DiffusionPipeline.from_pretrained( | |
| <span class="hljs-string">"black-forest-labs/FLUX.1-dev"</span>, | |
| quantization_config=pipeline_quant_config, | |
| torch_dtype=torch.bfloat16, | |
| ).to(<span class="hljs-string">"cuda"</span>) | |
| <span class="hljs-comment"># 模型 CPU 卸载</span> | |
| pipeline.enable_model_cpu_offload() | |
| <span class="hljs-comment"># 编译</span> | |
| pipeline.transformer.<span class="hljs-built_in">compile</span>() | |
| pipeline( | |
| <span class="hljs-string">"cinematic film still of a cat sipping a margarita in a pool in Palm Springs, California, highly detailed, high budget hollywood movie, cinemascope, moody, epic, gorgeous, film grain"</span> | |
| ).images[<span class="hljs-number">0</span>]`,wrap:!1}}),{c(){e=r("p"),e.innerHTML=J,o=m(),d(i.$$.fragment)},l(p){e=u(p,"P",{"data-svelte-h":!0}),U(e)!=="svelte-4ay8nf"&&(e.innerHTML=J),o=M(p),f(i.$$.fragment,p)},m(p,y){a(p,e,y),a(p,o,y),b(i,p,y),n=!0},p:ml,i(p){n||(w(i.$$.fragment,p),n=!0)},o(p){h(i.$$.fragment,p),n=!1},d(p){p&&(s(e),s(o)),j(i,p)}}}function Yl(Z){let e,J='<a href="./memory#group-offloading">组卸载</a> 将单个管道组件(如变换器模型)的内部层移动到 GPU 进行计算,并在不需要时将其卸载。同时,它使用 <a href="./memory#cuda-stream">CUDA 流</a> 功能来预取下一层以执行。',o,i,n="通过重叠计算和数据传输,它比模型 CPU 卸载更快,同时还能节省内存。",p,y,B;return y=new ol({props:{code:"JTIzJTIwcGlwJTIwaW5zdGFsbCUyMGZ0ZnklMEFpbXBvcnQlMjB0b3JjaCUwQWZyb20lMjBkaWZmdXNlcnMlMjBpbXBvcnQlMjBBdXRvTW9kZWwlMkMlMjBEaWZmdXNpb25QaXBlbGluZSUwQWZyb20lMjBkaWZmdXNlcnMuaG9va3MlMjBpbXBvcnQlMjBhcHBseV9ncm91cF9vZmZsb2FkaW5nJTBBZnJvbSUyMGRpZmZ1c2Vycy51dGlscyUyMGltcG9ydCUyMGV4cG9ydF90b192aWRlbyUwQWZyb20lMjBkaWZmdXNlcnMucXVhbnRpemVycyUyMGltcG9ydCUyMFBpcGVsaW5lUXVhbnRpemF0aW9uQ29uZmlnJTBBZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMFVNVDVFbmNvZGVyTW9kZWwlMEElMEF0b3JjaC5fZHluYW1vLmNvbmZpZy5jYWNoZV9zaXplX2xpbWl0JTIwJTNEJTIwMTAwMCUwQXRvcmNoLl9keW5hbW8uY29uZmlnLmNhcHR1cmVfZHluYW1pY19vdXRwdXRfc2hhcGVfb3BzJTIwJTNEJTIwVHJ1ZSUwQSUwQSUyMyUyMCVFOSU4NyU4RiVFNSU4QyU5NiUwQXBpcGVsaW5lX3F1YW50X2NvbmZpZyUyMCUzRCUyMFBpcGVsaW5lUXVhbnRpemF0aW9uQ29uZmlnKCUwQSUyMCUyMCUyMCUyMHF1YW50X2JhY2tlbmQlM0QlMjJiaXRzYW5kYnl0ZXNfNGJpdCUyMiUyQyUwQSUyMCUyMCUyMCUyMHF1YW50X2t3YXJncyUzRCU3QiUyMmxvYWRfaW5fNGJpdCUyMiUzQSUyMFRydWUlMkMlMjAlMjJibmJfNGJpdF9xdWFudF90eXBlJTIyJTNBJTIwJTIybmY0JTIyJTJDJTIwJTIyYm5iXzRiaXRfY29tcHV0ZV9kdHlwZSUyMiUzQSUyMHRvcmNoLmJmbG9hdDE2JTdEJTJDJTBBJTIwJTIwJTIwJTIwY29tcG9uZW50c190b19xdWFudGl6ZSUzRCU1QiUyMnRyYW5zZm9ybWVyJTIyJTJDJTIwJTIydGV4dF9lbmNvZGVyJTIyJTVEJTJDJTBBKSUwQSUwQXRleHRfZW5jb2RlciUyMCUzRCUyMFVNVDVFbmNvZGVyTW9kZWwuZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMCUyMldhbi1BSSUyRldhbjIuMS1UMlYtMTRCLURpZmZ1c2VycyUyMiUyQyUyMHN1YmZvbGRlciUzRCUyMnRleHRfZW5jb2RlciUyMiUyQyUyMHRvcmNoX2R0eXBlJTNEdG9yY2guYmZsb2F0MTYlMEEpJTBBcGlwZWxpbmUlMjAlM0QlMjBEaWZmdXNpb25QaXBlbGluZS5mcm9tX3ByZXRyYWluZWQoJTBBJTIwJTIwJTIwJTIwJTIyV2FuLUFJJTJGV2FuMi4xLVQyVi0xNEItRGlmZnVzZXJzJTIyJTJDJTBBJTIwJTIwJTIwJTIwcXVhbnRpemF0aW9uX2NvbmZpZyUzRHBpcGVsaW5lX3F1YW50X2NvbmZpZyUyQyUwQSUyMCUyMCUyMCUyMHRvcmNoX2R0eXBlJTNEdG9yY2guYmZsb2F0MTYlMkMlMEEpLnRvKCUyMmN1ZGElMjIpJTBBJTBBJTIzJTIwJUU3JUJCJTg0JUU1JThEJUI4JUU4JUJEJUJEJTBBb25sb2FkX2RldmljZSUyMCUzRCUyMHRvcmNoLmRldmljZSglMjJjdWRhJTIyKSUwQW9mZmxvYWRfZGV2aWNlJTIwJTNEJTIwdG9yY2guZGV2aWNlKCUyMmNwdSUyMiklMEElMEFwaXBlbGluZS50cmFuc2Zvcm1lci5lbmFibGVfZ3JvdXBfb2ZmbG9hZCglMEElMjAlMjAlMjAlMjBvbmxvYWRfZGV2aWNlJTNEb25sb2FkX2RldmljZSUyQyUwQSUyMCUyMCUyMCUyMG9mZmxvYWRfZGV2aWNlJTNEb2ZmbG9hZF9kZXZpY2UlMkMlMEElMjAlMjAlMjAlMjBvZmZsb2FkX3R5cGUlM0QlMjJsZWFmX2xldmVsJTIyJTJDJTBBJTIwJTIwJTIwJTIwdXNlX3N0cmVhbSUzRFRydWUlMkMlMEElMjAlMjAlMjAlMjBub25fYmxvY2tpbmclM0RUcnVlJTBBKSUwQXBpcGVsaW5lLnZhZS5lbmFibGVfZ3JvdXBfb2ZmbG9hZCglMEElMjAlMjAlMjAlMjBvbmxvYWRfZGV2aWNlJTNEb25sb2FkX2RldmljZSUyQyUwQSUyMCUyMCUyMCUyMG9mZmxvYWRfZGV2aWNlJTNEb2ZmbG9hZF9kZXZpY2UlMkMlMEElMjAlMjAlMjAlMjBvZmZsb2FkX3R5cGUlM0QlMjJsZWFmX2xldmVsJTIyJTJDJTBBJTIwJTIwJTIwJTIwdXNlX3N0cmVhbSUzRFRydWUlMkMlMEElMjAlMjAlMjAlMjBub25fYmxvY2tpbmclM0RUcnVlJTBBKSUwQWFwcGx5X2dyb3VwX29mZmxvYWRpbmcoJTBBJTIwJTIwJTIwJTIwcGlwZWxpbmUudGV4dF9lbmNvZGVyJTJDJTBBJTIwJTIwJTIwJTIwb25sb2FkX2RldmljZSUzRG9ubG9hZF9kZXZpY2UlMkMlMEElMjAlMjAlMjAlMjBvZmZsb2FkX3R5cGUlM0QlMjJsZWFmX2xldmVsJTIyJTJDJTBBJTIwJTIwJTIwJTIwdXNlX3N0cmVhbSUzRFRydWUlMkMlMEElMjAlMjAlMjAlMjBub25fYmxvY2tpbmclM0RUcnVlJTBBKSUwQSUwQSUyMyUyMCVFNyVCQyU5NiVFOCVBRiU5MSUwQXBpcGVsaW5lLnRyYW5zZm9ybWVyLmNvbXBpbGUoKSUwQSUwQXByb21wdCUyMCUzRCUyMCUyMiUyMiUyMiUwQVRoZSUyMGNhbWVyYSUyMHJ1c2hlcyUyMGZyb20lMjBmYXIlMjB0byUyMG5lYXIlMjBpbiUyMGElMjBsb3ctYW5nbGUlMjBzaG90JTJDJTIwJTBBcmV2ZWFsaW5nJTIwYSUyMHdoaXRlJTIwZmVycmV0JTIwb24lMjBhJTIwbG9nLiUyMEl0JTIwcGxheXMlMkMlMjBsZWFwcyUyMGludG8lMjB0aGUlMjB3YXRlciUyQyUyMGFuZCUyMGVtZXJnZXMlMkMlMjBhcyUyMHRoZSUyMGNhbWVyYSUyMHpvb21zJTIwaW4lMjAlMEFmb3IlMjBhJTIwY2xvc2UtdXAuJTIwV2F0ZXIlMjBzcGxhc2hlcyUyMGJlcnJ5JTIwYnVzaGVzJTIwbmVhcmJ5JTJDJTIwd2hpbGUlMjBtb3NzJTJDJTIwc25vdyUyQyUyMGFuZCUyMGxlYXZlcyUyMGJsYW5rZXQlMjB0aGUlMjBncm91bmQuJTIwJTBBQmlyY2glMjB0cmVlcyUyMGFuZCUyMGElMjBsaWdodCUyMGJsdWUlMjBza3klMjBmcmFtZSUyMHRoZSUyMHNjZW5lJTJDJTIwd2l0aCUyMGZlcm5zJTIwaW4lMjB0aGUlMjBmb3JlZ3JvdW5kLiUyMFNpZGUlMjBsaWdodGluZyUyMGNhc3RzJTIwZHluYW1pYyUyMCUwQXNoYWRvd3MlMjBhbmQlMjB3YXJtJTIwaGlnaGxpZ2h0cy4lMjBNZWRpdW0lMjBjb21wb3NpdGlvbiUyQyUyMGZyb250JTIwdmlldyUyQyUyMGxvdyUyMGFuZ2xlJTJDJTIwd2l0aCUyMGRlcHRoJTIwb2YlMjBmaWVsZC4lMEElMjIlMjIlMjIlMEFuZWdhdGl2ZV9wcm9tcHQlMjAlM0QlMjAlMjIlMjIlMjIlMEFCcmlnaHQlMjB0b25lcyUyQyUyMG92ZXJleHBvc2VkJTJDJTIwc3RhdGljJTJDJTIwYmx1cnJlZCUyMGRldGFpbHMlMkMlMjBzdWJ0aXRsZXMlMkMlMjBzdHlsZSUyQyUyMHdvcmtzJTJDJTIwcGFpbnRpbmdzJTJDJTIwaW1hZ2VzJTJDJTIwc3RhdGljJTJDJTIwb3ZlcmFsbCUyMGdyYXklMkMlMjB3b3JzdCUyMHF1YWxpdHklMkMlMjAlMEFsb3clMjBxdWFsaXR5JTJDJTIwSlBFRyUyMGNvbXByZXNzaW9uJTIwcmVzaWR1ZSUyQyUyMHVnbHklMkMlMjBpbmNvbXBsZXRlJTJDJTIwZXh0cmElMjBmaW5nZXJzJTJDJTIwcG9vcmx5JTIwZHJhd24lMjBoYW5kcyUyQyUyMHBvb3JseSUyMGRyYXduJTIwZmFjZXMlMkMlMjBkZWZvcm1lZCUyQyUyMGRpc2ZpZ3VyZWQlMkMlMjAlMEFtaXNzaGFwZW4lMjBsaW1icyUyQyUyMGZ1c2VkJTIwZmluZ2VycyUyQyUyMHN0aWxsJTIwcGljdHVyZSUyQyUyMG1lc3N5JTIwYmFja2dyb3VuZCUyQyUyMHRocmVlJTIwbGVncyUyQyUyMG1hbnklMjBwZW9wbGUlMjBpbiUyMHRoZSUyMGJhY2tncm91bmQlMkMlMjB3YWxraW5nJTIwYmFja3dhcmRzJTBBJTIyJTIyJTIyJTBBJTBBb3V0cHV0JTIwJTNEJTIwcGlwZWxpbmUoJTBBJTIwJTIwJTIwJTIwcHJvbXB0JTNEcHJvbXB0JTJDJTBBJTIwJTIwJTIwJTIwbmVnYXRpdmVfcHJvbXB0JTNEbmVnYXRpdmVfcHJvbXB0JTJDJTBBJTIwJTIwJTIwJTIwbnVtX2ZyYW1lcyUzRDgxJTJDJTBBJTIwJTIwJTIwJTIwZ3VpZGFuY2Vfc2NhbGUlM0Q1LjAlMkMlMEEpLmZyYW1lcyU1QjAlNUQlMEFleHBvcnRfdG9fdmlkZW8ob3V0cHV0JTJDJTIwJTIyb3V0cHV0Lm1wNCUyMiUyQyUyMGZwcyUzRDE2KQ==",highlighted:`<span class="hljs-comment"># pip install ftfy</span> | |
| <span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> AutoModel, DiffusionPipeline | |
| <span class="hljs-keyword">from</span> diffusers.hooks <span class="hljs-keyword">import</span> apply_group_offloading | |
| <span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> export_to_video | |
| <span class="hljs-keyword">from</span> diffusers.quantizers <span class="hljs-keyword">import</span> PipelineQuantizationConfig | |
| <span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> UMT5EncoderModel | |
| torch._dynamo.config.cache_size_limit = <span class="hljs-number">1000</span> | |
| torch._dynamo.config.capture_dynamic_output_shape_ops = <span class="hljs-literal">True</span> | |
| <span class="hljs-comment"># 量化</span> | |
| pipeline_quant_config = PipelineQuantizationConfig( | |
| quant_backend=<span class="hljs-string">"bitsandbytes_4bit"</span>, | |
| quant_kwargs={<span class="hljs-string">"load_in_4bit"</span>: <span class="hljs-literal">True</span>, <span class="hljs-string">"bnb_4bit_quant_type"</span>: <span class="hljs-string">"nf4"</span>, <span class="hljs-string">"bnb_4bit_compute_dtype"</span>: torch.bfloat16}, | |
| components_to_quantize=[<span class="hljs-string">"transformer"</span>, <span class="hljs-string">"text_encoder"</span>], | |
| ) | |
| text_encoder = UMT5EncoderModel.from_pretrained( | |
| <span class="hljs-string">"Wan-AI/Wan2.1-T2V-14B-Diffusers"</span>, subfolder=<span class="hljs-string">"text_encoder"</span>, torch_dtype=torch.bfloat16 | |
| ) | |
| pipeline = DiffusionPipeline.from_pretrained( | |
| <span class="hljs-string">"Wan-AI/Wan2.1-T2V-14B-Diffusers"</span>, | |
| quantization_config=pipeline_quant_config, | |
| torch_dtype=torch.bfloat16, | |
| ).to(<span class="hljs-string">"cuda"</span>) | |
| <span class="hljs-comment"># 组卸载</span> | |
| onload_device = torch.device(<span class="hljs-string">"cuda"</span>) | |
| offload_device = torch.device(<span class="hljs-string">"cpu"</span>) | |
| pipeline.transformer.enable_group_offload( | |
| onload_device=onload_device, | |
| offload_device=offload_device, | |
| offload_type=<span class="hljs-string">"leaf_level"</span>, | |
| use_stream=<span class="hljs-literal">True</span>, | |
| non_blocking=<span class="hljs-literal">True</span> | |
| ) | |
| pipeline.vae.enable_group_offload( | |
| onload_device=onload_device, | |
| offload_device=offload_device, | |
| offload_type=<span class="hljs-string">"leaf_level"</span>, | |
| use_stream=<span class="hljs-literal">True</span>, | |
| non_blocking=<span class="hljs-literal">True</span> | |
| ) | |
| apply_group_offloading( | |
| pipeline.text_encoder, | |
| onload_device=onload_device, | |
| offload_type=<span class="hljs-string">"leaf_level"</span>, | |
| use_stream=<span class="hljs-literal">True</span>, | |
| non_blocking=<span class="hljs-literal">True</span> | |
| ) | |
| <span class="hljs-comment"># 编译</span> | |
| pipeline.transformer.<span class="hljs-built_in">compile</span>() | |
| prompt = <span class="hljs-string">""" | |
| The camera rushes from far to near in a low-angle shot, | |
| revealing a white ferret on a log. It plays, leaps into the water, and emerges, as the camera zooms in | |
| for a close-up. Water splashes berry bushes nearby, while moss, snow, and leaves blanket the ground. | |
| Birch trees and a light blue sky frame the scene, with ferns in the foreground. Side lighting casts dynamic | |
| shadows and warm highlights. Medium composition, front view, low angle, with depth of field. | |
| """</span> | |
| negative_prompt = <span class="hljs-string">""" | |
| Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, | |
| low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, | |
| misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards | |
| """</span> | |
| output = pipeline( | |
| prompt=prompt, | |
| negative_prompt=negative_prompt, | |
| num_frames=<span class="hljs-number">81</span>, | |
| guidance_scale=<span class="hljs-number">5.0</span>, | |
| ).frames[<span class="hljs-number">0</span>] | |
| export_to_video(output, <span class="hljs-string">"output.mp4"</span>, fps=<span class="hljs-number">16</span>)`,wrap:!1}}),{c(){e=r("p"),e.innerHTML=J,o=m(),i=r("p"),i.textContent=n,p=m(),d(y.$$.fragment)},l(c){e=u(c,"P",{"data-svelte-h":!0}),U(e)!=="svelte-xfxsnk"&&(e.innerHTML=J),o=M(c),i=u(c,"P",{"data-svelte-h":!0}),U(i)!=="svelte-iejgsl"&&(i.textContent=n),p=M(c),f(y.$$.fragment,c)},m(c,T){a(c,e,T),a(c,o,T),a(c,i,T),a(c,p,T),b(y,c,T),B=!0},p:ml,i(c){B||(w(y.$$.fragment,c),B=!0)},o(c){h(y.$$.fragment,c),B=!1},d(c){c&&(s(e),s(o),s(i),s(p)),j(y,c)}}}function Ql(Z){let e,J,o,i;return e=new Zl({props:{id:"offloading",option:"model CPU offloading",$$slots:{default:[Cl]},$$scope:{ctx:Z}}}),o=new Zl({props:{id:"offloading",option:"group offloading",$$slots:{default:[Yl]},$$scope:{ctx:Z}}}),{c(){d(e.$$.fragment),J=m(),d(o.$$.fragment)},l(n){f(e.$$.fragment,n),J=M(n),f(o.$$.fragment,n)},m(n,p){b(e,n,p),a(n,J,p),b(o,n,p),i=!0},p(n,p){const y={};p&2&&(y.$$scope={dirty:p,ctx:n}),e.$set(y);const B={};p&2&&(B.$$scope={dirty:p,ctx:n}),o.$set(B)},i(n){i||(w(e.$$.fragment,n),w(o.$$.fragment,n),i=!0)},o(n){h(e.$$.fragment,n),h(o.$$.fragment,n),i=!1},d(n){n&&s(J),j(e,n),j(o,n)}}}function Fl(Z){let e,J,o,i,n,p,y,B='优化模型通常涉及<a href="./fp16">推理速度</a>和<a href="./memory">内存使用</a>之间的权衡。例如,虽然<a href="./cache">缓存</a>可以提高推理速度,但它也会增加内存消耗,因为它需要存储中间注意力层的输出。一种更平衡的优化策略结合了量化模型、<a href="./fp16#torchcompile">torch.compile</a> 和各种<a href="./memory#offloading">卸载方法</a>。',c,T,E,I,Ml='对于图像生成,结合量化和<a href="./memory#model-offloading">模型卸载</a>通常可以在质量、速度和内存之间提供最佳权衡。组卸载对于图像生成效果不佳,因为如果计算内核更快完成,通常不可能<em>完全</em>重叠数据传输。这会导致 CPU 和 GPU 之间的一些通信开销。',z,W,cl='对于视频生成,结合量化和<a href="./memory#group-offloading">组卸载</a>往往更好,因为视频模型更受计算限制。',S,_,yl="下表提供了优化策略组合及其对 Flux 延迟和内存使用的影响的比较。",H,X,Jl="<thead><tr><th>组合</th> <th>延迟 (s)</th> <th>内存使用 (GB)</th></tr></thead> <tbody><tr><td>量化</td> <td>32.602</td> <td>14.9453</td></tr> <tr><td>量化, torch.compile</td> <td>25.847</td> <td>14.9448</td></tr> <tr><td>量化, torch.compile, 模型 CPU 卸载</td> <td>32.312</td> <td>12.2369</td></tr></tbody>",x,g,rl="这些结果是在 Flux 上使用 RTX 4090 进行基准测试的。transformer 和 text_encoder 组件已量化。如果您有兴趣评估自己的模型,请参考[基准测试脚本](https://gist.github.com/sayakpaul/0db9d8eeeb3d2a0e5ed7cf0d9ca19b7d)。",A,V,ul='本指南将向您展示如何使用 <a href="../quantization/bitsandbytes#torchcompile">bitsandbytes</a> 编译和卸载量化模型。确保您正在使用 <a href="https://pytorch.org/get-started/locally/" rel="nofollow">PyTorch nightly</a> 和最新版本的 bitsandbytes。',D,v,L,R,P,C,Ul='首先通过<a href="../quantization/overview">量化</a>模型来减少存储所需的内存,并<a href="./fp16#torchcompile">编译</a>它以加速推理。',K,Y,Tl='配置 <a href="https://docs.pytorch.org/docs/stable/torch.compiler_dynamo_overview.html" rel="nofollow">Dynamo</a> <code>capture_dynamic_output_shape_ops = True</code> 以在编译 bitsandbytes 模型时处理动态输出。',O,Q,ll,F,tl,k,dl="除了量化和 torch.compile,如果您需要进一步减少内存使用,可以尝试卸载。卸载根据需要将各种层或模型组件从 CPU 移动到 GPU 进行计算。",sl,N,fl='在卸载期间配置 <a href="https://docs.pytorch.org/docs/stable/torch.compiler_dynamo_overview.html" rel="nofollow">Dynamo</a> <code>cache_size_limit</code> 以避免过多的重新编译,并设置 <code>capture_dynamic_output_shape_ops = True</code> 以在编译 bitsandbytes 模型时处理动态输出。',el,G,al,$,nl,q,pl;return n=new il({props:{title:"编译和卸载量化模型",local:"编译和卸载量化模型",headingTag:"h1"}}),T=new gl({props:{warning:!1,$$slots:{default:[Rl]},$$scope:{ctx:Z}}}),v=new ol({props:{code:"cGlwJTIwaW5zdGFsbCUyMC1VJTIwYml0c2FuZGJ5dGVz",highlighted:"pip install -U bitsandbytes",wrap:!1}}),R=new il({props:{title:"量化和 torch.compile",local:"量化和-torchcompile",headingTag:"h2"}}),Q=new ol({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwRGlmZnVzaW9uUGlwZWxpbmUlMEFmcm9tJTIwZGlmZnVzZXJzLnF1YW50aXplcnMlMjBpbXBvcnQlMjBQaXBlbGluZVF1YW50aXphdGlvbkNvbmZpZyUwQSUwQXRvcmNoLl9keW5hbW8uY29uZmlnLmNhcHR1cmVfZHluYW1pY19vdXRwdXRfc2hhcGVfb3BzJTIwJTNEJTIwVHJ1ZSUwQSUwQSUyMyUyMCVFOSU4NyU4RiVFNSU4QyU5NiUwQXBpcGVsaW5lX3F1YW50X2NvbmZpZyUyMCUzRCUyMFBpcGVsaW5lUXVhbnRpemF0aW9uQ29uZmlnKCUwQSUyMCUyMCUyMCUyMHF1YW50X2JhY2tlbmQlM0QlMjJiaXRzYW5kYnl0ZXNfNGJpdCUyMiUyQyUwQSUyMCUyMCUyMCUyMHF1YW50X2t3YXJncyUzRCU3QiUyMmxvYWRfaW5fNGJpdCUyMiUzQSUyMFRydWUlMkMlMjAlMjJibmJfNGJpdF9xdWFudF90eXBlJTIyJTNBJTIwJTIybmY0JTIyJTJDJTIwJTIyYm5iXzRiaXRfY29tcHV0ZV9kdHlwZSUyMiUzQSUyMHRvcmNoLmJmbG9hdDE2JTdEJTJDJTBBJTIwJTIwJTIwJTIwY29tcG9uZW50c190b19xdWFudGl6ZSUzRCU1QiUyMnRyYW5zZm9ybWVyJTIyJTJDJTIwJTIydGV4dF9lbmNvZGVyXzIlMjIlNUQlMkMlMEEpJTBBcGlwZWxpbmUlMjAlM0QlMjBEaWZmdXNpb25QaXBlbGluZS5mcm9tX3ByZXRyYWluZWQoJTBBJTIwJTIwJTIwJTIwJTIyYmxhY2stZm9yZXN0LWxhYnMlMkZGTFVYLjEtZGV2JTIyJTJDJTBBJTIwJTIwJTIwJTIwcXVhbnRpemF0aW9uX2NvbmZpZyUzRHBpcGVsaW5lX3F1YW50X2NvbmZpZyUyQyUwQSUyMCUyMCUyMCUyMHRvcmNoX2R0eXBlJTNEdG9yY2guYmZsb2F0MTYlMkMlMEEpLnRvKCUyMmN1ZGElMjIpJTBBJTBBJTIzJTIwJUU3JUJDJTk2JUU4JUFGJTkxJTBBcGlwZWxpbmUudHJhbnNmb3JtZXIudG8obWVtb3J5X2Zvcm1hdCUzRHRvcmNoLmNoYW5uZWxzX2xhc3QpJTBBcGlwZWxpbmUudHJhbnNmb3JtZXIuY29tcGlsZShtb2RlJTNEJTIybWF4LWF1dG90dW5lJTIyJTJDJTIwZnVsbGdyYXBoJTNEVHJ1ZSklMEFwaXBlbGluZSglMjIlMjIlMjIlMEElMjAlMjAlMjAlMjBjaW5lbWF0aWMlMjBmaWxtJTIwc3RpbGwlMjBvZiUyMGElMjBjYXQlMjBzaXBwaW5nJTIwYSUyMG1hcmdhcml0YSUyMGluJTIwYSUyMHBvb2wlMjBpbiUyMFBhbG0lMjBTcHJpbmdzJTJDJTIwQ2FsaWZvcm5pYSUwQSUyMCUyMCUyMCUyMGhpZ2hseSUyMGRldGFpbGVkJTJDJTIwaGlnaCUyMGJ1ZGdldCUyMGhvbGx5d29vZCUyMG1vdmllJTJDJTIwY2luZW1hc2NvcGUlMkMlMjBtb29keSUyQyUyMGVwaWMlMkMlMjBnb3JnZW91cyUyQyUyMGZpbG0lMjBncmFpbiUwQSUyMiUyMiUyMiUwQSkuaW1hZ2VzJTVCMCU1RA==",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> DiffusionPipeline | |
| <span class="hljs-keyword">from</span> diffusers.quantizers <span class="hljs-keyword">import</span> PipelineQuantizationConfig | |
| torch._dynamo.config.capture_dynamic_output_shape_ops = <span class="hljs-literal">True</span> | |
| <span class="hljs-comment"># 量化</span> | |
| pipeline_quant_config = PipelineQuantizationConfig( | |
| quant_backend=<span class="hljs-string">"bitsandbytes_4bit"</span>, | |
| quant_kwargs={<span class="hljs-string">"load_in_4bit"</span>: <span class="hljs-literal">True</span>, <span class="hljs-string">"bnb_4bit_quant_type"</span>: <span class="hljs-string">"nf4"</span>, <span class="hljs-string">"bnb_4bit_compute_dtype"</span>: torch.bfloat16}, | |
| components_to_quantize=[<span class="hljs-string">"transformer"</span>, <span class="hljs-string">"text_encoder_2"</span>], | |
| ) | |
| pipeline = DiffusionPipeline.from_pretrained( | |
| <span class="hljs-string">"black-forest-labs/FLUX.1-dev"</span>, | |
| quantization_config=pipeline_quant_config, | |
| torch_dtype=torch.bfloat16, | |
| ).to(<span class="hljs-string">"cuda"</span>) | |
| <span class="hljs-comment"># 编译</span> | |
| pipeline.transformer.to(memory_format=torch.channels_last) | |
| pipeline.transformer.<span class="hljs-built_in">compile</span>(mode=<span class="hljs-string">"max-autotune"</span>, fullgraph=<span class="hljs-literal">True</span>) | |
| pipeline(<span class="hljs-string">""" | |
| cinematic film still of a cat sipping a margarita in a pool in Palm Springs, California | |
| highly detailed, high budget hollywood movie, cinemascope, moody, epic, gorgeous, film grain | |
| """</span> | |
| ).images[<span class="hljs-number">0</span>]`,wrap:!1}}),F=new il({props:{title:"量化、torch.compile 和卸载",local:"量化torchcompile-和卸载",headingTag:"h2"}}),G=new vl({props:{id:"offloading",options:["model CPU offloading","group offloading"],$$slots:{default:[Ql]},$$scope:{ctx:Z}}}),$=new Vl({props:{source:"https://github.com/huggingface/diffusers/blob/main/docs/source/zh/optimization/speed-memory-optims.md"}}),{c(){e=r("meta"),J=m(),o=r("p"),i=m(),d(n.$$.fragment),p=m(),y=r("p"),y.innerHTML=B,c=m(),d(T.$$.fragment),E=m(),I=r("p"),I.innerHTML=Ml,z=m(),W=r("p"),W.innerHTML=cl,S=m(),_=r("p"),_.textContent=yl,H=m(),X=r("table"),X.innerHTML=Jl,x=m(),g=r("small"),g.textContent=rl,A=m(),V=r("p"),V.innerHTML=ul,D=m(),d(v.$$.fragment),L=m(),d(R.$$.fragment),P=m(),C=r("p"),C.innerHTML=Ul,K=m(),Y=r("p"),Y.innerHTML=Tl,O=m(),d(Q.$$.fragment),ll=m(),d(F.$$.fragment),tl=m(),k=r("p"),k.textContent=dl,sl=m(),N=r("p"),N.innerHTML=fl,el=m(),d(G.$$.fragment),al=m(),d($.$$.fragment),nl=m(),q=r("p"),this.h()},l(l){const t=_l("svelte-u9bgzb",document.head);e=u(t,"META",{name:!0,content:!0}),t.forEach(s),J=M(l),o=u(l,"P",{}),hl(o).forEach(s),i=M(l),f(n.$$.fragment,l),p=M(l),y=u(l,"P",{"data-svelte-h":!0}),U(y)!=="svelte-181wuip"&&(y.innerHTML=B),c=M(l),f(T.$$.fragment,l),E=M(l),I=u(l,"P",{"data-svelte-h":!0}),U(I)!=="svelte-15a734g"&&(I.innerHTML=Ml),z=M(l),W=u(l,"P",{"data-svelte-h":!0}),U(W)!=="svelte-12155we"&&(W.innerHTML=cl),S=M(l),_=u(l,"P",{"data-svelte-h":!0}),U(_)!=="svelte-zqzew3"&&(_.textContent=yl),H=M(l),X=u(l,"TABLE",{"data-svelte-h":!0}),U(X)!=="svelte-yb0epc"&&(X.innerHTML=Jl),x=M(l),g=u(l,"SMALL",{"data-svelte-h":!0}),U(g)!=="svelte-moa23m"&&(g.textContent=rl),A=M(l),V=u(l,"P",{"data-svelte-h":!0}),U(V)!=="svelte-jsp2de"&&(V.innerHTML=ul),D=M(l),f(v.$$.fragment,l),L=M(l),f(R.$$.fragment,l),P=M(l),C=u(l,"P",{"data-svelte-h":!0}),U(C)!=="svelte-9fcnzw"&&(C.innerHTML=Ul),K=M(l),Y=u(l,"P",{"data-svelte-h":!0}),U(Y)!=="svelte-5t95ee"&&(Y.innerHTML=Tl),O=M(l),f(Q.$$.fragment,l),ll=M(l),f(F.$$.fragment,l),tl=M(l),k=u(l,"P",{"data-svelte-h":!0}),U(k)!=="svelte-14h7pc0"&&(k.textContent=dl),sl=M(l),N=u(l,"P",{"data-svelte-h":!0}),U(N)!=="svelte-gy5ew7"&&(N.innerHTML=fl),el=M(l),f(G.$$.fragment,l),al=M(l),f($.$$.fragment,l),nl=M(l),q=u(l,"P",{}),hl(q).forEach(s),this.h()},h(){jl(e,"name","hf:doc:metadata"),jl(e,"content",kl)},m(l,t){Xl(document.head,e),a(l,J,t),a(l,o,t),a(l,i,t),b(n,l,t),a(l,p,t),a(l,y,t),a(l,c,t),b(T,l,t),a(l,E,t),a(l,I,t),a(l,z,t),a(l,W,t),a(l,S,t),a(l,_,t),a(l,H,t),a(l,X,t),a(l,x,t),a(l,g,t),a(l,A,t),a(l,V,t),a(l,D,t),b(v,l,t),a(l,L,t),b(R,l,t),a(l,P,t),a(l,C,t),a(l,K,t),a(l,Y,t),a(l,O,t),b(Q,l,t),a(l,ll,t),b(F,l,t),a(l,tl,t),a(l,k,t),a(l,sl,t),a(l,N,t),a(l,el,t),b(G,l,t),a(l,al,t),b($,l,t),a(l,nl,t),a(l,q,t),pl=!0},p(l,[t]){const bl={};t&2&&(bl.$$scope={dirty:t,ctx:l}),T.$set(bl);const wl={};t&2&&(wl.$$scope={dirty:t,ctx:l}),G.$set(wl)},i(l){pl||(w(n.$$.fragment,l),w(T.$$.fragment,l),w(v.$$.fragment,l),w(R.$$.fragment,l),w(Q.$$.fragment,l),w(F.$$.fragment,l),w(G.$$.fragment,l),w($.$$.fragment,l),pl=!0)},o(l){h(n.$$.fragment,l),h(T.$$.fragment,l),h(v.$$.fragment,l),h(R.$$.fragment,l),h(Q.$$.fragment,l),h(F.$$.fragment,l),h(G.$$.fragment,l),h($.$$.fragment,l),pl=!1},d(l){l&&(s(J),s(o),s(i),s(p),s(y),s(c),s(E),s(I),s(z),s(W),s(S),s(_),s(H),s(X),s(x),s(g),s(A),s(V),s(D),s(L),s(P),s(C),s(K),s(Y),s(O),s(ll),s(tl),s(k),s(sl),s(N),s(el),s(al),s(nl),s(q)),s(e),j(n,l),j(T,l),j(v,l),j(R,l),j(Q,l),j(F,l),j(G,l),j($,l)}}}const kl='{"title":"编译和卸载量化模型","local":"编译和卸载量化模型","sections":[{"title":"量化和 torch.compile","local":"量化和-torchcompile","sections":[],"depth":2},{"title":"量化、torch.compile 和卸载","local":"量化torchcompile-和卸载","sections":[],"depth":2}],"depth":1}';function Nl(Z){return Gl(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class xl extends Il{constructor(e){super(),Wl(this,e,Nl,Fl,Bl,{})}}export{xl as component}; | |
Xet Storage Details
- Size:
- 27.2 kB
- Xet hash:
- 3e3c86154f3aadbd3b8005739aca9759324e98cd275c57cbc5290ef5b604dd9b
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.