Buckets:
| import{s as Pt,o as Ot,n as $t}from"../chunks/scheduler.5c93273d.js";import{S as Dt,i as Kt,g as m,s as r,r as h,A as tl,h as d,f as e,c as o,j as qt,u as f,x as U,k as O,y as ll,a as s,v as T,d as M,t as V,w as j}from"../chunks/index.e43dd92b.js";import{T as At}from"../chunks/Tip.1cbfe904.js";import{C as K}from"../chunks/CodeBlock.6896320e.js";import{H as D,E as el}from"../chunks/getInferenceSnippets.22672bbf.js";import{H as sl,a as zt}from"../chunks/HfOption.d50154c3.js";function al(y){let a,u=`参考 <a href="https://docs.pruna.ai/en/stable/docs_pruna/user_manual/configure.html" rel="nofollow">Pruna 优化</a> 文档以了解更多关于该操作的信息。 | |
| 本示例中使用的优化技术。`;return{c(){a=m("p"),a.innerHTML=u},l(p){a=d(p,"P",{"data-svelte-h":!0}),U(a)!=="svelte-hynay3"&&(a.innerHTML=u)},m(p,c){s(p,a,c)},p:$t,d(p){p&&e(a)}}}function nl(y){let a,u="我们可以通过使用<code>EvaluationAgent</code>加载和评估优化后的模型,并将其传递给<code>Task</code>。",p,c,i;return c=new K({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwRmx1eFBpcGVsaW5lJTBBJTBBZnJvbSUyMHBydW5hJTIwaW1wb3J0JTIwUHJ1bmFNb2RlbCUwQWZyb20lMjBwcnVuYS5kYXRhLnBydW5hX2RhdGFtb2R1bGUlMjBpbXBvcnQlMjBQcnVuYURhdGFNb2R1bGUlMEFmcm9tJTIwcHJ1bmEuZXZhbHVhdGlvbi5ldmFsdWF0aW9uX2FnZW50JTIwaW1wb3J0JTIwRXZhbHVhdGlvbkFnZW50JTBBZnJvbSUyMHBydW5hLmV2YWx1YXRpb24ubWV0cmljcyUyMGltcG9ydCUyMCglMEElMjAlMjAlMjAlMjBUaHJvdWdocHV0TWV0cmljJTJDJTBBJTIwJTIwJTIwJTIwVG9yY2hNZXRyaWNXcmFwcGVyJTJDJTBBJTIwJTIwJTIwJTIwVG90YWxUaW1lTWV0cmljJTJDJTBBKSUwQWZyb20lMjBwcnVuYS5ldmFsdWF0aW9uLnRhc2slMjBpbXBvcnQlMjBUYXNrJTBBJTBBJTIzJTIwZGVmaW5lJTIwdGhlJTIwZGV2aWNlJTBBZGV2aWNlJTIwJTNEJTIwJTIyY3VkYSUyMiUyMGlmJTIwdG9yY2guY3VkYS5pc19hdmFpbGFibGUoKSUyMGVsc2UlMjAlMjJtcHMlMjIlMjBpZiUyMHRvcmNoLmJhY2tlbmRzLm1wcy5pc19hdmFpbGFibGUoKSUyMGVsc2UlMjAlMjJjcHUlMjIlMEElMEElMjMlMjAlRTUlOEElQTAlRTglQkQlQkQlRTYlQTglQTElRTUlOUUlOEIlMEElMjMlMjAlRTQlQkQlQkYlRTclOTQlQTglRTUlQjAlOEZHUFUlRTUlODYlODUlRTUlQUQlOTglRTUlQjAlOUQlRTglQUYlOTUlMjBQcnVuYUFJJTJGU2VnbWluZC1WZWdhLXNtYXNoZWQlMjAlRTYlODglOTYlMjBQcnVuYUFJJTJGRkxVWC4xLWRldi1zbWFzaGVkJTBBc21hc2hlZF9waXBlJTIwJTNEJTIwUHJ1bmFNb2RlbC5mcm9tX2h1YiglMjJQcnVuYUFJJTJGRkxVWC4xLWRldi1zbWFzaGVkJTIyKSUwQSUwQSUyMyUyMCVFNSVBRSU5QSVFNCVCOSU4OSVFNiU4QyU4NyVFNiVBMCU4NyUwQW1ldHJpY3MlMjAlM0QlMjAlNUIlMEElMjAlMjAlMjAlMjBUb3RhbFRpbWVNZXRyaWMobl9pdGVyYXRpb25zJTNEMjAlMkMlMjBuX3dhcm11cF9pdGVyYXRpb25zJTNENSklMkMlMEElMjAlMjAlMjAlMjBUaHJvdWdocHV0TWV0cmljKG5faXRlcmF0aW9ucyUzRDIwJTJDJTIwbl93YXJtdXBfaXRlcmF0aW9ucyUzRDUpJTJDJTBBJTIwJTIwJTIwJTIwVG9yY2hNZXRyaWNXcmFwcGVyKCUyMmNsaXAlMjIpJTJDJTBBJTVEJTBBJTBBJTIzJTIwJUU1JUFFJTlBJUU0JUI5JTg5JUU2JTk1JUIwJUU2JThEJUFFJUU2JUE4JUExJUU1JTlEJTk3JTBBZGF0YW1vZHVsZSUyMCUzRCUyMFBydW5hRGF0YU1vZHVsZS5mcm9tX3N0cmluZyglMjJMQUlPTjI1NiUyMiklMEFkYXRhbW9kdWxlLmxpbWl0X2RhdGFzZXRzKDEwKSUwQSUwQSUyMyUyMCVFNSVBRSU5QSVFNCVCOSU4OSVFNCVCQiVCQiVFNSU4QSVBMSVFNSU5MiU4QyVFOCVBRiU4NCVFNCVCQyVCMCVFNCVCQiVBMyVFNyU5MCU4NiUwQXRhc2slMjAlM0QlMjBUYXNrKG1ldHJpY3MlMkMlMjBkYXRhbW9kdWxlJTNEZGF0YW1vZHVsZSUyQyUyMGRldmljZSUzRGRldmljZSklMEFldmFsX2FnZW50JTIwJTNEJTIwRXZhbHVhdGlvbkFnZW50KHRhc2spJTBBJTBBJTIzJTIwJUU4JUFGJTg0JUU0JUJDJUIwJUU0JUJDJTk4JUU1JThDJTk2JUU2JUE4JUExJUU1JTlFJThCJUU1JUI5JUI2JUU1JThEJUI4JUU4JUJEJUJEJUU1JTg4JUIwQ1BVJTBBc21hc2hlZF9waXBlLm1vdmVfdG9fZGV2aWNlKGRldmljZSklMEFzbWFzaGVkX3BpcGVfcmVzdWx0cyUyMCUzRCUyMGV2YWxfYWdlbnQuZXZhbHVhdGUoc21hc2hlZF9waXBlKSUwQXNtYXNoZWRfcGlwZS5tb3ZlX3RvX2RldmljZSglMjJjcHUlMjIp",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> FluxPipeline | |
| <span class="hljs-keyword">from</span> pruna <span class="hljs-keyword">import</span> PrunaModel | |
| <span class="hljs-keyword">from</span> pruna.data.pruna_datamodule <span class="hljs-keyword">import</span> PrunaDataModule | |
| <span class="hljs-keyword">from</span> pruna.evaluation.evaluation_agent <span class="hljs-keyword">import</span> EvaluationAgent | |
| <span class="hljs-keyword">from</span> pruna.evaluation.metrics <span class="hljs-keyword">import</span> ( | |
| ThroughputMetric, | |
| TorchMetricWrapper, | |
| TotalTimeMetric, | |
| ) | |
| <span class="hljs-keyword">from</span> pruna.evaluation.task <span class="hljs-keyword">import</span> Task | |
| <span class="hljs-comment"># define the device</span> | |
| device = <span class="hljs-string">"cuda"</span> <span class="hljs-keyword">if</span> torch.cuda.is_available() <span class="hljs-keyword">else</span> <span class="hljs-string">"mps"</span> <span class="hljs-keyword">if</span> torch.backends.mps.is_available() <span class="hljs-keyword">else</span> <span class="hljs-string">"cpu"</span> | |
| <span class="hljs-comment"># 加载模型</span> | |
| <span class="hljs-comment"># 使用小GPU内存尝试 PrunaAI/Segmind-Vega-smashed 或 PrunaAI/FLUX.1-dev-smashed</span> | |
| smashed_pipe = PrunaModel.from_hub(<span class="hljs-string">"PrunaAI/FLUX.1-dev-smashed"</span>) | |
| <span class="hljs-comment"># 定义指标</span> | |
| metrics = [ | |
| TotalTimeMetric(n_iterations=<span class="hljs-number">20</span>, n_warmup_iterations=<span class="hljs-number">5</span>), | |
| ThroughputMetric(n_iterations=<span class="hljs-number">20</span>, n_warmup_iterations=<span class="hljs-number">5</span>), | |
| TorchMetricWrapper(<span class="hljs-string">"clip"</span>), | |
| ] | |
| <span class="hljs-comment"># 定义数据模块</span> | |
| datamodule = PrunaDataModule.from_string(<span class="hljs-string">"LAION256"</span>) | |
| datamodule.limit_datasets(<span class="hljs-number">10</span>) | |
| <span class="hljs-comment"># 定义任务和评估代理</span> | |
| task = Task(metrics, datamodule=datamodule, device=device) | |
| eval_agent = EvaluationAgent(task) | |
| <span class="hljs-comment"># 评估优化模型并卸载到CPU</span> | |
| smashed_pipe.move_to_device(device) | |
| smashed_pipe_results = eval_agent.evaluate(smashed_pipe) | |
| smashed_pipe.move_to_device(<span class="hljs-string">"cpu"</span>)`,wrap:!1}}),{c(){a=m("p"),a.innerHTML=u,p=r(),h(c.$$.fragment)},l(n){a=d(n,"P",{"data-svelte-h":!0}),U(a)!=="svelte-1wfu4ax"&&(a.innerHTML=u),p=o(n),f(c.$$.fragment,n)},m(n,J){s(n,a,J),s(n,p,J),T(c,n,J),i=!0},p:$t,i(n){i||(M(c.$$.fragment,n),i=!0)},o(n){V(c.$$.fragment,n),i=!1},d(n){n&&(e(a),e(p)),j(c,n)}}}function il(y){let a,u="除了比较优化模型与基础模型,您还可以评估独立的 <code>diffusers</code> 模型。这在您想评估模型性能而不考虑优化时非常有用。我们可以通过使用 <code>PrunaModel</code> 包装器并运行 <code>EvaluationAgent</code> 来实现。",p,c,i;return c=new K({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwRmx1eFBpcGVsaW5lJTBBJTBBZnJvbSUyMHBydW5hJTIwaW1wb3J0JTIwUHJ1bmFNb2RlbCUwQSUwQSUyMyUyMCVFNSU4QSVBMCVFOCVCRCVCRCVFNiVBOCVBMSVFNSU5RSU4QiUwQSUyMyUyMCVFNCVCRCVCRiVFNyU5NCVBOCVFNSVCMCU4RkdQVSVFNSU4NiU4NSVFNSVBRCU5OCVFNSVCMCU5RCVFOCVBRiU5NSUyMFBydW5hQUklMkZTZWdtaW5kLVZlZ2Etc21hc2hlZCUyMCVFNiU4OCU5NiUyMFBydW5hQUklMkZGTFVYLjEtZGV2LXNtYXNoZWQlMEFwaXBlJTIwJTNEJTIwRmx1eFBpcGVsaW5lLmZyb21fcHJldHJhaW5lZCglMEElMjAlMjAlMjAlMjAlMjJibGFjay1mb3Jlc3QtbGFicyUyRkZMVVguMS1kZXYlMjIlMkMlMEElMjAlMjAlMjAlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmJmbG9hdDE2JTBBKS50byglMjJjcHUlMjIpJTBBd3JhcHBlZF9waXBlJTIwJTNEJTIwUHJ1bmFNb2RlbChtb2RlbCUzRHBpcGUp",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> FluxPipeline | |
| <span class="hljs-keyword">from</span> pruna <span class="hljs-keyword">import</span> PrunaModel | |
| <span class="hljs-comment"># 加载模型</span> | |
| <span class="hljs-comment"># 使用小GPU内存尝试 PrunaAI/Segmind-Vega-smashed 或 PrunaAI/FLUX.1-dev-smashed</span> | |
| pipe = FluxPipeline.from_pretrained( | |
| <span class="hljs-string">"black-forest-labs/FLUX.1-dev"</span>, | |
| torch_dtype=torch.bfloat16 | |
| ).to(<span class="hljs-string">"cpu"</span>) | |
| wrapped_pipe = PrunaModel(model=pipe)`,wrap:!1}}),{c(){a=m("p"),a.innerHTML=u,p=r(),h(c.$$.fragment)},l(n){a=d(n,"P",{"data-svelte-h":!0}),U(a)!=="svelte-im1gl4"&&(a.innerHTML=u),p=o(n),f(c.$$.fragment,n)},m(n,J){s(n,a,J),s(n,p,J),T(c,n,J),i=!0},p:$t,i(n){i||(M(c.$$.fragment,n),i=!0)},o(n){V(c.$$.fragment,n),i=!1},d(n){n&&(e(a),e(p)),j(c,n)}}}function pl(y){let a,u,p,c;return a=new zt({props:{id:"eval",option:"optimized model",$$slots:{default:[nl]},$$scope:{ctx:y}}}),p=new zt({props:{id:"eval",option:"standalone model",$$slots:{default:[il]},$$scope:{ctx:y}}}),{c(){h(a.$$.fragment),u=r(),h(p.$$.fragment)},l(i){f(a.$$.fragment,i),u=o(i),f(p.$$.fragment,i)},m(i,n){T(a,i,n),s(i,u,n),T(p,i,n),c=!0},p(i,n){const J={};n&2&&(J.$$scope={dirty:n,ctx:i}),a.$set(J);const z={};n&2&&(z.$$scope={dirty:n,ctx:i}),p.$set(z)},i(i){c||(M(a.$$.fragment,i),M(p.$$.fragment,i),c=!0)},o(i){V(a.$$.fragment,i),V(p.$$.fragment,i),c=!1},d(i){i&&e(u),j(a,i),j(p,i)}}}function rl(y){let a,u='有关基准测试 Flux 的更多详细信息,请查看 <a href="https://huggingface.co/blog/PrunaAI/flux-fastest-image-generation-endpoint" rel="nofollow">宣布 FLUX-Juiced:最快的图像生成端点(快 2.6 倍)!</a> 博客文章和 <a href="https://huggingface.co/spaces/PrunaAI/InferBench" rel="nofollow">InferBench</a> 空间。';return{c(){a=m("p"),a.innerHTML=u},l(p){a=d(p,"P",{"data-svelte-h":!0}),U(a)!=="svelte-1g34xc"&&(a.innerHTML=u)},m(p,c){s(p,a,c)},p:$t,d(p){p&&e(a)}}}function ol(y){let a,u,p,c,i,n,J,z='<a href="https://github.com/PrunaAI/pruna" rel="nofollow">Pruna</a> 是一个模型优化框架,提供多种优化方法——量化、剪枝、缓存、编译——以加速推理并减少内存使用。以下是优化方法的概览。',tt,B,Bt='<thead><tr><th>技术</th> <th>描述</th> <th align="center">速度</th> <th align="center">内存</th> <th align="center">质量</th></tr></thead> <tbody><tr><td><code>batcher</code></td> <td>将多个输入分组在一起同时处理,提高计算效率并减少处理时间。</td> <td align="center">✅</td> <td align="center">❌</td> <td align="center">➖</td></tr> <tr><td><code>cacher</code></td> <td>存储计算的中间结果以加速后续操作。</td> <td align="center">✅</td> <td align="center">➖</td> <td align="center">➖</td></tr> <tr><td><code>compiler</code></td> <td>为特定硬件优化模型指令。</td> <td align="center">✅</td> <td align="center">➖</td> <td align="center">➖</td></tr> <tr><td><code>distiller</code></td> <td>训练一个更小、更简单的模型来模仿一个更大、更复杂的模型。</td> <td align="center">✅</td> <td align="center">✅</td> <td align="center">❌</td></tr> <tr><td><code>quantizer</code></td> <td>降低权重和激活的精度,减少内存需求。</td> <td align="center">✅</td> <td align="center">✅</td> <td align="center">❌</td></tr> <tr><td><code>pruner</code></td> <td>移除不重要或冗余的连接和神经元,产生一个更稀疏、更高效的网络。</td> <td align="center">✅</td> <td align="center">✅</td> <td align="center">❌</td></tr> <tr><td><code>recoverer</code></td> <td>在压缩后恢复模型的性能。</td> <td align="center">➖</td> <td align="center">➖</td> <td align="center">✅</td></tr> <tr><td><code>factorizer</code></td> <td>将多个小矩阵乘法批处理为一个大型融合操作。</td> <td align="center">✅</td> <td align="center">➖</td> <td align="center">➖</td></tr> <tr><td><code>enhancer</code></td> <td>通过应用后处理算法(如去噪或上采样)来增强模型输出。</td> <td align="center">❌</td> <td align="center">-</td> <td align="center">✅</td></tr></tbody>',lt,N,Nt="✅ (改进), ➖ (大致相同), ❌ (恶化)",et,I,It='在 <a href="https://docs.pruna.ai/en/stable/docs_pruna/user_manual/configure.html#configure-algorithms" rel="nofollow">Pruna 文档</a> 中探索所有优化方法。',st,Q,at,R,Qt="使用以下命令安装 Pruna。",nt,Z,it,v,pt,W,Rt="Diffusers 模型支持广泛的优化算法,如下所示。",rt,w,Zt='<img src="https://huggingface.co/datasets/PrunaAI/documentation-images/resolve/main/diffusers/diffusers_combinations.png" alt="Diffusers 模型支持的优化算法概览"/>',ot,_,vt='下面的示例使用 factorizer、compiler 和 cacher 算法的组合优化 <a href="https://huggingface.co/black-forest-labs/FLUX.1-dev" rel="nofollow">black-forest-labs/FLUX.1-dev</a>。这种组合将推理速度加速高达 4.2 倍,并将峰值 GPU 内存使用从 34.7GB 减少到 28.0GB,同时几乎保持相同的输出质量。',ct,g,mt,C,Wt='<img src="https://huggingface.co/datasets/PrunaAI/documentation-images/resolve/main/diffusers/flux_combination.png" alt="用于FLUX.1-dev的优化技术展示,结合了因子分解器、编译器和缓存器算法"/>',dt,S,_t="首先定义一个包含要使用的优化算法的<code>SmashConfig</code>。要优化模型,将管道和<code>SmashConfig</code>用<code>smash</code>包装,然后像往常一样使用管道进行推理。",ut,E,Ut,F,St='<img src="https://huggingface.co/datasets/PrunaAI/documentation-images/resolve/main/diffusers/flux_smashed_comparison.png"/>',Jt,k,Et="优化后,我们可以使用Hugging Face Hub共享和加载优化后的模型。",ht,G,ft,X,Tt,Y,kt='Pruna提供了<a href="https://docs.pruna.ai/en/stable/docs_pruna/user_manual/evaluate.html" rel="nofollow">EvaluationAgent</a>来评估优化后模型的质量。',Mt,H,Gt="我们可以定义我们关心的指标,如总时间和吞吐量,以及要评估的数据集。我们可以定义一个模型并将其传递给<code>EvaluationAgent</code>。",Vt,b,jt,x,Xt="现在您已经了解了如何优化和评估您的模型,可以开始使用 Pruna 来优化您自己的模型了。幸运的是,我们有许多示例来帮助您入门。",yt,$,wt,L,gt,q,Yt='<li><a href="https://github.com/pruna-ai/pruna" rel="nofollow">Pruna</a></li> <li><a href="https://docs.pruna.ai/en/stable/docs_pruna/user_manual/configure.html#configure-algorithms" rel="nofollow">Pruna 优化</a></li> <li><a href="https://docs.pruna.ai/en/stable/docs_pruna/user_manual/evaluate.html" rel="nofollow">Pruna 评估</a></li> <li><a href="https://docs.pruna.ai/en/stable/docs_pruna/tutorials/index.html" rel="nofollow">Pruna 教程</a></li>',Ct,A,Ft,P,bt;return i=new D({props:{title:"Pruna",local:"pruna",headingTag:"h1"}}),Q=new D({props:{title:"安装",local:"安装",headingTag:"h2"}}),Z=new K({props:{code:"cGlwJTIwaW5zdGFsbCUyMHBydW5h",highlighted:"pip install pruna",wrap:!1}}),v=new D({props:{title:"优化 Diffusers 模型",local:"优化-diffusers-模型",headingTag:"h2"}}),g=new At({props:{warning:!1,$$slots:{default:[al]},$$scope:{ctx:y}}}),E=new K({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwRmx1eFBpcGVsaW5lJTBBJTBBZnJvbSUyMHBydW5hJTIwaW1wb3J0JTIwUHJ1bmFNb2RlbCUyQyUyMFNtYXNoQ29uZmlnJTJDJTIwc21hc2glMEElMEElMjMlMjAlRTUlOEElQTAlRTglQkQlQkQlRTYlQTglQTElRTUlOUUlOEIlMEElMjMlMjAlRTQlQkQlQkYlRTclOTQlQTglRTUlQjAlOEZHUFUlRTUlODYlODUlRTUlQUQlOTglRTUlQjAlOUQlRTglQUYlOTVzZWdtaW5kJTJGU2VnbWluZC1WZWdhJUU2JTg4JTk2YmxhY2stZm9yZXN0LWxhYnMlMkZGTFVYLjEtc2NobmVsbCUwQXBpcGUlMjAlM0QlMjBGbHV4UGlwZWxpbmUuZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMCUyMmJsYWNrLWZvcmVzdC1sYWJzJTJGRkxVWC4xLWRldiUyMiUyQyUwQSUyMCUyMCUyMCUyMHRvcmNoX2R0eXBlJTNEdG9yY2guYmZsb2F0MTYlMEEpLnRvKCUyMmN1ZGElMjIpJTBBJTBBJTIzJTIwJUU1JUFFJTlBJUU0JUI5JTg5JUU5JTg1JThEJUU3JUJEJUFFJTBBc21hc2hfY29uZmlnJTIwJTNEJTIwU21hc2hDb25maWcoKSUwQXNtYXNoX2NvbmZpZyU1QiUyMmZhY3Rvcml6ZXIlMjIlNUQlMjAlM0QlMjAlMjJxa3ZfZGlmZnVzZXJzJTIyJTBBc21hc2hfY29uZmlnJTVCJTIyY29tcGlsZXIlMjIlNUQlMjAlM0QlMjAlMjJ0b3JjaF9jb21waWxlJTIyJTBBc21hc2hfY29uZmlnJTVCJTIydG9yY2hfY29tcGlsZV90YXJnZXQlMjIlNUQlMjAlM0QlMjAlMjJtb2R1bGVfbGlzdCUyMiUwQXNtYXNoX2NvbmZpZyU1QiUyMmNhY2hlciUyMiU1RCUyMCUzRCUyMCUyMmZvcmElMjIlMEFzbWFzaF9jb25maWclNUIlMjJmb3JhX2ludGVydmFsJTIyJTVEJTIwJTNEJTIwMiUwQSUwQSUyMyUyMCVFNCVCOCVCQSVFNCVCQSU4NiVFOCU4RSVCNyVFNSVCRSU5NyVFNiU5QyU4MCVFNCVCRCVCMyVFOSU4MCU5RiVFNSVCQSVBNiVFNyVCQiU5MyVFNiU5RSU5QyVFRiVCQyU4QyVFNSU4RiVBRiVFNCVCQiVBNSVFNiVCNyVCQiVFNSU4QSVBMCVFOCVCRiU5OSVFNCVCQSU5QiVFOSU4NSU4RCVFNyVCRCVBRSUwQSUyMyUyMCVFNCVCRCU4NiVFNSVBRSU4MyVFNCVCQiVBQyVFNCVCQyU5QSVFNSVCMCU4NiVFOSVBMiU4NCVFNyU4MyVBRCVFNiU5NyVCNiVFOSU5NyVCNCVFNCVCQiU4RTEuNSVFNSU4OCU4NiVFOSU5MiU5RiVFNSVBMiU5RSVFNSU4QSVBMCVFNSU4OCVCMDEwJUU1JTg4JTg2JUU5JTkyJTlGJTBBJTIzJTIwc21hc2hfY29uZmlnJTVCJTIydG9yY2hfY29tcGlsZV9tb2RlJTIyJTVEJTIwJTNEJTIwJTIybWF4LWF1dG90dW5lLW5vLWN1ZGFncmFwaHMlMjIlMEElMjMlMjBzbWFzaF9jb25maWclNUIlMjJxdWFudGl6ZXIlMjIlNUQlMjAlM0QlMjAlMjJ0b3JjaGFvJTIyJTBBJTIzJTIwc21hc2hfY29uZmlnJTVCJTIydG9yY2hhb19xdWFudF90eXBlJTIyJTVEJTIwJTNEJTIwJTIyZnA4ZHElMjIlMEElMjMlMjBzbWFzaF9jb25maWclNUIlMjJ0b3JjaGFvX2V4Y2x1ZGVkX21vZHVsZXMlMjIlNUQlMjAlM0QlMjAlMjJub3JtJTJCZW1iZWRkaW5nJTIyJTBBJTBBJTIzJTIwJUU0JUJDJTk4JUU1JThDJTk2JUU2JUE4JUExJUU1JTlFJThCJTBBc21hc2hlZF9waXBlJTIwJTNEJTIwc21hc2gocGlwZSUyQyUyMHNtYXNoX2NvbmZpZyklMEElMEElMjMlMjAlRTglQkYlOTAlRTglQTElOEMlRTYlQTglQTElRTUlOUUlOEIlMEFzbWFzaGVkX3BpcGUoJTIyYSUyMGtuaXR0ZWQlMjBwdXJwbGUlMjBwcnVuZSUyMikuaW1hZ2VzJTVCMCU1RA==",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> FluxPipeline | |
| <span class="hljs-keyword">from</span> pruna <span class="hljs-keyword">import</span> PrunaModel, SmashConfig, smash | |
| <span class="hljs-comment"># 加载模型</span> | |
| <span class="hljs-comment"># 使用小GPU内存尝试segmind/Segmind-Vega或black-forest-labs/FLUX.1-schnell</span> | |
| pipe = FluxPipeline.from_pretrained( | |
| <span class="hljs-string">"black-forest-labs/FLUX.1-dev"</span>, | |
| torch_dtype=torch.bfloat16 | |
| ).to(<span class="hljs-string">"cuda"</span>) | |
| <span class="hljs-comment"># 定义配置</span> | |
| smash_config = SmashConfig() | |
| smash_config[<span class="hljs-string">"factorizer"</span>] = <span class="hljs-string">"qkv_diffusers"</span> | |
| smash_config[<span class="hljs-string">"compiler"</span>] = <span class="hljs-string">"torch_compile"</span> | |
| smash_config[<span class="hljs-string">"torch_compile_target"</span>] = <span class="hljs-string">"module_list"</span> | |
| smash_config[<span class="hljs-string">"cacher"</span>] = <span class="hljs-string">"fora"</span> | |
| smash_config[<span class="hljs-string">"fora_interval"</span>] = <span class="hljs-number">2</span> | |
| <span class="hljs-comment"># 为了获得最佳速度结果,可以添加这些配置</span> | |
| <span class="hljs-comment"># 但它们会将预热时间从1.5分钟增加到10分钟</span> | |
| <span class="hljs-comment"># smash_config["torch_compile_mode"] = "max-autotune-no-cudagraphs"</span> | |
| <span class="hljs-comment"># smash_config["quantizer"] = "torchao"</span> | |
| <span class="hljs-comment"># smash_config["torchao_quant_type"] = "fp8dq"</span> | |
| <span class="hljs-comment"># smash_config["torchao_excluded_modules"] = "norm+embedding"</span> | |
| <span class="hljs-comment"># 优化模型</span> | |
| smashed_pipe = smash(pipe, smash_config) | |
| <span class="hljs-comment"># 运行模型</span> | |
| smashed_pipe(<span class="hljs-string">"a knitted purple prune"</span>).images[<span class="hljs-number">0</span>]`,wrap:!1}}),G=new K({props:{code:"JTIzJTIwJUU0JUJGJTlEJUU1JUFEJTk4JUU2JUE4JUExJUU1JTlFJThCJTBBc21hc2hlZF9waXBlLnNhdmVfdG9faHViKCUyMiUzQ3VzZXJuYW1lJTNFJTJGRkxVWC4xLWRldi1zbWFzaGVkJTIyKSUwQSUwQSUyMyUyMCVFNSU4QSVBMCVFOCVCRCVCRCVFNiVBOCVBMSVFNSU5RSU4QiUwQXNtYXNoZWRfcGlwZSUyMCUzRCUyMFBydW5hTW9kZWwuZnJvbV9odWIoJTIyJTNDdXNlcm5hbWUlM0UlMkZGTFVYLjEtZGV2LXNtYXNoZWQlMjIp",highlighted:`<span class="hljs-comment"># 保存模型</span> | |
| smashed_pipe.save_to_hub(<span class="hljs-string">"<username>/FLUX.1-dev-smashed"</span>) | |
| <span class="hljs-comment"># 加载模型</span> | |
| smashed_pipe = PrunaModel.from_hub(<span class="hljs-string">"<username>/FLUX.1-dev-smashed"</span>)`,wrap:!1}}),X=new D({props:{title:"评估和基准测试Diffusers模型",local:"评估和基准测试diffusers模型",headingTag:"h2"}}),b=new sl({props:{id:"eval",options:["optimized model","standalone model"],$$slots:{default:[pl]},$$scope:{ctx:y}}}),$=new At({props:{warning:!1,$$slots:{default:[rl]},$$scope:{ctx:y}}}),L=new D({props:{title:"参考",local:"参考",headingTag:"h2"}}),A=new el({props:{source:"https://github.com/huggingface/diffusers/blob/main/docs/source/zh/optimization/pruna.md"}}),{c(){a=m("meta"),u=r(),p=m("p"),c=r(),h(i.$$.fragment),n=r(),J=m("p"),J.innerHTML=z,tt=r(),B=m("table"),B.innerHTML=Bt,lt=r(),N=m("p"),N.textContent=Nt,et=r(),I=m("p"),I.innerHTML=It,st=r(),h(Q.$$.fragment),at=r(),R=m("p"),R.textContent=Qt,nt=r(),h(Z.$$.fragment),it=r(),h(v.$$.fragment),pt=r(),W=m("p"),W.textContent=Rt,rt=r(),w=m("div"),w.innerHTML=Zt,ot=r(),_=m("p"),_.innerHTML=vt,ct=r(),h(g.$$.fragment),mt=r(),C=m("div"),C.innerHTML=Wt,dt=r(),S=m("p"),S.innerHTML=_t,ut=r(),h(E.$$.fragment),Ut=r(),F=m("div"),F.innerHTML=St,Jt=r(),k=m("p"),k.textContent=Et,ht=r(),h(G.$$.fragment),ft=r(),h(X.$$.fragment),Tt=r(),Y=m("p"),Y.innerHTML=kt,Mt=r(),H=m("p"),H.innerHTML=Gt,Vt=r(),h(b.$$.fragment),jt=r(),x=m("p"),x.textContent=Xt,yt=r(),h($.$$.fragment),wt=r(),h(L.$$.fragment),gt=r(),q=m("ul"),q.innerHTML=Yt,Ct=r(),h(A.$$.fragment),Ft=r(),P=m("p"),this.h()},l(t){const l=tl("svelte-u9bgzb",document.head);a=d(l,"META",{name:!0,content:!0}),l.forEach(e),u=o(t),p=d(t,"P",{}),qt(p).forEach(e),c=o(t),f(i.$$.fragment,t),n=o(t),J=d(t,"P",{"data-svelte-h":!0}),U(J)!=="svelte-14zuif9"&&(J.innerHTML=z),tt=o(t),B=d(t,"TABLE",{"data-svelte-h":!0}),U(B)!=="svelte-1870rsx"&&(B.innerHTML=Bt),lt=o(t),N=d(t,"P",{"data-svelte-h":!0}),U(N)!=="svelte-1syz01b"&&(N.textContent=Nt),et=o(t),I=d(t,"P",{"data-svelte-h":!0}),U(I)!=="svelte-uq0kan"&&(I.innerHTML=It),st=o(t),f(Q.$$.fragment,t),at=o(t),R=d(t,"P",{"data-svelte-h":!0}),U(R)!=="svelte-1g5o862"&&(R.textContent=Qt),nt=o(t),f(Z.$$.fragment,t),it=o(t),f(v.$$.fragment,t),pt=o(t),W=d(t,"P",{"data-svelte-h":!0}),U(W)!=="svelte-1oye83w"&&(W.textContent=Rt),rt=o(t),w=d(t,"DIV",{class:!0,"data-svelte-h":!0}),U(w)!=="svelte-1uhmbha"&&(w.innerHTML=Zt),ot=o(t),_=d(t,"P",{"data-svelte-h":!0}),U(_)!=="svelte-1bmdjcc"&&(_.innerHTML=vt),ct=o(t),f(g.$$.fragment,t),mt=o(t),C=d(t,"DIV",{class:!0,"data-svelte-h":!0}),U(C)!=="svelte-1yaafaz"&&(C.innerHTML=Wt),dt=o(t),S=d(t,"P",{"data-svelte-h":!0}),U(S)!=="svelte-146qhef"&&(S.innerHTML=_t),ut=o(t),f(E.$$.fragment,t),Ut=o(t),F=d(t,"DIV",{class:!0,"data-svelte-h":!0}),U(F)!=="svelte-1or519q"&&(F.innerHTML=St),Jt=o(t),k=d(t,"P",{"data-svelte-h":!0}),U(k)!=="svelte-1yw57hm"&&(k.textContent=Et),ht=o(t),f(G.$$.fragment,t),ft=o(t),f(X.$$.fragment,t),Tt=o(t),Y=d(t,"P",{"data-svelte-h":!0}),U(Y)!=="svelte-ufsph6"&&(Y.innerHTML=kt),Mt=o(t),H=d(t,"P",{"data-svelte-h":!0}),U(H)!=="svelte-76s5k8"&&(H.innerHTML=Gt),Vt=o(t),f(b.$$.fragment,t),jt=o(t),x=d(t,"P",{"data-svelte-h":!0}),U(x)!=="svelte-6gtl5s"&&(x.textContent=Xt),yt=o(t),f($.$$.fragment,t),wt=o(t),f(L.$$.fragment,t),gt=o(t),q=d(t,"UL",{"data-svelte-h":!0}),U(q)!=="svelte-5ha4im"&&(q.innerHTML=Yt),Ct=o(t),f(A.$$.fragment,t),Ft=o(t),P=d(t,"P",{}),qt(P).forEach(e),this.h()},h(){O(a,"name","hf:doc:metadata"),O(a,"content",cl),O(w,"class","flex justify-center"),O(C,"class","flex justify-center"),O(F,"class","flex justify-center")},m(t,l){ll(document.head,a),s(t,u,l),s(t,p,l),s(t,c,l),T(i,t,l),s(t,n,l),s(t,J,l),s(t,tt,l),s(t,B,l),s(t,lt,l),s(t,N,l),s(t,et,l),s(t,I,l),s(t,st,l),T(Q,t,l),s(t,at,l),s(t,R,l),s(t,nt,l),T(Z,t,l),s(t,it,l),T(v,t,l),s(t,pt,l),s(t,W,l),s(t,rt,l),s(t,w,l),s(t,ot,l),s(t,_,l),s(t,ct,l),T(g,t,l),s(t,mt,l),s(t,C,l),s(t,dt,l),s(t,S,l),s(t,ut,l),T(E,t,l),s(t,Ut,l),s(t,F,l),s(t,Jt,l),s(t,k,l),s(t,ht,l),T(G,t,l),s(t,ft,l),T(X,t,l),s(t,Tt,l),s(t,Y,l),s(t,Mt,l),s(t,H,l),s(t,Vt,l),T(b,t,l),s(t,jt,l),s(t,x,l),s(t,yt,l),T($,t,l),s(t,wt,l),T(L,t,l),s(t,gt,l),s(t,q,l),s(t,Ct,l),T(A,t,l),s(t,Ft,l),s(t,P,l),bt=!0},p(t,[l]){const Ht={};l&2&&(Ht.$$scope={dirty:l,ctx:t}),g.$set(Ht);const xt={};l&2&&(xt.$$scope={dirty:l,ctx:t}),b.$set(xt);const Lt={};l&2&&(Lt.$$scope={dirty:l,ctx:t}),$.$set(Lt)},i(t){bt||(M(i.$$.fragment,t),M(Q.$$.fragment,t),M(Z.$$.fragment,t),M(v.$$.fragment,t),M(g.$$.fragment,t),M(E.$$.fragment,t),M(G.$$.fragment,t),M(X.$$.fragment,t),M(b.$$.fragment,t),M($.$$.fragment,t),M(L.$$.fragment,t),M(A.$$.fragment,t),bt=!0)},o(t){V(i.$$.fragment,t),V(Q.$$.fragment,t),V(Z.$$.fragment,t),V(v.$$.fragment,t),V(g.$$.fragment,t),V(E.$$.fragment,t),V(G.$$.fragment,t),V(X.$$.fragment,t),V(b.$$.fragment,t),V($.$$.fragment,t),V(L.$$.fragment,t),V(A.$$.fragment,t),bt=!1},d(t){t&&(e(u),e(p),e(c),e(n),e(J),e(tt),e(B),e(lt),e(N),e(et),e(I),e(st),e(at),e(R),e(nt),e(it),e(pt),e(W),e(rt),e(w),e(ot),e(_),e(ct),e(mt),e(C),e(dt),e(S),e(ut),e(Ut),e(F),e(Jt),e(k),e(ht),e(ft),e(Tt),e(Y),e(Mt),e(H),e(Vt),e(jt),e(x),e(yt),e(wt),e(gt),e(q),e(Ct),e(Ft),e(P)),e(a),j(i,t),j(Q,t),j(Z,t),j(v,t),j(g,t),j(E,t),j(G,t),j(X,t),j(b,t),j($,t),j(L,t),j(A,t)}}}const cl='{"title":"Pruna","local":"pruna","sections":[{"title":"安装","local":"安装","sections":[],"depth":2},{"title":"优化 Diffusers 模型","local":"优化-diffusers-模型","sections":[],"depth":2},{"title":"评估和基准测试Diffusers模型","local":"评估和基准测试diffusers模型","sections":[],"depth":2},{"title":"参考","local":"参考","sections":[],"depth":2}],"depth":1}';function ml(y){return Ot(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class Tl extends Dt{constructor(a){super(),Kt(this,a,ml,ol,Pt,{})}}export{Tl as component}; | |
Xet Storage Details
- Size:
- 25.6 kB
- Xet hash:
- dc1b798e2f89051516efa2a56090d84ccae98776a793aae0923e4d24d8e1ad83
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.