Buckets:

rtrm's picture
download
raw
25.1 kB
import{s as zt,o as Pt,n as At}from"../chunks/scheduler.5c93273d.js";import{S as Ot,i as Dt,g as m,s as p,r as h,A as Kt,h as d,f as e,c as o,j as qt,u as T,x as u,k as z,y as tl,a as s,v as M,d as f,t as V,w as j}from"../chunks/index.e43dd92b.js";import{C as K}from"../chunks/CodeBlock.6896320e.js";import{H as D,E as ll}from"../chunks/getInferenceSnippets.161194d2.js";import{H as el,a as Lt}from"../chunks/HfOption.d50154c3.js";function sl(y){let i,J="我们可以通过使用<code>EvaluationAgent</code>加载和评估优化后的模型,并将其传递给<code>Task</code>。",c,r,n;return r=new K({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwRmx1eFBpcGVsaW5lJTBBJTBBZnJvbSUyMHBydW5hJTIwaW1wb3J0JTIwUHJ1bmFNb2RlbCUwQWZyb20lMjBwcnVuYS5kYXRhLnBydW5hX2RhdGFtb2R1bGUlMjBpbXBvcnQlMjBQcnVuYURhdGFNb2R1bGUlMEFmcm9tJTIwcHJ1bmEuZXZhbHVhdGlvbi5ldmFsdWF0aW9uX2FnZW50JTIwaW1wb3J0JTIwRXZhbHVhdGlvbkFnZW50JTBBZnJvbSUyMHBydW5hLmV2YWx1YXRpb24ubWV0cmljcyUyMGltcG9ydCUyMCglMEElMjAlMjAlMjAlMjBUaHJvdWdocHV0TWV0cmljJTJDJTBBJTIwJTIwJTIwJTIwVG9yY2hNZXRyaWNXcmFwcGVyJTJDJTBBJTIwJTIwJTIwJTIwVG90YWxUaW1lTWV0cmljJTJDJTBBKSUwQWZyb20lMjBwcnVuYS5ldmFsdWF0aW9uLnRhc2slMjBpbXBvcnQlMjBUYXNrJTBBJTBBJTIzJTIwZGVmaW5lJTIwdGhlJTIwZGV2aWNlJTBBZGV2aWNlJTIwJTNEJTIwJTIyY3VkYSUyMiUyMGlmJTIwdG9yY2guY3VkYS5pc19hdmFpbGFibGUoKSUyMGVsc2UlMjAlMjJtcHMlMjIlMjBpZiUyMHRvcmNoLmJhY2tlbmRzLm1wcy5pc19hdmFpbGFibGUoKSUyMGVsc2UlMjAlMjJjcHUlMjIlMEElMEElMjMlMjAlRTUlOEElQTAlRTglQkQlQkQlRTYlQTglQTElRTUlOUUlOEIlMEElMjMlMjAlRTQlQkQlQkYlRTclOTQlQTglRTUlQjAlOEZHUFUlRTUlODYlODUlRTUlQUQlOTglRTUlQjAlOUQlRTglQUYlOTUlMjBQcnVuYUFJJTJGU2VnbWluZC1WZWdhLXNtYXNoZWQlMjAlRTYlODglOTYlMjBQcnVuYUFJJTJGRkxVWC4xLWRldi1zbWFzaGVkJTBBc21hc2hlZF9waXBlJTIwJTNEJTIwUHJ1bmFNb2RlbC5mcm9tX2h1YiglMjJQcnVuYUFJJTJGRkxVWC4xLWRldi1zbWFzaGVkJTIyKSUwQSUwQSUyMyUyMCVFNSVBRSU5QSVFNCVCOSU4OSVFNiU4QyU4NyVFNiVBMCU4NyUwQW1ldHJpY3MlMjAlM0QlMjAlNUIlMEElMjAlMjAlMjAlMjBUb3RhbFRpbWVNZXRyaWMobl9pdGVyYXRpb25zJTNEMjAlMkMlMjBuX3dhcm11cF9pdGVyYXRpb25zJTNENSklMkMlMEElMjAlMjAlMjAlMjBUaHJvdWdocHV0TWV0cmljKG5faXRlcmF0aW9ucyUzRDIwJTJDJTIwbl93YXJtdXBfaXRlcmF0aW9ucyUzRDUpJTJDJTBBJTIwJTIwJTIwJTIwVG9yY2hNZXRyaWNXcmFwcGVyKCUyMmNsaXAlMjIpJTJDJTBBJTVEJTBBJTBBJTIzJTIwJUU1JUFFJTlBJUU0JUI5JTg5JUU2JTk1JUIwJUU2JThEJUFFJUU2JUE4JUExJUU1JTlEJTk3JTBBZGF0YW1vZHVsZSUyMCUzRCUyMFBydW5hRGF0YU1vZHVsZS5mcm9tX3N0cmluZyglMjJMQUlPTjI1NiUyMiklMEFkYXRhbW9kdWxlLmxpbWl0X2RhdGFzZXRzKDEwKSUwQSUwQSUyMyUyMCVFNSVBRSU5QSVFNCVCOSU4OSVFNCVCQiVCQiVFNSU4QSVBMSVFNSU5MiU4QyVFOCVBRiU4NCVFNCVCQyVCMCVFNCVCQiVBMyVFNyU5MCU4NiUwQXRhc2slMjAlM0QlMjBUYXNrKG1ldHJpY3MlMkMlMjBkYXRhbW9kdWxlJTNEZGF0YW1vZHVsZSUyQyUyMGRldmljZSUzRGRldmljZSklMEFldmFsX2FnZW50JTIwJTNEJTIwRXZhbHVhdGlvbkFnZW50KHRhc2spJTBBJTBBJTIzJTIwJUU4JUFGJTg0JUU0JUJDJUIwJUU0JUJDJTk4JUU1JThDJTk2JUU2JUE4JUExJUU1JTlFJThCJUU1JUI5JUI2JUU1JThEJUI4JUU4JUJEJUJEJUU1JTg4JUIwQ1BVJTBBc21hc2hlZF9waXBlLm1vdmVfdG9fZGV2aWNlKGRldmljZSklMEFzbWFzaGVkX3BpcGVfcmVzdWx0cyUyMCUzRCUyMGV2YWxfYWdlbnQuZXZhbHVhdGUoc21hc2hlZF9waXBlKSUwQXNtYXNoZWRfcGlwZS5tb3ZlX3RvX2RldmljZSglMjJjcHUlMjIp",highlighted:`<span class="hljs-keyword">import</span> torch
<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> FluxPipeline
<span class="hljs-keyword">from</span> pruna <span class="hljs-keyword">import</span> PrunaModel
<span class="hljs-keyword">from</span> pruna.data.pruna_datamodule <span class="hljs-keyword">import</span> PrunaDataModule
<span class="hljs-keyword">from</span> pruna.evaluation.evaluation_agent <span class="hljs-keyword">import</span> EvaluationAgent
<span class="hljs-keyword">from</span> pruna.evaluation.metrics <span class="hljs-keyword">import</span> (
ThroughputMetric,
TorchMetricWrapper,
TotalTimeMetric,
)
<span class="hljs-keyword">from</span> pruna.evaluation.task <span class="hljs-keyword">import</span> Task
<span class="hljs-comment"># define the device</span>
device = <span class="hljs-string">&quot;cuda&quot;</span> <span class="hljs-keyword">if</span> torch.cuda.is_available() <span class="hljs-keyword">else</span> <span class="hljs-string">&quot;mps&quot;</span> <span class="hljs-keyword">if</span> torch.backends.mps.is_available() <span class="hljs-keyword">else</span> <span class="hljs-string">&quot;cpu&quot;</span>
<span class="hljs-comment"># 加载模型</span>
<span class="hljs-comment"># 使用小GPU内存尝试 PrunaAI/Segmind-Vega-smashed 或 PrunaAI/FLUX.1-dev-smashed</span>
smashed_pipe = PrunaModel.from_hub(<span class="hljs-string">&quot;PrunaAI/FLUX.1-dev-smashed&quot;</span>)
<span class="hljs-comment"># 定义指标</span>
metrics = [
TotalTimeMetric(n_iterations=<span class="hljs-number">20</span>, n_warmup_iterations=<span class="hljs-number">5</span>),
ThroughputMetric(n_iterations=<span class="hljs-number">20</span>, n_warmup_iterations=<span class="hljs-number">5</span>),
TorchMetricWrapper(<span class="hljs-string">&quot;clip&quot;</span>),
]
<span class="hljs-comment"># 定义数据模块</span>
datamodule = PrunaDataModule.from_string(<span class="hljs-string">&quot;LAION256&quot;</span>)
datamodule.limit_datasets(<span class="hljs-number">10</span>)
<span class="hljs-comment"># 定义任务和评估代理</span>
task = Task(metrics, datamodule=datamodule, device=device)
eval_agent = EvaluationAgent(task)
<span class="hljs-comment"># 评估优化模型并卸载到CPU</span>
smashed_pipe.move_to_device(device)
smashed_pipe_results = eval_agent.evaluate(smashed_pipe)
smashed_pipe.move_to_device(<span class="hljs-string">&quot;cpu&quot;</span>)`,wrap:!1}}),{c(){i=m("p"),i.innerHTML=J,c=p(),h(r.$$.fragment)},l(a){i=d(a,"P",{"data-svelte-h":!0}),u(i)!=="svelte-1wfu4ax"&&(i.innerHTML=J),c=o(a),T(r.$$.fragment,a)},m(a,U){s(a,i,U),s(a,c,U),M(r,a,U),n=!0},p:At,i(a){n||(f(r.$$.fragment,a),n=!0)},o(a){V(r.$$.fragment,a),n=!1},d(a){a&&(e(i),e(c)),j(r,a)}}}function al(y){let i,J="除了比较优化模型与基础模型,您还可以评估独立的 <code>diffusers</code> 模型。这在您想评估模型性能而不考虑优化时非常有用。我们可以通过使用 <code>PrunaModel</code> 包装器并运行 <code>EvaluationAgent</code> 来实现。",c,r,n;return r=new K({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwRmx1eFBpcGVsaW5lJTBBJTBBZnJvbSUyMHBydW5hJTIwaW1wb3J0JTIwUHJ1bmFNb2RlbCUwQSUwQSUyMyUyMCVFNSU4QSVBMCVFOCVCRCVCRCVFNiVBOCVBMSVFNSU5RSU4QiUwQSUyMyUyMCVFNCVCRCVCRiVFNyU5NCVBOCVFNSVCMCU4RkdQVSVFNSU4NiU4NSVFNSVBRCU5OCVFNSVCMCU5RCVFOCVBRiU5NSUyMFBydW5hQUklMkZTZWdtaW5kLVZlZ2Etc21hc2hlZCUyMCVFNiU4OCU5NiUyMFBydW5hQUklMkZGTFVYLjEtZGV2LXNtYXNoZWQlMEFwaXBlJTIwJTNEJTIwRmx1eFBpcGVsaW5lLmZyb21fcHJldHJhaW5lZCglMEElMjAlMjAlMjAlMjAlMjJibGFjay1mb3Jlc3QtbGFicyUyRkZMVVguMS1kZXYlMjIlMkMlMEElMjAlMjAlMjAlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmJmbG9hdDE2JTBBKS50byglMjJjcHUlMjIpJTBBd3JhcHBlZF9waXBlJTIwJTNEJTIwUHJ1bmFNb2RlbChtb2RlbCUzRHBpcGUp",highlighted:`<span class="hljs-keyword">import</span> torch
<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> FluxPipeline
<span class="hljs-keyword">from</span> pruna <span class="hljs-keyword">import</span> PrunaModel
<span class="hljs-comment"># 加载模型</span>
<span class="hljs-comment"># 使用小GPU内存尝试 PrunaAI/Segmind-Vega-smashed 或 PrunaAI/FLUX.1-dev-smashed</span>
pipe = FluxPipeline.from_pretrained(
<span class="hljs-string">&quot;black-forest-labs/FLUX.1-dev&quot;</span>,
torch_dtype=torch.bfloat16
).to(<span class="hljs-string">&quot;cpu&quot;</span>)
wrapped_pipe = PrunaModel(model=pipe)`,wrap:!1}}),{c(){i=m("p"),i.innerHTML=J,c=p(),h(r.$$.fragment)},l(a){i=d(a,"P",{"data-svelte-h":!0}),u(i)!=="svelte-im1gl4"&&(i.innerHTML=J),c=o(a),T(r.$$.fragment,a)},m(a,U){s(a,i,U),s(a,c,U),M(r,a,U),n=!0},p:At,i(a){n||(f(r.$$.fragment,a),n=!0)},o(a){V(r.$$.fragment,a),n=!1},d(a){a&&(e(i),e(c)),j(r,a)}}}function nl(y){let i,J,c,r;return i=new Lt({props:{id:"eval",option:"optimized model",$$slots:{default:[sl]},$$scope:{ctx:y}}}),c=new Lt({props:{id:"eval",option:"standalone model",$$slots:{default:[al]},$$scope:{ctx:y}}}),{c(){h(i.$$.fragment),J=p(),h(c.$$.fragment)},l(n){T(i.$$.fragment,n),J=o(n),T(c.$$.fragment,n)},m(n,a){M(i,n,a),s(n,J,a),M(c,n,a),r=!0},p(n,a){const U={};a&2&&(U.$$scope={dirty:a,ctx:n}),i.$set(U);const P={};a&2&&(P.$$scope={dirty:a,ctx:n}),c.$set(P)},i(n){r||(f(i.$$.fragment,n),f(c.$$.fragment,n),r=!0)},o(n){V(i.$$.fragment,n),V(c.$$.fragment,n),r=!1},d(n){n&&e(J),j(i,n),j(c,n)}}}function il(y){let i,J,c,r,n,a,U,P='<a href="https://github.com/PrunaAI/pruna" rel="nofollow">Pruna</a> 是一个模型优化框架,提供多种优化方法——量化、剪枝、缓存、编译——以加速推理并减少内存使用。以下是优化方法的概览。',tt,N,Bt='<thead><tr><th>技术</th> <th>描述</th> <th align="center">速度</th> <th align="center">内存</th> <th align="center">质量</th></tr></thead> <tbody><tr><td><code>batcher</code></td> <td>将多个输入分组在一起同时处理,提高计算效率并减少处理时间。</td> <td align="center">✅</td> <td align="center">❌</td> <td align="center">➖</td></tr> <tr><td><code>cacher</code></td> <td>存储计算的中间结果以加速后续操作。</td> <td align="center">✅</td> <td align="center">➖</td> <td align="center">➖</td></tr> <tr><td><code>compiler</code></td> <td>为特定硬件优化模型指令。</td> <td align="center">✅</td> <td align="center">➖</td> <td align="center">➖</td></tr> <tr><td><code>distiller</code></td> <td>训练一个更小、更简单的模型来模仿一个更大、更复杂的模型。</td> <td align="center">✅</td> <td align="center">✅</td> <td align="center">❌</td></tr> <tr><td><code>quantizer</code></td> <td>降低权重和激活的精度,减少内存需求。</td> <td align="center">✅</td> <td align="center">✅</td> <td align="center">❌</td></tr> <tr><td><code>pruner</code></td> <td>移除不重要或冗余的连接和神经元,产生一个更稀疏、更高效的网络。</td> <td align="center">✅</td> <td align="center">✅</td> <td align="center">❌</td></tr> <tr><td><code>recoverer</code></td> <td>在压缩后恢复模型的性能。</td> <td align="center">➖</td> <td align="center">➖</td> <td align="center">✅</td></tr> <tr><td><code>factorizer</code></td> <td>将多个小矩阵乘法批处理为一个大型融合操作。</td> <td align="center">✅</td> <td align="center">➖</td> <td align="center">➖</td></tr> <tr><td><code>enhancer</code></td> <td>通过应用后处理算法(如去噪或上采样)来增强模型输出。</td> <td align="center">❌</td> <td align="center">-</td> <td align="center">✅</td></tr></tbody>',lt,I,Nt="✅ (改进), ➖ (大致相同), ❌ (恶化)",et,Q,It='在 <a href="https://docs.pruna.ai/en/stable/docs_pruna/user_manual/configure.html#configure-algorithms" rel="nofollow">Pruna 文档</a> 中探索所有优化方法。',st,$,at,R,Qt="使用以下命令安装 Pruna。",nt,Z,it,v,pt,W,$t="Diffusers 模型支持广泛的优化算法,如下所示。",ot,w,Rt='<img src="https://huggingface.co/datasets/PrunaAI/documentation-images/resolve/main/diffusers/diffusers_combinations.png" alt="Diffusers 模型支持的优化算法概览"/>',rt,_,Zt='下面的示例使用 factorizer、compiler 和 cacher 算法的组合优化 <a href="https://huggingface.co/black-forest-labs/FLUX.1-dev" rel="nofollow">black-forest-labs/FLUX.1-dev</a>。这种组合将推理速度加速高达 4.2 倍,并将峰值 GPU 内存使用从 34.7GB 减少到 28.0GB,同时几乎保持相同的输出质量。',ct,C,vt=`<p>参考 <a href="https://docs.pruna.ai/en/stable/docs_pruna/user_manual/configure.html" rel="nofollow">Pruna 优化</a> 文档以了解更多关于该操作的信息。
本示例中使用的优化技术。</p>`,mt,g,Wt='<img src="https://huggingface.co/datasets/PrunaAI/documentation-images/resolve/main/diffusers/flux_combination.png" alt="用于FLUX.1-dev的优化技术展示,结合了因子分解器、编译器和缓存器算法"/>',dt,S,_t="首先定义一个包含要使用的优化算法的<code>SmashConfig</code>。要优化模型,将管道和<code>SmashConfig</code>用<code>smash</code>包装,然后像往常一样使用管道进行推理。",ut,E,Ut,F,St='<img src="https://huggingface.co/datasets/PrunaAI/documentation-images/resolve/main/diffusers/flux_smashed_comparison.png"/>',Jt,k,Et="优化后,我们可以使用Hugging Face Hub共享和加载优化后的模型。",ht,G,Tt,X,Mt,Y,kt='Pruna提供了<a href="https://docs.pruna.ai/en/stable/docs_pruna/user_manual/evaluate.html" rel="nofollow">EvaluationAgent</a>来评估优化后模型的质量。',ft,x,Gt="我们可以定义我们关心的指标,如总时间和吞吐量,以及要评估的数据集。我们可以定义一个模型并将其传递给<code>EvaluationAgent</code>。",Vt,b,jt,H,Xt="现在您已经了解了如何优化和评估您的模型,可以开始使用 Pruna 来优化您自己的模型了。幸运的是,我们有许多示例来帮助您入门。",yt,B,Yt='<p>有关基准测试 Flux 的更多详细信息,请查看 <a href="https://huggingface.co/blog/PrunaAI/flux-fastest-image-generation-endpoint" rel="nofollow">宣布 FLUX-Juiced:最快的图像生成端点(快 2.6 倍)!</a> 博客文章和 <a href="https://huggingface.co/spaces/PrunaAI/InferBench" rel="nofollow">InferBench</a> 空间。</p>',wt,q,Ct,L,xt='<li><a href="https://github.com/pruna-ai/pruna" rel="nofollow">Pruna</a></li> <li><a href="https://docs.pruna.ai/en/stable/docs_pruna/user_manual/configure.html#configure-algorithms" rel="nofollow">Pruna 优化</a></li> <li><a href="https://docs.pruna.ai/en/stable/docs_pruna/user_manual/evaluate.html" rel="nofollow">Pruna 评估</a></li> <li><a href="https://docs.pruna.ai/en/stable/docs_pruna/tutorials/index.html" rel="nofollow">Pruna 教程</a></li>',gt,A,Ft,O,bt;return n=new D({props:{title:"Pruna",local:"pruna",headingTag:"h1"}}),$=new D({props:{title:"安装",local:"安装",headingTag:"h2"}}),Z=new K({props:{code:"cGlwJTIwaW5zdGFsbCUyMHBydW5h",highlighted:"pip install pruna",wrap:!1}}),v=new D({props:{title:"优化 Diffusers 模型",local:"优化-diffusers-模型",headingTag:"h2"}}),E=new K({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwRmx1eFBpcGVsaW5lJTBBJTBBZnJvbSUyMHBydW5hJTIwaW1wb3J0JTIwUHJ1bmFNb2RlbCUyQyUyMFNtYXNoQ29uZmlnJTJDJTIwc21hc2glMEElMEElMjMlMjAlRTUlOEElQTAlRTglQkQlQkQlRTYlQTglQTElRTUlOUUlOEIlMEElMjMlMjAlRTQlQkQlQkYlRTclOTQlQTglRTUlQjAlOEZHUFUlRTUlODYlODUlRTUlQUQlOTglRTUlQjAlOUQlRTglQUYlOTVzZWdtaW5kJTJGU2VnbWluZC1WZWdhJUU2JTg4JTk2YmxhY2stZm9yZXN0LWxhYnMlMkZGTFVYLjEtc2NobmVsbCUwQXBpcGUlMjAlM0QlMjBGbHV4UGlwZWxpbmUuZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMCUyMmJsYWNrLWZvcmVzdC1sYWJzJTJGRkxVWC4xLWRldiUyMiUyQyUwQSUyMCUyMCUyMCUyMHRvcmNoX2R0eXBlJTNEdG9yY2guYmZsb2F0MTYlMEEpLnRvKCUyMmN1ZGElMjIpJTBBJTBBJTIzJTIwJUU1JUFFJTlBJUU0JUI5JTg5JUU5JTg1JThEJUU3JUJEJUFFJTBBc21hc2hfY29uZmlnJTIwJTNEJTIwU21hc2hDb25maWcoKSUwQXNtYXNoX2NvbmZpZyU1QiUyMmZhY3Rvcml6ZXIlMjIlNUQlMjAlM0QlMjAlMjJxa3ZfZGlmZnVzZXJzJTIyJTBBc21hc2hfY29uZmlnJTVCJTIyY29tcGlsZXIlMjIlNUQlMjAlM0QlMjAlMjJ0b3JjaF9jb21waWxlJTIyJTBBc21hc2hfY29uZmlnJTVCJTIydG9yY2hfY29tcGlsZV90YXJnZXQlMjIlNUQlMjAlM0QlMjAlMjJtb2R1bGVfbGlzdCUyMiUwQXNtYXNoX2NvbmZpZyU1QiUyMmNhY2hlciUyMiU1RCUyMCUzRCUyMCUyMmZvcmElMjIlMEFzbWFzaF9jb25maWclNUIlMjJmb3JhX2ludGVydmFsJTIyJTVEJTIwJTNEJTIwMiUwQSUwQSUyMyUyMCVFNCVCOCVCQSVFNCVCQSU4NiVFOCU4RSVCNyVFNSVCRSU5NyVFNiU5QyU4MCVFNCVCRCVCMyVFOSU4MCU5RiVFNSVCQSVBNiVFNyVCQiU5MyVFNiU5RSU5QyVFRiVCQyU4QyVFNSU4RiVBRiVFNCVCQiVBNSVFNiVCNyVCQiVFNSU4QSVBMCVFOCVCRiU5OSVFNCVCQSU5QiVFOSU4NSU4RCVFNyVCRCVBRSUwQSUyMyUyMCVFNCVCRCU4NiVFNSVBRSU4MyVFNCVCQiVBQyVFNCVCQyU5QSVFNSVCMCU4NiVFOSVBMiU4NCVFNyU4MyVBRCVFNiU5NyVCNiVFOSU5NyVCNCVFNCVCQiU4RTEuNSVFNSU4OCU4NiVFOSU5MiU5RiVFNSVBMiU5RSVFNSU4QSVBMCVFNSU4OCVCMDEwJUU1JTg4JTg2JUU5JTkyJTlGJTBBJTIzJTIwc21hc2hfY29uZmlnJTVCJTIydG9yY2hfY29tcGlsZV9tb2RlJTIyJTVEJTIwJTNEJTIwJTIybWF4LWF1dG90dW5lLW5vLWN1ZGFncmFwaHMlMjIlMEElMjMlMjBzbWFzaF9jb25maWclNUIlMjJxdWFudGl6ZXIlMjIlNUQlMjAlM0QlMjAlMjJ0b3JjaGFvJTIyJTBBJTIzJTIwc21hc2hfY29uZmlnJTVCJTIydG9yY2hhb19xdWFudF90eXBlJTIyJTVEJTIwJTNEJTIwJTIyZnA4ZHElMjIlMEElMjMlMjBzbWFzaF9jb25maWclNUIlMjJ0b3JjaGFvX2V4Y2x1ZGVkX21vZHVsZXMlMjIlNUQlMjAlM0QlMjAlMjJub3JtJTJCZW1iZWRkaW5nJTIyJTBBJTBBJTIzJTIwJUU0JUJDJTk4JUU1JThDJTk2JUU2JUE4JUExJUU1JTlFJThCJTBBc21hc2hlZF9waXBlJTIwJTNEJTIwc21hc2gocGlwZSUyQyUyMHNtYXNoX2NvbmZpZyklMEElMEElMjMlMjAlRTglQkYlOTAlRTglQTElOEMlRTYlQTglQTElRTUlOUUlOEIlMEFzbWFzaGVkX3BpcGUoJTIyYSUyMGtuaXR0ZWQlMjBwdXJwbGUlMjBwcnVuZSUyMikuaW1hZ2VzJTVCMCU1RA==",highlighted:`<span class="hljs-keyword">import</span> torch
<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> FluxPipeline
<span class="hljs-keyword">from</span> pruna <span class="hljs-keyword">import</span> PrunaModel, SmashConfig, smash
<span class="hljs-comment"># 加载模型</span>
<span class="hljs-comment"># 使用小GPU内存尝试segmind/Segmind-Vega或black-forest-labs/FLUX.1-schnell</span>
pipe = FluxPipeline.from_pretrained(
<span class="hljs-string">&quot;black-forest-labs/FLUX.1-dev&quot;</span>,
torch_dtype=torch.bfloat16
).to(<span class="hljs-string">&quot;cuda&quot;</span>)
<span class="hljs-comment"># 定义配置</span>
smash_config = SmashConfig()
smash_config[<span class="hljs-string">&quot;factorizer&quot;</span>] = <span class="hljs-string">&quot;qkv_diffusers&quot;</span>
smash_config[<span class="hljs-string">&quot;compiler&quot;</span>] = <span class="hljs-string">&quot;torch_compile&quot;</span>
smash_config[<span class="hljs-string">&quot;torch_compile_target&quot;</span>] = <span class="hljs-string">&quot;module_list&quot;</span>
smash_config[<span class="hljs-string">&quot;cacher&quot;</span>] = <span class="hljs-string">&quot;fora&quot;</span>
smash_config[<span class="hljs-string">&quot;fora_interval&quot;</span>] = <span class="hljs-number">2</span>
<span class="hljs-comment"># 为了获得最佳速度结果,可以添加这些配置</span>
<span class="hljs-comment"># 但它们会将预热时间从1.5分钟增加到10分钟</span>
<span class="hljs-comment"># smash_config[&quot;torch_compile_mode&quot;] = &quot;max-autotune-no-cudagraphs&quot;</span>
<span class="hljs-comment"># smash_config[&quot;quantizer&quot;] = &quot;torchao&quot;</span>
<span class="hljs-comment"># smash_config[&quot;torchao_quant_type&quot;] = &quot;fp8dq&quot;</span>
<span class="hljs-comment"># smash_config[&quot;torchao_excluded_modules&quot;] = &quot;norm+embedding&quot;</span>
<span class="hljs-comment"># 优化模型</span>
smashed_pipe = smash(pipe, smash_config)
<span class="hljs-comment"># 运行模型</span>
smashed_pipe(<span class="hljs-string">&quot;a knitted purple prune&quot;</span>).images[<span class="hljs-number">0</span>]`,wrap:!1}}),G=new K({props:{code:"JTIzJTIwJUU0JUJGJTlEJUU1JUFEJTk4JUU2JUE4JUExJUU1JTlFJThCJTBBc21hc2hlZF9waXBlLnNhdmVfdG9faHViKCUyMiUzQ3VzZXJuYW1lJTNFJTJGRkxVWC4xLWRldi1zbWFzaGVkJTIyKSUwQSUwQSUyMyUyMCVFNSU4QSVBMCVFOCVCRCVCRCVFNiVBOCVBMSVFNSU5RSU4QiUwQXNtYXNoZWRfcGlwZSUyMCUzRCUyMFBydW5hTW9kZWwuZnJvbV9odWIoJTIyJTNDdXNlcm5hbWUlM0UlMkZGTFVYLjEtZGV2LXNtYXNoZWQlMjIp",highlighted:`<span class="hljs-comment"># 保存模型</span>
smashed_pipe.save_to_hub(<span class="hljs-string">&quot;&lt;username&gt;/FLUX.1-dev-smashed&quot;</span>)
<span class="hljs-comment"># 加载模型</span>
smashed_pipe = PrunaModel.from_hub(<span class="hljs-string">&quot;&lt;username&gt;/FLUX.1-dev-smashed&quot;</span>)`,wrap:!1}}),X=new D({props:{title:"评估和基准测试Diffusers模型",local:"评估和基准测试diffusers模型",headingTag:"h2"}}),b=new el({props:{id:"eval",options:["optimized model","standalone model"],$$slots:{default:[nl]},$$scope:{ctx:y}}}),q=new D({props:{title:"参考",local:"参考",headingTag:"h2"}}),A=new ll({props:{source:"https://github.com/huggingface/diffusers/blob/main/docs/source/zh/optimization/pruna.md"}}),{c(){i=m("meta"),J=p(),c=m("p"),r=p(),h(n.$$.fragment),a=p(),U=m("p"),U.innerHTML=P,tt=p(),N=m("table"),N.innerHTML=Bt,lt=p(),I=m("p"),I.textContent=Nt,et=p(),Q=m("p"),Q.innerHTML=It,st=p(),h($.$$.fragment),at=p(),R=m("p"),R.textContent=Qt,nt=p(),h(Z.$$.fragment),it=p(),h(v.$$.fragment),pt=p(),W=m("p"),W.textContent=$t,ot=p(),w=m("div"),w.innerHTML=Rt,rt=p(),_=m("p"),_.innerHTML=Zt,ct=p(),C=m("blockquote"),C.innerHTML=vt,mt=p(),g=m("div"),g.innerHTML=Wt,dt=p(),S=m("p"),S.innerHTML=_t,ut=p(),h(E.$$.fragment),Ut=p(),F=m("div"),F.innerHTML=St,Jt=p(),k=m("p"),k.textContent=Et,ht=p(),h(G.$$.fragment),Tt=p(),h(X.$$.fragment),Mt=p(),Y=m("p"),Y.innerHTML=kt,ft=p(),x=m("p"),x.innerHTML=Gt,Vt=p(),h(b.$$.fragment),jt=p(),H=m("p"),H.textContent=Xt,yt=p(),B=m("blockquote"),B.innerHTML=Yt,wt=p(),h(q.$$.fragment),Ct=p(),L=m("ul"),L.innerHTML=xt,gt=p(),h(A.$$.fragment),Ft=p(),O=m("p"),this.h()},l(t){const l=Kt("svelte-u9bgzb",document.head);i=d(l,"META",{name:!0,content:!0}),l.forEach(e),J=o(t),c=d(t,"P",{}),qt(c).forEach(e),r=o(t),T(n.$$.fragment,t),a=o(t),U=d(t,"P",{"data-svelte-h":!0}),u(U)!=="svelte-14zuif9"&&(U.innerHTML=P),tt=o(t),N=d(t,"TABLE",{"data-svelte-h":!0}),u(N)!=="svelte-1870rsx"&&(N.innerHTML=Bt),lt=o(t),I=d(t,"P",{"data-svelte-h":!0}),u(I)!=="svelte-1syz01b"&&(I.textContent=Nt),et=o(t),Q=d(t,"P",{"data-svelte-h":!0}),u(Q)!=="svelte-uq0kan"&&(Q.innerHTML=It),st=o(t),T($.$$.fragment,t),at=o(t),R=d(t,"P",{"data-svelte-h":!0}),u(R)!=="svelte-1g5o862"&&(R.textContent=Qt),nt=o(t),T(Z.$$.fragment,t),it=o(t),T(v.$$.fragment,t),pt=o(t),W=d(t,"P",{"data-svelte-h":!0}),u(W)!=="svelte-1oye83w"&&(W.textContent=$t),ot=o(t),w=d(t,"DIV",{class:!0,"data-svelte-h":!0}),u(w)!=="svelte-1uhmbha"&&(w.innerHTML=Rt),rt=o(t),_=d(t,"P",{"data-svelte-h":!0}),u(_)!=="svelte-1bmdjcc"&&(_.innerHTML=Zt),ct=o(t),C=d(t,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),u(C)!=="svelte-137yx3g"&&(C.innerHTML=vt),mt=o(t),g=d(t,"DIV",{class:!0,"data-svelte-h":!0}),u(g)!=="svelte-1yaafaz"&&(g.innerHTML=Wt),dt=o(t),S=d(t,"P",{"data-svelte-h":!0}),u(S)!=="svelte-146qhef"&&(S.innerHTML=_t),ut=o(t),T(E.$$.fragment,t),Ut=o(t),F=d(t,"DIV",{class:!0,"data-svelte-h":!0}),u(F)!=="svelte-1or519q"&&(F.innerHTML=St),Jt=o(t),k=d(t,"P",{"data-svelte-h":!0}),u(k)!=="svelte-1yw57hm"&&(k.textContent=Et),ht=o(t),T(G.$$.fragment,t),Tt=o(t),T(X.$$.fragment,t),Mt=o(t),Y=d(t,"P",{"data-svelte-h":!0}),u(Y)!=="svelte-ufsph6"&&(Y.innerHTML=kt),ft=o(t),x=d(t,"P",{"data-svelte-h":!0}),u(x)!=="svelte-76s5k8"&&(x.innerHTML=Gt),Vt=o(t),T(b.$$.fragment,t),jt=o(t),H=d(t,"P",{"data-svelte-h":!0}),u(H)!=="svelte-6gtl5s"&&(H.textContent=Xt),yt=o(t),B=d(t,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),u(B)!=="svelte-t41fkn"&&(B.innerHTML=Yt),wt=o(t),T(q.$$.fragment,t),Ct=o(t),L=d(t,"UL",{"data-svelte-h":!0}),u(L)!=="svelte-5ha4im"&&(L.innerHTML=xt),gt=o(t),T(A.$$.fragment,t),Ft=o(t),O=d(t,"P",{}),qt(O).forEach(e),this.h()},h(){z(i,"name","hf:doc:metadata"),z(i,"content",pl),z(w,"class","flex justify-center"),z(C,"class","tip"),z(g,"class","flex justify-center"),z(F,"class","flex justify-center"),z(B,"class","tip")},m(t,l){tl(document.head,i),s(t,J,l),s(t,c,l),s(t,r,l),M(n,t,l),s(t,a,l),s(t,U,l),s(t,tt,l),s(t,N,l),s(t,lt,l),s(t,I,l),s(t,et,l),s(t,Q,l),s(t,st,l),M($,t,l),s(t,at,l),s(t,R,l),s(t,nt,l),M(Z,t,l),s(t,it,l),M(v,t,l),s(t,pt,l),s(t,W,l),s(t,ot,l),s(t,w,l),s(t,rt,l),s(t,_,l),s(t,ct,l),s(t,C,l),s(t,mt,l),s(t,g,l),s(t,dt,l),s(t,S,l),s(t,ut,l),M(E,t,l),s(t,Ut,l),s(t,F,l),s(t,Jt,l),s(t,k,l),s(t,ht,l),M(G,t,l),s(t,Tt,l),M(X,t,l),s(t,Mt,l),s(t,Y,l),s(t,ft,l),s(t,x,l),s(t,Vt,l),M(b,t,l),s(t,jt,l),s(t,H,l),s(t,yt,l),s(t,B,l),s(t,wt,l),M(q,t,l),s(t,Ct,l),s(t,L,l),s(t,gt,l),M(A,t,l),s(t,Ft,l),s(t,O,l),bt=!0},p(t,[l]){const Ht={};l&2&&(Ht.$$scope={dirty:l,ctx:t}),b.$set(Ht)},i(t){bt||(f(n.$$.fragment,t),f($.$$.fragment,t),f(Z.$$.fragment,t),f(v.$$.fragment,t),f(E.$$.fragment,t),f(G.$$.fragment,t),f(X.$$.fragment,t),f(b.$$.fragment,t),f(q.$$.fragment,t),f(A.$$.fragment,t),bt=!0)},o(t){V(n.$$.fragment,t),V($.$$.fragment,t),V(Z.$$.fragment,t),V(v.$$.fragment,t),V(E.$$.fragment,t),V(G.$$.fragment,t),V(X.$$.fragment,t),V(b.$$.fragment,t),V(q.$$.fragment,t),V(A.$$.fragment,t),bt=!1},d(t){t&&(e(J),e(c),e(r),e(a),e(U),e(tt),e(N),e(lt),e(I),e(et),e(Q),e(st),e(at),e(R),e(nt),e(it),e(pt),e(W),e(ot),e(w),e(rt),e(_),e(ct),e(C),e(mt),e(g),e(dt),e(S),e(ut),e(Ut),e(F),e(Jt),e(k),e(ht),e(Tt),e(Mt),e(Y),e(ft),e(x),e(Vt),e(jt),e(H),e(yt),e(B),e(wt),e(Ct),e(L),e(gt),e(Ft),e(O)),e(i),j(n,t),j($,t),j(Z,t),j(v,t),j(E,t),j(G,t),j(X,t),j(b,t),j(q,t),j(A,t)}}}const pl='{"title":"Pruna","local":"pruna","sections":[{"title":"安装","local":"安装","sections":[],"depth":2},{"title":"优化 Diffusers 模型","local":"优化-diffusers-模型","sections":[],"depth":2},{"title":"评估和基准测试Diffusers模型","local":"评估和基准测试diffusers模型","sections":[],"depth":2},{"title":"参考","local":"参考","sections":[],"depth":2}],"depth":1}';function ol(y){return Pt(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class Ul extends Ot{constructor(i){super(),Dt(this,i,ol,il,zt,{})}}export{Ul as component};

Xet Storage Details

Size:
25.1 kB
·
Xet hash:
fef61f62db4733de7dc9c15457d80ad7257fd44491ea54e2921f592a26a09d50

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.