Buckets:
| import{s as dr,o as fr,n as ks}from"../chunks/scheduler.9991993c.js";import{S as cr,i as ur,g as s,s as o,r as m,A as br,h as i,f as n,c as a,j as x,u as p,x as r,k,l as as,y as b,a as l,v as d,d as f,t as c,w as u,m as gr,n as hr}from"../chunks/index.7fc9a5e7.js";import{T as ss}from"../chunks/Tip.9de92fc6.js";import{D as Z}from"../chunks/Docstring.ef7d0149.js";import{C as _}from"../chunks/CodeBlock.e11cba92.js";import{H as g,E as _r}from"../chunks/EditOnGithub.84ab7f0e.js";/* $r(U): fragment factory for the GPTQ warning text; it is passed as the default slot of the `ss` (Tip) instance created with warning:true further down. Returns {c,l,m,d}: c() builds the node with gr(text); l(M) builds it with hr(M,text) from an existing parent M; m(M,$) places it with l(M,h,$); d(M) calls n(h) only when M is truthy. The same text literal appears in both c() and l() and must stay identical. NOTE(review): gr/hr/l/n are imported from ../chunks/index - presumably Svelte's text / claim_text / insert / detach helpers; confirm. */function $r(U){let h;return{c(){h=gr("目前,GPTQ量化仅适用于文本模型。此外,量化过程可能会花费很多时间,具体取决于硬件性能(175B模型在NVIDIA A100上需要4小时)。请在Hub上检查是否有模型的GPTQ量化版本。如果没有,您可以在GitHub上提交需求。")},l(M){h=hr(M,"目前,GPTQ量化仅适用于文本模型。此外,量化过程可能会花费很多时间,具体取决于硬件性能(175B模型在NVIDIA A100上需要4小时)。请在Hub上检查是否有模型的GPTQ量化版本。如果没有,您可以在GitHub上提交需求。")},m(M,$){l(M,h,$)},d(M){M&&n(h)}}}/* yr(U): fragment factory for a single <p> holding the 4-bit quantization note (plain text, set through textContent). c() creates the element with s("p"); l($) obtains it with i($,"P",{"data-svelte-h":true}) and rewrites textContent only when r(h) is not "svelte-qms63b"; m inserts it; p is the imported `ks`; d removes it when its argument is truthy. NOTE(review): r() presumably reads the data-svelte-h hash of server-rendered markup and ks is presumably a no-op - confirm against ../chunks. */function yr(U){let h,M="需要注意的是,一旦模型以 4 位量化方式加载,就无法将量化后的权重推送到 Hub 上。此外,您不能训练 4 位量化权重,因为目前尚不支持此功能。但是,您可以使用 4 位量化模型来训练额外参数,这将在下一部分中介绍。";return{c(){h=s("p"),h.textContent=M},l($){h=i($,"P",{"data-svelte-h":!0}),r(h)!=="svelte-qms63b"&&(h.textContent=M)},m($,T){l($,h,T)},p:ks,d($){$&&n(h)}}}/* Mr(U): fragment factory for two <p> elements (h and T) with a separator node $ between them, carrying the 8-bit quantization note and the device_map note. Both strings contain <code> markup, so they are assigned through innerHTML rather than textContent. c() creates h, $ (via o()) and T; l(C) obtains them from parent C (separator via a(C)) and rewrites innerHTML only when r() differs from "svelte-awm97l" / "svelte-gophk0"; m inserts h, $, T in that order; d removes all three when C is truthy. */function Mr(U){let h,M="需要注意的是,一旦模型以 8 位量化方式加载,除了使用最新的 <code>transformers</code> 和 <code>bitsandbytes</code> 之外,目前尚无法将量化后的权重推送到 Hub 上。此外,您不能训练 8 位量化权重,因为目前尚不支持此功能。但是,您可以使用 8 位量化模型来训练额外参数,这将在下一部分中介绍。",$,T,Q="注意,<code>device_map</code> 是可选的,但设置 <code>device_map = 'auto'</code> 更适合用于推理,因为它将更有效地调度可用资源上的模型。";return{c(){h=s("p"),h.innerHTML=M,$=o(),T=s("p"),T.innerHTML=Q},l(C){h=i(C,"P",{"data-svelte-h":!0}),r(h)!=="svelte-awm97l"&&(h.innerHTML=M),$=a(C),T=i(C,"P",{"data-svelte-h":!0}),r(T)!=="svelte-gophk0"&&(T.innerHTML=Q)},m(C,q){l(C,h,q),l(C,$,q),l(C,T,q)},p:ks,d(C){C&&(n(h),n($),n(T))}}}/* Tr(U): fragment factory with the same shape as yr - one <p> whose textContent is the note encouraging users to push 8-bit models to the Hub; the comparison value used in l() is "svelte-7qh2nm". */function Tr(U){let h,M="对大模型,强烈鼓励将 8 位量化模型推送到 Hub 上,以便让社区能够从内存占用减少和加载中受益,例如在 Google Colab 上加载大模型。";return{c(){h=s("p"),h.textContent=M},l($){h=i($,"P",{"data-svelte-h":!0}),r(h)!=="svelte-7qh2nm"&&(h.textContent=M)},m($,T){l($,h,T)},p:ks,d($){$&&n(h)}}}function vr(U){let h,M,$,T,Q,C,q,al,D,qs='AWQ方法已经在<a 
href="https://arxiv.org/abs/2306.00978" rel="nofollow"><em>AWQ: Activation-aware Weight Quantization for LLM Compression and Acceleration</em>论文</a>中引入。通过AWQ,您可以以4位精度运行模型,同时保留其原始性能(即没有性能降级),并具有比下面介绍的其他量化方法更出色的吞吐量 - 达到与纯<code>float16</code>推理相似的吞吐量。',sl,K,Ws="我们现在支持使用任何AWQ模型进行推理,这意味着任何人都可以加载和使用在Hub上推送或本地保存的AWQ权重。请注意,使用AWQ需要访问NVIDIA GPU。目前不支持CPU推理。",il,O,rl,ee,Zs="我们建议用户查看生态系统中不同的现有工具,以使用AWQ算法对其模型进行量化,例如:",ml,te,Us='<li><a href="https://github.com/mit-han-lab/llm-awq" rel="nofollow"><code>llm-awq</code></a>,来自MIT Han Lab</li> <li><a href="https://github.com/casper-hansen/AutoAWQ" rel="nofollow"><code>autoawq</code></a>,来自<a href="https://github.com/casper-hansen" rel="nofollow"><code>casper-hansen</code></a></li> <li>Intel neural compressor,来自Intel - 通过<a href="https://huggingface.co/docs/optimum/main/en/intel/optimization_inc" rel="nofollow"><code>optimum-intel</code></a>使用</li>',pl,ne,Qs=`生态系统中可能存在许多其他工具,请随时提出PR将它们添加到列表中。 | |
| 目前与🤗 Transformers的集成仅适用于使用<code>autoawq</code>和<code>llm-awq</code>量化后的模型。大多数使用<code>auto-awq</code>量化的模型可以在🤗 Hub的<a href="https://huggingface.co/TheBloke" rel="nofollow"><code>TheBloke</code></a>命名空间下找到,要使用<code>llm-awq</code>对模型进行量化,请参阅<a href="https://github.com/mit-han-lab/llm-awq/" rel="nofollow"><code>llm-awq</code></a>的示例文件夹中的<a href="https://github.com/mit-han-lab/llm-awq/blob/main/examples/convert_to_hf.py" rel="nofollow"><code>convert_to_hf.py</code></a>脚本。`,dl,le,fl,oe,zs="您可以使用<code>from_pretrained</code>方法从Hub加载一个量化模型。通过检查模型配置文件(<code>configuration.json</code>)中是否存在<code>quantization_config</code>属性,来进行确认推送的权重是量化的。您可以通过检查字段<code>quantization_config.quant_method</code>来确认模型是否以AWQ格式进行量化,该字段应该设置为<code>"awq"</code>。请注意,为了性能原因,默认情况下加载模型将设置其他权重为<code>float16</code>。如果您想更改这种设置,可以通过将<code>torch_dtype</code>参数设置为<code>torch.float32</code>或<code>torch.bfloat16</code>。在下面的部分中,您可以找到一些示例片段和notebook。",cl,ae,ul,se,Gs='首先,您需要安装<a href="https://github.com/casper-hansen/AutoAWQ" rel="nofollow"><code>autoawq</code></a>库',bl,ie,gl,re,hl,me,js="如果您首先将模型加载到CPU上,请确保在使用之前将其移动到GPU设备上。",_l,pe,$l,de,yl,fe,Bs="您可以将AWQ量化与Flash Attention结合起来,得到一个既被量化又更快速的模型。只需使用<code>from_pretrained</code>加载模型,并传递<code>attn_implementation="flash_attention_2"</code>参数。",Ml,ce,Tl,ue,vl,be,Xs='我们使用<a href="https://github.com/huggingface/optimum-benchmark" rel="nofollow"><code>optimum-benchmark</code></a>库进行了一些速度、吞吐量和延迟基准测试。',wl,ge,Vs="请注意,在编写本文档部分时,可用的量化方法包括:<code>awq</code>、<code>gptq</code>和<code>bitsandbytes</code>。",Cl,he,Ls='基准测试在一台NVIDIA-A100实例上运行,使用<a href="https://huggingface.co/TheBloke/Mistral-7B-v0.1-AWQ" rel="nofollow"><code>TheBloke/Mistral-7B-v0.1-AWQ</code></a>作为AWQ模型,<a href="https://huggingface.co/TheBloke/Mistral-7B-v0.1-GPTQ" rel="nofollow"><code>TheBloke/Mistral-7B-v0.1-GPTQ</code></a>作为GPTQ模型。我们还将其与<code>bitsandbytes</code>量化模型和<code>float16</code>模型进行了对比。以下是一些结果示例:',Jl,G,Hs='<img 
src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/quantization/forward_memory_plot.png"/>',xl,j,As='<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/quantization/generate_memory_plot.png"/>',kl,B,Rs='<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/quantization/generate_throughput_plot.png"/>',ql,X,Fs='<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/quantization/forward_latency_plot.png"/>',Wl,_e,Ns='你可以在<a href="https://github.com/huggingface/optimum-benchmark/tree/main/examples/running-mistrals" rel="nofollow">此链接</a>中找到完整的结果以及包版本。',Zl,$e,Is="从结果来看,AWQ量化方法是推理、文本生成中最快的量化方法,并且在文本生成的峰值内存方面属于最低。然而,对于每批数据,AWQ似乎有最大的前向延迟。",Ul,ye,Ql,Me,Ps='查看如何在<a href="https://colab.research.google.com/drive/1HzZH89yAXJaZgwJDhQj9LqSBux932BvY" rel="nofollow">Google Colab演示</a>中使用此集成!',zl,Te,Gl,W,ve,is,Nn,Es=`This is a wrapper class about all possible attributes and features that you can play with a model that has been | |
| loaded using <code>auto-awq</code> library awq quantization relying on auto_awq backend.`,rs,V,we,ms,In,Ys="Safety checker that arguments are correct",jl,Ce,Bl,Je,Ss="🤗 Transformers已经整合了<code>optimum</code> API,用于对语言模型执行GPTQ量化。您可以以8、4、3甚至2位加载和量化您的模型,而性能无明显下降,并且推理速度更快!这受到大多数GPU硬件的支持。",Xl,xe,Ds="要了解更多关于量化模型的信息,请查看:",Vl,ke,Ks='<li><a href="https://arxiv.org/pdf/2210.17323.pdf" rel="nofollow">GPTQ</a>论文</li> <li><code>optimum</code>关于GPTQ量化的<a href="https://huggingface.co/docs/optimum/llm_quantization/usage_guides/quantization" rel="nofollow">指南</a></li> <li>用作后端的<a href="https://github.com/PanQiWei/AutoGPTQ" rel="nofollow"><code>AutoGPTQ</code></a>库</li>',Ll,qe,Hl,We,Os="为了运行下面的代码,您需要安装:",Al,Ze,ei=`<li><p>安装最新版本的 <code>AutoGPTQ</code> 库 | |
| <code>pip install auto-gptq</code></p></li> <li><p>从源代码安装最新版本的<code>optimum</code> <code>pip install git+https://github.com/huggingface/optimum.git</code></p></li> <li><p>从源代码安装最新版本的<code>transformers</code> <code>pip install git+https://github.com/huggingface/transformers.git</code></p></li> <li><p>安装最新版本的<code>accelerate</code>库: | |
| <code>pip install --upgrade accelerate</code></p></li>`,Rl,Ue,ti="请注意,目前GPTQ集成仅支持文本模型,对于视觉、语音或多模态模型可能会遇到预期以外结果。",Fl,Qe,Nl,ze,ni="GPTQ是一种在使用量化模型之前需要进行权重校准的量化方法。如果您想从头开始对transformers模型进行量化,生成量化模型可能需要一些时间(在Google Colab上对<code>facebook/opt-350m</code>模型量化约为5分钟)。",Il,Ge,li="因此,有两种不同的情况下您可能想使用GPTQ量化模型。第一种情况是加载已经由其他用户在Hub上量化的模型,第二种情况是从头开始对您的模型进行量化并保存或推送到Hub,以便其他用户也可以使用它。",Pl,je,El,Be,oi='为了加载和量化一个模型,您需要创建一个<a href="/docs/transformers/pr_33914/zh/main_classes/quantization#transformers.GPTQConfig">GPTQConfig</a>。您需要传递<code>bits</code>的数量,一个用于校准量化的<code>dataset</code>,以及模型的<code>tokenizer</code>以准备数据集。',Yl,Xe,Sl,Ve,ai="请注意,您可以将自己的数据集以字符串列表形式传递到模型。然而,强烈建议您使用GPTQ论文中提供的数据集。",Dl,Le,Kl,He,Ol,Ae,si="您可以通过使用<code>from_pretrained</code>并设置<code>quantization_config</code>来对模型进行量化。",eo,Re,to,Fe,ii="请注意,您需要一个GPU来量化模型。我们将模型放在cpu中,并将模块来回移动到gpu中,以便对其进行量化。",no,Ne,ri="如果您想在使用 CPU 卸载的同时最大化 GPU 使用率,您可以设置 <code>device_map = "auto"</code>。",lo,Ie,oo,Pe,mi='请注意,不支持磁盘卸载。此外,如果由于数据集而内存不足,您可能需要在<code>from_pretrained</code>中设置<code>max_memory</code>。查看这个<a href="https://huggingface.co/docs/accelerate/usage_guides/big_modeling#designing-a-device-map" rel="nofollow">指南</a>以了解有关<code>device_map</code>和<code>max_memory</code>的更多信息。',ao,L,so,Ee,io,Ye,pi="您可以使用<code>push_to_hub</code>将量化模型像任何模型一样推送到Hub。量化配置将与模型一起保存和推送。",ro,Se,mo,De,di="如果您想在本地计算机上保存量化模型,您也可以使用<code>save_pretrained</code>来完成:",po,Ke,fo,Oe,fi="请注意,如果您量化模型时想使用<code>device_map</code>,请确保在保存之前将整个模型移动到您的GPU或CPU之一。",co,et,uo,tt,bo,nt,ci=`您可以使用<code>from_pretrained</code>从Hub加载量化模型。 | |
| 请确保推送权重是量化的,检查模型配置对象中是否存在<code>quantization_config</code>属性。`,go,lt,ho,ot,ui="如果您想更快地加载模型,并且不需要分配比实际需要内存更多的内存,量化模型也使用<code>device_map</code>参数。确保您已安装<code>accelerate</code>库。",_o,at,$o,st,yo,it,bi='保留格式:对于 4 位模型,您可以使用 exllama 内核来提高推理速度。默认情况下,它处于启用状态。您可以通过在 <a href="/docs/transformers/pr_33914/zh/main_classes/quantization#transformers.GPTQConfig">GPTQConfig</a> 中传递 <code>use_exllama</code> 来更改此配置。这将覆盖存储在配置中的量化配置。请注意,您只能覆盖与内核相关的属性。此外,如果您想使用 exllama 内核,整个模型需要全部部署在 gpus 上。此外,您可以使用 版本 > 0.4.2 的 Auto-GPTQ 并传递 <code>device_map</code> = “cpu” 来执行 CPU 推理。对于 CPU 推理,您必须在 <code>GPTQConfig</code> 中传递 <code>use_exllama = False</code>。',Mo,rt,To,mt,gi='随着 exllamav2 内核的发布,与 exllama 内核相比,您可以获得更快的推理速度。您只需在 <a href="/docs/transformers/pr_33914/zh/main_classes/quantization#transformers.GPTQConfig">GPTQConfig</a> 中传递 <code>exllama_config={"version": 2}</code>:',vo,pt,wo,dt,hi="请注意,目前仅支持 4 位模型。此外,如果您正在使用 peft 对量化模型进行微调,建议禁用 exllama 内核。",Co,ft,_i='您可以在此找到这些内核的基准测试 <a href="https://github.com/huggingface/optimum/tree/main/tests/benchmark#gptq-benchmark" rel="nofollow">这里</a>',Jo,ct,xo,ut,$i=`在Hugging Face生态系统的官方支持下,您可以使用GPTQ进行量化后的模型进行微调。 | |
| 请查看<code>peft</code>库了解更多详情。`,ko,bt,qo,gt,yi='请查看 Google Colab <a href="https://colab.research.google.com/drive/1_TIrmuKOFhuRRiTWN94ilkUFu6ZX4ceb?usp=sharing" rel="nofollow">notebook</a>,了解如何使用GPTQ量化您的模型以及如何使用peft微调量化模型。',Wo,ht,Zo,w,_t,ps,Pn,Mi=`This is a wrapper class about all possible attributes and features that you can play with a model that has been | |
| loaded using <code>optimum</code> api for gptq quantization relying on auto_gptq backend.`,ds,H,$t,fs,En,Ti="Get compatible class with optimum gptq config dict",cs,A,yt,us,Yn,vi="Safety checker that arguments are correct",bs,R,Mt,gs,Sn,wi="Get compatible dict for optimum gptq config",Uo,Tt,Qo,vt,Ci=`🤗 Transformers 与 <code>bitsandbytes</code> 上最常用的模块紧密集成。您可以使用几行代码以 8 位精度加载您的模型。 | |
| 自bitsandbytes的0.37.0版本发布以来,大多数GPU硬件都支持这一点。`,zo,wt,Ji='在<a href="https://arxiv.org/abs/2208.07339" rel="nofollow">LLM.int8()</a>论文中了解更多关于量化方法的信息,或者在<a href="https://huggingface.co/blog/hf-bitsandbytes-integration" rel="nofollow">博客文章</a>中了解关于合作的更多信息。',Go,Ct,xi="自其“0.39.0”版本发布以来,您可以使用FP4数据类型,通过4位量化加载任何支持“device_map”的模型。",jo,Jt,ki='如果您想量化自己的 pytorch 模型,请查看 🤗 Accelerate 的<a href="https://huggingface.co/docs/accelerate/main/en/usage_guides/quantization" rel="nofollow">文档</a>。',Bo,xt,qi="以下是您可以使用“bitsandbytes”集成完成的事情",Xo,kt,Vo,qt,Wi='只要您的模型支持使用 🤗 Accelerate 进行加载并包含 <code>torch.nn.Linear</code> 层,您可以在调用 <a href="/docs/transformers/pr_33914/zh/main_classes/model#transformers.PreTrainedModel.from_pretrained">from_pretrained()</a> 方法时使用 <code>load_in_8bit</code> 或 <code>load_in_4bit</code> 参数来量化模型。这也应该适用于任何模态。',Lo,Wt,Ho,Zt,Zi="默认情况下,所有其他模块(例如 <code>torch.nn.LayerNorm</code>)将被转换为 <code>torch.float16</code> 类型。但如果您想更改它们的 <code>dtype</code>,可以重载 <code>torch_dtype</code> 参数:",Ao,Ut,Ro,Qt,Fo,zt,No,Gt,Ui="确保在运行以下代码段之前已完成以下要求:",Io,jt,Qi=`<li><p>最新版本 <code>bitsandbytes</code> 库 | |
| <code>pip install bitsandbytes>=0.39.0</code></p></li> <li><p>安装最新版本 <code>accelerate</code> <code>pip install --upgrade accelerate</code></p></li> <li><p>安装最新版本 <code>transformers</code> <code>pip install --upgrade transformers</code></p></li>`,Po,Bt,Eo,Xt,zi='<li><p><strong>高级用法:</strong> 请参考 <a href="https://colab.research.google.com/drive/1ge2F1QSK8Q7h0hn3YKuBCOAS0bK8E0wf" rel="nofollow">此 Google Colab notebook</a> 以获取 4 位量化高级用法和所有可选选项。</p></li> <li><p><strong>使用 <code>batch_size=1</code> 实现更快的推理:</strong> 自 <code>bitsandbytes</code> 的 <code>0.40.0</code> 版本以来,设置 <code>batch_size=1</code>,您可以从快速推理中受益。请查看 <a href="https://github.com/TimDettmers/bitsandbytes/releases/tag/0.40.0" rel="nofollow">这些发布说明</a> ,并确保使用大于 <code>0.40.0</code> 的版本以直接利用此功能。</p></li> <li><p><strong>训练:</strong> 根据 <a href="https://arxiv.org/abs/2305.14314" rel="nofollow">QLoRA 论文</a>,对于4位基模型训练(使用 LoRA 适配器),应使用 <code>bnb_4bit_quant_type='nf4'</code>。</p></li> <li><p><strong>推理:</strong> 对于推理,<code>bnb_4bit_quant_type</code> 对性能影响不大。但是为了与模型的权重保持一致,请确保使用相同的 <code>bnb_4bit_compute_dtype</code> 和 <code>torch_dtype</code> 参数。</p></li>',Yo,Vt,So,Lt,Gi="在调用 <code>.from_pretrained</code> 方法时使用 <code>load_in_4bit=True</code>,可以将您的内存使用量减少到大约原来的 1/4。",Do,Ht,Ko,F,Oo,At,ea,Rt,ji="您可以通过在调用 <code>.from_pretrained</code> 方法时使用 <code>load_in_8bit=True</code> 参数,将内存需求大致减半来加载模型",ta,Ft,na,Nt,Bi="然后,像通常使用 <code>PreTrainedModel</code> 一样使用您的模型。",la,It,Xi="您可以使用 <code>get_memory_footprint</code> 方法检查模型的内存占用。",oa,Pt,aa,Et,Vi="通过这种集成,我们能够在较小的设备上加载大模型并运行它们而没有任何问题。",sa,N,ia,Yt,ra,St,Li="在这里,我们将介绍使用 FP4 量化的一些高级用例。",ma,Dt,pa,Kt,Hi="计算数据类型用于改变在进行计算时使用的数据类型。例如,hidden states可以是 <code>float32</code>,但为了加速,计算时可以被设置为 <code>bf16</code>。默认情况下,计算数据类型被设置为 <code>float32</code>。",da,Ot,fa,en,ca,tn,Ai="您还可以使用 NF4 数据类型,这是一种针对使用正态分布初始化的权重而适应的新型 4 位数据类型。要运行:",ua,nn,ba,ln,ga,on,Ri="我们还建议用户使用嵌套量化技术。从我们的经验观察来看,这种方法在不增加额外性能的情况下节省更多内存。这使得 llama-13b 模型能够在具有 1024 个序列长度、1 个批次大小和 4 个梯度累积步骤的 NVIDIA-T4 16GB 上进行 
fine-tuning。",ha,an,_a,sn,$a,rn,Fi=`您可以使用 <code>push_to_hub</code> 方法将量化模型推送到 Hub 上。这将首先推送量化配置文件,然后推送量化模型权重。 | |
| 请确保使用 <code>bitsandbytes>0.37.2</code>(在撰写本文时,我们使用的是 <code>bitsandbytes==0.38.0.post1</code>)才能使用此功能。`,ya,mn,Ma,I,Ta,pn,va,dn,Ni="您可以使用 <code>from_pretrained</code> 方法从 Hub 加载量化模型。请确保推送的权重是量化的,检查模型配置对象中是否存在 <code>quantization_config</code> 属性。",wa,fn,Ca,cn,Ii=`请注意,在这种情况下,您不需要指定 <code>load_in_8bit=True</code> 参数,但需要确保 <code>bitsandbytes</code> 和 <code>accelerate</code> 已安装。 | |
| 请注意,<code>device_map</code> 是可选的,但设置 <code>device_map = 'auto'</code> 更适合用于推理,因为它将更有效地调度可用资源上的模型。`,Ja,un,xa,bn,Pi="本节面向希望探索除了加载和运行 8 位模型之外还能做什么的进阶用户。",ka,gn,qa,hn,Ei="此高级用例之一是能够加载模型并将权重分派到 <code>CPU</code> 和 <code>GPU</code> 之间。请注意,将在 CPU 上分派的权重 <strong>不会</strong> 转换为 8 位,因此会保留为 <code>float32</code>。此功能适用于想要适应非常大的模型并将模型分派到 GPU 和 CPU 之间的用户。",Wa,_n,Yi='首先,从 <code>transformers</code> 中加载一个 <a href="/docs/transformers/pr_33914/zh/main_classes/quantization#transformers.BitsAndBytesConfig">BitsAndBytesConfig</a>,并将属性 <code>llm_int8_enable_fp32_cpu_offload</code> 设置为 <code>True</code>:',Za,$n,Ua,yn,Si="假设您想加载 <code>bigscience/bloom-1b7</code> 模型,您的 GPU显存仅足够容纳除了<code>lm_head</code>外的整个模型。因此,您可以按照以下方式编写自定义的 device_map:",Qa,Mn,za,Tn,Di="然后如下加载模型:",Ga,vn,ja,wn,Ki="这就是全部内容!享受您的模型吧!",Ba,Cn,Xa,Jn,Oi=`您可以使用 <code>llm_int8_threshold</code> 参数来更改异常值的阈值。“异常值”是一个大于特定阈值的<code>hidden state</code>值。 | |
| 这对应于<code>LLM.int8()</code>论文中描述的异常检测的异常阈值。任何高于此阈值的<code>hidden state</code>值都将被视为异常值,对这些值的操作将在 fp16 中完成。值通常是正态分布的,也就是说,大多数值在 [-3.5, 3.5] 范围内,但有一些额外的系统异常值,对于大模型来说,它们的分布非常不同。这些异常值通常在区间 [-60, -6] 或 [6, 60] 内。Int8 量化对于幅度为 ~5 的值效果很好,但超出这个范围,性能就会明显下降。一个好的默认阈值是 6,但对于更不稳定的模型(小模型、微调)可能需要更低的阈值。 | |
| 这个参数会影响模型的推理速度。我们建议尝试这个参数,以找到最适合您的用例的参数。`,Va,xn,La,kn,Ha,qn,er="一些模型有几个需要保持未转换状态以确保稳定性的模块。例如,Jukebox 模型有几个 <code>lm_head</code> 模块需要跳过。使用 <code>llm_int8_skip_modules</code> 参数进行相应操作。",Aa,Wn,Ra,Zn,Fa,Un,tr='借助Hugging Face生态系统中适配器(adapters)的官方支持,您可以在8位精度下微调模型。这使得可以在单个Google Colab中微调大模型,例如<code>flan-t5-large</code>或<code>facebook/opt-6.7b</code>。请查看<a href="https://github.com/huggingface/peft" rel="nofollow"><code>peft</code></a>库了解更多详情。',Na,Qn,nr="注意,加载模型进行训练时无需传递<code>device_map</code>。它将自动将您的模型加载到GPU上。如果需要,您可以将设备映射为特定设备(例如<code>cuda:0</code>、<code>0</code>、<code>torch.device('cuda:0')</code>)。请注意,<code>device_map=auto</code>仅应用于推理。",Ia,zn,Pa,y,Gn,hs,Dn,lr=`This is a wrapper class about all possible attributes and features that you can play with a model that has been | |
| loaded using <code>bitsandbytes</code>.`,_s,Kn,or="This replaces <code>load_in_8bit</code> or <code>load_in_4bit</code>, therefore both options are mutually exclusive.",$s,On,ar=`Currently only supports <code>LLM.int8()</code>, <code>FP4</code>, and <code>NF4</code> quantization. If more methods are added to <code>bitsandbytes</code>, | |
| then more arguments will be added to this class.`,ys,P,jn,Ms,el,sr="Returns <code>True</code> if the model is quantizable, <code>False</code> otherwise.",Ts,E,Bn,vs,tl,ir="Safety checker that arguments are correct - also replaces some NoneType arguments with their default values.",ws,Y,Xn,Cs,nl,rr=`This method returns the quantization method used for the model. If the model is not quantizable, it returns | |
| <code>None</code>.`,Js,S,Vn,xs,ll,mr=`Removes all attributes from config which correspond to the default config attributes for better readability and | |
| serializes to a Python dictionary.`,Ea,Ln,Ya,Hn,pr='请查看<a href="https://huggingface.co/docs/optimum/index" rel="nofollow">Optimum 文档</a>以了解更多关于<code>optimum</code>支持的量化方法,并查看这些方法是否适用于您的用例。',Sa,An,Da,ol,Ka;return Q=new g({props:{title:"量化 🤗 Transformers 模型",local:"量化--transformers-模型",headingTag:"h1"}}),q=new g({props:{title:"AWQ集成",local:"awq集成",headingTag:"h2"}}),O=new g({props:{title:"量化一个模型",local:"量化一个模型",headingTag:"h3"}}),le=new g({props:{title:"加载一个量化的模型",local:"加载一个量化的模型",headingTag:"h3"}}),ae=new g({props:{title:"示例使用",local:"示例使用",headingTag:"h2"}}),ie=new _({props:{code:"cGlwJTIwaW5zdGFsbCUyMGF1dG9hd3E=",highlighted:"pip install autoawq",wrap:!1}}),re=new _({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Nb2RlbEZvckNhdXNhbExNJTJDJTIwQXV0b1Rva2VuaXplciUwQSUwQW1vZGVsX2lkJTIwJTNEJTIwJTIyVGhlQmxva2UlMkZ6ZXBoeXItN0ItYWxwaGEtQVdRJTIyJTBBbW9kZWwlMjAlM0QlMjBBdXRvTW9kZWxGb3JDYXVzYWxMTS5mcm9tX3ByZXRyYWluZWQobW9kZWxfaWQlMkMlMjBkZXZpY2VfbWFwJTNEJTIyY3VkYSUzQTAlMjIp",highlighted:`<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForCausalLM, AutoTokenizer | |
| model_id = <span class="hljs-string">"TheBloke/zephyr-7B-alpha-AWQ"</span> | |
| model = AutoModelForCausalLM.from_pretrained(model_id, device_map=<span class="hljs-string">"cuda:0"</span>)`,wrap:!1}}),pe=new _({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Nb2RlbEZvckNhdXNhbExNJTJDJTIwQXV0b1Rva2VuaXplciUwQSUwQW1vZGVsX2lkJTIwJTNEJTIwJTIyVGhlQmxva2UlMkZ6ZXBoeXItN0ItYWxwaGEtQVdRJTIyJTBBbW9kZWwlMjAlM0QlMjBBdXRvTW9kZWxGb3JDYXVzYWxMTS5mcm9tX3ByZXRyYWluZWQobW9kZWxfaWQpLnRvKCUyMmN1ZGElM0EwJTIyKQ==",highlighted:`<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForCausalLM, AutoTokenizer | |
| model_id = <span class="hljs-string">"TheBloke/zephyr-7B-alpha-AWQ"</span> | |
| model = AutoModelForCausalLM.from_pretrained(model_id).to(<span class="hljs-string">"cuda:0"</span>)`,wrap:!1}}),de=new g({props:{title:"结合 AWQ 和 Flash Attention",local:"结合-awq-和-flash-attention",headingTag:"h3"}}),ce=new _({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Nb2RlbEZvckNhdXNhbExNJTJDJTIwQXV0b1Rva2VuaXplciUwQSUwQW1vZGVsJTIwJTNEJTIwQXV0b01vZGVsRm9yQ2F1c2FsTE0uZnJvbV9wcmV0cmFpbmVkKCUyMlRoZUJsb2tlJTJGemVwaHlyLTdCLWFscGhhLUFXUSUyMiUyQyUyMGF0dG5faW1wbGVtZW50YXRpb24lM0QlMjJmbGFzaF9hdHRlbnRpb25fMiUyMiUyQyUyMGRldmljZV9tYXAlM0QlMjJjdWRhJTNBMCUyMik=",highlighted:`<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForCausalLM, AutoTokenizer | |
| model = AutoModelForCausalLM.from_pretrained(<span class="hljs-string">"TheBloke/zephyr-7B-alpha-AWQ"</span>, attn_implementation=<span class="hljs-string">"flash_attention_2"</span>, device_map=<span class="hljs-string">"cuda:0"</span>)`,wrap:!1}}),ue=new g({props:{title:"基准测试",local:"基准测试",headingTag:"h3"}}),ye=new g({props:{title:"Google colab 演示",local:"google-colab-演示",headingTag:"h3"}}),Te=new g({props:{title:"AwqConfig",local:"transformers.AwqConfig",headingTag:"h3"}}),ve=new Z({props:{name:"class transformers.AwqConfig",anchor:"transformers.AwqConfig",parameters:[{name:"bits",val:": int = 4"},{name:"group_size",val:": int = 128"},{name:"zero_point",val:": bool = True"},{name:"version",val:": AWQLinearVersion = <AWQLinearVersion.GEMM: 'gemm'>"},{name:"backend",val:": AwqBackendPackingMethod = <AwqBackendPackingMethod.AUTOAWQ: 'autoawq'>"},{name:"do_fuse",val:": Optional = None"},{name:"fuse_max_seq_len",val:": Optional = None"},{name:"modules_to_fuse",val:": Optional = None"},{name:"modules_to_not_convert",val:": Optional = None"},{name:"exllama_config",val:": Optional = None"},{name:"**kwargs",val:""}],parametersDescription:[{anchor:"transformers.AwqConfig.bits",description:`<strong>bits</strong> (<code>int</code>, <em>optional</em>, defaults to 4) — | |
| The number of bits to quantize to.`,name:"bits"},{anchor:"transformers.AwqConfig.group_size",description:`<strong>group_size</strong> (<code>int</code>, <em>optional</em>, defaults to 128) — | |
| The group size to use for quantization. Recommended value is 128 and -1 uses per-column quantization.`,name:"group_size"},{anchor:"transformers.AwqConfig.zero_point",description:`<strong>zero_point</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) — | |
| Whether to use zero point quantization.`,name:"zero_point"},{anchor:"transformers.AwqConfig.version",description:`<strong>version</strong> (<code>AWQLinearVersion</code>, <em>optional</em>, defaults to <code>AWQLinearVersion.GEMM</code>) — | |
| The version of the quantization algorithm to use. GEMM is better for big batch_size (e.g. >= 8) otherwise, | |
| GEMV is better (e.g. < 8 ). GEMM models are compatible with Exllama kernels.`,name:"version"},{anchor:"transformers.AwqConfig.backend",description:`<strong>backend</strong> (<code>AwqBackendPackingMethod</code>, <em>optional</em>, defaults to <code>AwqBackendPackingMethod.AUTOAWQ</code>) — | |
| The quantization backend. Some models might be quantized using <code>llm-awq</code> backend. This is useful for users | |
| that quantize their own models using <code>llm-awq</code> library.`,name:"backend"},{anchor:"transformers.AwqConfig.do_fuse",description:`<strong>do_fuse</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) — | |
| Whether to fuse attention and mlp layers together for faster inference`,name:"do_fuse"},{anchor:"transformers.AwqConfig.fuse_max_seq_len",description:`<strong>fuse_max_seq_len</strong> (<code>int</code>, <em>optional</em>) — | |
| The Maximum sequence length to generate when using fusing.`,name:"fuse_max_seq_len"},{anchor:"transformers.AwqConfig.modules_to_fuse",description:`<strong>modules_to_fuse</strong> (<code>dict</code>, <em>optional</em>, default to <code>None</code>) — | |
| Overwrite the natively supported fusing scheme with the one specified by the users.`,name:"modules_to_fuse"},{anchor:"transformers.AwqConfig.modules_to_not_convert",description:`<strong>modules_to_not_convert</strong> (<code>list</code>, <em>optional</em>, default to <code>None</code>) — | |
| The list of modules to not quantize, useful for quantizing models that explicitly require to have | |
| some modules left in their original precision (e.g. Whisper encoder, Llava encoder, Mixtral gate layers). | |
| Note you cannot quantize directly with transformers, please refer to <code>AutoAWQ</code> documentation for quantizing HF models.`,name:"modules_to_not_convert"},{anchor:"transformers.AwqConfig.exllama_config",description:`<strong>exllama_config</strong> (<code>Dict[str, Any]</code>, <em>optional</em>) — | |
| You can specify the version of the exllama kernel through the <code>version</code> key, the maximum sequence | |
| length through the <code>max_input_len</code> key, and the maximum batch size through the <code>max_batch_size</code> key. | |
| Defaults to <code>{"version": 2, "max_input_len": 2048, "max_batch_size": 8}</code> if unset.`,name:"exllama_config"}],source:"https://github.com/huggingface/transformers/blob/vr_33914/src/transformers/utils/quantization_config.py#L760"}}),we=new Z({props:{name:"post_init",anchor:"transformers.AwqConfig.post_init",parameters:[],source:"https://github.com/huggingface/transformers/blob/vr_33914/src/transformers/utils/quantization_config.py#L829"}}),Ce=new g({props:{title:"AutoGPTQ 集成",local:"autogptq-集成",headingTag:"h2"}}),qe=new g({props:{title:"要求",local:"要求",headingTag:"h3"}}),Qe=new g({props:{title:"加载和量化模型",local:"加载和量化模型",headingTag:"h3"}}),je=new g({props:{title:"GPTQ 配置",local:"gptq-配置",headingTag:"h4"}}),Xe=new _({props:{code:"bW9kZWxfaWQlMjAlM0QlMjAlMjJmYWNlYm9vayUyRm9wdC0xMjVtJTIyJTBBdG9rZW5pemVyJTIwJTNEJTIwQXV0b1Rva2VuaXplci5mcm9tX3ByZXRyYWluZWQobW9kZWxfaWQpJTBBZ3B0cV9jb25maWclMjAlM0QlMjBHUFRRQ29uZmlnKGJpdHMlM0Q0JTJDJTIwZGF0YXNldCUyMCUzRCUyMCUyMmM0JTIyJTJDJTIwdG9rZW5pemVyJTNEdG9rZW5pemVyKQ==",highlighted:`model_id = <span class="hljs-string">"facebook/opt-125m"</span> | |
| tokenizer = AutoTokenizer.from_pretrained(model_id) | |
| gptq_config = GPTQConfig(bits=<span class="hljs-number">4</span>, dataset = <span class="hljs-string">"c4"</span>, tokenizer=tokenizer)`,wrap:!1}}),Le=new _({props:{code:"ZGF0YXNldCUyMCUzRCUyMCU1QiUyMmF1dG8tZ3B0cSUyMGlzJTIwYW4lMjBlYXN5LXRvLXVzZSUyMG1vZGVsJTIwcXVhbnRpemF0aW9uJTIwbGlicmFyeSUyMHdpdGglMjB1c2VyLWZyaWVuZGx5JTIwYXBpcyUyQyUyMGJhc2VkJTIwb24lMjBHUFRRJTIwYWxnb3JpdGhtLiUyMiU1RCUwQXF1YW50aXphdGlvbiUyMCUzRCUyMEdQVFFDb25maWcoYml0cyUzRDQlMkMlMjBkYXRhc2V0JTIwJTNEJTIwZGF0YXNldCUyQyUyMHRva2VuaXplciUzRHRva2VuaXplcik=",highlighted:`dataset = [<span class="hljs-string">"auto-gptq is an easy-to-use model quantization library with user-friendly apis, based on GPTQ algorithm."</span>] | |
| quantization = GPTQConfig(bits=<span class="hljs-number">4</span>, dataset = dataset, tokenizer=tokenizer)`,wrap:!1}}),He=new g({props:{title:"量化",local:"量化",headingTag:"h4"}}),Re=new _({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Nb2RlbEZvckNhdXNhbExNJTBBbW9kZWwlMjAlM0QlMjBBdXRvTW9kZWxGb3JDYXVzYWxMTS5mcm9tX3ByZXRyYWluZWQobW9kZWxfaWQlMkMlMjBxdWFudGl6YXRpb25fY29uZmlnJTNEZ3B0cV9jb25maWcpJTBB",highlighted:`<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForCausalLM | |
| model = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=gptq_config) | |
| `,wrap:!1}}),Ie=new _({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Nb2RlbEZvckNhdXNhbExNJTBBbW9kZWwlMjAlM0QlMjBBdXRvTW9kZWxGb3JDYXVzYWxMTS5mcm9tX3ByZXRyYWluZWQobW9kZWxfaWQlMkMlMjBkZXZpY2VfbWFwJTNEJTIyYXV0byUyMiUyQyUyMHF1YW50aXphdGlvbl9jb25maWclM0RncHRxX2NvbmZpZyk=",highlighted:`<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForCausalLM | |
| model = AutoModelForCausalLM.from_pretrained(model_id, device_map=<span class="hljs-string">"auto"</span>, quantization_config=gptq_config)`,wrap:!1}}),L=new ss({props:{warning:!0,$$slots:{default:[$r]},$$scope:{ctx:U}}}),Ee=new g({props:{title:"推送量化模型到 🤗 Hub",local:"推送量化模型到--hub",headingTag:"h3"}}),Se=new _({props:{code:"cXVhbnRpemVkX21vZGVsLnB1c2hfdG9faHViKCUyMm9wdC0xMjVtLWdwdHElMjIpJTBBdG9rZW5pemVyLnB1c2hfdG9faHViKCUyMm9wdC0xMjVtLWdwdHElMjIp",highlighted:`quantized_model.push_to_hub(<span class="hljs-string">"opt-125m-gptq"</span>) | |
| tokenizer.push_to_hub(<span class="hljs-string">"opt-125m-gptq"</span>)`,wrap:!1}}),Ke=new _({props:{code:"cXVhbnRpemVkX21vZGVsLnNhdmVfcHJldHJhaW5lZCglMjJvcHQtMTI1bS1ncHRxJTIyKSUwQXRva2VuaXplci5zYXZlX3ByZXRyYWluZWQoJTIyb3B0LTEyNW0tZ3B0cSUyMik=",highlighted:`quantized_model.save_pretrained(<span class="hljs-string">"opt-125m-gptq"</span>) | |
| tokenizer.save_pretrained(<span class="hljs-string">"opt-125m-gptq"</span>)`,wrap:!1}}),et=new _({props:{code:"cXVhbnRpemVkX21vZGVsLnRvKCUyMmNwdSUyMiklMEFxdWFudGl6ZWRfbW9kZWwuc2F2ZV9wcmV0cmFpbmVkKCUyMm9wdC0xMjVtLWdwdHElMjIp",highlighted:`quantized_model.to(<span class="hljs-string">"cpu"</span>) | |
| quantized_model.save_pretrained(<span class="hljs-string">"opt-125m-gptq"</span>)`,wrap:!1}}),tt=new g({props:{title:"从 🤗 Hub 加载一个量化模型",local:"从--hub-加载一个量化模型",headingTag:"h3"}}),lt=new _({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Nb2RlbEZvckNhdXNhbExNJTBBbW9kZWwlMjAlM0QlMjBBdXRvTW9kZWxGb3JDYXVzYWxMTS5mcm9tX3ByZXRyYWluZWQoJTIyJTdCeW91cl91c2VybmFtZSU3RCUyRm9wdC0xMjVtLWdwdHElMjIp",highlighted:`<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForCausalLM | |
| model = AutoModelForCausalLM.from_pretrained(<span class="hljs-string">"{your_username}/opt-125m-gptq"</span>)`,wrap:!1}}),at=new _({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Nb2RlbEZvckNhdXNhbExNJTBBbW9kZWwlMjAlM0QlMjBBdXRvTW9kZWxGb3JDYXVzYWxMTS5mcm9tX3ByZXRyYWluZWQoJTIyJTdCeW91cl91c2VybmFtZSU3RCUyRm9wdC0xMjVtLWdwdHElMjIlMkMlMjBkZXZpY2VfbWFwJTNEJTIyYXV0byUyMik=",highlighted:`<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForCausalLM | |
| model = AutoModelForCausalLM.from_pretrained(<span class="hljs-string">"{your_username}/opt-125m-gptq"</span>, device_map=<span class="hljs-string">"auto"</span>)`,wrap:!1}}),st=new g({props:{title:"Exllama内核加快推理速度",local:"exllama内核加快推理速度",headingTag:"h3"}}),rt=new _({props:{code:"aW1wb3J0JTIwdG9yY2glMEFncHRxX2NvbmZpZyUyMCUzRCUyMEdQVFFDb25maWcoYml0cyUzRDQpJTBBbW9kZWwlMjAlM0QlMjBBdXRvTW9kZWxGb3JDYXVzYWxMTS5mcm9tX3ByZXRyYWluZWQoJTIyJTdCeW91cl91c2VybmFtZSU3RCUyRm9wdC0xMjVtLWdwdHElMjIlMkMlMjBkZXZpY2VfbWFwJTNEJTIyYXV0byUyMiUyQyUyMHF1YW50aXphdGlvbl9jb25maWclM0RncHRxX2NvbmZpZyk=",highlighted:`<span class="hljs-keyword">import</span> torch | |
| gptq_config = GPTQConfig(bits=<span class="hljs-number">4</span>) | |
| model = AutoModelForCausalLM.from_pretrained(<span class="hljs-string">"{your_username}/opt-125m-gptq"</span>, device_map=<span class="hljs-string">"auto"</span>, quantization_config=gptq_config)`,wrap:!1}}),pt=new _({props:{code:"aW1wb3J0JTIwdG9yY2glMEFncHRxX2NvbmZpZyUyMCUzRCUyMEdQVFFDb25maWcoYml0cyUzRDQlMkMlMjBleGxsYW1hX2NvbmZpZyUzRCU3QiUyMnZlcnNpb24lMjIlM0EyJTdEKSUwQW1vZGVsJTIwJTNEJTIwQXV0b01vZGVsRm9yQ2F1c2FsTE0uZnJvbV9wcmV0cmFpbmVkKCUyMiU3QnlvdXJfdXNlcm5hbWUlN0QlMkZvcHQtMTI1bS1ncHRxJTIyJTJDJTIwZGV2aWNlX21hcCUzRCUyMmF1dG8lMjIlMkMlMjBxdWFudGl6YXRpb25fY29uZmlnJTIwJTNEJTIwZ3B0cV9jb25maWcp",highlighted:`<span class="hljs-keyword">import</span> torch | |
| gptq_config = GPTQConfig(bits=<span class="hljs-number">4</span>, exllama_config={<span class="hljs-string">"version"</span>:<span class="hljs-number">2</span>}) | |
| model = AutoModelForCausalLM.from_pretrained(<span class="hljs-string">"{your_username}/opt-125m-gptq"</span>, device_map=<span class="hljs-string">"auto"</span>, quantization_config = gptq_config)`,wrap:!1}}),ct=new g({props:{title:"微调一个量化模型",local:"微调一个量化模型",headingTag:"h4"}}),bt=new g({props:{title:"示例演示",local:"示例演示",headingTag:"h3"}}),ht=new g({props:{title:"GPTQConfig",local:"transformers.GPTQConfig",headingTag:"h3"}}),_t=new Z({props:{name:"class transformers.GPTQConfig",anchor:"transformers.GPTQConfig",parameters:[{name:"bits",val:": int"},{name:"tokenizer",val:": Any = None"},{name:"dataset",val:": Union = None"},{name:"group_size",val:": int = 128"},{name:"damp_percent",val:": float = 0.1"},{name:"desc_act",val:": bool = False"},{name:"sym",val:": bool = True"},{name:"true_sequential",val:": bool = True"},{name:"use_cuda_fp16",val:": bool = False"},{name:"model_seqlen",val:": Optional = None"},{name:"block_name_to_quantize",val:": Optional = None"},{name:"module_name_preceding_first_block",val:": Optional = None"},{name:"batch_size",val:": int = 1"},{name:"pad_token_id",val:": Optional = None"},{name:"use_exllama",val:": Optional = None"},{name:"max_input_length",val:": Optional = None"},{name:"exllama_config",val:": Optional = None"},{name:"cache_block_outputs",val:": bool = True"},{name:"modules_in_block_to_quantize",val:": Optional = None"},{name:"**kwargs",val:""}],parametersDescription:[{anchor:"transformers.GPTQConfig.bits",description:`<strong>bits</strong> (<code>int</code>) — | |
| The number of bits to quantize to, supported numbers are (2, 3, 4, 8).`,name:"bits"},{anchor:"transformers.GPTQConfig.tokenizer",description:`<strong>tokenizer</strong> (<code>str</code> or <code>PreTrainedTokenizerBase</code>, <em>optional</em>) — | |
| The tokenizer used to process the dataset. You can pass either:<ul> | |
| <li>A custom tokenizer object.</li> | |
| <li>A string, the <em>model id</em> of a predefined tokenizer hosted inside a model repo on huggingface.co.</li> | |
| <li>A path to a <em>directory</em> containing vocabulary files required by the tokenizer, for instance saved | |
| using the <a href="/docs/transformers/pr_33914/zh/internal/tokenization_utils#transformers.PreTrainedTokenizerBase.save_pretrained">save_pretrained()</a> method, e.g., <code>./my_model_directory/</code>.</li> | |
| </ul>`,name:"tokenizer"},{anchor:"transformers.GPTQConfig.dataset",description:`<strong>dataset</strong> (<code>Union[List[str]]</code>, <em>optional</em>) — | |
| The dataset used for quantization. You can provide your own dataset in a list of string or just use the | |
| original datasets used in GPTQ paper [‘wikitext2’,‘c4’,‘c4-new’]`,name:"dataset"},{anchor:"transformers.GPTQConfig.group_size",description:`<strong>group_size</strong> (<code>int</code>, <em>optional</em>, defaults to 128) — | |
| The group size to use for quantization. Recommended value is 128 and -1 uses per-column quantization.`,name:"group_size"},{anchor:"transformers.GPTQConfig.damp_percent",description:`<strong>damp_percent</strong> (<code>float</code>, <em>optional</em>, defaults to 0.1) — | |
| The percent of the average Hessian diagonal to use for dampening. Recommended value is 0.1.`,name:"damp_percent"},{anchor:"transformers.GPTQConfig.desc_act",description:`<strong>desc_act</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) — | |
| Whether to quantize columns in order of decreasing activation size. Setting it to False can significantly | |
| speed up inference but the perplexity may become slightly worse. Also known as act-order.`,name:"desc_act"},{anchor:"transformers.GPTQConfig.sym",description:`<strong>sym</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) — | |
| Whether to use symmetric quantization.`,name:"sym"},{anchor:"transformers.GPTQConfig.true_sequential",description:`<strong>true_sequential</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) — | |
| Whether to perform sequential quantization even within a single Transformer block. Instead of quantizing | |
| the entire block at once, we perform layer-wise quantization. As a result, each layer undergoes | |
| quantization using inputs that have passed through the previously quantized layers.`,name:"true_sequential"},{anchor:"transformers.GPTQConfig.use_cuda_fp16",description:`<strong>use_cuda_fp16</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) — | |
| Whether or not to use optimized cuda kernel for fp16 model. Need to have model in fp16.`,name:"use_cuda_fp16"},{anchor:"transformers.GPTQConfig.model_seqlen",description:`<strong>model_seqlen</strong> (<code>int</code>, <em>optional</em>) — | |
| The maximum sequence length that the model can take.`,name:"model_seqlen"},{anchor:"transformers.GPTQConfig.block_name_to_quantize",description:`<strong>block_name_to_quantize</strong> (<code>str</code>, <em>optional</em>) — | |
| The transformers block name to quantize. If None, we will infer the block name using common patterns (e.g. model.layers)`,name:"block_name_to_quantize"},{anchor:"transformers.GPTQConfig.module_name_preceding_first_block",description:`<strong>module_name_preceding_first_block</strong> (<code>List[str]</code>, <em>optional</em>) — | |
| The layers that are preceding the first Transformer block.`,name:"module_name_preceding_first_block"},{anchor:"transformers.GPTQConfig.batch_size",description:`<strong>batch_size</strong> (<code>int</code>, <em>optional</em>, defaults to 1) — | |
| The batch size used when processing the dataset`,name:"batch_size"},{anchor:"transformers.GPTQConfig.pad_token_id",description:`<strong>pad_token_id</strong> (<code>int</code>, <em>optional</em>) — | |
| The pad token id. Needed to prepare the dataset when <code>batch_size</code> > 1.`,name:"pad_token_id"},{anchor:"transformers.GPTQConfig.use_exllama",description:`<strong>use_exllama</strong> (<code>bool</code>, <em>optional</em>) — | |
| Whether to use exllama backend. Defaults to <code>True</code> if unset. Only works with <code>bits</code> = 4.`,name:"use_exllama"},{anchor:"transformers.GPTQConfig.max_input_length",description:`<strong>max_input_length</strong> (<code>int</code>, <em>optional</em>) — | |
| The maximum input length. This is needed to initialize a buffer that depends on the maximum expected input | |
| length. It is specific to the exllama backend with act-order.`,name:"max_input_length"},{anchor:"transformers.GPTQConfig.exllama_config",description:`<strong>exllama_config</strong> (<code>Dict[str, Any]</code>, <em>optional</em>) — | |
| The exllama config. You can specify the version of the exllama kernel through the <code>version</code> key. Defaults | |
| to <code>{"version": 1}</code> if unset.`,name:"exllama_config"},{anchor:"transformers.GPTQConfig.cache_block_outputs",description:`<strong>cache_block_outputs</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) — | |
| Whether to cache block outputs to reuse as inputs for the succeeding block.`,name:"cache_block_outputs"},{anchor:"transformers.GPTQConfig.modules_in_block_to_quantize",description:`<strong>modules_in_block_to_quantize</strong> (<code>List[List[str]]</code>, <em>optional</em>) — | |
| List of list of module names to quantize in the specified block. This argument is useful to exclude certain linear modules from being quantized. | |
| The block to quantize can be specified by setting <code>block_name_to_quantize</code>. We will quantize each list sequentially. If not set, we will quantize all linear layers. | |
| Example: <code>modules_in_block_to_quantize =[["self_attn.k_proj", "self_attn.v_proj", "self_attn.q_proj"], ["self_attn.o_proj"]]</code>. | |
| In this example, we will first quantize the q,k,v layers simultaneously since they are independent. | |
| Then, we will quantize <code>self_attn.o_proj</code> layer with the q,k,v layers quantized. This way, we will get | |
| better results since it reflects the real input <code>self_attn.o_proj</code> will get when the model is quantized.`,name:"modules_in_block_to_quantize"}],source:"https://github.com/huggingface/transformers/blob/vr_33914/src/transformers/utils/quantization_config.py#L543"}}),$t=new Z({props:{name:"from_dict_optimum",anchor:"transformers.GPTQConfig.from_dict_optimum",parameters:[{name:"config_dict",val:""}],source:"https://github.com/huggingface/transformers/blob/vr_33914/src/transformers/utils/quantization_config.py#L745"}}),yt=new Z({props:{name:"post_init",anchor:"transformers.GPTQConfig.post_init",parameters:[],source:"https://github.com/huggingface/transformers/blob/vr_33914/src/transformers/utils/quantization_config.py#L657"}}),Mt=new Z({props:{name:"to_dict_optimum",anchor:"transformers.GPTQConfig.to_dict_optimum",parameters:[],source:"https://github.com/huggingface/transformers/blob/vr_33914/src/transformers/utils/quantization_config.py#L736"}}),Tt=new g({props:{title:"bitsandbytes 集成",local:"bitsandbytes-集成",headingTag:"h2"}}),kt=new g({props:{title:"通用用法",local:"通用用法",headingTag:"h3"}}),Wt=new _({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Nb2RlbEZvckNhdXNhbExNJTBBJTBBbW9kZWxfOGJpdCUyMCUzRCUyMEF1dG9Nb2RlbEZvckNhdXNhbExNLmZyb21fcHJldHJhaW5lZCglMjJmYWNlYm9vayUyRm9wdC0zNTBtJTIyJTJDJTIwbG9hZF9pbl84Yml0JTNEVHJ1ZSklMEFtb2RlbF80Yml0JTIwJTNEJTIwQXV0b01vZGVsRm9yQ2F1c2FsTE0uZnJvbV9wcmV0cmFpbmVkKCUyMmZhY2Vib29rJTJGb3B0LTM1MG0lMjIlMkMlMjBsb2FkX2luXzRiaXQlM0RUcnVlKQ==",highlighted:`<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForCausalLM | |
| model_8bit = AutoModelForCausalLM.from_pretrained(<span class="hljs-string">"facebook/opt-350m"</span>, load_in_8bit=<span class="hljs-literal">True</span>) | |
| model_4bit = AutoModelForCausalLM.from_pretrained(<span class="hljs-string">"facebook/opt-350m"</span>, load_in_4bit=<span class="hljs-literal">True</span>)`,wrap:!1}}),Ut=new _({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwdHJhbnNmb3JtZXJzJTIwaW1wb3J0JTIwQXV0b01vZGVsRm9yQ2F1c2FsTE0lMEElMEFtb2RlbF84Yml0JTIwJTNEJTIwQXV0b01vZGVsRm9yQ2F1c2FsTE0uZnJvbV9wcmV0cmFpbmVkKCUyMmZhY2Vib29rJTJGb3B0LTM1MG0lMjIlMkMlMjBsb2FkX2luXzhiaXQlM0RUcnVlJTJDJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5mbG9hdDMyKSUwQW1vZGVsXzhiaXQubW9kZWwuZGVjb2Rlci5sYXllcnMlNUItMSU1RC5maW5hbF9sYXllcl9ub3JtLndlaWdodC5kdHlwZQ==",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForCausalLM | |
| <span class="hljs-meta">>>> </span>model_8bit = AutoModelForCausalLM.from_pretrained(<span class="hljs-string">"facebook/opt-350m"</span>, load_in_8bit=<span class="hljs-literal">True</span>, torch_dtype=torch.float32) | |
| <span class="hljs-meta">>>> </span>model_8bit.model.decoder.layers[-<span class="hljs-number">1</span>].final_layer_norm.weight.dtype | |
| torch.float32`,wrap:!1}}),Qt=new g({props:{title:"FP4 量化",local:"fp4-量化",headingTag:"h3"}}),zt=new g({props:{title:"要求",local:"要求",headingTag:"h4"}}),Bt=new g({props:{title:"提示和最佳实践",local:"提示和最佳实践",headingTag:"h4"}}),Vt=new g({props:{title:"加载 4 位量化的大模型",local:"加载-4-位量化的大模型",headingTag:"h4"}}),Ht=new _({props:{code:"JTIzJTIwcGlwJTIwaW5zdGFsbCUyMHRyYW5zZm9ybWVycyUyMGFjY2VsZXJhdGUlMjBiaXRzYW5kYnl0ZXMlMEFmcm9tJTIwdHJhbnNmb3JtZXJzJTIwaW1wb3J0JTIwQXV0b01vZGVsRm9yQ2F1c2FsTE0lMkMlMjBBdXRvVG9rZW5pemVyJTBBJTBBbW9kZWxfaWQlMjAlM0QlMjAlMjJiaWdzY2llbmNlJTJGYmxvb20tMWI3JTIyJTBBJTBBdG9rZW5pemVyJTIwJTNEJTIwQXV0b1Rva2VuaXplci5mcm9tX3ByZXRyYWluZWQobW9kZWxfaWQpJTBBbW9kZWwlMjAlM0QlMjBBdXRvTW9kZWxGb3JDYXVzYWxMTS5mcm9tX3ByZXRyYWluZWQobW9kZWxfaWQlMkMlMjBkZXZpY2VfbWFwJTNEJTIyYXV0byUyMiUyQyUyMGxvYWRfaW5fNGJpdCUzRFRydWUp",highlighted:`<span class="hljs-comment"># pip install transformers accelerate bitsandbytes</span> | |
| <span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForCausalLM, AutoTokenizer | |
| model_id = <span class="hljs-string">"bigscience/bloom-1b7"</span> | |
| tokenizer = AutoTokenizer.from_pretrained(model_id) | |
| model = AutoModelForCausalLM.from_pretrained(model_id, device_map=<span class="hljs-string">"auto"</span>, load_in_4bit=<span class="hljs-literal">True</span>)`,wrap:!1}}),F=new ss({props:{warning:!0,$$slots:{default:[yr]},$$scope:{ctx:U}}}),At=new g({props:{title:"加载 8 位量化的大模型",local:"加载-8-位量化的大模型",headingTag:"h3"}}),Ft=new _({props:{code:"JTIzJTIwcGlwJTIwaW5zdGFsbCUyMHRyYW5zZm9ybWVycyUyMGFjY2VsZXJhdGUlMjBiaXRzYW5kYnl0ZXMlMEFmcm9tJTIwdHJhbnNmb3JtZXJzJTIwaW1wb3J0JTIwQXV0b01vZGVsRm9yQ2F1c2FsTE0lMkMlMjBBdXRvVG9rZW5pemVyJTJDJTIwQml0c0FuZEJ5dGVzQ29uZmlnJTBBJTBBbW9kZWxfaWQlMjAlM0QlMjAlMjJiaWdzY2llbmNlJTJGYmxvb20tMWI3JTIyJTBBJTBBdG9rZW5pemVyJTIwJTNEJTIwQXV0b1Rva2VuaXplci5mcm9tX3ByZXRyYWluZWQobW9kZWxfaWQpJTBBbW9kZWwlMjAlM0QlMjBBdXRvTW9kZWxGb3JDYXVzYWxMTS5mcm9tX3ByZXRyYWluZWQobW9kZWxfaWQlMkMlMjBxdWFudGl6YXRpb25fY29uZmlnJTNEQml0c0FuZEJ5dGVzQ29uZmlnKGxvYWRfaW5fOGJpdCUzRFRydWUpKQ==",highlighted:`<span class="hljs-comment"># pip install transformers accelerate bitsandbytes</span> | |
| <span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig | |
| model_id = <span class="hljs-string">"bigscience/bloom-1b7"</span> | |
| tokenizer = AutoTokenizer.from_pretrained(model_id) | |
| model = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=BitsAndBytesConfig(load_in_8bit=<span class="hljs-literal">True</span>))`,wrap:!1}}),Pt=new _({props:{code:"cHJpbnQobW9kZWwuZ2V0X21lbW9yeV9mb290cHJpbnQoKSk=",highlighted:'<span class="hljs-built_in">print</span>(model.get_memory_footprint())',wrap:!1}}),N=new ss({props:{warning:!0,$$slots:{default:[Mr]},$$scope:{ctx:U}}}),Yt=new g({props:{title:"高级用例",local:"高级用例",headingTag:"h4"}}),Dt=new g({props:{title:"更改计算数据类型",local:"更改计算数据类型",headingTag:"h5"}}),Ot=new _({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwdHJhbnNmb3JtZXJzJTIwaW1wb3J0JTIwQml0c0FuZEJ5dGVzQ29uZmlnJTBBJTBBcXVhbnRpemF0aW9uX2NvbmZpZyUyMCUzRCUyMEJpdHNBbmRCeXRlc0NvbmZpZyhsb2FkX2luXzRiaXQlM0RUcnVlJTJDJTIwYm5iXzRiaXRfY29tcHV0ZV9kdHlwZSUzRHRvcmNoLmJmbG9hdDE2KQ==",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> BitsAndBytesConfig | |
| quantization_config = BitsAndBytesConfig(load_in_4bit=<span class="hljs-literal">True</span>, bnb_4bit_compute_dtype=torch.bfloat16)`,wrap:!1}}),en=new g({props:{title:"使用 NF4(普通浮点数 4)数据类型",local:"使用-nf4普通浮点数-4数据类型",headingTag:"h4"}}),nn=new _({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEJpdHNBbmRCeXRlc0NvbmZpZyUwQSUwQW5mNF9jb25maWclMjAlM0QlMjBCaXRzQW5kQnl0ZXNDb25maWcoJTBBJTIwJTIwJTIwJTIwbG9hZF9pbl80Yml0JTNEVHJ1ZSUyQyUwQSUyMCUyMCUyMCUyMGJuYl80Yml0X3F1YW50X3R5cGUlM0QlMjJuZjQlMjIlMkMlMEEpJTBBJTBBbW9kZWxfbmY0JTIwJTNEJTIwQXV0b01vZGVsRm9yQ2F1c2FsTE0uZnJvbV9wcmV0cmFpbmVkKG1vZGVsX2lkJTJDJTIwcXVhbnRpemF0aW9uX2NvbmZpZyUzRG5mNF9jb25maWcp",highlighted:`<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> BitsAndBytesConfig | |
| nf4_config = BitsAndBytesConfig( | |
| load_in_4bit=<span class="hljs-literal">True</span>, | |
| bnb_4bit_quant_type=<span class="hljs-string">"nf4"</span>, | |
| ) | |
| model_nf4 = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=nf4_config)`,wrap:!1}}),ln=new g({props:{title:"使用嵌套量化进行更高效的内存推理",local:"使用嵌套量化进行更高效的内存推理",headingTag:"h4"}}),an=new _({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEJpdHNBbmRCeXRlc0NvbmZpZyUwQSUwQWRvdWJsZV9xdWFudF9jb25maWclMjAlM0QlMjBCaXRzQW5kQnl0ZXNDb25maWcoJTBBJTIwJTIwJTIwJTIwbG9hZF9pbl80Yml0JTNEVHJ1ZSUyQyUwQSUyMCUyMCUyMCUyMGJuYl80Yml0X3VzZV9kb3VibGVfcXVhbnQlM0RUcnVlJTJDJTBBKSUwQSUwQW1vZGVsX2RvdWJsZV9xdWFudCUyMCUzRCUyMEF1dG9Nb2RlbEZvckNhdXNhbExNLmZyb21fcHJldHJhaW5lZChtb2RlbF9pZCUyQyUyMHF1YW50aXphdGlvbl9jb25maWclM0Rkb3VibGVfcXVhbnRfY29uZmlnKQ==",highlighted:`<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> BitsAndBytesConfig | |
| double_quant_config = BitsAndBytesConfig( | |
| load_in_4bit=<span class="hljs-literal">True</span>, | |
| bnb_4bit_use_double_quant=<span class="hljs-literal">True</span>, | |
| ) | |
| model_double_quant = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=double_quant_config)`,wrap:!1}}),sn=new g({props:{title:"将量化模型推送到🤗 Hub",local:"将量化模型推送到-hub",headingTag:"h3"}}),mn=new _({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Nb2RlbEZvckNhdXNhbExNJTJDJTIwQXV0b1Rva2VuaXplciUyQyUyMEJpdHNBbmRCeXRlc0NvbmZpZyUwQSUwQW1vZGVsJTIwJTNEJTIwQXV0b01vZGVsRm9yQ2F1c2FsTE0uZnJvbV9wcmV0cmFpbmVkKCUyMmJpZ3NjaWVuY2UlMkZibG9vbS01NjBtJTIyJTJDJTIwcXVhbnRpemF0aW9uX2NvbmZpZyUzREJpdHNBbmRCeXRlc0NvbmZpZyhsb2FkX2luXzhiaXQlM0RUcnVlKSklMEF0b2tlbml6ZXIlMjAlM0QlMjBBdXRvVG9rZW5pemVyLmZyb21fcHJldHJhaW5lZCglMjJiaWdzY2llbmNlJTJGYmxvb20tNTYwbSUyMiklMEElMEFtb2RlbC5wdXNoX3RvX2h1YiglMjJibG9vbS01NjBtLThiaXQlMjIp",highlighted:`<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig | |
| model = AutoModelForCausalLM.from_pretrained(<span class="hljs-string">"bigscience/bloom-560m"</span>, quantization_config=BitsAndBytesConfig(load_in_8bit=<span class="hljs-literal">True</span>)) | |
| tokenizer = AutoTokenizer.from_pretrained(<span class="hljs-string">"bigscience/bloom-560m"</span>) | |
| model.push_to_hub(<span class="hljs-string">"bloom-560m-8bit"</span>)`,wrap:!1}}),I=new ss({props:{warning:!0,$$slots:{default:[Tr]},$$scope:{ctx:U}}}),pn=new g({props:{title:"从🤗 Hub加载量化模型",local:"从-hub加载量化模型",headingTag:"h3"}}),fn=new _({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Nb2RlbEZvckNhdXNhbExNJTJDJTIwQXV0b1Rva2VuaXplciUwQSUwQW1vZGVsJTIwJTNEJTIwQXV0b01vZGVsRm9yQ2F1c2FsTE0uZnJvbV9wcmV0cmFpbmVkKCUyMiU3QnlvdXJfdXNlcm5hbWUlN0QlMkZibG9vbS01NjBtLThiaXQlMjIlMkMlMjBkZXZpY2VfbWFwJTNEJTIyYXV0byUyMik=",highlighted:`<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForCausalLM, AutoTokenizer | |
| model = AutoModelForCausalLM.from_pretrained(<span class="hljs-string">"{your_username}/bloom-560m-8bit"</span>, device_map=<span class="hljs-string">"auto"</span>)`,wrap:!1}}),un=new g({props:{title:"高级用例",local:"高级用例",headingTag:"h3"}}),gn=new g({props:{title:"在 cpu 和 gpu 之间卸载",local:"在-cpu-和-gpu-之间卸载",headingTag:"h4"}}),$n=new _({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Nb2RlbEZvckNhdXNhbExNJTJDJTIwQXV0b1Rva2VuaXplciUyQyUyMEJpdHNBbmRCeXRlc0NvbmZpZyUwQSUwQXF1YW50aXphdGlvbl9jb25maWclMjAlM0QlMjBCaXRzQW5kQnl0ZXNDb25maWcobGxtX2ludDhfZW5hYmxlX2ZwMzJfY3B1X29mZmxvYWQlM0RUcnVlKQ==",highlighted:`<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig | |
| quantization_config = BitsAndBytesConfig(llm_int8_enable_fp32_cpu_offload=<span class="hljs-literal">True</span>)`,wrap:!1}}),Mn=new _({props:{code:"ZGV2aWNlX21hcCUyMCUzRCUyMCU3QiUwQSUyMCUyMCUyMCUyMCUyMnRyYW5zZm9ybWVyLndvcmRfZW1iZWRkaW5ncyUyMiUzQSUyMDAlMkMlMEElMjAlMjAlMjAlMjAlMjJ0cmFuc2Zvcm1lci53b3JkX2VtYmVkZGluZ3NfbGF5ZXJub3JtJTIyJTNBJTIwMCUyQyUwQSUyMCUyMCUyMCUyMCUyMmxtX2hlYWQlMjIlM0ElMjAlMjJjcHUlMjIlMkMlMEElMjAlMjAlMjAlMjAlMjJ0cmFuc2Zvcm1lci5oJTIyJTNBJTIwMCUyQyUwQSUyMCUyMCUyMCUyMCUyMnRyYW5zZm9ybWVyLmxuX2YlMjIlM0ElMjAwJTJDJTBBJTdE",highlighted:`device_map = { | |
| <span class="hljs-string">"transformer.word_embeddings"</span>: <span class="hljs-number">0</span>, | |
| <span class="hljs-string">"transformer.word_embeddings_layernorm"</span>: <span class="hljs-number">0</span>, | |
| <span class="hljs-string">"lm_head"</span>: <span class="hljs-string">"cpu"</span>, | |
| <span class="hljs-string">"transformer.h"</span>: <span class="hljs-number">0</span>, | |
| <span class="hljs-string">"transformer.ln_f"</span>: <span class="hljs-number">0</span>, | |
| }`,wrap:!1}}),vn=new _({props:{code:"bW9kZWxfOGJpdCUyMCUzRCUyMEF1dG9Nb2RlbEZvckNhdXNhbExNLmZyb21fcHJldHJhaW5lZCglMEElMjAlMjAlMjAlMjAlMjJiaWdzY2llbmNlJTJGYmxvb20tMWI3JTIyJTJDJTBBJTIwJTIwJTIwJTIwZGV2aWNlX21hcCUzRGRldmljZV9tYXAlMkMlMEElMjAlMjAlMjAlMjBxdWFudGl6YXRpb25fY29uZmlnJTNEcXVhbnRpemF0aW9uX2NvbmZpZyUyQyUwQSk=",highlighted:`model_8bit = AutoModelForCausalLM.from_pretrained( | |
| <span class="hljs-string">"bigscience/bloom-1b7"</span>, | |
| device_map=device_map, | |
| quantization_config=quantization_config, | |
| )`,wrap:!1}}),Cn=new g({props:{title:"使用 llm_int8_threshold",local:"使用-llmint8threshold",headingTag:"h4"}}),xn=new _({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Nb2RlbEZvckNhdXNhbExNJTJDJTIwQXV0b1Rva2VuaXplciUyQyUyMEJpdHNBbmRCeXRlc0NvbmZpZyUwQSUwQW1vZGVsX2lkJTIwJTNEJTIwJTIyYmlnc2NpZW5jZSUyRmJsb29tLTFiNyUyMiUwQSUwQXF1YW50aXphdGlvbl9jb25maWclMjAlM0QlMjBCaXRzQW5kQnl0ZXNDb25maWcoJTBBJTIwJTIwJTIwJTIwbGxtX2ludDhfdGhyZXNob2xkJTNEMTAlMkMlMEEpJTBBJTBBbW9kZWxfOGJpdCUyMCUzRCUyMEF1dG9Nb2RlbEZvckNhdXNhbExNLmZyb21fcHJldHJhaW5lZCglMEElMjAlMjAlMjAlMjBtb2RlbF9pZCUyQyUwQSUyMCUyMCUyMCUyMGRldmljZV9tYXAlM0RkZXZpY2VfbWFwJTJDJTBBJTIwJTIwJTIwJTIwcXVhbnRpemF0aW9uX2NvbmZpZyUzRHF1YW50aXphdGlvbl9jb25maWclMkMlMEEpJTBBdG9rZW5pemVyJTIwJTNEJTIwQXV0b1Rva2VuaXplci5mcm9tX3ByZXRyYWluZWQobW9kZWxfaWQp",highlighted:`<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig | |
| model_id = <span class="hljs-string">"bigscience/bloom-1b7"</span> | |
| quantization_config = BitsAndBytesConfig( | |
| llm_int8_threshold=<span class="hljs-number">10</span>, | |
| ) | |
| model_8bit = AutoModelForCausalLM.from_pretrained( | |
| model_id, | |
| device_map=device_map, | |
| quantization_config=quantization_config, | |
| ) | |
| tokenizer = AutoTokenizer.from_pretrained(model_id)`,wrap:!1}}),kn=new g({props:{title:"跳过某些模块的转换",local:"跳过某些模块的转换",headingTag:"h4"}}),Wn=new _({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Nb2RlbEZvckNhdXNhbExNJTJDJTIwQXV0b1Rva2VuaXplciUyQyUyMEJpdHNBbmRCeXRlc0NvbmZpZyUwQSUwQW1vZGVsX2lkJTIwJTNEJTIwJTIyYmlnc2NpZW5jZSUyRmJsb29tLTFiNyUyMiUwQSUwQXF1YW50aXphdGlvbl9jb25maWclMjAlM0QlMjBCaXRzQW5kQnl0ZXNDb25maWcoJTBBJTIwJTIwJTIwJTIwbGxtX2ludDhfc2tpcF9tb2R1bGVzJTNEJTVCJTIybG1faGVhZCUyMiU1RCUyQyUwQSklMEElMEFtb2RlbF84Yml0JTIwJTNEJTIwQXV0b01vZGVsRm9yQ2F1c2FsTE0uZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMG1vZGVsX2lkJTJDJTBBJTIwJTIwJTIwJTIwZGV2aWNlX21hcCUzRGRldmljZV9tYXAlMkMlMEElMjAlMjAlMjAlMjBxdWFudGl6YXRpb25fY29uZmlnJTNEcXVhbnRpemF0aW9uX2NvbmZpZyUyQyUwQSklMEF0b2tlbml6ZXIlMjAlM0QlMjBBdXRvVG9rZW5pemVyLmZyb21fcHJldHJhaW5lZChtb2RlbF9pZCk=",highlighted:`<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig | |
| model_id = <span class="hljs-string">"bigscience/bloom-1b7"</span> | |
| quantization_config = BitsAndBytesConfig( | |
| llm_int8_skip_modules=[<span class="hljs-string">"lm_head"</span>], | |
| ) | |
| model_8bit = AutoModelForCausalLM.from_pretrained( | |
| model_id, | |
| device_map=device_map, | |
| quantization_config=quantization_config, | |
| ) | |
| tokenizer = AutoTokenizer.from_pretrained(model_id)`,wrap:!1}}),Zn=new g({props:{title:"微调已加载为8位精度的模型",local:"微调已加载为8位精度的模型",headingTag:"h4"}}),zn=new g({props:{title:"BitsAndBytesConfig",local:"transformers.BitsAndBytesConfig",headingTag:"h3"}}),Gn=new Z({props:{name:"class transformers.BitsAndBytesConfig",anchor:"transformers.BitsAndBytesConfig",parameters:[{name:"load_in_8bit",val:" = False"},{name:"load_in_4bit",val:" = False"},{name:"llm_int8_threshold",val:" = 6.0"},{name:"llm_int8_skip_modules",val:" = None"},{name:"llm_int8_enable_fp32_cpu_offload",val:" = False"},{name:"llm_int8_has_fp16_weight",val:" = False"},{name:"bnb_4bit_compute_dtype",val:" = None"},{name:"bnb_4bit_quant_type",val:" = 'fp4'"},{name:"bnb_4bit_use_double_quant",val:" = False"},{name:"bnb_4bit_quant_storage",val:" = None"},{name:"**kwargs",val:""}],parametersDescription:[{anchor:"transformers.BitsAndBytesConfig.load_in_8bit",description:`<strong>load_in_8bit</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) — | |
| This flag is used to enable 8-bit quantization with LLM.int8().`,name:"load_in_8bit"},{anchor:"transformers.BitsAndBytesConfig.load_in_4bit",description:`<strong>load_in_4bit</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) — | |
| This flag is used to enable 4-bit quantization by replacing the Linear layers with FP4/NF4 layers from | |
| <code>bitsandbytes</code>.`,name:"load_in_4bit"},{anchor:"transformers.BitsAndBytesConfig.llm_int8_threshold",description:`<strong>llm_int8_threshold</strong> (<code>float</code>, <em>optional</em>, defaults to 6.0) — | |
| This corresponds to the outlier threshold for outlier detection as described in <code>LLM.int8() : 8-bit Matrix Multiplication for Transformers at Scale</code> paper: <a href="https://arxiv.org/abs/2208.07339" rel="nofollow">https://arxiv.org/abs/2208.07339</a> Any hidden states value | |
| that is above this threshold will be considered an outlier and the operation on those values will be done | |
| in fp16. Values are usually normally distributed, that is, most values are in the range [-3.5, 3.5], but | |
| there are some exceptional systematic outliers that are very differently distributed for large models. | |
| These outliers are often in the interval [-60, -6] or [6, 60]. Int8 quantization works well for values of | |
| magnitude ~5, but beyond that, there is a significant performance penalty. A good default threshold is 6, | |
| but a lower threshold might be needed for more unstable models (small models, fine-tuning).`,name:"llm_int8_threshold"},{anchor:"transformers.BitsAndBytesConfig.llm_int8_skip_modules",description:`<strong>llm_int8_skip_modules</strong> (<code>List[str]</code>, <em>optional</em>) — | |
| An explicit list of the modules that we do not want to convert in 8-bit. This is useful for models such as | |
| Jukebox that has several heads in different places and not necessarily at the last position. For example | |
| for <code>CausalLM</code> models, the last <code>lm_head</code> is kept in its original <code>dtype</code>.`,name:"llm_int8_skip_modules"},{anchor:"transformers.BitsAndBytesConfig.llm_int8_enable_fp32_cpu_offload",description:`<strong>llm_int8_enable_fp32_cpu_offload</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) — | |
| This flag is used for advanced use cases and users that are aware of this feature. If you want to split | |
| your model in different parts and run some parts in int8 on GPU and some parts in fp32 on CPU, you can use | |
| this flag. This is useful for offloading large models such as <code>google/flan-t5-xxl</code>. Note that the int8 | |
| operations will not be run on CPU.`,name:"llm_int8_enable_fp32_cpu_offload"},{anchor:"transformers.BitsAndBytesConfig.llm_int8_has_fp16_weight",description:`<strong>llm_int8_has_fp16_weight</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) — | |
| This flag runs LLM.int8() with 16-bit main weights. This is useful for fine-tuning as the weights do not | |
| have to be converted back and forth for the backward pass.`,name:"llm_int8_has_fp16_weight"},{anchor:"transformers.BitsAndBytesConfig.bnb_4bit_compute_dtype",description:`<strong>bnb_4bit_compute_dtype</strong> (<code>torch.dtype</code> or str, <em>optional</em>, defaults to <code>torch.float32</code>) — | |
| This sets the computational type which might be different than the input type. For example, inputs might be | |
| fp32, but computation can be set to bf16 for speedups.`,name:"bnb_4bit_compute_dtype"},{anchor:"transformers.BitsAndBytesConfig.bnb_4bit_quant_type",description:`<strong>bnb_4bit_quant_type</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"fp4"</code>) — | |
| This sets the quantization data type in the bnb.nn.Linear4Bit layers. Options are FP4 and NF4 data types | |
| which are specified by <code>fp4</code> or <code>nf4</code>.`,name:"bnb_4bit_quant_type"},{anchor:"transformers.BitsAndBytesConfig.bnb_4bit_use_double_quant",description:`<strong>bnb_4bit_use_double_quant</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) — | |
| This flag is used for nested quantization where the quantization constants from the first quantization are | |
| quantized again.`,name:"bnb_4bit_use_double_quant"},{anchor:"transformers.BitsAndBytesConfig.bnb_4bit_quant_storage",description:`<strong>bnb_4bit_quant_storage</strong> (<code>torch.dtype</code> or str, <em>optional</em>, defaults to <code>torch.uint8</code>) — | |
| This sets the storage type to pack the quantized 4-bit params.`,name:"bnb_4bit_quant_storage"},{anchor:"transformers.BitsAndBytesConfig.kwargs",description:`<strong>kwargs</strong> (<code>Dict[str, Any]</code>, <em>optional</em>) — | |
| Additional parameters from which to initialize the configuration object.`,name:"kwargs"}],source:"https://github.com/huggingface/transformers/blob/vr_33914/src/transformers/utils/quantization_config.py#L307"}}),jn=new Z({props:{name:"is_quantizable",anchor:"transformers.BitsAndBytesConfig.is_quantizable",parameters:[],source:"https://github.com/huggingface/transformers/blob/vr_33914/src/transformers/utils/quantization_config.py#L478"}}),Bn=new Z({props:{name:"post_init",anchor:"transformers.BitsAndBytesConfig.post_init",parameters:[],source:"https://github.com/huggingface/transformers/blob/vr_33914/src/transformers/utils/quantization_config.py#L441"}}),Xn=new Z({props:{name:"quantization_method",anchor:"transformers.BitsAndBytesConfig.quantization_method",parameters:[],source:"https://github.com/huggingface/transformers/blob/vr_33914/src/transformers/utils/quantization_config.py#L484"}}),Vn=new Z({props:{name:"to_diff_dict",anchor:"transformers.BitsAndBytesConfig.to_diff_dict",parameters:[],source:"https://github.com/huggingface/transformers/blob/vr_33914/src/transformers/utils/quantization_config.py#L515",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>Dictionary of all the attributes that make up this configuration instance,</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><code>Dict[str, Any]</code></p> | |
| `}}),Ln=new g({props:{title:"使用 🤗 optimum 进行量化",local:"使用--optimum-进行量化",headingTag:"h2"}}),An=new _r({props:{source:"https://github.com/huggingface/transformers/blob/main/docs/source/zh/main_classes/quantization.md"}}),{c(){h=s("meta"),M=o(),$=s("p"),T=o(),m(Q.$$.fragment),C=o(),m(q.$$.fragment),al=o(),D=s("p"),D.innerHTML=qs,sl=o(),K=s("p"),K.textContent=Ws,il=o(),m(O.$$.fragment),rl=o(),ee=s("p"),ee.textContent=Zs,ml=o(),te=s("ul"),te.innerHTML=Us,pl=o(),ne=s("p"),ne.innerHTML=Qs,dl=o(),m(le.$$.fragment),fl=o(),oe=s("p"),oe.innerHTML=zs,cl=o(),m(ae.$$.fragment),ul=o(),se=s("p"),se.innerHTML=Gs,bl=o(),m(ie.$$.fragment),gl=o(),m(re.$$.fragment),hl=o(),me=s("p"),me.textContent=js,_l=o(),m(pe.$$.fragment),$l=o(),m(de.$$.fragment),yl=o(),fe=s("p"),fe.innerHTML=Bs,Ml=o(),m(ce.$$.fragment),Tl=o(),m(ue.$$.fragment),vl=o(),be=s("p"),be.innerHTML=Xs,wl=o(),ge=s("p"),ge.innerHTML=Vs,Cl=o(),he=s("p"),he.innerHTML=Ls,Jl=o(),G=s("div"),G.innerHTML=Hs,xl=o(),j=s("div"),j.innerHTML=As,kl=o(),B=s("div"),B.innerHTML=Rs,ql=o(),X=s("div"),X.innerHTML=Fs,Wl=o(),_e=s("p"),_e.innerHTML=Ns,Zl=o(),$e=s("p"),$e.textContent=Is,Ul=o(),m(ye.$$.fragment),Ql=o(),Me=s("p"),Me.innerHTML=Ps,zl=o(),m(Te.$$.fragment),Gl=o(),W=s("div"),m(ve.$$.fragment),is=o(),Nn=s("p"),Nn.innerHTML=Es,rs=o(),V=s("div"),m(we.$$.fragment),ms=o(),In=s("p"),In.textContent=Ys,jl=o(),m(Ce.$$.fragment),Bl=o(),Je=s("p"),Je.innerHTML=Ss,Xl=o(),xe=s("p"),xe.textContent=Ds,Vl=o(),ke=s("ul"),ke.innerHTML=Ks,Ll=o(),m(qe.$$.fragment),Hl=o(),We=s("p"),We.textContent=Os,Al=o(),Ze=s("ul"),Ze.innerHTML=ei,Rl=o(),Ue=s("p"),Ue.textContent=ti,Fl=o(),m(Qe.$$.fragment),Nl=o(),ze=s("p"),ze.innerHTML=ni,Il=o(),Ge=s("p"),Ge.textContent=li,Pl=o(),m(je.$$.fragment),El=o(),Be=s("p"),Be.innerHTML=oi,Yl=o(),m(Xe.$$.fragment),Sl=o(),Ve=s("p"),Ve.textContent=ai,Dl=o(),m(Le.$$.fragment),Kl=o(),m(He.$$.fragment),Ol=o(),Ae=s("p"),Ae.innerHTML=si,eo=o(),m(Re.$$.fragment),to=o(),Fe=s("p"),Fe.textContent=ii,no=o(),Ne=s("p"),Ne.innerHTML=ri,lo=o(),m(Ie
.$$.fragment),oo=o(),Pe=s("p"),Pe.innerHTML=mi,ao=o(),m(L.$$.fragment),so=o(),m(Ee.$$.fragment),io=o(),Ye=s("p"),Ye.innerHTML=pi,ro=o(),m(Se.$$.fragment),mo=o(),De=s("p"),De.innerHTML=di,po=o(),m(Ke.$$.fragment),fo=o(),Oe=s("p"),Oe.innerHTML=fi,co=o(),m(et.$$.fragment),uo=o(),m(tt.$$.fragment),bo=o(),nt=s("p"),nt.innerHTML=ci,go=o(),m(lt.$$.fragment),ho=o(),ot=s("p"),ot.innerHTML=ui,_o=o(),m(at.$$.fragment),$o=o(),m(st.$$.fragment),yo=o(),it=s("p"),it.innerHTML=bi,Mo=o(),m(rt.$$.fragment),To=o(),mt=s("p"),mt.innerHTML=gi,vo=o(),m(pt.$$.fragment),wo=o(),dt=s("p"),dt.textContent=hi,Co=o(),ft=s("p"),ft.innerHTML=_i,Jo=o(),m(ct.$$.fragment),xo=o(),ut=s("p"),ut.innerHTML=$i,ko=o(),m(bt.$$.fragment),qo=o(),gt=s("p"),gt.innerHTML=yi,Wo=o(),m(ht.$$.fragment),Zo=o(),w=s("div"),m(_t.$$.fragment),ps=o(),Pn=s("p"),Pn.innerHTML=Mi,ds=o(),H=s("div"),m($t.$$.fragment),fs=o(),En=s("p"),En.textContent=Ti,cs=o(),A=s("div"),m(yt.$$.fragment),us=o(),Yn=s("p"),Yn.textContent=vi,bs=o(),R=s("div"),m(Mt.$$.fragment),gs=o(),Sn=s("p"),Sn.textContent=wi,Uo=o(),m(Tt.$$.fragment),Qo=o(),vt=s("p"),vt.innerHTML=Ci,zo=o(),wt=s("p"),wt.innerHTML=Ji,Go=o(),Ct=s("p"),Ct.textContent=xi,jo=o(),Jt=s("p"),Jt.innerHTML=ki,Bo=o(),xt=s("p"),xt.textContent=qi,Xo=o(),m(kt.$$.fragment),Vo=o(),qt=s("p"),qt.innerHTML=Wi,Lo=o(),m(Wt.$$.fragment),Ho=o(),Zt=s("p"),Zt.innerHTML=Zi,Ao=o(),m(Ut.$$.fragment),Ro=o(),m(Qt.$$.fragment),Fo=o(),m(zt.$$.fragment),No=o(),Gt=s("p"),Gt.textContent=Ui,Io=o(),jt=s("ul"),jt.innerHTML=Qi,Po=o(),m(Bt.$$.fragment),Eo=o(),Xt=s("ul"),Xt.innerHTML=zi,Yo=o(),m(Vt.$$.fragment),So=o(),Lt=s("p"),Lt.innerHTML=Gi,Do=o(),m(Ht.$$.fragment),Ko=o(),m(F.$$.fragment),Oo=o(),m(At.$$.fragment),ea=o(),Rt=s("p"),Rt.innerHTML=ji,ta=o(),m(Ft.$$.fragment),na=o(),Nt=s("p"),Nt.innerHTML=Bi,la=o(),It=s("p"),It.innerHTML=Xi,oa=o(),m(Pt.$$.fragment),aa=o(),Et=s("p"),Et.textContent=Vi,sa=o(),m(N.$$.fragment),ia=o(),m(Yt.$$.fragment),ra=o(),St=s("p"),St.textContent=Li,ma=o(),m(Dt.$$.fragment),pa=o(),Kt=s("p"),Kt
.innerHTML=Hi,da=o(),m(Ot.$$.fragment),fa=o(),m(en.$$.fragment),ca=o(),tn=s("p"),tn.textContent=Ai,ua=o(),m(nn.$$.fragment),ba=o(),m(ln.$$.fragment),ga=o(),on=s("p"),on.textContent=Ri,ha=o(),m(an.$$.fragment),_a=o(),m(sn.$$.fragment),$a=o(),rn=s("p"),rn.innerHTML=Fi,ya=o(),m(mn.$$.fragment),Ma=o(),m(I.$$.fragment),Ta=o(),m(pn.$$.fragment),va=o(),dn=s("p"),dn.innerHTML=Ni,wa=o(),m(fn.$$.fragment),Ca=o(),cn=s("p"),cn.innerHTML=Ii,Ja=o(),m(un.$$.fragment),xa=o(),bn=s("p"),bn.textContent=Pi,ka=o(),m(gn.$$.fragment),qa=o(),hn=s("p"),hn.innerHTML=Ei,Wa=o(),_n=s("p"),_n.innerHTML=Yi,Za=o(),m($n.$$.fragment),Ua=o(),yn=s("p"),yn.innerHTML=Si,Qa=o(),m(Mn.$$.fragment),za=o(),Tn=s("p"),Tn.textContent=Di,Ga=o(),m(vn.$$.fragment),ja=o(),wn=s("p"),wn.textContent=Ki,Ba=o(),m(Cn.$$.fragment),Xa=o(),Jn=s("p"),Jn.innerHTML=Oi,Va=o(),m(xn.$$.fragment),La=o(),m(kn.$$.fragment),Ha=o(),qn=s("p"),qn.innerHTML=er,Aa=o(),m(Wn.$$.fragment),Ra=o(),m(Zn.$$.fragment),Fa=o(),Un=s("p"),Un.innerHTML=tr,Na=o(),Qn=s("p"),Qn.innerHTML=nr,Ia=o(),m(zn.$$.fragment),Pa=o(),y=s("div"),m(Gn.$$.fragment),hs=o(),Dn=s("p"),Dn.innerHTML=lr,_s=o(),Kn=s("p"),Kn.innerHTML=or,$s=o(),On=s("p"),On.innerHTML=ar,ys=o(),P=s("div"),m(jn.$$.fragment),Ms=o(),el=s("p"),el.innerHTML=sr,Ts=o(),E=s("div"),m(Bn.$$.fragment),vs=o(),tl=s("p"),tl.textContent=ir,ws=o(),Y=s("div"),m(Xn.$$.fragment),Cs=o(),nl=s("p"),nl.innerHTML=rr,Js=o(),S=s("div"),m(Vn.$$.fragment),xs=o(),ll=s("p"),ll.textContent=mr,Ea=o(),m(Ln.$$.fragment),Ya=o(),Hn=s("p"),Hn.innerHTML=pr,Sa=o(),m(An.$$.fragment),Da=o(),ol=s("p"),this.h()},l(e){const 
t=br("svelte-u9bgzb",document.head);h=i(t,"META",{name:!0,content:!0}),t.forEach(n),M=a(e),$=i(e,"P",{}),x($).forEach(n),T=a(e),p(Q.$$.fragment,e),C=a(e),p(q.$$.fragment,e),al=a(e),D=i(e,"P",{"data-svelte-h":!0}),r(D)!=="svelte-ecgix0"&&(D.innerHTML=qs),sl=a(e),K=i(e,"P",{"data-svelte-h":!0}),r(K)!=="svelte-8zaqwb"&&(K.textContent=Ws),il=a(e),p(O.$$.fragment,e),rl=a(e),ee=i(e,"P",{"data-svelte-h":!0}),r(ee)!=="svelte-thrt6i"&&(ee.textContent=Zs),ml=a(e),te=i(e,"UL",{"data-svelte-h":!0}),r(te)!=="svelte-ads6xy"&&(te.innerHTML=Us),pl=a(e),ne=i(e,"P",{"data-svelte-h":!0}),r(ne)!=="svelte-tuzvmh"&&(ne.innerHTML=Qs),dl=a(e),p(le.$$.fragment,e),fl=a(e),oe=i(e,"P",{"data-svelte-h":!0}),r(oe)!=="svelte-ntsut5"&&(oe.innerHTML=zs),cl=a(e),p(ae.$$.fragment,e),ul=a(e),se=i(e,"P",{"data-svelte-h":!0}),r(se)!=="svelte-kibpw7"&&(se.innerHTML=Gs),bl=a(e),p(ie.$$.fragment,e),gl=a(e),p(re.$$.fragment,e),hl=a(e),me=i(e,"P",{"data-svelte-h":!0}),r(me)!=="svelte-1raiibs"&&(me.textContent=js),_l=a(e),p(pe.$$.fragment,e),$l=a(e),p(de.$$.fragment,e),yl=a(e),fe=i(e,"P",{"data-svelte-h":!0}),r(fe)!=="svelte-zx2h9i"&&(fe.innerHTML=Bs),Ml=a(e),p(ce.$$.fragment,e),Tl=a(e),p(ue.$$.fragment,e),vl=a(e),be=i(e,"P",{"data-svelte-h":!0}),r(be)!=="svelte-1dghds0"&&(be.innerHTML=Xs),wl=a(e),ge=i(e,"P",{"data-svelte-h":!0}),r(ge)!=="svelte-ce4li1"&&(ge.innerHTML=Vs),Cl=a(e),he=i(e,"P",{"data-svelte-h":!0}),r(he)!=="svelte-9sh2bq"&&(he.innerHTML=Ls),Jl=a(e),G=i(e,"DIV",{style:!0,"data-svelte-h":!0}),r(G)!=="svelte-qy9lc8"&&(G.innerHTML=Hs),xl=a(e),j=i(e,"DIV",{style:!0,"data-svelte-h":!0}),r(j)!=="svelte-1oan9h4"&&(j.innerHTML=As),kl=a(e),B=i(e,"DIV",{style:!0,"data-svelte-h":!0}),r(B)!=="svelte-1p6kg0n"&&(B.innerHTML=Rs),ql=a(e),X=i(e,"DIV",{style:!0,"data-svelte-h":!0}),r(X)!=="svelte-ex9c15"&&(X.innerHTML=Fs),Wl=a(e),_e=i(e,"P",{"data-svelte-h":!0}),r(_e)!=="svelte-1ft7b39"&&(_e.innerHTML=Ns),Zl=a(e),$e=i(e,"P",{"data-svelte-h":!0}),r($e)!=="svelte-3lvbqz"&&($e.textContent=Is),Ul=a(e),p(ye.$$.fragment
,e),Ql=a(e),Me=i(e,"P",{"data-svelte-h":!0}),r(Me)!=="svelte-1vbph39"&&(Me.innerHTML=Ps),zl=a(e),p(Te.$$.fragment,e),Gl=a(e),W=i(e,"DIV",{class:!0});var z=x(W);p(ve.$$.fragment,z),is=a(z),Nn=i(z,"P",{"data-svelte-h":!0}),r(Nn)!=="svelte-1i667it"&&(Nn.innerHTML=Es),rs=a(z),V=i(z,"DIV",{class:!0});var Rn=x(V);p(we.$$.fragment,Rn),ms=a(Rn),In=i(Rn,"P",{"data-svelte-h":!0}),r(In)!=="svelte-1ozftb6"&&(In.textContent=Ys),Rn.forEach(n),z.forEach(n),jl=a(e),p(Ce.$$.fragment,e),Bl=a(e),Je=i(e,"P",{"data-svelte-h":!0}),r(Je)!=="svelte-17s7r94"&&(Je.innerHTML=Ss),Xl=a(e),xe=i(e,"P",{"data-svelte-h":!0}),r(xe)!=="svelte-1ll5cvb"&&(xe.textContent=Ds),Vl=a(e),ke=i(e,"UL",{"data-svelte-h":!0}),r(ke)!=="svelte-1qfg3be"&&(ke.innerHTML=Ks),Ll=a(e),p(qe.$$.fragment,e),Hl=a(e),We=i(e,"P",{"data-svelte-h":!0}),r(We)!=="svelte-fdtanc"&&(We.textContent=Os),Al=a(e),Ze=i(e,"UL",{"data-svelte-h":!0}),r(Ze)!=="svelte-ncs17j"&&(Ze.innerHTML=ei),Rl=a(e),Ue=i(e,"P",{"data-svelte-h":!0}),r(Ue)!=="svelte-1b2r56w"&&(Ue.textContent=ti),Fl=a(e),p(Qe.$$.fragment,e),Nl=a(e),ze=i(e,"P",{"data-svelte-h":!0}),r(ze)!=="svelte-aj21cd"&&(ze.innerHTML=ni),Il=a(e),Ge=i(e,"P",{"data-svelte-h":!0}),r(Ge)!=="svelte-pcgkdc"&&(Ge.textContent=li),Pl=a(e),p(je.$$.fragment,e),El=a(e),Be=i(e,"P",{"data-svelte-h":!0}),r(Be)!=="svelte-1abq1um"&&(Be.innerHTML=oi),Yl=a(e),p(Xe.$$.fragment,e),Sl=a(e),Ve=i(e,"P",{"data-svelte-h":!0}),r(Ve)!=="svelte-py00dp"&&(Ve.textContent=ai),Dl=a(e),p(Le.$$.fragment,e),Kl=a(e),p(He.$$.fragment,e),Ol=a(e),Ae=i(e,"P",{"data-svelte-h":!0}),r(Ae)!=="svelte-um9zqp"&&(Ae.innerHTML=si),eo=a(e),p(Re.$$.fragment,e),to=a(e),Fe=i(e,"P",{"data-svelte-h":!0}),r(Fe)!=="svelte-18966sx"&&(Fe.textContent=ii),no=a(e),Ne=i(e,"P",{"data-svelte-h":!0}),r(Ne)!=="svelte-1bwqtp4"&&(Ne.innerHTML=ri),lo=a(e),p(Ie.$$.fragment,e),oo=a(e),Pe=i(e,"P",{"data-svelte-h":!0}),r(Pe)!=="svelte-1r6xrdg"&&(Pe.innerHTML=mi),ao=a(e),p(L.$$.fragment,e),so=a(e),p(Ee.$$.fragment,e),io=a(e),Ye=i(e,"P",{"data-svelte-h":!0}),r(Ye)!==
"svelte-i7fyuc"&&(Ye.innerHTML=pi),ro=a(e),p(Se.$$.fragment,e),mo=a(e),De=i(e,"P",{"data-svelte-h":!0}),r(De)!=="svelte-1l0f6g9"&&(De.innerHTML=di),po=a(e),p(Ke.$$.fragment,e),fo=a(e),Oe=i(e,"P",{"data-svelte-h":!0}),r(Oe)!=="svelte-su51nm"&&(Oe.innerHTML=fi),co=a(e),p(et.$$.fragment,e),uo=a(e),p(tt.$$.fragment,e),bo=a(e),nt=i(e,"P",{"data-svelte-h":!0}),r(nt)!=="svelte-3q2e9r"&&(nt.innerHTML=ci),go=a(e),p(lt.$$.fragment,e),ho=a(e),ot=i(e,"P",{"data-svelte-h":!0}),r(ot)!=="svelte-2r05wl"&&(ot.innerHTML=ui),_o=a(e),p(at.$$.fragment,e),$o=a(e),p(st.$$.fragment,e),yo=a(e),it=i(e,"P",{"data-svelte-h":!0}),r(it)!=="svelte-15xscf"&&(it.innerHTML=bi),Mo=a(e),p(rt.$$.fragment,e),To=a(e),mt=i(e,"P",{"data-svelte-h":!0}),r(mt)!=="svelte-ghj53s"&&(mt.innerHTML=gi),vo=a(e),p(pt.$$.fragment,e),wo=a(e),dt=i(e,"P",{"data-svelte-h":!0}),r(dt)!=="svelte-nuv2x8"&&(dt.textContent=hi),Co=a(e),ft=i(e,"P",{"data-svelte-h":!0}),r(ft)!=="svelte-1m4itqb"&&(ft.innerHTML=_i),Jo=a(e),p(ct.$$.fragment,e),xo=a(e),ut=i(e,"P",{"data-svelte-h":!0}),r(ut)!=="svelte-mozsqh"&&(ut.innerHTML=$i),ko=a(e),p(bt.$$.fragment,e),qo=a(e),gt=i(e,"P",{"data-svelte-h":!0}),r(gt)!=="svelte-1uhcexr"&&(gt.innerHTML=yi),Wo=a(e),p(ht.$$.fragment,e),Zo=a(e),w=i(e,"DIV",{class:!0});var J=x(w);p(_t.$$.fragment,J),ps=a(J),Pn=i(J,"P",{"data-svelte-h":!0}),r(Pn)!=="svelte-i3efvr"&&(Pn.innerHTML=Mi),ds=a(J),H=i(J,"DIV",{class:!0});var Fn=x(H);p($t.$$.fragment,Fn),fs=a(Fn),En=i(Fn,"P",{"data-svelte-h":!0}),r(En)!=="svelte-4jdj2l"&&(En.textContent=Ti),Fn.forEach(n),cs=a(J),A=i(J,"DIV",{class:!0});var Oa=x(A);p(yt.$$.fragment,Oa),us=a(Oa),Yn=i(Oa,"P",{"data-svelte-h":!0}),r(Yn)!=="svelte-1ozftb6"&&(Yn.textContent=vi),Oa.forEach(n),bs=a(J),R=i(J,"DIV",{class:!0});var 
es=x(R);p(Mt.$$.fragment,es),gs=a(es),Sn=i(es,"P",{"data-svelte-h":!0}),r(Sn)!=="svelte-pjgtd6"&&(Sn.textContent=wi),es.forEach(n),J.forEach(n),Uo=a(e),p(Tt.$$.fragment,e),Qo=a(e),vt=i(e,"P",{"data-svelte-h":!0}),r(vt)!=="svelte-1hh96p7"&&(vt.innerHTML=Ci),zo=a(e),wt=i(e,"P",{"data-svelte-h":!0}),r(wt)!=="svelte-1j6dol1"&&(wt.innerHTML=Ji),Go=a(e),Ct=i(e,"P",{"data-svelte-h":!0}),r(Ct)!=="svelte-17iiuoe"&&(Ct.textContent=xi),jo=a(e),Jt=i(e,"P",{"data-svelte-h":!0}),r(Jt)!=="svelte-g1ckp3"&&(Jt.innerHTML=ki),Bo=a(e),xt=i(e,"P",{"data-svelte-h":!0}),r(xt)!=="svelte-10ye29r"&&(xt.textContent=qi),Xo=a(e),p(kt.$$.fragment,e),Vo=a(e),qt=i(e,"P",{"data-svelte-h":!0}),r(qt)!=="svelte-3sq910"&&(qt.innerHTML=Wi),Lo=a(e),p(Wt.$$.fragment,e),Ho=a(e),Zt=i(e,"P",{"data-svelte-h":!0}),r(Zt)!=="svelte-e5nhvd"&&(Zt.innerHTML=Zi),Ao=a(e),p(Ut.$$.fragment,e),Ro=a(e),p(Qt.$$.fragment,e),Fo=a(e),p(zt.$$.fragment,e),No=a(e),Gt=i(e,"P",{"data-svelte-h":!0}),r(Gt)!=="svelte-hqjahh"&&(Gt.textContent=Ui),Io=a(e),jt=i(e,"UL",{"data-svelte-h":!0}),r(jt)!=="svelte-1rz81mq"&&(jt.innerHTML=Qi),Po=a(e),p(Bt.$$.fragment,e),Eo=a(e),Xt=i(e,"UL",{"data-svelte-h":!0}),r(Xt)!=="svelte-10uc6le"&&(Xt.innerHTML=zi),Yo=a(e),p(Vt.$$.fragment,e),So=a(e),Lt=i(e,"P",{"data-svelte-h":!0}),r(Lt)!=="svelte-a38biy"&&(Lt.innerHTML=Gi),Do=a(e),p(Ht.$$.fragment,e),Ko=a(e),p(F.$$.fragment,e),Oo=a(e),p(At.$$.fragment,e),ea=a(e),Rt=i(e,"P",{"data-svelte-h":!0}),r(Rt)!=="svelte-1e0nw92"&&(Rt.innerHTML=ji),ta=a(e),p(Ft.$$.fragment,e),na=a(e),Nt=i(e,"P",{"data-svelte-h":!0}),r(Nt)!=="svelte-nja6yg"&&(Nt.innerHTML=Bi),la=a(e),It=i(e,"P",{"data-svelte-h":!0}),r(It)!=="svelte-1xv0occ"&&(It.innerHTML=Xi),oa=a(e),p(Pt.$$.fragment,e),aa=a(e),Et=i(e,"P",{"data-svelte-h":!0}),r(Et)!=="svelte-v3jp8e"&&(Et.textContent=Vi),sa=a(e),p(N.$$.fragment,e),ia=a(e),p(Yt.$$.fragment,e),ra=a(e),St=i(e,"P",{"data-svelte-h":!0}),r(St)!=="svelte-1hv6n9l"&&(St.textContent=Li),ma=a(e),p(Dt.$$.fragment,e),pa=a(e),Kt=i(e,"P",{"data-svelte-h":!0}),r(Kt
)!=="svelte-yw7m57"&&(Kt.innerHTML=Hi),da=a(e),p(Ot.$$.fragment,e),fa=a(e),p(en.$$.fragment,e),ca=a(e),tn=i(e,"P",{"data-svelte-h":!0}),r(tn)!=="svelte-6rqah2"&&(tn.textContent=Ai),ua=a(e),p(nn.$$.fragment,e),ba=a(e),p(ln.$$.fragment,e),ga=a(e),on=i(e,"P",{"data-svelte-h":!0}),r(on)!=="svelte-1ksuiwo"&&(on.textContent=Ri),ha=a(e),p(an.$$.fragment,e),_a=a(e),p(sn.$$.fragment,e),$a=a(e),rn=i(e,"P",{"data-svelte-h":!0}),r(rn)!=="svelte-14iisu"&&(rn.innerHTML=Fi),ya=a(e),p(mn.$$.fragment,e),Ma=a(e),p(I.$$.fragment,e),Ta=a(e),p(pn.$$.fragment,e),va=a(e),dn=i(e,"P",{"data-svelte-h":!0}),r(dn)!=="svelte-1iuhck1"&&(dn.innerHTML=Ni),wa=a(e),p(fn.$$.fragment,e),Ca=a(e),cn=i(e,"P",{"data-svelte-h":!0}),r(cn)!=="svelte-jfdebg"&&(cn.innerHTML=Ii),Ja=a(e),p(un.$$.fragment,e),xa=a(e),bn=i(e,"P",{"data-svelte-h":!0}),r(bn)!=="svelte-1am730"&&(bn.textContent=Pi),ka=a(e),p(gn.$$.fragment,e),qa=a(e),hn=i(e,"P",{"data-svelte-h":!0}),r(hn)!=="svelte-3pwpc4"&&(hn.innerHTML=Ei),Wa=a(e),_n=i(e,"P",{"data-svelte-h":!0}),r(_n)!=="svelte-bdycnv"&&(_n.innerHTML=Yi),Za=a(e),p($n.$$.fragment,e),Ua=a(e),yn=i(e,"P",{"data-svelte-h":!0}),r(yn)!=="svelte-1ekqeba"&&(yn.innerHTML=Si),Qa=a(e),p(Mn.$$.fragment,e),za=a(e),Tn=i(e,"P",{"data-svelte-h":!0}),r(Tn)!=="svelte-1ka50bq"&&(Tn.textContent=Di),Ga=a(e),p(vn.$$.fragment,e),ja=a(e),wn=i(e,"P",{"data-svelte-h":!0}),r(wn)!=="svelte-missd6"&&(wn.textContent=Ki),Ba=a(e),p(Cn.$$.fragment,e),Xa=a(e),Jn=i(e,"P",{"data-svelte-h":!0}),r(Jn)!=="svelte-1h9a4um"&&(Jn.innerHTML=Oi),Va=a(e),p(xn.$$.fragment,e),La=a(e),p(kn.$$.fragment,e),Ha=a(e),qn=i(e,"P",{"data-svelte-h":!0}),r(qn)!=="svelte-1s6xsxh"&&(qn.innerHTML=er),Aa=a(e),p(Wn.$$.fragment,e),Ra=a(e),p(Zn.$$.fragment,e),Fa=a(e),Un=i(e,"P",{"data-svelte-h":!0}),r(Un)!=="svelte-1rkitfn"&&(Un.innerHTML=tr),Na=a(e),Qn=i(e,"P",{"data-svelte-h":!0}),r(Qn)!=="svelte-28didg"&&(Qn.innerHTML=nr),Ia=a(e),p(zn.$$.fragment,e),Pa=a(e),y=i(e,"DIV",{class:!0});var 
v=x(y);p(Gn.$$.fragment,v),hs=a(v),Dn=i(v,"P",{"data-svelte-h":!0}),r(Dn)!=="svelte-woamwr"&&(Dn.innerHTML=lr),_s=a(v),Kn=i(v,"P",{"data-svelte-h":!0}),r(Kn)!=="svelte-ki5gis"&&(Kn.innerHTML=or),$s=a(v),On=i(v,"P",{"data-svelte-h":!0}),r(On)!=="svelte-8qsk2q"&&(On.innerHTML=ar),ys=a(v),P=i(v,"DIV",{class:!0});var ts=x(P);p(jn.$$.fragment,ts),Ms=a(ts),el=i(ts,"P",{"data-svelte-h":!0}),r(el)!=="svelte-10tvzyv"&&(el.innerHTML=sr),ts.forEach(n),Ts=a(v),E=i(v,"DIV",{class:!0});var ns=x(E);p(Bn.$$.fragment,ns),vs=a(ns),tl=i(ns,"P",{"data-svelte-h":!0}),r(tl)!=="svelte-gy26u4"&&(tl.textContent=ir),ns.forEach(n),ws=a(v),Y=i(v,"DIV",{class:!0});var ls=x(Y);p(Xn.$$.fragment,ls),Cs=a(ls),nl=i(ls,"P",{"data-svelte-h":!0}),r(nl)!=="svelte-19bn0da"&&(nl.innerHTML=rr),ls.forEach(n),Js=a(v),S=i(v,"DIV",{class:!0});var os=x(S);p(Vn.$$.fragment,os),xs=a(os),ll=i(os,"P",{"data-svelte-h":!0}),r(ll)!=="svelte-1p6bdas"&&(ll.textContent=mr),os.forEach(n),v.forEach(n),Ea=a(e),p(Ln.$$.fragment,e),Ya=a(e),Hn=i(e,"P",{"data-svelte-h":!0}),r(Hn)!=="svelte-1hh7kt7"&&(Hn.innerHTML=pr),Sa=a(e),p(An.$$.fragment,e),Da=a(e),ol=i(e,"P",{}),x(ol).forEach(n),this.h()},h(){k(h,"name","hf:doc:metadata"),k(h,"content",wr),as(G,"text-align","center"),as(j,"text-align","center"),as(B,"text-align","center"),as(X,"text-align","center"),k(V,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),k(W,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),k(H,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),k(A,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),k(R,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),k(w,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),k(P,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 
mt-8"),k(E,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),k(Y,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),k(S,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),k(y,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8")},m(e,t){b(document.head,h),l(e,M,t),l(e,$,t),l(e,T,t),d(Q,e,t),l(e,C,t),d(q,e,t),l(e,al,t),l(e,D,t),l(e,sl,t),l(e,K,t),l(e,il,t),d(O,e,t),l(e,rl,t),l(e,ee,t),l(e,ml,t),l(e,te,t),l(e,pl,t),l(e,ne,t),l(e,dl,t),d(le,e,t),l(e,fl,t),l(e,oe,t),l(e,cl,t),d(ae,e,t),l(e,ul,t),l(e,se,t),l(e,bl,t),d(ie,e,t),l(e,gl,t),d(re,e,t),l(e,hl,t),l(e,me,t),l(e,_l,t),d(pe,e,t),l(e,$l,t),d(de,e,t),l(e,yl,t),l(e,fe,t),l(e,Ml,t),d(ce,e,t),l(e,Tl,t),d(ue,e,t),l(e,vl,t),l(e,be,t),l(e,wl,t),l(e,ge,t),l(e,Cl,t),l(e,he,t),l(e,Jl,t),l(e,G,t),l(e,xl,t),l(e,j,t),l(e,kl,t),l(e,B,t),l(e,ql,t),l(e,X,t),l(e,Wl,t),l(e,_e,t),l(e,Zl,t),l(e,$e,t),l(e,Ul,t),d(ye,e,t),l(e,Ql,t),l(e,Me,t),l(e,zl,t),d(Te,e,t),l(e,Gl,t),l(e,W,t),d(ve,W,null),b(W,is),b(W,Nn),b(W,rs),b(W,V),d(we,V,null),b(V,ms),b(V,In),l(e,jl,t),d(Ce,e,t),l(e,Bl,t),l(e,Je,t),l(e,Xl,t),l(e,xe,t),l(e,Vl,t),l(e,ke,t),l(e,Ll,t),d(qe,e,t),l(e,Hl,t),l(e,We,t),l(e,Al,t),l(e,Ze,t),l(e,Rl,t),l(e,Ue,t),l(e,Fl,t),d(Qe,e,t),l(e,Nl,t),l(e,ze,t),l(e,Il,t),l(e,Ge,t),l(e,Pl,t),d(je,e,t),l(e,El,t),l(e,Be,t),l(e,Yl,t),d(Xe,e,t),l(e,Sl,t),l(e,Ve,t),l(e,Dl,t),d(Le,e,t),l(e,Kl,t),d(He,e,t),l(e,Ol,t),l(e,Ae,t),l(e,eo,t),d(Re,e,t),l(e,to,t),l(e,Fe,t),l(e,no,t),l(e,Ne,t),l(e,lo,t),d(Ie,e,t),l(e,oo,t),l(e,Pe,t),l(e,ao,t),d(L,e,t),l(e,so,t),d(Ee,e,t),l(e,io,t),l(e,Ye,t),l(e,ro,t),d(Se,e,t),l(e,mo,t),l(e,De,t),l(e,po,t),d(Ke,e,t),l(e,fo,t),l(e,Oe,t),l(e,co,t),d(et,e,t),l(e,uo,t),d(tt,e,t),l(e,bo,t),l(e,nt,t),l(e,go,t),d(lt,e,t),l(e,ho,t),l(e,ot,t),l(e,_o,t),d(at,e,t),l(e,$o,t),d(st,e,t),l(e,yo,t),l(e,it,t),l(e,Mo,t),d(rt,e,t),l(e,To,t),l(e,mt,t),l(e,vo,t),d(pt,e,t),l(e,wo,t),l(
e,dt,t),l(e,Co,t),l(e,ft,t),l(e,Jo,t),d(ct,e,t),l(e,xo,t),l(e,ut,t),l(e,ko,t),d(bt,e,t),l(e,qo,t),l(e,gt,t),l(e,Wo,t),d(ht,e,t),l(e,Zo,t),l(e,w,t),d(_t,w,null),b(w,ps),b(w,Pn),b(w,ds),b(w,H),d($t,H,null),b(H,fs),b(H,En),b(w,cs),b(w,A),d(yt,A,null),b(A,us),b(A,Yn),b(w,bs),b(w,R),d(Mt,R,null),b(R,gs),b(R,Sn),l(e,Uo,t),d(Tt,e,t),l(e,Qo,t),l(e,vt,t),l(e,zo,t),l(e,wt,t),l(e,Go,t),l(e,Ct,t),l(e,jo,t),l(e,Jt,t),l(e,Bo,t),l(e,xt,t),l(e,Xo,t),d(kt,e,t),l(e,Vo,t),l(e,qt,t),l(e,Lo,t),d(Wt,e,t),l(e,Ho,t),l(e,Zt,t),l(e,Ao,t),d(Ut,e,t),l(e,Ro,t),d(Qt,e,t),l(e,Fo,t),d(zt,e,t),l(e,No,t),l(e,Gt,t),l(e,Io,t),l(e,jt,t),l(e,Po,t),d(Bt,e,t),l(e,Eo,t),l(e,Xt,t),l(e,Yo,t),d(Vt,e,t),l(e,So,t),l(e,Lt,t),l(e,Do,t),d(Ht,e,t),l(e,Ko,t),d(F,e,t),l(e,Oo,t),d(At,e,t),l(e,ea,t),l(e,Rt,t),l(e,ta,t),d(Ft,e,t),l(e,na,t),l(e,Nt,t),l(e,la,t),l(e,It,t),l(e,oa,t),d(Pt,e,t),l(e,aa,t),l(e,Et,t),l(e,sa,t),d(N,e,t),l(e,ia,t),d(Yt,e,t),l(e,ra,t),l(e,St,t),l(e,ma,t),d(Dt,e,t),l(e,pa,t),l(e,Kt,t),l(e,da,t),d(Ot,e,t),l(e,fa,t),d(en,e,t),l(e,ca,t),l(e,tn,t),l(e,ua,t),d(nn,e,t),l(e,ba,t),d(ln,e,t),l(e,ga,t),l(e,on,t),l(e,ha,t),d(an,e,t),l(e,_a,t),d(sn,e,t),l(e,$a,t),l(e,rn,t),l(e,ya,t),d(mn,e,t),l(e,Ma,t),d(I,e,t),l(e,Ta,t),d(pn,e,t),l(e,va,t),l(e,dn,t),l(e,wa,t),d(fn,e,t),l(e,Ca,t),l(e,cn,t),l(e,Ja,t),d(un,e,t),l(e,xa,t),l(e,bn,t),l(e,ka,t),d(gn,e,t),l(e,qa,t),l(e,hn,t),l(e,Wa,t),l(e,_n,t),l(e,Za,t),d($n,e,t),l(e,Ua,t),l(e,yn,t),l(e,Qa,t),d(Mn,e,t),l(e,za,t),l(e,Tn,t),l(e,Ga,t),d(vn,e,t),l(e,ja,t),l(e,wn,t),l(e,Ba,t),d(Cn,e,t),l(e,Xa,t),l(e,Jn,t),l(e,Va,t),d(xn,e,t),l(e,La,t),d(kn,e,t),l(e,Ha,t),l(e,qn,t),l(e,Aa,t),d(Wn,e,t),l(e,Ra,t),d(Zn,e,t),l(e,Fa,t),l(e,Un,t),l(e,Na,t),l(e,Qn,t),l(e,Ia,t),d(zn,e,t),l(e,Pa,t),l(e,y,t),d(Gn,y,null),b(y,hs),b(y,Dn),b(y,_s),b(y,Kn),b(y,$s),b(y,On),b(y,ys),b(y,P),d(jn,P,null),b(P,Ms),b(P,el),b(y,Ts),b(y,E),d(Bn,E,null),b(E,vs),b(E,tl),b(y,ws),b(y,Y),d(Xn,Y,null),b(Y,Cs),b(Y,nl),b(y,Js),b(y,S),d(Vn,S,null),b(S,xs),b(S,ll),l(e,Ea,t),d(Ln,e,t),l(e,Ya,t),l(e,Hn,t),l(e,Sa,t),d(An,e,t)
,l(e,Da,t),l(e,ol,t),Ka=!0},p(e,[t]){const z={};t&2&&(z.$$scope={dirty:t,ctx:e}),L.$set(z);const Rn={};t&2&&(Rn.$$scope={dirty:t,ctx:e}),F.$set(Rn);const J={};t&2&&(J.$$scope={dirty:t,ctx:e}),N.$set(J);const Fn={};t&2&&(Fn.$$scope={dirty:t,ctx:e}),I.$set(Fn)},i(e){Ka||(f(Q.$$.fragment,e),f(q.$$.fragment,e),f(O.$$.fragment,e),f(le.$$.fragment,e),f(ae.$$.fragment,e),f(ie.$$.fragment,e),f(re.$$.fragment,e),f(pe.$$.fragment,e),f(de.$$.fragment,e),f(ce.$$.fragment,e),f(ue.$$.fragment,e),f(ye.$$.fragment,e),f(Te.$$.fragment,e),f(ve.$$.fragment,e),f(we.$$.fragment,e),f(Ce.$$.fragment,e),f(qe.$$.fragment,e),f(Qe.$$.fragment,e),f(je.$$.fragment,e),f(Xe.$$.fragment,e),f(Le.$$.fragment,e),f(He.$$.fragment,e),f(Re.$$.fragment,e),f(Ie.$$.fragment,e),f(L.$$.fragment,e),f(Ee.$$.fragment,e),f(Se.$$.fragment,e),f(Ke.$$.fragment,e),f(et.$$.fragment,e),f(tt.$$.fragment,e),f(lt.$$.fragment,e),f(at.$$.fragment,e),f(st.$$.fragment,e),f(rt.$$.fragment,e),f(pt.$$.fragment,e),f(ct.$$.fragment,e),f(bt.$$.fragment,e),f(ht.$$.fragment,e),f(_t.$$.fragment,e),f($t.$$.fragment,e),f(yt.$$.fragment,e),f(Mt.$$.fragment,e),f(Tt.$$.fragment,e),f(kt.$$.fragment,e),f(Wt.$$.fragment,e),f(Ut.$$.fragment,e),f(Qt.$$.fragment,e),f(zt.$$.fragment,e),f(Bt.$$.fragment,e),f(Vt.$$.fragment,e),f(Ht.$$.fragment,e),f(F.$$.fragment,e),f(At.$$.fragment,e),f(Ft.$$.fragment,e),f(Pt.$$.fragment,e),f(N.$$.fragment,e),f(Yt.$$.fragment,e),f(Dt.$$.fragment,e),f(Ot.$$.fragment,e),f(en.$$.fragment,e),f(nn.$$.fragment,e),f(ln.$$.fragment,e),f(an.$$.fragment,e),f(sn.$$.fragment,e),f(mn.$$.fragment,e),f(I.$$.fragment,e),f(pn.$$.fragment,e),f(fn.$$.fragment,e),f(un.$$.fragment,e),f(gn.$$.fragment,e),f($n.$$.fragment,e),f(Mn.$$.fragment,e),f(vn.$$.fragment,e),f(Cn.$$.fragment,e),f(xn.$$.fragment,e),f(kn.$$.fragment,e),f(Wn.$$.fragment,e),f(Zn.$$.fragment,e),f(zn.$$.fragment,e),f(Gn.$$.fragment,e),f(jn.$$.fragment,e),f(Bn.$$.fragment,e),f(Xn.$$.fragment,e),f(Vn.$$.fragment,e),f(Ln.$$.fragment,e),f(An.$$.fragment,e),Ka=!0)},o(e){c(Q.
$$.fragment,e),c(q.$$.fragment,e),c(O.$$.fragment,e),c(le.$$.fragment,e),c(ae.$$.fragment,e),c(ie.$$.fragment,e),c(re.$$.fragment,e),c(pe.$$.fragment,e),c(de.$$.fragment,e),c(ce.$$.fragment,e),c(ue.$$.fragment,e),c(ye.$$.fragment,e),c(Te.$$.fragment,e),c(ve.$$.fragment,e),c(we.$$.fragment,e),c(Ce.$$.fragment,e),c(qe.$$.fragment,e),c(Qe.$$.fragment,e),c(je.$$.fragment,e),c(Xe.$$.fragment,e),c(Le.$$.fragment,e),c(He.$$.fragment,e),c(Re.$$.fragment,e),c(Ie.$$.fragment,e),c(L.$$.fragment,e),c(Ee.$$.fragment,e),c(Se.$$.fragment,e),c(Ke.$$.fragment,e),c(et.$$.fragment,e),c(tt.$$.fragment,e),c(lt.$$.fragment,e),c(at.$$.fragment,e),c(st.$$.fragment,e),c(rt.$$.fragment,e),c(pt.$$.fragment,e),c(ct.$$.fragment,e),c(bt.$$.fragment,e),c(ht.$$.fragment,e),c(_t.$$.fragment,e),c($t.$$.fragment,e),c(yt.$$.fragment,e),c(Mt.$$.fragment,e),c(Tt.$$.fragment,e),c(kt.$$.fragment,e),c(Wt.$$.fragment,e),c(Ut.$$.fragment,e),c(Qt.$$.fragment,e),c(zt.$$.fragment,e),c(Bt.$$.fragment,e),c(Vt.$$.fragment,e),c(Ht.$$.fragment,e),c(F.$$.fragment,e),c(At.$$.fragment,e),c(Ft.$$.fragment,e),c(Pt.$$.fragment,e),c(N.$$.fragment,e),c(Yt.$$.fragment,e),c(Dt.$$.fragment,e),c(Ot.$$.fragment,e),c(en.$$.fragment,e),c(nn.$$.fragment,e),c(ln.$$.fragment,e),c(an.$$.fragment,e),c(sn.$$.fragment,e),c(mn.$$.fragment,e),c(I.$$.fragment,e),c(pn.$$.fragment,e),c(fn.$$.fragment,e),c(un.$$.fragment,e),c(gn.$$.fragment,e),c($n.$$.fragment,e),c(Mn.$$.fragment,e),c(vn.$$.fragment,e),c(Cn.$$.fragment,e),c(xn.$$.fragment,e),c(kn.$$.fragment,e),c(Wn.$$.fragment,e),c(Zn.$$.fragment,e),c(zn.$$.fragment,e),c(Gn.$$.fragment,e),c(jn.$$.fragment,e),c(Bn.$$.fragment,e),c(Xn.$$.fragment,e),c(Vn.$$.fragment,e),c(Ln.$$.fragment,e),c(An.$$.fragment,e),Ka=!1},d(e){e&&(n(M),n($),n(T),n(C),n(al),n(D),n(sl),n(K),n(il),n(rl),n(ee),n(ml),n(te),n(pl),n(ne),n(dl),n(fl),n(oe),n(cl),n(ul),n(se),n(bl),n(gl),n(hl),n(me),n(_l),n($l),n(yl),n(fe),n(Ml),n(Tl),n(vl),n(be),n(wl),n(ge),n(Cl),n(he),n(Jl),n(G),n(xl),n(j),n(kl),n(B),n(ql),n(X),n(Wl),n(_e),n(Z
l),n($e),n(Ul),n(Ql),n(Me),n(zl),n(Gl),n(W),n(jl),n(Bl),n(Je),n(Xl),n(xe),n(Vl),n(ke),n(Ll),n(Hl),n(We),n(Al),n(Ze),n(Rl),n(Ue),n(Fl),n(Nl),n(ze),n(Il),n(Ge),n(Pl),n(El),n(Be),n(Yl),n(Sl),n(Ve),n(Dl),n(Kl),n(Ol),n(Ae),n(eo),n(to),n(Fe),n(no),n(Ne),n(lo),n(oo),n(Pe),n(ao),n(so),n(io),n(Ye),n(ro),n(mo),n(De),n(po),n(fo),n(Oe),n(co),n(uo),n(bo),n(nt),n(go),n(ho),n(ot),n(_o),n($o),n(yo),n(it),n(Mo),n(To),n(mt),n(vo),n(wo),n(dt),n(Co),n(ft),n(Jo),n(xo),n(ut),n(ko),n(qo),n(gt),n(Wo),n(Zo),n(w),n(Uo),n(Qo),n(vt),n(zo),n(wt),n(Go),n(Ct),n(jo),n(Jt),n(Bo),n(xt),n(Xo),n(Vo),n(qt),n(Lo),n(Ho),n(Zt),n(Ao),n(Ro),n(Fo),n(No),n(Gt),n(Io),n(jt),n(Po),n(Eo),n(Xt),n(Yo),n(So),n(Lt),n(Do),n(Ko),n(Oo),n(ea),n(Rt),n(ta),n(na),n(Nt),n(la),n(It),n(oa),n(aa),n(Et),n(sa),n(ia),n(ra),n(St),n(ma),n(pa),n(Kt),n(da),n(fa),n(ca),n(tn),n(ua),n(ba),n(ga),n(on),n(ha),n(_a),n($a),n(rn),n(ya),n(Ma),n(Ta),n(va),n(dn),n(wa),n(Ca),n(cn),n(Ja),n(xa),n(bn),n(ka),n(qa),n(hn),n(Wa),n(_n),n(Za),n(Ua),n(yn),n(Qa),n(za),n(Tn),n(Ga),n(ja),n(wn),n(Ba),n(Xa),n(Jn),n(Va),n(La),n(Ha),n(qn),n(Aa),n(Ra),n(Fa),n(Un),n(Na),n(Qn),n(Ia),n(Pa),n(y),n(Ea),n(Ya),n(Hn),n(Sa),n(Da),n(ol)),n(h),u(Q,e),u(q,e),u(O,e),u(le,e),u(ae,e),u(ie,e),u(re,e),u(pe,e),u(de,e),u(ce,e),u(ue,e),u(ye,e),u(Te,e),u(ve),u(we),u(Ce,e),u(qe,e),u(Qe,e),u(je,e),u(Xe,e),u(Le,e),u(He,e),u(Re,e),u(Ie,e),u(L,e),u(Ee,e),u(Se,e),u(Ke,e),u(et,e),u(tt,e),u(lt,e),u(at,e),u(st,e),u(rt,e),u(pt,e),u(ct,e),u(bt,e),u(ht,e),u(_t),u($t),u(yt),u(Mt),u(Tt,e),u(kt,e),u(Wt,e),u(Ut,e),u(Qt,e),u(zt,e),u(Bt,e),u(Vt,e),u(Ht,e),u(F,e),u(At,e),u(Ft,e),u(Pt,e),u(N,e),u(Yt,e),u(Dt,e),u(Ot,e),u(en,e),u(nn,e),u(ln,e),u(an,e),u(sn,e),u(mn,e),u(I,e),u(pn,e),u(fn,e),u(un,e),u(gn,e),u($n,e),u(Mn,e),u(vn,e),u(Cn,e),u(xn,e),u(kn,e),u(Wn,e),u(Zn,e),u(zn,e),u(Gn),u(jn),u(Bn),u(Xn),u(Vn),u(Ln,e),u(An,e)}}}const wr='{"title":"量化 🤗 Transformers 
模型","local":"量化--transformers-模型","sections":[{"title":"AWQ集成","local":"awq集成","sections":[{"title":"量化一个模型","local":"量化一个模型","sections":[],"depth":3},{"title":"加载一个量化的模型","local":"加载一个量化的模型","sections":[],"depth":3}],"depth":2},{"title":"示例使用","local":"示例使用","sections":[{"title":"结合 AWQ 和 Flash Attention","local":"结合-awq-和-flash-attention","sections":[],"depth":3},{"title":"基准测试","local":"基准测试","sections":[],"depth":3},{"title":"Google colab 演示","local":"google-colab-演示","sections":[],"depth":3},{"title":"AwqConfig","local":"transformers.AwqConfig","sections":[],"depth":3}],"depth":2},{"title":"AutoGPTQ 集成","local":"autogptq-集成","sections":[{"title":"要求","local":"要求","sections":[],"depth":3},{"title":"加载和量化模型","local":"加载和量化模型","sections":[{"title":"GPTQ 配置","local":"gptq-配置","sections":[],"depth":4},{"title":"量化","local":"量化","sections":[],"depth":4}],"depth":3},{"title":"推送量化模型到 🤗 Hub","local":"推送量化模型到--hub","sections":[],"depth":3},{"title":"从 🤗 Hub 加载一个量化模型","local":"从--hub-加载一个量化模型","sections":[],"depth":3},{"title":"Exllama内核加快推理速度","local":"exllama内核加快推理速度","sections":[{"title":"微调一个量化模型","local":"微调一个量化模型","sections":[],"depth":4}],"depth":3},{"title":"示例演示","local":"示例演示","sections":[],"depth":3},{"title":"GPTQConfig","local":"transformers.GPTQConfig","sections":[],"depth":3}],"depth":2},{"title":"bitsandbytes 集成","local":"bitsandbytes-集成","sections":[{"title":"通用用法","local":"通用用法","sections":[],"depth":3},{"title":"FP4 量化","local":"fp4-量化","sections":[{"title":"要求","local":"要求","sections":[],"depth":4},{"title":"提示和最佳实践","local":"提示和最佳实践","sections":[],"depth":4},{"title":"加载 4 位量化的大模型","local":"加载-4-位量化的大模型","sections":[],"depth":4}],"depth":3},{"title":"加载 8 位量化的大模型","local":"加载-8-位量化的大模型","sections":[{"title":"高级用例","local":"高级用例","sections":[{"title":"更改计算数据类型","local":"更改计算数据类型","sections":[],"depth":5}],"depth":4},{"title":"使用 NF4(普通浮点数 
4)数据类型","local":"使用-nf4普通浮点数-4数据类型","sections":[],"depth":4},{"title":"使用嵌套量化进行更高效的内存推理","local":"使用嵌套量化进行更高效的内存推理","sections":[],"depth":4}],"depth":3},{"title":"将量化模型推送到🤗 Hub","local":"将量化模型推送到-hub","sections":[],"depth":3},{"title":"从🤗 Hub加载量化模型","local":"从-hub加载量化模型","sections":[],"depth":3},{"title":"高级用例","local":"高级用例","sections":[{"title":"在 cpu 和 gpu 之间卸载","local":"在-cpu-和-gpu-之间卸载","sections":[],"depth":4},{"title":"使用 llm_int8_threshold","local":"使用-llmint8threshold","sections":[],"depth":4},{"title":"跳过某些模块的转换","local":"跳过某些模块的转换","sections":[],"depth":4},{"title":"微调已加载为8位精度的模型","local":"微调已加载为8位精度的模型","sections":[],"depth":4}],"depth":3},{"title":"BitsAndBytesConfig","local":"transformers.BitsAndBytesConfig","sections":[],"depth":3}],"depth":2},{"title":"使用 🤗 optimum 进行量化","local":"使用--optimum-进行量化","sections":[],"depth":2}],"depth":1}';
/**
 * Instance function that the `Ur` constructor passes to `ur`.
 *
 * Registers a callback through `fr` (imported as `o` from the scheduler
 * chunk; presumably Svelte's `onMount`, TODO confirm). The callback builds a
 * `URLSearchParams` from `window.location.search` and looks up the "fw"
 * query parameter, but the looked-up value is discarded, so nothing visible
 * in this file depends on it.
 *
 * @param U - not read by this function.
 * @returns {Array} always a new empty array.
 */
function Cr(U){return fr(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}
/**
 * Page component class, exported from this module under the name `component`.
 *
 * Extends `cr` (imported as `S` from the index chunk). The constructor calls
 * `super()` and then `ur` with: this instance, the constructor argument `h`,
 * the instance function `Cr`, the `vr` function defined earlier in this file,
 * `dr` (imported as `s` from the scheduler chunk), and an empty object.
 *
 * NOTE(review): `cr` / `ur` / `dr` look like Svelte's SvelteComponent / init /
 * safe_not_equal; confirm against the chunk sources before relying on that.
 */
class Ur extends cr{constructor(h){super(),ur(this,h,Cr,vr,dr,{})}}export{Ur as component}; | |
Xet Storage Details
- Size:
- 101 kB
- Xet hash:
- 14439420e6b86208b31c3aead5cf134553a6fc099bea2aecfe8201cb7240e019
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.