Buckets:

rtrm's picture
download
raw
195 kB
<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Quantize ๐Ÿค— Transformers models&quot;,&quot;local&quot;:&quot;quantize--transformers-models&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;AutoGPTQ Integration&quot;,&quot;local&quot;:&quot;autogptq-integration&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Requirements&quot;,&quot;local&quot;:&quot;requirements&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Load and quantize a model&quot;,&quot;local&quot;:&quot;load-and-quantize-a-model&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;GPTQ Configuration&quot;,&quot;local&quot;:&quot;gptq-configuration&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4},{&quot;title&quot;:&quot;Quantization&quot;,&quot;local&quot;:&quot;quantization&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4}],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Push quantized model to ๐Ÿค— Hub&quot;,&quot;local&quot;:&quot;push-quantized-model-to--hub&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Load a quantized model from the ๐Ÿค— Hub&quot;,&quot;local&quot;:&quot;load-a-quantized-model-from-the--hub&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Exllama kernels for faster inference&quot;,&quot;local&quot;:&quot;exllama-kernels-for-faster-inference&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Fine-tune a quantized model&quot;,&quot;local&quot;:&quot;fine-tune-a-quantized-model&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4}],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Example demo&quot;,&quot;local&quot;:&quot;example-demo&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;GPTQConfig&quot;,&quot;local&quot;:&quot;transformers.GPTQConfig&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;bitsandbytes Integration&quot;,&quot;local&quot;:&quot;bitsandbytes-integration&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;General usage&quot;,&quot;local&quot;:&quot;general-usage&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;FP4 quantization&quot;,&quot;local&quot;:&quot;fp4-quantization&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Requirements&quot;,&quot;local&quot;:&quot;requirements&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4},{&quot;title&quot;:&quot;Tips and best practices&quot;,&quot;local&quot;:&quot;tips-and-best-practices&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4},{&quot;title&quot;:&quot;Load a large model in 4bit&quot;,&quot;local&quot;:&quot;load-a-large-model-in-4bit&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4}],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Load a large model in 8bit&quot;,&quot;local&quot;:&quot;load-a-large-model-in-8bit&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Advanced use cases&quot;,&quot;local&quot;:&quot;advanced-use-cases&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Change the compute dtype&quot;,&quot;local&quot;:&quot;change-the-compute-dtype&quot;,&quot;sections&quot;:[],&quot;depth&quot;:5},{&quot;title&quot;:&quot;Using NF4 (Normal Float 4) data type&quot;,&quot;local&quot;:&quot;using-nf4-normal-float-4-data-type&quot;,&quot;sections&quot;:[],&quot;depth&quot;:5},{&quot;title&quot;:&quot;Use nested quantization for more memory efficient inference&quot;,&quot;local&quot;:&quot;use-nested-quantization-for-more-memory-efficient-inference&quot;,&quot;sections&quot;:[],&quot;depth&quot;:5}],&quot;depth&quot;:4}],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Push quantized models on the ๐Ÿค— Hub&quot;,&quot;local&quot;:&quot;push-quantized-models-on-the--hub&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Load a quantized model from the ๐Ÿค— Hub&quot;,&quot;local&quot;:&quot;load-a-quantized-model-from-the--hub&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Advanced use cases&quot;,&quot;local&quot;:&quot;advanced-use-cases&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Offload between cpu and gpu&quot;,&quot;local&quot;:&quot;offload-between-cpu-and-gpu&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4},{&quot;title&quot;:&quot;Play with llm_int8_threshold&quot;,&quot;local&quot;:&quot;play-with-llmint8threshold&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4},{&quot;title&quot;:&quot;Skip the conversion of some modules&quot;,&quot;local&quot;:&quot;skip-the-conversion-of-some-modules&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4},{&quot;title&quot;:&quot;Fine-tune a model that has been loaded in 8-bit&quot;,&quot;local&quot;:&quot;fine-tune-a-model-that-has-been-loaded-in-8-bit&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4}],&quot;depth&quot;:3},{&quot;title&quot;:&quot;BitsAndBytesConfig&quot;,&quot;local&quot;:&quot;transformers.BitsAndBytesConfig&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Quantization with ๐Ÿค— optimum&quot;,&quot;local&quot;:&quot;quantization-with--optimum&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}">
<link href="/docs/transformers/main/ja/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/entry/start.1486e459.js">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/chunks/scheduler.9bc65507.js">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/chunks/singletons.eee55cbf.js">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/chunks/index.3b203c72.js">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/chunks/paths.59da1547.js">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/entry/app.d9ae818f.js">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/chunks/index.707bf1b6.js">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/nodes/0.c06aa070.js">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/chunks/each.e59479a4.js">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/nodes/45.d94b4bac.js">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/chunks/Tip.c2ecdbf4.js">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/chunks/Docstring.86474e80.js">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/chunks/globals.7f7f1b26.js">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/chunks/EditOnGithub.922df6ba.js">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/chunks/CodeBlock.54a9f38d.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Quantize ๐Ÿค— Transformers models&quot;,&quot;local&quot;:&quot;quantize--transformers-models&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;AutoGPTQ Integration&quot;,&quot;local&quot;:&quot;autogptq-integration&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Requirements&quot;,&quot;local&quot;:&quot;requirements&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Load and quantize a model&quot;,&quot;local&quot;:&quot;load-and-quantize-a-model&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;GPTQ Configuration&quot;,&quot;local&quot;:&quot;gptq-configuration&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4},{&quot;title&quot;:&quot;Quantization&quot;,&quot;local&quot;:&quot;quantization&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4}],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Push quantized model to ๐Ÿค— Hub&quot;,&quot;local&quot;:&quot;push-quantized-model-to--hub&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Load a quantized model from the ๐Ÿค— Hub&quot;,&quot;local&quot;:&quot;load-a-quantized-model-from-the--hub&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Exllama kernels for faster inference&quot;,&quot;local&quot;:&quot;exllama-kernels-for-faster-inference&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Fine-tune a quantized model&quot;,&quot;local&quot;:&quot;fine-tune-a-quantized-model&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4}],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Example demo&quot;,&quot;local&quot;:&quot;example-demo&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;GPTQConfig&quot;,&quot;local&quot;:&quot;transformers.GPTQConfig&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;bitsandbytes Integration&quot;,&quot;local&quot;:&quot;bitsandbytes-integration&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;General usage&quot;,&quot;local&quot;:&quot;general-usage&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;FP4 quantization&quot;,&quot;local&quot;:&quot;fp4-quantization&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Requirements&quot;,&quot;local&quot;:&quot;requirements&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4},{&quot;title&quot;:&quot;Tips and best practices&quot;,&quot;local&quot;:&quot;tips-and-best-practices&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4},{&quot;title&quot;:&quot;Load a large model in 4bit&quot;,&quot;local&quot;:&quot;load-a-large-model-in-4bit&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4}],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Load a large model in 8bit&quot;,&quot;local&quot;:&quot;load-a-large-model-in-8bit&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Advanced use cases&quot;,&quot;local&quot;:&quot;advanced-use-cases&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Change the compute dtype&quot;,&quot;local&quot;:&quot;change-the-compute-dtype&quot;,&quot;sections&quot;:[],&quot;depth&quot;:5},{&quot;title&quot;:&quot;Using NF4 (Normal Float 4) data type&quot;,&quot;local&quot;:&quot;using-nf4-normal-float-4-data-type&quot;,&quot;sections&quot;:[],&quot;depth&quot;:5},{&quot;title&quot;:&quot;Use nested quantization for more memory efficient inference&quot;,&quot;local&quot;:&quot;use-nested-quantization-for-more-memory-efficient-inference&quot;,&quot;sections&quot;:[],&quot;depth&quot;:5}],&quot;depth&quot;:4}],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Push quantized models on the ๐Ÿค— Hub&quot;,&quot;local&quot;:&quot;push-quantized-models-on-the--hub&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Load a quantized model from the ๐Ÿค— Hub&quot;,&quot;local&quot;:&quot;load-a-quantized-model-from-the--hub&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Advanced use cases&quot;,&quot;local&quot;:&quot;advanced-use-cases&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Offload between cpu and gpu&quot;,&quot;local&quot;:&quot;offload-between-cpu-and-gpu&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4},{&quot;title&quot;:&quot;Play with llm_int8_threshold&quot;,&quot;local&quot;:&quot;play-with-llmint8threshold&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4},{&quot;title&quot;:&quot;Skip the conversion of some modules&quot;,&quot;local&quot;:&quot;skip-the-conversion-of-some-modules&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4},{&quot;title&quot;:&quot;Fine-tune a model that has been loaded in 8-bit&quot;,&quot;local&quot;:&quot;fine-tune-a-model-that-has-been-loaded-in-8-bit&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4}],&quot;depth&quot;:3},{&quot;title&quot;:&quot;BitsAndBytesConfig&quot;,&quot;local&quot;:&quot;transformers.BitsAndBytesConfig&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Quantization with ๐Ÿค— optimum&quot;,&quot;local&quot;:&quot;quantization-with--optimum&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <h1 class="relative group"><a id="quantize--transformers-models" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#quantize--transformers-models"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Quantize ๐Ÿค— Transformers models</span></h1> <h2 class="relative group"><a id="autogptq-integration" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#autogptq-integration"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>AutoGPTQ Integration</span></h2> <p data-svelte-h="svelte-1ag92u2">๐Ÿค— Transformers ใซใฏใ€่จ€่ชžใƒขใƒ‡ใƒซใง GPTQ ้‡ๅญๅŒ–ใ‚’ๅฎŸ่กŒใ™ใ‚‹ใŸใ‚ใฎ <code>optimum</code> API ใŒ็ตฑๅˆใ•ใ‚Œใฆใ„ใพใ™ใ€‚ใƒ‘ใƒ•ใ‚ฉใƒผใƒžใƒณใ‚นใ‚’ๅคงๅน…ใซไฝŽไธ‹ใ•ใ›ใ‚‹ใ“ใจใชใใ€ๆŽจ่ซ–้€Ÿๅบฆใ‚’้ซ˜้€ŸๅŒ–ใ™ใ‚‹ใ“ใจใชใใ€ใƒขใƒ‡ใƒซใ‚’ 8ใ€4ใ€3ใ€ใ•ใ‚‰ใซใฏ 2 ใƒ“ใƒƒใƒˆใงใƒญใƒผใƒ‰ใŠใ‚ˆใณ้‡ๅญๅŒ–ใงใใพใ™ใ€‚ใ“ใ‚Œใฏใ€ใปใจใ‚“ใฉใฎ GPU ใƒใƒผใƒ‰ใ‚ฆใ‚งใ‚ขใงใ‚ตใƒใƒผใƒˆใ•ใ‚Œใฆใ„ใพใ™ใ€‚</p> <p data-svelte-h="svelte-1gbdty9">้‡ๅญๅŒ–ใƒขใƒ‡ใƒซใฎ่ฉณ็ดฐใซใคใ„ใฆใฏใ€ไปฅไธ‹ใ‚’็ขบ่ชใ—ใฆใใ ใ•ใ„ใ€‚</p> <ul data-svelte-h="svelte-z5jal9"><li><a href="https://arxiv.org/pdf/2210.17323.pdf" rel="nofollow">GPTQ</a> ่ซ–ๆ–‡</li> <li>GPTQ ้‡ๅญๅŒ–ใซ้–ขใ™ใ‚‹ <code>optimum</code> <a href="https://huggingface.co/docs/optimum/llm_quantization/usage_guides/quantization" rel="nofollow">ใ‚ฌใ‚คใƒ‰</a></li> <li>ใƒใƒƒใ‚ฏใ‚จใƒณใƒ‰ใจใ—ใฆไฝฟ็”จใ•ใ‚Œใ‚‹ <a href="https://github.com/PanQiWei/AutoGPTQ" rel="nofollow"><code>AutoGPTQ</code></a> ใƒฉใ‚คใƒ–ใƒฉใƒช</li></ul> <h3 class="relative group"><a id="requirements" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#requirements"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Requirements</span></h3> <p data-svelte-h="svelte-1l7cenj">ไปฅไธ‹ใฎใ‚ณใƒผใƒ‰ใ‚’ๅฎŸ่กŒใ™ใ‚‹ใซใฏใ€ไปฅไธ‹ใฎ่ฆไปถใŒใ‚คใƒณใ‚นใƒˆใƒผใƒซใ•ใ‚Œใฆใ„ใ‚‹ๅฟ…่ฆใŒใ‚ใ‚Šใพใ™๏ผš</p> <ul data-svelte-h="svelte-hwx8is"><li><p>ๆœ€ๆ–ฐใฎ <code>AutoGPTQ</code> ใƒฉใ‚คใƒ–ใƒฉใƒชใ‚’ใ‚คใƒณใ‚นใƒˆใƒผใƒซใ™ใ‚‹ใ€‚
<code>pip install auto-gptq</code> ใ‚’ใ‚คใƒณใ‚นใƒˆใƒผใƒซใ™ใ‚‹ใ€‚</p></li> <li><p>ๆœ€ๆ–ฐใฎ <code>optimum</code> ใ‚’ใ‚ฝใƒผใ‚นใ‹ใ‚‰ใ‚คใƒณใ‚นใƒˆใƒผใƒซใ™ใ‚‹ใ€‚
<code>git+https://github.com/huggingface/optimum.git</code> ใ‚’ใ‚คใƒณใ‚นใƒˆใƒผใƒซใ™ใ‚‹ใ€‚</p></li> <li><p>ๆœ€ๆ–ฐใฎ <code>transformers</code> ใ‚’ใ‚ฝใƒผใ‚นใ‹ใ‚‰ใ‚คใƒณใ‚นใƒˆใƒผใƒซใ™ใ‚‹ใ€‚
ๆœ€ๆ–ฐใฎ <code>transformers</code> ใ‚’ใ‚ฝใƒผใ‚นใ‹ใ‚‰ใ‚คใƒณใ‚นใƒˆใƒผใƒซใ™ใ‚‹ <code>pip install git+https://github.com/huggingface/transformers.git</code></p></li> <li><p>ๆœ€ๆ–ฐใฎ <code>accelerate</code> ใƒฉใ‚คใƒ–ใƒฉใƒชใ‚’ใ‚คใƒณใ‚นใƒˆใƒผใƒซใ™ใ‚‹ใ€‚
<code>pip install --upgrade accelerate</code> ใ‚’ๅฎŸ่กŒใ™ใ‚‹ใ€‚</p></li></ul> <p data-svelte-h="svelte-mtpiza">GPTQ็ตฑๅˆใฏไปŠใฎใจใ“ใ‚ใƒ†ใ‚ญใ‚นใƒˆใƒขใƒ‡ใƒซใฎใฟใ‚’ใ‚ตใƒใƒผใƒˆใ—ใฆใ„ใ‚‹ใฎใงใ€่ฆ–่ฆšใ€้Ÿณๅฃฐใ€ใƒžใƒซใƒใƒขใƒผใƒ€ใƒซใƒขใƒ‡ใƒซใงใฏไบˆๆœŸใ›ใฌๆŒ™ๅ‹•ใซ้ญ้‡ใ™ใ‚‹ใ‹ใ‚‚ใ—ใ‚Œใชใ„ใ“ใจใซๆณจๆ„ใ—ใฆใใ ใ•ใ„ใ€‚</p> <h3 class="relative group"><a id="load-and-quantize-a-model" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#load-and-quantize-a-model"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Load and quantize a model</span></h3> <p data-svelte-h="svelte-1cu78c7">GPTQ ใฏใ€้‡ๅญๅŒ–ใƒขใƒ‡ใƒซใ‚’ไฝฟ็”จใ™ใ‚‹ๅ‰ใซ้‡ใฟใฎใ‚ญใƒฃใƒชใƒ–ใƒฌใƒผใ‚ทใƒงใƒณใ‚’ๅฟ…่ฆใจใ™ใ‚‹้‡ๅญๅŒ–ๆ–นๆณ•ใงใ™ใ€‚ใƒˆใƒฉใƒณใ‚นใƒ•ใ‚ฉใƒผใƒžใƒผ ใƒขใƒ‡ใƒซใ‚’ๆœ€ๅˆใ‹ใ‚‰้‡ๅญๅŒ–ใ™ใ‚‹ๅ ดๅˆใฏใ€้‡ๅญๅŒ–ใƒขใƒ‡ใƒซใ‚’ไฝœๆˆใ™ใ‚‹ใพใงใซๆ™‚้–“ใŒใ‹ใ‹ใ‚‹ใ“ใจใŒใ‚ใ‚Šใพใ™ (<code>facebook/opt-350m</code>ใƒขใƒ‡ใƒซใฎ Google colab ใงใฏ็ด„ 5 ๅˆ†)ใ€‚</p> <p data-svelte-h="svelte-173mkyz">ใ—ใŸใŒใฃใฆใ€GPTQ ้‡ๅญๅŒ–ใƒขใƒ‡ใƒซใ‚’ไฝฟ็”จใ™ใ‚‹ใ‚ทใƒŠใƒชใ‚ชใฏ 2 ใคใ‚ใ‚Šใพใ™ใ€‚ๆœ€ๅˆใฎไฝฟ็”จไพ‹ใฏใ€ใƒใƒ–ใงๅˆฉ็”จๅฏ่ƒฝใชไป–ใฎใƒฆใƒผใ‚ถใƒผใซใ‚ˆใฃใฆใ™ใงใซ้‡ๅญๅŒ–ใ•ใ‚ŒใŸใƒขใƒ‡ใƒซใ‚’ใƒญใƒผใƒ‰ใ™ใ‚‹ใ“ใจใงใ™ใ€‚2 ็•ช็›ฎใฎไฝฟ็”จไพ‹ใฏใ€ใƒขใƒ‡ใƒซใ‚’ๆœ€ๅˆใ‹ใ‚‰้‡ๅญๅŒ–ใ—ใ€ไฟๅญ˜ใ™ใ‚‹ใ‹ใƒใƒ–ใซใƒ—ใƒƒใ‚ทใƒฅใ—ใฆใ€ไป–ใฎใƒฆใƒผใ‚ถใƒผใŒไฝฟ็”จใงใใ‚‹ใ‚ˆใ†ใซใ™ใ‚‹ใ“ใจใงใ™ใ€‚ใใ‚Œใ‚‚ไฝฟใฃใฆใใ ใ•ใ„ใ€‚</p> <h4 class="relative group"><a id="gptq-configuration" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#gptq-configuration"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>GPTQ Configuration</span></h4> <p data-svelte-h="svelte-8sbga0">ใƒขใƒ‡ใƒซใ‚’ใƒญใƒผใƒ‰ใ—ใฆ้‡ๅญๅŒ–ใ™ใ‚‹ใซใฏใ€<a href="/docs/transformers/main/ja/main_classes/quantization#transformers.GPTQConfig">GPTQConfig</a> ใ‚’ไฝœๆˆใ™ใ‚‹ๅฟ…่ฆใŒใ‚ใ‚Šใพใ™ใ€‚ใƒ‡ใƒผใ‚ฟใ‚ปใƒƒใƒˆใ‚’ๆบ–ๅ‚™ใ™ใ‚‹ใซใฏใ€<code>bits</code>ใฎๆ•ฐใ€้‡ๅญๅŒ–ใ‚’่ชฟๆ•ดใ™ใ‚‹ใŸใ‚ใฎ<code>dataset</code>ใ€ใŠใ‚ˆใณใƒขใƒ‡ใƒซใฎ<code>Tokenizer</code>ใ‚’ๆธกใ™ๅฟ…่ฆใŒใ‚ใ‚Šใพใ™ใ€‚</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->model_id = <span class="hljs-string">&quot;facebook/opt-125m&quot;</span>
tokenizer = AutoTokenizer.from_pretrained(model_id)
gptq_config = GPTQConfig(bits=<span class="hljs-number">4</span>, dataset = <span class="hljs-string">&quot;c4&quot;</span>, tokenizer=tokenizer)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-o6s3th">็‹ฌ่‡ชใฎใƒ‡ใƒผใ‚ฟใ‚ปใƒƒใƒˆใ‚’ๆ–‡ๅญ—ๅˆ—ใฎใƒชใ‚นใƒˆใจใ—ใฆๆธกใ™ใ“ใจใŒใงใใ‚‹ใ“ใจใซๆณจๆ„ใ—ใฆใใ ใ•ใ„ใ€‚ใŸใ ใ—ใ€GPTQ ่ซ–ๆ–‡ใฎใƒ‡ใƒผใ‚ฟใ‚ปใƒƒใƒˆใ‚’ไฝฟ็”จใ™ใ‚‹ใ“ใจใ‚’ๅผทใใŠๅ‹งใ‚ใ—ใพใ™ใ€‚</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->dataset = [<span class="hljs-string">&quot;auto-gptq is an easy-to-use model quantization library with user-friendly apis, based on GPTQ algorithm.&quot;</span>]
quantization = GPTQConfig(bits=<span class="hljs-number">4</span>, dataset = dataset, tokenizer=tokenizer)<!-- HTML_TAG_END --></pre></div> <h4 class="relative group"><a id="quantization" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#quantization"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Quantization</span></h4> <p data-svelte-h="svelte-1h9rpz3"><code>from_pretrained</code> ใ‚’ไฝฟ็”จใ—ใ€<code>quantization_config</code> ใ‚’่จญๅฎšใ™ใ‚‹ใ“ใจใงใƒขใƒ‡ใƒซใ‚’้‡ๅญๅŒ–ใงใใพใ™ใ€‚</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForCausalLM
model = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=gptq_config)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1wifo6l">ใƒขใƒ‡ใƒซใ‚’้‡ๅญๅŒ–ใ™ใ‚‹ใซใฏ GPU ใŒๅฟ…่ฆใงใ‚ใ‚‹ใ“ใจใซๆณจๆ„ใ—ใฆใใ ใ•ใ„ใ€‚ใƒขใƒ‡ใƒซใ‚’ CPU ใซ้…็ฝฎใ—ใ€้‡ๅญๅŒ–ใ™ใ‚‹ใŸใ‚ใซใƒขใ‚ธใƒฅใƒผใƒซใ‚’ GPU ใซๅ‰ๅพŒใซ็งปๅ‹•ใ•ใ›ใพใ™ใ€‚</p> <p data-svelte-h="svelte-1dkzvn5">CPU ใ‚ชใƒ•ใƒญใƒผใƒ‰ใฎไฝฟ็”จไธญใซ GPU ใฎไฝฟ็”จ้‡ใ‚’ๆœ€ๅคงๅŒ–ใ—ใŸใ„ๅ ดๅˆใฏใ€<code>device_map = &quot;auto&quot;</code> ใ‚’่จญๅฎšใงใใพใ™ใ€‚</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForCausalLM
model = AutoModelForCausalLM.from_pretrained(model_id, device_map=<span class="hljs-string">&quot;auto&quot;</span>, quantization_config=gptq_config)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-b4piob">ใƒ‡ใ‚ฃใ‚นใ‚ฏ ใ‚ชใƒ•ใƒญใƒผใƒ‰ใฏใ‚ตใƒใƒผใƒˆใ•ใ‚Œใฆใ„ใชใ„ใ“ใจใซๆณจๆ„ใ—ใฆใใ ใ•ใ„ใ€‚ใ•ใ‚‰ใซใ€ใƒ‡ใƒผใ‚ฟใ‚ปใƒƒใƒˆใŒๅŽŸๅ› ใงใƒกใƒขใƒชใŒไธ่ถณใ—ใฆใ„ใ‚‹ๅ ดๅˆใฏใ€<code>from_pretained</code> ใง <code>max_memory</code> ใ‚’ๆธกใ™ๅฟ…่ฆใŒใ‚ใ‚‹ๅ ดๅˆใŒใ‚ใ‚Šใพใ™ใ€‚ <code>device_map</code>ใจ<code>max_memory</code>ใฎ่ฉณ็ดฐใซใคใ„ใฆใฏใ€ใ“ใฎ <a href="https://huggingface.co/docs/accelerate/usage_guides/big_modeling#designing-a-device-map" rel="nofollow">ใ‚ฌใ‚คใƒ‰</a> ใ‚’ๅ‚็…งใ—ใฆใใ ใ•ใ„ใ€‚</p> <div class="course-tip course-tip-orange bg-gradient-to-br dark:bg-gradient-to-r before:border-orange-500 dark:before:border-orange-800 from-orange-50 dark:from-gray-900 to-white dark:to-gray-950 border border-orange-50 text-orange-700 dark:text-gray-400">GPTQ ้‡ๅญๅŒ–ใฏใ€็พๆ™‚็‚นใงใฏใƒ†ใ‚ญใ‚นใƒˆ ใƒขใƒ‡ใƒซใงใฎใฟๆฉŸ่ƒฝใ—ใพใ™ใ€‚ใ•ใ‚‰ใซใ€้‡ๅญๅŒ–ใƒ—ใƒญใ‚ปใ‚นใฏใƒใƒผใƒ‰ใ‚ฆใ‚งใ‚ขใซใ‚ˆใฃใฆใฏ้•ทๆ™‚้–“ใ‹ใ‹ใ‚‹ๅ ดๅˆใŒใ‚ใ‚Šใพใ™ (NVIDIA A100 ใ‚’ไฝฟ็”จใ—ใŸๅ ดๅˆใ€175B ใƒขใƒ‡ใƒซ = 4 gpu ๆ™‚้–“)ใ€‚ใƒขใƒ‡ใƒซใฎ GPTQ ้‡ๅญๅŒ–ใƒใƒผใ‚ธใƒงใƒณใŒๅญ˜ๅœจใ—ใชใ„ๅ ดๅˆใฏใ€ใƒใƒ–ใง็ขบ่ชใ—ใฆใใ ใ•ใ„ใ€‚ใใ†ใงใชใ„ๅ ดๅˆใฏใ€github ใง่ฆๆฑ‚ใ‚’้€ไฟกใงใใพใ™ใ€‚</div> <h3 class="relative group"><a id="push-quantized-model-to--hub" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#push-quantized-model-to--hub"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Push quantized model to ๐Ÿค— Hub</span></h3> <p data-svelte-h="svelte-1mi5065">ไป–ใฎ ๐Ÿค— ใƒขใƒ‡ใƒซใจๅŒๆง˜ใซใ€<code>push_to_hub</code> ใ‚’ไฝฟ็”จใ—ใฆ้‡ๅญๅŒ–ใƒขใƒ‡ใƒซใ‚’ใƒใƒ–ใซใƒ—ใƒƒใ‚ทใƒฅใงใใพใ™ใ€‚้‡ๅญๅŒ–ๆง‹ๆˆใฏไฟๅญ˜ใ•ใ‚Œใ€ใƒขใƒ‡ใƒซใซๆฒฟใฃใฆใƒ—ใƒƒใ‚ทใƒฅใ•ใ‚Œใพใ™ใ€‚</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->quantized_model.push_to_hub(<span class="hljs-string">&quot;opt-125m-gptq&quot;</span>)
tokenizer.push_to_hub(<span class="hljs-string">&quot;opt-125m-gptq&quot;</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1wrdmud">้‡ๅญๅŒ–ใ•ใ‚ŒใŸใƒขใƒ‡ใƒซใ‚’ใƒญใƒผใ‚ซใƒซ ใƒžใ‚ทใƒณใซไฟๅญ˜ใ—ใŸใ„ๅ ดๅˆใฏใ€<code>save_pretrained</code> ใ‚’ไฝฟ็”จใ—ใฆ่กŒใ†ใ“ใจใ‚‚ใงใใพใ™ใ€‚</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->quantized_model.save_pretrained(<span class="hljs-string">&quot;opt-125m-gptq&quot;</span>)
tokenizer.save_pretrained(<span class="hljs-string">&quot;opt-125m-gptq&quot;</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1taffib"><code>device_map</code> ใ‚’ไฝฟ็”จใ—ใฆใƒขใƒ‡ใƒซใ‚’้‡ๅญๅŒ–ใ—ใŸๅ ดๅˆใฏใ€ไฟๅญ˜ใ™ใ‚‹ๅ‰ใซใƒขใƒ‡ใƒซๅ…จไฝ“ใ‚’ GPU ใพใŸใฏ <code>cpu</code> ใฎใ„ใšใ‚Œใ‹ใซ็งปๅ‹•ใ—ใฆใใ ใ•ใ„ใ€‚</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->quantized_model.to(<span class="hljs-string">&quot;cpu&quot;</span>)
quantized_model.save_pretrained(<span class="hljs-string">&quot;opt-125m-gptq&quot;</span>)<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="load-a-quantized-model-from-the--hub" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#load-a-quantized-model-from-the--hub"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Load a quantized model from the ๐Ÿค— Hub</span></h3> <p data-svelte-h="svelte-1q9lmg5"><code>from_pretrained</code>ใ‚’ไฝฟ็”จใ—ใฆใ€้‡ๅญๅŒ–ใ•ใ‚ŒใŸใƒขใƒ‡ใƒซใ‚’ใƒใƒ–ใ‹ใ‚‰ใƒญใƒผใƒ‰ใงใใพใ™ใ€‚
ๅฑžๆ€ง <code>quantization_config</code> ใŒใƒขใƒ‡ใƒซ่จญๅฎšใ‚ชใƒ–ใ‚ธใ‚งใ‚ฏใƒˆใซๅญ˜ๅœจใ™ใ‚‹ใ“ใจใ‚’็ขบ่ชใ—ใฆใ€ใƒ—ใƒƒใ‚ทใƒฅใ•ใ‚ŒใŸ้‡ใฟใŒ้‡ๅญๅŒ–ใ•ใ‚Œใฆใ„ใ‚‹ใ“ใจใ‚’็ขบ่ชใ—ใพใ™ใ€‚</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForCausalLM
model = AutoModelForCausalLM.from_pretrained(<span class="hljs-string">&quot;{your_username}/opt-125m-gptq&quot;</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1mb4sw2">ๅฟ…่ฆไปฅไธŠใฎใƒกใƒขใƒชใ‚’ๅ‰ฒใ‚Šๅฝ“ใฆใšใซใƒขใƒ‡ใƒซใ‚’ใ‚ˆใ‚Š้€Ÿใใƒญใƒผใƒ‰ใ—ใŸใ„ๅ ดๅˆใฏใ€<code>device_map</code> ๅผ•ๆ•ฐใฏ้‡ๅญๅŒ–ใƒขใƒ‡ใƒซใงใ‚‚ๆฉŸ่ƒฝใ—ใพใ™ใ€‚ <code>accelerate</code>ใƒฉใ‚คใƒ–ใƒฉใƒชใŒใ‚คใƒณใ‚นใƒˆใƒผใƒซใ•ใ‚Œใฆใ„ใ‚‹ใ“ใจใ‚’็ขบ่ชใ—ใฆใใ ใ•ใ„ใ€‚</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForCausalLM
model = AutoModelForCausalLM.from_pretrained(<span class="hljs-string">&quot;{your_username}/opt-125m-gptq&quot;</span>, device_map=<span class="hljs-string">&quot;auto&quot;</span>)<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="exllama-kernels-for-faster-inference" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#exllama-kernels-for-faster-inference"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Exllama kernels for faster inference</span></h3> <p data-svelte-h="svelte-rwqpya">4 ใƒ“ใƒƒใƒˆ ใƒขใƒ‡ใƒซใฎๅ ดๅˆใ€ๆŽจ่ซ–้€Ÿๅบฆใ‚’้ซ˜ใ‚ใ‚‹ใŸใ‚ใซ exllama ใ‚ซใƒผใƒใƒซใ‚’ไฝฟ็”จใงใใพใ™ใ€‚ใƒ‡ใƒ•ใ‚ฉใƒซใƒˆใงๆœ‰ๅŠนใซใชใฃใฆใ„ใพใ™ใ€‚ <a href="/docs/transformers/main/ja/main_classes/quantization#transformers.GPTQConfig">GPTQConfig</a> ใง <code>disable_exllama</code> ใ‚’ๆธกใ™ใ“ใจใงใ€ใใฎๅ‹•ไฝœใ‚’ๅค‰ๆ›ดใงใใพใ™ใ€‚ใ“ใ‚Œใซใ‚ˆใ‚Šใ€่จญๅฎšใซไฟๅญ˜ใ•ใ‚Œใฆใ„ใ‚‹้‡ๅญๅŒ–่จญๅฎšใŒไธŠๆ›ธใใ•ใ‚Œใพใ™ใ€‚ใ‚ซใƒผใƒใƒซใซ้–ข้€ฃใ™ใ‚‹ๅฑžๆ€งใฎใฟใ‚’ไธŠๆ›ธใใงใใ‚‹ใ“ใจใซๆณจๆ„ใ—ใฆใใ ใ•ใ„ใ€‚ใ•ใ‚‰ใซใ€exllama ใ‚ซใƒผใƒใƒซใ‚’ไฝฟ็”จใ—ใŸใ„ๅ ดๅˆใฏใ€ใƒขใƒ‡ใƒซๅ…จไฝ“ใ‚’ GPU ไธŠใซ็ฝฎใๅฟ…่ฆใŒใ‚ใ‚Šใพใ™ใ€‚</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> torch
gptq_config = GPTQConfig(bits=<span class="hljs-number">4</span>, disable_exllama=<span class="hljs-literal">False</span>)
model = AutoModelForCausalLM.from_pretrained(<span class="hljs-string">&quot;{your_username}/opt-125m-gptq&quot;</span>, device_map=<span class="hljs-string">&quot;auto&quot;</span>, quantization_config = gptq_config)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-2djxfj">็พๆ™‚็‚นใงใฏ 4 ใƒ“ใƒƒใƒˆ ใƒขใƒ‡ใƒซใฎใฟใŒใ‚ตใƒใƒผใƒˆใ•ใ‚Œใฆใ„ใ‚‹ใ“ใจใซๆณจๆ„ใ—ใฆใใ ใ•ใ„ใ€‚ใ•ใ‚‰ใซใ€peft ใ‚’ไฝฟ็”จใ—ใฆ้‡ๅญๅŒ–ใƒขใƒ‡ใƒซใ‚’ๅพฎ่ชฟๆ•ดใ—ใฆใ„ใ‚‹ๅ ดๅˆใฏใ€exllama ใ‚ซใƒผใƒใƒซใ‚’้žใ‚ขใ‚ฏใƒ†ใ‚ฃใƒ–ๅŒ–ใ™ใ‚‹ใ“ใจใ‚’ใŠๅ‹งใ‚ใ—ใพใ™ใ€‚</p> <h4 class="relative group"><a id="fine-tune-a-quantized-model" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#fine-tune-a-quantized-model"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Fine-tune a quantized model</span></h4> <p data-svelte-h="svelte-16ybbze">Hugging Face ใ‚จใ‚ณใ‚ทใ‚นใƒ†ใƒ ใฎใ‚ขใƒ€ใƒ—ใ‚ฟใƒผใฎๅ…ฌๅผใ‚ตใƒใƒผใƒˆใซใ‚ˆใ‚Šใ€GPTQ ใง้‡ๅญๅŒ–ใ•ใ‚ŒใŸใƒขใƒ‡ใƒซใ‚’ๅพฎ่ชฟๆ•ดใงใใพใ™ใ€‚
่ฉณ็ดฐใซใคใ„ใฆใฏใ€<a href="https://github.com/huggingface/peft" rel="nofollow"><code>peft</code></a> ใƒฉใ‚คใƒ–ใƒฉใƒชใ‚’ใ”่ฆงใใ ใ•ใ„ใ€‚</p> <h3 class="relative group"><a id="example-demo" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#example-demo"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Example demo</span></h3> <p data-svelte-h="svelte-cf6ao3">GPTQ ใ‚’ไฝฟ็”จใ—ใฆใƒขใƒ‡ใƒซใ‚’้‡ๅญๅŒ–ใ™ใ‚‹ๆ–นๆณ•ใจใ€peft ใ‚’ไฝฟ็”จใ—ใฆ้‡ๅญๅŒ–ใ•ใ‚ŒใŸใƒขใƒ‡ใƒซใ‚’ๅพฎ่ชฟๆ•ดใ™ใ‚‹ๆ–นๆณ•ใซใคใ„ใฆใฏใ€Google Colab <a href="https://colab.research.google.com/drive/1_TIrmuKOFhuRRiTWN94iLKUFu6ZX4ceb?usp=sharing" rel="nofollow">ใƒŽใƒผใƒˆใƒ–ใƒƒใ‚ฏ</a> ใ‚’ๅ‚็…งใ—ใฆใใ ใ•ใ„ใ€‚</p> <h3 class="relative group"><a id="transformers.GPTQConfig" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#transformers.GPTQConfig"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>GPTQConfig</span></h3> <div class="docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"> <div><span class="group flex space-x-1.5 items-center text-gray-800 bg-gradient-to-r rounded-tr-lg -mt-4 -ml-4 pt-3 px-2.5" id="transformers.GPTQConfig"><!-- HTML_TAG_START --><h3 class="!m-0"><span class="flex-1 break-all md:text-lg bg-gradient-to-r px-2.5 py-1.5 rounded-xl from-indigo-50/70 to-white dark:from-gray-900 dark:to-gray-950 dark:text-indigo-300 text-indigo-700"><svg class="mr-1.5 text-indigo-500 inline-block -mt-0.5" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width=".8em" height=".8em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 24 24"><path class="uim-quaternary" d="M20.23 7.24L12 12L3.77 7.24a1.98 1.98 0 0 1 .7-.71L11 2.76c.62-.35 1.38-.35 2 0l6.53 3.77c.29.173.531.418.7.71z" opacity=".25" fill="currentColor"></path><path class="uim-tertiary" d="M12 12v9.5a2.09 2.09 0 0 1-.91-.21L4.5 17.48a2.003 2.003 0 0 1-1-1.73v-7.5a2.06 2.06 0 0 1 .27-1.01L12 12z" opacity=".5" fill="currentColor"></path><path class="uim-primary" d="M20.5 8.25v7.5a2.003 2.003 0 0 1-1 1.73l-6.62 3.82c-.275.13-.576.198-.88.2V12l8.23-4.76c.175.308.268.656.27 1.01z" fill="currentColor"></path></svg><span class="font-light">class</span> <span class="font-medium">transformers.</span><span class="font-semibold">GPTQConfig</span></span></h3><!-- HTML_TAG_END --> <a id="transformers.GPTQConfig" class="header-link invisible with-hover:group-hover:visible pr-2" href="#transformers.GPTQConfig"><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></a> <a class="!ml-auto !text-gray-400 !no-underline text-sm flex items-center" href="https://github.com/huggingface/transformers/blob/main/src/transformers/utils/quantization_config.py#L531" target="_blank"><span data-svelte-h="svelte-1kd6by1">&lt;</span> <span class="hidden md:block mx-0.5 hover:!underline" data-svelte-h="svelte-122apf4">source</span> <span data-svelte-h="svelte-x0xyl0">&gt;</span></a></span> <p class="font-mono text-xs md:text-sm !leading-relaxed !my-6"><span data-svelte-h="svelte-8mvn6a">(</span> <span class="comma cursor-pointer"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">bits<span class="opacity-60">: int</span></span> </span><span class="comma cursor-pointer"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">tokenizer<span class="opacity-60">: Any = None</span></span> </span><span class="comma cursor-pointer"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">dataset<span class="opacity-60">: Union = None</span></span> </span><span class="comma cursor-pointer"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">group_size<span class="opacity-60">: int = 128</span></span> </span><span class="comma cursor-pointer"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">damp_percent<span class="opacity-60">: float = 0.1</span></span> </span><span class="comma cursor-pointer"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">desc_act<span class="opacity-60">: bool = False</span></span> </span><span class="comma cursor-pointer"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">sym<span class="opacity-60">: bool = True</span></span> </span><span class="comma cursor-pointer"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">true_sequential<span class="opacity-60">: bool = True</span></span> </span><span class="comma cursor-pointer"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">use_cuda_fp16<span class="opacity-60">: bool = False</span></span> </span><span class="comma cursor-pointer"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">model_seqlen<span class="opacity-60">: Optional = None</span></span> </span><span class="comma cursor-pointer"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">block_name_to_quantize<span class="opacity-60">: Optional = None</span></span> </span><span class="comma cursor-pointer"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">module_name_preceding_first_block<span class="opacity-60">: Optional = None</span></span> </span><span class="comma cursor-pointer"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">batch_size<span class="opacity-60">: int = 1</span></span> </span><span class="comma cursor-pointer"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">pad_token_id<span class="opacity-60">: Optional = None</span></span> </span><span class="comma cursor-pointer"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">use_exllama<span class="opacity-60">: Optional = None</span></span> </span><span class="comma cursor-pointer"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">max_input_length<span class="opacity-60">: Optional = None</span></span> </span><span class="comma cursor-pointer"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">exllama_config<span class="opacity-60">: Optional = None</span></span> </span><span class="comma cursor-pointer"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">cache_block_outputs<span class="opacity-60">: bool = True</span></span> </span><span class="comma cursor-pointer"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">modules_in_block_to_quantize<span class="opacity-60">: Optional = None</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">**kwargs<span class="opacity-60"></span></span> </span> <span data-svelte-h="svelte-1jq0pl7">)</span> </p> <div class="!mb-10 relative docstring-details "> <p class="flex items-center font-semibold !mt-2 !mb-2 text-gray-800" data-svelte-h="svelte-lt6pb6">Parameters <span class="flex-auto border-t-2 border-gray-100 dark:border-gray-700 ml-3"></span></p> <ul class="px-2"><li class="text-base !pl-4 my-3 rounded "><span class="group flex space-x-1.5 items-start"><a id="transformers.GPTQConfig.bits" class="header-link block pr-0.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="&amp;amp;num;transformers.GPTQConfig.bits"><span><svg class="text-smd" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span><!-- HTML_TAG_START --><strong>bits</strong> (<code>int</code>) &#x2014;
The number of bits to quantize to, supported numbers are (2, 3, 4, 8).<!-- HTML_TAG_END --> </span></span> </li><li class="text-base !pl-4 my-3 rounded "><span class="group flex space-x-1.5 items-start"><a id="transformers.GPTQConfig.tokenizer" class="header-link block pr-0.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="&amp;amp;num;transformers.GPTQConfig.tokenizer"><span><svg class="text-smd" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span><!-- HTML_TAG_START --><strong>tokenizer</strong> (<code>str</code> or <code>PreTrainedTokenizerBase</code>, <em>optional</em>) &#x2014;
The tokenizer used to process the dataset. You can pass either:<ul>
<li>A custom tokenizer object.</li>
<li>A string, the <em>model id</em> of a predefined tokenizer hosted inside a model repo on huggingface.co.</li>
<li>A path to a <em>directory</em> containing vocabulary files required by the tokenizer, for instance saved
using the <a href="/docs/transformers/main/ja/internal/tokenization_utils#transformers.PreTrainedTokenizerBase.save_pretrained">save_pretrained()</a> method, e.g., <code>./my_model_directory/</code>.</li>
</ul><!-- HTML_TAG_END --> </span></span> </li><li class="text-base !pl-4 my-3 rounded "><span class="group flex space-x-1.5 items-start"><a id="transformers.GPTQConfig.dataset" class="header-link block pr-0.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="&amp;amp;num;transformers.GPTQConfig.dataset"><span><svg class="text-smd" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span><!-- HTML_TAG_START --><strong>dataset</strong> (<code>Union[List[str]]</code>, <em>optional</em>) &#x2014;
The dataset used for quantization. You can provide your own dataset in a list of string or just use the
original datasets used in GPTQ paper [&#x2018;wikitext2&#x2019;,&#x2018;c4&#x2019;,&#x2018;c4-new&#x2019;]<!-- HTML_TAG_END --> </span></span> </li><li class="text-base !pl-4 my-3 rounded "><span class="group flex space-x-1.5 items-start"><a id="transformers.GPTQConfig.group_size" class="header-link block pr-0.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="&amp;amp;num;transformers.GPTQConfig.group_size"><span><svg class="text-smd" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span><!-- HTML_TAG_START --><strong>group_size</strong> (<code>int</code>, <em>optional</em>, defaults to 128) &#x2014;
The group size to use for quantization. Recommended value is 128 and -1 uses per-column quantization.<!-- HTML_TAG_END --> </span></span> </li><li class="text-base !pl-4 my-3 rounded "><span class="group flex space-x-1.5 items-start"><a id="transformers.GPTQConfig.damp_percent" class="header-link block pr-0.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="&amp;amp;num;transformers.GPTQConfig.damp_percent"><span><svg class="text-smd" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span><!-- HTML_TAG_START --><strong>damp_percent</strong> (<code>float</code>, <em>optional</em>, defaults to 0.1) &#x2014;
The percent of the average Hessian diagonal to use for dampening. Recommended value is 0.1.<!-- HTML_TAG_END --> </span></span> </li><li class="text-base !pl-4 my-3 rounded "><span class="group flex space-x-1.5 items-start"><a id="transformers.GPTQConfig.desc_act" class="header-link block pr-0.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="&amp;amp;num;transformers.GPTQConfig.desc_act"><span><svg class="text-smd" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span><!-- HTML_TAG_START --><strong>desc_act</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) &#x2014;
Whether to quantize columns in order of decreasing activation size. Setting it to False can significantly
speed up inference but the perplexity may become slightly worse. Also known as act-order.<!-- HTML_TAG_END --> </span></span> </li><li class="text-base !pl-4 my-3 rounded "><span class="group flex space-x-1.5 items-start"><a id="transformers.GPTQConfig.sym" class="header-link block pr-0.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="&amp;amp;num;transformers.GPTQConfig.sym"><span><svg class="text-smd" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span><!-- HTML_TAG_START --><strong>sym</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) &#x2014;
Whether to use symetric quantization.<!-- HTML_TAG_END --> </span></span> </li><li class="text-base !pl-4 my-3 rounded "><span class="group flex space-x-1.5 items-start"><a id="transformers.GPTQConfig.true_sequential" class="header-link block pr-0.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="&amp;amp;num;transformers.GPTQConfig.true_sequential"><span><svg class="text-smd" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span><!-- HTML_TAG_START --><strong>true_sequential</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) &#x2014;
Whether to perform sequential quantization even within a single Transformer block. Instead of quantizing
the entire block at once, we perform layer-wise quantization. As a result, each layer undergoes
quantization using inputs that have passed through the previously quantized layers.<!-- HTML_TAG_END --> </span></span> </li><li class="text-base !pl-4 my-3 rounded "><span class="group flex space-x-1.5 items-start"><a id="transformers.GPTQConfig.use_cuda_fp16" class="header-link block pr-0.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="&amp;amp;num;transformers.GPTQConfig.use_cuda_fp16"><span><svg class="text-smd" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span><!-- HTML_TAG_START --><strong>use_cuda_fp16</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) &#x2014;
Whether or not to use optimized cuda kernel for fp16 model. Need to have model in fp16.<!-- HTML_TAG_END --> </span></span> </li><li class="text-base !pl-4 my-3 rounded "><span class="group flex space-x-1.5 items-start"><a id="transformers.GPTQConfig.model_seqlen" class="header-link block pr-0.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="&amp;amp;num;transformers.GPTQConfig.model_seqlen"><span><svg class="text-smd" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span><!-- HTML_TAG_START --><strong>model_seqlen</strong> (<code>int</code>, <em>optional</em>) &#x2014;
The maximum sequence length that the model can take.<!-- HTML_TAG_END --> </span></span> </li><li class="text-base !pl-4 my-3 rounded "><span class="group flex space-x-1.5 items-start"><a id="transformers.GPTQConfig.block_name_to_quantize" class="header-link block pr-0.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="&amp;amp;num;transformers.GPTQConfig.block_name_to_quantize"><span><svg class="text-smd" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span><!-- HTML_TAG_START --><strong>block_name_to_quantize</strong> (<code>str</code>, <em>optional</em>) &#x2014;
The transformers block name to quantize. If None, we will infer the block name using common patterns (e.g. model.layers)<!-- HTML_TAG_END --> </span></span> </li><li class="text-base !pl-4 my-3 rounded "><span class="group flex space-x-1.5 items-start"><a id="transformers.GPTQConfig.module_name_preceding_first_block" class="header-link block pr-0.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="&amp;amp;num;transformers.GPTQConfig.module_name_preceding_first_block"><span><svg class="text-smd" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span><!-- HTML_TAG_START --><strong>module_name_preceding_first_block</strong> (<code>List[str]</code>, <em>optional</em>) &#x2014;
The layers that are preceding the first Transformer block.<!-- HTML_TAG_END --> </span></span> </li><li class="text-base !pl-4 my-3 rounded "><span class="group flex space-x-1.5 items-start"><a id="transformers.GPTQConfig.batch_size" class="header-link block pr-0.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="&amp;amp;num;transformers.GPTQConfig.batch_size"><span><svg class="text-smd" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span><!-- HTML_TAG_START --><strong>batch_size</strong> (<code>int</code>, <em>optional</em>, defaults to 1) &#x2014;
The batch size used when processing the dataset<!-- HTML_TAG_END --> </span></span> </li><li class="text-base !pl-4 my-3 rounded "><span class="group flex space-x-1.5 items-start"><a id="transformers.GPTQConfig.pad_token_id" class="header-link block pr-0.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="&amp;amp;num;transformers.GPTQConfig.pad_token_id"><span><svg class="text-smd" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span><!-- HTML_TAG_START --><strong>pad_token_id</strong> (<code>int</code>, <em>optional</em>) &#x2014;
The pad token id. Needed to prepare the dataset when <code>batch_size</code> &gt; 1.<!-- HTML_TAG_END --> </span></span> </li><li class="text-base !pl-4 my-3 rounded "><span class="group flex space-x-1.5 items-start"><a id="transformers.GPTQConfig.use_exllama" class="header-link block pr-0.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="&amp;amp;num;transformers.GPTQConfig.use_exllama"><span><svg class="text-smd" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span><!-- HTML_TAG_START --><strong>use_exllama</strong> (<code>bool</code>, <em>optional</em>) &#x2014;
Whether to use exllama backend. Defaults to <code>True</code> if unset. Only works with <code>bits</code> = 4.<!-- HTML_TAG_END --> </span></span> </li><li class="text-base !pl-4 my-3 rounded "><span class="group flex space-x-1.5 items-start"><a id="transformers.GPTQConfig.max_input_length" class="header-link block pr-0.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="&amp;amp;num;transformers.GPTQConfig.max_input_length"><span><svg class="text-smd" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span><!-- HTML_TAG_START --><strong>max_input_length</strong> (<code>int</code>, <em>optional</em>) &#x2014;
The maximum input length. This is needed to initialize a buffer that depends on the maximum expected input
length. It is specific to the exllama backend with act-order.<!-- HTML_TAG_END --> </span></span> </li><li class="text-base !pl-4 my-3 rounded "><span class="group flex space-x-1.5 items-start"><a id="transformers.GPTQConfig.exllama_config" class="header-link block pr-0.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="&amp;amp;num;transformers.GPTQConfig.exllama_config"><span><svg class="text-smd" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span><!-- HTML_TAG_START --><strong>exllama_config</strong> (<code>Dict[str, Any]</code>, <em>optional</em>) &#x2014;
The exllama config. You can specify the version of the exllama kernel through the <code>version</code> key. Defaults
to <code>{&quot;version&quot;: 1}</code> if unset.<!-- HTML_TAG_END --> </span></span> </li><li class="text-base !pl-4 my-3 rounded "><span class="group flex space-x-1.5 items-start"><a id="transformers.GPTQConfig.cache_block_outputs" class="header-link block pr-0.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="&amp;amp;num;transformers.GPTQConfig.cache_block_outputs"><span><svg class="text-smd" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span><!-- HTML_TAG_START --><strong>cache_block_outputs</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) &#x2014;
Whether to cache block outputs to reuse as inputs for the succeeding block.<!-- HTML_TAG_END --> </span></span> </li><li class="text-base !pl-4 my-3 rounded "><span class="group flex space-x-1.5 items-start"><a id="transformers.GPTQConfig.modules_in_block_to_quantize" class="header-link block pr-0.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="&amp;amp;num;transformers.GPTQConfig.modules_in_block_to_quantize"><span><svg class="text-smd" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span><!-- HTML_TAG_START --><strong>modules_in_block_to_quantize</strong> (<code>List[List[str]]</code>, <em>optional</em>) &#x2014;
List of list of module names to quantize in the specified block. This argument is useful to exclude certain linear modules from being quantized.
The block to quantize can be specified by setting <code>block_name_to_quantize</code>. We will quantize each list sequentially. If not set, we will quantize all linear layers.
Example: <code>modules_in_block_to_quantize =[[&quot;self_attn.k_proj&quot;, &quot;self_attn.v_proj&quot;, &quot;self_attn.q_proj&quot;], [&quot;self_attn.o_proj&quot;]]</code>.
In this example, we will first quantize the q,k,v layers simultaneously since they are independent.
Then, we will quantize <code>self_attn.o_proj</code> layer with the q,k,v layers quantized. This way, we will get
better results since it reflects the real input <code>self_attn.o_proj</code> will get when the model is quantized.<!-- HTML_TAG_END --> </span></span> </li></ul> </div></div> <p data-svelte-h="svelte-i3efvr">This is a wrapper class about all possible attributes and features that you can play with a model that has been
loaded using <code>optimum</code> api for gptq quantization relying on auto_gptq backend.</p> <div class="docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"> <div><span class="group flex space-x-1.5 items-center text-gray-800 bg-gradient-to-r rounded-tr-lg -mt-4 -ml-4 pt-3 px-2.5" id="transformers.GPTQConfig.from_dict_optimum"><!-- HTML_TAG_START --><h4 class="!m-0"><span class="flex-1 rounded-xl py-0.5 break-all bg-gradient-to-r from-blue-50/60 to-white dark:from-gray-900 dark:to-gray-950 text-blue-700 dark:text-blue-300 font-medium px-2"><svg width="1em" height="1em" viewBox="0 0 32 33" class="mr-1 inline-block -mt-0.5" xmlns="http://www.w3.org/2000/svg"><path d="M5.80566 18.3545C4.90766 17.4565 4.90766 16.0005 5.80566 15.1025L14.3768 6.53142C15.2748 5.63342 16.7307 5.63342 17.6287 6.53142L26.1999 15.1025C27.0979 16.0005 27.0979 17.4565 26.1999 18.3545L17.6287 26.9256C16.7307 27.8236 15.2748 27.8236 14.3768 26.9256L5.80566 18.3545Z" fill="currentColor" fill-opacity="0.25"/><path fill-rule="evenodd" clip-rule="evenodd" d="M16.4801 13.9619C16.4801 12.9761 16.7467 12.5436 16.9443 12.3296C17.1764 12.078 17.5731 11.8517 18.2275 11.707C18.8821 11.5623 19.638 11.5342 20.4038 11.5582C20.7804 11.57 21.1341 11.5932 21.4719 11.6156L21.5263 11.6193C21.8195 11.6389 22.1626 11.6618 22.4429 11.6618V7.40825C22.3209 7.40825 22.1219 7.39596 21.7544 7.37149C21.4202 7.34925 20.9976 7.32115 20.5371 7.30672C19.6286 7.27824 18.4672 7.29779 17.3093 7.55377C16.1512 7.8098 14.8404 8.33724 13.8181 9.4452C12.7612 10.5907 12.2266 12.1236 12.2266 13.9619V15.0127H10.6836V19.2662H12.2266V26.6332H16.4801V19.2662H20.3394V15.0127H16.4801V13.9619Z" fill="currentColor"/></svg>from_dict_optimum</span></h4><!-- HTML_TAG_END --> <a id="transformers.GPTQConfig.from_dict_optimum" class="header-link invisible with-hover:group-hover:visible pr-2" href="#transformers.GPTQConfig.from_dict_optimum"><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></a> <a class="!ml-auto !text-gray-400 !no-underline text-sm flex items-center" href="https://github.com/huggingface/transformers/blob/main/src/transformers/utils/quantization_config.py#L733" target="_blank"><span data-svelte-h="svelte-1kd6by1">&lt;</span> <span class="hidden md:block mx-0.5 hover:!underline" data-svelte-h="svelte-122apf4">source</span> <span data-svelte-h="svelte-x0xyl0">&gt;</span></a></span> <p class="font-mono text-xs md:text-sm !leading-relaxed !my-6"><span data-svelte-h="svelte-8mvn6a">(</span> <span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">config_dict<span class="opacity-60"></span></span> </span> <span data-svelte-h="svelte-1jq0pl7">)</span> </p> <div class="!mb-10 relative docstring-details "> </div></div> <p data-svelte-h="svelte-4jdj2l">Get compatible class with optimum gptq config dict</p></div> <div class="docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"> <div><span class="group flex space-x-1.5 items-center text-gray-800 bg-gradient-to-r rounded-tr-lg -mt-4 -ml-4 pt-3 px-2.5" id="transformers.GPTQConfig.post_init"><!-- HTML_TAG_START --><h4 class="!m-0"><span class="flex-1 rounded-xl py-0.5 break-all bg-gradient-to-r from-blue-50/60 to-white dark:from-gray-900 dark:to-gray-950 text-blue-700 dark:text-blue-300 font-medium px-2"><svg width="1em" height="1em" viewBox="0 0 32 33" class="mr-1 inline-block -mt-0.5" xmlns="http://www.w3.org/2000/svg"><path d="M5.80566 18.3545C4.90766 17.4565 4.90766 16.0005 5.80566 15.1025L14.3768 6.53142C15.2748 5.63342 16.7307 5.63342 17.6287 6.53142L26.1999 15.1025C27.0979 16.0005 27.0979 17.4565 26.1999 18.3545L17.6287 26.9256C16.7307 27.8236 15.2748 27.8236 14.3768 26.9256L5.80566 18.3545Z" fill="currentColor" fill-opacity="0.25"/><path fill-rule="evenodd" clip-rule="evenodd" d="M16.4801 13.9619C16.4801 12.9761 16.7467 12.5436 16.9443 12.3296C17.1764 12.078 17.5731 11.8517 18.2275 11.707C18.8821 11.5623 19.638 11.5342 20.4038 11.5582C20.7804 11.57 21.1341 11.5932 21.4719 11.6156L21.5263 11.6193C21.8195 11.6389 22.1626 11.6618 22.4429 11.6618V7.40825C22.3209 7.40825 22.1219 7.39596 21.7544 7.37149C21.4202 7.34925 20.9976 7.32115 20.5371 7.30672C19.6286 7.27824 18.4672 7.29779 17.3093 7.55377C16.1512 7.8098 14.8404 8.33724 13.8181 9.4452C12.7612 10.5907 12.2266 12.1236 12.2266 13.9619V15.0127H10.6836V19.2662H12.2266V26.6332H16.4801V19.2662H20.3394V15.0127H16.4801V13.9619Z" fill="currentColor"/></svg>post_init</span></h4><!-- HTML_TAG_END --> <a id="transformers.GPTQConfig.post_init" class="header-link invisible with-hover:group-hover:visible pr-2" href="#transformers.GPTQConfig.post_init"><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></a> <a class="!ml-auto !text-gray-400 !no-underline text-sm flex items-center" href="https://github.com/huggingface/transformers/blob/main/src/transformers/utils/quantization_config.py#L645" target="_blank"><span data-svelte-h="svelte-1kd6by1">&lt;</span> <span class="hidden md:block mx-0.5 hover:!underline" data-svelte-h="svelte-122apf4">source</span> <span data-svelte-h="svelte-x0xyl0">&gt;</span></a></span> <p class="font-mono text-xs md:text-sm !leading-relaxed !my-6"><span data-svelte-h="svelte-8mvn6a">(</span> <span data-svelte-h="svelte-1jq0pl7">)</span> </p> <div class="!mb-10 relative docstring-details "> </div></div> <p data-svelte-h="svelte-1ozftb6">Safety checker that arguments are correct</p></div> <div class="docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"> <div><span class="group flex space-x-1.5 items-center text-gray-800 bg-gradient-to-r rounded-tr-lg -mt-4 -ml-4 pt-3 px-2.5" id="transformers.GPTQConfig.to_dict_optimum"><!-- HTML_TAG_START --><h4 class="!m-0"><span class="flex-1 rounded-xl py-0.5 break-all bg-gradient-to-r from-blue-50/60 to-white dark:from-gray-900 dark:to-gray-950 text-blue-700 dark:text-blue-300 font-medium px-2"><svg width="1em" height="1em" viewBox="0 0 32 33" class="mr-1 inline-block -mt-0.5" xmlns="http://www.w3.org/2000/svg"><path d="M5.80566 18.3545C4.90766 17.4565 4.90766 16.0005 5.80566 15.1025L14.3768 6.53142C15.2748 5.63342 16.7307 5.63342 17.6287 6.53142L26.1999 15.1025C27.0979 16.0005 27.0979 17.4565 26.1999 18.3545L17.6287 26.9256C16.7307 27.8236 15.2748 27.8236 14.3768 26.9256L5.80566 18.3545Z" fill="currentColor" fill-opacity="0.25"/><path fill-rule="evenodd" clip-rule="evenodd" d="M16.4801 13.9619C16.4801 12.9761 16.7467 12.5436 16.9443 12.3296C17.1764 12.078 17.5731 11.8517 18.2275 11.707C18.8821 11.5623 19.638 11.5342 20.4038 11.5582C20.7804 11.57 21.1341 11.5932 21.4719 11.6156L21.5263 11.6193C21.8195 11.6389 22.1626 11.6618 22.4429 11.6618V7.40825C22.3209 7.40825 22.1219 7.39596 21.7544 7.37149C21.4202 7.34925 20.9976 7.32115 20.5371 7.30672C19.6286 7.27824 18.4672 7.29779 17.3093 7.55377C16.1512 7.8098 14.8404 8.33724 13.8181 9.4452C12.7612 10.5907 12.2266 12.1236 12.2266 13.9619V15.0127H10.6836V19.2662H12.2266V26.6332H16.4801V19.2662H20.3394V15.0127H16.4801V13.9619Z" fill="currentColor"/></svg>to_dict_optimum</span></h4><!-- HTML_TAG_END --> <a id="transformers.GPTQConfig.to_dict_optimum" class="header-link invisible with-hover:group-hover:visible pr-2" href="#transformers.GPTQConfig.to_dict_optimum"><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></a> <a class="!ml-auto !text-gray-400 !no-underline text-sm flex items-center" href="https://github.com/huggingface/transformers/blob/main/src/transformers/utils/quantization_config.py#L724" target="_blank"><span data-svelte-h="svelte-1kd6by1">&lt;</span> <span class="hidden md:block mx-0.5 hover:!underline" data-svelte-h="svelte-122apf4">source</span> <span data-svelte-h="svelte-x0xyl0">&gt;</span></a></span> <p class="font-mono text-xs md:text-sm !leading-relaxed !my-6"><span data-svelte-h="svelte-8mvn6a">(</span> <span data-svelte-h="svelte-1jq0pl7">)</span> </p> <div class="!mb-10 relative docstring-details "> </div></div> <p data-svelte-h="svelte-pjgtd6">Get compatible dict for optimum gptq config</p></div></div> <h2 class="relative group"><a id="bitsandbytes-integration" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#bitsandbytes-integration"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>bitsandbytes Integration</span></h2> <p data-svelte-h="svelte-1lw70bk">๐Ÿค— Transformers ใฏใ€<code>bitsandbytes</code> ใงๆœ€ใ‚‚ใ‚ˆใไฝฟ็”จใ•ใ‚Œใ‚‹ใƒขใ‚ธใƒฅใƒผใƒซใจ็ทŠๅฏ†ใซ็ตฑๅˆใ•ใ‚Œใฆใ„ใพใ™ใ€‚ๆ•ฐ่กŒใฎใ‚ณใƒผใƒ‰ใงใƒขใƒ‡ใƒซใ‚’ 8 ใƒ“ใƒƒใƒˆ็ฒพๅบฆใงใƒญใƒผใƒ‰ใงใใพใ™ใ€‚
ใ“ใ‚Œใฏใ€<code>bitsandbytes</code>ใฎ <code>0.37.0</code>ใƒชใƒชใƒผใ‚นไปฅ้™ใ€ใปใจใ‚“ใฉใฎ GPU ใƒใƒผใƒ‰ใ‚ฆใ‚งใ‚ขใงใ‚ตใƒใƒผใƒˆใ•ใ‚Œใฆใ„ใพใ™ใ€‚</p> <p data-svelte-h="svelte-19v0jp6">้‡ๅญๅŒ–ๆ–นๆณ•ใฎ่ฉณ็ดฐใซใคใ„ใฆใฏใ€<a href="https://arxiv.org/abs/2208.07339" rel="nofollow">LLM.int8()</a> ่ซ–ๆ–‡ใ€ใพใŸใฏ <a href="https://huggingface.co/blog/hf-bitsandbytes-" rel="nofollow">ใƒ–ใƒญใ‚ฐๆŠ•็จฟ</a> ใ‚’ใ”่ฆงใใ ใ•ใ„ใ€‚็ตฑๅˆ๏ผ‰ใ‚ณใƒฉใƒœใƒฌใƒผใ‚ทใƒงใƒณใซใคใ„ใฆใ€‚</p> <p data-svelte-h="svelte-1u8e287"><code>0.39.0</code>ใƒชใƒชใƒผใ‚นไปฅ้™ใ€FP4 ใƒ‡ใƒผใ‚ฟๅž‹ใ‚’ๆดป็”จใ—ใ€4 ใƒ“ใƒƒใƒˆ้‡ๅญๅŒ–ใ‚’ไฝฟ็”จใ—ใฆ<code>device_map</code>ใ‚’ใ‚ตใƒใƒผใƒˆใ™ใ‚‹ไปปๆ„ใฎใƒขใƒ‡ใƒซใ‚’ใƒญใƒผใƒ‰ใงใใพใ™ใ€‚</p> <p data-svelte-h="svelte-g9g250">็‹ฌ่‡ชใฎ pytorch ใƒขใƒ‡ใƒซใ‚’้‡ๅญๅŒ–ใ—ใŸใ„ๅ ดๅˆใฏใ€๐Ÿค— Accelerate ใƒฉใ‚คใƒ–ใƒฉใƒชใฎ <a href="https://huggingface.co/docs/accelerate/main/en/usage_guides/quantization" rel="nofollow">ใƒ‰ใ‚ญใƒฅใƒกใƒณใƒˆ</a> ใ‚’ใƒใ‚งใƒƒใ‚ฏใ—ใฆใใ ใ•ใ„ใ€‚</p> <p data-svelte-h="svelte-ouymag"><code>bitsandbytes</code>็ตฑๅˆใ‚’ไฝฟ็”จใ—ใฆใงใใ‚‹ใ“ใจใฏๆฌกใฎใจใŠใ‚Šใงใ™</p> <h3 class="relative group"><a id="general-usage" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#general-usage"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>General usage</span></h3> <p data-svelte-h="svelte-qonxi">ใƒขใƒ‡ใƒซใŒ ๐Ÿค— Accelerate ใซใ‚ˆใ‚‹่ชญใฟ่พผใฟใ‚’ใ‚ตใƒใƒผใƒˆใ—ใ€<code>torch.nn.Linear</code> ใƒฌใ‚คใƒคใƒผใŒๅซใพใ‚Œใฆใ„ใ‚‹้™ใ‚Šใ€ <a href="/docs/transformers/main/ja/main_classes/model#transformers.PreTrainedModel.from_pretrained">from_pretrained()</a> ใƒกใ‚ฝใƒƒใƒ‰ใ‚’ๅ‘ผใณๅ‡บใ™ใจใใซ <code>load_in_8bit</code> ใพใŸใฏ <code>load_in_4bit</code> ๅผ•ๆ•ฐใ‚’ไฝฟ็”จใ—ใฆใƒขใƒ‡ใƒซใ‚’้‡ๅญๅŒ–ใงใใพใ™ใ€‚ใ“ใ‚Œใฏใฉใฎใ‚ˆใ†ใชใƒขใƒ€ใƒชใƒ†ใ‚ฃใงใ‚‚ๅŒๆง˜ใซๆฉŸ่ƒฝใ™ใ‚‹ใฏใšใงใ™ใ€‚</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForCausalLM
model_8bit = AutoModelForCausalLM.from_pretrained(<span class="hljs-string">&quot;facebook/opt-350m&quot;</span>, load_in_8bit=<span class="hljs-literal">True</span>)
model_4bit = AutoModelForCausalLM.from_pretrained(<span class="hljs-string">&quot;facebook/opt-350m&quot;</span>, load_in_4bit=<span class="hljs-literal">True</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1rxk684">ใƒ‡ใƒ•ใ‚ฉใƒซใƒˆใงใฏใ€ไป–ใฎใ™ในใฆใฎใƒขใ‚ธใƒฅใƒผใƒซ (ไพ‹: <code>torch.nn.LayerNorm</code>) ใฏ <code>torch.float16</code> ใซๅค‰ๆ›ใ•ใ‚Œใพใ™ใŒใ€ใใฎ <code>dtype</code> ใ‚’ๅค‰ๆ›ดใ—ใŸใ„ๅ ดๅˆใฏใ€<code>torch_dtype</code> ๅผ•ๆ•ฐใ‚’ไธŠๆ›ธใใงใใพใ™ใ€‚</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">import</span> torch
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForCausalLM
<span class="hljs-meta">&gt;&gt;&gt; </span>model_8bit = AutoModelForCausalLM.from_pretrained(<span class="hljs-string">&quot;facebook/opt-350m&quot;</span>, load_in_8bit=<span class="hljs-literal">True</span>, torch_dtype=torch.float32)
<span class="hljs-meta">&gt;&gt;&gt; </span>model_8bit.model.decoder.layers[-<span class="hljs-number">1</span>].final_layer_norm.weight.dtype
torch.float32<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="fp4-quantization" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#fp4-quantization"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>FP4 quantization</span></h3> <h4 class="relative group"><a id="requirements" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#requirements"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Requirements</span></h4> <p data-svelte-h="svelte-1lnnlr4">ไปฅไธ‹ใฎใ‚ณใƒผใƒ‰ ใ‚นใƒ‹ใƒšใƒƒใƒˆใ‚’ๅฎŸ่กŒใ™ใ‚‹ๅ‰ใซใ€ไปฅไธ‹ใฎ่ฆไปถใŒใ‚คใƒณใ‚นใƒˆใƒผใƒซใ•ใ‚Œใฆใ„ใ‚‹ใ“ใจใ‚’็ขบ่ชใ—ใฆใใ ใ•ใ„ใ€‚</p> <ul data-svelte-h="svelte-1wl10a1"><li><p>ๆœ€ๆ–ฐใฎ<code>bitsandbytes</code>ใƒฉใ‚คใƒ–ใƒฉใƒช
<code>pip install bitsandbytes&gt;=0.39.0</code></p></li> <li><p>ๆœ€ๆ–ฐใฎ<code>accelerate</code>ใ‚’ใ‚คใƒณใ‚นใƒˆใƒผใƒซใ™ใ‚‹
<code>pip install --upgrade accelerate</code></p></li> <li><p>ๆœ€ๆ–ฐใฎ <code>transformers</code> ใ‚’ใ‚คใƒณใ‚นใƒˆใƒผใƒซใ™ใ‚‹
<code>pip install --upgrade transformers</code></p></li></ul> <h4 class="relative group"><a id="tips-and-best-practices" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tips-and-best-practices"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Tips and best practices</span></h4> <ul data-svelte-h="svelte-iudtgn"><li><p><strong>้ซ˜ๅบฆใชไฝฟ็”จๆณ•:</strong> ๅฏ่ƒฝใชใ™ในใฆใฎใ‚ชใƒ—ใ‚ทใƒงใƒณใ‚’ไฝฟ็”จใ—ใŸ 4 ใƒ“ใƒƒใƒˆ้‡ๅญๅŒ–ใฎ้ซ˜ๅบฆใชไฝฟ็”จๆณ•ใซใคใ„ใฆใฏใ€<a href="https://colab.research.google.com/drive/1ge2F1QSK8Q7h0hn3YKuBCOAS0bK8E0wf" rel="nofollow">ใ“ใฎ Google Colab ใƒŽใƒผใƒˆใƒ–ใƒƒใ‚ฏ</a> ใ‚’ๅ‚็…งใ—ใฆใใ ใ•ใ„ใ€‚</p></li> <li><p><strong><code>batch_size=1</code> ใซใ‚ˆใ‚‹้ซ˜้€ŸๆŽจ่ซ– :</strong> bitsandbytes ใฎ <code>0.40.0</code> ใƒชใƒชใƒผใ‚นไปฅ้™ใ€<code>batch_size=1</code> ใงใฏ้ซ˜้€ŸๆŽจ่ซ–ใฎๆฉๆตใ‚’ๅ—ใ‘ใ‚‹ใ“ใจใŒใงใใพใ™ใ€‚ <a href="https://github.com/TimDettmers/bitsandbytes/releases/tag/0.40.0" rel="nofollow">ใ“ใ‚Œใ‚‰ใฎใƒชใƒชใƒผใ‚น ใƒŽใƒผใƒˆ</a> ใ‚’็ขบ่ชใ—ใ€ใ“ใฎๆฉŸ่ƒฝใ‚’ๆดป็”จใ™ใ‚‹ใซใฏ<code>0.40.0</code>ไปฅ้™ใฎใƒใƒผใ‚ธใƒงใƒณใ‚’ไฝฟ็”จใ—ใฆใ„ใ‚‹ใ“ใจใ‚’็ขบ่ชใ—ใฆใใ ใ•ใ„ใ€‚็ฎฑใฎใ€‚</p></li> <li><p><strong>ใƒˆใƒฌใƒผใƒ‹ใƒณใ‚ฐ:</strong> <a href="https://arxiv.org/abs/2305.14314" rel="nofollow">QLoRA ่ซ–ๆ–‡</a> ใซใ‚ˆใ‚‹ใจใ€4 ใƒ“ใƒƒใƒˆๅŸบๆœฌใƒขใƒ‡ใƒซใ‚’ใƒˆใƒฌใƒผใƒ‹ใƒณใ‚ฐใ™ใ‚‹ๅ ดๅˆ (ไพ‹: LoRA ใ‚ขใƒ€ใƒ—ใ‚ฟใƒผใ‚’ไฝฟ็”จ)ใ€<code>bnb_4bit_quant_type=&#39;nf4&#39;</code> ใ‚’ไฝฟ็”จใ™ใ‚‹ๅฟ…่ฆใŒใ‚ใ‚Šใพใ™ใ€‚ ใ€‚</p></li> <li><p><strong>ๆŽจ่ซ–:</strong> ๆŽจ่ซ–ใฎๅ ดๅˆใ€<code>bnb_4bit_quant_type</code> ใฏใƒ‘ใƒ•ใ‚ฉใƒผใƒžใƒณใ‚นใซๅคงใใชๅฝฑ้Ÿฟใ‚’ไธŽใˆใพใ›ใ‚“ใ€‚ใŸใ ใ—ใ€ใƒขใƒ‡ใƒซใฎ้‡ใฟใจใฎไธ€่ฒซๆ€งใ‚’ไฟใคใŸใ‚ใซใ€ๅฟ…ใšๅŒใ˜ <code>bnb_4bit_compute_dtype</code> ใŠใ‚ˆใณ <code>torch_dtype</code> ๅผ•ๆ•ฐใ‚’ไฝฟ็”จใ—ใฆใใ ใ•ใ„ใ€‚</p></li></ul> <h4 class="relative group"><a id="load-a-large-model-in-4bit" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#load-a-large-model-in-4bit"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Load a large model in 4bit</span></h4> <p data-svelte-h="svelte-artgoh"><code>.from_pretrained</code> ใƒกใ‚ฝใƒƒใƒ‰ใ‚’ๅ‘ผใณๅ‡บใ™ใจใใซ <code>load_in_4bit=True</code> ใ‚’ไฝฟ็”จใ™ใ‚‹ใจใ€ใƒกใƒขใƒชไฝฟ็”จ้‡ใ‚’ (ใŠใŠใ‚ˆใ) 4 ใงๅ‰ฒใ‚‹ใ“ใจใŒใงใใพใ™ใ€‚</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-comment"># pip install transformers accelerate bitsandbytes</span>
<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForCausalLM, AutoTokenizer
model_id = <span class="hljs-string">&quot;bigscience/bloom-1b7&quot;</span>
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, device_map=<span class="hljs-string">&quot;auto&quot;</span>, load_in_4bit=<span class="hljs-literal">True</span>)<!-- HTML_TAG_END --></pre></div> <div class="course-tip course-tip-orange bg-gradient-to-br dark:bg-gradient-to-r before:border-orange-500 dark:before:border-orange-800 from-orange-50 dark:from-gray-900 to-white dark:to-gray-950 border border-orange-50 text-orange-700 dark:text-gray-400"><p data-svelte-h="svelte-1p6yucr">ใƒขใƒ‡ใƒซใŒ 4 ใƒ“ใƒƒใƒˆใงใƒญใƒผใƒ‰ใ•ใ‚Œใ‚‹ใจใ€็พๆ™‚็‚นใงใฏ้‡ๅญๅŒ–ใ•ใ‚ŒใŸ้‡ใฟใ‚’ใƒใƒ–ใซใƒ—ใƒƒใ‚ทใƒฅใ™ใ‚‹ใ“ใจใฏใงใใชใ„ใ“ใจใซๆณจๆ„ใ—ใฆใใ ใ•ใ„ใ€‚ 4 ใƒ“ใƒƒใƒˆใฎ้‡ใฟใฏใพใ ใ‚ตใƒใƒผใƒˆใ•ใ‚Œใฆใ„ใชใ„ใŸใ‚ใ€ใƒˆใƒฌใƒผใƒ‹ใƒณใ‚ฐใงใใชใ„ใ“ใจใซใ‚‚ๆณจๆ„ใ—ใฆใใ ใ•ใ„ใ€‚ใŸใ ใ—ใ€4 ใƒ“ใƒƒใƒˆ ใƒขใƒ‡ใƒซใ‚’ไฝฟ็”จใ—ใฆ่ฟฝๅŠ ใฎใƒ‘ใƒฉใƒกใƒผใ‚ฟใƒผใ‚’ใƒˆใƒฌใƒผใƒ‹ใƒณใ‚ฐใ™ใ‚‹ใ“ใจใ‚‚ใงใใพใ™ใ€‚ใ“ใ‚Œใซใคใ„ใฆใฏๆฌกใฎใ‚ปใ‚ฏใ‚ทใƒงใƒณใง่ชฌๆ˜Žใ—ใพใ™ใ€‚</p></div> <h3 class="relative group"><a id="load-a-large-model-in-8bit" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#load-a-large-model-in-8bit"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Load a large model in 8bit</span></h3> <p data-svelte-h="svelte-1ep4vf9"><code>.from_pretrained</code> ใƒกใ‚ฝใƒƒใƒ‰ใ‚’ๅ‘ผใณๅ‡บใ™ใจใใซ <code>load_in_8bit=True</code> ๅผ•ๆ•ฐใ‚’ไฝฟ็”จใ™ใ‚‹ใจใ€ใƒกใƒขใƒช่ฆไปถใ‚’ใŠใ‚ˆใๅŠๅˆ†ใซใ—ใฆใƒขใƒ‡ใƒซใ‚’ใƒญใƒผใƒ‰ใงใใพใ™ใ€‚</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-comment"># pip install transformers accelerate bitsandbytes</span>
<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
model_id = <span class="hljs-string">&quot;bigscience/bloom-1b7&quot;</span>
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=BitsAndBytesConfig(load_in_8bit=<span class="hljs-literal">True</span>))<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1wye4p4">ๆฌกใซใ€้€šๅธธ <a href="/docs/transformers/main/ja/main_classes/model#transformers.PreTrainedModel">PreTrainedModel</a> ใ‚’ไฝฟ็”จใ™ใ‚‹ใฎใจๅŒใ˜ใ‚ˆใ†ใซใƒขใƒ‡ใƒซใ‚’ไฝฟ็”จใ—ใพใ™ใ€‚</p> <p data-svelte-h="svelte-3h8e3v"><code>get_memory_footprint</code> ใƒกใ‚ฝใƒƒใƒ‰ใ‚’ไฝฟ็”จใ—ใฆใ€ใƒขใƒ‡ใƒซใฎใƒกใƒขใƒช ใƒ•ใƒƒใƒˆใƒ—ใƒชใƒณใƒˆใ‚’็ขบ่ชใงใใพใ™ใ€‚</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-built_in">print</span>(model.get_memory_footprint())<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1qrn2hy">ใ“ใฎ็ตฑๅˆใซใ‚ˆใ‚Šใ€ๅคงใใชใƒขใƒ‡ใƒซใ‚’ๅฐใ•ใชใƒ‡ใƒใ‚คใ‚นใซใƒญใƒผใƒ‰ใ—ใ€ๅ•้กŒใชใๅฎŸ่กŒใงใใ‚‹ใ‚ˆใ†ใซใชใ‚Šใพใ—ใŸใ€‚</p> <div class="course-tip course-tip-orange bg-gradient-to-br dark:bg-gradient-to-r before:border-orange-500 dark:before:border-orange-800 from-orange-50 dark:from-gray-900 to-white dark:to-gray-950 border border-orange-50 text-orange-700 dark:text-gray-400">ใƒขใƒ‡ใƒซใŒ 8 ใƒ“ใƒƒใƒˆใงใƒญใƒผใƒ‰ใ•ใ‚Œใ‚‹ใจใ€ๆœ€ๆ–ฐใฎ `transformers`ใจ`bitsandbytes`ใ‚’ไฝฟ็”จใ™ใ‚‹ๅ ดๅˆใ‚’้™คใใ€้‡ๅญๅŒ–ใ•ใ‚ŒใŸ้‡ใฟใ‚’ใƒใƒ–ใซใƒ—ใƒƒใ‚ทใƒฅใ™ใ‚‹ใ“ใจใฏ็พๅœจไธๅฏ่ƒฝใงใ‚ใ‚‹ใ“ใจใซๆณจๆ„ใ—ใฆใใ ใ•ใ„ใ€‚ 8 ใƒ“ใƒƒใƒˆใฎ้‡ใฟใฏใพใ ใ‚ตใƒใƒผใƒˆใ•ใ‚Œใฆใ„ใชใ„ใŸใ‚ใ€ใƒˆใƒฌใƒผใƒ‹ใƒณใ‚ฐใงใใชใ„ใ“ใจใซใ‚‚ๆณจๆ„ใ—ใฆใใ ใ•ใ„ใ€‚ใŸใ ใ—ใ€8 ใƒ“ใƒƒใƒˆ ใƒขใƒ‡ใƒซใ‚’ไฝฟ็”จใ—ใฆ่ฟฝๅŠ ใฎใƒ‘ใƒฉใƒกใƒผใ‚ฟใƒผใ‚’ใƒˆใƒฌใƒผใƒ‹ใƒณใ‚ฐใ™ใ‚‹ใ“ใจใ‚‚ใงใใพใ™ใ€‚ใ“ใ‚Œใซใคใ„ใฆใฏๆฌกใฎใ‚ปใ‚ฏใ‚ทใƒงใƒณใง่ชฌๆ˜Žใ—ใพใ™ใ€‚
ใพใŸใ€`device_map` ใฏใ‚ชใƒ—ใ‚ทใƒงใƒณใงใ™ใŒใ€ๅˆฉ็”จๅฏ่ƒฝใชใƒชใ‚ฝใƒผใ‚นไธŠใงใƒขใƒ‡ใƒซใ‚’ๅŠน็އ็š„ใซใƒ‡ใ‚ฃใ‚นใƒ‘ใƒƒใƒใ™ใ‚‹ใŸใ‚ใ€ๆŽจ่ซ–ใซใฏ `device_map = &#39;auto&#39;` ใ‚’่จญๅฎšใ™ใ‚‹ใ“ใจใŒๆŽจๅฅจใ•ใ‚Œใพใ™ใ€‚</div> <h4 class="relative group"><a id="advanced-use-cases" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#advanced-use-cases"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Advanced use cases</span></h4> <p data-svelte-h="svelte-brbl2r">ใ“ใ“ใงใฏใ€FP4 ้‡ๅญๅŒ–ใ‚’ไฝฟ็”จใ—ใฆๅฎŸ่กŒใงใใ‚‹ใ„ใใคใ‹ใฎ้ซ˜ๅบฆใชไฝฟ็”จไพ‹ใซใคใ„ใฆ่ชฌๆ˜Žใ—ใพใ™ใ€‚</p> <h5 class="relative group"><a id="change-the-compute-dtype" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#change-the-compute-dtype"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Change the compute dtype</span></h5> <p data-svelte-h="svelte-8cey66">compute dtype ใฏใ€่จˆ็ฎ—ไธญใซไฝฟ็”จใ•ใ‚Œใ‚‹ dtype ใ‚’ๅค‰ๆ›ดใ™ใ‚‹ใŸใ‚ใซไฝฟ็”จใ•ใ‚Œใพใ™ใ€‚ใŸใจใˆใฐใ€้š ใ—็Šถๆ…‹ใฏ<code>float32</code>ใซใ‚ใ‚Šใพใ™ใŒใ€้ซ˜้€ŸๅŒ–ใฎใŸใ‚ใซ่จˆ็ฎ—ใ‚’ bf16 ใซ่จญๅฎšใงใใพใ™ใ€‚ใƒ‡ใƒ•ใ‚ฉใƒซใƒˆใงใฏใ€compute dtype ใฏ <code>float32</code> ใซ่จญๅฎšใ•ใ‚Œใพใ™ใ€‚</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> torch
<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> BitsAndBytesConfig
quantization_config = BitsAndBytesConfig(load_in_4bit=<span class="hljs-literal">True</span>, bnb_4bit_compute_dtype=torch.bfloat16)<!-- HTML_TAG_END --></pre></div> <h5 class="relative group"><a id="using-nf4-normal-float-4-data-type" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#using-nf4-normal-float-4-data-type"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Using NF4 (Normal Float 4) data type</span></h5> <p data-svelte-h="svelte-1gdahjp">NF4 ใƒ‡ใƒผใ‚ฟๅž‹ใ‚’ไฝฟ็”จใ™ใ‚‹ใ“ใจใ‚‚ใงใใพใ™ใ€‚ใ“ใ‚Œใฏใ€ๆญฃ่ฆๅˆ†ๅธƒใ‚’ไฝฟ็”จใ—ใฆๅˆๆœŸๅŒ–ใ•ใ‚ŒใŸ้‡ใฟใซ้ฉๅˆใ—ใŸๆ–ฐใ—ใ„ 4 ใƒ“ใƒƒใƒˆ ใƒ‡ใƒผใ‚ฟๅž‹ใงใ™ใ€‚ใใฎๅฎŸ่กŒใฎใŸใ‚ใซ:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> BitsAndBytesConfig
nf4_config = BitsAndBytesConfig(
load_in_4bit=<span class="hljs-literal">True</span>,
bnb_4bit_quant_type=<span class="hljs-string">&quot;nf4&quot;</span>,
)
model_nf4 = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=nf4_config)<!-- HTML_TAG_END --></pre></div> <h5 class="relative group"><a id="use-nested-quantization-for-more-memory-efficient-inference" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#use-nested-quantization-for-more-memory-efficient-inference"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Use nested quantization for more memory efficient inference</span></h5> <p data-svelte-h="svelte-1wj738d">ใพใŸใ€ใƒใ‚นใƒˆใ•ใ‚ŒใŸ้‡ๅญๅŒ–ๆ‰‹ๆณ•ใ‚’ไฝฟ็”จใ™ใ‚‹ใ“ใจใ‚’ใŠๅ‹งใ‚ใ—ใพใ™ใ€‚ใ“ใ‚Œใซใ‚ˆใ‚Šใ€ใƒ‘ใƒ•ใ‚ฉใƒผใƒžใƒณใ‚นใ‚’่ฟฝๅŠ ใ™ใ‚‹ใ“ใจใชใใ€ใ‚ˆใ‚ŠๅคšใใฎใƒกใƒขใƒชใŒ็ฏ€็ด„ใ•ใ‚Œใพใ™ใ€‚็ตŒ้จ“็š„ใช่ฆณๅฏŸใ‹ใ‚‰ใ€ใ“ใ‚Œใซใ‚ˆใ‚Šใ€NVIDIA-T4 16GB ไธŠใงใ‚ทใƒผใ‚ฑใƒณใ‚น้•ท 1024ใ€ใƒใƒƒใƒ ใ‚ตใ‚คใ‚บ 1ใ€ๅ‹พ้…็ดฏ็ฉใ‚นใƒ†ใƒƒใƒ— 4 ใฎ llama-13b ใƒขใƒ‡ใƒซใ‚’ๅพฎ่ชฟๆ•ดใ™ใ‚‹ใ“ใจใŒๅฏ่ƒฝใซใชใ‚Šใพใ™ใ€‚</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> BitsAndBytesConfig
double_quant_config = BitsAndBytesConfig(
load_in_4bit=<span class="hljs-literal">True</span>,
bnb_4bit_use_double_quant=<span class="hljs-literal">True</span>,
)
model_double_quant = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=double_quant_config)<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="push-quantized-models-on-the--hub" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#push-quantized-models-on-the--hub"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Push quantized models on the ๐Ÿค— Hub</span></h3> <p data-svelte-h="svelte-9if3q8"><code>push_to_hub</code>ใƒกใ‚ฝใƒƒใƒ‰ใ‚’ๅ˜็ด”ใซไฝฟ็”จใ™ใ‚‹ใ“ใจใงใ€้‡ๅญๅŒ–ใ•ใ‚ŒใŸใƒขใƒ‡ใƒซใ‚’ใƒใƒ–ใซใƒ—ใƒƒใ‚ทใƒฅใงใใพใ™ใ€‚ใ“ใ‚Œใซใ‚ˆใ‚Šใ€ๆœ€ๅˆใซ้‡ๅญๅŒ–ๆง‹ๆˆใƒ•ใ‚กใ‚คใƒซใŒใƒ—ใƒƒใ‚ทใƒฅใ•ใ‚Œใ€ๆฌกใซ้‡ๅญๅŒ–ใ•ใ‚ŒใŸใƒขใƒ‡ใƒซใฎ้‡ใฟใŒใƒ—ใƒƒใ‚ทใƒฅใ•ใ‚Œใพใ™ใ€‚
ใ“ใฎๆฉŸ่ƒฝใ‚’ไฝฟ็”จใงใใ‚‹ใ‚ˆใ†ใซใ™ใ‚‹ใซใฏใ€ๅฟ…ใš <code>bitsandbytes&gt;0.37.2</code> ใ‚’ไฝฟ็”จใ—ใฆใใ ใ•ใ„ (ใ“ใฎ่จ˜ไบ‹ใฎๅŸท็ญ†ๆ™‚็‚นใงใฏใ€<code>bitsandbytes==0.38.0.post1</code> ใงใƒ†ใ‚นใƒˆใ—ใพใ—ใŸ)ใ€‚</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
model = AutoModelForCausalLM.from_pretrained(<span class="hljs-string">&quot;bigscience/bloom-560m&quot;</span>, quantization_config=BitsAndBytesConfig(load_in_8bit=<span class="hljs-literal">True</span>))
tokenizer = AutoTokenizer.from_pretrained(<span class="hljs-string">&quot;bigscience/bloom-560m&quot;</span>)
model.push_to_hub(<span class="hljs-string">&quot;bloom-560m-8bit&quot;</span>)<!-- HTML_TAG_END --></pre></div> <div class="course-tip course-tip-orange bg-gradient-to-br dark:bg-gradient-to-r before:border-orange-500 dark:before:border-orange-800 from-orange-50 dark:from-gray-900 to-white dark:to-gray-950 border border-orange-50 text-orange-700 dark:text-gray-400"><p data-svelte-h="svelte-1bkhcu">ๅคง่ฆๆจกใชใƒขใƒ‡ใƒซใงใฏใ€ใƒใƒ–ไธŠใง 8 ใƒ“ใƒƒใƒˆ ใƒขใƒ‡ใƒซใ‚’ใƒ—ใƒƒใ‚ทใƒฅใ™ใ‚‹ใ“ใจใŒๅผทใๆŽจๅฅจใ•ใ‚Œใพใ™ใ€‚ใ“ใ‚Œใซใ‚ˆใ‚Šใ€ใ‚ณใƒŸใƒฅใƒ‹ใƒ†ใ‚ฃใฏใƒกใƒขใƒช ใƒ•ใƒƒใƒˆใƒ—ใƒชใƒณใƒˆใฎๅ‰Šๆธ›ใจใ€ใŸใจใˆใฐ Google Colab ใงใฎๅคง่ฆๆจกใชใƒขใƒ‡ใƒซใฎ่ชญใฟ่พผใฟใซใ‚ˆใ‚‹ๆฉๆตใ‚’ๅ—ใ‘ใ‚‹ใ“ใจใŒใงใใพใ™ใ€‚</p></div> <h3 class="relative group"><a id="load-a-quantized-model-from-the--hub" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#load-a-quantized-model-from-the--hub"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Load a quantized model from the ๐Ÿค— Hub</span></h3> <p data-svelte-h="svelte-1am21ml"><code>from_pretrained</code>ใƒกใ‚ฝใƒƒใƒ‰ใ‚’ไฝฟ็”จใ—ใฆใ€ใƒใƒ–ใ‹ใ‚‰้‡ๅญๅŒ–ใƒขใƒ‡ใƒซใ‚’ใƒญใƒผใƒ‰ใงใใพใ™ใ€‚ๅฑžๆ€ง <code>quantization_config</code> ใŒใƒขใƒ‡ใƒซ่จญๅฎšใ‚ชใƒ–ใ‚ธใ‚งใ‚ฏใƒˆใซๅญ˜ๅœจใ™ใ‚‹ใ“ใจใ‚’็ขบ่ชใ—ใฆใ€ใƒ—ใƒƒใ‚ทใƒฅใ•ใ‚ŒใŸ้‡ใฟใŒ้‡ๅญๅŒ–ใ•ใ‚Œใฆใ„ใ‚‹ใ“ใจใ‚’็ขบ่ชใ—ใพใ™ใ€‚</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForCausalLM, AutoTokenizer
model = AutoModelForCausalLM.from_pretrained(<span class="hljs-string">&quot;{your_username}/bloom-560m-8bit&quot;</span>, device_map=<span class="hljs-string">&quot;auto&quot;</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1wm02p5">ใ“ใฎๅ ดๅˆใ€ๅผ•ๆ•ฐ <code>load_in_8bit=True</code> ใ‚’ๆŒ‡ๅฎšใ™ใ‚‹ๅฟ…่ฆใฏใ‚ใ‚Šใพใ›ใ‚“ใŒใ€<code>bitsandbytes</code> ใจ <code>accelerate</code> ใŒใ‚คใƒณใ‚นใƒˆใƒผใƒซใ•ใ‚Œใฆใ„ใ‚‹ใ“ใจใ‚’็ขบ่ชใ™ใ‚‹ๅฟ…่ฆใŒใ‚ใ‚‹ใ“ใจใซๆณจๆ„ใ—ใฆใใ ใ•ใ„ใ€‚
ใพใŸใ€<code>device_map</code> ใฏใ‚ชใƒ—ใ‚ทใƒงใƒณใงใ™ใŒใ€ๅˆฉ็”จๅฏ่ƒฝใชใƒชใ‚ฝใƒผใ‚นไธŠใงใƒขใƒ‡ใƒซใ‚’ๅŠน็އ็š„ใซใƒ‡ใ‚ฃใ‚นใƒ‘ใƒƒใƒใ™ใ‚‹ใŸใ‚ใ€ๆŽจ่ซ–ใซใฏ <code>device_map = &#39;auto&#39;</code> ใ‚’่จญๅฎšใ™ใ‚‹ใ“ใจใŒๆŽจๅฅจใ•ใ‚Œใพใ™ใ€‚</p> <h3 class="relative group"><a id="advanced-use-cases" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#advanced-use-cases"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Advanced use cases</span></h3> <p data-svelte-h="svelte-1xz0d87">ใ“ใฎใ‚ปใ‚ฏใ‚ทใƒงใƒณใฏใ€8 ใƒ“ใƒƒใƒˆ ใƒขใƒ‡ใƒซใฎใƒญใƒผใƒ‰ใจๅฎŸ่กŒไปฅๅค–ใซไฝ•ใŒใงใใ‚‹ใ‹ใ‚’ๆŽขๆฑ‚ใ—ใŸใ„ไธŠ็ดšใƒฆใƒผใ‚ถใƒผใ‚’ๅฏพ่ฑกใจใ—ใฆใ„ใพใ™ใ€‚</p> <h4 class="relative group"><a id="offload-between-cpu-and-gpu" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#offload-between-cpu-and-gpu"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Offload between cpu and gpu</span></h4> <p data-svelte-h="svelte-yl6mei">ใ“ใฎ้ซ˜ๅบฆใชไฝฟ็”จไพ‹ใฎ 1 ใคใฏใ€ใƒขใƒ‡ใƒซใ‚’ใƒญใƒผใƒ‰ใ—ใ€<code>CPU</code>ใจ<code>GPU</code>ใฎ้–“ใง้‡ใฟใ‚’ใƒ‡ใ‚ฃใ‚นใƒ‘ใƒƒใƒใงใใ‚‹ใ“ใจใงใ™ใ€‚ CPU ไธŠใงใƒ‡ใ‚ฃใ‚นใƒ‘ใƒƒใƒใ•ใ‚Œใ‚‹้‡ใฟใฏ <strong>8 ใƒ“ใƒƒใƒˆใซๅค‰ๆ›ใ•ใ‚Œใชใ„</strong>ใŸใ‚ใ€<code>float32</code>ใซไฟๆŒใ•ใ‚Œใ‚‹ใ“ใจใซๆณจๆ„ใ—ใฆใใ ใ•ใ„ใ€‚ใ“ใฎๆฉŸ่ƒฝใฏใ€้žๅธธใซๅคง่ฆๆจกใชใƒขใƒ‡ใƒซใ‚’้ฉๅˆใ•ใ›ใ€ใใฎใƒขใƒ‡ใƒซใ‚’ GPU ใจ CPU ใฎ้–“ใงใƒ‡ใ‚ฃใ‚นใƒ‘ใƒƒใƒใ—ใŸใ„ใƒฆใƒผใ‚ถใƒผใ‚’ๅฏพ่ฑกใจใ—ใฆใ„ใพใ™ใ€‚</p> <p data-svelte-h="svelte-xbol81">ใพใšใ€<code>transformers</code> ใ‹ใ‚‰ <a href="/docs/transformers/main/ja/main_classes/quantization#transformers.BitsAndBytesConfig">BitsAndBytesConfig</a> ใ‚’ใƒญใƒผใƒ‰ใ—ใ€ๅฑžๆ€ง <code>llm_int8_enable_fp32_cpu_offload</code> ใ‚’ <code>True</code> ใซ่จญๅฎšใ—ใพใ™ใ€‚</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
quantization_config = BitsAndBytesConfig(llm_int8_enable_fp32_cpu_offload=<span class="hljs-literal">True</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1n9uav7"><code>bigscience/bloom-1b7</code>ใƒขใƒ‡ใƒซใ‚’ใƒญใƒผใƒ‰ใ™ใ‚‹ๅฟ…่ฆใŒใ‚ใ‚Šใ€<code>lm_head</code>ใ‚’้™คใใƒขใƒ‡ใƒซๅ…จไฝ“ใซโ€‹โ€‹้ฉๅˆใ™ใ‚‹ใฎใซๅๅˆ†ใช GPU RAM ใŒใ‚ใ‚‹ใจใ—ใพใ™ใ€‚ใ—ใŸใŒใฃใฆใ€ๆฌกใฎใ‚ˆใ†ใซใ‚ซใ‚นใ‚ฟใƒ  device_map ใ‚’ไฝœๆˆใ—ใพใ™ใ€‚</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->device_map = {
<span class="hljs-string">&quot;transformer.word_embeddings&quot;</span>: <span class="hljs-number">0</span>,
<span class="hljs-string">&quot;transformer.word_embeddings_layernorm&quot;</span>: <span class="hljs-number">0</span>,
<span class="hljs-string">&quot;lm_head&quot;</span>: <span class="hljs-string">&quot;cpu&quot;</span>,
<span class="hljs-string">&quot;transformer.h&quot;</span>: <span class="hljs-number">0</span>,
<span class="hljs-string">&quot;transformer.ln_f&quot;</span>: <span class="hljs-number">0</span>,
}<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-mnwhfx">ใใ—ใฆใ€ๆฌกใฎใ‚ˆใ†ใซใƒขใƒ‡ใƒซใ‚’ใƒญใƒผใƒ‰ใ—ใพใ™ใ€‚</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->model_8bit = AutoModelForCausalLM.from_pretrained(
<span class="hljs-string">&quot;bigscience/bloom-1b7&quot;</span>,
device_map=device_map,
quantization_config=quantization_config,
)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-rwyjy1">ไปฅไธŠใงใ™๏ผใƒขใƒ‡ใƒซใ‚’ๆฅฝใ—ใ‚“ใงใใ ใ•ใ„๏ผ</p> <h4 class="relative group"><a id="play-with-llmint8threshold" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#play-with-llmint8threshold"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Play with llm_int8_threshold</span></h4> <p data-svelte-h="svelte-1ozkl3z"><code>llm_int8_threshold</code> ๅผ•ๆ•ฐใ‚’ๆ“ไฝœใ—ใฆใ€ๅค–ใ‚Œๅ€คใฎใ—ใใ„ๅ€คใ‚’ๅค‰ๆ›ดใงใใพใ™ใ€‚ ๅค–ใ‚Œๅ€ค ใจใฏใ€็‰นๅฎšใฎใ—ใใ„ๅ€คใ‚ˆใ‚Šๅคงใใ„้š ใ‚ŒใŸ็Šถๆ…‹ใฎๅ€คใงใ™ใ€‚
ใ“ใ‚Œใฏใ€<code>LLM.int8()</code>่ซ–ๆ–‡ใง่ชฌๆ˜Žใ•ใ‚Œใฆใ„ใ‚‹ๅค–ใ‚Œๅ€คๆคœๅ‡บใฎๅค–ใ‚Œๅ€คใ—ใใ„ๅ€คใซๅฏพๅฟœใ—ใพใ™ใ€‚ใ“ใฎใ—ใใ„ๅ€คใ‚’่ถ…ใˆใ‚‹้š ใ—็Šถๆ…‹ใฎๅ€คใฏๅค–ใ‚Œๅ€คใจใฟใชใ•ใ‚Œใ€ใใ‚Œใ‚‰ใฎๅ€คใซๅฏพใ™ใ‚‹ๆ“ไฝœใฏ fp16 ใงๅฎŸ่กŒใ•ใ‚Œใพใ™ใ€‚้€šๅธธใ€ๅ€คใฏๆญฃ่ฆๅˆ†ๅธƒใ—ใพใ™ใ€‚ใคใพใ‚Šใ€ใปใจใ‚“ใฉใฎๅ€คใฏ [-3.5, 3.5] ใฎ็ฏ„ๅ›ฒๅ†…ใซใ‚ใ‚Šใพใ™ใŒใ€ๅคง่ฆๆจกใชใƒขใƒ‡ใƒซใงใฏๅคงใใ็•ฐใชใ‚‹ๅˆ†ๅธƒใ‚’็คบใ™ไพ‹ๅค–็š„ใช็ณป็ตฑ็š„ๅค–ใ‚Œๅ€คใŒใ„ใใคใ‹ใ‚ใ‚Šใพใ™ใ€‚ใ“ใ‚Œใ‚‰ใฎๅค–ใ‚Œๅ€คใฏใ€ๅคšใใฎๅ ดๅˆ [-60, -6] ใพใŸใฏ [6, 60] ใฎ็ฏ„ๅ›ฒๅ†…ใซใ‚ใ‚Šใพใ™ใ€‚ Int8 ้‡ๅญๅŒ–ใฏใ€ๅคงใใ•ใŒ 5 ็จ‹ๅบฆใพใงใฎๅ€คใงใฏใ†ใพใๆฉŸ่ƒฝใ—ใพใ™ใŒใ€ใใ‚Œใ‚’่ถ…ใˆใ‚‹ใจใ€ใƒ‘ใƒ•ใ‚ฉใƒผใƒžใƒณใ‚นใŒๅคงๅน…ใซไฝŽไธ‹ใ—ใพใ™ใ€‚้ฉๅˆ‡ใชใƒ‡ใƒ•ใ‚ฉใƒซใƒˆใฎใ—ใใ„ๅ€คใฏ 6 ใงใ™ใŒใ€ใ‚ˆใ‚Šไธๅฎ‰ๅฎšใชใƒขใƒ‡ใƒซ (ๅฐ่ฆๆจกใชใƒขใƒ‡ใƒซใ€ๅพฎ่ชฟๆ•ด) ใงใฏใ€ใ‚ˆใ‚ŠไฝŽใ„ใ—ใใ„ๅ€คใŒๅฟ…่ฆใซใชใ‚‹ๅ ดๅˆใŒใ‚ใ‚Šใพใ™ใ€‚
ใ“ใฎๅผ•ๆ•ฐใฏใ€ใƒขใƒ‡ใƒซใฎๆŽจ่ซ–้€Ÿๅบฆใซๅฝฑ้Ÿฟใ‚’ไธŽใˆใ‚‹ๅฏ่ƒฝๆ€งใŒใ‚ใ‚Šใพใ™ใ€‚ใ“ใฎใƒ‘ใƒฉใƒกใƒผใ‚ฟใ‚’่ฉฆใ—ใฆใฟใฆใ€ใƒฆใƒผใ‚นใ‚ฑใƒผใ‚นใซๆœ€้ฉใชใƒ‘ใƒฉใƒกใƒผใ‚ฟใ‚’่ฆ‹ใคใ‘ใ‚‹ใ“ใจใ‚’ใŠๅ‹งใ‚ใ—ใพใ™ใ€‚</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
model_id = <span class="hljs-string">&quot;bigscience/bloom-1b7&quot;</span>
quantization_config = BitsAndBytesConfig(
llm_int8_threshold=<span class="hljs-number">10</span>,
)
model_8bit = AutoModelForCausalLM.from_pretrained(
model_id,
device_map=device_map,
quantization_config=quantization_config,
)
tokenizer = AutoTokenizer.from_pretrained(model_id)<!-- HTML_TAG_END --></pre></div> <h4 class="relative group"><a id="skip-the-conversion-of-some-modules" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#skip-the-conversion-of-some-modules"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Skip the conversion of some modules</span></h4> <p data-svelte-h="svelte-bn1879">ไธ€้ƒจใฎใƒขใƒ‡ใƒซใซใฏใ€ๅฎ‰ๅฎšๆ€งใ‚’็ขบไฟใ™ใ‚‹ใŸใ‚ใซ 8 ใƒ“ใƒƒใƒˆใซๅค‰ๆ›ใ™ใ‚‹ๅฟ…่ฆใŒใชใ„ใƒขใ‚ธใƒฅใƒผใƒซใŒใ„ใใคใ‹ใ‚ใ‚Šใพใ™ใ€‚ใŸใจใˆใฐใ€ใ‚ธใƒฅใƒผใ‚ฏใƒœใƒƒใ‚ฏใ‚น ใƒขใƒ‡ใƒซใซใฏใ€ใ‚นใ‚ญใƒƒใƒ—ใ™ใ‚‹ๅฟ…่ฆใŒใ‚ใ‚‹ใ„ใใคใ‹ใฎ <code>lm_head</code> ใƒขใ‚ธใƒฅใƒผใƒซใŒใ‚ใ‚Šใพใ™ใ€‚ <code>llm_int8_skip_modules</code> ใง้Šใ‚“ใงใฟใ‚‹</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
model_id = <span class="hljs-string">&quot;bigscience/bloom-1b7&quot;</span>
quantization_config = BitsAndBytesConfig(
llm_int8_skip_modules=[<span class="hljs-string">&quot;lm_head&quot;</span>],
)
model_8bit = AutoModelForCausalLM.from_pretrained(
model_id,
device_map=device_map,
quantization_config=quantization_config,
)
tokenizer = AutoTokenizer.from_pretrained(model_id)<!-- HTML_TAG_END --></pre></div> <h4 class="relative group"><a id="fine-tune-a-model-that-has-been-loaded-in-8-bit" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#fine-tune-a-model-that-has-been-loaded-in-8-bit"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Fine-tune a model that has been loaded in 8-bit</span></h4> <p data-svelte-h="svelte-mgdccl">Hugging Face ใ‚จใ‚ณใ‚ทใ‚นใƒ†ใƒ ใฎใ‚ขใƒ€ใƒ—ใ‚ฟใƒผใฎๅ…ฌๅผใ‚ตใƒใƒผใƒˆใซใ‚ˆใ‚Šใ€8 ใƒ“ใƒƒใƒˆใงใƒญใƒผใƒ‰ใ•ใ‚ŒใŸใƒขใƒ‡ใƒซใ‚’ๅพฎ่ชฟๆ•ดใงใใพใ™ใ€‚
ใ“ใ‚Œใซใ‚ˆใ‚Šใ€ๅ˜ไธ€ใฎ Google Colab ใง<code>flan-t5-large</code>ใ‚„<code>facebook/opt-6.7b</code>ใชใฉใฎๅคง่ฆๆจกใƒขใƒ‡ใƒซใ‚’ๅพฎ่ชฟๆ•ดใ™ใ‚‹ใ“ใจใŒใงใใพใ™ใ€‚่ฉณ็ดฐใซใคใ„ใฆใฏใ€<a href="https://github.com/huggingface/peft" rel="nofollow"><code>peft</code></a> ใƒฉใ‚คใƒ–ใƒฉใƒชใ‚’ใ”่ฆงใใ ใ•ใ„ใ€‚</p> <p data-svelte-h="svelte-14qh4dh">ใƒˆใƒฌใƒผใƒ‹ใƒณใ‚ฐ็”จใฎใƒขใƒ‡ใƒซใ‚’ใƒญใƒผใƒ‰ใ™ใ‚‹ใจใใซ <code>device_map</code> ใ‚’ๆธกใ™ๅฟ…่ฆใŒใชใ„ใ“ใจใซๆณจๆ„ใ—ใฆใใ ใ•ใ„ใ€‚ใƒขใƒ‡ใƒซใŒ GPU ใซ่‡ชๅ‹•็š„ใซใƒญใƒผใƒ‰ใ•ใ‚Œใพใ™ใ€‚ๅฟ…่ฆใซๅฟœใ˜ใฆใ€ใƒ‡ใƒใ‚คใ‚น ใƒžใƒƒใƒ—ใ‚’็‰นๅฎšใฎใƒ‡ใƒใ‚คใ‚นใซ่จญๅฎšใ™ใ‚‹ใ“ใจใ‚‚ใงใใพใ™ (ไพ‹: <code>cuda:0</code>ใ€<code>0</code>ใ€<code>torch.device(&#39;cuda:0&#39;)</code>)ใ€‚ <code>device_map=auto</code>ใฏๆŽจ่ซ–ใฎใฟใซไฝฟ็”จใ™ใ‚‹ๅฟ…่ฆใŒใ‚ใ‚‹ใ“ใจใซๆณจๆ„ใ—ใฆใใ ใ•ใ„ใ€‚</p> <h3 class="relative group"><a id="transformers.BitsAndBytesConfig" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#transformers.BitsAndBytesConfig"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>BitsAndBytesConfig</span></h3> <div class="docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"> <div><span class="group flex space-x-1.5 items-center text-gray-800 bg-gradient-to-r rounded-tr-lg -mt-4 -ml-4 pt-3 px-2.5" id="transformers.BitsAndBytesConfig"><!-- HTML_TAG_START --><h3 class="!m-0"><span class="flex-1 break-all md:text-lg bg-gradient-to-r px-2.5 py-1.5 rounded-xl from-indigo-50/70 to-white dark:from-gray-900 dark:to-gray-950 dark:text-indigo-300 text-indigo-700"><svg class="mr-1.5 text-indigo-500 inline-block -mt-0.5" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width=".8em" height=".8em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 24 24"><path class="uim-quaternary" d="M20.23 7.24L12 12L3.77 7.24a1.98 1.98 0 0 1 .7-.71L11 2.76c.62-.35 1.38-.35 2 0l6.53 3.77c.29.173.531.418.7.71z" opacity=".25" fill="currentColor"></path><path class="uim-tertiary" d="M12 12v9.5a2.09 2.09 0 0 1-.91-.21L4.5 17.48a2.003 2.003 0 0 1-1-1.73v-7.5a2.06 2.06 0 0 1 .27-1.01L12 12z" opacity=".5" fill="currentColor"></path><path class="uim-primary" d="M20.5 8.25v7.5a2.003 2.003 0 0 1-1 1.73l-6.62 3.82c-.275.13-.576.198-.88.2V12l8.23-4.76c.175.308.268.656.27 1.01z" fill="currentColor"></path></svg><span class="font-light">class</span> <span class="font-medium">transformers.</span><span class="font-semibold">BitsAndBytesConfig</span></span></h3><!-- HTML_TAG_END --> <a id="transformers.BitsAndBytesConfig" class="header-link invisible with-hover:group-hover:visible pr-2" href="#transformers.BitsAndBytesConfig"><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></a> <a class="!ml-auto !text-gray-400 !no-underline text-sm flex items-center" href="https://github.com/huggingface/transformers/blob/main/src/transformers/utils/quantization_config.py#L295" target="_blank"><span data-svelte-h="svelte-1kd6by1">&lt;</span> <span class="hidden md:block mx-0.5 hover:!underline" data-svelte-h="svelte-122apf4">source</span> <span data-svelte-h="svelte-x0xyl0">&gt;</span></a></span> <p class="font-mono text-xs md:text-sm !leading-relaxed !my-6"><span data-svelte-h="svelte-8mvn6a">(</span> <span class="comma cursor-pointer"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">load_in_8bit<span class="opacity-60"> = False</span></span> </span><span class="comma cursor-pointer"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">load_in_4bit<span class="opacity-60"> = False</span></span> </span><span class="comma cursor-pointer"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">llm_int8_threshold<span class="opacity-60"> = 6.0</span></span> </span><span class="comma cursor-pointer"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">llm_int8_skip_modules<span class="opacity-60"> = None</span></span> </span><span class="comma cursor-pointer"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">llm_int8_enable_fp32_cpu_offload<span class="opacity-60"> = False</span></span> </span><span class="comma cursor-pointer"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">llm_int8_has_fp16_weight<span class="opacity-60"> = False</span></span> </span><span class="comma cursor-pointer"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">bnb_4bit_compute_dtype<span class="opacity-60"> = None</span></span> </span><span class="comma cursor-pointer"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">bnb_4bit_quant_type<span class="opacity-60"> = 'fp4'</span></span> </span><span class="comma cursor-pointer"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">bnb_4bit_use_double_quant<span class="opacity-60"> = False</span></span> </span><span class="comma cursor-pointer"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">bnb_4bit_quant_storage<span class="opacity-60"> = None</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">**kwargs<span class="opacity-60"></span></span> </span> <span data-svelte-h="svelte-1jq0pl7">)</span> </p> <div class="!mb-10 relative docstring-details "> <p class="flex items-center font-semibold !mt-2 !mb-2 text-gray-800" data-svelte-h="svelte-lt6pb6">Parameters <span class="flex-auto border-t-2 border-gray-100 dark:border-gray-700 ml-3"></span></p> <ul class="px-2"><li class="text-base !pl-4 my-3 rounded "><span class="group flex space-x-1.5 items-start"><a id="transformers.BitsAndBytesConfig.load_in_8bit" class="header-link block pr-0.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="&amp;amp;num;transformers.BitsAndBytesConfig.load_in_8bit"><span><svg class="text-smd" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span><!-- HTML_TAG_START --><strong>load_in_8bit</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) &#x2014;
This flag is used to enable 8-bit quantization with LLM.int8().<!-- HTML_TAG_END --> </span></span> </li><li class="text-base !pl-4 my-3 rounded "><span class="group flex space-x-1.5 items-start"><a id="transformers.BitsAndBytesConfig.load_in_4bit" class="header-link block pr-0.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="&amp;amp;num;transformers.BitsAndBytesConfig.load_in_4bit"><span><svg class="text-smd" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span><!-- HTML_TAG_START --><strong>load_in_4bit</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) &#x2014;
This flag is used to enable 4-bit quantization by replacing the Linear layers with FP4/NF4 layers from
<code>bitsandbytes</code>.<!-- HTML_TAG_END --> </span></span> </li><li class="text-base !pl-4 my-3 rounded "><span class="group flex space-x-1.5 items-start"><a id="transformers.BitsAndBytesConfig.llm_int8_threshold" class="header-link block pr-0.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="&amp;amp;num;transformers.BitsAndBytesConfig.llm_int8_threshold"><span><svg class="text-smd" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span><!-- HTML_TAG_START --><strong>llm_int8_threshold</strong> (<code>float</code>, <em>optional</em>, defaults to 6.0) &#x2014;
This corresponds to the outlier threshold for outlier detection as described in <code>LLM.int8() : 8-bit Matrix Multiplication for Transformers at Scale</code> paper: <a href="https://arxiv.org/abs/2208.07339" rel="nofollow">https://arxiv.org/abs/2208.07339</a> Any hidden states value
that is above this threshold will be considered an outlier and the operation on those values will be done
in fp16. Values are usually normally distributed, that is, most values are in the range [-3.5, 3.5], but
there are some exceptional systematic outliers that are very differently distributed for large models.
These outliers are often in the interval [-60, -6] or [6, 60]. Int8 quantization works well for values of
magnitude ~5, but beyond that, there is a significant performance penalty. A good default threshold is 6,
but a lower threshold might be needed for more unstable models (small models, fine-tuning).<!-- HTML_TAG_END --> </span></span> </li><li class="text-base !pl-4 my-3 rounded "><span class="group flex space-x-1.5 items-start"><a id="transformers.BitsAndBytesConfig.llm_int8_skip_modules" class="header-link block pr-0.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="&amp;amp;num;transformers.BitsAndBytesConfig.llm_int8_skip_modules"><span><svg class="text-smd" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span><!-- HTML_TAG_START --><strong>llm_int8_skip_modules</strong> (<code>List[str]</code>, <em>optional</em>) &#x2014;
An explicit list of the modules that we do not want to convert in 8-bit. This is useful for models such as
Jukebox that has several heads in different places and not necessarily at the last position. For example
for <code>CausalLM</code> models, the last <code>lm_head</code> is kept in its original <code>dtype</code>.<!-- HTML_TAG_END --> </span></span> </li><li class="text-base !pl-4 my-3 rounded "><span class="group flex space-x-1.5 items-start"><a id="transformers.BitsAndBytesConfig.llm_int8_enable_fp32_cpu_offload" class="header-link block pr-0.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="&amp;amp;num;transformers.BitsAndBytesConfig.llm_int8_enable_fp32_cpu_offload"><span><svg class="text-smd" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span><!-- HTML_TAG_START --><strong>llm_int8_enable_fp32_cpu_offload</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) &#x2014;
This flag is used for advanced use cases and users that are aware of this feature. If you want to split
your model in different parts and run some parts in int8 on GPU and some parts in fp32 on CPU, you can use
this flag. This is useful for offloading large models such as <code>google/flan-t5-xxl</code>. Note that the int8
operations will not be run on CPU.<!-- HTML_TAG_END --> </span></span> </li><li class="text-base !pl-4 my-3 rounded "><span class="group flex space-x-1.5 items-start"><a id="transformers.BitsAndBytesConfig.llm_int8_has_fp16_weight" class="header-link block pr-0.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="&amp;amp;num;transformers.BitsAndBytesConfig.llm_int8_has_fp16_weight"><span><svg class="text-smd" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span><!-- HTML_TAG_START --><strong>llm_int8_has_fp16_weight</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) &#x2014;
This flag runs LLM.int8() with 16-bit main weights. This is useful for fine-tuning as the weights do not
have to be converted back and forth for the backward pass.<!-- HTML_TAG_END --> </span></span> </li><li class="text-base !pl-4 my-3 rounded "><span class="group flex space-x-1.5 items-start"><a id="transformers.BitsAndBytesConfig.bnb_4bit_compute_dtype" class="header-link block pr-0.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="&amp;amp;num;transformers.BitsAndBytesConfig.bnb_4bit_compute_dtype"><span><svg class="text-smd" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span><!-- HTML_TAG_START --><strong>bnb_4bit_compute_dtype</strong> (<code>torch.dtype</code> or str, <em>optional</em>, defaults to <code>torch.float32</code>) &#x2014;
This sets the computational type which might be different than the input type. For example, inputs might be
fp32, but computation can be set to bf16 for speedups.<!-- HTML_TAG_END --> </span></span> </li><li class="text-base !pl-4 my-3 rounded "><span class="group flex space-x-1.5 items-start"><a id="transformers.BitsAndBytesConfig.bnb_4bit_quant_type" class="header-link block pr-0.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="&amp;amp;num;transformers.BitsAndBytesConfig.bnb_4bit_quant_type"><span><svg class="text-smd" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span><!-- HTML_TAG_START --><strong>bnb_4bit_quant_type</strong> (<code>str</code>, <em>optional</em>, defaults to <code>&quot;fp4&quot;</code>) &#x2014;
This sets the quantization data type in the bnb.nn.Linear4Bit layers. Options are FP4 and NF4 data types
which are specified by <code>fp4</code> or <code>nf4</code>.<!-- HTML_TAG_END --> </span></span> </li><li class="text-base !pl-4 my-3 rounded "><span class="group flex space-x-1.5 items-start"><a id="transformers.BitsAndBytesConfig.bnb_4bit_use_double_quant" class="header-link block pr-0.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="&amp;amp;num;transformers.BitsAndBytesConfig.bnb_4bit_use_double_quant"><span><svg class="text-smd" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span><!-- HTML_TAG_START --><strong>bnb_4bit_use_double_quant</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) &#x2014;
This flag is used for nested quantization where the quantization constants from the first quantization are
quantized again.<!-- HTML_TAG_END --> </span></span> </li><li class="text-base !pl-4 my-3 rounded "><span class="group flex space-x-1.5 items-start"><a id="transformers.BitsAndBytesConfig.bnb_4bit_quant_storage" class="header-link block pr-0.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="&amp;amp;num;transformers.BitsAndBytesConfig.bnb_4bit_quant_storage"><span><svg class="text-smd" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span><!-- HTML_TAG_START --><strong>bnb_4bit_quant_storage</strong> (<code>torch.dtype</code> or str, <em>optional</em>, defaults to <code>torch.uint8</code>) &#x2014;
This sets the storage type to pack the quanitzed 4-bit prarams.<!-- HTML_TAG_END --> </span></span> </li><li class="text-base !pl-4 my-3 rounded "><span class="group flex space-x-1.5 items-start"><a id="transformers.BitsAndBytesConfig.kwargs" class="header-link block pr-0.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="&amp;amp;num;transformers.BitsAndBytesConfig.kwargs"><span><svg class="text-smd" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span><!-- HTML_TAG_START --><strong>kwargs</strong> (<code>Dict[str, Any]</code>, <em>optional</em>) &#x2014;
Additional parameters from which to initialize the configuration object.<!-- HTML_TAG_END --> </span></span> </li></ul> </div></div> <p data-svelte-h="svelte-woamwr">This is a wrapper class about all possible attributes and features that you can play with a model that has been
loaded using <code>bitsandbytes</code>.</p> <p data-svelte-h="svelte-ki5gis">This replaces <code>load_in_8bit</code> or <code>load_in_4bit</code>therefore both options are mutually exclusive.</p> <p data-svelte-h="svelte-8qsk2q">Currently only supports <code>LLM.int8()</code>, <code>FP4</code>, and <code>NF4</code> quantization. If more methods are added to <code>bitsandbytes</code>,
then more arguments will be added to this class.</p> <div class="docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"> <div><span class="group flex space-x-1.5 items-center text-gray-800 bg-gradient-to-r rounded-tr-lg -mt-4 -ml-4 pt-3 px-2.5" id="transformers.BitsAndBytesConfig.is_quantizable"><!-- HTML_TAG_START --><h4 class="!m-0"><span class="flex-1 rounded-xl py-0.5 break-all bg-gradient-to-r from-blue-50/60 to-white dark:from-gray-900 dark:to-gray-950 text-blue-700 dark:text-blue-300 font-medium px-2"><svg width="1em" height="1em" viewBox="0 0 32 33" class="mr-1 inline-block -mt-0.5" xmlns="http://www.w3.org/2000/svg"><path d="M5.80566 18.3545C4.90766 17.4565 4.90766 16.0005 5.80566 15.1025L14.3768 6.53142C15.2748 5.63342 16.7307 5.63342 17.6287 6.53142L26.1999 15.1025C27.0979 16.0005 27.0979 17.4565 26.1999 18.3545L17.6287 26.9256C16.7307 27.8236 15.2748 27.8236 14.3768 26.9256L5.80566 18.3545Z" fill="currentColor" fill-opacity="0.25"/><path fill-rule="evenodd" clip-rule="evenodd" d="M16.4801 13.9619C16.4801 12.9761 16.7467 12.5436 16.9443 12.3296C17.1764 12.078 17.5731 11.8517 18.2275 11.707C18.8821 11.5623 19.638 11.5342 20.4038 11.5582C20.7804 11.57 21.1341 11.5932 21.4719 11.6156L21.5263 11.6193C21.8195 11.6389 22.1626 11.6618 22.4429 11.6618V7.40825C22.3209 7.40825 22.1219 7.39596 21.7544 7.37149C21.4202 7.34925 20.9976 7.32115 20.5371 7.30672C19.6286 7.27824 18.4672 7.29779 17.3093 7.55377C16.1512 7.8098 14.8404 8.33724 13.8181 9.4452C12.7612 10.5907 12.2266 12.1236 12.2266 13.9619V15.0127H10.6836V19.2662H12.2266V26.6332H16.4801V19.2662H20.3394V15.0127H16.4801V13.9619Z" fill="currentColor"/></svg>is_quantizable</span></h4><!-- HTML_TAG_END --> <a id="transformers.BitsAndBytesConfig.is_quantizable" class="header-link invisible with-hover:group-hover:visible pr-2" href="#transformers.BitsAndBytesConfig.is_quantizable"><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></a> <a class="!ml-auto !text-gray-400 !no-underline text-sm flex items-center" href="https://github.com/huggingface/transformers/blob/main/src/transformers/utils/quantization_config.py#L466" target="_blank"><span data-svelte-h="svelte-1kd6by1">&lt;</span> <span class="hidden md:block mx-0.5 hover:!underline" data-svelte-h="svelte-122apf4">source</span> <span data-svelte-h="svelte-x0xyl0">&gt;</span></a></span> <p class="font-mono text-xs md:text-sm !leading-relaxed !my-6"><span data-svelte-h="svelte-8mvn6a">(</span> <span data-svelte-h="svelte-1jq0pl7">)</span> </p> <div class="!mb-10 relative docstring-details "> </div></div> <p data-svelte-h="svelte-10tvzyv">Returns <code>True</code> if the model is quantizable, <code>False</code> otherwise.</p></div> <div class="docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"> <div><span class="group flex space-x-1.5 items-center text-gray-800 bg-gradient-to-r rounded-tr-lg -mt-4 -ml-4 pt-3 px-2.5" id="transformers.BitsAndBytesConfig.post_init"><!-- HTML_TAG_START --><h4 class="!m-0"><span class="flex-1 rounded-xl py-0.5 break-all bg-gradient-to-r from-blue-50/60 to-white dark:from-gray-900 dark:to-gray-950 text-blue-700 dark:text-blue-300 font-medium px-2"><svg width="1em" height="1em" viewBox="0 0 32 33" class="mr-1 inline-block -mt-0.5" xmlns="http://www.w3.org/2000/svg"><path d="M5.80566 18.3545C4.90766 17.4565 4.90766 16.0005 5.80566 15.1025L14.3768 6.53142C15.2748 5.63342 16.7307 5.63342 17.6287 6.53142L26.1999 15.1025C27.0979 16.0005 27.0979 17.4565 26.1999 18.3545L17.6287 26.9256C16.7307 27.8236 15.2748 27.8236 14.3768 26.9256L5.80566 18.3545Z" fill="currentColor" fill-opacity="0.25"/><path fill-rule="evenodd" clip-rule="evenodd" d="M16.4801 13.9619C16.4801 12.9761 16.7467 12.5436 16.9443 12.3296C17.1764 12.078 17.5731 11.8517 18.2275 11.707C18.8821 11.5623 19.638 11.5342 20.4038 11.5582C20.7804 11.57 21.1341 11.5932 21.4719 11.6156L21.5263 11.6193C21.8195 11.6389 22.1626 11.6618 22.4429 11.6618V7.40825C22.3209 7.40825 22.1219 7.39596 21.7544 7.37149C21.4202 7.34925 20.9976 7.32115 20.5371 7.30672C19.6286 7.27824 18.4672 7.29779 17.3093 7.55377C16.1512 7.8098 14.8404 8.33724 13.8181 9.4452C12.7612 10.5907 12.2266 12.1236 12.2266 13.9619V15.0127H10.6836V19.2662H12.2266V26.6332H16.4801V19.2662H20.3394V15.0127H16.4801V13.9619Z" fill="currentColor"/></svg>post_init</span></h4><!-- HTML_TAG_END --> <a id="transformers.BitsAndBytesConfig.post_init" class="header-link invisible with-hover:group-hover:visible pr-2" href="#transformers.BitsAndBytesConfig.post_init"><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></a> <a class="!ml-auto !text-gray-400 !no-underline text-sm flex items-center" href="https://github.com/huggingface/transformers/blob/main/src/transformers/utils/quantization_config.py#L429" target="_blank"><span data-svelte-h="svelte-1kd6by1">&lt;</span> <span class="hidden md:block mx-0.5 hover:!underline" data-svelte-h="svelte-122apf4">source</span> <span data-svelte-h="svelte-x0xyl0">&gt;</span></a></span> <p class="font-mono text-xs md:text-sm !leading-relaxed !my-6"><span data-svelte-h="svelte-8mvn6a">(</span> <span data-svelte-h="svelte-1jq0pl7">)</span> </p> <div class="!mb-10 relative docstring-details "> </div></div> <p data-svelte-h="svelte-gy26u4">Safety checker that arguments are correct - also replaces some NoneType arguments with their default values.</p></div> <div class="docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"> <div><span class="group flex space-x-1.5 items-center text-gray-800 bg-gradient-to-r rounded-tr-lg -mt-4 -ml-4 pt-3 px-2.5" id="transformers.BitsAndBytesConfig.quantization_method"><!-- HTML_TAG_START --><h4 class="!m-0"><span class="flex-1 rounded-xl py-0.5 break-all bg-gradient-to-r from-blue-50/60 to-white dark:from-gray-900 dark:to-gray-950 text-blue-700 dark:text-blue-300 font-medium px-2"><svg width="1em" height="1em" viewBox="0 0 32 33" class="mr-1 inline-block -mt-0.5" xmlns="http://www.w3.org/2000/svg"><path d="M5.80566 18.3545C4.90766 17.4565 4.90766 16.0005 5.80566 15.1025L14.3768 6.53142C15.2748 5.63342 16.7307 5.63342 17.6287 6.53142L26.1999 15.1025C27.0979 16.0005 27.0979 17.4565 26.1999 18.3545L17.6287 26.9256C16.7307 27.8236 15.2748 27.8236 14.3768 26.9256L5.80566 18.3545Z" fill="currentColor" fill-opacity="0.25"/><path fill-rule="evenodd" clip-rule="evenodd" d="M16.4801 13.9619C16.4801 12.9761 16.7467 12.5436 16.9443 12.3296C17.1764 12.078 17.5731 11.8517 18.2275 11.707C18.8821 11.5623 19.638 11.5342 20.4038 11.5582C20.7804 11.57 21.1341 11.5932 21.4719 11.6156L21.5263 11.6193C21.8195 11.6389 22.1626 11.6618 22.4429 11.6618V7.40825C22.3209 7.40825 22.1219 7.39596 21.7544 7.37149C21.4202 7.34925 20.9976 7.32115 20.5371 7.30672C19.6286 7.27824 18.4672 7.29779 17.3093 7.55377C16.1512 7.8098 14.8404 8.33724 13.8181 9.4452C12.7612 10.5907 12.2266 12.1236 12.2266 13.9619V15.0127H10.6836V19.2662H12.2266V26.6332H16.4801V19.2662H20.3394V15.0127H16.4801V13.9619Z" fill="currentColor"/></svg>quantization_method</span></h4><!-- HTML_TAG_END --> <a id="transformers.BitsAndBytesConfig.quantization_method" class="header-link invisible with-hover:group-hover:visible pr-2" href="#transformers.BitsAndBytesConfig.quantization_method"><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></a> <a class="!ml-auto !text-gray-400 !no-underline text-sm flex items-center" href="https://github.com/huggingface/transformers/blob/main/src/transformers/utils/quantization_config.py#L472" target="_blank"><span data-svelte-h="svelte-1kd6by1">&lt;</span> <span class="hidden md:block mx-0.5 hover:!underline" data-svelte-h="svelte-122apf4">source</span> <span data-svelte-h="svelte-x0xyl0">&gt;</span></a></span> <p class="font-mono text-xs md:text-sm !leading-relaxed !my-6"><span data-svelte-h="svelte-8mvn6a">(</span> <span data-svelte-h="svelte-1jq0pl7">)</span> </p> <div class="!mb-10 relative docstring-details "> </div></div> <p data-svelte-h="svelte-19bn0da">This method returns the quantization method used for the model. If the model is not quantizable, it returns
<code>None</code>.</p></div> <div class="docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"> <div><span class="group flex space-x-1.5 items-center text-gray-800 bg-gradient-to-r rounded-tr-lg -mt-4 -ml-4 pt-3 px-2.5" id="transformers.BitsAndBytesConfig.to_diff_dict"><!-- HTML_TAG_START --><h4 class="!m-0"><span class="flex-1 rounded-xl py-0.5 break-all bg-gradient-to-r from-blue-50/60 to-white dark:from-gray-900 dark:to-gray-950 text-blue-700 dark:text-blue-300 font-medium px-2"><svg width="1em" height="1em" viewBox="0 0 32 33" class="mr-1 inline-block -mt-0.5" xmlns="http://www.w3.org/2000/svg"><path d="M5.80566 18.3545C4.90766 17.4565 4.90766 16.0005 5.80566 15.1025L14.3768 6.53142C15.2748 5.63342 16.7307 5.63342 17.6287 6.53142L26.1999 15.1025C27.0979 16.0005 27.0979 17.4565 26.1999 18.3545L17.6287 26.9256C16.7307 27.8236 15.2748 27.8236 14.3768 26.9256L5.80566 18.3545Z" fill="currentColor" fill-opacity="0.25"/><path fill-rule="evenodd" clip-rule="evenodd" d="M16.4801 13.9619C16.4801 12.9761 16.7467 12.5436 16.9443 12.3296C17.1764 12.078 17.5731 11.8517 18.2275 11.707C18.8821 11.5623 19.638 11.5342 20.4038 11.5582C20.7804 11.57 21.1341 11.5932 21.4719 11.6156L21.5263 11.6193C21.8195 11.6389 22.1626 11.6618 22.4429 11.6618V7.40825C22.3209 7.40825 22.1219 7.39596 21.7544 7.37149C21.4202 7.34925 20.9976 7.32115 20.5371 7.30672C19.6286 7.27824 18.4672 7.29779 17.3093 7.55377C16.1512 7.8098 14.8404 8.33724 13.8181 9.4452C12.7612 10.5907 12.2266 12.1236 12.2266 13.9619V15.0127H10.6836V19.2662H12.2266V26.6332H16.4801V19.2662H20.3394V15.0127H16.4801V13.9619Z" fill="currentColor"/></svg>to_diff_dict</span></h4><!-- HTML_TAG_END --> <a id="transformers.BitsAndBytesConfig.to_diff_dict" class="header-link invisible with-hover:group-hover:visible pr-2" href="#transformers.BitsAndBytesConfig.to_diff_dict"><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></a> <a class="!ml-auto !text-gray-400 !no-underline text-sm flex items-center" href="https://github.com/huggingface/transformers/blob/main/src/transformers/utils/quantization_config.py#L503" target="_blank"><span data-svelte-h="svelte-1kd6by1">&lt;</span> <span class="hidden md:block mx-0.5 hover:!underline" data-svelte-h="svelte-122apf4">source</span> <span data-svelte-h="svelte-x0xyl0">&gt;</span></a></span> <p class="font-mono text-xs md:text-sm !leading-relaxed !my-6"><span data-svelte-h="svelte-8mvn6a">(</span> <span data-svelte-h="svelte-1jq0pl7">)</span> <span class="font-bold" data-svelte-h="svelte-1j6k10o">โ†’</span> <span class="rounded hover:bg-gray-400 cursor-pointer"><!-- HTML_TAG_START --><script context="module">export const metadata = 'undefined';</script><span><code>Dict[str, Any]</code></span><!-- HTML_TAG_END --></span></p> <div class="!mb-10 relative docstring-details "> <div id="transformers.BitsAndBytesConfig.to_diff_dict.returns" class="flex items-center font-semibold space-x-3 text-base !mt-0 !mb-0 text-gray-800 rounded "><p class="text-base">Returns</p> <!-- HTML_TAG_START --><script context="module">export const metadata = 'undefined';</script>
<p><code>Dict[str, Any]</code></p>
<!-- HTML_TAG_END --> <span class="flex-auto border-t-2 border-gray-100 dark:border-gray-700"></span></div> <p class="text-base"><!-- HTML_TAG_START --><script context="module">export const metadata = 'undefined';</script>
<p>Dictionary of all the attributes that make up this configuration instance,</p>
<!-- HTML_TAG_END --></p> </div></div> <p data-svelte-h="svelte-1p6bdas">Removes all attributes from config which correspond to the default config attributes for better readability and
serializes to a Python dictionary.</p></div></div> <h2 class="relative group"><a id="quantization-with--optimum" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#quantization-with--optimum"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Quantization with ๐Ÿค— optimum</span></h2> <p data-svelte-h="svelte-p3rxgx"><code>optimum</code>ใงใ‚ตใƒใƒผใƒˆใ•ใ‚Œใฆใ„ใ‚‹้‡ๅญๅŒ–ๆ–นๆณ•ใฎ่ฉณ็ดฐใซใคใ„ใฆใฏใ€<a href="https://huggingface.co/docs/optimum/index" rel="nofollow">Optimum ใƒ‰ใ‚ญใƒฅใƒกใƒณใƒˆ</a> ใ‚’ๅ‚็…งใ—ใ€ใ“ใ‚Œใ‚‰ใŒ่‡ชๅˆ†ใฎใƒฆใƒผใ‚นใ‚ฑใƒผใ‚นใซ้ฉ็”จใงใใ‚‹ใ‹ใฉใ†ใ‹ใ‚’็ขบ่ชใ—ใฆใใ ใ•ใ„ใ€‚</p> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/transformers/blob/main/docs/source/ja/main_classes/quantization.md" target="_blank"><span data-svelte-h="svelte-1kd6by1">&lt;</span> <span data-svelte-h="svelte-x0xyl0">&gt;</span> <span data-svelte-h="svelte-1dajgef"><span class="underline ml-1.5">Update</span> on GitHub</span></a> <p></p>
<script>
{
__sveltekit_jement = {
assets: "/docs/transformers/main/ja",
base: "/docs/transformers/main/ja",
env: {}
};
const element = document.currentScript.parentElement;
const data = [null,null];
Promise.all([
import("/docs/transformers/main/ja/_app/immutable/entry/start.1486e459.js"),
import("/docs/transformers/main/ja/_app/immutable/entry/app.d9ae818f.js")
]).then(([kit, app]) => {
kit.start(app, element, {
node_ids: [0, 45],
data,
form: null,
error: null
});
});
}
</script>

Xet Storage Details

Size:
195 kB
ยท
Xet hash:
1babcae82edd034d477e20fef3fd21383e25e4912965586937b09072fa7e2d10

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.