Buckets:

rtrm's picture
download
raw
36.9 kB
import{s as _n,n as xn,o as $n}from"../chunks/scheduler.852ec091.js";import{S as Tn,i as qn,g as r,s as o,r as c,A as zn,h as d,f as e,c as a,j as h,u as l,x as u,k as y,y as s,a as i,v as p,d as m,t as b,w as f}from"../chunks/index.28275fd3.js";import{D as g}from"../chunks/Docstring.ca5810b0.js";import{H as It,E as wn}from"../chunks/getInferenceSnippets.f859b5ca.js";function kn(Re){let k,Ft,Qt,Rt,P,Bt,E,Be="The <code>bitsandbytes.functional</code> API provides the low-level building blocks for the library’s features.",Ut,V,Wt,M,Ue="<li>When you need direct control over quantized operations and their parameters.</li> <li>To build custom layers or operations leveraging low-bit arithmetic.</li> <li>To integrate with other ecosystem tooling.</li> <li>For experimental or research purposes requiring non-standard quantization or performance optimizations.</li>",jt,H,Gt,x,I,xe,mt,We="Performs an 8-bit integer matrix multiplication.",$e,bt,je=`A linear transformation is applied such that <code>out = A @ B.T</code>. When possible, integer tensor core hardware is
utilized to accelerate the operation.`,Jt,A,Q,Te,ft,Ge="Performs dequantization on the result of a quantized int8 matrix multiplication.",Kt,D,S,qe,ht,Je="Dequantizes a tensor with dtype <code>torch.int8</code> to <code>torch.float32</code>.",Xt,$,F,ze,yt,Ke="Quantizes a tensor with dtype <code>torch.float16</code> to <code>torch.int8</code> in accordance to the <code>LLM.int8()</code> algorithm.",we,gt,Xe='For more information, see the <a href="https://arxiv.org/abs/2208.07339" rel="nofollow">LLM.int8() paper</a>.',Yt,R,Zt,T,B,ke,vt,Ye="Dequantizes a packed 4-bit quantized tensor.",Ae,_t,Ze=`The input tensor is dequantized by dividing it into blocks of <code>blocksize</code> values.
The the absolute maximum value within these blocks is used for scaling
the non-linear dequantization.`,te,U,W,ee,j,G,ne,J,K,oe,q,X,De,xt,tn="Quantize tensor A in blocks of 4-bit values.",Ce,$t,en="Quantizes tensor A by dividing it into blocks which are independently quantized.",ae,Y,Z,ie,tt,et,se,v,nt,Le,Tt,nn="container for quantization state components to work with Params4bit and similar classes",Ne,L,ot,Oe,qt,on=`returns dict of tensors and strings to use in serialization via _save_to_state_dict()
param: packed — returns dict[str, torch.Tensor] for state_dict fit for safetensors saving`,Pe,_,at,Ee,zt,an=`unpacks components of state_dict into QuantState
where necessary, convert into strings, torch.dtype, ints, etc.`,Ve,wt,sn="qs_dict: based on state_dict, with only relevant keys, striped of prefixes.",Me,kt,rn="item with key <code>quant_state.bitsandbytes__[nf4/fp4]</code> may contain minor and non-tensor quant state items.",re,it,de,st,dn="Primitives used in the 8-bit optimizer quantization.",ce,rt,cn='For more details see <a href="https://arxiv.org/abs/1511.04561" rel="nofollow">8-Bit Approximations for Parallelism in Deep Learning</a>',le,z,dt,He,At,ln="Dequantize a tensor in blocks of values.",Ie,Dt,un=`The input tensor is dequantized by dividing it into blocks of <code>blocksize</code> values.
The the absolute maximum value within these blocks is used for scaling
the non-linear dequantization.`,ue,w,ct,Qe,Ct,pn="Quantize a tensor in blocks of values.",Se,Lt,mn=`The input tensor is quantized by dividing it into blocks of <code>blocksize</code> values.
The the absolute maximum value within these blocks is calculated for scaling
the non-linear quantization.`,pe,lt,me,C,ut,Fe,Nt,bn="Gets the memory address of the first element of a tenso",be,pt,fe,St,he;return P=new It({props:{title:"Overview",local:"overview",headingTag:"h1"}}),V=new It({props:{title:"When to Use bitsandbytes.functional",local:"when-to-use-bitsandbytesfunctional",headingTag:"h2"}}),H=new It({props:{title:"LLM.int8()",local:"bitsandbytes.functional.int8_linear_matmul",headingTag:"h2"}}),I=new g({props:{name:"bitsandbytes.functional.int8_linear_matmul",anchor:"bitsandbytes.functional.int8_linear_matmul",parameters:[{name:"A",val:": Tensor"},{name:"B",val:": Tensor"},{name:"out",val:": typing.Optional[torch.Tensor] = None"},{name:"dtype",val:" = torch.int32"}],parametersDescription:[{anchor:"bitsandbytes.functional.int8_linear_matmul.A",description:"<strong>A</strong> (<code>torch.Tensor</code>) &#x2014; The first matrix operand with the data type <code>torch.int8</code>.",name:"A"},{anchor:"bitsandbytes.functional.int8_linear_matmul.B",description:"<strong>B</strong> (<code>torch.Tensor</code>) &#x2014; The second matrix operand with the data type <code>torch.int8</code>.",name:"B"},{anchor:"bitsandbytes.functional.int8_linear_matmul.out",description:"<strong>out</strong> (<code>torch.Tensor</code>, <em>optional</em>) &#x2014; A pre-allocated tensor used to store the result.",name:"out"},{anchor:"bitsandbytes.functional.int8_linear_matmul.dtype",description:"<strong>dtype</strong> (<code>torch.dtype</code>, <em>optional</em>) &#x2014; The expected data type of the output. Defaults to <code>torch.int32</code>.",name:"dtype"}],source:"https://github.com/bitsandbytes-foundation/bitsandbytes/blob/vr_1512/bitsandbytes/functional.py#L1872",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script>
<p>The result of the operation.</p>
`,returnType:`<script context="module">export const metadata = 'undefined';<\/script>
<p><code>torch.Tensor</code></p>
`,raiseDescription:`<script context="module">export const metadata = 'undefined';<\/script>
<ul>
<li><code>NotImplementedError</code> — The operation is not supported in the current environment.</li>
<li><code>RuntimeError</code> — Raised when the cannot be completed for any other reason.</li>
</ul>
`,raiseType:`<script context="module">export const metadata = 'undefined';<\/script>
<p><code>NotImplementedError</code> or <code>RuntimeError</code></p>
`}}),Q=new g({props:{name:"bitsandbytes.functional.int8_mm_dequant",anchor:"bitsandbytes.functional.int8_mm_dequant",parameters:[{name:"A",val:": Tensor"},{name:"row_stats",val:": Tensor"},{name:"col_stats",val:": Tensor"},{name:"out",val:": typing.Optional[torch.Tensor] = None"},{name:"bias",val:": typing.Optional[torch.Tensor] = None"}],parametersDescription:[{anchor:"bitsandbytes.functional.int8_mm_dequant.A",description:"<strong>A</strong> (<code>torch.Tensor</code> with dtype <code>torch.int32</code>) &#x2014; The result of a quantized int8 matrix multiplication.",name:"A"},{anchor:"bitsandbytes.functional.int8_mm_dequant.row_stats",description:"<strong>row_stats</strong> (<code>torch.Tensor</code>) &#x2014; The row-wise quantization statistics for the lhs operand of the matrix multiplication.",name:"row_stats"},{anchor:"bitsandbytes.functional.int8_mm_dequant.col_stats",description:"<strong>col_stats</strong> (<code>torch.Tensor</code>) &#x2014; The column-wise quantization statistics for the rhs operand of the matrix multiplication.",name:"col_stats"},{anchor:"bitsandbytes.functional.int8_mm_dequant.out",description:"<strong>out</strong> (<code>torch.Tensor</code>, <em>optional</em>) &#x2014; A pre-allocated tensor to store the output of the operation.",name:"out"},{anchor:"bitsandbytes.functional.int8_mm_dequant.bias",description:"<strong>bias</strong> (<code>torch.Tensor</code>, <em>optional</em>) &#x2014; An optional bias vector to add to the result.",name:"bias"}],source:"https://github.com/bitsandbytes-foundation/bitsandbytes/blob/vr_1512/bitsandbytes/functional.py#L1898",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script>
<p>The dequantized result with an optional bias, with dtype <code>torch.float16</code>.</p>
`,returnType:`<script context="module">export const metadata = 'undefined';<\/script>
<p><code>torch.Tensor</code></p>
`}}),S=new g({props:{name:"bitsandbytes.functional.int8_vectorwise_dequant",anchor:"bitsandbytes.functional.int8_vectorwise_dequant",parameters:[{name:"A",val:": Tensor"},{name:"stats",val:": Tensor"}],parametersDescription:[{anchor:"bitsandbytes.functional.int8_vectorwise_dequant.A",description:"<strong>A</strong> (<code>torch.Tensor</code> with dtype <code>torch.int8</code>) &#x2014; The quantized int8 tensor.",name:"A"},{anchor:"bitsandbytes.functional.int8_vectorwise_dequant.stats",description:"<strong>stats</strong> (<code>torch.Tensor</code> with dtype <code>torch.float32</code>) &#x2014; The row-wise quantization statistics.",name:"stats"}],source:"https://github.com/bitsandbytes-foundation/bitsandbytes/blob/vr_1512/bitsandbytes/functional.py#L2154",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script>
<p>The dequantized tensor.</p>
`,returnType:`<script context="module">export const metadata = 'undefined';<\/script>
<p><code>torch.Tensor</code> with dtype <code>torch.float32</code></p>
`}}),F=new g({props:{name:"bitsandbytes.functional.int8_vectorwise_quant",anchor:"bitsandbytes.functional.int8_vectorwise_quant",parameters:[{name:"A",val:": Tensor"},{name:"threshold",val:" = 0.0"}],parametersDescription:[{anchor:"bitsandbytes.functional.int8_vectorwise_quant.A",description:"<strong>A</strong> (<code>torch.Tensor</code> with dtype <code>torch.float16</code>) &#x2014; The input tensor.",name:"A"},{anchor:"bitsandbytes.functional.int8_vectorwise_quant.threshold",description:`<strong>threshold</strong> (<code>float</code>, <em>optional</em>) &#x2014;
An optional threshold for sparse decomposition of outlier features.</p>
<p>No outliers are held back when 0.0. Defaults to 0.0.`,name:"threshold"}],source:"https://github.com/bitsandbytes-foundation/bitsandbytes/blob/vr_1512/bitsandbytes/functional.py#L2168",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script>
<p>A tuple containing the quantized tensor and relevant statistics.</p>
<ul>
<li><code>torch.Tensor</code> with dtype <code>torch.int8</code>: The quantized data.</li>
<li><code>torch.Tensor</code> with dtype <code>torch.float32</code>: The quantization scales.</li>
<li><code>torch.Tensor</code> with dtype <code>torch.int32</code>, <em>optional</em>: A list of column indices which contain outlier features.</li>
</ul>
`,returnType:`<script context="module">export const metadata = 'undefined';<\/script>
<p><code>Tuple[torch.Tensor, torch.Tensor, Optional[torch.Tensor]]</code></p>
`}}),R=new It({props:{title:"4-bit",local:"bitsandbytes.functional.dequantize_4bit",headingTag:"h2"}}),B=new g({props:{name:"bitsandbytes.functional.dequantize_4bit",anchor:"bitsandbytes.functional.dequantize_4bit",parameters:[{name:"A",val:": Tensor"},{name:"quant_state",val:": typing.Optional[bitsandbytes.functional.QuantState] = None"},{name:"absmax",val:": typing.Optional[torch.Tensor] = None"},{name:"out",val:": typing.Optional[torch.Tensor] = None"},{name:"blocksize",val:": typing.Optional[int] = None"},{name:"quant_type",val:" = 'fp4'"}],parametersDescription:[{anchor:"bitsandbytes.functional.dequantize_4bit.A",description:"<strong>A</strong> (<code>torch.Tensor</code>) &#x2014; The quantized input tensor.",name:"A"},{anchor:"bitsandbytes.functional.dequantize_4bit.quant_state",description:`<strong>quant_state</strong> (<code>QuantState</code>, <em>optional</em>) &#x2014;
The quantization state as returned by <code>quantize_4bit</code>.
Required if <code>absmax</code> is not provided.`,name:"quant_state"},{anchor:"bitsandbytes.functional.dequantize_4bit.absmax",description:`<strong>absmax</strong> (<code>torch.Tensor</code>, <em>optional</em>) &#x2014;
A tensor containing the scaling values.
Required if <code>quant_state</code> is not provided and ignored otherwise.`,name:"absmax"},{anchor:"bitsandbytes.functional.dequantize_4bit.out",description:"<strong>out</strong> (<code>torch.Tensor</code>, <em>optional</em>) &#x2014; A tensor to use to store the result.",name:"out"},{anchor:"bitsandbytes.functional.dequantize_4bit.blocksize",description:`<strong>blocksize</strong> (<code>int</code>, <em>optional</em>) &#x2014;
The size of the blocks. Defaults to 128 on ROCm and 64 otherwise.
Valid values are 64, 128, 256, 512, 1024, 2048, and 4096.`,name:"blocksize"},{anchor:"bitsandbytes.functional.dequantize_4bit.quant_type",description:"<strong>quant_type</strong> (<code>str</code>, <em>optional</em>) &#x2014; The data type to use: <code>nf4</code> or <code>fp4</code>. Defaults to <code>fp4</code>.",name:"quant_type"}],source:"https://github.com/bitsandbytes-foundation/bitsandbytes/blob/vr_1512/bitsandbytes/functional.py#L998",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script>
<p>The dequantized tensor.</p>
`,returnType:`<script context="module">export const metadata = 'undefined';<\/script>
<p><code>torch.Tensor</code></p>
`,raiseDescription:`<script context="module">export const metadata = 'undefined';<\/script>
<ul>
<li><code>ValueError</code> — Raised when the input data type or blocksize is not supported.</li>
</ul>
`,raiseType:`<script context="module">export const metadata = 'undefined';<\/script>
<p><code>ValueError</code></p>
`}}),W=new g({props:{name:"bitsandbytes.functional.dequantize_fp4",anchor:"bitsandbytes.functional.dequantize_fp4",parameters:[{name:"A",val:": Tensor"},{name:"quant_state",val:": typing.Optional[bitsandbytes.functional.QuantState] = None"},{name:"absmax",val:": typing.Optional[torch.Tensor] = None"},{name:"out",val:": typing.Optional[torch.Tensor] = None"},{name:"blocksize",val:": typing.Optional[int] = None"}],source:"https://github.com/bitsandbytes-foundation/bitsandbytes/blob/vr_1512/bitsandbytes/functional.py#L974"}}),G=new g({props:{name:"bitsandbytes.functional.dequantize_nf4",anchor:"bitsandbytes.functional.dequantize_nf4",parameters:[{name:"A",val:": Tensor"},{name:"quant_state",val:": typing.Optional[bitsandbytes.functional.QuantState] = None"},{name:"absmax",val:": typing.Optional[torch.Tensor] = None"},{name:"out",val:": typing.Optional[torch.Tensor] = None"},{name:"blocksize",val:": typing.Optional[int] = None"}],source:"https://github.com/bitsandbytes-foundation/bitsandbytes/blob/vr_1512/bitsandbytes/functional.py#L986"}}),K=new g({props:{name:"bitsandbytes.functional.gemv_4bit",anchor:"bitsandbytes.functional.gemv_4bit",parameters:[{name:"A",val:": Tensor"},{name:"B",val:": Tensor"},{name:"out",val:": typing.Optional[torch.Tensor] = None"},{name:"transposed_A",val:" = False"},{name:"transposed_B",val:" = False"},{name:"state",val:" = None"}],source:"https://github.com/bitsandbytes-foundation/bitsandbytes/blob/vr_1512/bitsandbytes/functional.py#L1619"}}),X=new g({props:{name:"bitsandbytes.functional.quantize_4bit",anchor:"bitsandbytes.functional.quantize_4bit",parameters:[{name:"A",val:": Tensor"},{name:"absmax",val:": typing.Optional[torch.Tensor] = None"},{name:"out",val:": typing.Optional[torch.Tensor] = None"},{name:"blocksize",val:" = None"},{name:"compress_statistics",val:" = False"},{name:"quant_type",val:" = 'fp4'"},{name:"quant_storage",val:" = 
torch.uint8"}],parametersDescription:[{anchor:"bitsandbytes.functional.quantize_4bit.A",description:"<strong>A</strong> (<code>torch.Tensor</code>) &#x2014; The input tensor. Supports <code>float16</code>, <code>bfloat16</code>, or <code>float32</code> datatypes.",name:"A"},{anchor:"bitsandbytes.functional.quantize_4bit.absmax",description:"<strong>absmax</strong> (<code>torch.Tensor</code>, <em>optional</em>) &#x2014; A tensor to use to store the absmax values.",name:"absmax"},{anchor:"bitsandbytes.functional.quantize_4bit.out",description:"<strong>out</strong> (<code>torch.Tensor</code>, <em>optional</em>) &#x2014; A tensor to use to store the result.",name:"out"},{anchor:"bitsandbytes.functional.quantize_4bit.blocksize",description:`<strong>blocksize</strong> (<code>int</code>, <em>optional</em>) &#x2014;
The size of the blocks. Defaults to 128 on ROCm and 64 otherwise.
Valid values are 64, 128, 256, 512, 1024, 2048, and 4096.`,name:"blocksize"},{anchor:"bitsandbytes.functional.quantize_4bit.compress_statistics",description:"<strong>compress_statistics</strong> (<code>bool</code>, <em>optional</em>) &#x2014; Whether to additionally quantize the absmax values. Defaults to False.",name:"compress_statistics"},{anchor:"bitsandbytes.functional.quantize_4bit.quant_type",description:"<strong>quant_type</strong> (<code>str</code>, <em>optional</em>) &#x2014; The data type to use: <code>nf4</code> or <code>fp4</code>. Defaults to <code>fp4</code>.",name:"quant_type"},{anchor:"bitsandbytes.functional.quantize_4bit.quant_storage",description:"<strong>quant_storage</strong> (<code>torch.dtype</code>, <em>optional</em>) &#x2014; The dtype of the tensor used to store the result. Defaults to <code>torch.uint8</code>.",name:"quant_storage"}],source:"https://github.com/bitsandbytes-foundation/bitsandbytes/blob/vr_1512/bitsandbytes/functional.py#L893",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script>
<p>A tuple containing the quantization results.</p>
<ul>
<li><code>torch.Tensor</code>: The quantized tensor with packed 4-bit values.</li>
<li><code>QuantState</code>: The state object used to undo the quantization.</li>
</ul>
`,returnType:`<script context="module">export const metadata = 'undefined';<\/script>
<p>Tuple[<code>torch.Tensor</code>, <code>QuantState</code>]</p>
`,raiseDescription:`<script context="module">export const metadata = 'undefined';<\/script>
<ul>
<li><code>ValueError</code> — Raised when the input data type is not supported.</li>
</ul>
`,raiseType:`<script context="module">export const metadata = 'undefined';<\/script>
<p><code>ValueError</code></p>
`}}),Z=new g({props:{name:"bitsandbytes.functional.quantize_fp4",anchor:"bitsandbytes.functional.quantize_fp4",parameters:[{name:"A",val:": Tensor"},{name:"absmax",val:": typing.Optional[torch.Tensor] = None"},{name:"out",val:": typing.Optional[torch.Tensor] = None"},{name:"blocksize",val:" = None"},{name:"compress_statistics",val:" = False"},{name:"quant_storage",val:" = torch.uint8"}],source:"https://github.com/bitsandbytes-foundation/bitsandbytes/blob/vr_1512/bitsandbytes/functional.py#L867"}}),et=new g({props:{name:"bitsandbytes.functional.quantize_nf4",anchor:"bitsandbytes.functional.quantize_nf4",parameters:[{name:"A",val:": Tensor"},{name:"absmax",val:": typing.Optional[torch.Tensor] = None"},{name:"out",val:": typing.Optional[torch.Tensor] = None"},{name:"blocksize",val:" = None"},{name:"compress_statistics",val:" = False"},{name:"quant_storage",val:" = torch.uint8"}],source:"https://github.com/bitsandbytes-foundation/bitsandbytes/blob/vr_1512/bitsandbytes/functional.py#L880"}}),nt=new g({props:{name:"class bitsandbytes.functional.QuantState",anchor:"bitsandbytes.functional.QuantState",parameters:[{name:"absmax",val:""},{name:"shape",val:" = None"},{name:"code",val:" = None"},{name:"blocksize",val:" = None"},{name:"quant_type",val:" = None"},{name:"dtype",val:" = None"},{name:"offset",val:" = None"},{name:"state2",val:" = None"}],source:"https://github.com/bitsandbytes-foundation/bitsandbytes/blob/vr_1512/bitsandbytes/functional.py#L460"}}),ot=new g({props:{name:"as_dict",anchor:"bitsandbytes.functional.QuantState.as_dict",parameters:[{name:"packed",val:" = False"}],source:"https://github.com/bitsandbytes-foundation/bitsandbytes/blob/vr_1512/bitsandbytes/functional.py#L572"}}),at=new g({props:{name:"from_dict",anchor:"bitsandbytes.functional.QuantState.from_dict",parameters:[{name:"qs_dict",val:": dict"},{name:"device",val:": device"}],source:"https://github.com/bitsandbytes-foundation/bitsandbytes/blob/vr_1512/bitsandbytes/functional.py#L521"}}),it=new 
It({props:{title:"Dynamic 8-bit Quantization",local:"bitsandbytes.functional.dequantize_blockwise",headingTag:"h2"}}),dt=new g({props:{name:"bitsandbytes.functional.dequantize_blockwise",anchor:"bitsandbytes.functional.dequantize_blockwise",parameters:[{name:"A",val:": Tensor"},{name:"quant_state",val:": typing.Optional[bitsandbytes.functional.QuantState] = None"},{name:"absmax",val:": typing.Optional[torch.Tensor] = None"},{name:"code",val:": typing.Optional[torch.Tensor] = None"},{name:"out",val:": typing.Optional[torch.Tensor] = None"},{name:"blocksize",val:": int = 4096"},{name:"nested",val:" = False"}],parametersDescription:[{anchor:"bitsandbytes.functional.dequantize_blockwise.A",description:"<strong>A</strong> (<code>torch.Tensor</code>) &#x2014; The quantized input tensor.",name:"A"},{anchor:"bitsandbytes.functional.dequantize_blockwise.quant_state",description:`<strong>quant_state</strong> (<code>QuantState</code>, <em>optional</em>) &#x2014;
The quantization state as returned by <code>quantize_blockwise</code>.
Required if <code>absmax</code> is not provided.`,name:"quant_state"},{anchor:"bitsandbytes.functional.dequantize_blockwise.absmax",description:`<strong>absmax</strong> (<code>torch.Tensor</code>, <em>optional</em>) &#x2014;
A tensor containing the scaling values.
Required if <code>quant_state</code> is not provided and ignored otherwise.`,name:"absmax"},{anchor:"bitsandbytes.functional.dequantize_blockwise.code",description:`<strong>code</strong> (<code>torch.Tensor</code>, <em>optional</em>) &#x2014;
A mapping describing the low-bit data type. Defaults to a signed 8-bit dynamic type.
For more details, see (8-Bit Approximations for Parallelism in Deep Learning)[https://arxiv.org/abs/1511.04561].
Ignored when <code>quant_state</code> is provided.`,name:"code"},{anchor:"bitsandbytes.functional.dequantize_blockwise.out",description:"<strong>out</strong> (<code>torch.Tensor</code>, <em>optional</em>) &#x2014; A tensor to use to store the result.",name:"out"},{anchor:"bitsandbytes.functional.dequantize_blockwise.blocksize",description:`<strong>blocksize</strong> (<code>int</code>, <em>optional</em>) &#x2014;
The size of the blocks. Defaults to 4096.
Valid values are 64, 128, 256, 512, 1024, 2048, and 4096.
Ignored when <code>quant_state</code> is provided.`,name:"blocksize"}],source:"https://github.com/bitsandbytes-foundation/bitsandbytes/blob/vr_1512/bitsandbytes/functional.py#L708",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script>
<p>The dequantized tensor. The datatype is indicated by <code>quant_state.dtype</code> and defaults to <code>torch.float32</code>.</p>
`,returnType:`<script context="module">export const metadata = 'undefined';<\/script>
<p><code>torch.Tensor</code></p>
`,raiseDescription:`<script context="module">export const metadata = 'undefined';<\/script>
<ul>
<li><code>ValueError</code> — Raised when the input data type is not supported.</li>
</ul>
`,raiseType:`<script context="module">export const metadata = 'undefined';<\/script>
<p><code>ValueError</code></p>
`}}),ct=new g({props:{name:"bitsandbytes.functional.quantize_blockwise",anchor:"bitsandbytes.functional.quantize_blockwise",parameters:[{name:"A",val:": Tensor"},{name:"code",val:": typing.Optional[torch.Tensor] = None"},{name:"absmax",val:": typing.Optional[torch.Tensor] = None"},{name:"out",val:": typing.Optional[torch.Tensor] = None"},{name:"blocksize",val:" = 4096"},{name:"nested",val:" = False"}],parametersDescription:[{anchor:"bitsandbytes.functional.quantize_blockwise.A",description:"<strong>A</strong> (<code>torch.Tensor</code>) &#x2014; The input tensor. Supports <code>float16</code>, <code>bfloat16</code>, or <code>float32</code> datatypes.",name:"A"},{anchor:"bitsandbytes.functional.quantize_blockwise.code",description:`<strong>code</strong> (<code>torch.Tensor</code>, <em>optional</em>) &#x2014;
A mapping describing the low-bit data type. Defaults to a signed 8-bit dynamic type.
For more details, see (8-Bit Approximations for Parallelism in Deep Learning)[https://arxiv.org/abs/1511.04561].`,name:"code"},{anchor:"bitsandbytes.functional.quantize_blockwise.absmax",description:"<strong>absmax</strong> (<code>torch.Tensor</code>, <em>optional</em>) &#x2014; A tensor to use to store the absmax values.",name:"absmax"},{anchor:"bitsandbytes.functional.quantize_blockwise.out",description:"<strong>out</strong> (<code>torch.Tensor</code>, <em>optional</em>) &#x2014; A tensor to use to store the result.",name:"out"},{anchor:"bitsandbytes.functional.quantize_blockwise.blocksize",description:`<strong>blocksize</strong> (<code>int</code>, <em>optional</em>) &#x2014;
The size of the blocks. Defaults to 4096.
Valid values are 64, 128, 256, 512, 1024, 2048, and 4096.`,name:"blocksize"},{anchor:"bitsandbytes.functional.quantize_blockwise.nested",description:"<strong>nested</strong> (<code>bool</code>, <em>optional</em>) &#x2014; Whether to additionally quantize the absmax values. Defaults to False.",name:"nested"}],source:"https://github.com/bitsandbytes-foundation/bitsandbytes/blob/vr_1512/bitsandbytes/functional.py#L637",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script>
<p>A tuple containing the quantization results.</p>
<ul>
<li><code>torch.Tensor</code>: The quantized tensor.</li>
<li><code>QuantState</code>: The state object used to undo the quantization.</li>
</ul>
`,returnType:`<script context="module">export const metadata = 'undefined';<\/script>
<p><code>Tuple[torch.Tensor, QuantState]</code></p>
`,raiseDescription:`<script context="module">export const metadata = 'undefined';<\/script>
<ul>
<li><code>ValueError</code> — Raised when the input data type is not supported.</li>
</ul>
`,raiseType:`<script context="module">export const metadata = 'undefined';<\/script>
<p><code>ValueError</code></p>
`}}),lt=new It({props:{title:"Utility",local:"bitsandbytes.functional.get_ptr",headingTag:"h2"}}),ut=new g({props:{name:"bitsandbytes.functional.get_ptr",anchor:"bitsandbytes.functional.get_ptr",parameters:[{name:"A",val:": typing.Optional[torch.Tensor]"}],parametersDescription:[{anchor:"bitsandbytes.functional.get_ptr.A",description:"<strong>A</strong> (<code>Optional[Tensor]</code>) &#x2014; A PyTorch tensor.",name:"A"}],source:"https://github.com/bitsandbytes-foundation/bitsandbytes/blob/vr_1512/bitsandbytes/functional.py#L445",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script>
<p>A pointer to the underlying tensor data.</p>
`,returnType:`<script context="module">export const metadata = 'undefined';<\/script>
<p><code>Optional[ct.c_void_p]</code></p>
`}}),pt=new wn({props:{source:"https://github.com/bitsandbytes-foundation/bitsandbytes/blob/main/docs/source/reference/functional.mdx"}}),{c(){k=r("meta"),Ft=o(),Qt=r("p"),Rt=o(),c(P.$$.fragment),Bt=o(),E=r("p"),E.innerHTML=Be,Ut=o(),c(V.$$.fragment),Wt=o(),M=r("ul"),M.innerHTML=Ue,jt=o(),c(H.$$.fragment),Gt=o(),x=r("div"),c(I.$$.fragment),xe=o(),mt=r("p"),mt.textContent=We,$e=o(),bt=r("p"),bt.innerHTML=je,Jt=o(),A=r("div"),c(Q.$$.fragment),Te=o(),ft=r("p"),ft.textContent=Ge,Kt=o(),D=r("div"),c(S.$$.fragment),qe=o(),ht=r("p"),ht.innerHTML=Je,Xt=o(),$=r("div"),c(F.$$.fragment),ze=o(),yt=r("p"),yt.innerHTML=Ke,we=o(),gt=r("p"),gt.innerHTML=Xe,Yt=o(),c(R.$$.fragment),Zt=o(),T=r("div"),c(B.$$.fragment),ke=o(),vt=r("p"),vt.textContent=Ye,Ae=o(),_t=r("p"),_t.innerHTML=Ze,te=o(),U=r("div"),c(W.$$.fragment),ee=o(),j=r("div"),c(G.$$.fragment),ne=o(),J=r("div"),c(K.$$.fragment),oe=o(),q=r("div"),c(X.$$.fragment),De=o(),xt=r("p"),xt.textContent=tn,Ce=o(),$t=r("p"),$t.textContent=en,ae=o(),Y=r("div"),c(Z.$$.fragment),ie=o(),tt=r("div"),c(et.$$.fragment),se=o(),v=r("div"),c(nt.$$.fragment),Le=o(),Tt=r("p"),Tt.textContent=nn,Ne=o(),L=r("div"),c(ot.$$.fragment),Oe=o(),qt=r("p"),qt.textContent=on,Pe=o(),_=r("div"),c(at.$$.fragment),Ee=o(),zt=r("p"),zt.textContent=an,Ve=o(),wt=r("p"),wt.textContent=sn,Me=o(),kt=r("p"),kt.innerHTML=rn,re=o(),c(it.$$.fragment),de=o(),st=r("p"),st.textContent=dn,ce=o(),rt=r("p"),rt.innerHTML=cn,le=o(),z=r("div"),c(dt.$$.fragment),He=o(),At=r("p"),At.textContent=ln,Ie=o(),Dt=r("p"),Dt.innerHTML=un,ue=o(),w=r("div"),c(ct.$$.fragment),Qe=o(),Ct=r("p"),Ct.textContent=pn,Se=o(),Lt=r("p"),Lt.innerHTML=mn,pe=o(),c(lt.$$.fragment),me=o(),C=r("div"),c(ut.$$.fragment),Fe=o(),Nt=r("p"),Nt.textContent=bn,be=o(),c(pt.$$.fragment),fe=o(),St=r("p"),this.h()},l(t){const 
n=zn("svelte-u9bgzb",document.head);k=d(n,"META",{name:!0,content:!0}),n.forEach(e),Ft=a(t),Qt=d(t,"P",{}),h(Qt).forEach(e),Rt=a(t),l(P.$$.fragment,t),Bt=a(t),E=d(t,"P",{"data-svelte-h":!0}),u(E)!=="svelte-donk0z"&&(E.innerHTML=Be),Ut=a(t),l(V.$$.fragment,t),Wt=a(t),M=d(t,"UL",{"data-svelte-h":!0}),u(M)!=="svelte-ug6xw5"&&(M.innerHTML=Ue),jt=a(t),l(H.$$.fragment,t),Gt=a(t),x=d(t,"DIV",{class:!0});var Ot=h(x);l(I.$$.fragment,Ot),xe=a(Ot),mt=d(Ot,"P",{"data-svelte-h":!0}),u(mt)!=="svelte-1phi9i6"&&(mt.textContent=We),$e=a(Ot),bt=d(Ot,"P",{"data-svelte-h":!0}),u(bt)!=="svelte-q2aa2u"&&(bt.innerHTML=je),Ot.forEach(e),Jt=a(t),A=d(t,"DIV",{class:!0});var ye=h(A);l(Q.$$.fragment,ye),Te=a(ye),ft=d(ye,"P",{"data-svelte-h":!0}),u(ft)!=="svelte-1lqdwfe"&&(ft.textContent=Ge),ye.forEach(e),Kt=a(t),D=d(t,"DIV",{class:!0});var ge=h(D);l(S.$$.fragment,ge),qe=a(ge),ht=d(ge,"P",{"data-svelte-h":!0}),u(ht)!=="svelte-15q912e"&&(ht.innerHTML=Je),ge.forEach(e),Xt=a(t),$=d(t,"DIV",{class:!0});var Pt=h($);l(F.$$.fragment,Pt),ze=a(Pt),yt=d(Pt,"P",{"data-svelte-h":!0}),u(yt)!=="svelte-1u2p684"&&(yt.innerHTML=Ke),we=a(Pt),gt=d(Pt,"P",{"data-svelte-h":!0}),u(gt)!=="svelte-1f18irr"&&(gt.innerHTML=Xe),Pt.forEach(e),Yt=a(t),l(R.$$.fragment,t),Zt=a(t),T=d(t,"DIV",{class:!0});var Et=h(T);l(B.$$.fragment,Et),ke=a(Et),vt=d(Et,"P",{"data-svelte-h":!0}),u(vt)!=="svelte-1o0c7r0"&&(vt.textContent=Ye),Ae=a(Et),_t=d(Et,"P",{"data-svelte-h":!0}),u(_t)!=="svelte-1oor9gf"&&(_t.innerHTML=Ze),Et.forEach(e),te=a(t),U=d(t,"DIV",{class:!0});var fn=h(U);l(W.$$.fragment,fn),fn.forEach(e),ee=a(t),j=d(t,"DIV",{class:!0});var hn=h(j);l(G.$$.fragment,hn),hn.forEach(e),ne=a(t),J=d(t,"DIV",{class:!0});var yn=h(J);l(K.$$.fragment,yn),yn.forEach(e),oe=a(t),q=d(t,"DIV",{class:!0});var 
Vt=h(q);l(X.$$.fragment,Vt),De=a(Vt),xt=d(Vt,"P",{"data-svelte-h":!0}),u(xt)!=="svelte-1n8tbt5"&&(xt.textContent=tn),Ce=a(Vt),$t=d(Vt,"P",{"data-svelte-h":!0}),u($t)!=="svelte-1ucdexx"&&($t.textContent=en),Vt.forEach(e),ae=a(t),Y=d(t,"DIV",{class:!0});var gn=h(Y);l(Z.$$.fragment,gn),gn.forEach(e),ie=a(t),tt=d(t,"DIV",{class:!0});var vn=h(tt);l(et.$$.fragment,vn),vn.forEach(e),se=a(t),v=d(t,"DIV",{class:!0});var N=h(v);l(nt.$$.fragment,N),Le=a(N),Tt=d(N,"P",{"data-svelte-h":!0}),u(Tt)!=="svelte-1ec4axr"&&(Tt.textContent=nn),Ne=a(N),L=d(N,"DIV",{class:!0});var ve=h(L);l(ot.$$.fragment,ve),Oe=a(ve),qt=d(ve,"P",{"data-svelte-h":!0}),u(qt)!=="svelte-1ubgx6o"&&(qt.textContent=on),ve.forEach(e),Pe=a(N),_=d(N,"DIV",{class:!0});var O=h(_);l(at.$$.fragment,O),Ee=a(O),zt=d(O,"P",{"data-svelte-h":!0}),u(zt)!=="svelte-1k7tn2t"&&(zt.textContent=an),Ve=a(O),wt=d(O,"P",{"data-svelte-h":!0}),u(wt)!=="svelte-11ui0wm"&&(wt.textContent=sn),Me=a(O),kt=d(O,"P",{"data-svelte-h":!0}),u(kt)!=="svelte-1ykfpyf"&&(kt.innerHTML=rn),O.forEach(e),N.forEach(e),re=a(t),l(it.$$.fragment,t),de=a(t),st=d(t,"P",{"data-svelte-h":!0}),u(st)!=="svelte-7lntof"&&(st.textContent=dn),ce=a(t),rt=d(t,"P",{"data-svelte-h":!0}),u(rt)!=="svelte-1bzp8dj"&&(rt.innerHTML=cn),le=a(t),z=d(t,"DIV",{class:!0});var Mt=h(z);l(dt.$$.fragment,Mt),He=a(Mt),At=d(Mt,"P",{"data-svelte-h":!0}),u(At)!=="svelte-sv0f0s"&&(At.textContent=ln),Ie=a(Mt),Dt=d(Mt,"P",{"data-svelte-h":!0}),u(Dt)!=="svelte-1oor9gf"&&(Dt.innerHTML=un),Mt.forEach(e),ue=a(t),w=d(t,"DIV",{class:!0});var Ht=h(w);l(ct.$$.fragment,Ht),Qe=a(Ht),Ct=d(Ht,"P",{"data-svelte-h":!0}),u(Ct)!=="svelte-g7axkd"&&(Ct.textContent=pn),Se=a(Ht),Lt=d(Ht,"P",{"data-svelte-h":!0}),u(Lt)!=="svelte-1e3tiho"&&(Lt.innerHTML=mn),Ht.forEach(e),pe=a(t),l(lt.$$.fragment,t),me=a(t),C=d(t,"DIV",{class:!0});var 
_e=h(C);l(ut.$$.fragment,_e),Fe=a(_e),Nt=d(_e,"P",{"data-svelte-h":!0}),u(Nt)!=="svelte-mjy6qu"&&(Nt.textContent=bn),_e.forEach(e),be=a(t),l(pt.$$.fragment,t),fe=a(t),St=d(t,"P",{}),h(St).forEach(e),this.h()},h(){y(k,"name","hf:doc:metadata"),y(k,"content",An),y(x,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),y(A,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),y(D,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),y($,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),y(T,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),y(U,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),y(j,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),y(J,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),y(q,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),y(Y,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),y(tt,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),y(L,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),y(_,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),y(v,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),y(z,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),y(w,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),y(C,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 
mt-8")},m(t,n){s(document.head,k),i(t,Ft,n),i(t,Qt,n),i(t,Rt,n),p(P,t,n),i(t,Bt,n),i(t,E,n),i(t,Ut,n),p(V,t,n),i(t,Wt,n),i(t,M,n),i(t,jt,n),p(H,t,n),i(t,Gt,n),i(t,x,n),p(I,x,null),s(x,xe),s(x,mt),s(x,$e),s(x,bt),i(t,Jt,n),i(t,A,n),p(Q,A,null),s(A,Te),s(A,ft),i(t,Kt,n),i(t,D,n),p(S,D,null),s(D,qe),s(D,ht),i(t,Xt,n),i(t,$,n),p(F,$,null),s($,ze),s($,yt),s($,we),s($,gt),i(t,Yt,n),p(R,t,n),i(t,Zt,n),i(t,T,n),p(B,T,null),s(T,ke),s(T,vt),s(T,Ae),s(T,_t),i(t,te,n),i(t,U,n),p(W,U,null),i(t,ee,n),i(t,j,n),p(G,j,null),i(t,ne,n),i(t,J,n),p(K,J,null),i(t,oe,n),i(t,q,n),p(X,q,null),s(q,De),s(q,xt),s(q,Ce),s(q,$t),i(t,ae,n),i(t,Y,n),p(Z,Y,null),i(t,ie,n),i(t,tt,n),p(et,tt,null),i(t,se,n),i(t,v,n),p(nt,v,null),s(v,Le),s(v,Tt),s(v,Ne),s(v,L),p(ot,L,null),s(L,Oe),s(L,qt),s(v,Pe),s(v,_),p(at,_,null),s(_,Ee),s(_,zt),s(_,Ve),s(_,wt),s(_,Me),s(_,kt),i(t,re,n),p(it,t,n),i(t,de,n),i(t,st,n),i(t,ce,n),i(t,rt,n),i(t,le,n),i(t,z,n),p(dt,z,null),s(z,He),s(z,At),s(z,Ie),s(z,Dt),i(t,ue,n),i(t,w,n),p(ct,w,null),s(w,Qe),s(w,Ct),s(w,Se),s(w,Lt),i(t,pe,n),p(lt,t,n),i(t,me,n),i(t,C,n),p(ut,C,null),s(C,Fe),s(C,Nt),i(t,be,n),p(pt,t,n),i(t,fe,n),i(t,St,n),he=!0},p:xn,i(t){he||(m(P.$$.fragment,t),m(V.$$.fragment,t),m(H.$$.fragment,t),m(I.$$.fragment,t),m(Q.$$.fragment,t),m(S.$$.fragment,t),m(F.$$.fragment,t),m(R.$$.fragment,t),m(B.$$.fragment,t),m(W.$$.fragment,t),m(G.$$.fragment,t),m(K.$$.fragment,t),m(X.$$.fragment,t),m(Z.$$.fragment,t),m(et.$$.fragment,t),m(nt.$$.fragment,t),m(ot.$$.fragment,t),m(at.$$.fragment,t),m(it.$$.fragment,t),m(dt.$$.fragment,t),m(ct.$$.fragment,t),m(lt.$$.fragment,t),m(ut.$$.fragment,t),m(pt.$$.fragment,t),he=!0)},o(t){b(P.$$.fragment,t),b(V.$$.fragment,t),b(H.$$.fragment,t),b(I.$$.fragment,t),b(Q.$$.fragment,t),b(S.$$.fragment,t),b(F.$$.fragment,t),b(R.$$.fragment,t),b(B.$$.fragment,t),b(W.$$.fragment,t),b(G.$$.fragment,t),b(K.$$.fragment,t),b(X.$$.fragment,t),b(Z.$$.fragment,t),b(et.$$.fragment,t),b(nt.$$.fragment,t),b(ot.$$.fragment,t),b(at.$$.fragment,t),b(it.$$.fragment,
t),b(dt.$$.fragment,t),b(ct.$$.fragment,t),b(lt.$$.fragment,t),b(ut.$$.fragment,t),b(pt.$$.fragment,t),he=!1},d(t){t&&(e(Ft),e(Qt),e(Rt),e(Bt),e(E),e(Ut),e(Wt),e(M),e(jt),e(Gt),e(x),e(Jt),e(A),e(Kt),e(D),e(Xt),e($),e(Yt),e(Zt),e(T),e(te),e(U),e(ee),e(j),e(ne),e(J),e(oe),e(q),e(ae),e(Y),e(ie),e(tt),e(se),e(v),e(re),e(de),e(st),e(ce),e(rt),e(le),e(z),e(ue),e(w),e(pe),e(me),e(C),e(be),e(fe),e(St)),e(k),f(P,t),f(V,t),f(H,t),f(I),f(Q),f(S),f(F),f(R,t),f(B),f(W),f(G),f(K),f(X),f(Z),f(et),f(nt),f(ot),f(at),f(it,t),f(dt),f(ct),f(lt,t),f(ut),f(pt,t)}}}
// Page metadata, kept as a pre-serialized JSON string: the page title ("Overview"),
// its anchor ("overview"), and five depth-2 sections, each with a title and a local anchor id.
// The fragment factory `kn` assigns this string to the "content" attribute of the
// element whose "name" attribute is "hf:doc:metadata".
const An='{"title":"Overview","local":"overview","sections":[{"title":"When to Use bitsandbytes.functional","local":"when-to-use-bitsandbytesfunctional","sections":[],"depth":2},{"title":"LLM.int8()","local":"bitsandbytes.functional.int8_linear_matmul","sections":[],"depth":2},{"title":"4-bit","local":"bitsandbytes.functional.dequantize_4bit","sections":[],"depth":2},{"title":"Dynamic 8-bit Quantization","local":"bitsandbytes.functional.dequantize_blockwise","sections":[],"depth":2},{"title":"Utility","local":"bitsandbytes.functional.get_ptr","sections":[],"depth":2}],"depth":1}';
// Instance function handed to `qn` by the component class below.
// - `Re` is accepted but never read in this body.
// - Registers a callback through `$n` (imported from the scheduler chunk; presumably
//   Svelte's onMount -- TODO confirm). The callback reads the "fw" query parameter from
//   `window.location.search` and discards the value, so it has no visible effect here.
// - Returns an empty array.
function Dn(Re){return $n(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}
// Page component class. `Tn` and `qn` come from the index chunk (presumably Svelte's
// SvelteComponent base class and `init` helper -- TODO confirm). The constructor calls
// `super()` and then `qn` with this instance, the options object `k`, the instance
// function `Dn`, the fragment factory `kn`, `_n` from the scheduler chunk, and an
// empty object literal.
class Pn extends Tn{constructor(k){super(),qn(this,k,Dn,kn,_n,{})}}
// The class is exported under the name `component`.
export{Pn as component};

Xet Storage Details

Size:
36.9 kB
·
Xet hash:
5fd39dd0d3419d8488ea24cf64f1c49712fed551c445ca8be3f4e9e286e26ef3

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.