/**
 * File-level quantization scheme identifiers.
 *
 * This is a numeric enum, so the compiled object maps both name -> id and id -> name.
 * Other code in this file relies on that reverse mapping to recover the member names
 * (`Object.values(GGMLFileQuantizationType)` filtered down to strings).
 *
 * NOTE(review): ids 0-37 presumably mirror llama.cpp's `llama_ftype` values - confirm
 * against upstream before adding or renumbering members.
 */
export enum GGMLFileQuantizationType {
  F32 = 0,
  F16 = 1,
  Q4_0 = 2,
  Q4_1 = 3,
  Q4_1_SOME_F16 = 4,
  Q4_2 = 5,
  Q4_3 = 6,
  Q8_0 = 7,
  Q5_0 = 8,
  Q5_1 = 9,
  Q2_K = 10,
  Q3_K_S = 11,
  Q3_K_M = 12,
  Q3_K_L = 13,
  Q4_K_S = 14,
  Q4_K_M = 15,
  Q5_K_S = 16,
  Q5_K_M = 17,
  Q6_K = 18,
  IQ2_XXS = 19,
  IQ2_XS = 20,
  Q2_K_S = 21,
  IQ3_XS = 22,
  IQ3_XXS = 23,
  IQ1_S = 24,
  IQ4_NL = 25,
  IQ3_S = 26,
  IQ3_M = 27,
  IQ2_S = 28,
  IQ2_M = 29,
  IQ4_XS = 30,
  IQ1_M = 31,
  BF16 = 32,
  Q4_0_4_4 = 33,
  Q4_0_4_8 = 34,
  Q4_0_8_8 = 35,
  TQ1_0 = 36,
  TQ2_0 = 37,

  // The `*_K_XL` labels live in their own id range starting at 1000, well clear of the
  // contiguous 0-37 block above.
  // NOTE(review): presumably these are naming-only labels rather than official GGUF file
  // types - confirm before treating the ids as values read from a file header.
  Q2_K_XL = 1000,
  Q3_K_XL = 1001,
  Q4_K_XL = 1002,
  Q5_K_XL = 1003,
  Q6_K_XL = 1004,
  Q8_K_XL = 1005,
}
|
|
/**
 * Member names of `GGMLFileQuantizationType` (the string half of the numeric enum's
 * reverse mapping), sorted longest-first.
 *
 * Regex alternation commits to the first alternative that matches at a position, so a name
 * that is a prefix of another (`Q4_0` vs `Q4_0_4_4`, `Q4_1` vs `Q4_1_SOME_F16`) has to come
 * after the longer one; otherwise the longer label can never be matched in full.
 */
const ggufQuants = Object.values(GGMLFileQuantizationType)
  .filter((v): v is string => typeof v === "string")
  .sort((a, b) => b.length - a.length);

/**
 * Matches one quantization label. The pattern is unanchored and only contains upper-case
 * alternatives, so callers are expected to upper-case the input first.
 *
 * Named groups:
 * - `quant`: a `GGMLFileQuantizationType` member name;
 * - `sizeVariation`: optional run of upper-case letters following an extra `_`
 *   (e.g. the `L` in `Q6_K_L`).
 */
export const GGUF_QUANT_RE = new RegExp(`(?<quant>${ggufQuants.join("|")})` + "(_(?<sizeVariation>[A-Z]+))?");

/**
 * Copy of `GGUF_QUANT_RE` with the global flag, for collecting every label in a string.
 * Because it is global, `exec`/`test` calls on it advance its `lastIndex`.
 */
export const GGUF_QUANT_RE_GLOBAL = new RegExp(GGUF_QUANT_RE, "g");
|
|
/**
 * Extracts the quantization label from a GGUF file name.
 *
 * The name is upper-cased before matching, so the lookup is case-insensitive and the
 * returned label is always upper case. When the name contains several labels, the last
 * one is returned.
 *
 * @param fname - File name (or any string) to scan.
 * @returns The full text of the last `GGUF_QUANT_RE_GLOBAL` match, including any
 *   size-variation suffix, or `undefined` when nothing matches.
 * @example
 * parseGGUFQuantLabel("llama-3-8b-instruct.Q4_K_M.gguf"); // "Q4_K_M"
 */
export function parseGGUFQuantLabel(fname: string): string | undefined {
  // With a global regex, String.prototype.match returns every match (or null for none).
  const labels = fname.toUpperCase().match(GGUF_QUANT_RE_GLOBAL);
  return labels?.at(-1);
}
|
|
/**
 * Ranking of file quantization types; an entry's index in this array is its rank.
 * `findNearestQuantType` compares these ranks to choose a substitute type.
 *
 * Entries are grouped by the bit width in their names, starting with the float formats
 * and ending with the 1-bit / ternary families.
 *
 * NOTE(review): presumably meant to run from highest to lowest precision. The order inside
 * some groups (e.g. `IQ1_S` before `IQ1_M`, `TQ1_0` before `TQ2_0`, `Q5_0` before `Q5_1`)
 * should be confirmed against the actual bits-per-weight of those schemes.
 */
export const GGUF_QUANT_ORDER: GGMLFileQuantizationType[] = [
  // Float formats and 8-bit
  GGMLFileQuantizationType.F32,
  GGMLFileQuantizationType.BF16,
  GGMLFileQuantizationType.F16,
  GGMLFileQuantizationType.Q8_K_XL,
  GGMLFileQuantizationType.Q8_0,

  // 6-bit
  GGMLFileQuantizationType.Q6_K_XL,
  GGMLFileQuantizationType.Q6_K,

  // 5-bit
  GGMLFileQuantizationType.Q5_K_XL,
  GGMLFileQuantizationType.Q5_K_M,
  GGMLFileQuantizationType.Q5_K_S,
  GGMLFileQuantizationType.Q5_0,
  GGMLFileQuantizationType.Q5_1,

  // 4-bit
  GGMLFileQuantizationType.Q4_K_XL,
  GGMLFileQuantizationType.Q4_K_M,
  GGMLFileQuantizationType.Q4_K_S,
  GGMLFileQuantizationType.IQ4_NL,
  GGMLFileQuantizationType.IQ4_XS,
  GGMLFileQuantizationType.Q4_0_4_4,
  GGMLFileQuantizationType.Q4_0_4_8,
  GGMLFileQuantizationType.Q4_0_8_8,
  GGMLFileQuantizationType.Q4_1_SOME_F16,
  GGMLFileQuantizationType.Q4_0,
  GGMLFileQuantizationType.Q4_1,
  GGMLFileQuantizationType.Q4_2,
  GGMLFileQuantizationType.Q4_3,

  // 3-bit
  GGMLFileQuantizationType.Q3_K_XL,
  GGMLFileQuantizationType.Q3_K_L,
  GGMLFileQuantizationType.Q3_K_M,
  GGMLFileQuantizationType.Q3_K_S,
  GGMLFileQuantizationType.IQ3_M,
  GGMLFileQuantizationType.IQ3_S,
  GGMLFileQuantizationType.IQ3_XS,
  GGMLFileQuantizationType.IQ3_XXS,

  // 2-bit
  GGMLFileQuantizationType.Q2_K_XL,
  GGMLFileQuantizationType.Q2_K,
  GGMLFileQuantizationType.Q2_K_S,
  GGMLFileQuantizationType.IQ2_M,
  GGMLFileQuantizationType.IQ2_S,
  GGMLFileQuantizationType.IQ2_XS,
  GGMLFileQuantizationType.IQ2_XXS,

  // 1-bit and ternary
  GGMLFileQuantizationType.IQ1_S,
  GGMLFileQuantizationType.IQ1_M,
  GGMLFileQuantizationType.TQ1_0,
  GGMLFileQuantizationType.TQ2_0,
];
|
|
/**
 * Picks, from `availableQuants`, the quantization type closest to `quant` according to
 * `GGUF_QUANT_ORDER` (rank = index in that array).
 *
 * Selection rule:
 * 1. the available type with the smallest rank that is >= the rank of `quant`
 *    (that is `quant` itself whenever it is available);
 * 2. otherwise - every available type ranks before `quant` - the available type with the
 *    largest rank.
 *
 * Available types that do not appear in `GGUF_QUANT_ORDER` are ignored. If `quant` itself
 * does not appear there, it is treated as rank 0 (the head of the order).
 * `availableQuants` is not modified.
 *
 * @param quant - The desired quantization type.
 * @param availableQuants - The types that can actually be chosen.
 * @returns The chosen type, or `undefined` when no available type is ranked.
 */
export function findNearestQuantType(
  quant: GGMLFileQuantizationType,
  availableQuants: GGMLFileQuantizationType[]
): GGMLFileQuantizationType | undefined {
  // Built per call because GGUF_QUANT_ORDER is an exported, mutable array.
  const rankOf = new Map<GGMLFileQuantizationType, number>(
    GGUF_QUANT_ORDER.map((q, rank): [GGMLFileQuantizationType, number] => [q, rank])
  );

  const targetRank = rankOf.get(quant) ?? 0;

  // One pass tracks both possible answers, so no sorted copy is needed.
  let nearest: GGMLFileQuantizationType | undefined; // smallest rank that is >= targetRank
  let nearestRank = Infinity;
  let fallback: GGMLFileQuantizationType | undefined; // largest rank overall
  let fallbackRank = -Infinity;

  for (const candidate of availableQuants) {
    const rank = rankOf.get(candidate);
    if (rank === undefined) {
      continue; // not part of the ranking
    }
    if (rank >= targetRank && rank < nearestRank) {
      nearest = candidate;
      nearestRank = rank;
    }
    if (rank > fallbackRank) {
      fallback = candidate;
      fallbackRank = rank;
    }
  }

  // `??` rather than `||`: F32 has the enum value 0 and is a valid result.
  return nearest ?? fallback;
}
|
|
/**
 * Quantization / data type identifiers, separate from `GGMLFileQuantizationType`: the two
 * enums share some member names but assign different numeric ids (for example `Q8_0` is 8
 * here and 7 there), so values must not be mixed between them.
 *
 * The ids are not contiguous: 4, 5 and 31-33 are unassigned.
 *
 * NOTE(review): presumably these are the per-tensor `ggml_type` ids from ggml, with the gaps
 * left by types removed upstream - confirm against `ggml.h` before adding members.
 */
export enum GGMLQuantizationType {
  F32 = 0,
  F16 = 1,
  Q4_0 = 2,
  Q4_1 = 3,
  Q5_0 = 6,
  Q5_1 = 7,
  Q8_0 = 8,
  Q8_1 = 9,
  Q2_K = 10,
  Q3_K = 11,
  Q4_K = 12,
  Q5_K = 13,
  Q6_K = 14,
  Q8_K = 15,
  IQ2_XXS = 16,
  IQ2_XS = 17,
  IQ3_XXS = 18,
  IQ1_S = 19,
  IQ4_NL = 20,
  IQ3_S = 21,
  IQ2_S = 22,
  IQ4_XS = 23,
  I8 = 24,
  I16 = 25,
  I32 = 26,
  I64 = 27,
  F64 = 28,
  IQ1_M = 29,
  BF16 = 30,
  TQ1_0 = 34,
  TQ2_0 = 35,
}
|
|