// This list is copied from gguf/types.ts, but with all types available (for backward compatibility)
// NOT to be confused with GGMLQuantizationType: a file quantization can contain multiple GGMLQuantizationType
// For example, a Q4_K_M model can contain Q4_K and Q6_K tensors
export enum GGMLFileQuantizationType {
	F32 = 0,
	F16 = 1,
	Q4_0 = 2,
	Q4_1 = 3,
	Q4_1_SOME_F16 = 4,
	Q4_2 = 5,
	Q4_3 = 6,
	Q8_0 = 7,
	Q5_0 = 8,
	Q5_1 = 9,
	Q2_K = 10,
	Q3_K_S = 11,
	Q3_K_M = 12,
	Q3_K_L = 13,
	Q4_K_S = 14,
	Q4_K_M = 15,
	Q5_K_S = 16,
	Q5_K_M = 17,
	Q6_K = 18,
	IQ2_XXS = 19,
	IQ2_XS = 20,
	Q2_K_S = 21,
	IQ3_XS = 22,
	IQ3_XXS = 23,
	IQ1_S = 24,
	IQ4_NL = 25,
	IQ3_S = 26,
	IQ3_M = 27,
	IQ2_S = 28,
	IQ2_M = 29,
	IQ4_XS = 30,
	IQ1_M = 31,
	BF16 = 32,
	Q4_0_4_4 = 33,
	Q4_0_4_8 = 34,
	Q4_0_8_8 = 35,
	TQ1_0 = 36,
	TQ2_0 = 37,
	MXFP4_MOE = 38,
	// custom quants used by unsloth
	// they are not officially a scheme enum value in GGUF, but only here for naming
	Q2_K_XL = 1000,
	Q3_K_XL = 1001,
	Q4_K_XL = 1002,
	Q5_K_XL = 1003,
	Q6_K_XL = 1004,
	Q8_K_XL = 1005,
}
const ggufQuants = Object.values(GGMLFileQuantizationType).filter((v): v is string => typeof v === "string");

export const GGUF_QUANT_RE = new RegExp(
	"(?<prefix>UD-)?" + `(?<quant>${ggufQuants.join("|")})` + "(_(?<sizeVariation>[A-Z]+))?",
);
export const GGUF_QUANT_RE_GLOBAL = new RegExp(GGUF_QUANT_RE, "g");
export function parseGGUFQuantLabel(fname: string): string | undefined {
	const quantLabel = fname.toUpperCase().match(GGUF_QUANT_RE_GLOBAL)?.at(-1); // if there are multiple quant substrings in a name, we prefer the last one
	return quantLabel;
}
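// Illustrative usage (added for clarity, not part of the original file; filenames are hypothetical):
//   parseGGUFQuantLabel("llama-3-8b.Q4_K_M.gguf")           // => "Q4_K_M"
//   parseGGUFQuantLabel("model-IQ2_XS-00001-of-00002.gguf") // => "IQ2_XS"
//   parseGGUFQuantLabel("model.safetensors")                // => undefined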
// order of quantization, from biggest to smallest
// this list must be in sync with the order in GGMLFileQuantizationType
// the gguf.spec.ts tests verify that the order is correct
export const GGUF_QUANT_ORDER: GGMLFileQuantizationType[] = [
	GGMLFileQuantizationType.F32,
	GGMLFileQuantizationType.BF16,
	GGMLFileQuantizationType.F16,
	GGMLFileQuantizationType.Q8_K_XL,
	GGMLFileQuantizationType.Q8_0,

	// 6-bit quantizations
	GGMLFileQuantizationType.Q6_K_XL,
	GGMLFileQuantizationType.Q6_K,

	// 5-bit quantizations
	GGMLFileQuantizationType.Q5_K_XL,
	GGMLFileQuantizationType.Q5_K_M,
	GGMLFileQuantizationType.Q5_K_S,
	GGMLFileQuantizationType.Q5_0,
	GGMLFileQuantizationType.Q5_1,

	// 4-bit quantizations
	GGMLFileQuantizationType.Q4_K_XL,
	GGMLFileQuantizationType.Q4_K_M,
	GGMLFileQuantizationType.Q4_K_S,
	GGMLFileQuantizationType.IQ4_NL,
	GGMLFileQuantizationType.IQ4_XS,
	GGMLFileQuantizationType.Q4_0_4_4,
	GGMLFileQuantizationType.Q4_0_4_8,
	GGMLFileQuantizationType.Q4_0_8_8,
	GGMLFileQuantizationType.Q4_1_SOME_F16,
	GGMLFileQuantizationType.Q4_0,
	GGMLFileQuantizationType.Q4_1,
	GGMLFileQuantizationType.Q4_2,
	GGMLFileQuantizationType.Q4_3,
	GGMLFileQuantizationType.MXFP4_MOE,

	// 3-bit quantizations
	GGMLFileQuantizationType.Q3_K_XL,
	GGMLFileQuantizationType.Q3_K_L,
	GGMLFileQuantizationType.Q3_K_M,
	GGMLFileQuantizationType.Q3_K_S,
	GGMLFileQuantizationType.IQ3_M,
	GGMLFileQuantizationType.IQ3_S,
	GGMLFileQuantizationType.IQ3_XS,
	GGMLFileQuantizationType.IQ3_XXS,

	// 2-bit quantizations
	GGMLFileQuantizationType.Q2_K_XL,
	GGMLFileQuantizationType.Q2_K,
	GGMLFileQuantizationType.Q2_K_S,
	GGMLFileQuantizationType.IQ2_M,
	GGMLFileQuantizationType.IQ2_S,
	GGMLFileQuantizationType.IQ2_XS,
	GGMLFileQuantizationType.IQ2_XXS,

	// 1-bit quantizations
	GGMLFileQuantizationType.IQ1_S,
	GGMLFileQuantizationType.IQ1_M,
	GGMLFileQuantizationType.TQ1_0,
	GGMLFileQuantizationType.TQ2_0,
];
// This function finds the nearest quantization type that is less than or equal to the given quantization type.
// It returns undefined if no such quantization type is found.
export function findNearestQuantType(
	quant: GGMLFileQuantizationType,
	availableQuants: GGMLFileQuantizationType[],
): GGMLFileQuantizationType | undefined {
	// Create a map for quick index lookup from the defined order
	const orderMap = new Map<GGMLFileQuantizationType, number>();
	GGUF_QUANT_ORDER.forEach((q, index) => {
		orderMap.set(q, index);
	});

	const targetIndex = orderMap.get(quant) ?? 0; // the 0 case should never happen

	// Filter the available quantizations to include only those defined in the order map,
	// then sort them according to the GGUF_QUANT_ORDER (from largest/index 0 to smallest/highest index).
	const sortedAvailable = availableQuants
		.filter((q) => orderMap.has(q))
		.sort((a, b) => (orderMap.get(a) ?? Infinity) - (orderMap.get(b) ?? Infinity));

	// If no valid quantizations are available after filtering
	if (sortedAvailable.length === 0) {
		return undefined;
	}

	// Iterate through the sorted available quantizations (largest to smallest).
	// Find the first one whose order index is >= the target index.
	// This means finding the largest quantization that is smaller than or equal to the target.
	for (const availableQuant of sortedAvailable) {
		// We know the key exists due to the filter above.
		const availableIndex = orderMap.get(availableQuant) ?? 0;
		if (availableIndex >= targetIndex) {
			return availableQuant;
		}
	}

	// If the loop completes, it means all available quantizations are larger (have a smaller index)
	// than the target quantization. In this case, return the "smallest" available quantization,
	// which is the last element in the sorted list (highest index among available).
	return sortedAvailable[sortedAvailable.length - 1];
}
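// Illustrative usage (added for clarity, not part of the original file):
//   findNearestQuantType(GGMLFileQuantizationType.Q4_K_M, [GGMLFileQuantizationType.Q8_0, GGMLFileQuantizationType.Q2_K])
//     // => Q2_K: the largest available quant that is <= Q4_K_M
//   findNearestQuantType(GGMLFileQuantizationType.Q2_K, [GGMLFileQuantizationType.Q8_0])
//     // => Q8_0: every available quant is larger, so fall back to the smallest available one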
// This list is only used to calculate the size of the model, NOT to be confused with the quantization FILE type
export enum GGMLQuantizationType {
	F32 = 0,
	F16 = 1,
	Q4_0 = 2,
	Q4_1 = 3,
	Q5_0 = 6,
	Q5_1 = 7,
	Q8_0 = 8,
	Q8_1 = 9,
	Q2_K = 10,
	Q3_K = 11,
	Q4_K = 12,
	Q5_K = 13,
	Q6_K = 14,
	Q8_K = 15,
	IQ2_XXS = 16,
	IQ2_XS = 17,
	IQ3_XXS = 18,
	IQ1_S = 19,
	IQ4_NL = 20,
	IQ3_S = 21,
	IQ2_S = 22,
	IQ4_XS = 23,
	I8 = 24,
	I16 = 25,
	I32 = 26,
	I64 = 27,
	F64 = 28,
	IQ1_M = 29,
	BF16 = 30,
	TQ1_0 = 34,
	TQ2_0 = 35,
	MXFP4 = 39,
}
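// Minimal sketch (added for illustration, not part of the original file) of how these
// tensor-level quantization types feed into a model size estimate:
//   bytes = elements / blockSize * typeSize
// The (blockSize, typeSize) pairs below are the llama.cpp values for a few common types,
// listed here only as an assumption for the example, not as an authoritative table.
const EXAMPLE_QUANT_SIZES: Partial<Record<GGMLQuantizationType, [blockSize: number, typeSize: number]>> = {
	[GGMLQuantizationType.F32]: [1, 4],
	[GGMLQuantizationType.F16]: [1, 2],
	[GGMLQuantizationType.Q4_K]: [256, 144],
	[GGMLQuantizationType.Q6_K]: [256, 210],
};

// Estimate the byte size of a single tensor with `nElements` weights stored as `type`.
function exampleTensorByteSize(type: GGMLQuantizationType, nElements: number): number | undefined {
	const size = EXAMPLE_QUANT_SIZES[type];
	if (!size) {
		return undefined;
	}
	const [blockSize, typeSize] = size;
	return (nElements / blockSize) * typeSize;
}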