# Provenance (Hugging Face snapshot): commit 162f86a (verified), author AsadIsmail,
# message: "Bundle ternary_quant package directly (private repo fix)".
# NOTE(review): these lines were raw page-scrape text and broke Python parsing;
# converted to comments so the module imports cleanly.
"""Post-training ternary quantization for HuggingFace LLMs."""
from ternary_quant.quantizer import TernaryQuantizer
from ternary_quant.pipeline import quantize_model
from ternary_quant.inference import (
TernaryLinear,
clear_quantized_runtime_cache,
load_ternary_model,
prepare_quantized_runtime,
)
from ternary_quant.generative_adapters import (
BroadQuantizationConfig,
build_calibration_batches,
generative_model_info_to_dict,
inspect_generative_model,
load_generative_model,
quantize_components_inplace,
)
from ternary_quant.mac_kernels import metal_ternary_kernels_available
from ternary_quant.quantizer_small import (
GroupwiseAsymmetricTernaryQuantizer,
build_role_aware_plan,
quantize_small_model_inplace,
)
from ternary_quant.ptq_families import (
ProgressiveTritPlaneQuantizer,
TritPlaneParameter,
build_family_config,
get_default_family_candidates,
quantize_family_inplace,
summarize_family_quantization,
)
# Package version string (PEP 396-style single-source version attribute).
__version__: str = "0.1.5"

# Public API surface for `from ternary_quant import *` and for documentation
# tooling. Every name below is re-exported from the submodule imports above;
# the list mirrors them one-to-one (22 names, none missing, none extra).
__all__ = [
    # Core post-training quantization (quantizer / pipeline modules).
    "TernaryQuantizer",
    "quantize_model",
    # Small-model group-wise quantization (quantizer_small module).
    "GroupwiseAsymmetricTernaryQuantizer",
    # Trit-plane / PTQ-family quantization (ptq_families module).
    "ProgressiveTritPlaneQuantizer",
    "TritPlaneParameter",
    "build_role_aware_plan",
    "quantize_small_model_inplace",
    "build_family_config",
    "get_default_family_candidates",
    "quantize_family_inplace",
    "summarize_family_quantization",
    # Generative-model adapters (generative_adapters module).
    "BroadQuantizationConfig",
    "inspect_generative_model",
    "load_generative_model",
    "build_calibration_batches",
    "quantize_components_inplace",
    "generative_model_info_to_dict",
    # Quantized inference runtime (inference module).
    "TernaryLinear",
    "load_ternary_model",
    "prepare_quantized_runtime",
    "clear_quantized_runtime_cache",
    # Apple-Metal kernel availability probe (mac_kernels module).
    "metal_ternary_kernels_available",
]