# plapre-pico-coreml / scripts / quantize.py
# Author: Daniel Rothmann
# History: removed the int4 variant, which had too poor quality (commit 5ed6654)
#!/usr/bin/env python3
"""
Post-training weight quantization for PlaprePico CoreML model.
Applies int8 linear weight quantization to reduce model size
and improve inference speed (less memory bandwidth).
Usage:
python quantize.py [--input PATH] [--output PATH]
"""
import argparse
from pathlib import Path
import coremltools as ct
from coremltools.optimize.coreml import (
OpLinearQuantizerConfig,
OptimizationConfig,
linear_quantize_weights,
)
def quantize_model(input_path: Path, output_path: Path, nbits: int = 8) -> None:
    """Quantize the weights of a CoreML model and report the size change.

    Loads the model at *input_path*, applies linear symmetric per-channel
    weight quantization at the requested bit width, saves the result to
    *output_path*, and prints the original vs. quantized on-disk sizes.

    Args:
        input_path: Path to the source model (.mlpackage directory or file).
        output_path: Destination path for the quantized model.
        nbits: Bit width for the quantized weights (default 8).
    """
    print(f"Loading {input_path}...")
    # CPU_ONLY: we only transform weights, no need to compile for ANE/GPU.
    model = ct.models.MLModel(str(input_path), compute_units=ct.ComputeUnit.CPU_ONLY)

    print(f"Quantizing weights to int{nbits}...")
    op_config = OpLinearQuantizerConfig(
        mode="linear_symmetric",
        dtype=f"int{nbits}",
        granularity="per_channel",
    )
    config = OptimizationConfig(global_config=op_config)
    quantized = linear_quantize_weights(model, config)

    print(f"Saving to {output_path}...")
    quantized.save(str(output_path))

    # Compare on-disk sizes (an .mlpackage is a directory of files).
    orig_mb = _disk_size(input_path) / 1e6
    quant_mb = _disk_size(output_path) / 1e6
    print(f"\nOriginal: {orig_mb:.1f} MB")
    if orig_mb > 0:
        print(f"Quantized: {quant_mb:.1f} MB ({quant_mb/orig_mb*100:.0f}%)")
    else:
        # Avoid ZeroDivisionError if the input path was missing or empty.
        print(f"Quantized: {quant_mb:.1f} MB")
    print("Done!")


def _disk_size(p: Path) -> int:
    """Return total size in bytes of a file, or of all files under a directory."""
    path = Path(p)
    if path.is_file():
        return path.stat().st_size
    return sum(f.stat().st_size for f in path.rglob("*") if f.is_file())
def main():
    """Parse command-line arguments and run the quantization pass."""
    default_input = Path(__file__).parent.parent / "PlaprePico.mlpackage"

    parser = argparse.ArgumentParser(description="Quantize PlaprePico model")
    parser.add_argument("--input", type=str, default=str(default_input))
    parser.add_argument("--output", type=str, default=None)
    args = parser.parse_args()

    src = Path(args.input)
    if args.output:
        dst = Path(args.output)
    else:
        # Default: sibling of the input, with an int8 suffix.
        dst = src.parent / "PlaprePico_int8.mlpackage"
    quantize_model(src, dst, 8)


if __name__ == "__main__":
    main()