# parakeet-tdt_ctc-110m-coreml / quantize_coreml.py
# Uploaded by Jarbas ("Upload 92 files", commit 290bf47, verified)
#!/usr/bin/env python3
"""Quantize the Parakeet CTC CoreML models (mel_encoder + ctc_decoder)."""
from __future__ import annotations
import json
import shutil
from pathlib import Path
import coremltools as ct
from coremltools.optimize.coreml import (
OptimizationConfig,
OpLinearQuantizerConfig,
OpPalettizerConfig,
linear_quantize_weights,
palettize_weights,
)
# Directory holding the original (unquantized) .mlpackage models.
INPUT_DIR = Path("parakeet_ctc_coreml")
# Root output directory; one subdirectory is created per quantization variant.
OUTPUT_DIR = Path("parakeet_ctc_coreml_quantized")
# Variant name -> callable mapping. Each callable takes a loaded MLModel and
# returns a quantized copy via coremltools' weight-compression passes.
VARIANTS = {
    # 8-bit linear (affine) quantization, one scale/offset per output channel.
    "int8_linear": lambda m: linear_quantize_weights(
        m,
        OptimizationConfig(
            global_config=OpLinearQuantizerConfig(mode="linear", granularity="per_channel")
        ),
    ),
    # 4-bit symmetric linear quantization with per-block scales (block of 32).
    # NOTE(review): int4 / per_block support presumably requires a recent
    # coremltools and an iOS17+/iOS18+ deployment target — confirm versions.
    "int4_linear": lambda m: linear_quantize_weights(
        m,
        OptimizationConfig(
            global_config=OpLinearQuantizerConfig(mode="linear_symmetric", granularity="per_block", block_size=32, dtype="int4")
        ),
    ),
    # K-means weight palettization with a 64-entry (6-bit) lookup table.
    "6bit_palettize": lambda m: palettize_weights(
        m,
        OptimizationConfig(global_config=OpPalettizerConfig(mode="kmeans", nbits=6)),
    ),
    # K-means weight palettization with a 16-entry (4-bit) lookup table.
    "4bit_palettize": lambda m: palettize_weights(
        m,
        OptimizationConfig(global_config=OpPalettizerConfig(mode="kmeans", nbits=4)),
    ),
}
# Logical component name -> .mlpackage filename inside INPUT_DIR.
MODELS = {
    "mel_encoder": "parakeet_ctc_mel_encoder.mlpackage",
    "ctc_decoder": "parakeet_ctc_decoder.mlpackage",
}
def dir_size_mb(path: Path) -> float:
    """Return the combined size, in MiB, of every regular file under *path*.

    Walks the tree recursively; directories themselves contribute nothing.
    """
    total_bytes = 0
    for entry in path.rglob("*"):
        if entry.is_file():
            total_bytes += entry.stat().st_size
    return total_bytes / (1024 * 1024)
def quantize_model(src: Path, fn, dst: Path, name: str) -> float:
    """Load the CoreML package at *src*, quantize it with *fn*, save to *dst*.

    *fn* is one of the VARIANTS callables (MLModel -> quantized MLModel).
    *name* is the component name; it is currently unused in the messages.
    Returns the on-disk size of the saved package in MiB.
    """
    print(f" Loading {src.name}...")
    # Load targeting CPU + Neural Engine compute units.
    model = ct.models.MLModel(str(src), compute_units=ct.ComputeUnit.CPU_AND_NE)
    try:
        # Best-effort: raise the deployment target before quantizing.
        # NOTE(review): presumably needed for the int4/per_block ops — confirm.
        model.minimum_deployment_target = ct.target.iOS17
    except Exception:
        # Some coremltools versions don't allow setting this attribute; ignore.
        pass
    print(f" Quantizing...")
    q_model = fn(model)
    try:
        # Re-apply the target on the quantized copy as well (best-effort).
        q_model.minimum_deployment_target = ct.target.iOS17
    except Exception:
        pass
    dst.parent.mkdir(parents=True, exist_ok=True)
    q_model.save(str(dst))
    size = dir_size_mb(dst)
    print(f" Saved → {dst} ({size:.1f} MB)")
    return size
def main() -> None:
    """Quantize every model with every variant and write a JSON size summary.

    Raises SystemExit if INPUT_DIR does not exist. For each variant a
    subdirectory of OUTPUT_DIR is produced containing the quantized
    .mlpackage files plus copies of metadata.json / vocab.json.
    """
    if not INPUT_DIR.exists():
        raise SystemExit(f"Input dir not found: {INPUT_DIR}")
    # Measure the unquantized packages once, up front, for compression ratios.
    baseline_sizes = {k: dir_size_mb(INPUT_DIR / v) for k, v in MODELS.items()}
    print("Baseline sizes:")
    for k, v in baseline_sizes.items():
        print(f" {k}: {v:.1f} MB")
    print()
    results = {}
    for variant_name, fn in VARIANTS.items():
        print(f"=== Variant: {variant_name} ===")
        variant_dir = OUTPUT_DIR / variant_name
        variant_dir.mkdir(parents=True, exist_ok=True)
        results[variant_name] = {}
        for comp_name, filename in MODELS.items():
            src = INPUT_DIR / filename
            dst = variant_dir / filename
            if not src.exists():
                # BUG FIX: the message previously printed the literal
                # "(unknown)" instead of the missing file's name.
                print(f" Skipping {filename} (not found)")
                continue
            q_size = quantize_model(src, fn, dst, comp_name)
            base_size = baseline_sizes[comp_name]
            # Guard against a zero-byte result to avoid ZeroDivisionError.
            ratio = base_size / q_size if q_size > 0 else 0
            results[variant_name][comp_name] = {
                "size_mb": round(q_size, 2),
                "baseline_mb": round(base_size, 2),
                "compression_ratio": round(ratio, 2),
            }
        # Copy metadata and vocab alongside each variant, when present.
        for f in ["metadata.json", "vocab.json"]:
            src_f = INPUT_DIR / f
            if src_f.exists():
                shutil.copy2(src_f, variant_dir / f)
        print()
    summary_path = OUTPUT_DIR / "quantization_summary.json"
    summary_path.write_text(json.dumps(results, indent=2))
    print(f"Summary written to {summary_path}")
    print()
    print("Compression ratios:")
    for variant, comps in results.items():
        for comp, m in comps.items():
            print(f" {variant}/{comp}: {m['baseline_mb']:.1f} MB → {m['size_mb']:.1f} MB (×{m['compression_ratio']:.2f})")
if __name__ == "__main__":
    main()