#!/usr/bin/env python3
"""
Unified build entry point for the Plapre Pico CoreML pipeline.

Builds all three CoreML models (Plapre Pico LLM, Kanade decoder, HiFT
vocoder) and optionally produces quantized variants of the LLM.

Usage:
    python scripts/build.py                                  # build all 3 models
    python scripts/build.py --quantize int4                  # + PlaprePico_int4.mlpackage
    python scripts/build.py --quantize int4 --quantize int8  # both quantizations
    python scripts/build.py --skip llm                       # only rebuild audio models
    python scripts/build.py --skip audio                     # only rebuild LLM
    python scripts/build.py --output-dir PATH                # default: repo root
    python scripts/build.py --num-tokens 100                 # passed through to audio
"""

import argparse
from pathlib import Path

from convert_llm import convert_llm
from convert_audio import convert_audio
from quantize import quantize_model

REPO_ROOT = Path(__file__).parent.parent

# Sidecar files convert_llm writes next to the .mlpackage; collected into
# the build summary when present.
_LLM_SIDECAR_FILES = [
    "rope_cos.npy",
    "rope_sin.npy",
    "manifest.json",
    "tokenizer.json",
    "speakers.json",
]


def _dir_size_mb(path: Path) -> float:
    """Return the total size in MB of all files under *path* (0.0 if missing).

    Needed because .mlpackage artifacts are directories, not single files.
    """
    if not path.exists():
        return 0.0
    return sum(f.stat().st_size for f in path.rglob("*") if f.is_file()) / 1e6


def main() -> None:
    """Parse CLI arguments, run the requested build stages, print a summary."""
    parser = argparse.ArgumentParser(description="Build full Plapre Pico CoreML pipeline")
    parser.add_argument("--output-dir", type=str, default=str(REPO_ROOT))
    parser.add_argument("--model-dir", type=str, default=None,
                        help="Local Plapre Pico HF snapshot (otherwise downloaded)")
    parser.add_argument("--num-tokens", type=int, default=100,
                        help="Audio token count for vocoder mel length")
    # BUG FIX: the usage examples above advertise "--quantize int4" and the
    # quantize step below parses the bit width out of the string, but the
    # original choices list only allowed "int8", so int4 was rejected by
    # argparse before the pipeline ever ran.
    parser.add_argument("--quantize", action="append", choices=["int4", "int8"], default=[],
                        help="Produce quantized LLM variant(s); may be repeated")
    parser.add_argument("--skip", action="append", choices=["llm", "audio"], default=[],
                        help="Skip a stage")
    args = parser.parse_args()

    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    artifacts: list[Path] = []

    if "llm" not in args.skip:
        print("\n========== LLM ==========")
        llm_path = convert_llm(
            output_dir=output_dir,
            model_dir=Path(args.model_dir) if args.model_dir else None,
        )
        artifacts.append(llm_path)
        for filename in _LLM_SIDECAR_FILES:
            p = output_dir / filename
            if p.exists():
                artifacts.append(p)
    else:
        # LLM stage skipped: point at a previously built package so the
        # quantize stage can still run against it.
        llm_path = output_dir / "PlaprePico.mlpackage"

    for q in args.quantize:
        print(f"\n========== Quantize {q} ==========")
        if not llm_path.exists():
            print(f" SKIP: {llm_path} not found (run without --skip llm first)")
            continue
        bits = int(q[3:])  # "int4" -> 4, "int8" -> 8
        out = output_dir / f"PlaprePico_{q}.mlpackage"
        quantize_model(llm_path, out, bits)
        artifacts.append(out)

    if "audio" not in args.skip:
        print("\n========== Audio (Kanade + Vocoder) ==========")
        kanade_path, vocoder_path = convert_audio(output_dir, args.num_tokens)
        artifacts.extend([kanade_path, vocoder_path])

    print("\n========== Build summary ==========")
    for p in artifacts:
        # Directories (mlpackages) need the recursive size helper; sidecar
        # files are sized directly.
        size = _dir_size_mb(p) if p.is_dir() else (p.stat().st_size / 1e6 if p.exists() else 0)
        print(f" {p.name:40s} {size:8.1f} MB")
    print(f"\nOutput directory: {output_dir}")


if __name__ == "__main__":
    main()