| |
| """ |
| Unified build entry point for the Plapre Pico CoreML pipeline. |
| |
| Builds all three CoreML models (Plapre Pico LLM, Kanade decoder, HiFT vocoder) |
| and optionally produces quantized variants of the LLM. |
| |
| Usage: |
| python scripts/build.py # build all 3 models |
| python scripts/build.py --quantize int4 # + PlaprePico_int4.mlpackage |
| python scripts/build.py --quantize int4 --quantize int8 # both quantizations |
| python scripts/build.py --skip llm # only rebuild audio models |
| python scripts/build.py --skip audio # only rebuild LLM |
| python scripts/build.py --output-dir PATH # default: repo root |
| python scripts/build.py --num-tokens 100 # passed through to audio |
| """ |
|
|
| import argparse |
| from pathlib import Path |
|
|
| from convert_llm import convert_llm |
| from convert_audio import convert_audio |
| from quantize import quantize_model |
|
|
|
|
# Parent of the directory that contains this file; used as the default
# value of --output-dir.
REPO_ROOT = Path(__file__).parent.parent
|
|
|
|
def _dir_size_mb(path: Path) -> float:
    """Return the combined size of every regular file below *path*, in MB.

    One MB is 1e6 bytes here. A path that does not exist yields 0.0.
    """
    if path.exists():
        total_bytes = 0
        # Walk the whole tree; only regular files contribute to the total.
        for entry in path.rglob("*"):
            if entry.is_file():
                total_bytes += entry.stat().st_size
        return total_bytes / 1e6
    return 0.0
|
|
|
|
def main():
    """Parse command-line options and run the selected build stages.

    Stages, in order:

    1. LLM conversion via ``convert_llm`` (unless ``--skip llm``).
    2. One ``quantize_model`` pass per distinct ``--quantize`` value.
    3. Audio conversion via ``convert_audio`` (unless ``--skip audio``).

    Every artifact recorded along the way is listed with its size in a
    final build summary.
    """
    # Single source of truth for the supported quantization modes: the keys
    # are the accepted --quantize values, the values are the bit widths
    # handed to quantize_model.
    # NOTE(review): int4 is advertised in the module usage text but was
    # previously rejected by argparse; confirm quantize_model accepts bits=4.
    quant_bits = {"int4": 4, "int8": 8}

    parser = argparse.ArgumentParser(description="Build full Plapre Pico CoreML pipeline")
    parser.add_argument("--output-dir", type=str, default=str(REPO_ROOT))
    parser.add_argument("--model-dir", type=str, default=None,
                        help="Local Plapre Pico HF snapshot (otherwise downloaded)")
    parser.add_argument("--num-tokens", type=int, default=100,
                        help="Audio token count for vocoder mel length")
    parser.add_argument("--quantize", action="append", choices=list(quant_bits), default=[],
                        help="Produce quantized LLM variant(s); may be repeated")
    parser.add_argument("--skip", action="append", choices=["llm", "audio"], default=[],
                        help="Skip a stage")
    args = parser.parse_args()

    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    artifacts: list[Path] = []

    if "llm" not in args.skip:
        print("\n========== LLM ==========")
        llm_path = convert_llm(
            output_dir=output_dir,
            model_dir=Path(args.model_dir) if args.model_dir else None,
        )
        artifacts.append(llm_path)
        # Auxiliary files are listed in the summary only if they are present
        # in the output directory after the conversion.
        for filename in ["rope_cos.npy", "rope_sin.npy", "manifest.json",
                         "tokenizer.json", "speakers.json"]:
            p = output_dir / filename
            if p.exists():
                artifacts.append(p)
    else:
        # The LLM stage was skipped: quantization falls back to a package
        # left in the output directory by an earlier run.
        llm_path = output_dir / "PlaprePico.mlpackage"

    # dict.fromkeys drops repeated flags while keeping first-seen order, so
    # the same variant is never quantized (or listed) twice.
    for q in dict.fromkeys(args.quantize):
        print(f"\n========== Quantize {q} ==========")
        if not llm_path.exists():
            print(f"  SKIP: {llm_path} not found (run without --skip llm first)")
            continue
        out = output_dir / f"PlaprePico_{q}.mlpackage"
        quantize_model(llm_path, out, quant_bits[q])
        artifacts.append(out)

    if "audio" not in args.skip:
        print("\n========== Audio (Kanade + Vocoder) ==========")
        kanade_path, vocoder_path = convert_audio(output_dir, args.num_tokens)
        artifacts.extend([kanade_path, vocoder_path])

    print("\n========== Build summary ==========")
    for p in artifacts:
        # Directories are measured recursively; a missing artifact is
        # reported as 0 MB instead of aborting the summary.
        if p.is_dir():
            size = _dir_size_mb(p)
        elif p.exists():
            size = p.stat().st_size / 1e6
        else:
            size = 0.0
        print(f"  {p.name:40s} {size:8.1f} MB")
    print(f"\nOutput directory: {output_dir}")
|
|
|
|
# Run the build only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
|