#!/usr/bin/env python3
"""
Unified build entry point for the Plapre Pico CoreML pipeline.

Builds all three CoreML models (Plapre Pico LLM, Kanade decoder, HiFT
vocoder) and optionally produces quantized variants of the LLM.

Usage:
    python scripts/build.py                                  # build all 3 models
    python scripts/build.py --quantize int4                  # + PlaprePico_int4.mlpackage
    python scripts/build.py --quantize int4 --quantize int8  # both quantizations
    python scripts/build.py --skip llm                       # only rebuild audio models
    python scripts/build.py --skip audio                     # only rebuild LLM
    python scripts/build.py --output-dir PATH                # default: repo root
    python scripts/build.py --num-tokens 100                 # passed through to audio
"""

import argparse
from pathlib import Path

from convert_llm import convert_llm
from convert_audio import convert_audio
from quantize import quantize_model

REPO_ROOT = Path(__file__).parent.parent

# Sidecar files convert_llm writes next to the .mlpackage; collected into
# the build summary when present.
_LLM_SIDECAR_FILES = [
    "rope_cos.npy",
    "rope_sin.npy",
    "manifest.json",
    "tokenizer.json",
    "speakers.json",
]


def _dir_size_mb(path: Path) -> float:
    """Return the total size in MB of all files under *path* (0.0 if missing).

    Needed because .mlpackage artifacts are directories, not single files.
    """
    if not path.exists():
        return 0.0
    return sum(f.stat().st_size for f in path.rglob("*") if f.is_file()) / 1e6


def main() -> None:
    """Parse CLI arguments, run the requested build stages, print a summary."""
    parser = argparse.ArgumentParser(description="Build full Plapre Pico CoreML pipeline")
    parser.add_argument("--output-dir", type=str, default=str(REPO_ROOT))
    parser.add_argument("--model-dir", type=str, default=None,
                        help="Local Plapre Pico HF snapshot (otherwise downloaded)")
    parser.add_argument("--num-tokens", type=int, default=100,
                        help="Audio token count for vocoder mel length")
    # BUG FIX: the usage examples above advertise "--quantize int4" and the
    # quantize step below parses the bit width out of the string, but the
    # original choices list only allowed "int8", so int4 was rejected by
    # argparse before the pipeline ever ran.
    parser.add_argument("--quantize", action="append", choices=["int4", "int8"], default=[],
                        help="Produce quantized LLM variant(s); may be repeated")
    parser.add_argument("--skip", action="append", choices=["llm", "audio"], default=[],
                        help="Skip a stage")
    args = parser.parse_args()

    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    artifacts: list[Path] = []

    if "llm" not in args.skip:
        print("\n========== LLM ==========")
        llm_path = convert_llm(
            output_dir=output_dir,
            model_dir=Path(args.model_dir) if args.model_dir else None,
        )
        artifacts.append(llm_path)
        for filename in _LLM_SIDECAR_FILES:
            p = output_dir / filename
            if p.exists():
                artifacts.append(p)
    else:
        # LLM stage skipped: point at a previously built package so the
        # quantize stage can still run against it.
        llm_path = output_dir / "PlaprePico.mlpackage"

    for q in args.quantize:
        print(f"\n========== Quantize {q} ==========")
        if not llm_path.exists():
            print(f" SKIP: {llm_path} not found (run without --skip llm first)")
            continue
        bits = int(q[3:])  # "int4" -> 4, "int8" -> 8
        out = output_dir / f"PlaprePico_{q}.mlpackage"
        quantize_model(llm_path, out, bits)
        artifacts.append(out)

    if "audio" not in args.skip:
        print("\n========== Audio (Kanade + Vocoder) ==========")
        kanade_path, vocoder_path = convert_audio(output_dir, args.num_tokens)
        artifacts.extend([kanade_path, vocoder_path])

    print("\n========== Build summary ==========")
    for p in artifacts:
        # Directories (mlpackages) need the recursive size helper; sidecar
        # files are sized directly.
        size = _dir_size_mb(p) if p.is_dir() else (p.stat().st_size / 1e6 if p.exists() else 0)
        print(f" {p.name:40s} {size:8.1f} MB")
    print(f"\nOutput directory: {output_dir}")


if __name__ == "__main__":
    main()