Daniel Rothmann committed on 14 days ago

Commit

e79fa0a

1 Parent(s): fed9119

Tidy up repo and conversion scripts

Files changed (18) hide show

KanadeDecoder.mlpackage/Data/com.apple.CoreML/model.mlmodel +2 -2
KanadeDecoder.mlpackage/Manifest.json +8 -8
PlaprePico.mlpackage/Data/com.apple.CoreML/model.mlmodel +2 -2
PlaprePico.mlpackage/Manifest.json +3 -3
PlaprePico_int4.mlpackage/Data/com.apple.CoreML/model.mlmodel +2 -2
PlaprePico_int4.mlpackage/Data/com.apple.CoreML/weights/weight.bin +2 -2
PlaprePico_int4.mlpackage/Manifest.json +8 -8
PlaprePico_int8.mlpackage/Data/com.apple.CoreML/model.mlmodel +2 -2
PlaprePico_int8.mlpackage/Data/com.apple.CoreML/weights/weight.bin +2 -2
PlaprePico_int8.mlpackage/Manifest.json +8 -8
Vocoder.mlpackage/Data/com.apple.CoreML/model.mlmodel +2 -2
Vocoder.mlpackage/Manifest.json +3 -3
manifest.json +1 -1
scripts/build.py +90 -0
scripts/{convert_kanade.py → convert_audio.py} +12 -6
scripts/{convert.py → convert_llm.py} +89 -15
scripts/inject_state_updates.py +0 -172
scripts/mixed_precision.py +0 -184

KanadeDecoder.mlpackage/Data/com.apple.CoreML/model.mlmodel CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a443e8a50213febe82723e6bc41696fa420a0b8050eb626bffdc76f1a7c36e0b
-size 199420

 version https://git-lfs.github.com/spec/v1
+oid sha256:ee0b31302fb2709b2ad5cde88ed77e8dde6170f4b711a03835b1ee1b17fb60d1
+size 199364

KanadeDecoder.mlpackage/Manifest.json CHANGED Viewed

@@ -1,18 +1,18 @@
 {
     "fileFormatVersion": "1.0.0",
     "itemInfoEntries": {
-        "C2BF25B7-4DB5-4C6F-A4B8-4BC558D0C7E7": {
-            "author": "com.apple.CoreML",
-            "description": "CoreML Model Specification",
-            "name": "model.mlmodel",
-            "path": "com.apple.CoreML/model.mlmodel"
-        },
-        "FFA4DD20-41E6-4D3A-B810-5F438BD1787B": {
             "author": "com.apple.CoreML",
             "description": "CoreML Model Weights",
             "name": "weights",
             "path": "com.apple.CoreML/weights"
         }
     },
-    "rootModelIdentifier": "C2BF25B7-4DB5-4C6F-A4B8-4BC558D0C7E7"
 }

 {
     "fileFormatVersion": "1.0.0",
     "itemInfoEntries": {
+        "0E204957-794C-49DA-B444-2E03C7B62509": {
             "author": "com.apple.CoreML",
             "description": "CoreML Model Weights",
             "name": "weights",
             "path": "com.apple.CoreML/weights"
+        },
+        "9CF9C1BF-D9A1-4092-ADCC-2D70ECBAAFB3": {
+            "author": "com.apple.CoreML",
+            "description": "CoreML Model Specification",
+            "name": "model.mlmodel",
+            "path": "com.apple.CoreML/model.mlmodel"
         }
     },
+    "rootModelIdentifier": "9CF9C1BF-D9A1-4092-ADCC-2D70ECBAAFB3"
 }

PlaprePico.mlpackage/Data/com.apple.CoreML/model.mlmodel CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b0ea4fbe5939f8db381da0ccadf9e90b61c82f5f0eca58b46e89b3a5541a49f0
-size 957824

 version https://git-lfs.github.com/spec/v1
+oid sha256:eb9711d00982520e62e667a7f524df950ca1d1080991bdd9cba6d2327b891511
+size 956591

PlaprePico.mlpackage/Manifest.json CHANGED Viewed

@@ -1,18 +1,18 @@
 {
     "fileFormatVersion": "1.0.0",
     "itemInfoEntries": {
-        "1F911078-42FE-4F91-A2D0-E5B86F87F7AD": {
             "author": "com.apple.CoreML",
             "description": "CoreML Model Weights",
             "name": "weights",
             "path": "com.apple.CoreML/weights"
         },
-        "3E69D1BF-E09D-43D9-A7FE-E3B15CDDF0BD": {
             "author": "com.apple.CoreML",
             "description": "CoreML Model Specification",
             "name": "model.mlmodel",
             "path": "com.apple.CoreML/model.mlmodel"
         }
     },
-    "rootModelIdentifier": "3E69D1BF-E09D-43D9-A7FE-E3B15CDDF0BD"
 }

 {
     "fileFormatVersion": "1.0.0",
     "itemInfoEntries": {
+        "6F441A1D-F82F-44B2-A345-005396E0926A": {
             "author": "com.apple.CoreML",
             "description": "CoreML Model Weights",
             "name": "weights",
             "path": "com.apple.CoreML/weights"
         },
+        "FAB5B87A-358B-488E-A85C-97F3F8FDFA45": {
             "author": "com.apple.CoreML",
             "description": "CoreML Model Specification",
             "name": "model.mlmodel",
             "path": "com.apple.CoreML/model.mlmodel"
         }
     },
+    "rootModelIdentifier": "FAB5B87A-358B-488E-A85C-97F3F8FDFA45"
 }

PlaprePico_int4.mlpackage/Data/com.apple.CoreML/model.mlmodel CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a04406d6835f50b4a6a94b028474305217518acd50352b91ba071afbb7a70a45
-size 935132

 version https://git-lfs.github.com/spec/v1
+oid sha256:a156dd782a9aabc95ce6b1e1a34b05a4c71557a96e36cb69e07833a65c530e04
+size 986129

PlaprePico_int4.mlpackage/Data/com.apple.CoreML/weights/weight.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b9ef8ca208c59506b6b4205b59c27ec1e2a838338e969c271fb90db332a0c68e
-size 59654148

 version https://git-lfs.github.com/spec/v1
+oid sha256:dbfed1ebb98d1541a462d6ec13e11b22d086c6ea7088ecdcc1c3c687fd99ee2f
+size 59614916

PlaprePico_int4.mlpackage/Manifest.json CHANGED Viewed

@@ -1,18 +1,18 @@
 {
     "fileFormatVersion": "1.0.0",
     "itemInfoEntries": {
-        "B95C3123-5C93-4485-B674-506310CC30FA": {
-            "author": "com.apple.CoreML",
-            "description": "CoreML Model Weights",
-            "name": "weights",
-            "path": "com.apple.CoreML/weights"
-        },
-        "EF542C51-0401-4C0A-9713-6B90D89D10D4": {
             "author": "com.apple.CoreML",
             "description": "CoreML Model Specification",
             "name": "model.mlmodel",
             "path": "com.apple.CoreML/model.mlmodel"
         }
     },
-    "rootModelIdentifier": "EF542C51-0401-4C0A-9713-6B90D89D10D4"
 }

 {
     "fileFormatVersion": "1.0.0",
     "itemInfoEntries": {
+        "ABEB1845-9A9A-4CDB-AACD-335B4EEE0328": {
             "author": "com.apple.CoreML",
             "description": "CoreML Model Specification",
             "name": "model.mlmodel",
             "path": "com.apple.CoreML/model.mlmodel"
+        },
+        "F3186CBA-EC22-47C9-AAD1-AE8E1C7669C8": {
+            "author": "com.apple.CoreML",
+            "description": "CoreML Model Weights",
+            "name": "weights",
+            "path": "com.apple.CoreML/weights"
         }
     },
+    "rootModelIdentifier": "ABEB1845-9A9A-4CDB-AACD-335B4EEE0328"
 }

PlaprePico_int8.mlpackage/Data/com.apple.CoreML/model.mlmodel CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dac4b9812c8cd6a53a43d2e1d5dc92daf112461026334e39ce104e7b4d20c488
-size 935132

 version https://git-lfs.github.com/spec/v1
+oid sha256:7dd7913ab08a8436f8be1d66604de5af00494f3e7c6512590e002678417dc1ee
+size 986129

PlaprePico_int8.mlpackage/Data/com.apple.CoreML/weights/weight.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fa4953349237cf80de350967e34ef51e1532eb0e1f4472c474047a0bc12bc0e8
-size 118766148

 version https://git-lfs.github.com/spec/v1
+oid sha256:0f3f2ba6cb15545f208241e0c8e3bc148167ee252c6d25029ef0b36c795f48f9
+size 118726916

PlaprePico_int8.mlpackage/Manifest.json CHANGED Viewed

@@ -1,18 +1,18 @@
 {
     "fileFormatVersion": "1.0.0",
     "itemInfoEntries": {
-        "8A809F7D-AC26-4562-8336-52FEB0717F2B": {
-            "author": "com.apple.CoreML",
-            "description": "CoreML Model Specification",
-            "name": "model.mlmodel",
-            "path": "com.apple.CoreML/model.mlmodel"
-        },
-        "B96C4545-8329-431E-A1F1-6DB58F13ACA9": {
             "author": "com.apple.CoreML",
             "description": "CoreML Model Weights",
             "name": "weights",
             "path": "com.apple.CoreML/weights"
         }
     },
-    "rootModelIdentifier": "8A809F7D-AC26-4562-8336-52FEB0717F2B"
 }

 {
     "fileFormatVersion": "1.0.0",
     "itemInfoEntries": {
+        "0758A291-73CC-483A-896D-F8A0679A8DDB": {
             "author": "com.apple.CoreML",
             "description": "CoreML Model Weights",
             "name": "weights",
             "path": "com.apple.CoreML/weights"
+        },
+        "A80CC513-9349-459D-9D4B-327E783D5596": {
+            "author": "com.apple.CoreML",
+            "description": "CoreML Model Specification",
+            "name": "model.mlmodel",
+            "path": "com.apple.CoreML/model.mlmodel"
         }
     },
+    "rootModelIdentifier": "A80CC513-9349-459D-9D4B-327E783D5596"
 }

Vocoder.mlpackage/Data/com.apple.CoreML/model.mlmodel CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:88536c7f82ce5963c40ab46ab192452ddd1af731ecd4e08a40ea827fc544fbb6
-size 1298694

 version https://git-lfs.github.com/spec/v1
+oid sha256:982ea75647bdbbcec542f1b19b739ac2620e115e75a27a05e5c6eb9794422c41
+size 1298631

Vocoder.mlpackage/Manifest.json CHANGED Viewed

@@ -1,18 +1,18 @@
 {
     "fileFormatVersion": "1.0.0",
     "itemInfoEntries": {
-        "1865D6B1-DF08-4C5C-8B25-53058EF04D75": {
             "author": "com.apple.CoreML",
             "description": "CoreML Model Specification",
             "name": "model.mlmodel",
             "path": "com.apple.CoreML/model.mlmodel"
         },
-        "6D12622B-E675-4537-9163-574EA27CA0C1": {
             "author": "com.apple.CoreML",
             "description": "CoreML Model Weights",
             "name": "weights",
             "path": "com.apple.CoreML/weights"
         }
     },
-    "rootModelIdentifier": "1865D6B1-DF08-4C5C-8B25-53058EF04D75"
 }

 {
     "fileFormatVersion": "1.0.0",
     "itemInfoEntries": {
+        "62101658-5B4C-4B17-B935-8DB8A3E815C9": {
             "author": "com.apple.CoreML",
             "description": "CoreML Model Specification",
             "name": "model.mlmodel",
             "path": "com.apple.CoreML/model.mlmodel"
         },
+        "DB7344E2-6983-4F26-9909-B8957A9147DE": {
             "author": "com.apple.CoreML",
             "description": "CoreML Model Weights",
             "name": "weights",
             "path": "com.apple.CoreML/weights"
         }
     },
+    "rootModelIdentifier": "62101658-5B4C-4B17-B935-8DB8A3E815C9"
 }

manifest.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "model": "plapre-pico",
   "version": "1.0",
-  "context_length": 2048,
   "prefill_length": 512,
   "vocab_size": 20802,
   "num_layers": 30,

 {
   "model": "plapre-pico",
   "version": "1.0",
+  "context_length": 512,
   "prefill_length": 512,
   "vocab_size": 20802,
   "num_layers": 30,

scripts/build.py ADDED Viewed

	@@ -0,0 +1,90 @@

+#!/usr/bin/env python3
+"""
+Unified build entry point for the Plapre Pico CoreML pipeline.
+Builds all three CoreML models (Plapre Pico LLM, Kanade decoder, HiFT vocoder)
+and optionally produces quantized variants of the LLM.
+Usage:
+    python scripts/build.py                                  # build all 3 models
+    python scripts/build.py --quantize int4                  # + PlaprePico_int4.mlpackage
+    python scripts/build.py --quantize int4 --quantize int8  # both quantizations
+    python scripts/build.py --skip llm                       # only rebuild audio models
+    python scripts/build.py --skip audio                     # only rebuild LLM
+    python scripts/build.py --output-dir PATH                # default: repo root
+    python scripts/build.py --num-tokens 100                 # passed through to audio
+"""
+import argparse
+from pathlib import Path
+from convert_llm import convert_llm
+from convert_audio import convert_audio
+from quantize import quantize_model
+REPO_ROOT = Path(__file__).parent.parent
+def _dir_size_mb(path: Path) -> float:
+    if not path.exists():
+        return 0.0
+    return sum(f.stat().st_size for f in path.rglob("*") if f.is_file()) / 1e6
+def main():
+    parser = argparse.ArgumentParser(description="Build full Plapre Pico CoreML pipeline")
+    parser.add_argument("--output-dir", type=str, default=str(REPO_ROOT))
+    parser.add_argument("--model-dir", type=str, default=None,
+                        help="Local Plapre Pico HF snapshot (otherwise downloaded)")
+    parser.add_argument("--num-tokens", type=int, default=100,
+                        help="Audio token count for vocoder mel length")
+    parser.add_argument("--quantize", action="append", choices=["int4", "int8"], default=[],
+                        help="Produce quantized LLM variant(s); may be repeated")
+    parser.add_argument("--skip", action="append", choices=["llm", "audio"], default=[],
+                        help="Skip a stage")
+    args = parser.parse_args()
+    output_dir = Path(args.output_dir)
+    output_dir.mkdir(parents=True, exist_ok=True)
+    artifacts: list[Path] = []
+    if "llm" not in args.skip:
+        print("\n========== LLM ==========")
+        llm_path = convert_llm(
+            output_dir=output_dir,
+            model_dir=Path(args.model_dir) if args.model_dir else None,
+        )
+        artifacts.append(llm_path)
+        for filename in ["rope_cos.npy", "rope_sin.npy", "manifest.json",
+                         "tokenizer.json", "speakers.json"]:
+            p = output_dir / filename
+            if p.exists():
+                artifacts.append(p)
+    else:
+        llm_path = output_dir / "PlaprePico.mlpackage"
+    for q in args.quantize:
+        print(f"\n========== Quantize {q} ==========")
+        if not llm_path.exists():
+            print(f"  SKIP: {llm_path} not found (run without --skip llm first)")
+            continue
+        bits = int(q[3:])
+        out = output_dir / f"PlaprePico_{q}.mlpackage"
+        quantize_model(llm_path, out, bits)
+        artifacts.append(out)
+    if "audio" not in args.skip:
+        print("\n========== Audio (Kanade + Vocoder) ==========")
+        kanade_path, vocoder_path = convert_audio(output_dir, args.num_tokens)
+        artifacts.extend([kanade_path, vocoder_path])
+    print("\n========== Build summary ==========")
+    for p in artifacts:
+        size = _dir_size_mb(p) if p.is_dir() else (p.stat().st_size / 1e6 if p.exists() else 0)
+        print(f"  {p.name:40s}  {size:8.1f} MB")
+    print(f"\nOutput directory: {output_dir}")
+if __name__ == "__main__":
+    main()

scripts/{convert_kanade.py → convert_audio.py} RENAMED Viewed

@@ -682,7 +682,13 @@ def main():
         help="Fixed number of audio tokens (determines mel length)",
     )
     args = parser.parse_args()
-    output_dir = Path(args.output_dir)
     output_dir.mkdir(parents=True, exist_ok=True)
     print("Loading Kanade model...")
@@ -690,20 +696,20 @@ def main():
     patch_kanade_for_coreml(kanade)
     vocoder = load_vocoder(kanade.config.vocoder_name).eval().float()
-    # Compute mel_length for this token count
     mel_length = kanade._calculate_target_mel_length(
-        kanade._calculate_original_audio_length(args.num_tokens)
     )
     print(f"\n=== Converting Kanade decoder ===")
-    convert_kanade_decoder(kanade, args.num_tokens, output_dir)
     print(f"\n=== Converting full vocoder (mel → waveform) ===")
     convert_full_vocoder(vocoder, mel_length, output_dir)
-    print("\nDone!")
-    print(f"  KanadeDecoder: {args.num_tokens} tokens → mel (80, {mel_length})")
     print(f"  Vocoder: mel (80, {mel_length}) → waveform")
 def convert_full_vocoder(vocoder, mel_length: int, output_dir: Path):

         help="Fixed number of audio tokens (determines mel length)",
     )
     args = parser.parse_args()
+    convert_audio(Path(args.output_dir), args.num_tokens)
+def convert_audio(output_dir: Path, num_tokens: int = 100) -> tuple[Path, Path]:
+    """Convert Kanade decoder + HiFT vocoder to CoreML.
+    Returns (KanadeDecoder.mlpackage, Vocoder.mlpackage) paths."""
     output_dir.mkdir(parents=True, exist_ok=True)
     print("Loading Kanade model...")
     patch_kanade_for_coreml(kanade)
     vocoder = load_vocoder(kanade.config.vocoder_name).eval().float()
     mel_length = kanade._calculate_target_mel_length(
+        kanade._calculate_original_audio_length(num_tokens)
     )
     print(f"\n=== Converting Kanade decoder ===")
+    convert_kanade_decoder(kanade, num_tokens, output_dir)
     print(f"\n=== Converting full vocoder (mel → waveform) ===")
     convert_full_vocoder(vocoder, mel_length, output_dir)
+    print("\nAudio conversion complete!")
+    print(f"  KanadeDecoder: {num_tokens} tokens → mel (80, {mel_length})")
     print(f"  Vocoder: mel (80, {mel_length}) → waveform")
+    return output_dir / "KanadeDecoder.mlpackage", output_dir / "Vocoder.mlpackage"
 def convert_full_vocoder(vocoder, mel_length: int, output_dir: Path):

scripts/{convert.py → convert_llm.py} RENAMED Viewed

@@ -17,6 +17,7 @@ from pathlib import Path
 import numpy as np
 import torch
 import coremltools as ct
 from huggingface_hub import snapshot_download
 from safetensors.torch import load_file
@@ -166,7 +167,7 @@ def convert_decode(model: PlaprePico, output_dir: Path):
     sin = torch.zeros(1, 1, 1, HEAD_DIM, dtype=torch.float16)
     update_mask = torch.zeros(1, 1, MAX_CONTEXT, 1, dtype=torch.float16)
-    update_mask[0, 0, PREFILL_SEQ_LEN, 0] = 1.0
     speaker_embedding = torch.zeros(1, SPEAKER_DIM, dtype=torch.float16)
     is_speaker_step = torch.zeros(1, dtype=torch.float16)
@@ -211,9 +212,78 @@ def convert_decode(model: PlaprePico, output_dir: Path):
         minimum_deployment_target=ct.target.iOS18,
     )
     out_path = output_dir / "PlaprePico.mlpackage"
     mlmodel.save(str(out_path))
     print(f"Saved decode model to {out_path}")
 def copy_assets(model_dir: Path, output_dir: Path):
@@ -251,33 +321,37 @@ def copy_assets(model_dir: Path, output_dir: Path):
     print(f"Wrote manifest to {manifest_path}")
-def main():
-    parser = argparse.ArgumentParser(description="Convert Plapre Pico to CoreML")
-    parser.add_argument("--model-dir", type=str, help="Path to downloaded model directory")
-    parser.add_argument("--output-dir", type=str, default=str(Path(__file__).parent.parent), help="Output directory")
-    args = parser.parse_args()
-    if args.model_dir:
-        model_dir = Path(args.model_dir)
-    else:
         model_dir = download_model()
-    output_dir = Path(args.output_dir)
     output_dir.mkdir(parents=True, exist_ok=True)
     weights = load_weights(model_dir)
     print("\n=== Building decode model ===")
     decode = PlaprePico()
     populate_weights(decode, weights)
     decode = decode.half()
-    convert_decode(decode, output_dir)
     print("\n=== Copying assets ===")
     copy_assets(model_dir, output_dir)
-    print("\nConversion complete!")
-    print(f"Output: {output_dir}")
 if __name__ == "__main__":

 import numpy as np
 import torch
 import coremltools as ct
+from coremltools.converters.mil.mil import Builder as mb
 from huggingface_hub import snapshot_download
 from safetensors.torch import load_file
     sin = torch.zeros(1, 1, 1, HEAD_DIM, dtype=torch.float16)
     update_mask = torch.zeros(1, 1, MAX_CONTEXT, 1, dtype=torch.float16)
+    update_mask[0, 0, 0, 0] = 1.0  # any valid position for tracing
     speaker_embedding = torch.zeros(1, SPEAKER_DIM, dtype=torch.float16)
     is_speaker_step = torch.zeros(1, dtype=torch.float16)
         minimum_deployment_target=ct.target.iOS18,
     )
+    inject_state_updates(mlmodel)
     out_path = output_dir / "PlaprePico.mlpackage"
     mlmodel.save(str(out_path))
     print(f"Saved decode model to {out_path}")
+    return out_path
+def inject_state_updates(mlmodel):
+    """Inject coreml_update_state ops into a converted stateful CoreML model.
+    torch.jit.trace doesn't emit prim::SetAttr for buffer mutations, so coremltools
+    can't generate coreml_update_state ops automatically. This walks the MIL graph,
+    finds the read_state -> (cast?) -> mul -> add cache update pattern, and inserts
+    coreml_update_state ops before the first consumer of each cache update.
+    """
+    prog = mlmodel._mil_program
+    main_fn = prog.functions["main"]
+    read_ops = list(main_fn.find_ops(op_type="read_state"))
+    print(f"Found {len(read_ops)} read_state ops")
+    updates = []
+    for read_op in read_ops:
+        state_var = read_op.inputs["input"]
+        output = read_op.outputs[0]
+        # FLOAT32: read_state -> cast(fp16->fp32) -> mul -> add
+        # FLOAT16: read_state -> mul -> add
+        first_child = output.child_ops[0]
+        search_output = first_child.outputs[0] if first_child.op_type == "cast" else output
+        mul_op = next((c for c in search_output.child_ops if c.op_type == "mul"), None)
+        if mul_op is None:
+            print(f"  WARNING: no mul found for {state_var.name}")
+            continue
+        add_op = next((c for c in mul_op.outputs[0].child_ops if c.op_type == "add"), None)
+        if add_op is None:
+            print(f"  WARNING: no add found for {state_var.name}")
+            continue
+        updates.append((state_var, add_op))
+    print(f"Injecting {len(updates)} coreml_update_state ops...")
+    block = main_fn.find_ops(op_type="read_state")[0].enclosing_block
+    with block:
+        for state_var, add_op in updates:
+            add_out = add_op.outputs[0]
+            consumers = list(add_out.child_ops)
+            if not consumers:
+                print(f"  WARNING: no consumers for {state_var.name} add output")
+                continue
+            first_consumer = consumers[0]
+            with mb.set_before_op(before_op=first_consumer):
+                if str(add_out.dtype) == "fp16":
+                    state_val = add_out
+                else:
+                    state_val = mb.cast(
+                        x=add_out, dtype="fp16",
+                        name=f"state_cast_{state_var.name}",
+                    )
+                mb.coreml_update_state(
+                    state=state_var, value=state_val,
+                    name=f"state_update_{state_var.name}",
+                )
+    prog_str = str(prog)
+    print(f"  read_state: {prog_str.count('read_state')}")
+    print(f"  coreml_update_state: {prog_str.count('coreml_update_state')}")
 def copy_assets(model_dir: Path, output_dir: Path):
     print(f"Wrote manifest to {manifest_path}")
+def convert_llm(output_dir: Path, model_dir: Path | None = None) -> Path:
+    """Convert Plapre Pico LLM end-to-end: download → load → trace → convert →
+    inject state updates → copy assets. Returns path to PlaprePico.mlpackage."""
+    if model_dir is None:
         model_dir = download_model()
     output_dir.mkdir(parents=True, exist_ok=True)
     weights = load_weights(model_dir)
     print("\n=== Building decode model ===")
     decode = PlaprePico()
     populate_weights(decode, weights)
     decode = decode.half()
+    out_path = convert_decode(decode, output_dir)
     print("\n=== Copying assets ===")
     copy_assets(model_dir, output_dir)
+    print(f"\nLLM conversion complete: {out_path}")
+    return out_path
+def main():
+    parser = argparse.ArgumentParser(description="Convert Plapre Pico LLM to CoreML")
+    parser.add_argument("--model-dir", type=str, help="Path to downloaded model directory")
+    parser.add_argument("--output-dir", type=str, default=str(Path(__file__).parent.parent), help="Output directory")
+    args = parser.parse_args()
+    convert_llm(
+        output_dir=Path(args.output_dir),
+        model_dir=Path(args.model_dir) if args.model_dir else None,
+    )
 if __name__ == "__main__":

scripts/inject_state_updates.py DELETED Viewed

@@ -1,172 +0,0 @@
-#!/usr/bin/env python3
-"""
-Post-process a traced CoreML model to inject coreml_update_state ops.
-torch.jit.trace doesn't emit prim::SetAttr for buffer mutations, so coremltools
-can't generate coreml_update_state ops automatically. This script:
-1. Loads the converted model
-2. Finds the read_state -> computation -> cache update pattern
-3. Injects coreml_update_state after each cache update
-4. Saves the fixed model
-"""
-import sys
-import numpy as np
-import torch
-import coremltools as ct
-from coremltools.converters.mil.mil import Builder as mb
-from coremltools.converters.mil.mil import types
-from pathlib import Path
-sys.path.insert(0, str(Path(__file__).parent))
-from model_wrapper import (
-    PlaprePico, HIDDEN_SIZE, HEAD_DIM, MAX_CONTEXT,
-    NUM_KV_HEADS, NUM_LAYERS, VOCAB_SIZE, SPEAKER_DIM,
-)
-from convert import load_weights, _map_weight_name, build_kv_cache_states
-from huggingface_hub import snapshot_download
-def convert_and_fix_decode(output_path: Path):
-    """Convert decode model and inject coreml_update_state ops."""
-    model_dir = Path(snapshot_download("syvai/plapre-pico"))
-    weights = load_weights(model_dir)
-    model = PlaprePico()
-    sd = model.state_dict()
-    ns = {}
-    for k, v in weights.items():
-        n = _map_weight_name(k)
-        if n and n in sd and sd[n].shape == v.shape:
-            ns[n] = v
-    model.load_state_dict(ns, strict=False)
-    model = model.half().eval()
-    inputs = (
-        torch.zeros(1, 1, dtype=torch.int32),
-        torch.full((1, 1, 1, MAX_CONTEXT), float("-inf"), dtype=torch.float16),
-        torch.zeros(1, 1, 1, HEAD_DIM, dtype=torch.float16),
-        torch.zeros(1, 1, 1, HEAD_DIM, dtype=torch.float16),
-        torch.zeros(1, 1, MAX_CONTEXT, 1, dtype=torch.float16),
-        torch.zeros(1, SPEAKER_DIM, dtype=torch.float16),
-        torch.zeros(1, dtype=torch.float16),
-    )
-    print("Tracing...")
-    with torch.no_grad():
-        traced = torch.jit.trace(model, inputs)
-    print("Converting to CoreML...")
-    mlmodel = ct.convert(
-        traced,
-        inputs=[
-            ct.TensorType(name="input_ids", shape=(1, 1), dtype=np.int32),
-            ct.TensorType(name="causal_mask", shape=(1, 1, 1, MAX_CONTEXT), dtype=np.float16),
-            ct.TensorType(name="cos", shape=(1, 1, 1, HEAD_DIM), dtype=np.float16),
-            ct.TensorType(name="sin", shape=(1, 1, 1, HEAD_DIM), dtype=np.float16),
-            ct.TensorType(name="update_mask", shape=(1, 1, MAX_CONTEXT, 1), dtype=np.float16),
-            ct.TensorType(name="speaker_embedding", shape=(1, SPEAKER_DIM), dtype=np.float16),
-            ct.TensorType(name="is_speaker_step", shape=(1,), dtype=np.float16),
-        ],
-        outputs=[ct.TensorType(name="logits", dtype=np.float16)],
-        states=build_kv_cache_states(),
-        compute_precision=ct.precision.FLOAT16,
-        minimum_deployment_target=ct.target.iOS18,
-    )
-    prog = mlmodel._mil_program
-    main_fn = prog.functions["main"]
-    # Find all read_state ops and trace to their cache update (add) ops
-    read_ops = list(main_fn.find_ops(op_type="read_state"))
-    print(f"Found {len(read_ops)} read_state ops")
-    updates = []
-    for read_op in read_ops:
-        state_var = read_op.inputs["input"]
-        output = read_op.outputs[0]
-        # Follow the graph from read_state to the cache update (add) op.
-        # With FLOAT32 precision: read_state -> cast(fp16->fp32) -> mul -> add
-        # With FLOAT16 precision: read_state -> mul -> add (no cast needed)
-        first_child = output.child_ops[0]
-        if first_child.op_type == "cast":
-            search_output = first_child.outputs[0]
-        else:
-            search_output = output
-        mul_op = None
-        for child in search_output.child_ops:
-            if child.op_type == "mul":
-                mul_op = child
-                break
-        if mul_op is None:
-            print(f"  WARNING: no mul found for {state_var.name}")
-            continue
-        mul_out = mul_op.outputs[0]
-        add_op = None
-        for child in mul_out.child_ops:
-            if child.op_type == "add":
-                add_op = child
-                break
-        if add_op is None:
-            print(f"  WARNING: no add found for {state_var.name}")
-            continue
-        updates.append((state_var, add_op))
-    print(f"Injecting {len(updates)} coreml_update_state ops...")
-    # Get the block
-    block = main_fn.find_ops(op_type="read_state")[0].enclosing_block
-    injected = 0
-    with block:
-        for state_var, add_op in updates:
-            add_out = add_op.outputs[0]
-            # Find the first consumer of add_out to insert before it
-            consumers = list(add_out.child_ops)
-            if not consumers:
-                print(f"  WARNING: no consumers for {state_var.name} add output")
-                continue
-            # Insert cast fp32->fp16 and coreml_update_state before the first consumer
-            first_consumer = consumers[0]
-            with mb.set_before_op(before_op=first_consumer):
-                # Cast to fp16 if needed (fp32 precision produces fp32 add output)
-                if str(add_out.dtype) == "fp16":
-                    state_val = add_out
-                else:
-                    state_val = mb.cast(
-                        x=add_out, dtype="fp16",
-                        name=f"state_cast_{state_var.name}",
-                    )
-                # Write updated cache back to state
-                updated_val = mb.coreml_update_state(
-                    state=state_var, value=state_val,
-                    name=f"state_update_{state_var.name}",
-                )
-            injected += 1
-    # Verify state injection
-    prog_str = str(prog)
-    print(f"After state injection:")
-    print(f"  read_state: {prog_str.count('read_state')}")
-    print(f"  coreml_update_state: {prog_str.count('coreml_update_state')}")
-    print(f"Saving to {output_path}...")
-    mlmodel.save(str(output_path))
-    print("Done!")
-if __name__ == "__main__":
-    output = Path(__file__).parent.parent / "PlaprePico.mlpackage"
-    convert_and_fix_decode(output)

scripts/mixed_precision.py DELETED Viewed

@@ -1,184 +0,0 @@
-#!/usr/bin/env python3
-"""
-Convert PlaprePico with mixed precision: fp16 matmuls, fp32 RMSNorm+softmax.
-Uses coremltools' FP16ComputePrecision pass with op_selector to selectively
-downcast ops to fp16 while keeping numerically sensitive ops in fp32.
-"""
-import sys
-import numpy as np
-import torch
-import coremltools as ct
-from coremltools.converters.mil.mil import Builder as mb
-from coremltools.converters.mil.mil.passes.defs.quantization import FP16ComputePrecision
-from pathlib import Path
-sys.path.insert(0, str(Path(__file__).parent))
-from model_wrapper import (
-    PlaprePico, HIDDEN_SIZE, HEAD_DIM, MAX_CONTEXT,
-    NUM_KV_HEADS, NUM_LAYERS, VOCAB_SIZE, SPEAKER_DIM,
-)
-from convert import load_weights, _map_weight_name, build_kv_cache_states
-from huggingface_hub import snapshot_download
-# Ops that MUST stay in fp32 (overflow-prone)
-FP32_OPS = {"reduce_mean", "softmax", "rsqrt", "reduce_sum", "pow"}
-def mixed_precision_selector(op):
-    """Return True if this op should be cast to fp16, False to keep in fp32."""
-    if op.op_type in FP32_OPS:
-        return False
-    return True
-def convert_mixed_precision(output_path: Path):
-    """Convert with mixed precision: fp16 everywhere except RMSNorm+softmax."""
-    model_dir = Path(snapshot_download("syvai/plapre-pico"))
-    weights = load_weights(model_dir)
-    model = PlaprePico()
-    sd = model.state_dict()
-    ns = {}
-    for k, v in weights.items():
-        n = _map_weight_name(k)
-        if n and n in sd and sd[n].shape == v.shape:
-            ns[n] = v
-    model.load_state_dict(ns, strict=False)
-    model = model.half().eval()
-    inputs = (
-        torch.zeros(1, 1, dtype=torch.int32),
-        torch.full((1, 1, 1, MAX_CONTEXT), float("-inf"), dtype=torch.float16),
-        torch.zeros(1, 1, 1, HEAD_DIM, dtype=torch.float16),
-        torch.zeros(1, 1, 1, HEAD_DIM, dtype=torch.float16),
-        torch.zeros(1, 1, MAX_CONTEXT, 1, dtype=torch.float16),
-        torch.zeros(1, SPEAKER_DIM, dtype=torch.float16),
-        torch.zeros(1, dtype=torch.float16),
-    )
-    print("Tracing...")
-    with torch.no_grad():
-        traced = torch.jit.trace(model, inputs)
-    # Convert with NO automatic precision — we'll apply it manually
-    print("Converting to CoreML (no precision pass)...")
-    mlmodel = ct.convert(
-        traced,
-        inputs=[
-            ct.TensorType(name="input_ids", shape=(1, 1), dtype=np.int32),
-            ct.TensorType(name="causal_mask", shape=(1, 1, 1, MAX_CONTEXT), dtype=np.float16),
-            ct.TensorType(name="cos", shape=(1, 1, 1, HEAD_DIM), dtype=np.float16),
-            ct.TensorType(name="sin", shape=(1, 1, 1, HEAD_DIM), dtype=np.float16),
-            ct.TensorType(name="update_mask", shape=(1, 1, MAX_CONTEXT, 1), dtype=np.float16),
-            ct.TensorType(name="speaker_embedding", shape=(1, SPEAKER_DIM), dtype=np.float16),
-            ct.TensorType(name="is_speaker_step", shape=(1,), dtype=np.float16),
-        ],
-        outputs=[ct.TensorType(name="logits", dtype=np.float16)],
-        states=build_kv_cache_states(),
-        compute_precision=ct.precision.FLOAT32,
-        minimum_deployment_target=ct.target.iOS18,
-    )
-    prog = mlmodel._mil_program
-    main_fn = prog.functions["main"]
-    # === Step 1: Inject coreml_update_state ops FIRST (before fp16 pass changes graph) ===
-    print("Injecting coreml_update_state ops...")
-    read_ops = list(main_fn.find_ops(op_type="read_state"))
-    print(f"  Found {len(read_ops)} read_state ops")
-    updates = []
-    for read_op in read_ops:
-        state_var = read_op.inputs["input"]
-        output = read_op.outputs[0]
-        first_child = output.child_ops[0]
-        if first_child.op_type == "cast":
-            search_output = first_child.outputs[0]
-        else:
-            search_output = output
-        mul_op = None
-        for child in search_output.child_ops:
-            if child.op_type == "mul":
-                mul_op = child
-                break
-        if mul_op is None:
-            print(f"  WARNING: no mul found for {state_var.name}")
-            continue
-        mul_out = mul_op.outputs[0]
-        add_op = None
-        for child in mul_out.child_ops:
-            if child.op_type == "add":
-                add_op = child
-                break
-        if add_op is None:
-            print(f"  WARNING: no add found for {state_var.name}")
-            continue
-        updates.append((state_var, add_op))
-    print(f"  Injecting {len(updates)} coreml_update_state ops...")
-    block = main_fn.find_ops(op_type="read_state")[0].enclosing_block
-    with block:
-        for state_var, add_op in updates:
-            add_out = add_op.outputs[0]
-            consumers = list(add_out.child_ops)
-            if not consumers:
-                continue
-            first_consumer = consumers[0]
-            with mb.set_before_op(before_op=first_consumer):
-                if str(add_out.dtype) == "fp16":
-                    state_val = add_out
-                else:
-                    state_val = mb.cast(
-                        x=add_out, dtype="fp16",
-                        name=f"state_cast_{state_var.name}",
-                    )
-                mb.coreml_update_state(
-                    state=state_var, value=state_val,
-                    name=f"state_update_{state_var.name}",
-                )
-    prog_str = str(prog)
-    print(f"  read_state: {prog_str.count('read_state')}")
-    print(f"  coreml_update_state: {prog_str.count('coreml_update_state')}")
-    # === Step 2: Apply selective fp16 cast pass ===
-    print("\nApplying mixed precision (fp16 matmuls, fp32 RMSNorm+softmax)...")
-    mil_str = str(prog)
-    print(f"  Before: {mil_str.count('cast')} cast ops")
-    fp16_pass = FP16ComputePrecision(op_selector=mixed_precision_selector)
-    fp16_pass.apply(prog)
-    mil_str = str(prog)
-    print(f"  After: {mil_str.count('cast')} cast ops")
-    # Verify sensitive ops stayed fp32
-    fp32_softmax = sum(1 for line in mil_str.split('\n') if 'softmax' in line and 'fp32' in line)
-    fp32_reduce = sum(1 for line in mil_str.split('\n') if 'reduce_mean' in line and 'fp32' in line)
-    fp16_linear = sum(1 for line in mil_str.split('\n') if 'linear' in line and 'fp16' in line)
-    fp16_matmul = sum(1 for line in mil_str.split('\n') if 'matmul' in line and 'fp16' in line)
-    print(f"  fp32 softmax: {fp32_softmax}, fp32 reduce_mean: {fp32_reduce}")
-    print(f"  fp16 linear: {fp16_linear}, fp16 matmul: {fp16_matmul}")
-    print(f"\nSaving to {output_path}...")
-    mlmodel.save(str(output_path))
-    print("Done!")
-if __name__ == "__main__":
-    output = Path(__file__).parent.parent / "PlaprePico.mlpackage"
-    convert_mixed_precision(output)