Remove int4 variant because its quality was too poor

Files changed (8) hide show

PlaprePico_int4.mlpackage/Data/com.apple.CoreML/model.mlmodel +0 -3
PlaprePico_int4.mlpackage/Data/com.apple.CoreML/weights/weight.bin +0 -3
PlaprePico_int4.mlpackage/Manifest.json +0 -18
README.md +0 -1
scripts/build.py +1 -1
scripts/quantize.py +5 -9
swift-cli/Sources/Configuration.swift +3 -4
swift-cli/Sources/main.swift +4 -7

PlaprePico_int4.mlpackage/Data/com.apple.CoreML/model.mlmodel DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:a156dd782a9aabc95ce6b1e1a34b05a4c71557a96e36cb69e07833a65c530e04
-size 986129

PlaprePico_int4.mlpackage/Data/com.apple.CoreML/weights/weight.bin DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:dbfed1ebb98d1541a462d6ec13e11b22d086c6ea7088ecdcc1c3c687fd99ee2f
-size 59614916

PlaprePico_int4.mlpackage/Manifest.json DELETED Viewed

@@ -1,18 +0,0 @@
-{
-    "fileFormatVersion": "1.0.0",
-    "itemInfoEntries": {
-        "ABEB1845-9A9A-4CDB-AACD-335B4EEE0328": {
-            "author": "com.apple.CoreML",
-            "description": "CoreML Model Specification",
-            "name": "model.mlmodel",
-            "path": "com.apple.CoreML/model.mlmodel"
-        },
-        "F3186CBA-EC22-47C9-AAD1-AE8E1C7669C8": {
-            "author": "com.apple.CoreML",
-            "description": "CoreML Model Weights",
-            "name": "weights",
-            "path": "com.apple.CoreML/weights"
-        }
-    },
-    "rootModelIdentifier": "ABEB1845-9A9A-4CDB-AACD-335B4EEE0328"
-}

README.md CHANGED Viewed

@@ -25,7 +25,6 @@ CoreML conversion of [syvai/plapre-pico](https://huggingface.co/syvai/plapre-pic
 | `KanadeDecoder.mlpackage` | Audio tokens + speaker → mel spectrogram | ~348MB |
 | `Vocoder.mlpackage` | Mel → waveform (F0 + source gen + HiFT + iSTFT baked in) | ~67MB |
 | `PlaprePico_int8.mlpackage` | int8 quantized LLM (comparable quality) | ~120MB |
-| `PlaprePico_int4.mlpackage` | int4 quantized LLM (some quality loss) | ~61MB |
 ## Performance (iPhone 15 / A16, CPU Only)

 | `KanadeDecoder.mlpackage` | Audio tokens + speaker → mel spectrogram | ~348MB |
 | `Vocoder.mlpackage` | Mel → waveform (F0 + source gen + HiFT + iSTFT baked in) | ~67MB |
 | `PlaprePico_int8.mlpackage` | int8 quantized LLM (comparable quality) | ~120MB |
 ## Performance (iPhone 15 / A16, CPU Only)

scripts/build.py CHANGED Viewed

@@ -39,7 +39,7 @@ def main():
                         help="Local Plapre Pico HF snapshot (otherwise downloaded)")
     parser.add_argument("--num-tokens", type=int, default=100,
                         help="Audio token count for vocoder mel length")
-    parser.add_argument("--quantize", action="append", choices=["int4", "int8"], default=[],
                         help="Produce quantized LLM variant(s); may be repeated")
     parser.add_argument("--skip", action="append", choices=["llm", "audio"], default=[],
                         help="Skip a stage")

                         help="Local Plapre Pico HF snapshot (otherwise downloaded)")
     parser.add_argument("--num-tokens", type=int, default=100,
                         help="Audio token count for vocoder mel length")
+    parser.add_argument("--quantize", action="append", choices=["int8"], default=[],
                         help="Produce quantized LLM variant(s); may be repeated")
     parser.add_argument("--skip", action="append", choices=["llm", "audio"], default=[],
                         help="Skip a stage")

scripts/quantize.py CHANGED Viewed

@@ -2,11 +2,11 @@
 """
 Post-training weight quantization for PlaprePico CoreML model.
-Applies int4 or int8 linear weight quantization to reduce model size
 and improve inference speed (less memory bandwidth).
 Usage:
-    python quantize.py [--bits 4|8] [--input PATH] [--output PATH]
 """
 import argparse
@@ -20,7 +20,7 @@ from coremltools.optimize.coreml import (
 )
-def quantize_model(input_path: Path, output_path: Path, nbits: int = 4):
     print(f"Loading {input_path}...")
     model = ct.models.MLModel(str(input_path), compute_units=ct.ComputeUnit.CPU_ONLY)
@@ -54,19 +54,15 @@ def quantize_model(input_path: Path, output_path: Path, nbits: int = 4):
 def main():
     parser = argparse.ArgumentParser(description="Quantize PlaprePico model")
-    parser.add_argument("--bits", type=int, default=4, choices=[4, 8])
     parser.add_argument("--input", type=str,
                         default=str(Path(__file__).parent.parent / "PlaprePico.mlpackage"))
     parser.add_argument("--output", type=str, default=None)
     args = parser.parse_args()
     input_path = Path(args.input)
-    if args.output:
-        output_path = Path(args.output)
-    else:
-        output_path = input_path.parent / f"PlaprePico_int{args.bits}.mlpackage"
-    quantize_model(input_path, output_path, args.bits)
 if __name__ == "__main__":

 """
 Post-training weight quantization for PlaprePico CoreML model.
+Applies int8 linear weight quantization to reduce model size
 and improve inference speed (less memory bandwidth).
 Usage:
+    python quantize.py [--input PATH] [--output PATH]
 """
 import argparse
 )
+def quantize_model(input_path: Path, output_path: Path, nbits: int = 8):
     print(f"Loading {input_path}...")
     model = ct.models.MLModel(str(input_path), compute_units=ct.ComputeUnit.CPU_ONLY)
 def main():
     parser = argparse.ArgumentParser(description="Quantize PlaprePico model")
     parser.add_argument("--input", type=str,
                         default=str(Path(__file__).parent.parent / "PlaprePico.mlpackage"))
     parser.add_argument("--output", type=str, default=None)
     args = parser.parse_args()
     input_path = Path(args.input)
+    output_path = Path(args.output) if args.output else input_path.parent / "PlaprePico_int8.mlpackage"
+    quantize_model(input_path, output_path, 8)
 if __name__ == "__main__":

swift-cli/Sources/Configuration.swift CHANGED Viewed

@@ -43,10 +43,9 @@ enum PlapreConfig {
     }
     /// Returns the URL for a model package, accounting for quantization flags.
-    static func modelURL(for name: String, useInt4: Bool, useInt8: Bool) -> URL {
-        if name == "PlaprePico" {
-            if useInt4 { return repoRoot.appendingPathComponent("PlaprePico_int4.mlpackage") }
-            if useInt8 { return repoRoot.appendingPathComponent("PlaprePico_int8.mlpackage") }
         }
         return repoRoot.appendingPathComponent("\(name).mlpackage")
     }

     }
     /// Returns the URL for a model package, accounting for the int8 quantization flag.
+    static func modelURL(for name: String, useInt8: Bool) -> URL {
+        if name == "PlaprePico" && useInt8 {
+            return repoRoot.appendingPathComponent("PlaprePico_int8.mlpackage")
         }
         return repoRoot.appendingPathComponent("\(name).mlpackage")
     }

swift-cli/Sources/main.swift CHANGED Viewed

@@ -22,10 +22,7 @@ struct PlapreCLI: AsyncParsableCommand {
     @Option(name: .shortAndLong, help: "Output WAV file path")
     var output: String = "output.wav"
-    @Flag(name: .long, help: "Use int4 quantized model (smaller, faster)")
-    var int4 = false
-    @Flag(name: .long, help: "Use int8 quantized model (balanced)")
     var int8 = false
     // MARK: - Run Pipeline
@@ -74,15 +71,15 @@ struct PlapreCLI: AsyncParsableCommand {
         print("\nCompiling models...")
         let decodeModel = try measure("Compile PlaprePico") {
             try compileModel(
-                at: PlapreConfig.modelURL(for: "PlaprePico", useInt4: int4, useInt8: int8))
         }
         let kanadeModel = try measure("Compile KanadeDecoder") {
             try compileModel(
-                at: PlapreConfig.modelURL(for: "KanadeDecoder", useInt4: false, useInt8: false))
         }
         let vocoderModel = try measure("Compile Vocoder") {
             try compileModel(
-                at: PlapreConfig.modelURL(for: "Vocoder", useInt4: false, useInt8: false))
         }
         // Pre-allocate MLMultiArrays (performance-critical: single allocation, reused for all steps)

     @Option(name: .shortAndLong, help: "Output WAV file path")
     var output: String = "output.wav"
+    @Flag(name: .long, help: "Use int8 quantized model (smaller)")
     var int8 = false
     // MARK: - Run Pipeline
         print("\nCompiling models...")
         let decodeModel = try measure("Compile PlaprePico") {
             try compileModel(
+                at: PlapreConfig.modelURL(for: "PlaprePico", useInt8: int8))
         }
         let kanadeModel = try measure("Compile KanadeDecoder") {
             try compileModel(
+                at: PlapreConfig.modelURL(for: "KanadeDecoder", useInt8: false))
         }
         let vocoderModel = try measure("Compile Vocoder") {
             try compileModel(
+                at: PlapreConfig.modelURL(for: "Vocoder", useInt8: false))
         }
         // Pre-allocate MLMultiArrays (performance-critical: single allocation, reused for all steps)