Daniel Rothmann committed on
Commit ·
5ed6654
1
Parent(s): e79fa0a
Remove int4 variant which had too poor quality
Browse files
- PlaprePico_int4.mlpackage/Data/com.apple.CoreML/model.mlmodel +0 -3
- PlaprePico_int4.mlpackage/Data/com.apple.CoreML/weights/weight.bin +0 -3
- PlaprePico_int4.mlpackage/Manifest.json +0 -18
- README.md +0 -1
- scripts/build.py +1 -1
- scripts/quantize.py +5 -9
- swift-cli/Sources/Configuration.swift +3 -4
- swift-cli/Sources/main.swift +4 -7
PlaprePico_int4.mlpackage/Data/com.apple.CoreML/model.mlmodel
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:a156dd782a9aabc95ce6b1e1a34b05a4c71557a96e36cb69e07833a65c530e04
|
| 3 |
-
size 986129
|
|
|
|
|
|
|
|
|
|
|
|
PlaprePico_int4.mlpackage/Data/com.apple.CoreML/weights/weight.bin
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:dbfed1ebb98d1541a462d6ec13e11b22d086c6ea7088ecdcc1c3c687fd99ee2f
|
| 3 |
-
size 59614916
|
|
|
|
|
|
|
|
|
|
|
|
PlaprePico_int4.mlpackage/Manifest.json
DELETED
|
@@ -1,18 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"fileFormatVersion": "1.0.0",
|
| 3 |
-
"itemInfoEntries": {
|
| 4 |
-
"ABEB1845-9A9A-4CDB-AACD-335B4EEE0328": {
|
| 5 |
-
"author": "com.apple.CoreML",
|
| 6 |
-
"description": "CoreML Model Specification",
|
| 7 |
-
"name": "model.mlmodel",
|
| 8 |
-
"path": "com.apple.CoreML/model.mlmodel"
|
| 9 |
-
},
|
| 10 |
-
"F3186CBA-EC22-47C9-AAD1-AE8E1C7669C8": {
|
| 11 |
-
"author": "com.apple.CoreML",
|
| 12 |
-
"description": "CoreML Model Weights",
|
| 13 |
-
"name": "weights",
|
| 14 |
-
"path": "com.apple.CoreML/weights"
|
| 15 |
-
}
|
| 16 |
-
},
|
| 17 |
-
"rootModelIdentifier": "ABEB1845-9A9A-4CDB-AACD-335B4EEE0328"
|
| 18 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
README.md
CHANGED
|
@@ -25,7 +25,6 @@ CoreML conversion of [syvai/plapre-pico](https://huggingface.co/syvai/plapre-pic
|
|
| 25 |
| `KanadeDecoder.mlpackage` | Audio tokens + speaker → mel spectrogram | ~348MB |
|
| 26 |
| `Vocoder.mlpackage` | Mel → waveform (F0 + source gen + HiFT + iSTFT baked in) | ~67MB |
|
| 27 |
| `PlaprePico_int8.mlpackage` | int8 quantized LLM (comparable quality) | ~120MB |
|
| 28 |
-
| `PlaprePico_int4.mlpackage` | int4 quantized LLM (some quality loss) | ~61MB |
|
| 29 |
|
| 30 |
## Performance (iPhone 15 / A16, CPU Only)
|
| 31 |
|
|
|
|
| 25 |
| `KanadeDecoder.mlpackage` | Audio tokens + speaker → mel spectrogram | ~348MB |
|
| 26 |
| `Vocoder.mlpackage` | Mel → waveform (F0 + source gen + HiFT + iSTFT baked in) | ~67MB |
|
| 27 |
| `PlaprePico_int8.mlpackage` | int8 quantized LLM (comparable quality) | ~120MB |
|
|
|
|
| 28 |
|
| 29 |
## Performance (iPhone 15 / A16, CPU Only)
|
| 30 |
|
scripts/build.py
CHANGED
|
@@ -39,7 +39,7 @@ def main():
|
|
| 39 |
help="Local Plapre Pico HF snapshot (otherwise downloaded)")
|
| 40 |
parser.add_argument("--num-tokens", type=int, default=100,
|
| 41 |
help="Audio token count for vocoder mel length")
|
| 42 |
-
parser.add_argument("--quantize", action="append", choices=["int4", "int8"], default=[],
|
| 43 |
help="Produce quantized LLM variant(s); may be repeated")
|
| 44 |
parser.add_argument("--skip", action="append", choices=["llm", "audio"], default=[],
|
| 45 |
help="Skip a stage")
|
|
|
|
| 39 |
help="Local Plapre Pico HF snapshot (otherwise downloaded)")
|
| 40 |
parser.add_argument("--num-tokens", type=int, default=100,
|
| 41 |
help="Audio token count for vocoder mel length")
|
| 42 |
+
parser.add_argument("--quantize", action="append", choices=["int8"], default=[],
|
| 43 |
help="Produce quantized LLM variant(s); may be repeated")
|
| 44 |
parser.add_argument("--skip", action="append", choices=["llm", "audio"], default=[],
|
| 45 |
help="Skip a stage")
|
scripts/quantize.py
CHANGED
|
@@ -2,11 +2,11 @@
|
|
| 2 |
"""
|
| 3 |
Post-training weight quantization for PlaprePico CoreML model.
|
| 4 |
|
| 5 |
-
Applies
|
| 6 |
and improve inference speed (less memory bandwidth).
|
| 7 |
|
| 8 |
Usage:
|
| 9 |
-
python quantize.py [--
|
| 10 |
"""
|
| 11 |
|
| 12 |
import argparse
|
|
@@ -20,7 +20,7 @@ from coremltools.optimize.coreml import (
|
|
| 20 |
)
|
| 21 |
|
| 22 |
|
| 23 |
-
def quantize_model(input_path: Path, output_path: Path, nbits: int = 4):
|
| 24 |
print(f"Loading {input_path}...")
|
| 25 |
model = ct.models.MLModel(str(input_path), compute_units=ct.ComputeUnit.CPU_ONLY)
|
| 26 |
|
|
@@ -54,19 +54,15 @@ def quantize_model(input_path: Path, output_path: Path, nbits: int = 4):
|
|
| 54 |
|
| 55 |
def main():
|
| 56 |
parser = argparse.ArgumentParser(description="Quantize PlaprePico model")
|
| 57 |
-
parser.add_argument("--bits", type=int, default=4, choices=[4, 8])
|
| 58 |
parser.add_argument("--input", type=str,
|
| 59 |
default=str(Path(__file__).parent.parent / "PlaprePico.mlpackage"))
|
| 60 |
parser.add_argument("--output", type=str, default=None)
|
| 61 |
args = parser.parse_args()
|
| 62 |
|
| 63 |
input_path = Path(args.input)
|
| 64 |
-
if args.output:
|
| 65 |
-
output_path = Path(args.output)
|
| 66 |
-
else:
|
| 67 |
-
output_path = input_path.parent / f"PlaprePico_int{args.bits}.mlpackage"
|
| 68 |
|
| 69 |
-
quantize_model(input_path, output_path, args.bits)
|
| 70 |
|
| 71 |
|
| 72 |
if __name__ == "__main__":
|
|
|
|
| 2 |
"""
|
| 3 |
Post-training weight quantization for PlaprePico CoreML model.
|
| 4 |
|
| 5 |
+
Applies int8 linear weight quantization to reduce model size
|
| 6 |
and improve inference speed (less memory bandwidth).
|
| 7 |
|
| 8 |
Usage:
|
| 9 |
+
python quantize.py [--input PATH] [--output PATH]
|
| 10 |
"""
|
| 11 |
|
| 12 |
import argparse
|
|
|
|
| 20 |
)
|
| 21 |
|
| 22 |
|
| 23 |
+
def quantize_model(input_path: Path, output_path: Path, nbits: int = 8):
|
| 24 |
print(f"Loading {input_path}...")
|
| 25 |
model = ct.models.MLModel(str(input_path), compute_units=ct.ComputeUnit.CPU_ONLY)
|
| 26 |
|
|
|
|
| 54 |
|
| 55 |
def main():
|
| 56 |
parser = argparse.ArgumentParser(description="Quantize PlaprePico model")
|
|
|
|
| 57 |
parser.add_argument("--input", type=str,
|
| 58 |
default=str(Path(__file__).parent.parent / "PlaprePico.mlpackage"))
|
| 59 |
parser.add_argument("--output", type=str, default=None)
|
| 60 |
args = parser.parse_args()
|
| 61 |
|
| 62 |
input_path = Path(args.input)
|
| 63 |
+
output_path = Path(args.output) if args.output else input_path.parent / "PlaprePico_int8.mlpackage"
|
|
|
|
|
|
|
|
|
|
| 64 |
|
| 65 |
+
quantize_model(input_path, output_path, 8)
|
| 66 |
|
| 67 |
|
| 68 |
if __name__ == "__main__":
|
swift-cli/Sources/Configuration.swift
CHANGED
|
@@ -43,10 +43,9 @@ enum PlapreConfig {
|
|
| 43 |
}
|
| 44 |
|
| 45 |
/// Returns the URL for a model package, accounting for quantization flags.
|
| 46 |
-
static func modelURL(for name: String, useInt4: Bool, useInt8: Bool) -> URL {
|
| 47 |
-
if name == "PlaprePico" {
|
| 48 |
-
|
| 49 |
-
if useInt8 { return repoRoot.appendingPathComponent("PlaprePico_int8.mlpackage") }
|
| 50 |
}
|
| 51 |
return repoRoot.appendingPathComponent("\(name).mlpackage")
|
| 52 |
}
|
|
|
|
| 43 |
}
|
| 44 |
|
| 45 |
/// Returns the URL for a model package, accounting for quantization flags.
|
| 46 |
+
static func modelURL(for name: String, useInt8: Bool) -> URL {
|
| 47 |
+
if name == "PlaprePico" && useInt8 {
|
| 48 |
+
return repoRoot.appendingPathComponent("PlaprePico_int8.mlpackage")
|
|
|
|
| 49 |
}
|
| 50 |
return repoRoot.appendingPathComponent("\(name).mlpackage")
|
| 51 |
}
|
swift-cli/Sources/main.swift
CHANGED
|
@@ -22,10 +22,7 @@ struct PlapreCLI: AsyncParsableCommand {
|
|
| 22 |
@Option(name: .shortAndLong, help: "Output WAV file path")
|
| 23 |
var output: String = "output.wav"
|
| 24 |
|
| 25 |
-
@Flag(name: .long, help: "Use
|
| 26 |
-
var int4 = false
|
| 27 |
-
|
| 28 |
-
@Flag(name: .long, help: "Use int8 quantized model (balanced)")
|
| 29 |
var int8 = false
|
| 30 |
|
| 31 |
// MARK: - Run Pipeline
|
|
@@ -74,15 +71,15 @@ struct PlapreCLI: AsyncParsableCommand {
|
|
| 74 |
print("\nCompiling models...")
|
| 75 |
let decodeModel = try measure("Compile PlaprePico") {
|
| 76 |
try compileModel(
|
| 77 |
-
at: PlapreConfig.modelURL(for: "PlaprePico",
|
| 78 |
}
|
| 79 |
let kanadeModel = try measure("Compile KanadeDecoder") {
|
| 80 |
try compileModel(
|
| 81 |
-
at: PlapreConfig.modelURL(for: "KanadeDecoder",
|
| 82 |
}
|
| 83 |
let vocoderModel = try measure("Compile Vocoder") {
|
| 84 |
try compileModel(
|
| 85 |
-
at: PlapreConfig.modelURL(for: "Vocoder",
|
| 86 |
}
|
| 87 |
|
| 88 |
// Pre-allocate MLMultiArrays (performance-critical: single allocation, reused for all steps)
|
|
|
|
| 22 |
@Option(name: .shortAndLong, help: "Output WAV file path")
|
| 23 |
var output: String = "output.wav"
|
| 24 |
|
| 25 |
+
@Flag(name: .long, help: "Use int8 quantized model (smaller)")
|
|
|
|
|
|
|
|
|
|
| 26 |
var int8 = false
|
| 27 |
|
| 28 |
// MARK: - Run Pipeline
|
|
|
|
| 71 |
print("\nCompiling models...")
|
| 72 |
let decodeModel = try measure("Compile PlaprePico") {
|
| 73 |
try compileModel(
|
| 74 |
+
at: PlapreConfig.modelURL(for: "PlaprePico", useInt8: int8))
|
| 75 |
}
|
| 76 |
let kanadeModel = try measure("Compile KanadeDecoder") {
|
| 77 |
try compileModel(
|
| 78 |
+
at: PlapreConfig.modelURL(for: "KanadeDecoder", useInt8: false))
|
| 79 |
}
|
| 80 |
let vocoderModel = try measure("Compile Vocoder") {
|
| 81 |
try compileModel(
|
| 82 |
+
at: PlapreConfig.modelURL(for: "Vocoder", useInt8: false))
|
| 83 |
}
|
| 84 |
|
| 85 |
// Pre-allocate MLMultiArrays (performance-critical: single allocation, reused for all steps)
|