Daniel Rothmann committed on
Commit
5ed6654
·
1 Parent(s): e79fa0a

Remove int4 variant which had too poor quality

Browse files
PlaprePico_int4.mlpackage/Data/com.apple.CoreML/model.mlmodel DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:a156dd782a9aabc95ce6b1e1a34b05a4c71557a96e36cb69e07833a65c530e04
3
- size 986129
 
 
 
 
PlaprePico_int4.mlpackage/Data/com.apple.CoreML/weights/weight.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:dbfed1ebb98d1541a462d6ec13e11b22d086c6ea7088ecdcc1c3c687fd99ee2f
3
- size 59614916
 
 
 
 
PlaprePico_int4.mlpackage/Manifest.json DELETED
@@ -1,18 +0,0 @@
1
- {
2
- "fileFormatVersion": "1.0.0",
3
- "itemInfoEntries": {
4
- "ABEB1845-9A9A-4CDB-AACD-335B4EEE0328": {
5
- "author": "com.apple.CoreML",
6
- "description": "CoreML Model Specification",
7
- "name": "model.mlmodel",
8
- "path": "com.apple.CoreML/model.mlmodel"
9
- },
10
- "F3186CBA-EC22-47C9-AAD1-AE8E1C7669C8": {
11
- "author": "com.apple.CoreML",
12
- "description": "CoreML Model Weights",
13
- "name": "weights",
14
- "path": "com.apple.CoreML/weights"
15
- }
16
- },
17
- "rootModelIdentifier": "ABEB1845-9A9A-4CDB-AACD-335B4EEE0328"
18
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
README.md CHANGED
@@ -25,7 +25,6 @@ CoreML conversion of [syvai/plapre-pico](https://huggingface.co/syvai/plapre-pic
25
  | `KanadeDecoder.mlpackage` | Audio tokens + speaker → mel spectrogram | ~348MB |
26
  | `Vocoder.mlpackage` | Mel → waveform (F0 + source gen + HiFT + iSTFT baked in) | ~67MB |
27
  | `PlaprePico_int8.mlpackage` | int8 quantized LLM (comparable quality) | ~120MB |
28
- | `PlaprePico_int4.mlpackage` | int4 quantized LLM (some quality loss) | ~61MB |
29
 
30
  ## Performance (iPhone 15 / A16, CPU Only)
31
 
 
25
  | `KanadeDecoder.mlpackage` | Audio tokens + speaker → mel spectrogram | ~348MB |
26
  | `Vocoder.mlpackage` | Mel → waveform (F0 + source gen + HiFT + iSTFT baked in) | ~67MB |
27
  | `PlaprePico_int8.mlpackage` | int8 quantized LLM (comparable quality) | ~120MB |
 
28
 
29
  ## Performance (iPhone 15 / A16, CPU Only)
30
 
scripts/build.py CHANGED
@@ -39,7 +39,7 @@ def main():
39
  help="Local Plapre Pico HF snapshot (otherwise downloaded)")
40
  parser.add_argument("--num-tokens", type=int, default=100,
41
  help="Audio token count for vocoder mel length")
42
- parser.add_argument("--quantize", action="append", choices=["int4", "int8"], default=[],
43
  help="Produce quantized LLM variant(s); may be repeated")
44
  parser.add_argument("--skip", action="append", choices=["llm", "audio"], default=[],
45
  help="Skip a stage")
 
39
  help="Local Plapre Pico HF snapshot (otherwise downloaded)")
40
  parser.add_argument("--num-tokens", type=int, default=100,
41
  help="Audio token count for vocoder mel length")
42
+ parser.add_argument("--quantize", action="append", choices=["int8"], default=[],
43
  help="Produce quantized LLM variant(s); may be repeated")
44
  parser.add_argument("--skip", action="append", choices=["llm", "audio"], default=[],
45
  help="Skip a stage")
scripts/quantize.py CHANGED
@@ -2,11 +2,11 @@
2
  """
3
  Post-training weight quantization for PlaprePico CoreML model.
4
 
5
- Applies int4 or int8 linear weight quantization to reduce model size
6
  and improve inference speed (less memory bandwidth).
7
 
8
  Usage:
9
- python quantize.py [--bits 4|8] [--input PATH] [--output PATH]
10
  """
11
 
12
  import argparse
@@ -20,7 +20,7 @@ from coremltools.optimize.coreml import (
20
  )
21
 
22
 
23
- def quantize_model(input_path: Path, output_path: Path, nbits: int = 4):
24
  print(f"Loading {input_path}...")
25
  model = ct.models.MLModel(str(input_path), compute_units=ct.ComputeUnit.CPU_ONLY)
26
 
@@ -54,19 +54,15 @@ def quantize_model(input_path: Path, output_path: Path, nbits: int = 4):
54
 
55
  def main():
56
  parser = argparse.ArgumentParser(description="Quantize PlaprePico model")
57
- parser.add_argument("--bits", type=int, default=4, choices=[4, 8])
58
  parser.add_argument("--input", type=str,
59
  default=str(Path(__file__).parent.parent / "PlaprePico.mlpackage"))
60
  parser.add_argument("--output", type=str, default=None)
61
  args = parser.parse_args()
62
 
63
  input_path = Path(args.input)
64
- if args.output:
65
- output_path = Path(args.output)
66
- else:
67
- output_path = input_path.parent / f"PlaprePico_int{args.bits}.mlpackage"
68
 
69
- quantize_model(input_path, output_path, args.bits)
70
 
71
 
72
  if __name__ == "__main__":
 
2
  """
3
  Post-training weight quantization for PlaprePico CoreML model.
4
 
5
+ Applies int8 linear weight quantization to reduce model size
6
  and improve inference speed (less memory bandwidth).
7
 
8
  Usage:
9
+ python quantize.py [--input PATH] [--output PATH]
10
  """
11
 
12
  import argparse
 
20
  )
21
 
22
 
23
+ def quantize_model(input_path: Path, output_path: Path, nbits: int = 8):
24
  print(f"Loading {input_path}...")
25
  model = ct.models.MLModel(str(input_path), compute_units=ct.ComputeUnit.CPU_ONLY)
26
 
 
54
 
55
  def main():
56
  parser = argparse.ArgumentParser(description="Quantize PlaprePico model")
 
57
  parser.add_argument("--input", type=str,
58
  default=str(Path(__file__).parent.parent / "PlaprePico.mlpackage"))
59
  parser.add_argument("--output", type=str, default=None)
60
  args = parser.parse_args()
61
 
62
  input_path = Path(args.input)
63
+ output_path = Path(args.output) if args.output else input_path.parent / "PlaprePico_int8.mlpackage"
 
 
 
64
 
65
+ quantize_model(input_path, output_path, 8)
66
 
67
 
68
  if __name__ == "__main__":
swift-cli/Sources/Configuration.swift CHANGED
@@ -43,10 +43,9 @@ enum PlapreConfig {
43
  }
44
 
45
  /// Returns the URL for a model package, accounting for quantization flags.
46
- static func modelURL(for name: String, useInt4: Bool, useInt8: Bool) -> URL {
47
- if name == "PlaprePico" {
48
- if useInt4 { return repoRoot.appendingPathComponent("PlaprePico_int4.mlpackage") }
49
- if useInt8 { return repoRoot.appendingPathComponent("PlaprePico_int8.mlpackage") }
50
  }
51
  return repoRoot.appendingPathComponent("\(name).mlpackage")
52
  }
 
43
  }
44
 
45
  /// Returns the URL for a model package, accounting for quantization flags.
46
+ static func modelURL(for name: String, useInt8: Bool) -> URL {
47
+ if name == "PlaprePico" && useInt8 {
48
+ return repoRoot.appendingPathComponent("PlaprePico_int8.mlpackage")
 
49
  }
50
  return repoRoot.appendingPathComponent("\(name).mlpackage")
51
  }
swift-cli/Sources/main.swift CHANGED
@@ -22,10 +22,7 @@ struct PlapreCLI: AsyncParsableCommand {
22
  @Option(name: .shortAndLong, help: "Output WAV file path")
23
  var output: String = "output.wav"
24
 
25
- @Flag(name: .long, help: "Use int4 quantized model (smaller, faster)")
26
- var int4 = false
27
-
28
- @Flag(name: .long, help: "Use int8 quantized model (balanced)")
29
  var int8 = false
30
 
31
  // MARK: - Run Pipeline
@@ -74,15 +71,15 @@ struct PlapreCLI: AsyncParsableCommand {
74
  print("\nCompiling models...")
75
  let decodeModel = try measure("Compile PlaprePico") {
76
  try compileModel(
77
- at: PlapreConfig.modelURL(for: "PlaprePico", useInt4: int4, useInt8: int8))
78
  }
79
  let kanadeModel = try measure("Compile KanadeDecoder") {
80
  try compileModel(
81
- at: PlapreConfig.modelURL(for: "KanadeDecoder", useInt4: false, useInt8: false))
82
  }
83
  let vocoderModel = try measure("Compile Vocoder") {
84
  try compileModel(
85
- at: PlapreConfig.modelURL(for: "Vocoder", useInt4: false, useInt8: false))
86
  }
87
 
88
  // Pre-allocate MLMultiArrays (performance-critical: single allocation, reused for all steps)
 
22
  @Option(name: .shortAndLong, help: "Output WAV file path")
23
  var output: String = "output.wav"
24
 
25
+ @Flag(name: .long, help: "Use int8 quantized model (smaller)")
 
 
 
26
  var int8 = false
27
 
28
  // MARK: - Run Pipeline
 
71
  print("\nCompiling models...")
72
  let decodeModel = try measure("Compile PlaprePico") {
73
  try compileModel(
74
+ at: PlapreConfig.modelURL(for: "PlaprePico", useInt8: int8))
75
  }
76
  let kanadeModel = try measure("Compile KanadeDecoder") {
77
  try compileModel(
78
+ at: PlapreConfig.modelURL(for: "KanadeDecoder", useInt8: false))
79
  }
80
  let vocoderModel = try measure("Compile Vocoder") {
81
  try compileModel(
82
+ at: PlapreConfig.modelURL(for: "Vocoder", useInt8: false))
83
  }
84
 
85
  // Pre-allocate MLMultiArrays (performance-critical: single allocation, reused for all steps)