aufklarer committed 20 days ago

Commit

4298b54

verified ·

1 Parent(s): d4055fc

6-model ANE architecture: TextProjector, CodeEmbedder, MultiCodeEmbedder, CodeDecoder, MultiCodeDecoder, SpeechDecoder + speaker embedding

Browse files

Files changed (41) hide show

{CodePredictor.mlmodelc → CodeDecoder.mlmodelc}/analytics/coremldata.bin +1 -1
{MimiDecoder.mlmodelc → CodeDecoder.mlmodelc}/coremldata.bin +2 -2
{Talker.mlmodelc → CodeDecoder.mlmodelc}/metadata.json +28 -37
CodeDecoder.mlmodelc/model.mil +0 -0
{MimiDecoder.mlmodelc → CodeDecoder.mlmodelc}/weights/weight.bin +2 -2
{Talker.mlmodelc → CodeEmbedder.mlmodelc}/analytics/coremldata.bin +1 -1
CodeEmbedder.mlmodelc/coremldata.bin +3 -0
CodeEmbedder.mlmodelc/metadata.json +67 -0
CodeEmbedder.mlmodelc/model.mil +29 -0
CodeEmbedder.mlmodelc/weights/weight.bin +3 -0
CodePredictor.mlmodelc/metadata.json +0 -328
CodePredictor.mlmodelc/model.mil +0 -0
MimiDecoder.mlmodelc/model.mil +0 -0
{MimiDecoder.mlmodelc → MultiCodeDecoder.mlmodelc}/analytics/coremldata.bin +1 -1
MultiCodeDecoder.mlmodelc/coremldata.bin +3 -0
MultiCodeDecoder.mlmodelc/metadata.json +162 -0
MultiCodeDecoder.mlmodelc/model.mil +0 -0
{Talker.mlmodelc → MultiCodeDecoder.mlmodelc}/weights/weight.bin +2 -2
MultiCodeEmbedder.mlmodelc/analytics/coremldata.bin +3 -0
MultiCodeEmbedder.mlmodelc/coremldata.bin +3 -0
MultiCodeEmbedder.mlmodelc/metadata.json +67 -0
MultiCodeEmbedder.mlmodelc/model.mil +29 -0
MultiCodeEmbedder.mlmodelc/weights/weight.bin +3 -0
SpeechDecoder.mlmodelc/analytics/coremldata.bin +3 -0
SpeechDecoder.mlmodelc/coremldata.bin +3 -0
{MimiDecoder.mlmodelc → SpeechDecoder.mlmodelc}/metadata.json +24 -27
SpeechDecoder.mlmodelc/model.mil +0 -0
{CodePredictor.mlmodelc → SpeechDecoder.mlmodelc}/weights/weight.bin +2 -2
Talker.mlmodelc/coremldata.bin +0 -3
Talker.mlmodelc/model.mil +0 -0
TextProjector.mlmodelc/analytics/coremldata.bin +3 -0
TextProjector.mlmodelc/coremldata.bin +3 -0
TextProjector.mlmodelc/metadata.json +69 -0
TextProjector.mlmodelc/model.mil +35 -0
TextProjector.mlmodelc/weights/weight.bin +3 -0
config.json +14 -10
embeddings.safetensors +0 -3
CodePredictor.mlmodelc/coremldata.bin → speaker_embedding.npy +2 -2
tts_bos_embed.npy +3 -0
tts_eos_embed.npy +3 -0
tts_pad_embed.npy +3 -0

{CodePredictor.mlmodelc → CodeDecoder.mlmodelc}/analytics/coremldata.bin RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:19727f665522e27ea6d1a551f82df935419b51e9b060af76ee1622837d1b80f1
 size 243

 version https://git-lfs.github.com/spec/v1
+oid sha256:f73b5cb1ea58da1354e9693a58de02d8313261cd34868b6b5bac0cf4830f091c
 size 243

{MimiDecoder.mlmodelc → CodeDecoder.mlmodelc}/coremldata.bin RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:38a4d9c02d6c4d443b13335769dd6ffa1ad88ac64b7799e11098910b4652e734
-size 399

 version https://git-lfs.github.com/spec/v1
+oid sha256:ae57abaa76c7566ec6ec30e316d1408fa75e1af208d630a552d0d66b4a260f68
+size 642

{Talker.mlmodelc → CodeDecoder.mlmodelc}/metadata.json RENAMED Viewed

@@ -1,15 +1,15 @@
 [
   {
     "metadataOutputVersion" : "3.0",
-    "storagePrecision" : "Mixed (Float16, Int32, Palettized (8 bits), UInt8)",
     "outputSchema" : [
       {
         "hasShapeFlexibility" : "0",
         "isOptional" : "0",
         "dataType" : "Float16",
-        "formattedType" : "MultiArray (Float16 1 × 3072)",
         "shortDescription" : "",
-        "shape" : "[1, 3072]",
         "name" : "logits",
         "type" : "MultiArray"
       },
@@ -27,20 +27,20 @@
         "hasShapeFlexibility" : "0",
         "isOptional" : "0",
         "dataType" : "Float16",
-        "formattedType" : "MultiArray (Float16 1 × 28672 × 1 × 1)",
         "shortDescription" : "",
-        "shape" : "[1, 28672, 1, 1]",
-        "name" : "key_cache_updates",
         "type" : "MultiArray"
       },
       {
         "hasShapeFlexibility" : "0",
         "isOptional" : "0",
         "dataType" : "Float16",
-        "formattedType" : "MultiArray (Float16 1 × 28672 × 1 × 1)",
         "shortDescription" : "",
-        "shape" : "[1, 28672, 1, 1]",
-        "name" : "value_cache_updates",
         "type" : "MultiArray"
       }
     ],
@@ -49,26 +49,27 @@
     ],
     "specificationVersion" : 9,
     "mlProgramOperationTypeHistogram" : {
-      "Ios18.expandDims" : 6,
       "Ios18.softmax" : 28,
-      "Ios18.mul" : 618,
-      "Ios18.matmul" : 56,
       "Ios18.rsqrt" : 113,
       "Ios16.reduceMean" : 113,
-      "Split" : 2,
       "Tile" : 56,
-      "Ios18.add" : 309,
-      "Ios16.reduceSum" : 56,
-      "Ios18.reshape" : 336,
       "Ios18.constexprLutToDense" : 197,
-      "Ios18.conv" : 197,
-      "Ios18.concat" : 58,
-      "Ios18.transpose" : 168,
       "Ios18.sub" : 1,
       "Ios18.pow" : 113,
       "Ios18.silu" : 28,
-      "Ios18.squeeze" : 2,
-      "Ios18.sliceByIndex" : 112
     },
     "computePrecision" : "Mixed (Float16, Float32, Int32)",
     "isUpdatable" : "0",
@@ -87,7 +88,7 @@
       "name" : "MLModelType_mlProgram"
     },
     "userDefinedMetadata" : {
-      "com.github.apple.coremltools.conversion_date" : "2026-03-29",
       "com.github.apple.coremltools.source" : "torch==2.10.0",
       "com.github.apple.coremltools.version" : "9.0",
       "com.github.apple.coremltools.source_dialect" : "TorchScript"
@@ -106,21 +107,11 @@
       {
         "hasShapeFlexibility" : "0",
         "isOptional" : "0",
-        "dataType" : "Float16",
-        "formattedType" : "MultiArray (Float16 1 × 128 × 1)",
-        "shortDescription" : "",
-        "shape" : "[1, 128, 1]",
-        "name" : "rope_cos",
-        "type" : "MultiArray"
-      },
-      {
-        "hasShapeFlexibility" : "0",
-        "isOptional" : "0",
-        "dataType" : "Float16",
-        "formattedType" : "MultiArray (Float16 1 × 128 × 1)",
         "shortDescription" : "",
-        "shape" : "[1, 128, 1]",
-        "name" : "rope_sin",
         "type" : "MultiArray"
       },
       {
@@ -164,7 +155,7 @@
         "type" : "MultiArray"
       }
     ],
-    "generatedClassName" : "Talker",
     "method" : "predict"
   }
 ]

 [
   {
     "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Mixed (Float16, Palettized (8 bits), UInt8)",
     "outputSchema" : [
       {
         "hasShapeFlexibility" : "0",
         "isOptional" : "0",
         "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1 × 3072)",
         "shortDescription" : "",
+        "shape" : "[1, 1, 3072]",
         "name" : "logits",
         "type" : "MultiArray"
       },
         "hasShapeFlexibility" : "0",
         "isOptional" : "0",
         "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 28672 × 1 × 256)",
         "shortDescription" : "",
+        "shape" : "[1, 28672, 1, 256]",
+        "name" : "new_key_cache",
         "type" : "MultiArray"
       },
       {
         "hasShapeFlexibility" : "0",
         "isOptional" : "0",
         "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 28672 × 1 × 256)",
         "shortDescription" : "",
+        "shape" : "[1, 28672, 1, 256]",
+        "name" : "new_value_cache",
         "type" : "MultiArray"
       }
     ],
     ],
     "specificationVersion" : 9,
     "mlProgramOperationTypeHistogram" : {
+      "Ios18.expandDims" : 235,
       "Ios18.softmax" : 28,
+      "Ios18.cos" : 1,
+      "Ios18.mul" : 566,
+      "Ios18.matmul" : 57,
       "Ios18.rsqrt" : 113,
       "Ios16.reduceMean" : 113,
+      "Ios18.sin" : 1,
       "Tile" : 56,
+      "Ios18.add" : 311,
+      "Ios18.reshape" : 252,
       "Ios18.constexprLutToDense" : 197,
+      "Ios18.linear" : 197,
+      "Ios18.concat" : 59,
+      "Ios18.transpose" : 57,
       "Ios18.sub" : 1,
       "Ios18.pow" : 113,
+      "Ios18.cast" : 1,
       "Ios18.silu" : 28,
+      "Ios18.sliceByIndex" : 168,
+      "Ios18.squeeze" : 283
     },
     "computePrecision" : "Mixed (Float16, Float32, Int32)",
     "isUpdatable" : "0",
       "name" : "MLModelType_mlProgram"
     },
     "userDefinedMetadata" : {
+      "com.github.apple.coremltools.conversion_date" : "2026-03-30",
       "com.github.apple.coremltools.source" : "torch==2.10.0",
       "com.github.apple.coremltools.version" : "9.0",
       "com.github.apple.coremltools.source_dialect" : "TorchScript"
       {
         "hasShapeFlexibility" : "0",
         "isOptional" : "0",
+        "dataType" : "Int32",
+        "formattedType" : "MultiArray (Int32 1)",
         "shortDescription" : "",
+        "shape" : "[1]",
+        "name" : "cache_length",
         "type" : "MultiArray"
       },
       {
         "type" : "MultiArray"
       }
     ],
+    "generatedClassName" : "CodeDecoder",
     "method" : "predict"
   }
 ]

CodeDecoder.mlmodelc/model.mil ADDED Viewed

The diff for this file is too large to render. See raw diff

{MimiDecoder.mlmodelc → CodeDecoder.mlmodelc}/weights/weight.bin RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:202b1d7b254ee9496615d236ef280d4dae2966ad54d2a13fdebbe2c4c684b958
-size 228181632

 version https://git-lfs.github.com/spec/v1
+oid sha256:65e3442b2ae6df9cc1d3942018bdd6622d174798b265355bc8003226a15068b4
+size 443824768

{Talker.mlmodelc → CodeEmbedder.mlmodelc}/analytics/coremldata.bin RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ff24977af0106176ba5e5526c8b1a451d42597675c7e5b3c50f303c9913db6bd
 size 243

 version https://git-lfs.github.com/spec/v1
+oid sha256:602128d98154e1cab615ea7fe218dd91acd52338868d29fe4c1140535e22b851
 size 243

CodeEmbedder.mlmodelc/coremldata.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b27307e3f1f9f5ef310e6fc7ee27ec3d69428c438b77720e86b5a1cb5bfa9919
+size 380

CodeEmbedder.mlmodelc/metadata.json ADDED Viewed

	@@ -0,0 +1,67 @@

+[
+  {
+    "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Float16",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1024 × 1 × 1)",
+        "shortDescription" : "",
+        "shape" : "[1024, 1, 1]",
+        "name" : "input_embeds",
+        "type" : "MultiArray"
+      }
+    ],
+    "modelParameters" : [
+    ],
+    "specificationVersion" : 9,
+    "mlProgramOperationTypeHistogram" : {
+      "Select" : 1,
+      "Ios18.squeeze" : 2,
+      "Ios18.gather" : 1,
+      "Ios18.expandDims" : 2,
+      "Ios18.cast" : 3,
+      "Ios18.greaterEqual" : 1,
+      "Ios18.add" : 1
+    },
+    "computePrecision" : "Mixed (Float16, Int16, Int32)",
+    "isUpdatable" : "0",
+    "stateSchema" : [
+    ],
+    "availability" : {
+      "macOS" : "15.0",
+      "tvOS" : "18.0",
+      "visionOS" : "2.0",
+      "watchOS" : "11.0",
+      "iOS" : "18.0",
+      "macCatalyst" : "18.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "userDefinedMetadata" : {
+      "com.github.apple.coremltools.conversion_date" : "2026-03-30",
+      "com.github.apple.coremltools.source" : "torch==2.10.0",
+      "com.github.apple.coremltools.version" : "9.0",
+      "com.github.apple.coremltools.source_dialect" : "TorchScript"
+    },
+    "inputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Int32",
+        "formattedType" : "MultiArray (Int32 1)",
+        "shortDescription" : "",
+        "shape" : "[1]",
+        "name" : "input_ids",
+        "type" : "MultiArray"
+      }
+    ],
+    "generatedClassName" : "CodeEmbedder",
+    "method" : "predict"
+  }
+]

CodeEmbedder.mlmodelc/model.mil ADDED Viewed

	@@ -0,0 +1,29 @@

+program(1.3)
+[buildInfo = dict<string, string>({{"coremlc-component-MIL", "3510.2.1"}, {"coremlc-version", "3500.32.1"}, {"coremltools-component-torch", "2.10.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "9.0"}})]
+{
+    func main<ios18>(tensor<int32, [1]> input_ids) {
+            int32 emb_batch_dims_0 = const()[name = string("emb_batch_dims_0"), val = int32(0)];
+            bool emb_validate_indices_0 = const()[name = string("emb_validate_indices_0"), val = bool(false)];
+            tensor<fp16, [3072, 1024]> codec_embedding_weight_to_fp16 = const()[name = string("codec_embedding_weight_to_fp16"), val = tensor<fp16, [3072, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))];
+            string input_ids_to_int16_dtype_0 = const()[name = string("input_ids_to_int16_dtype_0"), val = string("int16")];
+            string cast_2_dtype_0 = const()[name = string("cast_2_dtype_0"), val = string("int32")];
+            int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)];
+            tensor<int16, [1]> input_ids_to_int16 = cast(dtype = input_ids_to_int16_dtype_0, x = input_ids)[name = string("cast_5")];
+            tensor<int32, [1]> cast_2 = cast(dtype = cast_2_dtype_0, x = input_ids_to_int16)[name = string("cast_4")];
+            tensor<bool, [1]> greater_equal_0 = greater_equal(x = cast_2, y = greater_equal_0_y_0)[name = string("greater_equal_0")];
+            int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(3072)];
+            tensor<int32, [1]> add_0 = add(x = cast_2, y = slice_by_index_0)[name = string("add_0")];
+            tensor<int32, [1]> select_0 = select(a = cast_2, b = add_0, cond = greater_equal_0)[name = string("select_0")];
+            int32 emb_cast_fp16_cast_uint16_axis_0 = const()[name = string("emb_cast_fp16_cast_uint16_axis_0"), val = int32(0)];
+            string select_0_to_int16_dtype_0 = const()[name = string("select_0_to_int16_dtype_0"), val = string("int16")];
+            tensor<int16, [1]> select_0_to_int16 = cast(dtype = select_0_to_int16_dtype_0, x = select_0)[name = string("cast_3")];
+            tensor<fp16, [1, 1024]> emb_cast_fp16_cast_uint16_cast_uint16 = gather(axis = emb_cast_fp16_cast_uint16_axis_0, batch_dims = emb_batch_dims_0, indices = select_0_to_int16, validate_indices = emb_validate_indices_0, x = codec_embedding_weight_to_fp16)[name = string("emb_cast_fp16_cast_uint16_cast_uint16")];
+            tensor<int32, [1]> var_8_axes_0 = const()[name = string("op_8_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<fp16, [1024]> var_8_cast_fp16 = squeeze(axes = var_8_axes_0, x = emb_cast_fp16_cast_uint16_cast_uint16)[name = string("op_8_cast_fp16")];
+            tensor<fp16, [1024]> var_10_cast_fp16 = squeeze(x = var_8_cast_fp16)[name = string("op_10_cast_fp16")];
+            tensor<int32, [1]> var_12_axes_0 = const()[name = string("op_12_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024, 1]> var_12_cast_fp16 = expand_dims(axes = var_12_axes_0, x = var_10_cast_fp16)[name = string("op_12_cast_fp16")];
+            tensor<int32, [1]> var_14_axes_0 = const()[name = string("op_14_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024, 1, 1]> input_embeds = expand_dims(axes = var_14_axes_0, x = var_12_cast_fp16)[name = string("op_14_cast_fp16")];
+        } -> (input_embeds);
+}

CodeEmbedder.mlmodelc/weights/weight.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:dc523d30a88406fc4988b3e523cddba01b004a688071591be1db81e3abe0830b
+size 6291584

CodePredictor.mlmodelc/metadata.json DELETED Viewed

@@ -1,328 +0,0 @@
-[
-  {
-    "metadataOutputVersion" : "3.0",
-    "storagePrecision" : "Float32",
-    "outputSchema" : [
-      {
-        "hasShapeFlexibility" : "0",
-        "isOptional" : "0",
-        "dataType" : "Float32",
-        "formattedType" : "MultiArray (Float32 1 × 1 × 1024)",
-        "shortDescription" : "",
-        "shape" : "[1, 1, 1024]",
-        "name" : "hidden_states",
-        "type" : "MultiArray"
-      },
-      {
-        "hasShapeFlexibility" : "0",
-        "isOptional" : "0",
-        "dataType" : "Float32",
-        "formattedType" : "MultiArray (Float32)",
-        "shortDescription" : "",
-        "shape" : "[]",
-        "name" : "layer_0_key_cache_out",
-        "type" : "MultiArray"
-      },
-      {
-        "hasShapeFlexibility" : "0",
-        "isOptional" : "0",
-        "dataType" : "Float32",
-        "formattedType" : "MultiArray (Float32)",
-        "shortDescription" : "",
-        "shape" : "[]",
-        "name" : "layer_0_value_cache_out",
-        "type" : "MultiArray"
-      },
-      {
-        "hasShapeFlexibility" : "0",
-        "isOptional" : "0",
-        "dataType" : "Float32",
-        "formattedType" : "MultiArray (Float32)",
-        "shortDescription" : "",
-        "shape" : "[]",
-        "name" : "layer_1_key_cache_out",
-        "type" : "MultiArray"
-      },
-      {
-        "hasShapeFlexibility" : "0",
-        "isOptional" : "0",
-        "dataType" : "Float32",
-        "formattedType" : "MultiArray (Float32)",
-        "shortDescription" : "",
-        "shape" : "[]",
-        "name" : "layer_1_value_cache_out",
-        "type" : "MultiArray"
-      },
-      {
-        "hasShapeFlexibility" : "0",
-        "isOptional" : "0",
-        "dataType" : "Float32",
-        "formattedType" : "MultiArray (Float32)",
-        "shortDescription" : "",
-        "shape" : "[]",
-        "name" : "layer_2_key_cache_out",
-        "type" : "MultiArray"
-      },
-      {
-        "hasShapeFlexibility" : "0",
-        "isOptional" : "0",
-        "dataType" : "Float32",
-        "formattedType" : "MultiArray (Float32)",
-        "shortDescription" : "",
-        "shape" : "[]",
-        "name" : "layer_2_value_cache_out",
-        "type" : "MultiArray"
-      },
-      {
-        "hasShapeFlexibility" : "0",
-        "isOptional" : "0",
-        "dataType" : "Float32",
-        "formattedType" : "MultiArray (Float32)",
-        "shortDescription" : "",
-        "shape" : "[]",
-        "name" : "layer_3_key_cache_out",
-        "type" : "MultiArray"
-      },
-      {
-        "hasShapeFlexibility" : "0",
-        "isOptional" : "0",
-        "dataType" : "Float32",
-        "formattedType" : "MultiArray (Float32)",
-        "shortDescription" : "",
-        "shape" : "[]",
-        "name" : "layer_3_value_cache_out",
-        "type" : "MultiArray"
-      },
-      {
-        "hasShapeFlexibility" : "0",
-        "isOptional" : "0",
-        "dataType" : "Float32",
-        "formattedType" : "MultiArray (Float32)",
-        "shortDescription" : "",
-        "shape" : "[]",
-        "name" : "layer_4_key_cache_out",
-        "type" : "MultiArray"
-      },
-      {
-        "hasShapeFlexibility" : "0",
-        "isOptional" : "0",
-        "dataType" : "Float32",
-        "formattedType" : "MultiArray (Float32)",
-        "shortDescription" : "",
-        "shape" : "[]",
-        "name" : "layer_4_value_cache_out",
-        "type" : "MultiArray"
-      }
-    ],
-    "modelParameters" : [
-    ],
-    "specificationVersion" : 9,
-    "mlProgramOperationTypeHistogram" : {
-      "Ios18.expandDims" : 3,
-      "Ios18.softmax" : 5,
-      "Ios18.cos" : 1,
-      "Ios18.mul" : 93,
-      "Ios18.matmul" : 10,
-      "Ios18.rsqrt" : 21,
-      "Identity" : 1,
-      "Ios16.reduceMean" : 21,
-      "Ios18.sin" : 1,
-      "Tile" : 10,
-      "Ios18.gather" : 5,
-      "Ios18.add" : 46,
-      "Ios18.reshape" : 40,
-      "Shape" : 5,
-      "Ios18.linear" : 35,
-      "Ios18.concat" : 25,
-      "Ios18.cast" : 1,
-      "Ios18.sub" : 10,
-      "Ios18.pow" : 21,
-      "Ios18.transpose" : 50,
-      "Ios18.silu" : 5,
-      "Ios18.sliceByIndex" : 21
-    },
-    "computePrecision" : "Mixed (Float32, Int32)",
-    "isUpdatable" : "0",
-    "stateSchema" : [
-    ],
-    "availability" : {
-      "macOS" : "15.0",
-      "tvOS" : "18.0",
-      "visionOS" : "2.0",
-      "watchOS" : "11.0",
-      "iOS" : "18.0",
-      "macCatalyst" : "18.0"
-    },
-    "modelType" : {
-      "name" : "MLModelType_mlProgram"
-    },
-    "userDefinedMetadata" : {
-      "com.github.apple.coremltools.conversion_date" : "2026-03-29",
-      "com.github.apple.coremltools.source" : "torch==2.10.0",
-      "com.github.apple.coremltools.version" : "9.0",
-      "com.github.apple.coremltools.source_dialect" : "TorchScript"
-    },
-    "inputSchema" : [
-      {
-        "dataType" : "Float32",
-        "hasShapeFlexibility" : "1",
-        "isOptional" : "0",
-        "shapeFlexibility" : "1 × 1...20 × 1024",
-        "shapeRange" : "[[1, 1], [1, 20], [1024, 1024]]",
-        "formattedType" : "MultiArray (Float32 1 × 1 × 1024)",
-        "type" : "MultiArray",
-        "shape" : "[1, 1, 1024]",
-        "name" : "input_embeds",
-        "shortDescription" : ""
-      },
-      {
-        "dataType" : "Int32",
-        "hasShapeFlexibility" : "1",
-        "isOptional" : "0",
-        "shapeFlexibility" : "1 × 1...20",
-        "shapeRange" : "[[1, 1], [1, 20]]",
-        "formattedType" : "MultiArray (Int32 1 × 1)",
-        "type" : "MultiArray",
-        "shape" : "[1, 1]",
-        "name" : "position_ids",
-        "shortDescription" : ""
-      },
-      {
-        "dataType" : "Float32",
-        "hasShapeFlexibility" : "1",
-        "isOptional" : "0",
-        "shapeFlexibility" : "1 × 1 × 1...20 × 1...21",
-        "shapeRange" : "[[1, 1], [1, 1], [1, 20], [1, 21]]",
-        "formattedType" : "MultiArray (Float32 1 × 1 × 1 × 1)",
-        "type" : "MultiArray",
-        "shape" : "[1, 1, 1, 1]",
-        "name" : "causal_mask",
-        "shortDescription" : ""
-      },
-      {
-        "dataType" : "Float32",
-        "hasShapeFlexibility" : "1",
-        "isOptional" : "0",
-        "shapeFlexibility" : "1 × 8 × 1...20 × 128",
-        "shapeRange" : "[[1, 1], [8, 8], [1, 20], [128, 128]]",
-        "formattedType" : "MultiArray (Float32 1 × 8 × 1 × 128)",
-        "type" : "MultiArray",
-        "shape" : "[1, 8, 1, 128]",
-        "name" : "layer_0_key_cache",
-        "shortDescription" : ""
-      },
-      {
-        "dataType" : "Float32",
-        "hasShapeFlexibility" : "1",
-        "isOptional" : "0",
-        "shapeFlexibility" : "1 × 8 × 1...20 × 128",
-        "shapeRange" : "[[1, 1], [8, 8], [1, 20], [128, 128]]",
-        "formattedType" : "MultiArray (Float32 1 × 8 × 1 × 128)",
-        "type" : "MultiArray",
-        "shape" : "[1, 8, 1, 128]",
-        "name" : "layer_0_value_cache",
-        "shortDescription" : ""
-      },
-      {
-        "dataType" : "Float32",
-        "hasShapeFlexibility" : "1",
-        "isOptional" : "0",
-        "shapeFlexibility" : "1 × 8 × 1...20 × 128",
-        "shapeRange" : "[[1, 1], [8, 8], [1, 20], [128, 128]]",
-        "formattedType" : "MultiArray (Float32 1 × 8 × 1 × 128)",
-        "type" : "MultiArray",
-        "shape" : "[1, 8, 1, 128]",
-        "name" : "layer_1_key_cache",
-        "shortDescription" : ""
-      },
-      {
-        "dataType" : "Float32",
-        "hasShapeFlexibility" : "1",
-        "isOptional" : "0",
-        "shapeFlexibility" : "1 × 8 × 1...20 × 128",
-        "shapeRange" : "[[1, 1], [8, 8], [1, 20], [128, 128]]",
-        "formattedType" : "MultiArray (Float32 1 × 8 × 1 × 128)",
-        "type" : "MultiArray",
-        "shape" : "[1, 8, 1, 128]",
-        "name" : "layer_1_value_cache",
-        "shortDescription" : ""
-      },
-      {
-        "dataType" : "Float32",
-        "hasShapeFlexibility" : "1",
-        "isOptional" : "0",
-        "shapeFlexibility" : "1 × 8 × 1...20 × 128",
-        "shapeRange" : "[[1, 1], [8, 8], [1, 20], [128, 128]]",
-        "formattedType" : "MultiArray (Float32 1 × 8 × 1 × 128)",
-        "type" : "MultiArray",
-        "shape" : "[1, 8, 1, 128]",
-        "name" : "layer_2_key_cache",
-        "shortDescription" : ""
-      },
-      {
-        "dataType" : "Float32",
-        "hasShapeFlexibility" : "1",
-        "isOptional" : "0",
-        "shapeFlexibility" : "1 × 8 × 1...20 × 128",
-        "shapeRange" : "[[1, 1], [8, 8], [1, 20], [128, 128]]",
-        "formattedType" : "MultiArray (Float32 1 × 8 × 1 × 128)",
-        "type" : "MultiArray",
-        "shape" : "[1, 8, 1, 128]",
-        "name" : "layer_2_value_cache",
-        "shortDescription" : ""
-      },
-      {
-        "dataType" : "Float32",
-        "hasShapeFlexibility" : "1",
-        "isOptional" : "0",
-        "shapeFlexibility" : "1 × 8 × 1...20 × 128",
-        "shapeRange" : "[[1, 1], [8, 8], [1, 20], [128, 128]]",
-        "formattedType" : "MultiArray (Float32 1 × 8 × 1 × 128)",
-        "type" : "MultiArray",
-        "shape" : "[1, 8, 1, 128]",
-        "name" : "layer_3_key_cache",
-        "shortDescription" : ""
-      },
-      {
-        "dataType" : "Float32",
-        "hasShapeFlexibility" : "1",
-        "isOptional" : "0",
-        "shapeFlexibility" : "1 × 8 × 1...20 × 128",
-        "shapeRange" : "[[1, 1], [8, 8], [1, 20], [128, 128]]",
-        "formattedType" : "MultiArray (Float32 1 × 8 × 1 × 128)",
-        "type" : "MultiArray",
-        "shape" : "[1, 8, 1, 128]",
-        "name" : "layer_3_value_cache",
-        "shortDescription" : ""
-      },
-      {
-        "dataType" : "Float32",
-        "hasShapeFlexibility" : "1",
-        "isOptional" : "0",
-        "shapeFlexibility" : "1 × 8 × 1...20 × 128",
-        "shapeRange" : "[[1, 1], [8, 8], [1, 20], [128, 128]]",
-        "formattedType" : "MultiArray (Float32 1 × 8 × 1 × 128)",
-        "type" : "MultiArray",
-        "shape" : "[1, 8, 1, 128]",
-        "name" : "layer_4_key_cache",
-        "shortDescription" : ""
-      },
-      {
-        "dataType" : "Float32",
-        "hasShapeFlexibility" : "1",
-        "isOptional" : "0",
-        "shapeFlexibility" : "1 × 8 × 1...20 × 128",
-        "shapeRange" : "[[1, 1], [8, 8], [1, 20], [128, 128]]",
-        "formattedType" : "MultiArray (Float32 1 × 8 × 1 × 128)",
-        "type" : "MultiArray",
-        "shape" : "[1, 8, 1, 128]",
-        "name" : "layer_4_value_cache",
-        "shortDescription" : ""
-      }
-    ],
-    "generatedClassName" : "CodePredictor",
-    "method" : "predict"
-  }
-]

CodePredictor.mlmodelc/model.mil DELETED Viewed

The diff for this file is too large to render. See raw diff

MimiDecoder.mlmodelc/model.mil DELETED Viewed

The diff for this file is too large to render. See raw diff

{MimiDecoder.mlmodelc → MultiCodeDecoder.mlmodelc}/analytics/coremldata.bin RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3a4f8fad96e23cd81374cff387bd5a1860e54eaa2bc9df0f41238e3a67c92ea2
 size 243

 version https://git-lfs.github.com/spec/v1
+oid sha256:e732fb4ad346cd6a1db697a4d94a0441850902701e0767a7cdaebaa9de1abc88
 size 243

MultiCodeDecoder.mlmodelc/coremldata.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d491845bb47ba5f76ee40491f3c1824d4a1f30ec2675daa19d64b97a59c65fe7
+size 636

MultiCodeDecoder.mlmodelc/metadata.json ADDED Viewed

	@@ -0,0 +1,162 @@

+[
+  {
+    "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Mixed (Float32, Palettized (8 bits), UInt8)",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 15 × 2048)",
+        "shortDescription" : "",
+        "shape" : "[1, 15, 2048]",
+        "name" : "all_logits",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1024 × 1 × 1)",
+        "shortDescription" : "",
+        "shape" : "[1, 1024, 1, 1]",
+        "name" : "hidden_states",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 5120 × 1 × 16)",
+        "shortDescription" : "",
+        "shape" : "[1, 5120, 1, 16]",
+        "name" : "new_key_cache",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 5120 × 1 × 16)",
+        "shortDescription" : "",
+        "shape" : "[1, 5120, 1, 16]",
+        "name" : "new_value_cache",
+        "type" : "MultiArray"
+      }
+    ],
+    "modelParameters" : [
+    ],
+    "specificationVersion" : 9,
+    "mlProgramOperationTypeHistogram" : {
+      "Ios18.expandDims" : 51,
+      "Ios18.softmax" : 5,
+      "Ios18.cos" : 1,
+      "Ios18.mul" : 106,
+      "Ios18.matmul" : 11,
+      "Ios18.rsqrt" : 21,
+      "Ios16.reduceMean" : 21,
+      "Ios18.sin" : 1,
+      "Tile" : 10,
+      "Ios18.add" : 58,
+      "Ios18.reshape" : 45,
+      "Ios18.constexprLutToDense" : 50,
+      "Ios18.linear" : 50,
+      "Ios18.concat" : 13,
+      "Ios18.transpose" : 11,
+      "Ios18.sub" : 1,
+      "Ios18.pow" : 21,
+      "Ios18.cast" : 10,
+      "Ios18.silu" : 5,
+      "Stack" : 1,
+      "Ios18.sliceByIndex" : 30,
+      "Ios18.squeeze" : 68
+    },
+    "computePrecision" : "Mixed (Float16, Float32, Int32)",
+    "isUpdatable" : "0",
+    "stateSchema" : [
+    ],
+    "availability" : {
+      "macOS" : "15.0",
+      "tvOS" : "18.0",
+      "visionOS" : "2.0",
+      "watchOS" : "11.0",
+      "iOS" : "18.0",
+      "macCatalyst" : "18.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "userDefinedMetadata" : {
+      "com.github.apple.coremltools.conversion_date" : "2026-03-30",
+      "com.github.apple.coremltools.source" : "torch==2.10.0",
+      "com.github.apple.coremltools.version" : "9.0",
+      "com.github.apple.coremltools.source_dialect" : "TorchScript"
+    },
+    "inputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1024 × 1 × 1)",
+        "shortDescription" : "",
+        "shape" : "[1, 1024, 1, 1]",
+        "name" : "input_embeds",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Int32",
+        "formattedType" : "MultiArray (Int32 1)",
+        "shortDescription" : "",
+        "shape" : "[1]",
+        "name" : "cache_length",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 5120 × 1 × 16)",
+        "shortDescription" : "",
+        "shape" : "[1, 5120, 1, 16]",
+        "name" : "key_cache",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 16)",
+        "shortDescription" : "",
+        "shape" : "[1, 16]",
+        "name" : "key_padding_mask",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 16)",
+        "shortDescription" : "",
+        "shape" : "[1, 16]",
+        "name" : "kv_cache_update_mask",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 5120 × 1 × 16)",
+        "shortDescription" : "",
+        "shape" : "[1, 5120, 1, 16]",
+        "name" : "value_cache",
+        "type" : "MultiArray"
+      }
+    ],
+    "generatedClassName" : "MultiCodeDecoder",
+    "method" : "predict"
+  }
+]

MultiCodeDecoder.mlmodelc/model.mil ADDED Viewed

The diff for this file is too large to render. See raw diff

{Talker.mlmodelc → MultiCodeDecoder.mlmodelc}/weights/weight.bin RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e7ecf9b65affc83a8d09586e223e520d94ecbd015af8aa41d45e1af9a9647e95
-size 443812480

 version https://git-lfs.github.com/spec/v1
+oid sha256:6576808c3628a4065e578154a9d2473b303cb8b1eb2516bb9f2595728fbd50f0
+size 110234752

MultiCodeEmbedder.mlmodelc/analytics/coremldata.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:217a014dd5f42957a214530704a9e1764035baa415d29d243a1df80d25e3e41b
+size 243

MultiCodeEmbedder.mlmodelc/coremldata.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:dddbe801a9e029787cfa48d15ef2dad0606a617f9b719211c2c46e6698f30d56
+size 380

MultiCodeEmbedder.mlmodelc/metadata.json ADDED Viewed

	@@ -0,0 +1,67 @@

+[
+  {
+    "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Float16",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1024 × 1 × 1)",
+        "shortDescription" : "",
+        "shape" : "[1024, 1, 1]",
+        "name" : "input_embeds",
+        "type" : "MultiArray"
+      }
+    ],
+    "modelParameters" : [
+    ],
+    "specificationVersion" : 9,
+    "mlProgramOperationTypeHistogram" : {
+      "Select" : 1,
+      "Ios18.squeeze" : 2,
+      "Ios18.gather" : 1,
+      "Ios18.expandDims" : 2,
+      "Ios18.cast" : 3,
+      "Ios18.greaterEqual" : 1,
+      "Ios18.add" : 1
+    },
+    "computePrecision" : "Mixed (Float16, Int16, Int32)",
+    "isUpdatable" : "0",
+    "stateSchema" : [
+    ],
+    "availability" : {
+      "macOS" : "15.0",
+      "tvOS" : "18.0",
+      "visionOS" : "2.0",
+      "watchOS" : "11.0",
+      "iOS" : "18.0",
+      "macCatalyst" : "18.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "userDefinedMetadata" : {
+      "com.github.apple.coremltools.conversion_date" : "2026-03-30",
+      "com.github.apple.coremltools.source" : "torch==2.10.0",
+      "com.github.apple.coremltools.version" : "9.0",
+      "com.github.apple.coremltools.source_dialect" : "TorchScript"
+    },
+    "inputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Int32",
+        "formattedType" : "MultiArray (Int32 1)",
+        "shortDescription" : "",
+        "shape" : "[1]",
+        "name" : "input_ids",
+        "type" : "MultiArray"
+      }
+    ],
+    "generatedClassName" : "MultiCodeEmbedder",
+    "method" : "predict"
+  }
+]

MultiCodeEmbedder.mlmodelc/model.mil ADDED Viewed

	@@ -0,0 +1,29 @@

+program(1.3)
+[buildInfo = dict<string, string>({{"coremlc-component-MIL", "3510.2.1"}, {"coremlc-version", "3500.32.1"}, {"coremltools-component-torch", "2.10.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "9.0"}})]
+{
+    func main<ios18>(tensor<int32, [1]> input_ids) {
+            int32 emb_batch_dims_0 = const()[name = string("emb_batch_dims_0"), val = int32(0)];
+            bool emb_validate_indices_0 = const()[name = string("emb_validate_indices_0"), val = bool(false)];
+            tensor<fp16, [30720, 1024]> embedding_weight_to_fp16 = const()[name = string("embedding_weight_to_fp16"), val = tensor<fp16, [30720, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))];
+            string input_ids_to_int16_dtype_0 = const()[name = string("input_ids_to_int16_dtype_0"), val = string("int16")];
+            string cast_2_dtype_0 = const()[name = string("cast_2_dtype_0"), val = string("int32")];
+            int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)];
+            tensor<int16, [1]> input_ids_to_int16 = cast(dtype = input_ids_to_int16_dtype_0, x = input_ids)[name = string("cast_5")];
+            tensor<int32, [1]> cast_2 = cast(dtype = cast_2_dtype_0, x = input_ids_to_int16)[name = string("cast_4")];
+            tensor<bool, [1]> greater_equal_0 = greater_equal(x = cast_2, y = greater_equal_0_y_0)[name = string("greater_equal_0")];
+            int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(30720)];
+            tensor<int32, [1]> add_0 = add(x = cast_2, y = slice_by_index_0)[name = string("add_0")];
+            tensor<int32, [1]> select_0 = select(a = cast_2, b = add_0, cond = greater_equal_0)[name = string("select_0")];
+            int32 emb_cast_fp16_cast_uint16_axis_0 = const()[name = string("emb_cast_fp16_cast_uint16_axis_0"), val = int32(0)];
+            string select_0_to_int16_dtype_0 = const()[name = string("select_0_to_int16_dtype_0"), val = string("int16")];
+            tensor<int16, [1]> select_0_to_int16 = cast(dtype = select_0_to_int16_dtype_0, x = select_0)[name = string("cast_3")];
+            tensor<fp16, [1, 1024]> emb_cast_fp16_cast_uint16_cast_uint16 = gather(axis = emb_cast_fp16_cast_uint16_axis_0, batch_dims = emb_batch_dims_0, indices = select_0_to_int16, validate_indices = emb_validate_indices_0, x = embedding_weight_to_fp16)[name = string("emb_cast_fp16_cast_uint16_cast_uint16")];
+            tensor<int32, [1]> var_8_axes_0 = const()[name = string("op_8_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<fp16, [1024]> var_8_cast_fp16 = squeeze(axes = var_8_axes_0, x = emb_cast_fp16_cast_uint16_cast_uint16)[name = string("op_8_cast_fp16")];
+            tensor<fp16, [1024]> var_10_cast_fp16 = squeeze(x = var_8_cast_fp16)[name = string("op_10_cast_fp16")];
+            tensor<int32, [1]> var_12_axes_0 = const()[name = string("op_12_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024, 1]> var_12_cast_fp16 = expand_dims(axes = var_12_axes_0, x = var_10_cast_fp16)[name = string("op_12_cast_fp16")];
+            tensor<int32, [1]> var_14_axes_0 = const()[name = string("op_14_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024, 1, 1]> input_embeds = expand_dims(axes = var_14_axes_0, x = var_12_cast_fp16)[name = string("op_14_cast_fp16")];
+        } -> (input_embeds);
+}

MultiCodeEmbedder.mlmodelc/weights/weight.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9215324f237e6e41298eaca272ea0d6f0b6c10b36ee00fd1009e03f2b193485c
+size 62914688

SpeechDecoder.mlmodelc/analytics/coremldata.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a79a81c5dcb274ad3102040cfe775bc2bcfcdf07a1940737027ea31b878f258d
+size 243

SpeechDecoder.mlmodelc/coremldata.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:65312a07616d4b78cad34ae7c37d3579a6b0316e04caa63ebf9aa1882473c99b
+size 378

{MimiDecoder.mlmodelc → SpeechDecoder.mlmodelc}/metadata.json RENAMED Viewed

@@ -1,16 +1,16 @@
 [
   {
     "metadataOutputVersion" : "3.0",
-    "storagePrecision" : "Float16",
     "outputSchema" : [
       {
         "hasShapeFlexibility" : "0",
         "isOptional" : "0",
         "dataType" : "Float16",
-        "formattedType" : "MultiArray (Float16)",
         "shortDescription" : "",
-        "shape" : "[]",
-        "name" : "waveform",
         "type" : "MultiArray"
       }
     ],
@@ -20,31 +20,30 @@
     "specificationVersion" : 9,
     "mlProgramOperationTypeHistogram" : {
       "Ios18.softmax" : 8,
-      "Ios18.mul" : 219,
       "Ios18.matmul" : 16,
       "Ios18.rsqrt" : 17,
       "Ios16.reduceMean" : 17,
       "Ios18.sin" : 29,
-      "Ios18.greaterEqual" : 1,
-      "Select" : 1,
-      "Ios18.gather" : 24,
-      "Ios18.add" : 116,
       "Ios18.layerNorm" : 2,
       "Ios18.reshape" : 32,
-      "Shape" : 8,
       "Pad" : 17,
       "Ios18.linear" : 62,
       "Ios18.conv" : 31,
-      "Ios18.concat" : 27,
-      "Ios18.sub" : 16,
       "Ios18.transpose" : 40,
-      "Ios18.cast" : 34,
-      "Ios18.pow" : 17,
       "Ios18.silu" : 8,
       "Ios18.gelu" : 2,
       "Ios18.convTranspose" : 6,
-      "Ios18.sliceByIndex" : 56,
-      "Ios18.clip" : 1
     },
     "computePrecision" : "Mixed (Float16, Float32, Int16, Int32, UInt16)",
     "isUpdatable" : "0",
@@ -63,26 +62,24 @@
       "name" : "MLModelType_mlProgram"
     },
     "userDefinedMetadata" : {
-      "com.github.apple.coremltools.conversion_date" : "2026-03-29",
       "com.github.apple.coremltools.source" : "torch==2.10.0",
       "com.github.apple.coremltools.version" : "9.0",
       "com.github.apple.coremltools.source_dialect" : "TorchScript"
     },
     "inputSchema" : [
       {
-        "shortDescription" : "",
-        "dataType" : "Int32",
-        "hasShapeFlexibility" : "1",
         "isOptional" : "0",
-        "shapeFlexibility" : "1 × 16 × 4 | 1 × 16 × 14 | 1 × 16 × 35 | 1 × 16 × 50",
-        "formattedType" : "MultiArray (Int32 1 × 16 × 4)",
-        "type" : "MultiArray",
-        "shape" : "[1, 16, 4]",
-        "name" : "codes",
-        "enumeratedShapes" : "[[1, 16, 4], [1, 16, 14], [1, 16, 35], [1, 16, 50]]"
       }
     ],
-    "generatedClassName" : "MimiDecoder",
     "method" : "predict"
   }
 ]

 [
   {
     "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Mixed (Float16, Palettized (8 bits), UInt8)",
     "outputSchema" : [
       {
         "hasShapeFlexibility" : "0",
         "isOptional" : "0",
         "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1 × 240000)",
         "shortDescription" : "",
+        "shape" : "[1, 1, 240000]",
+        "name" : "audio",
         "type" : "MultiArray"
       }
     ],
     "specificationVersion" : 9,
     "mlProgramOperationTypeHistogram" : {
       "Ios18.softmax" : 8,
+      "Ios18.mul" : 158,
       "Ios18.matmul" : 16,
       "Ios18.rsqrt" : 17,
       "Ios16.reduceMean" : 17,
       "Ios18.sin" : 29,
+      "Ios18.greaterEqual" : 2,
+      "Select" : 2,
+      "Ios18.gather" : 16,
+      "Ios18.add" : 117,
       "Ios18.layerNorm" : 2,
       "Ios18.reshape" : 32,
       "Pad" : 17,
+      "Ios18.constexprLutToDense" : 133,
       "Ios18.linear" : 62,
       "Ios18.conv" : 31,
+      "Ios18.concat" : 16,
       "Ios18.transpose" : 40,
+      "Ios18.cast" : 19,
+      "Ios18.pow" : 46,
       "Ios18.silu" : 8,
       "Ios18.gelu" : 2,
+      "Ios18.clip" : 1,
       "Ios18.convTranspose" : 6,
+      "Ios18.sliceByIndex" : 54
     },
     "computePrecision" : "Mixed (Float16, Float32, Int16, Int32, UInt16)",
     "isUpdatable" : "0",
       "name" : "MLModelType_mlProgram"
     },
     "userDefinedMetadata" : {
+      "com.github.apple.coremltools.conversion_date" : "2026-03-30",
       "com.github.apple.coremltools.source" : "torch==2.10.0",
       "com.github.apple.coremltools.version" : "9.0",
       "com.github.apple.coremltools.source_dialect" : "TorchScript"
     },
     "inputSchema" : [
       {
+        "hasShapeFlexibility" : "0",
         "isOptional" : "0",
+        "dataType" : "Int32",
+        "formattedType" : "MultiArray (Int32 1 × 16 × 125)",
+        "shortDescription" : "",
+        "shape" : "[1, 16, 125]",
+        "name" : "audio_codes",
+        "type" : "MultiArray"
       }
     ],
+    "generatedClassName" : "SpeechDecoder",
     "method" : "predict"
   }
 ]

SpeechDecoder.mlmodelc/model.mil ADDED Viewed

The diff for this file is too large to render. See raw diff

{CodePredictor.mlmodelc → SpeechDecoder.mlmodelc}/weights/weight.bin RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:569ed049b7819129c6a83cce000a932c9cdc78ff7b5a5bf4e364278846222cae
-size 314651712

 version https://git-lfs.github.com/spec/v1
+oid sha256:00a1ed6d52f41d82a8da012a1344f2df17d0b1d658957c5f2afa52a4b186d03d
+size 114209024

Talker.mlmodelc/coremldata.bin DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:9b2bac3c7bb2a17381413465a43834b013095bf4f23f1ae95d4c929f5f8675ca
-size 672

Talker.mlmodelc/model.mil DELETED Viewed

The diff for this file is too large to render. See raw diff

TextProjector.mlmodelc/analytics/coremldata.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a3a2e57b7663be9f3871eb8b39b7c99da3b597bc6da4f416302d7ae5c8654f1d
+size 243

TextProjector.mlmodelc/coremldata.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9beb53dcdd366de5a75e075dfec05c3289e92ccc39c624b42ac39811e90c3437
+size 380

TextProjector.mlmodelc/metadata.json ADDED Viewed

	@@ -0,0 +1,69 @@

+[
+  {
+    "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Mixed (Float16, Palettized (8 bits), UInt8)",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1024 × 1 × 1)",
+        "shortDescription" : "",
+        "shape" : "[1024, 1, 1]",
+        "name" : "input_embeds",
+        "type" : "MultiArray"
+      }
+    ],
+    "modelParameters" : [
+    ],
+    "specificationVersion" : 9,
+    "mlProgramOperationTypeHistogram" : {
+      "Select" : 2,
+      "Ios18.squeeze" : 2,
+      "Ios18.gather" : 1,
+      "Ios18.linear" : 2,
+      "Ios18.expandDims" : 2,
+      "Ios18.constexprLutToDense" : 3,
+      "Ios18.silu" : 1,
+      "Ios18.greaterEqual" : 2,
+      "Ios18.add" : 2
+    },
+    "computePrecision" : "Mixed (Float16, Int32)",
+    "isUpdatable" : "0",
+    "stateSchema" : [
+    ],
+    "availability" : {
+      "macOS" : "15.0",
+      "tvOS" : "18.0",
+      "visionOS" : "2.0",
+      "watchOS" : "11.0",
+      "iOS" : "18.0",
+      "macCatalyst" : "18.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "userDefinedMetadata" : {
+      "com.github.apple.coremltools.conversion_date" : "2026-03-30",
+      "com.github.apple.coremltools.source" : "torch==2.10.0",
+      "com.github.apple.coremltools.version" : "9.0",
+      "com.github.apple.coremltools.source_dialect" : "TorchScript"
+    },
+    "inputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Int32",
+        "formattedType" : "MultiArray (Int32 1)",
+        "shortDescription" : "",
+        "shape" : "[1]",
+        "name" : "input_ids",
+        "type" : "MultiArray"
+      }
+    ],
+    "generatedClassName" : "TextProjector",
+    "method" : "predict"
+  }
+]

TextProjector.mlmodelc/model.mil ADDED Viewed

	@@ -0,0 +1,35 @@

+program(1.3)
+[buildInfo = dict<string, string>({{"coremlc-component-MIL", "3510.2.1"}, {"coremlc-version", "3500.32.1"}})]
+{
+    func main<ios18>(tensor<int32, [1]> input_ids) {
+            int32 input_1_batch_dims_0 = const()[name = string("input_1_batch_dims_0"), val = int32(0)];
+            bool input_1_validate_indices_0 = const()[name = string("input_1_validate_indices_0"), val = bool(false)];
+            tensor<fp16, [151936, 2048]> text_embedding_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [151936, 2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(311165056))))[name = string("text_embedding_weight_to_fp16_palettized")];
+            int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)];
+            tensor<bool, [1]> greater_equal_0 = greater_equal(x = input_ids, y = greater_equal_0_y_0)[name = string("greater_equal_0")];
+            int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(151936)];
+            tensor<int32, [1]> add_0 = add(x = input_ids, y = slice_by_index_0)[name = string("add_0")];
+            tensor<int32, [1]> select_0 = select(a = input_ids, b = add_0, cond = greater_equal_0)[name = string("select_0")];
+            int32 greater_equal_0_y_0_1 = const()[name = string("greater_equal_0_y_0_1"), val = int32(0)];
+            tensor<bool, [1]> greater_equal_0_1 = greater_equal(x = select_0, y = greater_equal_0_y_0_1)[name = string("greater_equal_0_1")];
+            int32 slice_by_index_0_1 = const()[name = string("slice_by_index_0_1"), val = int32(151936)];
+            tensor<int32, [1]> add_0_1 = add(x = select_0, y = slice_by_index_0_1)[name = string("add_0_1")];
+            tensor<int32, [1]> select_0_1 = select(a = select_0, b = add_0_1, cond = greater_equal_0_1)[name = string("select_0_1")];
+            int32 input_1_cast_fp16_axis_0 = const()[name = string("input_1_cast_fp16_axis_0"), val = int32(0)];
+            tensor<fp16, [1, 2048]> input_1_cast_fp16 = gather(axis = input_1_cast_fp16_axis_0, batch_dims = input_1_batch_dims_0, indices = select_0_1, validate_indices = input_1_validate_indices_0, x = text_embedding_weight_to_fp16_palettized)[name = string("input_1_cast_fp16")];
+            tensor<fp16, [2048, 2048]> text_projection_linear_fc1_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [2048, 2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(311165632))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(315360000))))[name = string("text_projection_linear_fc1_weight_to_fp16_palettized")];
+            tensor<fp16, [2048]> text_projection_linear_fc1_bias_to_fp16 = const()[name = string("text_projection_linear_fc1_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(315360576)))];
+            tensor<fp16, [1, 2048]> linear_0_cast_fp16 = linear(bias = text_projection_linear_fc1_bias_to_fp16, weight = text_projection_linear_fc1_weight_to_fp16_palettized, x = input_1_cast_fp16)[name = string("linear_0_cast_fp16")];
+            tensor<fp16, [1, 2048]> input_cast_fp16 = silu(x = linear_0_cast_fp16)[name = string("input_cast_fp16")];
+            tensor<fp16, [1024, 2048]> text_projection_linear_fc2_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(315364736))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(317461952))))[name = string("text_projection_linear_fc2_weight_to_fp16_palettized")];
+            tensor<fp16, [1024]> text_projection_linear_fc2_bias_to_fp16 = const()[name = string("text_projection_linear_fc2_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(317462528)))];
+            tensor<fp16, [1, 1024]> linear_1_cast_fp16 = linear(bias = text_projection_linear_fc2_bias_to_fp16, weight = text_projection_linear_fc2_weight_to_fp16_palettized, x = input_cast_fp16)[name = string("linear_1_cast_fp16")];
+            tensor<int32, [1]> var_18_axes_0 = const()[name = string("op_18_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<fp16, [1024]> var_18_cast_fp16 = squeeze(axes = var_18_axes_0, x = linear_1_cast_fp16)[name = string("op_18_cast_fp16")];
+            tensor<fp16, [1024]> var_20_cast_fp16 = squeeze(x = var_18_cast_fp16)[name = string("op_20_cast_fp16")];
+            tensor<int32, [1]> var_22_axes_0 = const()[name = string("op_22_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024, 1]> var_22_cast_fp16 = expand_dims(axes = var_22_axes_0, x = var_20_cast_fp16)[name = string("op_22_cast_fp16")];
+            tensor<int32, [1]> var_24_axes_0 = const()[name = string("op_24_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1024, 1, 1]> input_embeds = expand_dims(axes = var_24_axes_0, x = var_22_cast_fp16)[name = string("op_24_cast_fp16")];
+        } -> (input_embeds);
+}

TextProjector.mlmodelc/weights/weight.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:78ece3af64cde140b4a2fecdb4117461956b6887cff95d060adf23168b2f93d3
+size 317464640

config.json CHANGED Viewed

@@ -1,15 +1,19 @@
 {
   "model_type": "qwen3_tts_coreml",
   "model_id": "Qwen/Qwen3-TTS-12Hz-0.6B-Base",
   "hidden_size": 1024,
-  "num_layers": 28,
-  "num_heads": 16,
-  "num_kv_heads": 8,
-  "head_dim": 128,
-  "intermediate_size": 3072,
-  "codec_vocab_size": 3072,
-  "text_hidden_size": 2048,
-  "text_vocab_size": 151936,
-  "max_seq_len": 512,
-  "quantization": "int8"
 }

 {
   "model_type": "qwen3_tts_coreml",
+  "architecture": "6-model-ane",
   "model_id": "Qwen/Qwen3-TTS-12Hz-0.6B-Base",
+  "models": [
+    "TextProjector",
+    "CodeEmbedder",
+    "MultiCodeEmbedder",
+    "CodeDecoder",
+    "MultiCodeDecoder",
+    "SpeechDecoder"
+  ],
+  "quantization": "W8A16",
+  "max_seq_len": 256,
+  "max_codec_tokens": 125,
+  "sample_rate": 24000,
   "hidden_size": 1024,
+  "requires_speaker_embedding": true
 }

embeddings.safetensors DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:dc051095c79172639ea331308a35dcf7a242da6c99c8b56ff5347ed6df788d5e
-size 767042816

CodePredictor.mlmodelc/coremldata.bin → speaker_embedding.npy RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:10759300000129103c06b834961616b4094b766d20ea2ae68fe216f0ae0293f9
-size 1509

 version https://git-lfs.github.com/spec/v1
+oid sha256:2f4dbee4423ef6c15fcfbac5eeaefe2de2d7c2ffc305c81a1adefbf255aadfd4
+size 4224

tts_bos_embed.npy ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1fa7ddf7338467bd9210725673f307f600e34817709d34b67f47eff3d8a5650c
+size 4224

tts_eos_embed.npy ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3edd5d5c1432e9129bd942683928f8ded011a0942cf66a34a7969d6062dbbdb8
+size 4224

tts_pad_embed.npy ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:06d52c2f64113a6e027d622cb939d06e949137dc5a4ad92100432cf92ff25a89
+size 4224