Fix

by GradientDescent2718 - opened Jan 4

base: refs/heads/main

←

from: refs/pr/3

Discussion Files changed

+2548

-2525

Files changed (23) hide show

Sortformer.mlmodelc/coremldata.bin +1 -1
Sortformer.mlmodelc/metadata.json +2 -2
Sortformer.mlmodelc/model0/coremldata.bin +1 -1
Sortformer.mlmodelc/model1/coremldata.bin +1 -1
Sortformer.mlpackage/Data/com.apple.CoreML/model.mlmodel +2 -2
Sortformer.mlpackage/Manifest.json +3 -3
SortformerNvidiaHigh.mlmodelc/coremldata.bin +1 -1
SortformerNvidiaHigh.mlmodelc/metadata.json +2 -2
SortformerNvidiaHigh.mlmodelc/model0/coremldata.bin +1 -1
SortformerNvidiaHigh.mlmodelc/model1/coremldata.bin +1 -1
SortformerNvidiaHigh.mlpackage/Data/com.apple.CoreML/model.mlmodel +2 -2
SortformerNvidiaHigh.mlpackage/Manifest.json +3 -3
SortformerNvidiaLow.mlmodelc/analytics/coremldata.bin +1 -1
SortformerNvidiaLow.mlmodelc/coremldata.bin +2 -2
SortformerNvidiaLow.mlmodelc/metadata.json +32 -12
SortformerNvidiaLow.mlmodelc/model0/coremldata.bin +2 -2
SortformerNvidiaLow.mlmodelc/model0/model.mil +24 -24
SortformerNvidiaLow.mlmodelc/model1/coremldata.bin +2 -2
SortformerNvidiaLow.mlmodelc/model1/model.mil +0 -0
SortformerNvidiaLow.mlmodelc/model1/weights/1-weight.bin +2 -2
SortformerNvidiaLow.mlpackage/Data/com.apple.CoreML/model.mlmodel +2 -2
SortformerNvidiaLow.mlpackage/Data/com.apple.CoreML/weights/1-weight.bin +2 -2
SortformerNvidiaLow.mlpackage/Manifest.json +8 -8

Sortformer.mlmodelc/coremldata.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bb202c84a24f72caf7bbdedd81218e1b2778049debfe0585eccff40cb17b96c0
 size 1078

 version https://git-lfs.github.com/spec/v1
+oid sha256:1d69415aa509e8c407dfa64136402232dd02610c41d8c5729446830639373376
 size 1078

Sortformer.mlmodelc/metadata.json CHANGED Viewed

@@ -163,9 +163,9 @@
       "frame_duration" : "0.08",
       "spkcache_update_period" : "31",
       "chunk_len" : "6",
-      "mel_feature_frames" : "48",
-      "chunk_right_context" : "7",
       "subsampling_factor" : "8",
       "fifo_len" : "40",
       "chunk_left_context" : "1",
       "spkcache_len" : "188"

       "frame_duration" : "0.08",
       "spkcache_update_period" : "31",
       "chunk_len" : "6",
       "subsampling_factor" : "8",
+      "chunk_right_context" : "7",
+      "mel_feature_frames" : "48",
       "fifo_len" : "40",
       "chunk_left_context" : "1",
       "spkcache_len" : "188"

Sortformer.mlmodelc/model0/coremldata.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fe69aaf7236e35953785e4445e228c4bd3af3f3a436b9f0fb81fe4918a2d54d7
 size 632

 version https://git-lfs.github.com/spec/v1
+oid sha256:94cc859be2d5514e057506ea9bed05ac455fb72ebf05a52e4aeddf5dea80ceb1
 size 632

Sortformer.mlmodelc/model1/coremldata.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:69f9dd0cf00b5542f98ce26d81b0b2d0c5c4bbe825342d3130b5aaa419224513
 size 585

 version https://git-lfs.github.com/spec/v1
+oid sha256:e613306cf96ba390ac0ebb449947c8b4dc344a6a9e3be69b8220ad1caa74056b
 size 585

Sortformer.mlpackage/Data/com.apple.CoreML/model.mlmodel CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:933b3a03dd31858d8f92c743fd5f648568bd2593e7279b09bedfb20397051979
-size 762290

 version https://git-lfs.github.com/spec/v1
+oid sha256:d4f5a6e1ce4615929239b6a59a4a91378483c95335e468d02bf57ff2d3e615e2
+size 762968

Sortformer.mlpackage/Manifest.json CHANGED Viewed

@@ -1,18 +1,18 @@
 {
     "fileFormatVersion": "1.0.0",
     "itemInfoEntries": {
-        "BF219566-F155-42C7-822D-D8F94577A054": {
             "author": "com.apple.CoreML",
             "description": "CoreML Model Weights",
             "name": "weights",
             "path": "com.apple.CoreML/weights"
         },
-        "E44485FE-6869-448C-871D-024F45320C41": {
             "author": "com.apple.CoreML",
             "description": "CoreML Model Specification",
             "name": "model.mlmodel",
             "path": "com.apple.CoreML/model.mlmodel"
         }
     },
-    "rootModelIdentifier": "E44485FE-6869-448C-871D-024F45320C41"
 }

 {
     "fileFormatVersion": "1.0.0",
     "itemInfoEntries": {
+        "91957EE3-C4C0-4E0F-95BC-72E14851707C": {
             "author": "com.apple.CoreML",
             "description": "CoreML Model Weights",
             "name": "weights",
             "path": "com.apple.CoreML/weights"
         },
+        "FC8642BA-1EC2-4646-874C-1A4912A3591C": {
             "author": "com.apple.CoreML",
             "description": "CoreML Model Specification",
             "name": "model.mlmodel",
             "path": "com.apple.CoreML/model.mlmodel"
         }
     },
+    "rootModelIdentifier": "FC8642BA-1EC2-4646-874C-1A4912A3591C"
 }

SortformerNvidiaHigh.mlmodelc/coremldata.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bce85a71873d0b3330dae51fa1df61f9485656b1432c97bdb3628ddf6e8c7687
 size 1091

 version https://git-lfs.github.com/spec/v1
+oid sha256:6b04dcb554e85010612799da011422017ff4db0bba30b755296dac6ed4821799
 size 1091

SortformerNvidiaHigh.mlmodelc/metadata.json CHANGED Viewed

@@ -161,7 +161,7 @@
       }
     ],
     "userDefinedMetadata" : {
-      "spkcache_len" : "188",
       "spkcache_update_period" : "300",
       "chunk_len" : "340",
       "mel_feature_frames" : "2720",
@@ -169,7 +169,7 @@
       "subsampling_factor" : "8",
       "fifo_len" : "40",
       "chunk_left_context" : "1",
-      "frame_duration" : "0.08"
     },
     "generatedClassName" : "SortformerNvidiaHigh",
     "method" : "predict"

       }
     ],
     "userDefinedMetadata" : {
+      "frame_duration" : "0.08",
       "spkcache_update_period" : "300",
       "chunk_len" : "340",
       "mel_feature_frames" : "2720",
       "subsampling_factor" : "8",
       "fifo_len" : "40",
       "chunk_left_context" : "1",
+      "spkcache_len" : "188"
     },
     "generatedClassName" : "SortformerNvidiaHigh",
     "method" : "predict"

SortformerNvidiaHigh.mlmodelc/model0/coremldata.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2045223a88c6a15d00b08f272d5d4210ffa4d96c860630f7072ab29aa6dee79a
 size 634

 version https://git-lfs.github.com/spec/v1
+oid sha256:330c631d7b8d6100ea7cb1f93616512b083fef39ca4ffb6c8f9e6fba756f4fcd
 size 634

SortformerNvidiaHigh.mlmodelc/model1/coremldata.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:12b8beb6f2ffed92fb707893ceb22189c7c1dcaa3769d26a804eb400cfeb9d4a
 size 587

 version https://git-lfs.github.com/spec/v1
+oid sha256:44face253d8b2c35339a1a54a9f709c369a877b3f673c9e2c1ce4db485d562ea
 size 587

SortformerNvidiaHigh.mlpackage/Data/com.apple.CoreML/model.mlmodel CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:be97be5a77a9c80359ce597c623e695ed32a5ead59a338f7d152d4be4c600a76
-size 1086880

 version https://git-lfs.github.com/spec/v1
+oid sha256:06b732c6ce6334a05528abd80bcc45db2cbebaf84bf8f3f11f1a64951b94f2b2
+size 1087553

SortformerNvidiaHigh.mlpackage/Manifest.json CHANGED Viewed

@@ -1,18 +1,18 @@
 {
     "fileFormatVersion": "1.0.0",
     "itemInfoEntries": {
-        "C2E55AEB-6940-4114-ADD4-D98C7345B073": {
             "author": "com.apple.CoreML",
             "description": "CoreML Model Weights",
             "name": "weights",
             "path": "com.apple.CoreML/weights"
         },
-        "F250FEE8-A027-4BAB-B7D1-630291725950": {
             "author": "com.apple.CoreML",
             "description": "CoreML Model Specification",
             "name": "model.mlmodel",
             "path": "com.apple.CoreML/model.mlmodel"
         }
     },
-    "rootModelIdentifier": "F250FEE8-A027-4BAB-B7D1-630291725950"
 }

 {
     "fileFormatVersion": "1.0.0",
     "itemInfoEntries": {
+        "CEC453DA-87EE-46E9-892F-6F776828BCD5": {
             "author": "com.apple.CoreML",
             "description": "CoreML Model Weights",
             "name": "weights",
             "path": "com.apple.CoreML/weights"
         },
+        "DB79E6F4-4356-4B37-88CF-F9BBC351598A": {
             "author": "com.apple.CoreML",
             "description": "CoreML Model Specification",
             "name": "model.mlmodel",
             "path": "com.apple.CoreML/model.mlmodel"
         }
     },
+    "rootModelIdentifier": "DB79E6F4-4356-4B37-88CF-F9BBC351598A"
 }

SortformerNvidiaLow.mlmodelc/analytics/coremldata.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7a96be4debfe5612eb3f316a5d91baee98be511526a4f9f59974a2c2bcf5d323
 size 202

 version https://git-lfs.github.com/spec/v1
+oid sha256:05d430bf8aa7b473ee252c1908e3571759a90b20755b892365bcab3a68dadba6
 size 202

SortformerNvidiaLow.mlmodelc/coremldata.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:85ca72159bbd43a48615c8031f286453171e2de971532225cff51b0240096179
-size 1083

 version https://git-lfs.github.com/spec/v1
+oid sha256:7e4a1a547d8c3a75c4e383ec9686ddd32b6bb62706c9fb5774646bcaa154e93a
+size 1217

SortformerNvidiaLow.mlmodelc/metadata.json CHANGED Viewed

@@ -7,9 +7,9 @@
         "hasShapeFlexibility" : "0",
         "isOptional" : "0",
         "dataType" : "Float32",
-        "formattedType" : "MultiArray (Float32 1 × 242 × 4)",
         "shortDescription" : "Combined speaker probabilities for the speaker  cache, FIFO queue, and chunk",
-        "shape" : "[1, 242, 4]",
         "name" : "speaker_preds",
         "type" : "MultiArray"
       },
@@ -18,7 +18,7 @@
         "isOptional" : "0",
         "dataType" : "Float32",
         "formattedType" : "MultiArray (Float32 1 × 14 × 512)",
-        "shortDescription" : "Speaker embeddings for the new chunk",
         "shape" : "[1, 14, 512]",
         "name" : "chunk_pre_encoder_embs",
         "type" : "MultiArray"
@@ -32,6 +32,26 @@
         "shape" : "[1]",
         "name" : "chunk_pre_encoder_lengths",
         "type" : "MultiArray"
       }
     ],
     "version" : "2.1",
@@ -45,7 +65,7 @@
     "mlProgramOperationTypeHistogram" : {
       "Ios16.floorDiv" : 3,
       "Transpose" : 193,
-      "Identity" : 2,
       "Ios16.softmax" : 35,
       "Ios16.gatherAlongAxis" : 1,
       "Split" : 17,
@@ -61,7 +81,7 @@
       "Pad" : 34,
       "ExpandDims" : 25,
       "Ios16.sub" : 6,
-      "Ios16.cast" : 16,
       "Ios16.less" : 7,
       "Ios16.conv" : 56,
       "Ios16.relu" : 23,
@@ -143,9 +163,9 @@
         "hasShapeFlexibility" : "0",
         "isOptional" : "0",
         "dataType" : "Float32",
-        "formattedType" : "MultiArray (Float32 1 × 40 × 512)",
         "shortDescription" : "First-In-First-Out speech queue",
-        "shape" : "[1, 40, 512]",
         "name" : "fifo",
         "type" : "MultiArray"
       },
@@ -161,15 +181,15 @@
       }
     ],
     "userDefinedMetadata" : {
-      "frame_duration" : "0.08",
-      "spkcache_update_period" : "31",
       "chunk_len" : "6",
       "subsampling_factor" : "8",
-      "chunk_right_context" : "7",
       "mel_feature_frames" : "48",
-      "fifo_len" : "40",
       "chunk_left_context" : "1",
-      "spkcache_len" : "188"
     },
     "generatedClassName" : "SortformerNvidiaLow",
     "method" : "predict"

         "hasShapeFlexibility" : "0",
         "isOptional" : "0",
         "dataType" : "Float32",
+        "formattedType" : "MultiArray (Float32 1 × 390 × 4)",
         "shortDescription" : "Combined speaker probabilities for the speaker  cache, FIFO queue, and chunk",
+        "shape" : "[1, 390, 4]",
         "name" : "speaker_preds",
         "type" : "MultiArray"
       },
         "isOptional" : "0",
         "dataType" : "Float32",
         "formattedType" : "MultiArray (Float32 1 × 14 × 512)",
+        "shortDescription" : "Audio embeddings for the new chunk",
         "shape" : "[1, 14, 512]",
         "name" : "chunk_pre_encoder_embs",
         "type" : "MultiArray"
         "shape" : "[1]",
         "name" : "chunk_pre_encoder_lengths",
         "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float32",
+        "formattedType" : "MultiArray (Float32 1 × 390 × 192)",
+        "shortDescription" : "Speaker embeddings",
+        "shape" : "[1, 390, 192]",
+        "name" : "nest_encoder_embs",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Int32",
+        "formattedType" : "MultiArray (Int32 1)",
+        "shortDescription" : "Number of frames with speaker embeddings",
+        "shape" : "[1]",
+        "name" : "nest_encoder_lengths",
+        "type" : "MultiArray"
       }
     ],
     "version" : "2.1",
     "mlProgramOperationTypeHistogram" : {
       "Ios16.floorDiv" : 3,
       "Transpose" : 193,
+      "Identity" : 3,
       "Ios16.softmax" : 35,
       "Ios16.gatherAlongAxis" : 1,
       "Split" : 17,
       "Pad" : 34,
       "ExpandDims" : 25,
       "Ios16.sub" : 6,
+      "Ios16.cast" : 17,
       "Ios16.less" : 7,
       "Ios16.conv" : 56,
       "Ios16.relu" : 23,
         "hasShapeFlexibility" : "0",
         "isOptional" : "0",
         "dataType" : "Float32",
+        "formattedType" : "MultiArray (Float32 1 × 188 × 512)",
         "shortDescription" : "First-In-First-Out speech queue",
+        "shape" : "[1, 188, 512]",
         "name" : "fifo",
         "type" : "MultiArray"
       },
       }
     ],
     "userDefinedMetadata" : {
+      "spkcache_len" : "188",
+      "spkcache_update_period" : "144",
       "chunk_len" : "6",
       "subsampling_factor" : "8",
       "mel_feature_frames" : "48",
+      "chunk_right_context" : "7",
+      "fifo_len" : "188",
       "chunk_left_context" : "1",
+      "frame_duration" : "0.08"
     },
     "generatedClassName" : "SortformerNvidiaLow",
     "method" : "predict"

SortformerNvidiaLow.mlmodelc/model0/coremldata.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3c00973ffe17f5a5d536891804b1e9b7145990cc029a5c7bbfad39f942d83003
-size 632

 version https://git-lfs.github.com/spec/v1
+oid sha256:6c8b4cc493f20fb84b0695e5048ded26bced335faefc02fd0ec0ce90a94856b9
+size 633

SortformerNvidiaLow.mlmodelc/model0/model.mil CHANGED Viewed

@@ -1,7 +1,7 @@
 program(1.0)
 [buildInfo = dict<tensor<string, []>, tensor<string, []>>({{"coremlc-component-MIL", "3510.2.1"}, {"coremlc-version", "3500.32.1"}, {"coremltools-component-torch", "2.9.1"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "9.0"}})]
 {
-    func main<ios16>(tensor<fp32, [1, 112, 128]> chunk, tensor<int32, [1]> chunk_lengths, tensor<fp32, [1, 40, 512]> fifo, tensor<int32, [1]> fifo_lengths, tensor<fp32, [1, 188, 512]> spkcache, tensor<int32, [1]> spkcache_lengths) {
             tensor<fp32, [256]> model_encoder_pre_encode_conv_0_bias = const()[name = tensor<string, []>("model_encoder_pre_encode_conv_0_bias"), val = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/0-weight.bin"), offset = tensor<uint64, []>(64)))];
             tensor<fp32, [256, 1, 3, 3]> model_encoder_pre_encode_conv_0_weight = const()[name = tensor<string, []>("model_encoder_pre_encode_conv_0_weight"), val = tensor<fp32, [256, 1, 3, 3]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/0-weight.bin"), offset = tensor<uint64, []>(1152)))];
             tensor<fp32, [256]> model_encoder_pre_encode_conv_2_bias = const()[name = tensor<string, []>("model_encoder_pre_encode_conv_2_bias"), val = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/0-weight.bin"), offset = tensor<uint64, []>(10432)))];
@@ -157,45 +157,45 @@ program(1.0)
             tensor<fp32, [1, 14, 512]> chunk_pre_encoder_embs = linear(bias = model_encoder_pre_encode_out_bias, weight = model_encoder_pre_encode_out_weight, x = input)[name = tensor<string, []>("linear_0")];
             tensor<string, []> var_241_dtype_0 = const()[name = tensor<string, []>("op_241_dtype_0"), val = tensor<string, []>("int32")];
             tensor<int32, [1]> size0 = const()[name = tensor<string, []>("size0"), val = tensor<int32, [1]>([188])];
-            tensor<int32, [1]> size1 = const()[name = tensor<string, []>("size1"), val = tensor<int32, [1]>([40])];
             tensor<int32, []> var_264 = const()[name = tensor<string, []>("op_264"), val = tensor<int32, []>(1)];
             tensor<bool, []> full_concat_interleave_0 = const()[name = tensor<string, []>("full_concat_interleave_0"), val = tensor<bool, []>(false)];
-            tensor<fp32, [1, 242, 512]> full_concat = concat(axis = var_264, interleave = full_concat_interleave_0, values = (spkcache, fifo, chunk_pre_encoder_embs))[name = tensor<string, []>("full_concat")];
             tensor<int32, [1]> var_273 = add(x = spkcache_lengths, y = fifo_lengths)[name = tensor<string, []>("op_273")];
             tensor<int32, [1]> chunk_pre_encoder_lengths = cast(dtype = var_241_dtype_0, x = current_lengths)[name = tensor<string, []>("cast_4")];
             tensor<int32, [1]> pre_encoder_lengths = add(x = var_273, y = chunk_pre_encoder_lengths)[name = tensor<string, []>("total_length")];
-            tensor<int32, [242]> out_pos = const()[name = tensor<string, []>("out_pos"), val = tensor<int32, [242]>([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241])];
-            tensor<bool, [242]> var_284 = greater_equal(x = out_pos, y = spkcache_lengths)[name = tensor<string, []>("op_284")];
             tensor<string, []> in_seg1_or_2_dtype_0 = const()[name = tensor<string, []>("in_seg1_or_2_dtype_0"), val = tensor<string, []>("int32")];
-            tensor<bool, [242]> var_290 = greater_equal(x = out_pos, y = var_273)[name = tensor<string, []>("op_290")];
             tensor<string, []> in_seg2_dtype_0 = const()[name = tensor<string, []>("in_seg2_dtype_0"), val = tensor<string, []>("int32")];
             tensor<int32, [1]> var_297 = sub(x = size0, y = spkcache_lengths)[name = tensor<string, []>("op_297")];
-            tensor<int32, [242]> in_seg1_or_2 = cast(dtype = in_seg1_or_2_dtype_0, x = var_284)[name = tensor<string, []>("cast_3")];
-            tensor<int32, [242]> var_298 = mul(x = in_seg1_or_2, y = var_297)[name = tensor<string, []>("op_298")];
             tensor<int32, [1]> var_300 = sub(x = size1, y = fifo_lengths)[name = tensor<string, []>("op_300")];
-            tensor<int32, [242]> in_seg2 = cast(dtype = in_seg2_dtype_0, x = var_290)[name = tensor<string, []>("cast_2")];
-            tensor<int32, [242]> var_301 = mul(x = in_seg2, y = var_300)[name = tensor<string, []>("op_301")];
-            tensor<int32, [242]> offset = add(x = var_298, y = var_301)[name = tensor<string, []>("offset")];
-            tensor<int32, [242]> var_305 = add(x = out_pos, y = offset)[name = tensor<string, []>("op_305")];
-            tensor<int32, []> var_309 = const()[name = tensor<string, []>("op_309"), val = tensor<int32, []>(241)];
             tensor<int32, []> var_310 = const()[name = tensor<string, []>("op_310"), val = tensor<int32, []>(0)];
-            tensor<int32, [242]> minimum_0 = minimum(x = var_305, y = var_309)[name = tensor<string, []>("minimum_0")];
-            tensor<int32, [242]> maximum_0 = maximum(x = minimum_0, y = var_310)[name = tensor<string, []>("maximum_0")];
             tensor<int32, [1]> var_313_axes_0 = const()[name = tensor<string, []>("op_313_axes_0"), val = tensor<int32, [1]>([0])];
-            tensor<int32, [1, 242]> var_313 = expand_dims(axes = var_313_axes_0, x = maximum_0)[name = tensor<string, []>("op_313")];
             tensor<int32, [1]> var_315_axes_0 = const()[name = tensor<string, []>("op_315_axes_0"), val = tensor<int32, [1]>([-1])];
-            tensor<int32, [1, 242, 1]> var_315 = expand_dims(axes = var_315_axes_0, x = var_313)[name = tensor<string, []>("op_315")];
             tensor<int32, [3]> gather_idx_reps_0 = const()[name = tensor<string, []>("gather_idx_reps_0"), val = tensor<int32, [3]>([1, 1, 512])];
-            tensor<int32, [1, 242, 512]> gather_idx = tile(reps = gather_idx_reps_0, x = var_315)[name = tensor<string, []>("gather_idx")];
             tensor<int32, []> var_320 = const()[name = tensor<string, []>("op_320"), val = tensor<int32, []>(1)];
-            tensor<fp32, [1, 242, 512]> output = gather_along_axis(axis = var_320, indices = gather_idx, x = full_concat)[name = tensor<string, []>("output")];
-            tensor<bool, [242]> var_323 = less(x = out_pos, y = pre_encoder_lengths)[name = tensor<string, []>("op_323")];
             tensor<string, []> var_328_dtype_0 = const()[name = tensor<string, []>("op_328_dtype_0"), val = tensor<string, []>("fp32")];
             tensor<int32, [1]> var_330_axes_0 = const()[name = tensor<string, []>("op_330_axes_0"), val = tensor<int32, [1]>([0])];
-            tensor<fp32, [242]> var_328 = cast(dtype = var_328_dtype_0, x = var_323)[name = tensor<string, []>("cast_1")];
-            tensor<fp32, [1, 242]> var_330 = expand_dims(axes = var_330_axes_0, x = var_328)[name = tensor<string, []>("op_330")];
             tensor<int32, [1]> var_332_axes_0 = const()[name = tensor<string, []>("op_332_axes_0"), val = tensor<int32, [1]>([-1])];
-            tensor<fp32, [1, 242, 1]> var_332 = expand_dims(axes = var_332_axes_0, x = var_330)[name = tensor<string, []>("op_332")];
-            tensor<fp32, [1, 242, 512]> pre_encoder_embs = mul(x = output, y = var_332)[name = tensor<string, []>("op_333")];
         } -> (pre_encoder_embs, pre_encoder_lengths, chunk_pre_encoder_embs, chunk_pre_encoder_lengths);
 }

 program(1.0)
 [buildInfo = dict<tensor<string, []>, tensor<string, []>>({{"coremlc-component-MIL", "3510.2.1"}, {"coremlc-version", "3500.32.1"}, {"coremltools-component-torch", "2.9.1"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "9.0"}})]
 {
+    func main<ios16>(tensor<fp32, [1, 112, 128]> chunk, tensor<int32, [1]> chunk_lengths, tensor<fp32, [1, 188, 512]> fifo, tensor<int32, [1]> fifo_lengths, tensor<fp32, [1, 188, 512]> spkcache, tensor<int32, [1]> spkcache_lengths) {
             tensor<fp32, [256]> model_encoder_pre_encode_conv_0_bias = const()[name = tensor<string, []>("model_encoder_pre_encode_conv_0_bias"), val = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/0-weight.bin"), offset = tensor<uint64, []>(64)))];
             tensor<fp32, [256, 1, 3, 3]> model_encoder_pre_encode_conv_0_weight = const()[name = tensor<string, []>("model_encoder_pre_encode_conv_0_weight"), val = tensor<fp32, [256, 1, 3, 3]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/0-weight.bin"), offset = tensor<uint64, []>(1152)))];
             tensor<fp32, [256]> model_encoder_pre_encode_conv_2_bias = const()[name = tensor<string, []>("model_encoder_pre_encode_conv_2_bias"), val = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/0-weight.bin"), offset = tensor<uint64, []>(10432)))];
             tensor<fp32, [1, 14, 512]> chunk_pre_encoder_embs = linear(bias = model_encoder_pre_encode_out_bias, weight = model_encoder_pre_encode_out_weight, x = input)[name = tensor<string, []>("linear_0")];
             tensor<string, []> var_241_dtype_0 = const()[name = tensor<string, []>("op_241_dtype_0"), val = tensor<string, []>("int32")];
             tensor<int32, [1]> size0 = const()[name = tensor<string, []>("size0"), val = tensor<int32, [1]>([188])];
+            tensor<int32, [1]> size1 = const()[name = tensor<string, []>("size1"), val = tensor<int32, [1]>([188])];
             tensor<int32, []> var_264 = const()[name = tensor<string, []>("op_264"), val = tensor<int32, []>(1)];
             tensor<bool, []> full_concat_interleave_0 = const()[name = tensor<string, []>("full_concat_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp32, [1, 390, 512]> full_concat = concat(axis = var_264, interleave = full_concat_interleave_0, values = (spkcache, fifo, chunk_pre_encoder_embs))[name = tensor<string, []>("full_concat")];
             tensor<int32, [1]> var_273 = add(x = spkcache_lengths, y = fifo_lengths)[name = tensor<string, []>("op_273")];
             tensor<int32, [1]> chunk_pre_encoder_lengths = cast(dtype = var_241_dtype_0, x = current_lengths)[name = tensor<string, []>("cast_4")];
             tensor<int32, [1]> pre_encoder_lengths = add(x = var_273, y = chunk_pre_encoder_lengths)[name = tensor<string, []>("total_length")];
+            tensor<int32, [390]> out_pos = const()[name = tensor<string, []>("out_pos"), val = tensor<int32, [390]>([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389])];
+            tensor<bool, [390]> var_284 = greater_equal(x = out_pos, y = spkcache_lengths)[name = tensor<string, []>("op_284")];
             tensor<string, []> in_seg1_or_2_dtype_0 = const()[name = tensor<string, []>("in_seg1_or_2_dtype_0"), val = tensor<string, []>("int32")];
+            tensor<bool, [390]> var_290 = greater_equal(x = out_pos, y = var_273)[name = tensor<string, []>("op_290")];
             tensor<string, []> in_seg2_dtype_0 = const()[name = tensor<string, []>("in_seg2_dtype_0"), val = tensor<string, []>("int32")];
             tensor<int32, [1]> var_297 = sub(x = size0, y = spkcache_lengths)[name = tensor<string, []>("op_297")];
+            tensor<int32, [390]> in_seg1_or_2 = cast(dtype = in_seg1_or_2_dtype_0, x = var_284)[name = tensor<string, []>("cast_3")];
+            tensor<int32, [390]> var_298 = mul(x = in_seg1_or_2, y = var_297)[name = tensor<string, []>("op_298")];
             tensor<int32, [1]> var_300 = sub(x = size1, y = fifo_lengths)[name = tensor<string, []>("op_300")];
+            tensor<int32, [390]> in_seg2 = cast(dtype = in_seg2_dtype_0, x = var_290)[name = tensor<string, []>("cast_2")];
+            tensor<int32, [390]> var_301 = mul(x = in_seg2, y = var_300)[name = tensor<string, []>("op_301")];
+            tensor<int32, [390]> offset = add(x = var_298, y = var_301)[name = tensor<string, []>("offset")];
+            tensor<int32, [390]> var_305 = add(x = out_pos, y = offset)[name = tensor<string, []>("op_305")];
+            tensor<int32, []> var_309 = const()[name = tensor<string, []>("op_309"), val = tensor<int32, []>(389)];
             tensor<int32, []> var_310 = const()[name = tensor<string, []>("op_310"), val = tensor<int32, []>(0)];
+            tensor<int32, [390]> minimum_0 = minimum(x = var_305, y = var_309)[name = tensor<string, []>("minimum_0")];
+            tensor<int32, [390]> maximum_0 = maximum(x = minimum_0, y = var_310)[name = tensor<string, []>("maximum_0")];
             tensor<int32, [1]> var_313_axes_0 = const()[name = tensor<string, []>("op_313_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1, 390]> var_313 = expand_dims(axes = var_313_axes_0, x = maximum_0)[name = tensor<string, []>("op_313")];
             tensor<int32, [1]> var_315_axes_0 = const()[name = tensor<string, []>("op_315_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<int32, [1, 390, 1]> var_315 = expand_dims(axes = var_315_axes_0, x = var_313)[name = tensor<string, []>("op_315")];
             tensor<int32, [3]> gather_idx_reps_0 = const()[name = tensor<string, []>("gather_idx_reps_0"), val = tensor<int32, [3]>([1, 1, 512])];
+            tensor<int32, [1, 390, 512]> gather_idx = tile(reps = gather_idx_reps_0, x = var_315)[name = tensor<string, []>("gather_idx")];
             tensor<int32, []> var_320 = const()[name = tensor<string, []>("op_320"), val = tensor<int32, []>(1)];
+            tensor<fp32, [1, 390, 512]> output = gather_along_axis(axis = var_320, indices = gather_idx, x = full_concat)[name = tensor<string, []>("output")];
+            tensor<bool, [390]> var_323 = less(x = out_pos, y = pre_encoder_lengths)[name = tensor<string, []>("op_323")];
             tensor<string, []> var_328_dtype_0 = const()[name = tensor<string, []>("op_328_dtype_0"), val = tensor<string, []>("fp32")];
             tensor<int32, [1]> var_330_axes_0 = const()[name = tensor<string, []>("op_330_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<fp32, [390]> var_328 = cast(dtype = var_328_dtype_0, x = var_323)[name = tensor<string, []>("cast_1")];
+            tensor<fp32, [1, 390]> var_330 = expand_dims(axes = var_330_axes_0, x = var_328)[name = tensor<string, []>("op_330")];
             tensor<int32, [1]> var_332_axes_0 = const()[name = tensor<string, []>("op_332_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp32, [1, 390, 1]> var_332 = expand_dims(axes = var_332_axes_0, x = var_330)[name = tensor<string, []>("op_332")];
+            tensor<fp32, [1, 390, 512]> pre_encoder_embs = mul(x = output, y = var_332)[name = tensor<string, []>("op_333")];
         } -> (pre_encoder_embs, pre_encoder_lengths, chunk_pre_encoder_embs, chunk_pre_encoder_lengths);
 }

SortformerNvidiaLow.mlmodelc/model1/coremldata.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5507ee0c17c75411804df4ddc48d11b87deb021bc531290e8c3817f0601afa04
-size 585

 version https://git-lfs.github.com/spec/v1
+oid sha256:b229c16fa358c45202e9ff8ac6185c79bc0493d107dad567dd2e4324c18b6245
+size 656

SortformerNvidiaLow.mlmodelc/model1/model.mil CHANGED Viewed

The diff for this file is too large to render. See raw diff

SortformerNvidiaLow.mlmodelc/model1/weights/1-weight.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4c85926af77684bce762b355a2b162df557d832444fbeb79ee195113a4bbf1db
-size 230428224

 version https://git-lfs.github.com/spec/v1
+oid sha256:e98531d7e961c3e8c43f8f1266abd6bbc110e11c52cda4d75a506f7fab53f2d4
+size 235580992

SortformerNvidiaLow.mlpackage/Data/com.apple.CoreML/model.mlmodel CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5d476ed462ba9341ddbc1eb0ef789bb8d97ca5cc11a46ba0d2b74d418f32434a
-size 762295

 version https://git-lfs.github.com/spec/v1
+oid sha256:0ffc3fabdd04016403677dbb182996bb9802fa4fcafaea8742148385b1989bc9
+size 857102

SortformerNvidiaLow.mlpackage/Data/com.apple.CoreML/weights/1-weight.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4c85926af77684bce762b355a2b162df557d832444fbeb79ee195113a4bbf1db
-size 230428224

 version https://git-lfs.github.com/spec/v1
+oid sha256:e98531d7e961c3e8c43f8f1266abd6bbc110e11c52cda4d75a506f7fab53f2d4
+size 235580992

SortformerNvidiaLow.mlpackage/Manifest.json CHANGED Viewed

@@ -1,18 +1,18 @@
 {
     "fileFormatVersion": "1.0.0",
     "itemInfoEntries": {
-        "1940BF88-E8D9-4C67-82FC-AABB8A032C1A": {
-            "author": "com.apple.CoreML",
-            "description": "CoreML Model Specification",
-            "name": "model.mlmodel",
-            "path": "com.apple.CoreML/model.mlmodel"
-        },
-        "84A637AA-9D30-4709-A1C1-1B2E5C8F0C09": {
             "author": "com.apple.CoreML",
             "description": "CoreML Model Weights",
             "name": "weights",
             "path": "com.apple.CoreML/weights"
         }
     },
-    "rootModelIdentifier": "1940BF88-E8D9-4C67-82FC-AABB8A032C1A"
 }

 {
     "fileFormatVersion": "1.0.0",
     "itemInfoEntries": {
+        "9FC923D1-4D56-4C41-9FDA-AF1A43ABEA24": {
             "author": "com.apple.CoreML",
             "description": "CoreML Model Weights",
             "name": "weights",
             "path": "com.apple.CoreML/weights"
+        },
+        "E5741981-7159-4A38-9EB7-F97029CE1B28": {
+            "author": "com.apple.CoreML",
+            "description": "CoreML Model Specification",
+            "name": "model.mlmodel",
+            "path": "com.apple.CoreML/model.mlmodel"
         }
     },
+    "rootModelIdentifier": "E5741981-7159-4A38-9EB7-F97029CE1B28"
 }