Sortformer.mlmodelc/coremldata.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bb202c84a24f72caf7bbdedd81218e1b2778049debfe0585eccff40cb17b96c0
3
  size 1078
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d69415aa509e8c407dfa64136402232dd02610c41d8c5729446830639373376
3
  size 1078
Sortformer.mlmodelc/metadata.json CHANGED
@@ -163,9 +163,9 @@
163
  "frame_duration" : "0.08",
164
  "spkcache_update_period" : "31",
165
  "chunk_len" : "6",
166
- "mel_feature_frames" : "48",
167
- "chunk_right_context" : "7",
168
  "subsampling_factor" : "8",
 
 
169
  "fifo_len" : "40",
170
  "chunk_left_context" : "1",
171
  "spkcache_len" : "188"
 
163
  "frame_duration" : "0.08",
164
  "spkcache_update_period" : "31",
165
  "chunk_len" : "6",
 
 
166
  "subsampling_factor" : "8",
167
+ "chunk_right_context" : "7",
168
+ "mel_feature_frames" : "48",
169
  "fifo_len" : "40",
170
  "chunk_left_context" : "1",
171
  "spkcache_len" : "188"
Sortformer.mlmodelc/model0/coremldata.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fe69aaf7236e35953785e4445e228c4bd3af3f3a436b9f0fb81fe4918a2d54d7
3
  size 632
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94cc859be2d5514e057506ea9bed05ac455fb72ebf05a52e4aeddf5dea80ceb1
3
  size 632
Sortformer.mlmodelc/model1/coremldata.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:69f9dd0cf00b5542f98ce26d81b0b2d0c5c4bbe825342d3130b5aaa419224513
3
  size 585
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e613306cf96ba390ac0ebb449947c8b4dc344a6a9e3be69b8220ad1caa74056b
3
  size 585
Sortformer.mlpackage/Data/com.apple.CoreML/model.mlmodel CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:933b3a03dd31858d8f92c743fd5f648568bd2593e7279b09bedfb20397051979
3
- size 762290
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4f5a6e1ce4615929239b6a59a4a91378483c95335e468d02bf57ff2d3e615e2
3
+ size 762968
Sortformer.mlpackage/Manifest.json CHANGED
@@ -1,18 +1,18 @@
1
  {
2
  "fileFormatVersion": "1.0.0",
3
  "itemInfoEntries": {
4
- "BF219566-F155-42C7-822D-D8F94577A054": {
5
  "author": "com.apple.CoreML",
6
  "description": "CoreML Model Weights",
7
  "name": "weights",
8
  "path": "com.apple.CoreML/weights"
9
  },
10
- "E44485FE-6869-448C-871D-024F45320C41": {
11
  "author": "com.apple.CoreML",
12
  "description": "CoreML Model Specification",
13
  "name": "model.mlmodel",
14
  "path": "com.apple.CoreML/model.mlmodel"
15
  }
16
  },
17
- "rootModelIdentifier": "E44485FE-6869-448C-871D-024F45320C41"
18
  }
 
1
  {
2
  "fileFormatVersion": "1.0.0",
3
  "itemInfoEntries": {
4
+ "91957EE3-C4C0-4E0F-95BC-72E14851707C": {
5
  "author": "com.apple.CoreML",
6
  "description": "CoreML Model Weights",
7
  "name": "weights",
8
  "path": "com.apple.CoreML/weights"
9
  },
10
+ "FC8642BA-1EC2-4646-874C-1A4912A3591C": {
11
  "author": "com.apple.CoreML",
12
  "description": "CoreML Model Specification",
13
  "name": "model.mlmodel",
14
  "path": "com.apple.CoreML/model.mlmodel"
15
  }
16
  },
17
+ "rootModelIdentifier": "FC8642BA-1EC2-4646-874C-1A4912A3591C"
18
  }
SortformerNvidiaHigh.mlmodelc/coremldata.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bce85a71873d0b3330dae51fa1df61f9485656b1432c97bdb3628ddf6e8c7687
3
  size 1091
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b04dcb554e85010612799da011422017ff4db0bba30b755296dac6ed4821799
3
  size 1091
SortformerNvidiaHigh.mlmodelc/metadata.json CHANGED
@@ -161,7 +161,7 @@
161
  }
162
  ],
163
  "userDefinedMetadata" : {
164
- "spkcache_len" : "188",
165
  "spkcache_update_period" : "300",
166
  "chunk_len" : "340",
167
  "mel_feature_frames" : "2720",
@@ -169,7 +169,7 @@
169
  "subsampling_factor" : "8",
170
  "fifo_len" : "40",
171
  "chunk_left_context" : "1",
172
- "frame_duration" : "0.08"
173
  },
174
  "generatedClassName" : "SortformerNvidiaHigh",
175
  "method" : "predict"
 
161
  }
162
  ],
163
  "userDefinedMetadata" : {
164
+ "frame_duration" : "0.08",
165
  "spkcache_update_period" : "300",
166
  "chunk_len" : "340",
167
  "mel_feature_frames" : "2720",
 
169
  "subsampling_factor" : "8",
170
  "fifo_len" : "40",
171
  "chunk_left_context" : "1",
172
+ "spkcache_len" : "188"
173
  },
174
  "generatedClassName" : "SortformerNvidiaHigh",
175
  "method" : "predict"
SortformerNvidiaHigh.mlmodelc/model0/coremldata.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2045223a88c6a15d00b08f272d5d4210ffa4d96c860630f7072ab29aa6dee79a
3
  size 634
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:330c631d7b8d6100ea7cb1f93616512b083fef39ca4ffb6c8f9e6fba756f4fcd
3
  size 634
SortformerNvidiaHigh.mlmodelc/model1/coremldata.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:12b8beb6f2ffed92fb707893ceb22189c7c1dcaa3769d26a804eb400cfeb9d4a
3
  size 587
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44face253d8b2c35339a1a54a9f709c369a877b3f673c9e2c1ce4db485d562ea
3
  size 587
SortformerNvidiaHigh.mlpackage/Data/com.apple.CoreML/model.mlmodel CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:be97be5a77a9c80359ce597c623e695ed32a5ead59a338f7d152d4be4c600a76
3
- size 1086880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06b732c6ce6334a05528abd80bcc45db2cbebaf84bf8f3f11f1a64951b94f2b2
3
+ size 1087553
SortformerNvidiaHigh.mlpackage/Manifest.json CHANGED
@@ -1,18 +1,18 @@
1
  {
2
  "fileFormatVersion": "1.0.0",
3
  "itemInfoEntries": {
4
- "C2E55AEB-6940-4114-ADD4-D98C7345B073": {
5
  "author": "com.apple.CoreML",
6
  "description": "CoreML Model Weights",
7
  "name": "weights",
8
  "path": "com.apple.CoreML/weights"
9
  },
10
- "F250FEE8-A027-4BAB-B7D1-630291725950": {
11
  "author": "com.apple.CoreML",
12
  "description": "CoreML Model Specification",
13
  "name": "model.mlmodel",
14
  "path": "com.apple.CoreML/model.mlmodel"
15
  }
16
  },
17
- "rootModelIdentifier": "F250FEE8-A027-4BAB-B7D1-630291725950"
18
  }
 
1
  {
2
  "fileFormatVersion": "1.0.0",
3
  "itemInfoEntries": {
4
+ "CEC453DA-87EE-46E9-892F-6F776828BCD5": {
5
  "author": "com.apple.CoreML",
6
  "description": "CoreML Model Weights",
7
  "name": "weights",
8
  "path": "com.apple.CoreML/weights"
9
  },
10
+ "DB79E6F4-4356-4B37-88CF-F9BBC351598A": {
11
  "author": "com.apple.CoreML",
12
  "description": "CoreML Model Specification",
13
  "name": "model.mlmodel",
14
  "path": "com.apple.CoreML/model.mlmodel"
15
  }
16
  },
17
+ "rootModelIdentifier": "DB79E6F4-4356-4B37-88CF-F9BBC351598A"
18
  }
SortformerNvidiaLow.mlmodelc/analytics/coremldata.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7a96be4debfe5612eb3f316a5d91baee98be511526a4f9f59974a2c2bcf5d323
3
  size 202
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05d430bf8aa7b473ee252c1908e3571759a90b20755b892365bcab3a68dadba6
3
  size 202
SortformerNvidiaLow.mlmodelc/coremldata.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:85ca72159bbd43a48615c8031f286453171e2de971532225cff51b0240096179
3
- size 1083
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e4a1a547d8c3a75c4e383ec9686ddd32b6bb62706c9fb5774646bcaa154e93a
3
+ size 1217
SortformerNvidiaLow.mlmodelc/metadata.json CHANGED
@@ -7,9 +7,9 @@
7
  "hasShapeFlexibility" : "0",
8
  "isOptional" : "0",
9
  "dataType" : "Float32",
10
- "formattedType" : "MultiArray (Float32 1 × 242 × 4)",
11
  "shortDescription" : "Combined speaker probabilities for the speaker cache, FIFO queue, and chunk",
12
- "shape" : "[1, 242, 4]",
13
  "name" : "speaker_preds",
14
  "type" : "MultiArray"
15
  },
@@ -18,7 +18,7 @@
18
  "isOptional" : "0",
19
  "dataType" : "Float32",
20
  "formattedType" : "MultiArray (Float32 1 × 14 × 512)",
21
- "shortDescription" : "Speaker embeddings for the new chunk",
22
  "shape" : "[1, 14, 512]",
23
  "name" : "chunk_pre_encoder_embs",
24
  "type" : "MultiArray"
@@ -32,6 +32,26 @@
32
  "shape" : "[1]",
33
  "name" : "chunk_pre_encoder_lengths",
34
  "type" : "MultiArray"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  }
36
  ],
37
  "version" : "2.1",
@@ -45,7 +65,7 @@
45
  "mlProgramOperationTypeHistogram" : {
46
  "Ios16.floorDiv" : 3,
47
  "Transpose" : 193,
48
- "Identity" : 2,
49
  "Ios16.softmax" : 35,
50
  "Ios16.gatherAlongAxis" : 1,
51
  "Split" : 17,
@@ -61,7 +81,7 @@
61
  "Pad" : 34,
62
  "ExpandDims" : 25,
63
  "Ios16.sub" : 6,
64
- "Ios16.cast" : 16,
65
  "Ios16.less" : 7,
66
  "Ios16.conv" : 56,
67
  "Ios16.relu" : 23,
@@ -143,9 +163,9 @@
143
  "hasShapeFlexibility" : "0",
144
  "isOptional" : "0",
145
  "dataType" : "Float32",
146
- "formattedType" : "MultiArray (Float32 1 × 40 × 512)",
147
  "shortDescription" : "First-In-First-Out speech queue",
148
- "shape" : "[1, 40, 512]",
149
  "name" : "fifo",
150
  "type" : "MultiArray"
151
  },
@@ -161,15 +181,15 @@
161
  }
162
  ],
163
  "userDefinedMetadata" : {
164
- "frame_duration" : "0.08",
165
- "spkcache_update_period" : "31",
166
  "chunk_len" : "6",
167
  "subsampling_factor" : "8",
168
- "chunk_right_context" : "7",
169
  "mel_feature_frames" : "48",
170
- "fifo_len" : "40",
 
171
  "chunk_left_context" : "1",
172
- "spkcache_len" : "188"
173
  },
174
  "generatedClassName" : "SortformerNvidiaLow",
175
  "method" : "predict"
 
7
  "hasShapeFlexibility" : "0",
8
  "isOptional" : "0",
9
  "dataType" : "Float32",
10
+ "formattedType" : "MultiArray (Float32 1 × 390 × 4)",
11
  "shortDescription" : "Combined speaker probabilities for the speaker cache, FIFO queue, and chunk",
12
+ "shape" : "[1, 390, 4]",
13
  "name" : "speaker_preds",
14
  "type" : "MultiArray"
15
  },
 
18
  "isOptional" : "0",
19
  "dataType" : "Float32",
20
  "formattedType" : "MultiArray (Float32 1 × 14 × 512)",
21
+ "shortDescription" : "Audio embeddings for the new chunk",
22
  "shape" : "[1, 14, 512]",
23
  "name" : "chunk_pre_encoder_embs",
24
  "type" : "MultiArray"
 
32
  "shape" : "[1]",
33
  "name" : "chunk_pre_encoder_lengths",
34
  "type" : "MultiArray"
35
+ },
36
+ {
37
+ "hasShapeFlexibility" : "0",
38
+ "isOptional" : "0",
39
+ "dataType" : "Float32",
40
+ "formattedType" : "MultiArray (Float32 1 × 390 × 192)",
41
+ "shortDescription" : "Speaker embeddings",
42
+ "shape" : "[1, 390, 192]",
43
+ "name" : "nest_encoder_embs",
44
+ "type" : "MultiArray"
45
+ },
46
+ {
47
+ "hasShapeFlexibility" : "0",
48
+ "isOptional" : "0",
49
+ "dataType" : "Int32",
50
+ "formattedType" : "MultiArray (Int32 1)",
51
+ "shortDescription" : "Number of frames with speaker embeddings",
52
+ "shape" : "[1]",
53
+ "name" : "nest_encoder_lengths",
54
+ "type" : "MultiArray"
55
  }
56
  ],
57
  "version" : "2.1",
 
65
  "mlProgramOperationTypeHistogram" : {
66
  "Ios16.floorDiv" : 3,
67
  "Transpose" : 193,
68
+ "Identity" : 3,
69
  "Ios16.softmax" : 35,
70
  "Ios16.gatherAlongAxis" : 1,
71
  "Split" : 17,
 
81
  "Pad" : 34,
82
  "ExpandDims" : 25,
83
  "Ios16.sub" : 6,
84
+ "Ios16.cast" : 17,
85
  "Ios16.less" : 7,
86
  "Ios16.conv" : 56,
87
  "Ios16.relu" : 23,
 
163
  "hasShapeFlexibility" : "0",
164
  "isOptional" : "0",
165
  "dataType" : "Float32",
166
+ "formattedType" : "MultiArray (Float32 1 × 188 × 512)",
167
  "shortDescription" : "First-In-First-Out speech queue",
168
+ "shape" : "[1, 188, 512]",
169
  "name" : "fifo",
170
  "type" : "MultiArray"
171
  },
 
181
  }
182
  ],
183
  "userDefinedMetadata" : {
184
+ "spkcache_len" : "188",
185
+ "spkcache_update_period" : "144",
186
  "chunk_len" : "6",
187
  "subsampling_factor" : "8",
 
188
  "mel_feature_frames" : "48",
189
+ "chunk_right_context" : "7",
190
+ "fifo_len" : "188",
191
  "chunk_left_context" : "1",
192
+ "frame_duration" : "0.08"
193
  },
194
  "generatedClassName" : "SortformerNvidiaLow",
195
  "method" : "predict"
SortformerNvidiaLow.mlmodelc/model0/coremldata.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3c00973ffe17f5a5d536891804b1e9b7145990cc029a5c7bbfad39f942d83003
3
- size 632
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c8b4cc493f20fb84b0695e5048ded26bced335faefc02fd0ec0ce90a94856b9
3
+ size 633
SortformerNvidiaLow.mlmodelc/model0/model.mil CHANGED
@@ -1,7 +1,7 @@
1
  program(1.0)
2
  [buildInfo = dict<tensor<string, []>, tensor<string, []>>({{"coremlc-component-MIL", "3510.2.1"}, {"coremlc-version", "3500.32.1"}, {"coremltools-component-torch", "2.9.1"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "9.0"}})]
3
  {
4
- func main<ios16>(tensor<fp32, [1, 112, 128]> chunk, tensor<int32, [1]> chunk_lengths, tensor<fp32, [1, 40, 512]> fifo, tensor<int32, [1]> fifo_lengths, tensor<fp32, [1, 188, 512]> spkcache, tensor<int32, [1]> spkcache_lengths) {
5
  tensor<fp32, [256]> model_encoder_pre_encode_conv_0_bias = const()[name = tensor<string, []>("model_encoder_pre_encode_conv_0_bias"), val = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/0-weight.bin"), offset = tensor<uint64, []>(64)))];
6
  tensor<fp32, [256, 1, 3, 3]> model_encoder_pre_encode_conv_0_weight = const()[name = tensor<string, []>("model_encoder_pre_encode_conv_0_weight"), val = tensor<fp32, [256, 1, 3, 3]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/0-weight.bin"), offset = tensor<uint64, []>(1152)))];
7
  tensor<fp32, [256]> model_encoder_pre_encode_conv_2_bias = const()[name = tensor<string, []>("model_encoder_pre_encode_conv_2_bias"), val = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/0-weight.bin"), offset = tensor<uint64, []>(10432)))];
@@ -157,45 +157,45 @@ program(1.0)
157
  tensor<fp32, [1, 14, 512]> chunk_pre_encoder_embs = linear(bias = model_encoder_pre_encode_out_bias, weight = model_encoder_pre_encode_out_weight, x = input)[name = tensor<string, []>("linear_0")];
158
  tensor<string, []> var_241_dtype_0 = const()[name = tensor<string, []>("op_241_dtype_0"), val = tensor<string, []>("int32")];
159
  tensor<int32, [1]> size0 = const()[name = tensor<string, []>("size0"), val = tensor<int32, [1]>([188])];
160
- tensor<int32, [1]> size1 = const()[name = tensor<string, []>("size1"), val = tensor<int32, [1]>([40])];
161
  tensor<int32, []> var_264 = const()[name = tensor<string, []>("op_264"), val = tensor<int32, []>(1)];
162
  tensor<bool, []> full_concat_interleave_0 = const()[name = tensor<string, []>("full_concat_interleave_0"), val = tensor<bool, []>(false)];
163
- tensor<fp32, [1, 242, 512]> full_concat = concat(axis = var_264, interleave = full_concat_interleave_0, values = (spkcache, fifo, chunk_pre_encoder_embs))[name = tensor<string, []>("full_concat")];
164
  tensor<int32, [1]> var_273 = add(x = spkcache_lengths, y = fifo_lengths)[name = tensor<string, []>("op_273")];
165
  tensor<int32, [1]> chunk_pre_encoder_lengths = cast(dtype = var_241_dtype_0, x = current_lengths)[name = tensor<string, []>("cast_4")];
166
  tensor<int32, [1]> pre_encoder_lengths = add(x = var_273, y = chunk_pre_encoder_lengths)[name = tensor<string, []>("total_length")];
167
- tensor<int32, [242]> out_pos = const()[name = tensor<string, []>("out_pos"), val = tensor<int32, [242]>([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241])];
168
- tensor<bool, [242]> var_284 = greater_equal(x = out_pos, y = spkcache_lengths)[name = tensor<string, []>("op_284")];
169
  tensor<string, []> in_seg1_or_2_dtype_0 = const()[name = tensor<string, []>("in_seg1_or_2_dtype_0"), val = tensor<string, []>("int32")];
170
- tensor<bool, [242]> var_290 = greater_equal(x = out_pos, y = var_273)[name = tensor<string, []>("op_290")];
171
  tensor<string, []> in_seg2_dtype_0 = const()[name = tensor<string, []>("in_seg2_dtype_0"), val = tensor<string, []>("int32")];
172
  tensor<int32, [1]> var_297 = sub(x = size0, y = spkcache_lengths)[name = tensor<string, []>("op_297")];
173
- tensor<int32, [242]> in_seg1_or_2 = cast(dtype = in_seg1_or_2_dtype_0, x = var_284)[name = tensor<string, []>("cast_3")];
174
- tensor<int32, [242]> var_298 = mul(x = in_seg1_or_2, y = var_297)[name = tensor<string, []>("op_298")];
175
  tensor<int32, [1]> var_300 = sub(x = size1, y = fifo_lengths)[name = tensor<string, []>("op_300")];
176
- tensor<int32, [242]> in_seg2 = cast(dtype = in_seg2_dtype_0, x = var_290)[name = tensor<string, []>("cast_2")];
177
- tensor<int32, [242]> var_301 = mul(x = in_seg2, y = var_300)[name = tensor<string, []>("op_301")];
178
- tensor<int32, [242]> offset = add(x = var_298, y = var_301)[name = tensor<string, []>("offset")];
179
- tensor<int32, [242]> var_305 = add(x = out_pos, y = offset)[name = tensor<string, []>("op_305")];
180
- tensor<int32, []> var_309 = const()[name = tensor<string, []>("op_309"), val = tensor<int32, []>(241)];
181
  tensor<int32, []> var_310 = const()[name = tensor<string, []>("op_310"), val = tensor<int32, []>(0)];
182
- tensor<int32, [242]> minimum_0 = minimum(x = var_305, y = var_309)[name = tensor<string, []>("minimum_0")];
183
- tensor<int32, [242]> maximum_0 = maximum(x = minimum_0, y = var_310)[name = tensor<string, []>("maximum_0")];
184
  tensor<int32, [1]> var_313_axes_0 = const()[name = tensor<string, []>("op_313_axes_0"), val = tensor<int32, [1]>([0])];
185
- tensor<int32, [1, 242]> var_313 = expand_dims(axes = var_313_axes_0, x = maximum_0)[name = tensor<string, []>("op_313")];
186
  tensor<int32, [1]> var_315_axes_0 = const()[name = tensor<string, []>("op_315_axes_0"), val = tensor<int32, [1]>([-1])];
187
- tensor<int32, [1, 242, 1]> var_315 = expand_dims(axes = var_315_axes_0, x = var_313)[name = tensor<string, []>("op_315")];
188
  tensor<int32, [3]> gather_idx_reps_0 = const()[name = tensor<string, []>("gather_idx_reps_0"), val = tensor<int32, [3]>([1, 1, 512])];
189
- tensor<int32, [1, 242, 512]> gather_idx = tile(reps = gather_idx_reps_0, x = var_315)[name = tensor<string, []>("gather_idx")];
190
  tensor<int32, []> var_320 = const()[name = tensor<string, []>("op_320"), val = tensor<int32, []>(1)];
191
- tensor<fp32, [1, 242, 512]> output = gather_along_axis(axis = var_320, indices = gather_idx, x = full_concat)[name = tensor<string, []>("output")];
192
- tensor<bool, [242]> var_323 = less(x = out_pos, y = pre_encoder_lengths)[name = tensor<string, []>("op_323")];
193
  tensor<string, []> var_328_dtype_0 = const()[name = tensor<string, []>("op_328_dtype_0"), val = tensor<string, []>("fp32")];
194
  tensor<int32, [1]> var_330_axes_0 = const()[name = tensor<string, []>("op_330_axes_0"), val = tensor<int32, [1]>([0])];
195
- tensor<fp32, [242]> var_328 = cast(dtype = var_328_dtype_0, x = var_323)[name = tensor<string, []>("cast_1")];
196
- tensor<fp32, [1, 242]> var_330 = expand_dims(axes = var_330_axes_0, x = var_328)[name = tensor<string, []>("op_330")];
197
  tensor<int32, [1]> var_332_axes_0 = const()[name = tensor<string, []>("op_332_axes_0"), val = tensor<int32, [1]>([-1])];
198
- tensor<fp32, [1, 242, 1]> var_332 = expand_dims(axes = var_332_axes_0, x = var_330)[name = tensor<string, []>("op_332")];
199
- tensor<fp32, [1, 242, 512]> pre_encoder_embs = mul(x = output, y = var_332)[name = tensor<string, []>("op_333")];
200
  } -> (pre_encoder_embs, pre_encoder_lengths, chunk_pre_encoder_embs, chunk_pre_encoder_lengths);
201
  }
 
1
  program(1.0)
2
  [buildInfo = dict<tensor<string, []>, tensor<string, []>>({{"coremlc-component-MIL", "3510.2.1"}, {"coremlc-version", "3500.32.1"}, {"coremltools-component-torch", "2.9.1"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "9.0"}})]
3
  {
4
+ func main<ios16>(tensor<fp32, [1, 112, 128]> chunk, tensor<int32, [1]> chunk_lengths, tensor<fp32, [1, 188, 512]> fifo, tensor<int32, [1]> fifo_lengths, tensor<fp32, [1, 188, 512]> spkcache, tensor<int32, [1]> spkcache_lengths) {
5
  tensor<fp32, [256]> model_encoder_pre_encode_conv_0_bias = const()[name = tensor<string, []>("model_encoder_pre_encode_conv_0_bias"), val = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/0-weight.bin"), offset = tensor<uint64, []>(64)))];
6
  tensor<fp32, [256, 1, 3, 3]> model_encoder_pre_encode_conv_0_weight = const()[name = tensor<string, []>("model_encoder_pre_encode_conv_0_weight"), val = tensor<fp32, [256, 1, 3, 3]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/0-weight.bin"), offset = tensor<uint64, []>(1152)))];
7
  tensor<fp32, [256]> model_encoder_pre_encode_conv_2_bias = const()[name = tensor<string, []>("model_encoder_pre_encode_conv_2_bias"), val = tensor<fp32, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/0-weight.bin"), offset = tensor<uint64, []>(10432)))];
 
157
  tensor<fp32, [1, 14, 512]> chunk_pre_encoder_embs = linear(bias = model_encoder_pre_encode_out_bias, weight = model_encoder_pre_encode_out_weight, x = input)[name = tensor<string, []>("linear_0")];
158
  tensor<string, []> var_241_dtype_0 = const()[name = tensor<string, []>("op_241_dtype_0"), val = tensor<string, []>("int32")];
159
  tensor<int32, [1]> size0 = const()[name = tensor<string, []>("size0"), val = tensor<int32, [1]>([188])];
160
+ tensor<int32, [1]> size1 = const()[name = tensor<string, []>("size1"), val = tensor<int32, [1]>([188])];
161
  tensor<int32, []> var_264 = const()[name = tensor<string, []>("op_264"), val = tensor<int32, []>(1)];
162
  tensor<bool, []> full_concat_interleave_0 = const()[name = tensor<string, []>("full_concat_interleave_0"), val = tensor<bool, []>(false)];
163
+ tensor<fp32, [1, 390, 512]> full_concat = concat(axis = var_264, interleave = full_concat_interleave_0, values = (spkcache, fifo, chunk_pre_encoder_embs))[name = tensor<string, []>("full_concat")];
164
  tensor<int32, [1]> var_273 = add(x = spkcache_lengths, y = fifo_lengths)[name = tensor<string, []>("op_273")];
165
  tensor<int32, [1]> chunk_pre_encoder_lengths = cast(dtype = var_241_dtype_0, x = current_lengths)[name = tensor<string, []>("cast_4")];
166
  tensor<int32, [1]> pre_encoder_lengths = add(x = var_273, y = chunk_pre_encoder_lengths)[name = tensor<string, []>("total_length")];
167
+ tensor<int32, [390]> out_pos = const()[name = tensor<string, []>("out_pos"), val = tensor<int32, [390]>([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389])];
168
+ tensor<bool, [390]> var_284 = greater_equal(x = out_pos, y = spkcache_lengths)[name = tensor<string, []>("op_284")];
169
  tensor<string, []> in_seg1_or_2_dtype_0 = const()[name = tensor<string, []>("in_seg1_or_2_dtype_0"), val = tensor<string, []>("int32")];
170
+ tensor<bool, [390]> var_290 = greater_equal(x = out_pos, y = var_273)[name = tensor<string, []>("op_290")];
171
  tensor<string, []> in_seg2_dtype_0 = const()[name = tensor<string, []>("in_seg2_dtype_0"), val = tensor<string, []>("int32")];
172
  tensor<int32, [1]> var_297 = sub(x = size0, y = spkcache_lengths)[name = tensor<string, []>("op_297")];
173
+ tensor<int32, [390]> in_seg1_or_2 = cast(dtype = in_seg1_or_2_dtype_0, x = var_284)[name = tensor<string, []>("cast_3")];
174
+ tensor<int32, [390]> var_298 = mul(x = in_seg1_or_2, y = var_297)[name = tensor<string, []>("op_298")];
175
  tensor<int32, [1]> var_300 = sub(x = size1, y = fifo_lengths)[name = tensor<string, []>("op_300")];
176
+ tensor<int32, [390]> in_seg2 = cast(dtype = in_seg2_dtype_0, x = var_290)[name = tensor<string, []>("cast_2")];
177
+ tensor<int32, [390]> var_301 = mul(x = in_seg2, y = var_300)[name = tensor<string, []>("op_301")];
178
+ tensor<int32, [390]> offset = add(x = var_298, y = var_301)[name = tensor<string, []>("offset")];
179
+ tensor<int32, [390]> var_305 = add(x = out_pos, y = offset)[name = tensor<string, []>("op_305")];
180
+ tensor<int32, []> var_309 = const()[name = tensor<string, []>("op_309"), val = tensor<int32, []>(389)];
181
  tensor<int32, []> var_310 = const()[name = tensor<string, []>("op_310"), val = tensor<int32, []>(0)];
182
+ tensor<int32, [390]> minimum_0 = minimum(x = var_305, y = var_309)[name = tensor<string, []>("minimum_0")];
183
+ tensor<int32, [390]> maximum_0 = maximum(x = minimum_0, y = var_310)[name = tensor<string, []>("maximum_0")];
184
  tensor<int32, [1]> var_313_axes_0 = const()[name = tensor<string, []>("op_313_axes_0"), val = tensor<int32, [1]>([0])];
185
+ tensor<int32, [1, 390]> var_313 = expand_dims(axes = var_313_axes_0, x = maximum_0)[name = tensor<string, []>("op_313")];
186
  tensor<int32, [1]> var_315_axes_0 = const()[name = tensor<string, []>("op_315_axes_0"), val = tensor<int32, [1]>([-1])];
187
+ tensor<int32, [1, 390, 1]> var_315 = expand_dims(axes = var_315_axes_0, x = var_313)[name = tensor<string, []>("op_315")];
188
  tensor<int32, [3]> gather_idx_reps_0 = const()[name = tensor<string, []>("gather_idx_reps_0"), val = tensor<int32, [3]>([1, 1, 512])];
189
+ tensor<int32, [1, 390, 512]> gather_idx = tile(reps = gather_idx_reps_0, x = var_315)[name = tensor<string, []>("gather_idx")];
190
  tensor<int32, []> var_320 = const()[name = tensor<string, []>("op_320"), val = tensor<int32, []>(1)];
191
+ tensor<fp32, [1, 390, 512]> output = gather_along_axis(axis = var_320, indices = gather_idx, x = full_concat)[name = tensor<string, []>("output")];
192
+ tensor<bool, [390]> var_323 = less(x = out_pos, y = pre_encoder_lengths)[name = tensor<string, []>("op_323")];
193
  tensor<string, []> var_328_dtype_0 = const()[name = tensor<string, []>("op_328_dtype_0"), val = tensor<string, []>("fp32")];
194
  tensor<int32, [1]> var_330_axes_0 = const()[name = tensor<string, []>("op_330_axes_0"), val = tensor<int32, [1]>([0])];
195
+ tensor<fp32, [390]> var_328 = cast(dtype = var_328_dtype_0, x = var_323)[name = tensor<string, []>("cast_1")];
196
+ tensor<fp32, [1, 390]> var_330 = expand_dims(axes = var_330_axes_0, x = var_328)[name = tensor<string, []>("op_330")];
197
  tensor<int32, [1]> var_332_axes_0 = const()[name = tensor<string, []>("op_332_axes_0"), val = tensor<int32, [1]>([-1])];
198
+ tensor<fp32, [1, 390, 1]> var_332 = expand_dims(axes = var_332_axes_0, x = var_330)[name = tensor<string, []>("op_332")];
199
+ tensor<fp32, [1, 390, 512]> pre_encoder_embs = mul(x = output, y = var_332)[name = tensor<string, []>("op_333")];
200
  } -> (pre_encoder_embs, pre_encoder_lengths, chunk_pre_encoder_embs, chunk_pre_encoder_lengths);
201
  }
SortformerNvidiaLow.mlmodelc/model1/coremldata.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5507ee0c17c75411804df4ddc48d11b87deb021bc531290e8c3817f0601afa04
3
- size 585
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b229c16fa358c45202e9ff8ac6185c79bc0493d107dad567dd2e4324c18b6245
3
+ size 656
SortformerNvidiaLow.mlmodelc/model1/model.mil CHANGED
The diff for this file is too large to render. See raw diff
 
SortformerNvidiaLow.mlmodelc/model1/weights/1-weight.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4c85926af77684bce762b355a2b162df557d832444fbeb79ee195113a4bbf1db
3
- size 230428224
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e98531d7e961c3e8c43f8f1266abd6bbc110e11c52cda4d75a506f7fab53f2d4
3
+ size 235580992
SortformerNvidiaLow.mlpackage/Data/com.apple.CoreML/model.mlmodel CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5d476ed462ba9341ddbc1eb0ef789bb8d97ca5cc11a46ba0d2b74d418f32434a
3
- size 762295
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ffc3fabdd04016403677dbb182996bb9802fa4fcafaea8742148385b1989bc9
3
+ size 857102
SortformerNvidiaLow.mlpackage/Data/com.apple.CoreML/weights/1-weight.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4c85926af77684bce762b355a2b162df557d832444fbeb79ee195113a4bbf1db
3
- size 230428224
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e98531d7e961c3e8c43f8f1266abd6bbc110e11c52cda4d75a506f7fab53f2d4
3
+ size 235580992
SortformerNvidiaLow.mlpackage/Manifest.json CHANGED
@@ -1,18 +1,18 @@
1
  {
2
  "fileFormatVersion": "1.0.0",
3
  "itemInfoEntries": {
4
- "1940BF88-E8D9-4C67-82FC-AABB8A032C1A": {
5
- "author": "com.apple.CoreML",
6
- "description": "CoreML Model Specification",
7
- "name": "model.mlmodel",
8
- "path": "com.apple.CoreML/model.mlmodel"
9
- },
10
- "84A637AA-9D30-4709-A1C1-1B2E5C8F0C09": {
11
  "author": "com.apple.CoreML",
12
  "description": "CoreML Model Weights",
13
  "name": "weights",
14
  "path": "com.apple.CoreML/weights"
 
 
 
 
 
 
15
  }
16
  },
17
- "rootModelIdentifier": "1940BF88-E8D9-4C67-82FC-AABB8A032C1A"
18
  }
 
1
  {
2
  "fileFormatVersion": "1.0.0",
3
  "itemInfoEntries": {
4
+ "9FC923D1-4D56-4C41-9FDA-AF1A43ABEA24": {
 
 
 
 
 
 
5
  "author": "com.apple.CoreML",
6
  "description": "CoreML Model Weights",
7
  "name": "weights",
8
  "path": "com.apple.CoreML/weights"
9
+ },
10
+ "E5741981-7159-4A38-9EB7-F97029CE1B28": {
11
+ "author": "com.apple.CoreML",
12
+ "description": "CoreML Model Specification",
13
+ "name": "model.mlmodel",
14
+ "path": "com.apple.CoreML/model.mlmodel"
15
  }
16
  },
17
+ "rootModelIdentifier": "E5741981-7159-4A38-9EB7-F97029CE1B28"
18
  }