Add W8A16-multifunction SpeechDecoder (0.6b + 1.7b)

#1
qwen3_tts/speech_decoder/12hz-0.6b-customvoice/W8A16-multifunction/SpeechDecoder.mlmodelc/analytics/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cbf0a9a3a35b57b915b95b799666902aec9ddc59650bf63d61a2f3e37b0e766c
3
+ size 243
qwen3_tts/speech_decoder/12hz-0.6b-customvoice/W8A16-multifunction/SpeechDecoder.mlmodelc/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:209c17e8cd64ae321ed68617d24fd86e7a6d48784ddf6e4688c091795ab2d1fa
3
+ size 896
qwen3_tts/speech_decoder/12hz-0.6b-customvoice/W8A16-multifunction/SpeechDecoder.mlmodelc/metadata.json ADDED
@@ -0,0 +1,489 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "metadataOutputVersion" : "3.0",
4
+ "storagePrecision" : "Mixed (Float16, Int32, Palettized (8 bits), UInt8)",
5
+ "outputSchema" : [
6
+ {
7
+ "hasShapeFlexibility" : "0",
8
+ "isOptional" : "0",
9
+ "dataType" : "Float16",
10
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 1920)",
11
+ "shortDescription" : "",
12
+ "shape" : "[1, 1, 1, 1920]",
13
+ "name" : "audio",
14
+ "type" : "MultiArray"
15
+ },
16
+ {
17
+ "hasShapeFlexibility" : "0",
18
+ "isOptional" : "0",
19
+ "dataType" : "Float16",
20
+ "formattedType" : "MultiArray (Float16 1 × 8192 × 1 × 1)",
21
+ "shortDescription" : "",
22
+ "shape" : "[1, 8192, 1, 1]",
23
+ "name" : "key_cache_updates",
24
+ "type" : "MultiArray"
25
+ },
26
+ {
27
+ "hasShapeFlexibility" : "0",
28
+ "isOptional" : "0",
29
+ "dataType" : "Float16",
30
+ "formattedType" : "MultiArray (Float16 1 × 8192 × 1 × 1)",
31
+ "shortDescription" : "",
32
+ "shape" : "[1, 8192, 1, 1]",
33
+ "name" : "value_cache_updates",
34
+ "type" : "MultiArray"
35
+ },
36
+ {
37
+ "hasShapeFlexibility" : "0",
38
+ "isOptional" : "0",
39
+ "dataType" : "Float16",
40
+ "formattedType" : "MultiArray (Float16 1 × 1024 × 1 × 1)",
41
+ "shortDescription" : "",
42
+ "shape" : "[1, 1024, 1, 1]",
43
+ "name" : "hidden_context_update",
44
+ "type" : "MultiArray"
45
+ }
46
+ ],
47
+ "modelParameters" : [
48
+
49
+ ],
50
+ "specificationVersion" : 9,
51
+ "functions" : [
52
+ {
53
+ "inputSchema" : [
54
+ {
55
+ "hasShapeFlexibility" : "0",
56
+ "isOptional" : "0",
57
+ "dataType" : "Int32",
58
+ "formattedType" : "MultiArray (Int32 1 × 16 × 4)",
59
+ "shortDescription" : "",
60
+ "shape" : "[1, 16, 4]",
61
+ "name" : "audio_codes",
62
+ "type" : "MultiArray"
63
+ },
64
+ {
65
+ "hasShapeFlexibility" : "0",
66
+ "isOptional" : "0",
67
+ "dataType" : "Int32",
68
+ "formattedType" : "MultiArray (Int32 1)",
69
+ "shortDescription" : "",
70
+ "shape" : "[1]",
71
+ "name" : "cache_length",
72
+ "type" : "MultiArray"
73
+ },
74
+ {
75
+ "hasShapeFlexibility" : "0",
76
+ "isOptional" : "0",
77
+ "dataType" : "Float16",
78
+ "formattedType" : "MultiArray (Float16 1 × 8192 × 1 × 256)",
79
+ "shortDescription" : "",
80
+ "shape" : "[1, 8192, 1, 256]",
81
+ "name" : "key_cache",
82
+ "type" : "MultiArray"
83
+ },
84
+ {
85
+ "hasShapeFlexibility" : "0",
86
+ "isOptional" : "0",
87
+ "dataType" : "Float16",
88
+ "formattedType" : "MultiArray (Float16 1 × 8192 × 1 × 256)",
89
+ "shortDescription" : "",
90
+ "shape" : "[1, 8192, 1, 256]",
91
+ "name" : "value_cache",
92
+ "type" : "MultiArray"
93
+ },
94
+ {
95
+ "hasShapeFlexibility" : "0",
96
+ "isOptional" : "0",
97
+ "dataType" : "Float16",
98
+ "formattedType" : "MultiArray (Float16 1 × 256)",
99
+ "shortDescription" : "",
100
+ "shape" : "[1, 256]",
101
+ "name" : "key_padding_mask",
102
+ "type" : "MultiArray"
103
+ },
104
+ {
105
+ "hasShapeFlexibility" : "0",
106
+ "isOptional" : "0",
107
+ "dataType" : "Float16",
108
+ "formattedType" : "MultiArray (Float16 1 × 1024 × 1 × 1)",
109
+ "shortDescription" : "",
110
+ "shape" : "[1, 1024, 1, 1]",
111
+ "name" : "hidden_context",
112
+ "type" : "MultiArray"
113
+ },
114
+ {
115
+ "hasShapeFlexibility" : "0",
116
+ "isOptional" : "0",
117
+ "dataType" : "Float16",
118
+ "formattedType" : "MultiArray (Float16 1 × 4 × 256)",
119
+ "shortDescription" : "",
120
+ "shape" : "[1, 4, 256]",
121
+ "name" : "kv_cache_update_mask",
122
+ "type" : "MultiArray"
123
+ },
124
+ {
125
+ "hasShapeFlexibility" : "0",
126
+ "isOptional" : "0",
127
+ "dataType" : "Float16",
128
+ "formattedType" : "MultiArray (Float16 1 × 4 × 256)",
129
+ "shortDescription" : "",
130
+ "shape" : "[1, 4, 256]",
131
+ "name" : "qk_mask",
132
+ "type" : "MultiArray"
133
+ }
134
+ ],
135
+ "computePrecision" : "Mixed (Float16, Float32, Int16, Int32, UInt16)",
136
+ "storagePrecision" : "Mixed (Float16, Int32, Palettized (8 bits), UInt8)",
137
+ "stateSchema" : [
138
+
139
+ ],
140
+ "outputSchema" : [
141
+ {
142
+ "hasShapeFlexibility" : "0",
143
+ "isOptional" : "0",
144
+ "dataType" : "Float16",
145
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 7680)",
146
+ "shortDescription" : "",
147
+ "shape" : "[1, 1, 1, 7680]",
148
+ "name" : "audio",
149
+ "type" : "MultiArray"
150
+ },
151
+ {
152
+ "hasShapeFlexibility" : "0",
153
+ "isOptional" : "0",
154
+ "dataType" : "Float16",
155
+ "formattedType" : "MultiArray (Float16 1 × 8192 × 1 × 4)",
156
+ "shortDescription" : "",
157
+ "shape" : "[1, 8192, 1, 4]",
158
+ "name" : "key_cache_updates",
159
+ "type" : "MultiArray"
160
+ },
161
+ {
162
+ "hasShapeFlexibility" : "0",
163
+ "isOptional" : "0",
164
+ "dataType" : "Float16",
165
+ "formattedType" : "MultiArray (Float16 1 × 8192 × 1 × 4)",
166
+ "shortDescription" : "",
167
+ "shape" : "[1, 8192, 1, 4]",
168
+ "name" : "value_cache_updates",
169
+ "type" : "MultiArray"
170
+ },
171
+ {
172
+ "hasShapeFlexibility" : "0",
173
+ "isOptional" : "0",
174
+ "dataType" : "Float16",
175
+ "formattedType" : "MultiArray (Float16 1 × 1024 × 1 × 4)",
176
+ "shortDescription" : "",
177
+ "shape" : "[1, 1024, 1, 4]",
178
+ "name" : "hidden_context_update",
179
+ "type" : "MultiArray"
180
+ }
181
+ ],
182
+ "name" : "throughput",
183
+ "mlProgramOperationTypeHistogram" : {
184
+ "Ios18.expandDims" : 26,
185
+ "Ios18.mul" : 218,
186
+ "Ios18.softmax" : 8,
187
+ "Ios18.rsqrt" : 17,
188
+ "Ios18.matmul" : 32,
189
+ "Ios16.reduceMean" : 17,
190
+ "Split" : 3,
191
+ "Ios18.greaterEqual" : 1,
192
+ "Select" : 1,
193
+ "Ios18.sin" : 29,
194
+ "Ios18.gather" : 18,
195
+ "Ios18.add" : 141,
196
+ "Ios16.reduceSum" : 1,
197
+ "Ios18.layerNorm" : 2,
198
+ "Ios18.reshape" : 92,
199
+ "Pad" : 17,
200
+ "Ios18.constexprLutToDense" : 116,
201
+ "Ios18.conv" : 99,
202
+ "Ios18.concat" : 19,
203
+ "Ios18.transpose" : 62,
204
+ "Ios18.sub" : 1,
205
+ "Ios18.cast" : 19,
206
+ "Ios18.silu" : 8,
207
+ "Ios18.gelu" : 2,
208
+ "Ios18.clip" : 1,
209
+ "Ios18.sliceByIndex" : 35,
210
+ "Ios18.squeeze" : 18
211
+ }
212
+ },
213
+ {
214
+ "inputSchema" : [
215
+ {
216
+ "hasShapeFlexibility" : "0",
217
+ "isOptional" : "0",
218
+ "dataType" : "Int32",
219
+ "formattedType" : "MultiArray (Int32 1 × 16 × 1)",
220
+ "shortDescription" : "",
221
+ "shape" : "[1, 16, 1]",
222
+ "name" : "audio_codes",
223
+ "type" : "MultiArray"
224
+ },
225
+ {
226
+ "hasShapeFlexibility" : "0",
227
+ "isOptional" : "0",
228
+ "dataType" : "Int32",
229
+ "formattedType" : "MultiArray (Int32 1)",
230
+ "shortDescription" : "",
231
+ "shape" : "[1]",
232
+ "name" : "cache_length",
233
+ "type" : "MultiArray"
234
+ },
235
+ {
236
+ "hasShapeFlexibility" : "0",
237
+ "isOptional" : "0",
238
+ "dataType" : "Float16",
239
+ "formattedType" : "MultiArray (Float16 1 × 8192 × 1 × 256)",
240
+ "shortDescription" : "",
241
+ "shape" : "[1, 8192, 1, 256]",
242
+ "name" : "key_cache",
243
+ "type" : "MultiArray"
244
+ },
245
+ {
246
+ "hasShapeFlexibility" : "0",
247
+ "isOptional" : "0",
248
+ "dataType" : "Float16",
249
+ "formattedType" : "MultiArray (Float16 1 × 8192 × 1 × 256)",
250
+ "shortDescription" : "",
251
+ "shape" : "[1, 8192, 1, 256]",
252
+ "name" : "value_cache",
253
+ "type" : "MultiArray"
254
+ },
255
+ {
256
+ "hasShapeFlexibility" : "0",
257
+ "isOptional" : "0",
258
+ "dataType" : "Float16",
259
+ "formattedType" : "MultiArray (Float16 1 × 256)",
260
+ "shortDescription" : "",
261
+ "shape" : "[1, 256]",
262
+ "name" : "key_padding_mask",
263
+ "type" : "MultiArray"
264
+ },
265
+ {
266
+ "hasShapeFlexibility" : "0",
267
+ "isOptional" : "0",
268
+ "dataType" : "Float16",
269
+ "formattedType" : "MultiArray (Float16 1 × 1024 × 1 × 4)",
270
+ "shortDescription" : "",
271
+ "shape" : "[1, 1024, 1, 4]",
272
+ "name" : "hidden_context",
273
+ "type" : "MultiArray"
274
+ },
275
+ {
276
+ "hasShapeFlexibility" : "0",
277
+ "isOptional" : "0",
278
+ "dataType" : "Float16",
279
+ "formattedType" : "MultiArray (Float16 1 × 1 × 256)",
280
+ "shortDescription" : "",
281
+ "shape" : "[1, 1, 256]",
282
+ "name" : "kv_cache_update_mask",
283
+ "type" : "MultiArray"
284
+ }
285
+ ],
286
+ "computePrecision" : "Mixed (Float16, Float32, Int16, Int32, UInt16)",
287
+ "storagePrecision" : "Mixed (Float16, Int32, Palettized (8 bits), UInt8)",
288
+ "stateSchema" : [
289
+
290
+ ],
291
+ "outputSchema" : [
292
+ {
293
+ "hasShapeFlexibility" : "0",
294
+ "isOptional" : "0",
295
+ "dataType" : "Float16",
296
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 1920)",
297
+ "shortDescription" : "",
298
+ "shape" : "[1, 1, 1, 1920]",
299
+ "name" : "audio",
300
+ "type" : "MultiArray"
301
+ },
302
+ {
303
+ "hasShapeFlexibility" : "0",
304
+ "isOptional" : "0",
305
+ "dataType" : "Float16",
306
+ "formattedType" : "MultiArray (Float16 1 × 8192 × 1 × 1)",
307
+ "shortDescription" : "",
308
+ "shape" : "[1, 8192, 1, 1]",
309
+ "name" : "key_cache_updates",
310
+ "type" : "MultiArray"
311
+ },
312
+ {
313
+ "hasShapeFlexibility" : "0",
314
+ "isOptional" : "0",
315
+ "dataType" : "Float16",
316
+ "formattedType" : "MultiArray (Float16 1 × 8192 × 1 × 1)",
317
+ "shortDescription" : "",
318
+ "shape" : "[1, 8192, 1, 1]",
319
+ "name" : "value_cache_updates",
320
+ "type" : "MultiArray"
321
+ },
322
+ {
323
+ "hasShapeFlexibility" : "0",
324
+ "isOptional" : "0",
325
+ "dataType" : "Float16",
326
+ "formattedType" : "MultiArray (Float16 1 × 1024 × 1 × 1)",
327
+ "shortDescription" : "",
328
+ "shape" : "[1, 1024, 1, 1]",
329
+ "name" : "hidden_context_update",
330
+ "type" : "MultiArray"
331
+ }
332
+ ],
333
+ "name" : "latency",
334
+ "mlProgramOperationTypeHistogram" : {
335
+ "Ios18.expandDims" : 25,
336
+ "Ios18.mul" : 218,
337
+ "Ios18.softmax" : 8,
338
+ "Ios18.rsqrt" : 17,
339
+ "Ios18.matmul" : 32,
340
+ "Ios16.reduceMean" : 17,
341
+ "Split" : 3,
342
+ "Ios18.greaterEqual" : 1,
343
+ "Select" : 1,
344
+ "Ios18.sin" : 29,
345
+ "Ios18.gather" : 18,
346
+ "Ios18.add" : 132,
347
+ "Ios16.reduceSum" : 1,
348
+ "Ios18.layerNorm" : 2,
349
+ "Ios18.reshape" : 92,
350
+ "Pad" : 17,
351
+ "Ios18.constexprLutToDense" : 116,
352
+ "Ios18.conv" : 99,
353
+ "Ios18.concat" : 19,
354
+ "Ios18.transpose" : 62,
355
+ "Ios18.sub" : 1,
356
+ "Ios18.cast" : 19,
357
+ "Ios18.silu" : 8,
358
+ "Ios18.gelu" : 2,
359
+ "Ios18.clip" : 1,
360
+ "Ios18.sliceByIndex" : 35,
361
+ "Ios18.squeeze" : 18
362
+ }
363
+ }
364
+ ],
365
+ "mlProgramOperationTypeHistogram" : {
366
+ "Ios18.expandDims" : 25,
367
+ "Ios18.mul" : 218,
368
+ "Ios18.softmax" : 8,
369
+ "Ios18.rsqrt" : 17,
370
+ "Ios18.matmul" : 32,
371
+ "Ios16.reduceMean" : 17,
372
+ "Split" : 3,
373
+ "Ios18.greaterEqual" : 1,
374
+ "Select" : 1,
375
+ "Ios18.sin" : 29,
376
+ "Ios18.gather" : 18,
377
+ "Ios18.add" : 132,
378
+ "Ios16.reduceSum" : 1,
379
+ "Ios18.layerNorm" : 2,
380
+ "Ios18.reshape" : 92,
381
+ "Pad" : 17,
382
+ "Ios18.constexprLutToDense" : 116,
383
+ "Ios18.conv" : 99,
384
+ "Ios18.concat" : 19,
385
+ "Ios18.transpose" : 62,
386
+ "Ios18.sub" : 1,
387
+ "Ios18.cast" : 19,
388
+ "Ios18.silu" : 8,
389
+ "Ios18.gelu" : 2,
390
+ "Ios18.clip" : 1,
391
+ "Ios18.sliceByIndex" : 35,
392
+ "Ios18.squeeze" : 18
393
+ },
394
+ "isUpdatable" : "0",
395
+ "stateSchema" : [
396
+
397
+ ],
398
+ "availability" : {
399
+ "macOS" : "15.0",
400
+ "tvOS" : "18.0",
401
+ "visionOS" : "2.0",
402
+ "watchOS" : "11.0",
403
+ "iOS" : "18.0",
404
+ "macCatalyst" : "18.0"
405
+ },
406
+ "computePrecision" : "Mixed (Float16, Float32, Int16, Int32, UInt16)",
407
+ "modelType" : {
408
+ "name" : "MLModelType_mlProgram"
409
+ },
410
+ "inputSchema" : [
411
+ {
412
+ "hasShapeFlexibility" : "0",
413
+ "isOptional" : "0",
414
+ "dataType" : "Int32",
415
+ "formattedType" : "MultiArray (Int32 1 × 16 × 1)",
416
+ "shortDescription" : "",
417
+ "shape" : "[1, 16, 1]",
418
+ "name" : "audio_codes",
419
+ "type" : "MultiArray"
420
+ },
421
+ {
422
+ "hasShapeFlexibility" : "0",
423
+ "isOptional" : "0",
424
+ "dataType" : "Int32",
425
+ "formattedType" : "MultiArray (Int32 1)",
426
+ "shortDescription" : "",
427
+ "shape" : "[1]",
428
+ "name" : "cache_length",
429
+ "type" : "MultiArray"
430
+ },
431
+ {
432
+ "hasShapeFlexibility" : "0",
433
+ "isOptional" : "0",
434
+ "dataType" : "Float16",
435
+ "formattedType" : "MultiArray (Float16 1 × 8192 × 1 × 256)",
436
+ "shortDescription" : "",
437
+ "shape" : "[1, 8192, 1, 256]",
438
+ "name" : "key_cache",
439
+ "type" : "MultiArray"
440
+ },
441
+ {
442
+ "hasShapeFlexibility" : "0",
443
+ "isOptional" : "0",
444
+ "dataType" : "Float16",
445
+ "formattedType" : "MultiArray (Float16 1 × 8192 × 1 × 256)",
446
+ "shortDescription" : "",
447
+ "shape" : "[1, 8192, 1, 256]",
448
+ "name" : "value_cache",
449
+ "type" : "MultiArray"
450
+ },
451
+ {
452
+ "hasShapeFlexibility" : "0",
453
+ "isOptional" : "0",
454
+ "dataType" : "Float16",
455
+ "formattedType" : "MultiArray (Float16 1 × 256)",
456
+ "shortDescription" : "",
457
+ "shape" : "[1, 256]",
458
+ "name" : "key_padding_mask",
459
+ "type" : "MultiArray"
460
+ },
461
+ {
462
+ "hasShapeFlexibility" : "0",
463
+ "isOptional" : "0",
464
+ "dataType" : "Float16",
465
+ "formattedType" : "MultiArray (Float16 1 × 1024 × 1 × 4)",
466
+ "shortDescription" : "",
467
+ "shape" : "[1, 1024, 1, 4]",
468
+ "name" : "hidden_context",
469
+ "type" : "MultiArray"
470
+ },
471
+ {
472
+ "hasShapeFlexibility" : "0",
473
+ "isOptional" : "0",
474
+ "dataType" : "Float16",
475
+ "formattedType" : "MultiArray (Float16 1 × 1 × 256)",
476
+ "shortDescription" : "",
477
+ "shape" : "[1, 1, 256]",
478
+ "name" : "kv_cache_update_mask",
479
+ "type" : "MultiArray"
480
+ }
481
+ ],
482
+ "defaultFunctionName" : "latency",
483
+ "generatedClassName" : "SpeechDecoder",
484
+ "userDefinedMetadata" : {
485
+
486
+ },
487
+ "method" : "predict"
488
+ }
489
+ ]
qwen3_tts/speech_decoder/12hz-0.6b-customvoice/W8A16-multifunction/SpeechDecoder.mlmodelc/model.mil ADDED
The diff for this file is too large to render. See raw diff
 
qwen3_tts/speech_decoder/12hz-0.6b-customvoice/W8A16-multifunction/SpeechDecoder.mlmodelc/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5dc780657305201c968c11456f851d6cbdbf6b477664cc0d92cf09df4a1bb2a3
3
+ size 114238784
qwen3_tts/speech_decoder/12hz-1.7b-customvoice/W8A16-multifunction/SpeechDecoder.mlmodelc/analytics/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cbf0a9a3a35b57b915b95b799666902aec9ddc59650bf63d61a2f3e37b0e766c
3
+ size 243
qwen3_tts/speech_decoder/12hz-1.7b-customvoice/W8A16-multifunction/SpeechDecoder.mlmodelc/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:209c17e8cd64ae321ed68617d24fd86e7a6d48784ddf6e4688c091795ab2d1fa
3
+ size 896
qwen3_tts/speech_decoder/12hz-1.7b-customvoice/W8A16-multifunction/SpeechDecoder.mlmodelc/metadata.json ADDED
@@ -0,0 +1,489 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "metadataOutputVersion" : "3.0",
4
+ "storagePrecision" : "Mixed (Float16, Int32, Palettized (8 bits), UInt8)",
5
+ "outputSchema" : [
6
+ {
7
+ "hasShapeFlexibility" : "0",
8
+ "isOptional" : "0",
9
+ "dataType" : "Float16",
10
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 1920)",
11
+ "shortDescription" : "",
12
+ "shape" : "[1, 1, 1, 1920]",
13
+ "name" : "audio",
14
+ "type" : "MultiArray"
15
+ },
16
+ {
17
+ "hasShapeFlexibility" : "0",
18
+ "isOptional" : "0",
19
+ "dataType" : "Float16",
20
+ "formattedType" : "MultiArray (Float16 1 × 8192 × 1 × 1)",
21
+ "shortDescription" : "",
22
+ "shape" : "[1, 8192, 1, 1]",
23
+ "name" : "key_cache_updates",
24
+ "type" : "MultiArray"
25
+ },
26
+ {
27
+ "hasShapeFlexibility" : "0",
28
+ "isOptional" : "0",
29
+ "dataType" : "Float16",
30
+ "formattedType" : "MultiArray (Float16 1 × 8192 × 1 × 1)",
31
+ "shortDescription" : "",
32
+ "shape" : "[1, 8192, 1, 1]",
33
+ "name" : "value_cache_updates",
34
+ "type" : "MultiArray"
35
+ },
36
+ {
37
+ "hasShapeFlexibility" : "0",
38
+ "isOptional" : "0",
39
+ "dataType" : "Float16",
40
+ "formattedType" : "MultiArray (Float16 1 × 1024 × 1 × 1)",
41
+ "shortDescription" : "",
42
+ "shape" : "[1, 1024, 1, 1]",
43
+ "name" : "hidden_context_update",
44
+ "type" : "MultiArray"
45
+ }
46
+ ],
47
+ "modelParameters" : [
48
+
49
+ ],
50
+ "specificationVersion" : 9,
51
+ "functions" : [
52
+ {
53
+ "inputSchema" : [
54
+ {
55
+ "hasShapeFlexibility" : "0",
56
+ "isOptional" : "0",
57
+ "dataType" : "Int32",
58
+ "formattedType" : "MultiArray (Int32 1 × 16 × 4)",
59
+ "shortDescription" : "",
60
+ "shape" : "[1, 16, 4]",
61
+ "name" : "audio_codes",
62
+ "type" : "MultiArray"
63
+ },
64
+ {
65
+ "hasShapeFlexibility" : "0",
66
+ "isOptional" : "0",
67
+ "dataType" : "Int32",
68
+ "formattedType" : "MultiArray (Int32 1)",
69
+ "shortDescription" : "",
70
+ "shape" : "[1]",
71
+ "name" : "cache_length",
72
+ "type" : "MultiArray"
73
+ },
74
+ {
75
+ "hasShapeFlexibility" : "0",
76
+ "isOptional" : "0",
77
+ "dataType" : "Float16",
78
+ "formattedType" : "MultiArray (Float16 1 × 8192 × 1 × 256)",
79
+ "shortDescription" : "",
80
+ "shape" : "[1, 8192, 1, 256]",
81
+ "name" : "key_cache",
82
+ "type" : "MultiArray"
83
+ },
84
+ {
85
+ "hasShapeFlexibility" : "0",
86
+ "isOptional" : "0",
87
+ "dataType" : "Float16",
88
+ "formattedType" : "MultiArray (Float16 1 × 8192 × 1 × 256)",
89
+ "shortDescription" : "",
90
+ "shape" : "[1, 8192, 1, 256]",
91
+ "name" : "value_cache",
92
+ "type" : "MultiArray"
93
+ },
94
+ {
95
+ "hasShapeFlexibility" : "0",
96
+ "isOptional" : "0",
97
+ "dataType" : "Float16",
98
+ "formattedType" : "MultiArray (Float16 1 × 256)",
99
+ "shortDescription" : "",
100
+ "shape" : "[1, 256]",
101
+ "name" : "key_padding_mask",
102
+ "type" : "MultiArray"
103
+ },
104
+ {
105
+ "hasShapeFlexibility" : "0",
106
+ "isOptional" : "0",
107
+ "dataType" : "Float16",
108
+ "formattedType" : "MultiArray (Float16 1 × 1024 × 1 × 1)",
109
+ "shortDescription" : "",
110
+ "shape" : "[1, 1024, 1, 1]",
111
+ "name" : "hidden_context",
112
+ "type" : "MultiArray"
113
+ },
114
+ {
115
+ "hasShapeFlexibility" : "0",
116
+ "isOptional" : "0",
117
+ "dataType" : "Float16",
118
+ "formattedType" : "MultiArray (Float16 1 × 4 × 256)",
119
+ "shortDescription" : "",
120
+ "shape" : "[1, 4, 256]",
121
+ "name" : "kv_cache_update_mask",
122
+ "type" : "MultiArray"
123
+ },
124
+ {
125
+ "hasShapeFlexibility" : "0",
126
+ "isOptional" : "0",
127
+ "dataType" : "Float16",
128
+ "formattedType" : "MultiArray (Float16 1 × 4 × 256)",
129
+ "shortDescription" : "",
130
+ "shape" : "[1, 4, 256]",
131
+ "name" : "qk_mask",
132
+ "type" : "MultiArray"
133
+ }
134
+ ],
135
+ "computePrecision" : "Mixed (Float16, Float32, Int16, Int32, UInt16)",
136
+ "storagePrecision" : "Mixed (Float16, Int32, Palettized (8 bits), UInt8)",
137
+ "stateSchema" : [
138
+
139
+ ],
140
+ "outputSchema" : [
141
+ {
142
+ "hasShapeFlexibility" : "0",
143
+ "isOptional" : "0",
144
+ "dataType" : "Float16",
145
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 7680)",
146
+ "shortDescription" : "",
147
+ "shape" : "[1, 1, 1, 7680]",
148
+ "name" : "audio",
149
+ "type" : "MultiArray"
150
+ },
151
+ {
152
+ "hasShapeFlexibility" : "0",
153
+ "isOptional" : "0",
154
+ "dataType" : "Float16",
155
+ "formattedType" : "MultiArray (Float16 1 × 8192 × 1 × 4)",
156
+ "shortDescription" : "",
157
+ "shape" : "[1, 8192, 1, 4]",
158
+ "name" : "key_cache_updates",
159
+ "type" : "MultiArray"
160
+ },
161
+ {
162
+ "hasShapeFlexibility" : "0",
163
+ "isOptional" : "0",
164
+ "dataType" : "Float16",
165
+ "formattedType" : "MultiArray (Float16 1 × 8192 × 1 × 4)",
166
+ "shortDescription" : "",
167
+ "shape" : "[1, 8192, 1, 4]",
168
+ "name" : "value_cache_updates",
169
+ "type" : "MultiArray"
170
+ },
171
+ {
172
+ "hasShapeFlexibility" : "0",
173
+ "isOptional" : "0",
174
+ "dataType" : "Float16",
175
+ "formattedType" : "MultiArray (Float16 1 × 1024 × 1 × 4)",
176
+ "shortDescription" : "",
177
+ "shape" : "[1, 1024, 1, 4]",
178
+ "name" : "hidden_context_update",
179
+ "type" : "MultiArray"
180
+ }
181
+ ],
182
+ "name" : "throughput",
183
+ "mlProgramOperationTypeHistogram" : {
184
+ "Ios18.expandDims" : 26,
185
+ "Ios18.mul" : 218,
186
+ "Ios18.softmax" : 8,
187
+ "Ios18.rsqrt" : 17,
188
+ "Ios18.matmul" : 32,
189
+ "Ios16.reduceMean" : 17,
190
+ "Split" : 3,
191
+ "Ios18.greaterEqual" : 1,
192
+ "Select" : 1,
193
+ "Ios18.sin" : 29,
194
+ "Ios18.gather" : 18,
195
+ "Ios18.add" : 141,
196
+ "Ios16.reduceSum" : 1,
197
+ "Ios18.layerNorm" : 2,
198
+ "Ios18.reshape" : 92,
199
+ "Pad" : 17,
200
+ "Ios18.constexprLutToDense" : 116,
201
+ "Ios18.conv" : 99,
202
+ "Ios18.concat" : 19,
203
+ "Ios18.transpose" : 62,
204
+ "Ios18.sub" : 1,
205
+ "Ios18.cast" : 19,
206
+ "Ios18.silu" : 8,
207
+ "Ios18.gelu" : 2,
208
+ "Ios18.clip" : 1,
209
+ "Ios18.sliceByIndex" : 35,
210
+ "Ios18.squeeze" : 18
211
+ }
212
+ },
213
+ {
214
+ "inputSchema" : [
215
+ {
216
+ "hasShapeFlexibility" : "0",
217
+ "isOptional" : "0",
218
+ "dataType" : "Int32",
219
+ "formattedType" : "MultiArray (Int32 1 × 16 × 1)",
220
+ "shortDescription" : "",
221
+ "shape" : "[1, 16, 1]",
222
+ "name" : "audio_codes",
223
+ "type" : "MultiArray"
224
+ },
225
+ {
226
+ "hasShapeFlexibility" : "0",
227
+ "isOptional" : "0",
228
+ "dataType" : "Int32",
229
+ "formattedType" : "MultiArray (Int32 1)",
230
+ "shortDescription" : "",
231
+ "shape" : "[1]",
232
+ "name" : "cache_length",
233
+ "type" : "MultiArray"
234
+ },
235
+ {
236
+ "hasShapeFlexibility" : "0",
237
+ "isOptional" : "0",
238
+ "dataType" : "Float16",
239
+ "formattedType" : "MultiArray (Float16 1 × 8192 × 1 × 256)",
240
+ "shortDescription" : "",
241
+ "shape" : "[1, 8192, 1, 256]",
242
+ "name" : "key_cache",
243
+ "type" : "MultiArray"
244
+ },
245
+ {
246
+ "hasShapeFlexibility" : "0",
247
+ "isOptional" : "0",
248
+ "dataType" : "Float16",
249
+ "formattedType" : "MultiArray (Float16 1 × 8192 × 1 × 256)",
250
+ "shortDescription" : "",
251
+ "shape" : "[1, 8192, 1, 256]",
252
+ "name" : "value_cache",
253
+ "type" : "MultiArray"
254
+ },
255
+ {
256
+ "hasShapeFlexibility" : "0",
257
+ "isOptional" : "0",
258
+ "dataType" : "Float16",
259
+ "formattedType" : "MultiArray (Float16 1 × 256)",
260
+ "shortDescription" : "",
261
+ "shape" : "[1, 256]",
262
+ "name" : "key_padding_mask",
263
+ "type" : "MultiArray"
264
+ },
265
+ {
266
+ "hasShapeFlexibility" : "0",
267
+ "isOptional" : "0",
268
+ "dataType" : "Float16",
269
+ "formattedType" : "MultiArray (Float16 1 × 1024 × 1 × 4)",
270
+ "shortDescription" : "",
271
+ "shape" : "[1, 1024, 1, 4]",
272
+ "name" : "hidden_context",
273
+ "type" : "MultiArray"
274
+ },
275
+ {
276
+ "hasShapeFlexibility" : "0",
277
+ "isOptional" : "0",
278
+ "dataType" : "Float16",
279
+ "formattedType" : "MultiArray (Float16 1 × 1 × 256)",
280
+ "shortDescription" : "",
281
+ "shape" : "[1, 1, 256]",
282
+ "name" : "kv_cache_update_mask",
283
+ "type" : "MultiArray"
284
+ }
285
+ ],
286
+ "computePrecision" : "Mixed (Float16, Float32, Int16, Int32, UInt16)",
287
+ "storagePrecision" : "Mixed (Float16, Int32, Palettized (8 bits), UInt8)",
288
+ "stateSchema" : [
289
+
290
+ ],
291
+ "outputSchema" : [
292
+ {
293
+ "hasShapeFlexibility" : "0",
294
+ "isOptional" : "0",
295
+ "dataType" : "Float16",
296
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 1920)",
297
+ "shortDescription" : "",
298
+ "shape" : "[1, 1, 1, 1920]",
299
+ "name" : "audio",
300
+ "type" : "MultiArray"
301
+ },
302
+ {
303
+ "hasShapeFlexibility" : "0",
304
+ "isOptional" : "0",
305
+ "dataType" : "Float16",
306
+ "formattedType" : "MultiArray (Float16 1 × 8192 × 1 × 1)",
307
+ "shortDescription" : "",
308
+ "shape" : "[1, 8192, 1, 1]",
309
+ "name" : "key_cache_updates",
310
+ "type" : "MultiArray"
311
+ },
312
+ {
313
+ "hasShapeFlexibility" : "0",
314
+ "isOptional" : "0",
315
+ "dataType" : "Float16",
316
+ "formattedType" : "MultiArray (Float16 1 × 8192 × 1 × 1)",
317
+ "shortDescription" : "",
318
+ "shape" : "[1, 8192, 1, 1]",
319
+ "name" : "value_cache_updates",
320
+ "type" : "MultiArray"
321
+ },
322
+ {
323
+ "hasShapeFlexibility" : "0",
324
+ "isOptional" : "0",
325
+ "dataType" : "Float16",
326
+ "formattedType" : "MultiArray (Float16 1 × 1024 × 1 × 1)",
327
+ "shortDescription" : "",
328
+ "shape" : "[1, 1024, 1, 1]",
329
+ "name" : "hidden_context_update",
330
+ "type" : "MultiArray"
331
+ }
332
+ ],
333
+ "name" : "latency",
334
+ "mlProgramOperationTypeHistogram" : {
335
+ "Ios18.expandDims" : 25,
336
+ "Ios18.mul" : 218,
337
+ "Ios18.softmax" : 8,
338
+ "Ios18.rsqrt" : 17,
339
+ "Ios18.matmul" : 32,
340
+ "Ios16.reduceMean" : 17,
341
+ "Split" : 3,
342
+ "Ios18.greaterEqual" : 1,
343
+ "Select" : 1,
344
+ "Ios18.sin" : 29,
345
+ "Ios18.gather" : 18,
346
+ "Ios18.add" : 132,
347
+ "Ios16.reduceSum" : 1,
348
+ "Ios18.layerNorm" : 2,
349
+ "Ios18.reshape" : 92,
350
+ "Pad" : 17,
351
+ "Ios18.constexprLutToDense" : 116,
352
+ "Ios18.conv" : 99,
353
+ "Ios18.concat" : 19,
354
+ "Ios18.transpose" : 62,
355
+ "Ios18.sub" : 1,
356
+ "Ios18.cast" : 19,
357
+ "Ios18.silu" : 8,
358
+ "Ios18.gelu" : 2,
359
+ "Ios18.clip" : 1,
360
+ "Ios18.sliceByIndex" : 35,
361
+ "Ios18.squeeze" : 18
362
+ }
363
+ }
364
+ ],
365
+ "mlProgramOperationTypeHistogram" : {
366
+ "Ios18.expandDims" : 25,
367
+ "Ios18.mul" : 218,
368
+ "Ios18.softmax" : 8,
369
+ "Ios18.rsqrt" : 17,
370
+ "Ios18.matmul" : 32,
371
+ "Ios16.reduceMean" : 17,
372
+ "Split" : 3,
373
+ "Ios18.greaterEqual" : 1,
374
+ "Select" : 1,
375
+ "Ios18.sin" : 29,
376
+ "Ios18.gather" : 18,
377
+ "Ios18.add" : 132,
378
+ "Ios16.reduceSum" : 1,
379
+ "Ios18.layerNorm" : 2,
380
+ "Ios18.reshape" : 92,
381
+ "Pad" : 17,
382
+ "Ios18.constexprLutToDense" : 116,
383
+ "Ios18.conv" : 99,
384
+ "Ios18.concat" : 19,
385
+ "Ios18.transpose" : 62,
386
+ "Ios18.sub" : 1,
387
+ "Ios18.cast" : 19,
388
+ "Ios18.silu" : 8,
389
+ "Ios18.gelu" : 2,
390
+ "Ios18.clip" : 1,
391
+ "Ios18.sliceByIndex" : 35,
392
+ "Ios18.squeeze" : 18
393
+ },
394
+ "isUpdatable" : "0",
395
+ "stateSchema" : [
396
+
397
+ ],
398
+ "availability" : {
399
+ "macOS" : "15.0",
400
+ "tvOS" : "18.0",
401
+ "visionOS" : "2.0",
402
+ "watchOS" : "11.0",
403
+ "iOS" : "18.0",
404
+ "macCatalyst" : "18.0"
405
+ },
406
+ "computePrecision" : "Mixed (Float16, Float32, Int16, Int32, UInt16)",
407
+ "modelType" : {
408
+ "name" : "MLModelType_mlProgram"
409
+ },
410
+ "inputSchema" : [
411
+ {
412
+ "hasShapeFlexibility" : "0",
413
+ "isOptional" : "0",
414
+ "dataType" : "Int32",
415
+ "formattedType" : "MultiArray (Int32 1 × 16 × 1)",
416
+ "shortDescription" : "",
417
+ "shape" : "[1, 16, 1]",
418
+ "name" : "audio_codes",
419
+ "type" : "MultiArray"
420
+ },
421
+ {
422
+ "hasShapeFlexibility" : "0",
423
+ "isOptional" : "0",
424
+ "dataType" : "Int32",
425
+ "formattedType" : "MultiArray (Int32 1)",
426
+ "shortDescription" : "",
427
+ "shape" : "[1]",
428
+ "name" : "cache_length",
429
+ "type" : "MultiArray"
430
+ },
431
+ {
432
+ "hasShapeFlexibility" : "0",
433
+ "isOptional" : "0",
434
+ "dataType" : "Float16",
435
+ "formattedType" : "MultiArray (Float16 1 × 8192 × 1 × 256)",
436
+ "shortDescription" : "",
437
+ "shape" : "[1, 8192, 1, 256]",
438
+ "name" : "key_cache",
439
+ "type" : "MultiArray"
440
+ },
441
+ {
442
+ "hasShapeFlexibility" : "0",
443
+ "isOptional" : "0",
444
+ "dataType" : "Float16",
445
+ "formattedType" : "MultiArray (Float16 1 × 8192 × 1 × 256)",
446
+ "shortDescription" : "",
447
+ "shape" : "[1, 8192, 1, 256]",
448
+ "name" : "value_cache",
449
+ "type" : "MultiArray"
450
+ },
451
+ {
452
+ "hasShapeFlexibility" : "0",
453
+ "isOptional" : "0",
454
+ "dataType" : "Float16",
455
+ "formattedType" : "MultiArray (Float16 1 × 256)",
456
+ "shortDescription" : "",
457
+ "shape" : "[1, 256]",
458
+ "name" : "key_padding_mask",
459
+ "type" : "MultiArray"
460
+ },
461
+ {
462
+ "hasShapeFlexibility" : "0",
463
+ "isOptional" : "0",
464
+ "dataType" : "Float16",
465
+ "formattedType" : "MultiArray (Float16 1 × 1024 × 1 × 4)",
466
+ "shortDescription" : "",
467
+ "shape" : "[1, 1024, 1, 4]",
468
+ "name" : "hidden_context",
469
+ "type" : "MultiArray"
470
+ },
471
+ {
472
+ "hasShapeFlexibility" : "0",
473
+ "isOptional" : "0",
474
+ "dataType" : "Float16",
475
+ "formattedType" : "MultiArray (Float16 1 × 1 × 256)",
476
+ "shortDescription" : "",
477
+ "shape" : "[1, 1, 256]",
478
+ "name" : "kv_cache_update_mask",
479
+ "type" : "MultiArray"
480
+ }
481
+ ],
482
+ "defaultFunctionName" : "latency",
483
+ "generatedClassName" : "SpeechDecoder",
484
+ "userDefinedMetadata" : {
485
+
486
+ },
487
+ "method" : "predict"
488
+ }
489
+ ]
qwen3_tts/speech_decoder/12hz-1.7b-customvoice/W8A16-multifunction/SpeechDecoder.mlmodelc/model.mil ADDED
The diff for this file is too large to render. See raw diff
 
qwen3_tts/speech_decoder/12hz-1.7b-customvoice/W8A16-multifunction/SpeechDecoder.mlmodelc/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5dc780657305201c968c11456f851d6cbdbf6b477664cc0d92cf09df4a1bb2a3
3
+ size 114238784