aufklarer committed on
Commit
1cbf43b
·
verified ·
1 Parent(s): 6951fad

Add text decoder models (embedding + decoder) for full CoreML pipeline

Browse files
config.json CHANGED
@@ -1,35 +1,19 @@
1
  {
2
- "model_type": "qwen3-asr-encoder-coreml",
3
  "source_model": "Qwen/Qwen3-ASR-0.6B",
4
- "num_mel_bins": 128,
5
- "sample_rate": 16000,
6
- "hop_length": 160,
7
- "encoder_hidden": 896,
8
- "encoder_layers": 18,
9
- "encoder_heads": 14,
10
- "encoder_ffn": 3584,
11
- "output_dim": 1024,
12
- "conv_stride": 8,
13
- "enumerated_mel_lengths": [
14
- 100,
15
- 200,
16
- 400,
17
- 600,
18
- 800,
19
- 1000,
20
- 1500,
21
- 2000,
22
- 3000
23
- ],
24
- "variants": {
25
- "int8": {
26
- "file": "encoder_int8.mlpackage",
27
- "quantization": "int8_palettize"
28
- },
29
- "int4": {
30
- "file": "encoder_int4.mlpackage",
31
- "quantization": "int4_palettize"
32
- }
33
- },
34
- "default_variant": "int8"
35
  }
 
1
  {
2
+ "model_type": "qwen3-asr-decoder-coreml",
3
  "source_model": "Qwen/Qwen3-ASR-0.6B",
4
+ "max_seq_length": 1024,
5
+ "hidden_size": 1024,
6
+ "num_layers": 28,
7
+ "num_heads": 16,
8
+ "num_kv_heads": 8,
9
+ "head_dim": 128,
10
+ "intermediate_size": 3072,
11
+ "vocab_size": 151936,
12
+ "rms_norm_eps": 1e-06,
13
+ "rope_theta": 1000000.0,
14
+ "quantization": "int8_palettize",
15
+ "files": {
16
+ "embedding": "embedding.mlpackage",
17
+ "decoder": "decoder.mlpackage"
18
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  }
decoder.mlmodelc/analytics/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a79916489c7f48e36f27bba2b338a78f5dcb33136a24523b42cfb7d9bb80643e
3
+ size 243
decoder.mlmodelc/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef85d0192c459be0254385c22fd7d67fad251cf7ff1654128482b312916d57c0
3
+ size 2209
decoder.mlmodelc/metadata.json ADDED
@@ -0,0 +1,606 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "metadataOutputVersion" : "3.0",
4
+ "storagePrecision" : "Mixed (Float16, Palettized (8 bits), UInt8)",
5
+ "outputSchema" : [
6
+ {
7
+ "hasShapeFlexibility" : "0",
8
+ "isOptional" : "0",
9
+ "dataType" : "Float16",
10
+ "formattedType" : "MultiArray (Float16 1 × 1 × 151936)",
11
+ "shortDescription" : "",
12
+ "shape" : "[1, 1, 151936]",
13
+ "name" : "logits",
14
+ "type" : "MultiArray"
15
+ }
16
+ ],
17
+ "modelParameters" : [
18
+
19
+ ],
20
+ "specificationVersion" : 9,
21
+ "mlProgramOperationTypeHistogram" : {
22
+ "Ios18.expandDims" : 56,
23
+ "Ios18.mul" : 619,
24
+ "Ios18.cos" : 1,
25
+ "Ios18.softmax" : 28,
26
+ "Ios18.rsqrt" : 113,
27
+ "Ios18.matmul" : 56,
28
+ "Ios16.reduceMean" : 113,
29
+ "Ios18.sin" : 1,
30
+ "Ios18.readState" : 168,
31
+ "Tile" : 56,
32
+ "Ios18.add" : 309,
33
+ "Ios18.writeState" : 112,
34
+ "Ios18.reshape" : 171,
35
+ "Ios18.constexprLutToDense" : 197,
36
+ "Ios18.linear" : 197,
37
+ "Ios18.concat" : 56,
38
+ "Ios18.transpose" : 112,
39
+ "OneHot" : 1,
40
+ "Ios18.sub" : 57,
41
+ "Ios18.pow" : 113,
42
+ "Ios18.cast" : 4,
43
+ "Ios18.silu" : 28,
44
+ "Ios18.sliceByIndex" : 112
45
+ },
46
+ "computePrecision" : "Mixed (Float16, Float32, Int32)",
47
+ "isUpdatable" : "0",
48
+ "stateSchema" : [
49
+ {
50
+ "dataType" : "Float16",
51
+ "isOptional" : "0",
52
+ "formattedType" : "State (Float16 1 × 8 × 1024 × 128)",
53
+ "shortDescription" : "",
54
+ "shape" : "[1, 8, 1024, 128]",
55
+ "name" : "k_cache_0",
56
+ "type" : "State"
57
+ },
58
+ {
59
+ "dataType" : "Float16",
60
+ "isOptional" : "0",
61
+ "formattedType" : "State (Float16 1 × 8 × 1024 × 128)",
62
+ "shortDescription" : "",
63
+ "shape" : "[1, 8, 1024, 128]",
64
+ "name" : "v_cache_0",
65
+ "type" : "State"
66
+ },
67
+ {
68
+ "dataType" : "Float16",
69
+ "isOptional" : "0",
70
+ "formattedType" : "State (Float16 1 × 8 × 1024 × 128)",
71
+ "shortDescription" : "",
72
+ "shape" : "[1, 8, 1024, 128]",
73
+ "name" : "k_cache_1",
74
+ "type" : "State"
75
+ },
76
+ {
77
+ "dataType" : "Float16",
78
+ "isOptional" : "0",
79
+ "formattedType" : "State (Float16 1 × 8 × 1024 × 128)",
80
+ "shortDescription" : "",
81
+ "shape" : "[1, 8, 1024, 128]",
82
+ "name" : "v_cache_1",
83
+ "type" : "State"
84
+ },
85
+ {
86
+ "dataType" : "Float16",
87
+ "isOptional" : "0",
88
+ "formattedType" : "State (Float16 1 × 8 × 1024 × 128)",
89
+ "shortDescription" : "",
90
+ "shape" : "[1, 8, 1024, 128]",
91
+ "name" : "k_cache_2",
92
+ "type" : "State"
93
+ },
94
+ {
95
+ "dataType" : "Float16",
96
+ "isOptional" : "0",
97
+ "formattedType" : "State (Float16 1 × 8 × 1024 × 128)",
98
+ "shortDescription" : "",
99
+ "shape" : "[1, 8, 1024, 128]",
100
+ "name" : "v_cache_2",
101
+ "type" : "State"
102
+ },
103
+ {
104
+ "dataType" : "Float16",
105
+ "isOptional" : "0",
106
+ "formattedType" : "State (Float16 1 × 8 × 1024 × 128)",
107
+ "shortDescription" : "",
108
+ "shape" : "[1, 8, 1024, 128]",
109
+ "name" : "k_cache_3",
110
+ "type" : "State"
111
+ },
112
+ {
113
+ "dataType" : "Float16",
114
+ "isOptional" : "0",
115
+ "formattedType" : "State (Float16 1 × 8 × 1024 × 128)",
116
+ "shortDescription" : "",
117
+ "shape" : "[1, 8, 1024, 128]",
118
+ "name" : "v_cache_3",
119
+ "type" : "State"
120
+ },
121
+ {
122
+ "dataType" : "Float16",
123
+ "isOptional" : "0",
124
+ "formattedType" : "State (Float16 1 × 8 × 1024 × 128)",
125
+ "shortDescription" : "",
126
+ "shape" : "[1, 8, 1024, 128]",
127
+ "name" : "k_cache_4",
128
+ "type" : "State"
129
+ },
130
+ {
131
+ "dataType" : "Float16",
132
+ "isOptional" : "0",
133
+ "formattedType" : "State (Float16 1 × 8 × 1024 × 128)",
134
+ "shortDescription" : "",
135
+ "shape" : "[1, 8, 1024, 128]",
136
+ "name" : "v_cache_4",
137
+ "type" : "State"
138
+ },
139
+ {
140
+ "dataType" : "Float16",
141
+ "isOptional" : "0",
142
+ "formattedType" : "State (Float16 1 × 8 × 1024 × 128)",
143
+ "shortDescription" : "",
144
+ "shape" : "[1, 8, 1024, 128]",
145
+ "name" : "k_cache_5",
146
+ "type" : "State"
147
+ },
148
+ {
149
+ "dataType" : "Float16",
150
+ "isOptional" : "0",
151
+ "formattedType" : "State (Float16 1 × 8 × 1024 × 128)",
152
+ "shortDescription" : "",
153
+ "shape" : "[1, 8, 1024, 128]",
154
+ "name" : "v_cache_5",
155
+ "type" : "State"
156
+ },
157
+ {
158
+ "dataType" : "Float16",
159
+ "isOptional" : "0",
160
+ "formattedType" : "State (Float16 1 × 8 × 1024 × 128)",
161
+ "shortDescription" : "",
162
+ "shape" : "[1, 8, 1024, 128]",
163
+ "name" : "k_cache_6",
164
+ "type" : "State"
165
+ },
166
+ {
167
+ "dataType" : "Float16",
168
+ "isOptional" : "0",
169
+ "formattedType" : "State (Float16 1 × 8 × 1024 × 128)",
170
+ "shortDescription" : "",
171
+ "shape" : "[1, 8, 1024, 128]",
172
+ "name" : "v_cache_6",
173
+ "type" : "State"
174
+ },
175
+ {
176
+ "dataType" : "Float16",
177
+ "isOptional" : "0",
178
+ "formattedType" : "State (Float16 1 × 8 × 1024 × 128)",
179
+ "shortDescription" : "",
180
+ "shape" : "[1, 8, 1024, 128]",
181
+ "name" : "k_cache_7",
182
+ "type" : "State"
183
+ },
184
+ {
185
+ "dataType" : "Float16",
186
+ "isOptional" : "0",
187
+ "formattedType" : "State (Float16 1 × 8 × 1024 × 128)",
188
+ "shortDescription" : "",
189
+ "shape" : "[1, 8, 1024, 128]",
190
+ "name" : "v_cache_7",
191
+ "type" : "State"
192
+ },
193
+ {
194
+ "dataType" : "Float16",
195
+ "isOptional" : "0",
196
+ "formattedType" : "State (Float16 1 × 8 × 1024 × 128)",
197
+ "shortDescription" : "",
198
+ "shape" : "[1, 8, 1024, 128]",
199
+ "name" : "k_cache_8",
200
+ "type" : "State"
201
+ },
202
+ {
203
+ "dataType" : "Float16",
204
+ "isOptional" : "0",
205
+ "formattedType" : "State (Float16 1 × 8 × 1024 × 128)",
206
+ "shortDescription" : "",
207
+ "shape" : "[1, 8, 1024, 128]",
208
+ "name" : "v_cache_8",
209
+ "type" : "State"
210
+ },
211
+ {
212
+ "dataType" : "Float16",
213
+ "isOptional" : "0",
214
+ "formattedType" : "State (Float16 1 × 8 × 1024 × 128)",
215
+ "shortDescription" : "",
216
+ "shape" : "[1, 8, 1024, 128]",
217
+ "name" : "k_cache_9",
218
+ "type" : "State"
219
+ },
220
+ {
221
+ "dataType" : "Float16",
222
+ "isOptional" : "0",
223
+ "formattedType" : "State (Float16 1 × 8 × 1024 × 128)",
224
+ "shortDescription" : "",
225
+ "shape" : "[1, 8, 1024, 128]",
226
+ "name" : "v_cache_9",
227
+ "type" : "State"
228
+ },
229
+ {
230
+ "dataType" : "Float16",
231
+ "isOptional" : "0",
232
+ "formattedType" : "State (Float16 1 × 8 × 1024 × 128)",
233
+ "shortDescription" : "",
234
+ "shape" : "[1, 8, 1024, 128]",
235
+ "name" : "k_cache_10",
236
+ "type" : "State"
237
+ },
238
+ {
239
+ "dataType" : "Float16",
240
+ "isOptional" : "0",
241
+ "formattedType" : "State (Float16 1 × 8 × 1024 × 128)",
242
+ "shortDescription" : "",
243
+ "shape" : "[1, 8, 1024, 128]",
244
+ "name" : "v_cache_10",
245
+ "type" : "State"
246
+ },
247
+ {
248
+ "dataType" : "Float16",
249
+ "isOptional" : "0",
250
+ "formattedType" : "State (Float16 1 × 8 × 1024 × 128)",
251
+ "shortDescription" : "",
252
+ "shape" : "[1, 8, 1024, 128]",
253
+ "name" : "k_cache_11",
254
+ "type" : "State"
255
+ },
256
+ {
257
+ "dataType" : "Float16",
258
+ "isOptional" : "0",
259
+ "formattedType" : "State (Float16 1 × 8 × 1024 × 128)",
260
+ "shortDescription" : "",
261
+ "shape" : "[1, 8, 1024, 128]",
262
+ "name" : "v_cache_11",
263
+ "type" : "State"
264
+ },
265
+ {
266
+ "dataType" : "Float16",
267
+ "isOptional" : "0",
268
+ "formattedType" : "State (Float16 1 × 8 × 1024 × 128)",
269
+ "shortDescription" : "",
270
+ "shape" : "[1, 8, 1024, 128]",
271
+ "name" : "k_cache_12",
272
+ "type" : "State"
273
+ },
274
+ {
275
+ "dataType" : "Float16",
276
+ "isOptional" : "0",
277
+ "formattedType" : "State (Float16 1 × 8 × 1024 × 128)",
278
+ "shortDescription" : "",
279
+ "shape" : "[1, 8, 1024, 128]",
280
+ "name" : "v_cache_12",
281
+ "type" : "State"
282
+ },
283
+ {
284
+ "dataType" : "Float16",
285
+ "isOptional" : "0",
286
+ "formattedType" : "State (Float16 1 × 8 × 1024 × 128)",
287
+ "shortDescription" : "",
288
+ "shape" : "[1, 8, 1024, 128]",
289
+ "name" : "k_cache_13",
290
+ "type" : "State"
291
+ },
292
+ {
293
+ "dataType" : "Float16",
294
+ "isOptional" : "0",
295
+ "formattedType" : "State (Float16 1 × 8 × 1024 × 128)",
296
+ "shortDescription" : "",
297
+ "shape" : "[1, 8, 1024, 128]",
298
+ "name" : "v_cache_13",
299
+ "type" : "State"
300
+ },
301
+ {
302
+ "dataType" : "Float16",
303
+ "isOptional" : "0",
304
+ "formattedType" : "State (Float16 1 × 8 × 1024 × 128)",
305
+ "shortDescription" : "",
306
+ "shape" : "[1, 8, 1024, 128]",
307
+ "name" : "k_cache_14",
308
+ "type" : "State"
309
+ },
310
+ {
311
+ "dataType" : "Float16",
312
+ "isOptional" : "0",
313
+ "formattedType" : "State (Float16 1 × 8 × 1024 × 128)",
314
+ "shortDescription" : "",
315
+ "shape" : "[1, 8, 1024, 128]",
316
+ "name" : "v_cache_14",
317
+ "type" : "State"
318
+ },
319
+ {
320
+ "dataType" : "Float16",
321
+ "isOptional" : "0",
322
+ "formattedType" : "State (Float16 1 × 8 × 1024 × 128)",
323
+ "shortDescription" : "",
324
+ "shape" : "[1, 8, 1024, 128]",
325
+ "name" : "k_cache_15",
326
+ "type" : "State"
327
+ },
328
+ {
329
+ "dataType" : "Float16",
330
+ "isOptional" : "0",
331
+ "formattedType" : "State (Float16 1 × 8 × 1024 × 128)",
332
+ "shortDescription" : "",
333
+ "shape" : "[1, 8, 1024, 128]",
334
+ "name" : "v_cache_15",
335
+ "type" : "State"
336
+ },
337
+ {
338
+ "dataType" : "Float16",
339
+ "isOptional" : "0",
340
+ "formattedType" : "State (Float16 1 × 8 × 1024 × 128)",
341
+ "shortDescription" : "",
342
+ "shape" : "[1, 8, 1024, 128]",
343
+ "name" : "k_cache_16",
344
+ "type" : "State"
345
+ },
346
+ {
347
+ "dataType" : "Float16",
348
+ "isOptional" : "0",
349
+ "formattedType" : "State (Float16 1 × 8 × 1024 × 128)",
350
+ "shortDescription" : "",
351
+ "shape" : "[1, 8, 1024, 128]",
352
+ "name" : "v_cache_16",
353
+ "type" : "State"
354
+ },
355
+ {
356
+ "dataType" : "Float16",
357
+ "isOptional" : "0",
358
+ "formattedType" : "State (Float16 1 × 8 × 1024 × 128)",
359
+ "shortDescription" : "",
360
+ "shape" : "[1, 8, 1024, 128]",
361
+ "name" : "k_cache_17",
362
+ "type" : "State"
363
+ },
364
+ {
365
+ "dataType" : "Float16",
366
+ "isOptional" : "0",
367
+ "formattedType" : "State (Float16 1 × 8 × 1024 × 128)",
368
+ "shortDescription" : "",
369
+ "shape" : "[1, 8, 1024, 128]",
370
+ "name" : "v_cache_17",
371
+ "type" : "State"
372
+ },
373
+ {
374
+ "dataType" : "Float16",
375
+ "isOptional" : "0",
376
+ "formattedType" : "State (Float16 1 × 8 × 1024 × 128)",
377
+ "shortDescription" : "",
378
+ "shape" : "[1, 8, 1024, 128]",
379
+ "name" : "k_cache_18",
380
+ "type" : "State"
381
+ },
382
+ {
383
+ "dataType" : "Float16",
384
+ "isOptional" : "0",
385
+ "formattedType" : "State (Float16 1 × 8 × 1024 × 128)",
386
+ "shortDescription" : "",
387
+ "shape" : "[1, 8, 1024, 128]",
388
+ "name" : "v_cache_18",
389
+ "type" : "State"
390
+ },
391
+ {
392
+ "dataType" : "Float16",
393
+ "isOptional" : "0",
394
+ "formattedType" : "State (Float16 1 × 8 × 1024 × 128)",
395
+ "shortDescription" : "",
396
+ "shape" : "[1, 8, 1024, 128]",
397
+ "name" : "k_cache_19",
398
+ "type" : "State"
399
+ },
400
+ {
401
+ "dataType" : "Float16",
402
+ "isOptional" : "0",
403
+ "formattedType" : "State (Float16 1 × 8 × 1024 × 128)",
404
+ "shortDescription" : "",
405
+ "shape" : "[1, 8, 1024, 128]",
406
+ "name" : "v_cache_19",
407
+ "type" : "State"
408
+ },
409
+ {
410
+ "dataType" : "Float16",
411
+ "isOptional" : "0",
412
+ "formattedType" : "State (Float16 1 × 8 × 1024 × 128)",
413
+ "shortDescription" : "",
414
+ "shape" : "[1, 8, 1024, 128]",
415
+ "name" : "k_cache_20",
416
+ "type" : "State"
417
+ },
418
+ {
419
+ "dataType" : "Float16",
420
+ "isOptional" : "0",
421
+ "formattedType" : "State (Float16 1 × 8 × 1024 × 128)",
422
+ "shortDescription" : "",
423
+ "shape" : "[1, 8, 1024, 128]",
424
+ "name" : "v_cache_20",
425
+ "type" : "State"
426
+ },
427
+ {
428
+ "dataType" : "Float16",
429
+ "isOptional" : "0",
430
+ "formattedType" : "State (Float16 1 × 8 × 1024 × 128)",
431
+ "shortDescription" : "",
432
+ "shape" : "[1, 8, 1024, 128]",
433
+ "name" : "k_cache_21",
434
+ "type" : "State"
435
+ },
436
+ {
437
+ "dataType" : "Float16",
438
+ "isOptional" : "0",
439
+ "formattedType" : "State (Float16 1 × 8 × 1024 × 128)",
440
+ "shortDescription" : "",
441
+ "shape" : "[1, 8, 1024, 128]",
442
+ "name" : "v_cache_21",
443
+ "type" : "State"
444
+ },
445
+ {
446
+ "dataType" : "Float16",
447
+ "isOptional" : "0",
448
+ "formattedType" : "State (Float16 1 × 8 × 1024 × 128)",
449
+ "shortDescription" : "",
450
+ "shape" : "[1, 8, 1024, 128]",
451
+ "name" : "k_cache_22",
452
+ "type" : "State"
453
+ },
454
+ {
455
+ "dataType" : "Float16",
456
+ "isOptional" : "0",
457
+ "formattedType" : "State (Float16 1 × 8 × 1024 × 128)",
458
+ "shortDescription" : "",
459
+ "shape" : "[1, 8, 1024, 128]",
460
+ "name" : "v_cache_22",
461
+ "type" : "State"
462
+ },
463
+ {
464
+ "dataType" : "Float16",
465
+ "isOptional" : "0",
466
+ "formattedType" : "State (Float16 1 × 8 × 1024 × 128)",
467
+ "shortDescription" : "",
468
+ "shape" : "[1, 8, 1024, 128]",
469
+ "name" : "k_cache_23",
470
+ "type" : "State"
471
+ },
472
+ {
473
+ "dataType" : "Float16",
474
+ "isOptional" : "0",
475
+ "formattedType" : "State (Float16 1 × 8 × 1024 × 128)",
476
+ "shortDescription" : "",
477
+ "shape" : "[1, 8, 1024, 128]",
478
+ "name" : "v_cache_23",
479
+ "type" : "State"
480
+ },
481
+ {
482
+ "dataType" : "Float16",
483
+ "isOptional" : "0",
484
+ "formattedType" : "State (Float16 1 × 8 × 1024 × 128)",
485
+ "shortDescription" : "",
486
+ "shape" : "[1, 8, 1024, 128]",
487
+ "name" : "k_cache_24",
488
+ "type" : "State"
489
+ },
490
+ {
491
+ "dataType" : "Float16",
492
+ "isOptional" : "0",
493
+ "formattedType" : "State (Float16 1 × 8 × 1024 × 128)",
494
+ "shortDescription" : "",
495
+ "shape" : "[1, 8, 1024, 128]",
496
+ "name" : "v_cache_24",
497
+ "type" : "State"
498
+ },
499
+ {
500
+ "dataType" : "Float16",
501
+ "isOptional" : "0",
502
+ "formattedType" : "State (Float16 1 × 8 × 1024 × 128)",
503
+ "shortDescription" : "",
504
+ "shape" : "[1, 8, 1024, 128]",
505
+ "name" : "k_cache_25",
506
+ "type" : "State"
507
+ },
508
+ {
509
+ "dataType" : "Float16",
510
+ "isOptional" : "0",
511
+ "formattedType" : "State (Float16 1 × 8 × 1024 × 128)",
512
+ "shortDescription" : "",
513
+ "shape" : "[1, 8, 1024, 128]",
514
+ "name" : "v_cache_25",
515
+ "type" : "State"
516
+ },
517
+ {
518
+ "dataType" : "Float16",
519
+ "isOptional" : "0",
520
+ "formattedType" : "State (Float16 1 × 8 × 1024 × 128)",
521
+ "shortDescription" : "",
522
+ "shape" : "[1, 8, 1024, 128]",
523
+ "name" : "k_cache_26",
524
+ "type" : "State"
525
+ },
526
+ {
527
+ "dataType" : "Float16",
528
+ "isOptional" : "0",
529
+ "formattedType" : "State (Float16 1 × 8 × 1024 × 128)",
530
+ "shortDescription" : "",
531
+ "shape" : "[1, 8, 1024, 128]",
532
+ "name" : "v_cache_26",
533
+ "type" : "State"
534
+ },
535
+ {
536
+ "dataType" : "Float16",
537
+ "isOptional" : "0",
538
+ "formattedType" : "State (Float16 1 × 8 × 1024 × 128)",
539
+ "shortDescription" : "",
540
+ "shape" : "[1, 8, 1024, 128]",
541
+ "name" : "k_cache_27",
542
+ "type" : "State"
543
+ },
544
+ {
545
+ "dataType" : "Float16",
546
+ "isOptional" : "0",
547
+ "formattedType" : "State (Float16 1 × 8 × 1024 × 128)",
548
+ "shortDescription" : "",
549
+ "shape" : "[1, 8, 1024, 128]",
550
+ "name" : "v_cache_27",
551
+ "type" : "State"
552
+ }
553
+ ],
554
+ "availability" : {
555
+ "macOS" : "15.0",
556
+ "tvOS" : "18.0",
557
+ "visionOS" : "2.0",
558
+ "watchOS" : "11.0",
559
+ "iOS" : "18.0",
560
+ "macCatalyst" : "18.0"
561
+ },
562
+ "modelType" : {
563
+ "name" : "MLModelType_mlProgram"
564
+ },
565
+ "userDefinedMetadata" : {
566
+ "com.github.apple.coremltools.conversion_date" : "2026-03-08",
567
+ "com.github.apple.coremltools.source" : "torch==2.10.0",
568
+ "com.github.apple.coremltools.version" : "9.0",
569
+ "com.github.apple.coremltools.source_dialect" : "TorchScript"
570
+ },
571
+ "inputSchema" : [
572
+ {
573
+ "hasShapeFlexibility" : "0",
574
+ "isOptional" : "0",
575
+ "dataType" : "Float32",
576
+ "formattedType" : "MultiArray (Float32 1 × 1 × 1024)",
577
+ "shortDescription" : "",
578
+ "shape" : "[1, 1, 1024]",
579
+ "name" : "input_embeds",
580
+ "type" : "MultiArray"
581
+ },
582
+ {
583
+ "hasShapeFlexibility" : "0",
584
+ "isOptional" : "0",
585
+ "dataType" : "Int32",
586
+ "formattedType" : "MultiArray (Int32 1)",
587
+ "shortDescription" : "",
588
+ "shape" : "[1]",
589
+ "name" : "position",
590
+ "type" : "MultiArray"
591
+ },
592
+ {
593
+ "hasShapeFlexibility" : "0",
594
+ "isOptional" : "0",
595
+ "dataType" : "Float32",
596
+ "formattedType" : "MultiArray (Float32 1 × 1 × 1 × 1024)",
597
+ "shortDescription" : "",
598
+ "shape" : "[1, 1, 1, 1024]",
599
+ "name" : "attention_mask",
600
+ "type" : "MultiArray"
601
+ }
602
+ ],
603
+ "generatedClassName" : "decoder",
604
+ "method" : "predict"
605
+ }
606
+ ]
decoder.mlmodelc/model.mil ADDED
The diff for this file is too large to render. See raw diff
 
decoder.mlmodelc/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81068a122be724803230e522dce1a9fabc5e8b93e66e8e243b4ac9c94bc70d26
3
+ size 596565440
embedding.mlmodelc/analytics/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9df63b0390d381b15f40865a7b583cd84f830bd5a7a0489a3b5036240144adc8
3
+ size 243
embedding.mlmodelc/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5fba05c97b9575dfb6cfa880dd0ac9a97ed25fbcda2559c012ffed9b02ab041d
3
+ size 377
embedding.mlmodelc/metadata.json ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "metadataOutputVersion" : "3.0",
4
+ "storagePrecision" : "Mixed (Float16, Palettized (8 bits), UInt8)",
5
+ "outputSchema" : [
6
+ {
7
+ "hasShapeFlexibility" : "0",
8
+ "isOptional" : "0",
9
+ "dataType" : "Float16",
10
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1024)",
11
+ "shortDescription" : "",
12
+ "shape" : "[1, 1, 1024]",
13
+ "name" : "embedding",
14
+ "type" : "MultiArray"
15
+ }
16
+ ],
17
+ "modelParameters" : [
18
+
19
+ ],
20
+ "specificationVersion" : 9,
21
+ "mlProgramOperationTypeHistogram" : {
22
+ "Ios18.greaterEqual" : 2,
23
+ "Ios18.constexprLutToDense" : 1,
24
+ "Ios18.add" : 2,
25
+ "Select" : 2,
26
+ "Ios18.gather" : 1
27
+ },
28
+ "computePrecision" : "Mixed (Float16, Int32)",
29
+ "isUpdatable" : "0",
30
+ "stateSchema" : [
31
+
32
+ ],
33
+ "availability" : {
34
+ "macOS" : "15.0",
35
+ "tvOS" : "18.0",
36
+ "visionOS" : "2.0",
37
+ "watchOS" : "11.0",
38
+ "iOS" : "18.0",
39
+ "macCatalyst" : "18.0"
40
+ },
41
+ "modelType" : {
42
+ "name" : "MLModelType_mlProgram"
43
+ },
44
+ "userDefinedMetadata" : {
45
+ "com.github.apple.coremltools.conversion_date" : "2026-03-08",
46
+ "com.github.apple.coremltools.source" : "torch==2.10.0",
47
+ "com.github.apple.coremltools.version" : "9.0",
48
+ "com.github.apple.coremltools.source_dialect" : "TorchScript"
49
+ },
50
+ "inputSchema" : [
51
+ {
52
+ "hasShapeFlexibility" : "0",
53
+ "isOptional" : "0",
54
+ "dataType" : "Int32",
55
+ "formattedType" : "MultiArray (Int32 1 × 1)",
56
+ "shortDescription" : "",
57
+ "shape" : "[1, 1]",
58
+ "name" : "token_id",
59
+ "type" : "MultiArray"
60
+ }
61
+ ],
62
+ "generatedClassName" : "embedding",
63
+ "method" : "predict"
64
+ }
65
+ ]
embedding.mlmodelc/model.mil ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ program(1.3)
2
+ [buildInfo = dict<string, string>({{"coremlc-component-MIL", "3510.2.1"}, {"coremlc-version", "3500.32.1"}})]
3
+ {
4
+ func main<ios18>(tensor<int32, [1, 1]> token_id) {
5
+ int32 var_6_batch_dims_0 = const()[name = string("op_6_batch_dims_0"), val = int32(0)];
6
+ bool var_6_validate_indices_0 = const()[name = string("op_6_validate_indices_0"), val = bool(false)];
7
+ tensor<fp16, [151936, 1024]> embedding_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [151936, 1024]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), lut = tensor<fp16, [1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(155582592))))[name = string("embedding_weight_to_fp16_palettized")];
8
+ int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)];
9
+ tensor<bool, [1, 1]> greater_equal_0 = greater_equal(x = token_id, y = greater_equal_0_y_0)[name = string("greater_equal_0")];
10
+ int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(151936)];
11
+ tensor<int32, [1, 1]> add_0 = add(x = token_id, y = slice_by_index_0)[name = string("add_0")];
12
+ tensor<int32, [1, 1]> select_0 = select(a = token_id, b = add_0, cond = greater_equal_0)[name = string("select_0")];
13
+ int32 greater_equal_0_y_0_1 = const()[name = string("greater_equal_0_y_0_1"), val = int32(0)];
14
+ tensor<bool, [1, 1]> greater_equal_0_1 = greater_equal(x = select_0, y = greater_equal_0_y_0_1)[name = string("greater_equal_0_1")];
15
+ int32 slice_by_index_0_1 = const()[name = string("slice_by_index_0_1"), val = int32(151936)];
16
+ tensor<int32, [1, 1]> add_0_1 = add(x = select_0, y = slice_by_index_0_1)[name = string("add_0_1")];
17
+ tensor<int32, [1, 1]> select_0_1 = select(a = select_0, b = add_0_1, cond = greater_equal_0_1)[name = string("select_0_1")];
18
+ int32 op_6_cast_fp16_axis_0 = const()[name = string("op_6_cast_fp16_axis_0"), val = int32(0)];
19
+ tensor<fp16, [1, 1, 1024]> embedding = gather(axis = op_6_cast_fp16_axis_0, batch_dims = var_6_batch_dims_0, indices = select_0_1, validate_indices = var_6_validate_indices_0, x = embedding_weight_to_fp16_palettized)[name = string("op_6_cast_fp16")];
20
+ } -> (embedding);
21
+ }
embedding.mlmodelc/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3c98211cd573376f8a2f3a41bab50581fb66d9657e407938db52d288f7df5e7
3
+ size 155583168