mlboydaisuke committed on
Commit
a99990e
·
verified ·
1 Parent(s): 5c83027

Initial upload: 3-chunk merged stateful Linear (Stage 3)

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ hf_model/tokenizer.json filter=lfs diff=lfs merge=lfs -text
chunk_1.mlmodelc/analytics/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d96a0f6b2d6273ed0cfc277a52481c869b7f169b1df15b37c8974bb7f086976e
3
+ size 243
chunk_1.mlmodelc/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07bc110005a3c9437dafced7453d31c8e1ad53ddc95024de7f4362ede80c132a
3
+ size 1009
chunk_1.mlmodelc/metadata.json ADDED
@@ -0,0 +1,554 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "metadataOutputVersion" : "3.0",
4
+ "storagePrecision" : "Mixed (Float16, Palettized (10 bits), Palettized (11 bits), Palettized (12 bits), Palettized (13 bits), Palettized (7 bits), Palettized (8 bits), UInt4)",
5
+ "outputSchema" : [
6
+ {
7
+ "hasShapeFlexibility" : "0",
8
+ "isOptional" : "0",
9
+ "dataType" : "Float16",
10
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1536)",
11
+ "shortDescription" : "",
12
+ "shape" : "[1, 1, 1536]",
13
+ "name" : "hidden_states_out",
14
+ "type" : "MultiArray"
15
+ },
16
+ {
17
+ "hasShapeFlexibility" : "0",
18
+ "isOptional" : "0",
19
+ "dataType" : "Float16",
20
+ "formattedType" : "MultiArray (Float16 1 × 1 × 8960)",
21
+ "shortDescription" : "",
22
+ "shape" : "[1, 1, 8960]",
23
+ "name" : "per_layer_combined_out",
24
+ "type" : "MultiArray"
25
+ }
26
+ ],
27
+ "modelParameters" : [
28
+
29
+ ],
30
+ "specificationVersion" : 9,
31
+ "functions" : [
32
+ {
33
+ "inputSchema" : [
34
+ {
35
+ "hasShapeFlexibility" : "0",
36
+ "isOptional" : "0",
37
+ "dataType" : "Float16",
38
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1536)",
39
+ "shortDescription" : "",
40
+ "shape" : "[1, 1, 1536]",
41
+ "name" : "hidden_states",
42
+ "type" : "MultiArray"
43
+ },
44
+ {
45
+ "hasShapeFlexibility" : "0",
46
+ "isOptional" : "0",
47
+ "dataType" : "Float16",
48
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 2048)",
49
+ "shortDescription" : "",
50
+ "shape" : "[1, 1, 1, 2048]",
51
+ "name" : "causal_mask_full",
52
+ "type" : "MultiArray"
53
+ },
54
+ {
55
+ "hasShapeFlexibility" : "0",
56
+ "isOptional" : "0",
57
+ "dataType" : "Float16",
58
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 512)",
59
+ "shortDescription" : "",
60
+ "shape" : "[1, 1, 1, 512]",
61
+ "name" : "causal_mask_sliding",
62
+ "type" : "MultiArray"
63
+ },
64
+ {
65
+ "hasShapeFlexibility" : "0",
66
+ "isOptional" : "0",
67
+ "dataType" : "Float16",
68
+ "formattedType" : "MultiArray (Float16 1 × 1 × 8960)",
69
+ "shortDescription" : "",
70
+ "shape" : "[1, 1, 8960]",
71
+ "name" : "per_layer_raw",
72
+ "type" : "MultiArray"
73
+ },
74
+ {
75
+ "hasShapeFlexibility" : "0",
76
+ "isOptional" : "0",
77
+ "dataType" : "Float16",
78
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 256)",
79
+ "shortDescription" : "",
80
+ "shape" : "[1, 1, 1, 256]",
81
+ "name" : "cos_s",
82
+ "type" : "MultiArray"
83
+ },
84
+ {
85
+ "hasShapeFlexibility" : "0",
86
+ "isOptional" : "0",
87
+ "dataType" : "Float16",
88
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 256)",
89
+ "shortDescription" : "",
90
+ "shape" : "[1, 1, 1, 256]",
91
+ "name" : "sin_s",
92
+ "type" : "MultiArray"
93
+ },
94
+ {
95
+ "hasShapeFlexibility" : "0",
96
+ "isOptional" : "0",
97
+ "dataType" : "Float16",
98
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 512)",
99
+ "shortDescription" : "",
100
+ "shape" : "[1, 1, 1, 512]",
101
+ "name" : "cos_f",
102
+ "type" : "MultiArray"
103
+ },
104
+ {
105
+ "hasShapeFlexibility" : "0",
106
+ "isOptional" : "0",
107
+ "dataType" : "Float16",
108
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 512)",
109
+ "shortDescription" : "",
110
+ "shape" : "[1, 1, 1, 512]",
111
+ "name" : "sin_f",
112
+ "type" : "MultiArray"
113
+ },
114
+ {
115
+ "hasShapeFlexibility" : "0",
116
+ "isOptional" : "0",
117
+ "dataType" : "Int32",
118
+ "formattedType" : "MultiArray (Int32 1)",
119
+ "shortDescription" : "",
120
+ "shape" : "[1]",
121
+ "name" : "current_pos",
122
+ "type" : "MultiArray"
123
+ },
124
+ {
125
+ "hasShapeFlexibility" : "0",
126
+ "isOptional" : "0",
127
+ "dataType" : "Int32",
128
+ "formattedType" : "MultiArray (Int32 1)",
129
+ "shortDescription" : "",
130
+ "shape" : "[1]",
131
+ "name" : "ring_pos",
132
+ "type" : "MultiArray"
133
+ }
134
+ ],
135
+ "computePrecision" : "Mixed (Float16, Float32, Int32)",
136
+ "storagePrecision" : "Mixed (Float16, Palettized (10 bits), Palettized (11 bits), Palettized (12 bits), Palettized (13 bits), Palettized (7 bits), Palettized (8 bits), UInt4)",
137
+ "stateSchema" : [
138
+ {
139
+ "dataType" : "Float16",
140
+ "isOptional" : "0",
141
+ "formattedType" : "State (Float16 14 × 1 × 512 × 512)",
142
+ "shortDescription" : "",
143
+ "shape" : "[14, 1, 512, 512]",
144
+ "name" : "kv_cache_sliding",
145
+ "type" : "State"
146
+ },
147
+ {
148
+ "dataType" : "Float16",
149
+ "isOptional" : "0",
150
+ "formattedType" : "State (Float16 2 × 1 × 2048 × 512)",
151
+ "shortDescription" : "",
152
+ "shape" : "[2, 1, 2048, 512]",
153
+ "name" : "kv_cache_full",
154
+ "type" : "State"
155
+ }
156
+ ],
157
+ "outputSchema" : [
158
+ {
159
+ "hasShapeFlexibility" : "0",
160
+ "isOptional" : "0",
161
+ "dataType" : "Float16",
162
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1536)",
163
+ "shortDescription" : "",
164
+ "shape" : "[1, 1, 1536]",
165
+ "name" : "hidden_states_out",
166
+ "type" : "MultiArray"
167
+ },
168
+ {
169
+ "hasShapeFlexibility" : "0",
170
+ "isOptional" : "0",
171
+ "dataType" : "Float16",
172
+ "formattedType" : "MultiArray (Float16 1 × 1 × 8960)",
173
+ "shortDescription" : "",
174
+ "shape" : "[1, 1, 8960]",
175
+ "name" : "per_layer_combined_out",
176
+ "type" : "MultiArray"
177
+ }
178
+ ],
179
+ "name" : "infer",
180
+ "mlProgramOperationTypeHistogram" : {
181
+ "Ios18.mul" : 196,
182
+ "Ios18.matmul" : 16,
183
+ "Ios18.rsqrt" : 8,
184
+ "Ios16.reduceMean" : 8,
185
+ "Ios18.exp" : 8,
186
+ "Ios18.realDiv" : 8,
187
+ "Split" : 73,
188
+ "Ios18.readState" : 18,
189
+ "Ios16.reduceMax" : 8,
190
+ "Ios18.add" : 59,
191
+ "Tile" : 16,
192
+ "Ios18.layerNorm" : 57,
193
+ "Ios18.sliceUpdate" : 16,
194
+ "Ios18.writeState" : 16,
195
+ "Pad" : 14,
196
+ "Ios18.reshape" : 98,
197
+ "Ios16.reduceSum" : 8,
198
+ "Ios18.constexprLutToDense" : 73,
199
+ "Ios18.linear" : 73,
200
+ "Ios18.concat" : 105,
201
+ "Ios18.transpose" : 80,
202
+ "Ios18.sub" : 8,
203
+ "Ios18.pow" : 8,
204
+ "Ios18.gelu" : 16,
205
+ "Ios18.sliceByIndex" : 38
206
+ }
207
+ },
208
+ {
209
+ "inputSchema" : [
210
+ {
211
+ "hasShapeFlexibility" : "0",
212
+ "isOptional" : "0",
213
+ "dataType" : "Float16",
214
+ "formattedType" : "MultiArray (Float16 1 × 8 × 1536)",
215
+ "shortDescription" : "",
216
+ "shape" : "[1, 8, 1536]",
217
+ "name" : "hidden_states",
218
+ "type" : "MultiArray"
219
+ },
220
+ {
221
+ "hasShapeFlexibility" : "0",
222
+ "isOptional" : "0",
223
+ "dataType" : "Float16",
224
+ "formattedType" : "MultiArray (Float16 1 × 1 × 8 × 2048)",
225
+ "shortDescription" : "",
226
+ "shape" : "[1, 1, 8, 2048]",
227
+ "name" : "causal_mask_full",
228
+ "type" : "MultiArray"
229
+ },
230
+ {
231
+ "hasShapeFlexibility" : "0",
232
+ "isOptional" : "0",
233
+ "dataType" : "Float16",
234
+ "formattedType" : "MultiArray (Float16 1 × 1 × 8 × 512)",
235
+ "shortDescription" : "",
236
+ "shape" : "[1, 1, 8, 512]",
237
+ "name" : "causal_mask_sliding",
238
+ "type" : "MultiArray"
239
+ },
240
+ {
241
+ "hasShapeFlexibility" : "0",
242
+ "isOptional" : "0",
243
+ "dataType" : "Float16",
244
+ "formattedType" : "MultiArray (Float16 1 × 8 × 8960)",
245
+ "shortDescription" : "",
246
+ "shape" : "[1, 8, 8960]",
247
+ "name" : "per_layer_raw",
248
+ "type" : "MultiArray"
249
+ },
250
+ {
251
+ "hasShapeFlexibility" : "0",
252
+ "isOptional" : "0",
253
+ "dataType" : "Float16",
254
+ "formattedType" : "MultiArray (Float16 1 × 1 × 8 × 256)",
255
+ "shortDescription" : "",
256
+ "shape" : "[1, 1, 8, 256]",
257
+ "name" : "cos_s",
258
+ "type" : "MultiArray"
259
+ },
260
+ {
261
+ "hasShapeFlexibility" : "0",
262
+ "isOptional" : "0",
263
+ "dataType" : "Float16",
264
+ "formattedType" : "MultiArray (Float16 1 × 1 × 8 × 256)",
265
+ "shortDescription" : "",
266
+ "shape" : "[1, 1, 8, 256]",
267
+ "name" : "sin_s",
268
+ "type" : "MultiArray"
269
+ },
270
+ {
271
+ "hasShapeFlexibility" : "0",
272
+ "isOptional" : "0",
273
+ "dataType" : "Float16",
274
+ "formattedType" : "MultiArray (Float16 1 × 1 × 8 × 512)",
275
+ "shortDescription" : "",
276
+ "shape" : "[1, 1, 8, 512]",
277
+ "name" : "cos_f",
278
+ "type" : "MultiArray"
279
+ },
280
+ {
281
+ "hasShapeFlexibility" : "0",
282
+ "isOptional" : "0",
283
+ "dataType" : "Float16",
284
+ "formattedType" : "MultiArray (Float16 1 × 1 × 8 × 512)",
285
+ "shortDescription" : "",
286
+ "shape" : "[1, 1, 8, 512]",
287
+ "name" : "sin_f",
288
+ "type" : "MultiArray"
289
+ },
290
+ {
291
+ "hasShapeFlexibility" : "0",
292
+ "isOptional" : "0",
293
+ "dataType" : "Int32",
294
+ "formattedType" : "MultiArray (Int32 1)",
295
+ "shortDescription" : "",
296
+ "shape" : "[1]",
297
+ "name" : "current_pos",
298
+ "type" : "MultiArray"
299
+ },
300
+ {
301
+ "hasShapeFlexibility" : "0",
302
+ "isOptional" : "0",
303
+ "dataType" : "Int32",
304
+ "formattedType" : "MultiArray (Int32 1)",
305
+ "shortDescription" : "",
306
+ "shape" : "[1]",
307
+ "name" : "ring_pos",
308
+ "type" : "MultiArray"
309
+ }
310
+ ],
311
+ "computePrecision" : "Mixed (Float16, Float32, Int32)",
312
+ "storagePrecision" : "Mixed (Float16, Palettized (10 bits), Palettized (11 bits), Palettized (12 bits), Palettized (13 bits), Palettized (7 bits), Palettized (8 bits), UInt4)",
313
+ "stateSchema" : [
314
+ {
315
+ "dataType" : "Float16",
316
+ "isOptional" : "0",
317
+ "formattedType" : "State (Float16 14 × 1 × 512 × 512)",
318
+ "shortDescription" : "",
319
+ "shape" : "[14, 1, 512, 512]",
320
+ "name" : "kv_cache_sliding",
321
+ "type" : "State"
322
+ },
323
+ {
324
+ "dataType" : "Float16",
325
+ "isOptional" : "0",
326
+ "formattedType" : "State (Float16 2 × 1 × 2048 × 512)",
327
+ "shortDescription" : "",
328
+ "shape" : "[2, 1, 2048, 512]",
329
+ "name" : "kv_cache_full",
330
+ "type" : "State"
331
+ }
332
+ ],
333
+ "outputSchema" : [
334
+ {
335
+ "hasShapeFlexibility" : "0",
336
+ "isOptional" : "0",
337
+ "dataType" : "Float16",
338
+ "formattedType" : "MultiArray (Float16 1 × 8 × 1536)",
339
+ "shortDescription" : "",
340
+ "shape" : "[1, 8, 1536]",
341
+ "name" : "hidden_states_out",
342
+ "type" : "MultiArray"
343
+ },
344
+ {
345
+ "hasShapeFlexibility" : "0",
346
+ "isOptional" : "0",
347
+ "dataType" : "Float16",
348
+ "formattedType" : "MultiArray (Float16 1 × 8 × 8960)",
349
+ "shortDescription" : "",
350
+ "shape" : "[1, 8, 8960]",
351
+ "name" : "per_layer_combined_out",
352
+ "type" : "MultiArray"
353
+ }
354
+ ],
355
+ "name" : "prefill_b8",
356
+ "mlProgramOperationTypeHistogram" : {
357
+ "Ios18.mul" : 196,
358
+ "Ios18.matmul" : 16,
359
+ "Ios18.rsqrt" : 8,
360
+ "Ios16.reduceMean" : 8,
361
+ "Ios18.exp" : 8,
362
+ "Ios18.realDiv" : 8,
363
+ "Split" : 73,
364
+ "Ios18.readState" : 18,
365
+ "Ios16.reduceMax" : 8,
366
+ "Ios18.add" : 59,
367
+ "Tile" : 16,
368
+ "Ios18.layerNorm" : 57,
369
+ "Ios18.sliceUpdate" : 16,
370
+ "Ios18.writeState" : 16,
371
+ "Pad" : 14,
372
+ "Ios18.reshape" : 66,
373
+ "Ios16.reduceSum" : 8,
374
+ "Ios18.constexprLutToDense" : 73,
375
+ "Ios18.linear" : 73,
376
+ "Ios18.concat" : 105,
377
+ "Ios18.transpose" : 80,
378
+ "Ios18.sub" : 8,
379
+ "Ios18.pow" : 8,
380
+ "Ios18.gelu" : 16,
381
+ "Ios18.sliceByIndex" : 38
382
+ }
383
+ }
384
+ ],
385
+ "mlProgramOperationTypeHistogram" : {
386
+ "Ios18.mul" : 196,
387
+ "Ios18.matmul" : 16,
388
+ "Ios18.rsqrt" : 8,
389
+ "Ios16.reduceMean" : 8,
390
+ "Ios18.exp" : 8,
391
+ "Ios18.realDiv" : 8,
392
+ "Split" : 73,
393
+ "Ios18.readState" : 18,
394
+ "Ios16.reduceMax" : 8,
395
+ "Ios18.add" : 59,
396
+ "Tile" : 16,
397
+ "Ios18.layerNorm" : 57,
398
+ "Ios18.sliceUpdate" : 16,
399
+ "Ios18.writeState" : 16,
400
+ "Pad" : 14,
401
+ "Ios18.reshape" : 98,
402
+ "Ios16.reduceSum" : 8,
403
+ "Ios18.constexprLutToDense" : 73,
404
+ "Ios18.linear" : 73,
405
+ "Ios18.concat" : 105,
406
+ "Ios18.transpose" : 80,
407
+ "Ios18.sub" : 8,
408
+ "Ios18.pow" : 8,
409
+ "Ios18.gelu" : 16,
410
+ "Ios18.sliceByIndex" : 38
411
+ },
412
+ "isUpdatable" : "0",
413
+ "stateSchema" : [
414
+ {
415
+ "dataType" : "Float16",
416
+ "isOptional" : "0",
417
+ "formattedType" : "State (Float16 14 × 1 × 512 × 512)",
418
+ "shortDescription" : "",
419
+ "shape" : "[14, 1, 512, 512]",
420
+ "name" : "kv_cache_sliding",
421
+ "type" : "State"
422
+ },
423
+ {
424
+ "dataType" : "Float16",
425
+ "isOptional" : "0",
426
+ "formattedType" : "State (Float16 2 × 1 × 2048 × 512)",
427
+ "shortDescription" : "",
428
+ "shape" : "[2, 1, 2048, 512]",
429
+ "name" : "kv_cache_full",
430
+ "type" : "State"
431
+ }
432
+ ],
433
+ "availability" : {
434
+ "macOS" : "15.0",
435
+ "tvOS" : "18.0",
436
+ "visionOS" : "2.0",
437
+ "watchOS" : "11.0",
438
+ "iOS" : "18.0",
439
+ "macCatalyst" : "18.0"
440
+ },
441
+ "computePrecision" : "Mixed (Float16, Float32, Int32)",
442
+ "modelType" : {
443
+ "name" : "MLModelType_mlProgram"
444
+ },
445
+ "inputSchema" : [
446
+ {
447
+ "hasShapeFlexibility" : "0",
448
+ "isOptional" : "0",
449
+ "dataType" : "Float16",
450
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1536)",
451
+ "shortDescription" : "",
452
+ "shape" : "[1, 1, 1536]",
453
+ "name" : "hidden_states",
454
+ "type" : "MultiArray"
455
+ },
456
+ {
457
+ "hasShapeFlexibility" : "0",
458
+ "isOptional" : "0",
459
+ "dataType" : "Float16",
460
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 2048)",
461
+ "shortDescription" : "",
462
+ "shape" : "[1, 1, 1, 2048]",
463
+ "name" : "causal_mask_full",
464
+ "type" : "MultiArray"
465
+ },
466
+ {
467
+ "hasShapeFlexibility" : "0",
468
+ "isOptional" : "0",
469
+ "dataType" : "Float16",
470
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 512)",
471
+ "shortDescription" : "",
472
+ "shape" : "[1, 1, 1, 512]",
473
+ "name" : "causal_mask_sliding",
474
+ "type" : "MultiArray"
475
+ },
476
+ {
477
+ "hasShapeFlexibility" : "0",
478
+ "isOptional" : "0",
479
+ "dataType" : "Float16",
480
+ "formattedType" : "MultiArray (Float16 1 × 1 × 8960)",
481
+ "shortDescription" : "",
482
+ "shape" : "[1, 1, 8960]",
483
+ "name" : "per_layer_raw",
484
+ "type" : "MultiArray"
485
+ },
486
+ {
487
+ "hasShapeFlexibility" : "0",
488
+ "isOptional" : "0",
489
+ "dataType" : "Float16",
490
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 256)",
491
+ "shortDescription" : "",
492
+ "shape" : "[1, 1, 1, 256]",
493
+ "name" : "cos_s",
494
+ "type" : "MultiArray"
495
+ },
496
+ {
497
+ "hasShapeFlexibility" : "0",
498
+ "isOptional" : "0",
499
+ "dataType" : "Float16",
500
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 256)",
501
+ "shortDescription" : "",
502
+ "shape" : "[1, 1, 1, 256]",
503
+ "name" : "sin_s",
504
+ "type" : "MultiArray"
505
+ },
506
+ {
507
+ "hasShapeFlexibility" : "0",
508
+ "isOptional" : "0",
509
+ "dataType" : "Float16",
510
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 512)",
511
+ "shortDescription" : "",
512
+ "shape" : "[1, 1, 1, 512]",
513
+ "name" : "cos_f",
514
+ "type" : "MultiArray"
515
+ },
516
+ {
517
+ "hasShapeFlexibility" : "0",
518
+ "isOptional" : "0",
519
+ "dataType" : "Float16",
520
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 512)",
521
+ "shortDescription" : "",
522
+ "shape" : "[1, 1, 1, 512]",
523
+ "name" : "sin_f",
524
+ "type" : "MultiArray"
525
+ },
526
+ {
527
+ "hasShapeFlexibility" : "0",
528
+ "isOptional" : "0",
529
+ "dataType" : "Int32",
530
+ "formattedType" : "MultiArray (Int32 1)",
531
+ "shortDescription" : "",
532
+ "shape" : "[1]",
533
+ "name" : "current_pos",
534
+ "type" : "MultiArray"
535
+ },
536
+ {
537
+ "hasShapeFlexibility" : "0",
538
+ "isOptional" : "0",
539
+ "dataType" : "Int32",
540
+ "formattedType" : "MultiArray (Int32 1)",
541
+ "shortDescription" : "",
542
+ "shape" : "[1]",
543
+ "name" : "ring_pos",
544
+ "type" : "MultiArray"
545
+ }
546
+ ],
547
+ "defaultFunctionName" : "infer",
548
+ "generatedClassName" : "chunk_1",
549
+ "userDefinedMetadata" : {
550
+
551
+ },
552
+ "method" : "predict"
553
+ }
554
+ ]
chunk_1.mlmodelc/model.mil ADDED
The diff for this file is too large to render. See raw diff
 
chunk_1.mlmodelc/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3dbebbe38ca34dad177f3e9123e1dfb89b0d5b6fac49b3b52868f7c50b08f0cd
3
+ size 155484416
chunk_2.mlmodelc/analytics/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34163deded8f46eb55c7c05b11036257c91d5262ae348dcc8fb699869261e674
3
+ size 243
chunk_2.mlmodelc/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80547a545eb58019882825674410c3084221805dc0b4c5f275deb3e32325e0ed
3
+ size 1147
chunk_2.mlmodelc/metadata.json ADDED
@@ -0,0 +1,644 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "metadataOutputVersion" : "3.0",
4
+ "storagePrecision" : "Mixed (Float16, Palettized (10 bits), Palettized (11 bits), Palettized (12 bits), Palettized (13 bits), Palettized (7 bits), Palettized (8 bits), UInt4)",
5
+ "outputSchema" : [
6
+ {
7
+ "hasShapeFlexibility" : "0",
8
+ "isOptional" : "0",
9
+ "dataType" : "Float16",
10
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1536)",
11
+ "shortDescription" : "",
12
+ "shape" : "[1, 1, 1536]",
13
+ "name" : "hidden_states_out",
14
+ "type" : "MultiArray"
15
+ },
16
+ {
17
+ "hasShapeFlexibility" : "0",
18
+ "isOptional" : "0",
19
+ "dataType" : "Float16",
20
+ "formattedType" : "MultiArray (Float16 1 × 1 × 512 × 256)",
21
+ "shortDescription" : "",
22
+ "shape" : "[1, 1, 512, 256]",
23
+ "name" : "kv13_k",
24
+ "type" : "MultiArray"
25
+ },
26
+ {
27
+ "hasShapeFlexibility" : "0",
28
+ "isOptional" : "0",
29
+ "dataType" : "Float16",
30
+ "formattedType" : "MultiArray (Float16 1 × 1 × 512 × 256)",
31
+ "shortDescription" : "",
32
+ "shape" : "[1, 1, 512, 256]",
33
+ "name" : "kv13_v",
34
+ "type" : "MultiArray"
35
+ },
36
+ {
37
+ "hasShapeFlexibility" : "0",
38
+ "isOptional" : "0",
39
+ "dataType" : "Float16",
40
+ "formattedType" : "MultiArray (Float16 1 × 1 × 2048 × 512)",
41
+ "shortDescription" : "",
42
+ "shape" : "[1, 1, 2048, 512]",
43
+ "name" : "kv14_k",
44
+ "type" : "MultiArray"
45
+ },
46
+ {
47
+ "hasShapeFlexibility" : "0",
48
+ "isOptional" : "0",
49
+ "dataType" : "Float16",
50
+ "formattedType" : "MultiArray (Float16 1 × 1 × 2048 × 512)",
51
+ "shortDescription" : "",
52
+ "shape" : "[1, 1, 2048, 512]",
53
+ "name" : "kv14_v",
54
+ "type" : "MultiArray"
55
+ }
56
+ ],
57
+ "modelParameters" : [
58
+
59
+ ],
60
+ "specificationVersion" : 9,
61
+ "functions" : [
62
+ {
63
+ "inputSchema" : [
64
+ {
65
+ "hasShapeFlexibility" : "0",
66
+ "isOptional" : "0",
67
+ "dataType" : "Float16",
68
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1536)",
69
+ "shortDescription" : "",
70
+ "shape" : "[1, 1, 1536]",
71
+ "name" : "hidden_states",
72
+ "type" : "MultiArray"
73
+ },
74
+ {
75
+ "hasShapeFlexibility" : "0",
76
+ "isOptional" : "0",
77
+ "dataType" : "Float16",
78
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 2048)",
79
+ "shortDescription" : "",
80
+ "shape" : "[1, 1, 1, 2048]",
81
+ "name" : "causal_mask_full",
82
+ "type" : "MultiArray"
83
+ },
84
+ {
85
+ "hasShapeFlexibility" : "0",
86
+ "isOptional" : "0",
87
+ "dataType" : "Float16",
88
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 512)",
89
+ "shortDescription" : "",
90
+ "shape" : "[1, 1, 1, 512]",
91
+ "name" : "causal_mask_sliding",
92
+ "type" : "MultiArray"
93
+ },
94
+ {
95
+ "hasShapeFlexibility" : "0",
96
+ "isOptional" : "0",
97
+ "dataType" : "Float16",
98
+ "formattedType" : "MultiArray (Float16 1 × 1 × 8960)",
99
+ "shortDescription" : "",
100
+ "shape" : "[1, 1, 8960]",
101
+ "name" : "per_layer_combined",
102
+ "type" : "MultiArray"
103
+ },
104
+ {
105
+ "hasShapeFlexibility" : "0",
106
+ "isOptional" : "0",
107
+ "dataType" : "Float16",
108
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 256)",
109
+ "shortDescription" : "",
110
+ "shape" : "[1, 1, 1, 256]",
111
+ "name" : "cos_s",
112
+ "type" : "MultiArray"
113
+ },
114
+ {
115
+ "hasShapeFlexibility" : "0",
116
+ "isOptional" : "0",
117
+ "dataType" : "Float16",
118
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 256)",
119
+ "shortDescription" : "",
120
+ "shape" : "[1, 1, 1, 256]",
121
+ "name" : "sin_s",
122
+ "type" : "MultiArray"
123
+ },
124
+ {
125
+ "hasShapeFlexibility" : "0",
126
+ "isOptional" : "0",
127
+ "dataType" : "Float16",
128
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 512)",
129
+ "shortDescription" : "",
130
+ "shape" : "[1, 1, 1, 512]",
131
+ "name" : "cos_f",
132
+ "type" : "MultiArray"
133
+ },
134
+ {
135
+ "hasShapeFlexibility" : "0",
136
+ "isOptional" : "0",
137
+ "dataType" : "Float16",
138
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 512)",
139
+ "shortDescription" : "",
140
+ "shape" : "[1, 1, 1, 512]",
141
+ "name" : "sin_f",
142
+ "type" : "MultiArray"
143
+ },
144
+ {
145
+ "hasShapeFlexibility" : "0",
146
+ "isOptional" : "0",
147
+ "dataType" : "Int32",
148
+ "formattedType" : "MultiArray (Int32 1)",
149
+ "shortDescription" : "",
150
+ "shape" : "[1]",
151
+ "name" : "current_pos",
152
+ "type" : "MultiArray"
153
+ },
154
+ {
155
+ "hasShapeFlexibility" : "0",
156
+ "isOptional" : "0",
157
+ "dataType" : "Int32",
158
+ "formattedType" : "MultiArray (Int32 1)",
159
+ "shortDescription" : "",
160
+ "shape" : "[1]",
161
+ "name" : "ring_pos",
162
+ "type" : "MultiArray"
163
+ }
164
+ ],
165
+ "computePrecision" : "Mixed (Float16, Float32, Int32)",
166
+ "storagePrecision" : "Mixed (Float16, Palettized (10 bits), Palettized (11 bits), Palettized (12 bits), Palettized (13 bits), Palettized (7 bits), Palettized (8 bits), UInt4)",
167
+ "stateSchema" : [
168
+ {
169
+ "dataType" : "Float16",
170
+ "isOptional" : "0",
171
+ "formattedType" : "State (Float16 10 × 1 × 512 × 512)",
172
+ "shortDescription" : "",
173
+ "shape" : "[10, 1, 512, 512]",
174
+ "name" : "kv_cache_sliding",
175
+ "type" : "State"
176
+ },
177
+ {
178
+ "dataType" : "Float16",
179
+ "isOptional" : "0",
180
+ "formattedType" : "State (Float16 4 × 1 × 2048 × 512)",
181
+ "shortDescription" : "",
182
+ "shape" : "[4, 1, 2048, 512]",
183
+ "name" : "kv_cache_full",
184
+ "type" : "State"
185
+ }
186
+ ],
187
+ "outputSchema" : [
188
+ {
189
+ "hasShapeFlexibility" : "0",
190
+ "isOptional" : "0",
191
+ "dataType" : "Float16",
192
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1536)",
193
+ "shortDescription" : "",
194
+ "shape" : "[1, 1, 1536]",
195
+ "name" : "hidden_states_out",
196
+ "type" : "MultiArray"
197
+ },
198
+ {
199
+ "hasShapeFlexibility" : "0",
200
+ "isOptional" : "0",
201
+ "dataType" : "Float16",
202
+ "formattedType" : "MultiArray (Float16 1 × 1 × 512 × 256)",
203
+ "shortDescription" : "",
204
+ "shape" : "[1, 1, 512, 256]",
205
+ "name" : "kv13_k",
206
+ "type" : "MultiArray"
207
+ },
208
+ {
209
+ "hasShapeFlexibility" : "0",
210
+ "isOptional" : "0",
211
+ "dataType" : "Float16",
212
+ "formattedType" : "MultiArray (Float16 1 × 1 × 512 × 256)",
213
+ "shortDescription" : "",
214
+ "shape" : "[1, 1, 512, 256]",
215
+ "name" : "kv13_v",
216
+ "type" : "MultiArray"
217
+ },
218
+ {
219
+ "hasShapeFlexibility" : "0",
220
+ "isOptional" : "0",
221
+ "dataType" : "Float16",
222
+ "formattedType" : "MultiArray (Float16 1 × 1 × 2048 × 512)",
223
+ "shortDescription" : "",
224
+ "shape" : "[1, 1, 2048, 512]",
225
+ "name" : "kv14_k",
226
+ "type" : "MultiArray"
227
+ },
228
+ {
229
+ "hasShapeFlexibility" : "0",
230
+ "isOptional" : "0",
231
+ "dataType" : "Float16",
232
+ "formattedType" : "MultiArray (Float16 1 × 1 × 2048 × 512)",
233
+ "shortDescription" : "",
234
+ "shape" : "[1, 1, 2048, 512]",
235
+ "name" : "kv14_v",
236
+ "type" : "MultiArray"
237
+ }
238
+ ],
239
+ "name" : "infer",
240
+ "mlProgramOperationTypeHistogram" : {
241
+ "Ios18.mul" : 347,
242
+ "Ios18.matmul" : 34,
243
+ "Ios18.rsqrt" : 7,
244
+ "Ios16.reduceMean" : 7,
245
+ "Ios18.exp" : 17,
246
+ "Ios18.realDiv" : 17,
247
+ "Split" : 133,
248
+ "Ios18.readState" : 16,
249
+ "Ios16.reduceMax" : 17,
250
+ "Ios18.add" : 101,
251
+ "Tile" : 14,
252
+ "Ios18.layerNorm" : 109,
253
+ "Ios18.sliceUpdate" : 14,
254
+ "Ios18.writeState" : 14,
255
+ "Pad" : 10,
256
+ "Ios18.reshape" : 124,
257
+ "Ios16.reduceSum" : 17,
258
+ "Ios18.constexprLutToDense" : 133,
259
+ "Ios18.linear" : 133,
260
+ "Ios18.concat" : 161,
261
+ "Ios18.transpose" : 90,
262
+ "Ios18.sub" : 17,
263
+ "Ios18.pow" : 7,
264
+ "Ios18.gelu" : 34,
265
+ "Ios18.sliceByIndex" : 41
266
+ }
267
+ },
268
+ {
269
+ "inputSchema" : [
270
+ {
271
+ "hasShapeFlexibility" : "0",
272
+ "isOptional" : "0",
273
+ "dataType" : "Float16",
274
+ "formattedType" : "MultiArray (Float16 1 × 8 × 1536)",
275
+ "shortDescription" : "",
276
+ "shape" : "[1, 8, 1536]",
277
+ "name" : "hidden_states",
278
+ "type" : "MultiArray"
279
+ },
280
+ {
281
+ "hasShapeFlexibility" : "0",
282
+ "isOptional" : "0",
283
+ "dataType" : "Float16",
284
+ "formattedType" : "MultiArray (Float16 1 × 1 × 8 × 2048)",
285
+ "shortDescription" : "",
286
+ "shape" : "[1, 1, 8, 2048]",
287
+ "name" : "causal_mask_full",
288
+ "type" : "MultiArray"
289
+ },
290
+ {
291
+ "hasShapeFlexibility" : "0",
292
+ "isOptional" : "0",
293
+ "dataType" : "Float16",
294
+ "formattedType" : "MultiArray (Float16 1 × 1 × 8 × 512)",
295
+ "shortDescription" : "",
296
+ "shape" : "[1, 1, 8, 512]",
297
+ "name" : "causal_mask_sliding",
298
+ "type" : "MultiArray"
299
+ },
300
+ {
301
+ "hasShapeFlexibility" : "0",
302
+ "isOptional" : "0",
303
+ "dataType" : "Float16",
304
+ "formattedType" : "MultiArray (Float16 1 × 8 × 8960)",
305
+ "shortDescription" : "",
306
+ "shape" : "[1, 8, 8960]",
307
+ "name" : "per_layer_combined",
308
+ "type" : "MultiArray"
309
+ },
310
+ {
311
+ "hasShapeFlexibility" : "0",
312
+ "isOptional" : "0",
313
+ "dataType" : "Float16",
314
+ "formattedType" : "MultiArray (Float16 1 × 1 × 8 × 256)",
315
+ "shortDescription" : "",
316
+ "shape" : "[1, 1, 8, 256]",
317
+ "name" : "cos_s",
318
+ "type" : "MultiArray"
319
+ },
320
+ {
321
+ "hasShapeFlexibility" : "0",
322
+ "isOptional" : "0",
323
+ "dataType" : "Float16",
324
+ "formattedType" : "MultiArray (Float16 1 × 1 × 8 × 256)",
325
+ "shortDescription" : "",
326
+ "shape" : "[1, 1, 8, 256]",
327
+ "name" : "sin_s",
328
+ "type" : "MultiArray"
329
+ },
330
+ {
331
+ "hasShapeFlexibility" : "0",
332
+ "isOptional" : "0",
333
+ "dataType" : "Float16",
334
+ "formattedType" : "MultiArray (Float16 1 × 1 × 8 × 512)",
335
+ "shortDescription" : "",
336
+ "shape" : "[1, 1, 8, 512]",
337
+ "name" : "cos_f",
338
+ "type" : "MultiArray"
339
+ },
340
+ {
341
+ "hasShapeFlexibility" : "0",
342
+ "isOptional" : "0",
343
+ "dataType" : "Float16",
344
+ "formattedType" : "MultiArray (Float16 1 × 1 × 8 × 512)",
345
+ "shortDescription" : "",
346
+ "shape" : "[1, 1, 8, 512]",
347
+ "name" : "sin_f",
348
+ "type" : "MultiArray"
349
+ },
350
+ {
351
+ "hasShapeFlexibility" : "0",
352
+ "isOptional" : "0",
353
+ "dataType" : "Int32",
354
+ "formattedType" : "MultiArray (Int32 1)",
355
+ "shortDescription" : "",
356
+ "shape" : "[1]",
357
+ "name" : "current_pos",
358
+ "type" : "MultiArray"
359
+ },
360
+ {
361
+ "hasShapeFlexibility" : "0",
362
+ "isOptional" : "0",
363
+ "dataType" : "Int32",
364
+ "formattedType" : "MultiArray (Int32 1)",
365
+ "shortDescription" : "",
366
+ "shape" : "[1]",
367
+ "name" : "ring_pos",
368
+ "type" : "MultiArray"
369
+ }
370
+ ],
371
+ "computePrecision" : "Mixed (Float16, Float32, Int32)",
372
+ "storagePrecision" : "Mixed (Float16, Palettized (10 bits), Palettized (11 bits), Palettized (12 bits), Palettized (13 bits), Palettized (7 bits), Palettized (8 bits), UInt4)",
373
+ "stateSchema" : [
374
+ {
375
+ "dataType" : "Float16",
376
+ "isOptional" : "0",
377
+ "formattedType" : "State (Float16 10 × 1 × 512 × 512)",
378
+ "shortDescription" : "",
379
+ "shape" : "[10, 1, 512, 512]",
380
+ "name" : "kv_cache_sliding",
381
+ "type" : "State"
382
+ },
383
+ {
384
+ "dataType" : "Float16",
385
+ "isOptional" : "0",
386
+ "formattedType" : "State (Float16 4 × 1 × 2048 × 512)",
387
+ "shortDescription" : "",
388
+ "shape" : "[4, 1, 2048, 512]",
389
+ "name" : "kv_cache_full",
390
+ "type" : "State"
391
+ }
392
+ ],
393
+ "outputSchema" : [
394
+ {
395
+ "hasShapeFlexibility" : "0",
396
+ "isOptional" : "0",
397
+ "dataType" : "Float16",
398
+ "formattedType" : "MultiArray (Float16 1 × 8 × 1536)",
399
+ "shortDescription" : "",
400
+ "shape" : "[1, 8, 1536]",
401
+ "name" : "hidden_states_out",
402
+ "type" : "MultiArray"
403
+ },
404
+ {
405
+ "hasShapeFlexibility" : "0",
406
+ "isOptional" : "0",
407
+ "dataType" : "Float16",
408
+ "formattedType" : "MultiArray (Float16 1 × 1 × 512 × 256)",
409
+ "shortDescription" : "",
410
+ "shape" : "[1, 1, 512, 256]",
411
+ "name" : "kv13_k",
412
+ "type" : "MultiArray"
413
+ },
414
+ {
415
+ "hasShapeFlexibility" : "0",
416
+ "isOptional" : "0",
417
+ "dataType" : "Float16",
418
+ "formattedType" : "MultiArray (Float16 1 × 1 × 512 × 256)",
419
+ "shortDescription" : "",
420
+ "shape" : "[1, 1, 512, 256]",
421
+ "name" : "kv13_v",
422
+ "type" : "MultiArray"
423
+ },
424
+ {
425
+ "hasShapeFlexibility" : "0",
426
+ "isOptional" : "0",
427
+ "dataType" : "Float16",
428
+ "formattedType" : "MultiArray (Float16 1 × 1 × 2048 × 512)",
429
+ "shortDescription" : "",
430
+ "shape" : "[1, 1, 2048, 512]",
431
+ "name" : "kv14_k",
432
+ "type" : "MultiArray"
433
+ },
434
+ {
435
+ "hasShapeFlexibility" : "0",
436
+ "isOptional" : "0",
437
+ "dataType" : "Float16",
438
+ "formattedType" : "MultiArray (Float16 1 × 1 × 2048 × 512)",
439
+ "shortDescription" : "",
440
+ "shape" : "[1, 1, 2048, 512]",
441
+ "name" : "kv14_v",
442
+ "type" : "MultiArray"
443
+ }
444
+ ],
445
+ "name" : "prefill_b8",
446
+ "mlProgramOperationTypeHistogram" : {
447
+ "Ios18.mul" : 347,
448
+ "Ios18.matmul" : 34,
449
+ "Ios18.rsqrt" : 7,
450
+ "Ios16.reduceMean" : 7,
451
+ "Ios18.exp" : 17,
452
+ "Ios18.realDiv" : 17,
453
+ "Split" : 133,
454
+ "Ios18.readState" : 16,
455
+ "Ios16.reduceMax" : 17,
456
+ "Ios18.add" : 101,
457
+ "Tile" : 14,
458
+ "Ios18.layerNorm" : 109,
459
+ "Ios18.sliceUpdate" : 14,
460
+ "Ios18.writeState" : 14,
461
+ "Pad" : 10,
462
+ "Ios18.reshape" : 76,
463
+ "Ios16.reduceSum" : 17,
464
+ "Ios18.constexprLutToDense" : 133,
465
+ "Ios18.linear" : 133,
466
+ "Ios18.concat" : 161,
467
+ "Ios18.transpose" : 90,
468
+ "Ios18.sub" : 17,
469
+ "Ios18.pow" : 7,
470
+ "Ios18.gelu" : 34,
471
+ "Ios18.sliceByIndex" : 41
472
+ }
473
+ }
474
+ ],
475
+ "mlProgramOperationTypeHistogram" : {
476
+ "Ios18.mul" : 347,
477
+ "Ios18.matmul" : 34,
478
+ "Ios18.rsqrt" : 7,
479
+ "Ios16.reduceMean" : 7,
480
+ "Ios18.exp" : 17,
481
+ "Ios18.realDiv" : 17,
482
+ "Split" : 133,
483
+ "Ios18.readState" : 16,
484
+ "Ios16.reduceMax" : 17,
485
+ "Ios18.add" : 101,
486
+ "Tile" : 14,
487
+ "Ios18.layerNorm" : 109,
488
+ "Ios18.sliceUpdate" : 14,
489
+ "Ios18.writeState" : 14,
490
+ "Pad" : 10,
491
+ "Ios18.reshape" : 124,
492
+ "Ios16.reduceSum" : 17,
493
+ "Ios18.constexprLutToDense" : 133,
494
+ "Ios18.linear" : 133,
495
+ "Ios18.concat" : 161,
496
+ "Ios18.transpose" : 90,
497
+ "Ios18.sub" : 17,
498
+ "Ios18.pow" : 7,
499
+ "Ios18.gelu" : 34,
500
+ "Ios18.sliceByIndex" : 41
501
+ },
502
+ "isUpdatable" : "0",
503
+ "stateSchema" : [
504
+ {
505
+ "dataType" : "Float16",
506
+ "isOptional" : "0",
507
+ "formattedType" : "State (Float16 10 × 1 × 512 × 512)",
508
+ "shortDescription" : "",
509
+ "shape" : "[10, 1, 512, 512]",
510
+ "name" : "kv_cache_sliding",
511
+ "type" : "State"
512
+ },
513
+ {
514
+ "dataType" : "Float16",
515
+ "isOptional" : "0",
516
+ "formattedType" : "State (Float16 4 × 1 × 2048 × 512)",
517
+ "shortDescription" : "",
518
+ "shape" : "[4, 1, 2048, 512]",
519
+ "name" : "kv_cache_full",
520
+ "type" : "State"
521
+ }
522
+ ],
523
+ "availability" : {
524
+ "macOS" : "15.0",
525
+ "tvOS" : "18.0",
526
+ "visionOS" : "2.0",
527
+ "watchOS" : "11.0",
528
+ "iOS" : "18.0",
529
+ "macCatalyst" : "18.0"
530
+ },
531
+ "computePrecision" : "Mixed (Float16, Float32, Int32)",
532
+ "modelType" : {
533
+ "name" : "MLModelType_mlProgram"
534
+ },
535
+ "inputSchema" : [
536
+ {
537
+ "hasShapeFlexibility" : "0",
538
+ "isOptional" : "0",
539
+ "dataType" : "Float16",
540
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1536)",
541
+ "shortDescription" : "",
542
+ "shape" : "[1, 1, 1536]",
543
+ "name" : "hidden_states",
544
+ "type" : "MultiArray"
545
+ },
546
+ {
547
+ "hasShapeFlexibility" : "0",
548
+ "isOptional" : "0",
549
+ "dataType" : "Float16",
550
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 2048)",
551
+ "shortDescription" : "",
552
+ "shape" : "[1, 1, 1, 2048]",
553
+ "name" : "causal_mask_full",
554
+ "type" : "MultiArray"
555
+ },
556
+ {
557
+ "hasShapeFlexibility" : "0",
558
+ "isOptional" : "0",
559
+ "dataType" : "Float16",
560
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 512)",
561
+ "shortDescription" : "",
562
+ "shape" : "[1, 1, 1, 512]",
563
+ "name" : "causal_mask_sliding",
564
+ "type" : "MultiArray"
565
+ },
566
+ {
567
+ "hasShapeFlexibility" : "0",
568
+ "isOptional" : "0",
569
+ "dataType" : "Float16",
570
+ "formattedType" : "MultiArray (Float16 1 × 1 × 8960)",
571
+ "shortDescription" : "",
572
+ "shape" : "[1, 1, 8960]",
573
+ "name" : "per_layer_combined",
574
+ "type" : "MultiArray"
575
+ },
576
+ {
577
+ "hasShapeFlexibility" : "0",
578
+ "isOptional" : "0",
579
+ "dataType" : "Float16",
580
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 256)",
581
+ "shortDescription" : "",
582
+ "shape" : "[1, 1, 1, 256]",
583
+ "name" : "cos_s",
584
+ "type" : "MultiArray"
585
+ },
586
+ {
587
+ "hasShapeFlexibility" : "0",
588
+ "isOptional" : "0",
589
+ "dataType" : "Float16",
590
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 256)",
591
+ "shortDescription" : "",
592
+ "shape" : "[1, 1, 1, 256]",
593
+ "name" : "sin_s",
594
+ "type" : "MultiArray"
595
+ },
596
+ {
597
+ "hasShapeFlexibility" : "0",
598
+ "isOptional" : "0",
599
+ "dataType" : "Float16",
600
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 512)",
601
+ "shortDescription" : "",
602
+ "shape" : "[1, 1, 1, 512]",
603
+ "name" : "cos_f",
604
+ "type" : "MultiArray"
605
+ },
606
+ {
607
+ "hasShapeFlexibility" : "0",
608
+ "isOptional" : "0",
609
+ "dataType" : "Float16",
610
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 512)",
611
+ "shortDescription" : "",
612
+ "shape" : "[1, 1, 1, 512]",
613
+ "name" : "sin_f",
614
+ "type" : "MultiArray"
615
+ },
616
+ {
617
+ "hasShapeFlexibility" : "0",
618
+ "isOptional" : "0",
619
+ "dataType" : "Int32",
620
+ "formattedType" : "MultiArray (Int32 1)",
621
+ "shortDescription" : "",
622
+ "shape" : "[1]",
623
+ "name" : "current_pos",
624
+ "type" : "MultiArray"
625
+ },
626
+ {
627
+ "hasShapeFlexibility" : "0",
628
+ "isOptional" : "0",
629
+ "dataType" : "Int32",
630
+ "formattedType" : "MultiArray (Int32 1)",
631
+ "shortDescription" : "",
632
+ "shape" : "[1]",
633
+ "name" : "ring_pos",
634
+ "type" : "MultiArray"
635
+ }
636
+ ],
637
+ "defaultFunctionName" : "infer",
638
+ "generatedClassName" : "chunk_2",
639
+ "userDefinedMetadata" : {
640
+
641
+ },
642
+ "method" : "predict"
643
+ }
644
+ ]
chunk_2.mlmodelc/model.mil ADDED
The diff for this file is too large to render. See raw diff
 
chunk_2.mlmodelc/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c14fb533fe553f98f35702aa320da35edafe1194d09a120635040fa3dc012200
3
+ size 459299328
chunk_3.mlmodelc/analytics/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4754acc71a325a622165da06fb41f91208ec06b6cafb5ed196db46033a2e7f00
3
+ size 243
chunk_3.mlmodelc/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5cec10f75acb7f2833c242c96a264165e7246cb166ff6ead0a4f6704a5011fba
3
+ size 993
chunk_3.mlmodelc/metadata.json ADDED
@@ -0,0 +1,587 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "metadataOutputVersion" : "3.0",
4
+ "storagePrecision" : "Mixed (Float16, Palettized (10 bits), Palettized (11 bits), Palettized (13 bits), Palettized (17 bits), Palettized (7 bits), UInt4)",
5
+ "outputSchema" : [
6
+ {
7
+ "hasShapeFlexibility" : "0",
8
+ "isOptional" : "0",
9
+ "dataType" : "Int32",
10
+ "formattedType" : "MultiArray (Int32 1)",
11
+ "shortDescription" : "",
12
+ "shape" : "[1]",
13
+ "name" : "token_id",
14
+ "type" : "MultiArray"
15
+ },
16
+ {
17
+ "hasShapeFlexibility" : "0",
18
+ "isOptional" : "0",
19
+ "dataType" : "Float16",
20
+ "formattedType" : "MultiArray (Float16 1)",
21
+ "shortDescription" : "",
22
+ "shape" : "[1]",
23
+ "name" : "token_logit",
24
+ "type" : "MultiArray"
25
+ },
26
+ {
27
+ "hasShapeFlexibility" : "0",
28
+ "isOptional" : "0",
29
+ "dataType" : "Float16",
30
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1536)",
31
+ "shortDescription" : "",
32
+ "shape" : "[1, 1, 1536]",
33
+ "name" : "hidden_normed",
34
+ "type" : "MultiArray"
35
+ }
36
+ ],
37
+ "modelParameters" : [
38
+
39
+ ],
40
+ "specificationVersion" : 9,
41
+ "functions" : [
42
+ {
43
+ "inputSchema" : [
44
+ {
45
+ "hasShapeFlexibility" : "0",
46
+ "isOptional" : "0",
47
+ "dataType" : "Float16",
48
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1536)",
49
+ "shortDescription" : "",
50
+ "shape" : "[1, 1, 1536]",
51
+ "name" : "hidden_states",
52
+ "type" : "MultiArray"
53
+ },
54
+ {
55
+ "hasShapeFlexibility" : "0",
56
+ "isOptional" : "0",
57
+ "dataType" : "Float16",
58
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 2048)",
59
+ "shortDescription" : "",
60
+ "shape" : "[1, 1, 1, 2048]",
61
+ "name" : "causal_mask_full",
62
+ "type" : "MultiArray"
63
+ },
64
+ {
65
+ "hasShapeFlexibility" : "0",
66
+ "isOptional" : "0",
67
+ "dataType" : "Float16",
68
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 512)",
69
+ "shortDescription" : "",
70
+ "shape" : "[1, 1, 1, 512]",
71
+ "name" : "causal_mask_sliding",
72
+ "type" : "MultiArray"
73
+ },
74
+ {
75
+ "hasShapeFlexibility" : "0",
76
+ "isOptional" : "0",
77
+ "dataType" : "Float16",
78
+ "formattedType" : "MultiArray (Float16 1 × 1 × 8960)",
79
+ "shortDescription" : "",
80
+ "shape" : "[1, 1, 8960]",
81
+ "name" : "per_layer_combined",
82
+ "type" : "MultiArray"
83
+ },
84
+ {
85
+ "hasShapeFlexibility" : "0",
86
+ "isOptional" : "0",
87
+ "dataType" : "Float16",
88
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 256)",
89
+ "shortDescription" : "",
90
+ "shape" : "[1, 1, 1, 256]",
91
+ "name" : "cos_s",
92
+ "type" : "MultiArray"
93
+ },
94
+ {
95
+ "hasShapeFlexibility" : "0",
96
+ "isOptional" : "0",
97
+ "dataType" : "Float16",
98
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 256)",
99
+ "shortDescription" : "",
100
+ "shape" : "[1, 1, 1, 256]",
101
+ "name" : "sin_s",
102
+ "type" : "MultiArray"
103
+ },
104
+ {
105
+ "hasShapeFlexibility" : "0",
106
+ "isOptional" : "0",
107
+ "dataType" : "Float16",
108
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 512)",
109
+ "shortDescription" : "",
110
+ "shape" : "[1, 1, 1, 512]",
111
+ "name" : "cos_f",
112
+ "type" : "MultiArray"
113
+ },
114
+ {
115
+ "hasShapeFlexibility" : "0",
116
+ "isOptional" : "0",
117
+ "dataType" : "Float16",
118
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 512)",
119
+ "shortDescription" : "",
120
+ "shape" : "[1, 1, 1, 512]",
121
+ "name" : "sin_f",
122
+ "type" : "MultiArray"
123
+ },
124
+ {
125
+ "hasShapeFlexibility" : "0",
126
+ "isOptional" : "0",
127
+ "dataType" : "Float16",
128
+ "formattedType" : "MultiArray (Float16 1 × 1 × 512 × 256)",
129
+ "shortDescription" : "",
130
+ "shape" : "[1, 1, 512, 256]",
131
+ "name" : "kv13_k",
132
+ "type" : "MultiArray"
133
+ },
134
+ {
135
+ "hasShapeFlexibility" : "0",
136
+ "isOptional" : "0",
137
+ "dataType" : "Float16",
138
+ "formattedType" : "MultiArray (Float16 1 × 1 × 512 × 256)",
139
+ "shortDescription" : "",
140
+ "shape" : "[1, 1, 512, 256]",
141
+ "name" : "kv13_v",
142
+ "type" : "MultiArray"
143
+ },
144
+ {
145
+ "hasShapeFlexibility" : "0",
146
+ "isOptional" : "0",
147
+ "dataType" : "Float16",
148
+ "formattedType" : "MultiArray (Float16 1 × 1 × 2048 × 512)",
149
+ "shortDescription" : "",
150
+ "shape" : "[1, 1, 2048, 512]",
151
+ "name" : "kv14_k",
152
+ "type" : "MultiArray"
153
+ },
154
+ {
155
+ "hasShapeFlexibility" : "0",
156
+ "isOptional" : "0",
157
+ "dataType" : "Float16",
158
+ "formattedType" : "MultiArray (Float16 1 × 1 × 2048 × 512)",
159
+ "shortDescription" : "",
160
+ "shape" : "[1, 1, 2048, 512]",
161
+ "name" : "kv14_v",
162
+ "type" : "MultiArray"
163
+ }
164
+ ],
165
+ "computePrecision" : "Mixed (Float16, Int32)",
166
+ "storagePrecision" : "Mixed (Float16, Palettized (10 bits), Palettized (11 bits), Palettized (13 bits), Palettized (17 bits), Palettized (7 bits), UInt4)",
167
+ "stateSchema" : [
168
+
169
+ ],
170
+ "outputSchema" : [
171
+ {
172
+ "hasShapeFlexibility" : "0",
173
+ "isOptional" : "0",
174
+ "dataType" : "Int32",
175
+ "formattedType" : "MultiArray (Int32 1)",
176
+ "shortDescription" : "",
177
+ "shape" : "[1]",
178
+ "name" : "token_id",
179
+ "type" : "MultiArray"
180
+ },
181
+ {
182
+ "hasShapeFlexibility" : "0",
183
+ "isOptional" : "0",
184
+ "dataType" : "Float16",
185
+ "formattedType" : "MultiArray (Float16 1)",
186
+ "shortDescription" : "",
187
+ "shape" : "[1]",
188
+ "name" : "token_logit",
189
+ "type" : "MultiArray"
190
+ },
191
+ {
192
+ "hasShapeFlexibility" : "0",
193
+ "isOptional" : "0",
194
+ "dataType" : "Float16",
195
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1536)",
196
+ "shortDescription" : "",
197
+ "shape" : "[1, 1, 1536]",
198
+ "name" : "hidden_normed",
199
+ "type" : "MultiArray"
200
+ }
201
+ ],
202
+ "name" : "infer",
203
+ "mlProgramOperationTypeHistogram" : {
204
+ "Ios18.expandDims" : 1,
205
+ "Ios18.mul" : 184,
206
+ "Ios18.matmul" : 20,
207
+ "Ios18.exp" : 10,
208
+ "Ios18.realDiv" : 10,
209
+ "Split" : 71,
210
+ "Ios18.gatherAlongAxis" : 1,
211
+ "Ios16.reduceMax" : 10,
212
+ "Tile" : 4,
213
+ "Ios16.reduceSum" : 10,
214
+ "Ios18.add" : 50,
215
+ "Ios18.layerNorm" : 61,
216
+ "Ios18.reduceArgmax" : 1,
217
+ "Ios18.reshape" : 48,
218
+ "Ios18.constexprLutToDense" : 71,
219
+ "Ios18.linear" : 71,
220
+ "Ios18.gelu" : 20,
221
+ "Ios18.concat" : 71,
222
+ "Ios18.sub" : 10,
223
+ "Ios18.tanh" : 1,
224
+ "Ios18.transpose" : 32,
225
+ "Ios18.squeeze" : 2,
226
+ "Ios18.sliceByIndex" : 10
227
+ }
228
+ },
229
+ {
230
+ "inputSchema" : [
231
+ {
232
+ "hasShapeFlexibility" : "0",
233
+ "isOptional" : "0",
234
+ "dataType" : "Float16",
235
+ "formattedType" : "MultiArray (Float16 1 × 8 × 1536)",
236
+ "shortDescription" : "",
237
+ "shape" : "[1, 8, 1536]",
238
+ "name" : "hidden_states",
239
+ "type" : "MultiArray"
240
+ },
241
+ {
242
+ "hasShapeFlexibility" : "0",
243
+ "isOptional" : "0",
244
+ "dataType" : "Float16",
245
+ "formattedType" : "MultiArray (Float16 1 × 1 × 8 × 2048)",
246
+ "shortDescription" : "",
247
+ "shape" : "[1, 1, 8, 2048]",
248
+ "name" : "causal_mask_full",
249
+ "type" : "MultiArray"
250
+ },
251
+ {
252
+ "hasShapeFlexibility" : "0",
253
+ "isOptional" : "0",
254
+ "dataType" : "Float16",
255
+ "formattedType" : "MultiArray (Float16 1 × 1 × 8 × 512)",
256
+ "shortDescription" : "",
257
+ "shape" : "[1, 1, 8, 512]",
258
+ "name" : "causal_mask_sliding",
259
+ "type" : "MultiArray"
260
+ },
261
+ {
262
+ "hasShapeFlexibility" : "0",
263
+ "isOptional" : "0",
264
+ "dataType" : "Float16",
265
+ "formattedType" : "MultiArray (Float16 1 × 8 × 8960)",
266
+ "shortDescription" : "",
267
+ "shape" : "[1, 8, 8960]",
268
+ "name" : "per_layer_combined",
269
+ "type" : "MultiArray"
270
+ },
271
+ {
272
+ "hasShapeFlexibility" : "0",
273
+ "isOptional" : "0",
274
+ "dataType" : "Float16",
275
+ "formattedType" : "MultiArray (Float16 1 × 1 × 8 × 256)",
276
+ "shortDescription" : "",
277
+ "shape" : "[1, 1, 8, 256]",
278
+ "name" : "cos_s",
279
+ "type" : "MultiArray"
280
+ },
281
+ {
282
+ "hasShapeFlexibility" : "0",
283
+ "isOptional" : "0",
284
+ "dataType" : "Float16",
285
+ "formattedType" : "MultiArray (Float16 1 × 1 × 8 × 256)",
286
+ "shortDescription" : "",
287
+ "shape" : "[1, 1, 8, 256]",
288
+ "name" : "sin_s",
289
+ "type" : "MultiArray"
290
+ },
291
+ {
292
+ "hasShapeFlexibility" : "0",
293
+ "isOptional" : "0",
294
+ "dataType" : "Float16",
295
+ "formattedType" : "MultiArray (Float16 1 × 1 × 8 × 512)",
296
+ "shortDescription" : "",
297
+ "shape" : "[1, 1, 8, 512]",
298
+ "name" : "cos_f",
299
+ "type" : "MultiArray"
300
+ },
301
+ {
302
+ "hasShapeFlexibility" : "0",
303
+ "isOptional" : "0",
304
+ "dataType" : "Float16",
305
+ "formattedType" : "MultiArray (Float16 1 × 1 × 8 × 512)",
306
+ "shortDescription" : "",
307
+ "shape" : "[1, 1, 8, 512]",
308
+ "name" : "sin_f",
309
+ "type" : "MultiArray"
310
+ },
311
+ {
312
+ "hasShapeFlexibility" : "0",
313
+ "isOptional" : "0",
314
+ "dataType" : "Float16",
315
+ "formattedType" : "MultiArray (Float16 1 × 1 × 512 × 256)",
316
+ "shortDescription" : "",
317
+ "shape" : "[1, 1, 512, 256]",
318
+ "name" : "kv13_k",
319
+ "type" : "MultiArray"
320
+ },
321
+ {
322
+ "hasShapeFlexibility" : "0",
323
+ "isOptional" : "0",
324
+ "dataType" : "Float16",
325
+ "formattedType" : "MultiArray (Float16 1 × 1 × 512 × 256)",
326
+ "shortDescription" : "",
327
+ "shape" : "[1, 1, 512, 256]",
328
+ "name" : "kv13_v",
329
+ "type" : "MultiArray"
330
+ },
331
+ {
332
+ "hasShapeFlexibility" : "0",
333
+ "isOptional" : "0",
334
+ "dataType" : "Float16",
335
+ "formattedType" : "MultiArray (Float16 1 × 1 × 2048 × 512)",
336
+ "shortDescription" : "",
337
+ "shape" : "[1, 1, 2048, 512]",
338
+ "name" : "kv14_k",
339
+ "type" : "MultiArray"
340
+ },
341
+ {
342
+ "hasShapeFlexibility" : "0",
343
+ "isOptional" : "0",
344
+ "dataType" : "Float16",
345
+ "formattedType" : "MultiArray (Float16 1 × 1 × 2048 × 512)",
346
+ "shortDescription" : "",
347
+ "shape" : "[1, 1, 2048, 512]",
348
+ "name" : "kv14_v",
349
+ "type" : "MultiArray"
350
+ }
351
+ ],
352
+ "computePrecision" : "Mixed (Float16, Int32)",
353
+ "storagePrecision" : "Mixed (Float16, Palettized (10 bits), Palettized (11 bits), Palettized (13 bits), Palettized (17 bits), Palettized (7 bits), UInt4)",
354
+ "stateSchema" : [
355
+
356
+ ],
357
+ "outputSchema" : [
358
+ {
359
+ "hasShapeFlexibility" : "0",
360
+ "isOptional" : "0",
361
+ "dataType" : "Int32",
362
+ "formattedType" : "MultiArray (Int32 1)",
363
+ "shortDescription" : "",
364
+ "shape" : "[1]",
365
+ "name" : "token_id",
366
+ "type" : "MultiArray"
367
+ },
368
+ {
369
+ "hasShapeFlexibility" : "0",
370
+ "isOptional" : "0",
371
+ "dataType" : "Float16",
372
+ "formattedType" : "MultiArray (Float16 1)",
373
+ "shortDescription" : "",
374
+ "shape" : "[1]",
375
+ "name" : "token_logit",
376
+ "type" : "MultiArray"
377
+ },
378
+ {
379
+ "hasShapeFlexibility" : "0",
380
+ "isOptional" : "0",
381
+ "dataType" : "Float16",
382
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1536)",
383
+ "shortDescription" : "",
384
+ "shape" : "[1, 1, 1536]",
385
+ "name" : "hidden_normed",
386
+ "type" : "MultiArray"
387
+ }
388
+ ],
389
+ "name" : "prefill_b8",
390
+ "mlProgramOperationTypeHistogram" : {
391
+ "Ios18.expandDims" : 1,
392
+ "Ios18.mul" : 184,
393
+ "Ios18.matmul" : 20,
394
+ "Ios18.exp" : 10,
395
+ "Ios18.realDiv" : 10,
396
+ "Split" : 71,
397
+ "Ios18.gatherAlongAxis" : 1,
398
+ "Ios16.reduceMax" : 10,
399
+ "Tile" : 4,
400
+ "Ios16.reduceSum" : 10,
401
+ "Ios18.add" : 50,
402
+ "Ios18.layerNorm" : 61,
403
+ "Ios18.reduceArgmax" : 1,
404
+ "Ios18.reshape" : 28,
405
+ "Ios18.constexprLutToDense" : 71,
406
+ "Ios18.linear" : 71,
407
+ "Ios18.gelu" : 20,
408
+ "Ios18.concat" : 71,
409
+ "Ios18.sub" : 10,
410
+ "Ios18.tanh" : 1,
411
+ "Ios18.transpose" : 32,
412
+ "Ios18.squeeze" : 2,
413
+ "Ios18.sliceByIndex" : 11
414
+ }
415
+ }
416
+ ],
417
+ "mlProgramOperationTypeHistogram" : {
418
+ "Ios18.expandDims" : 1,
419
+ "Ios18.mul" : 184,
420
+ "Ios18.matmul" : 20,
421
+ "Ios18.exp" : 10,
422
+ "Ios18.realDiv" : 10,
423
+ "Split" : 71,
424
+ "Ios18.gatherAlongAxis" : 1,
425
+ "Ios16.reduceMax" : 10,
426
+ "Tile" : 4,
427
+ "Ios16.reduceSum" : 10,
428
+ "Ios18.add" : 50,
429
+ "Ios18.layerNorm" : 61,
430
+ "Ios18.reduceArgmax" : 1,
431
+ "Ios18.reshape" : 48,
432
+ "Ios18.constexprLutToDense" : 71,
433
+ "Ios18.linear" : 71,
434
+ "Ios18.gelu" : 20,
435
+ "Ios18.concat" : 71,
436
+ "Ios18.sub" : 10,
437
+ "Ios18.tanh" : 1,
438
+ "Ios18.transpose" : 32,
439
+ "Ios18.squeeze" : 2,
440
+ "Ios18.sliceByIndex" : 10
441
+ },
442
+ "isUpdatable" : "0",
443
+ "stateSchema" : [
444
+
445
+ ],
446
+ "availability" : {
447
+ "macOS" : "15.0",
448
+ "tvOS" : "18.0",
449
+ "visionOS" : "2.0",
450
+ "watchOS" : "11.0",
451
+ "iOS" : "18.0",
452
+ "macCatalyst" : "18.0"
453
+ },
454
+ "computePrecision" : "Mixed (Float16, Int32)",
455
+ "modelType" : {
456
+ "name" : "MLModelType_mlProgram"
457
+ },
458
+ "inputSchema" : [
459
+ {
460
+ "hasShapeFlexibility" : "0",
461
+ "isOptional" : "0",
462
+ "dataType" : "Float16",
463
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1536)",
464
+ "shortDescription" : "",
465
+ "shape" : "[1, 1, 1536]",
466
+ "name" : "hidden_states",
467
+ "type" : "MultiArray"
468
+ },
469
+ {
470
+ "hasShapeFlexibility" : "0",
471
+ "isOptional" : "0",
472
+ "dataType" : "Float16",
473
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 2048)",
474
+ "shortDescription" : "",
475
+ "shape" : "[1, 1, 1, 2048]",
476
+ "name" : "causal_mask_full",
477
+ "type" : "MultiArray"
478
+ },
479
+ {
480
+ "hasShapeFlexibility" : "0",
481
+ "isOptional" : "0",
482
+ "dataType" : "Float16",
483
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 512)",
484
+ "shortDescription" : "",
485
+ "shape" : "[1, 1, 1, 512]",
486
+ "name" : "causal_mask_sliding",
487
+ "type" : "MultiArray"
488
+ },
489
+ {
490
+ "hasShapeFlexibility" : "0",
491
+ "isOptional" : "0",
492
+ "dataType" : "Float16",
493
+ "formattedType" : "MultiArray (Float16 1 × 1 × 8960)",
494
+ "shortDescription" : "",
495
+ "shape" : "[1, 1, 8960]",
496
+ "name" : "per_layer_combined",
497
+ "type" : "MultiArray"
498
+ },
499
+ {
500
+ "hasShapeFlexibility" : "0",
501
+ "isOptional" : "0",
502
+ "dataType" : "Float16",
503
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 256)",
504
+ "shortDescription" : "",
505
+ "shape" : "[1, 1, 1, 256]",
506
+ "name" : "cos_s",
507
+ "type" : "MultiArray"
508
+ },
509
+ {
510
+ "hasShapeFlexibility" : "0",
511
+ "isOptional" : "0",
512
+ "dataType" : "Float16",
513
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 256)",
514
+ "shortDescription" : "",
515
+ "shape" : "[1, 1, 1, 256]",
516
+ "name" : "sin_s",
517
+ "type" : "MultiArray"
518
+ },
519
+ {
520
+ "hasShapeFlexibility" : "0",
521
+ "isOptional" : "0",
522
+ "dataType" : "Float16",
523
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 512)",
524
+ "shortDescription" : "",
525
+ "shape" : "[1, 1, 1, 512]",
526
+ "name" : "cos_f",
527
+ "type" : "MultiArray"
528
+ },
529
+ {
530
+ "hasShapeFlexibility" : "0",
531
+ "isOptional" : "0",
532
+ "dataType" : "Float16",
533
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 512)",
534
+ "shortDescription" : "",
535
+ "shape" : "[1, 1, 1, 512]",
536
+ "name" : "sin_f",
537
+ "type" : "MultiArray"
538
+ },
539
+ {
540
+ "hasShapeFlexibility" : "0",
541
+ "isOptional" : "0",
542
+ "dataType" : "Float16",
543
+ "formattedType" : "MultiArray (Float16 1 × 1 × 512 × 256)",
544
+ "shortDescription" : "",
545
+ "shape" : "[1, 1, 512, 256]",
546
+ "name" : "kv13_k",
547
+ "type" : "MultiArray"
548
+ },
549
+ {
550
+ "hasShapeFlexibility" : "0",
551
+ "isOptional" : "0",
552
+ "dataType" : "Float16",
553
+ "formattedType" : "MultiArray (Float16 1 × 1 × 512 × 256)",
554
+ "shortDescription" : "",
555
+ "shape" : "[1, 1, 512, 256]",
556
+ "name" : "kv13_v",
557
+ "type" : "MultiArray"
558
+ },
559
+ {
560
+ "hasShapeFlexibility" : "0",
561
+ "isOptional" : "0",
562
+ "dataType" : "Float16",
563
+ "formattedType" : "MultiArray (Float16 1 × 1 × 2048 × 512)",
564
+ "shortDescription" : "",
565
+ "shape" : "[1, 1, 2048, 512]",
566
+ "name" : "kv14_k",
567
+ "type" : "MultiArray"
568
+ },
569
+ {
570
+ "hasShapeFlexibility" : "0",
571
+ "isOptional" : "0",
572
+ "dataType" : "Float16",
573
+ "formattedType" : "MultiArray (Float16 1 × 1 × 2048 × 512)",
574
+ "shortDescription" : "",
575
+ "shape" : "[1, 1, 2048, 512]",
576
+ "name" : "kv14_v",
577
+ "type" : "MultiArray"
578
+ }
579
+ ],
580
+ "defaultFunctionName" : "infer",
581
+ "generatedClassName" : "chunk_3",
582
+ "userDefinedMetadata" : {
583
+
584
+ },
585
+ "method" : "predict"
586
+ }
587
+ ]
chunk_3.mlmodelc/model.mil ADDED
The diff for this file is too large to render. See raw diff
 
chunk_3.mlmodelc/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dcf9130197bb9e56e106b559ef2cb767f54fef7eb5e621e8cd27111303ceac44
3
+ size 527440000
cos_full.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4d3fc1ee45657134fda48f64d388bf70a68efe6d137abf2881ab4b6948af6d7
3
+ size 8388736
cos_sliding.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a63098efaaf56e18c98f862987cf50202f0bb71d6f0def960fe1565260a7ffc
3
+ size 4194432
embed_tokens_per_layer_q8.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd1af367cde6fcb67a48925c0c566b0aac11946248d8e3cfebc4027550947cd8
3
+ size 2348810240
embed_tokens_per_layer_scales.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a439606004e3372158eed3efd4c671cefea056122afb93c546108411ea41b057
3
+ size 524288
embed_tokens_q8.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2286db227b17e71fb3908983999876625c811295055457b6f798e0f09969797a
3
+ size 402653184
embed_tokens_scales.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db47a8808d782d3c2b1ab722833b425e0bb4755360409d766acb89a4a8443461
3
+ size 524288
hf_model/config.json ADDED
@@ -0,0 +1,190 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Gemma4ForConditionalGeneration"
4
+ ],
5
+ "audio_config": {
6
+ "_name_or_path": "",
7
+ "architectures": null,
8
+ "attention_chunk_size": 12,
9
+ "attention_context_left": 13,
10
+ "attention_context_right": 0,
11
+ "attention_invalid_logits_value": -1000000000.0,
12
+ "attention_logit_cap": 50.0,
13
+ "chunk_size_feed_forward": 0,
14
+ "conv_kernel_size": 5,
15
+ "dtype": "bfloat16",
16
+ "gradient_clipping": 10000000000.0,
17
+ "hidden_act": "silu",
18
+ "hidden_size": 1024,
19
+ "id2label": {
20
+ "0": "LABEL_0",
21
+ "1": "LABEL_1"
22
+ },
23
+ "initializer_range": 0.02,
24
+ "is_encoder_decoder": false,
25
+ "label2id": {
26
+ "LABEL_0": 0,
27
+ "LABEL_1": 1
28
+ },
29
+ "model_type": "gemma4_audio",
30
+ "num_attention_heads": 8,
31
+ "num_hidden_layers": 12,
32
+ "output_attentions": false,
33
+ "output_hidden_states": false,
34
+ "output_proj_dims": 1536,
35
+ "problem_type": null,
36
+ "residual_weight": 0.5,
37
+ "return_dict": true,
38
+ "rms_norm_eps": 1e-06,
39
+ "subsampling_conv_channels": [
40
+ 128,
41
+ 32
42
+ ],
43
+ "use_clipped_linears": true
44
+ },
45
+ "audio_token_id": 258881,
46
+ "boa_token_id": 256000,
47
+ "boi_token_id": 255999,
48
+ "dtype": "bfloat16",
49
+ "eoa_token_id": 258883,
50
+ "eoa_token_index": 258883,
51
+ "eoi_token_id": 258882,
52
+ "eos_token_id": [
53
+ 1,
54
+ 106
55
+ ],
56
+ "image_token_id": 258880,
57
+ "initializer_range": 0.02,
58
+ "model_type": "gemma4",
59
+ "text_config": {
60
+ "attention_bias": false,
61
+ "attention_dropout": 0.0,
62
+ "attention_k_eq_v": false,
63
+ "bos_token_id": 2,
64
+ "dtype": "bfloat16",
65
+ "enable_moe_block": false,
66
+ "eos_token_id": 1,
67
+ "expert_intermediate_size": null,
68
+ "final_logit_softcapping": 30.0,
69
+ "global_head_dim": 512,
70
+ "head_dim": 256,
71
+ "hidden_activation": "gelu_pytorch_tanh",
72
+ "hidden_size": 1536,
73
+ "hidden_size_per_layer_input": 256,
74
+ "initializer_range": 0.02,
75
+ "intermediate_size": 6144,
76
+ "layer_types": [
77
+ "sliding_attention",
78
+ "sliding_attention",
79
+ "sliding_attention",
80
+ "sliding_attention",
81
+ "full_attention",
82
+ "sliding_attention",
83
+ "sliding_attention",
84
+ "sliding_attention",
85
+ "sliding_attention",
86
+ "full_attention",
87
+ "sliding_attention",
88
+ "sliding_attention",
89
+ "sliding_attention",
90
+ "sliding_attention",
91
+ "full_attention",
92
+ "sliding_attention",
93
+ "sliding_attention",
94
+ "sliding_attention",
95
+ "sliding_attention",
96
+ "full_attention",
97
+ "sliding_attention",
98
+ "sliding_attention",
99
+ "sliding_attention",
100
+ "sliding_attention",
101
+ "full_attention",
102
+ "sliding_attention",
103
+ "sliding_attention",
104
+ "sliding_attention",
105
+ "sliding_attention",
106
+ "full_attention",
107
+ "sliding_attention",
108
+ "sliding_attention",
109
+ "sliding_attention",
110
+ "sliding_attention",
111
+ "full_attention"
112
+ ],
113
+ "max_position_embeddings": 131072,
114
+ "model_type": "gemma4_text",
115
+ "num_attention_heads": 8,
116
+ "num_experts": null,
117
+ "num_global_key_value_heads": null,
118
+ "num_hidden_layers": 35,
119
+ "num_key_value_heads": 1,
120
+ "num_kv_shared_layers": 20,
121
+ "pad_token_id": 0,
122
+ "rms_norm_eps": 1e-06,
123
+ "rope_parameters": {
124
+ "full_attention": {
125
+ "partial_rotary_factor": 0.25,
126
+ "rope_theta": 1000000.0,
127
+ "rope_type": "proportional"
128
+ },
129
+ "sliding_attention": {
130
+ "rope_theta": 10000.0,
131
+ "rope_type": "default"
132
+ }
133
+ },
134
+ "sliding_window": 512,
135
+ "tie_word_embeddings": true,
136
+ "top_k_experts": null,
137
+ "use_bidirectional_attention": null,
138
+ "use_cache": true,
139
+ "use_double_wide_mlp": true,
140
+ "vocab_size": 262144,
141
+ "vocab_size_per_layer_input": 262144
142
+ },
143
+ "tie_word_embeddings": true,
144
+ "transformers_version": "5.5.0.dev0",
145
+ "video_token_id": 258884,
146
+ "vision_config": {
147
+ "_name_or_path": "",
148
+ "architectures": null,
149
+ "attention_bias": false,
150
+ "attention_dropout": 0.0,
151
+ "chunk_size_feed_forward": 0,
152
+ "default_output_length": 280,
153
+ "dtype": "bfloat16",
154
+ "global_head_dim": 64,
155
+ "head_dim": 64,
156
+ "hidden_activation": "gelu_pytorch_tanh",
157
+ "hidden_size": 768,
158
+ "id2label": {
159
+ "0": "LABEL_0",
160
+ "1": "LABEL_1"
161
+ },
162
+ "initializer_range": 0.02,
163
+ "intermediate_size": 3072,
164
+ "is_encoder_decoder": false,
165
+ "label2id": {
166
+ "LABEL_0": 0,
167
+ "LABEL_1": 1
168
+ },
169
+ "max_position_embeddings": 131072,
170
+ "model_type": "gemma4_vision",
171
+ "num_attention_heads": 12,
172
+ "num_hidden_layers": 16,
173
+ "num_key_value_heads": 12,
174
+ "output_attentions": false,
175
+ "output_hidden_states": false,
176
+ "patch_size": 16,
177
+ "pooling_kernel_size": 3,
178
+ "position_embedding_size": 10240,
179
+ "problem_type": null,
180
+ "return_dict": true,
181
+ "rms_norm_eps": 1e-06,
182
+ "rope_parameters": {
183
+ "rope_theta": 100.0,
184
+ "rope_type": "default"
185
+ },
186
+ "standardize": false,
187
+ "use_clipped_linears": true
188
+ },
189
+ "vision_soft_tokens_per_image": 280
190
+ }
hf_model/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc8d3a0ce36466ccc1278bf987df5f71db1719b9ca6b4118264f45cb627bfe0f
3
+ size 32169626
hf_model/tokenizer_config.json ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "audio_token": "<|audio|>",
3
+ "backend": "tokenizers",
4
+ "boa_token": "<|audio>",
5
+ "boi_token": "<|image>",
6
+ "bos_token": "<bos>",
7
+ "eoa_token": "<audio|>",
8
+ "eoc_token": "<channel|>",
9
+ "eoi_token": "<image|>",
10
+ "eos_token": "<eos>",
11
+ "eot_token": "<turn|>",
12
+ "escape_token": "<|\"|>",
13
+ "etc_token": "<tool_call|>",
14
+ "etd_token": "<tool|>",
15
+ "etr_token": "<tool_response|>",
16
+ "extra_special_tokens": [
17
+ "<|video|>"
18
+ ],
19
+ "image_token": "<|image|>",
20
+ "mask_token": "<mask>",
21
+ "model_max_length": 1000000000000000019884624838656,
22
+ "pad_token": "<pad>",
23
+ "padding_side": "left",
24
+ "processor_class": "Gemma4Processor",
25
+ "response_schema": {
26
+ "type": "object",
27
+ "properties": {
28
+ "role": {
29
+ "const": "assistant"
30
+ },
31
+ "thinking": {
32
+ "type": "string"
33
+ },
34
+ "content": {
35
+ "type": "string"
36
+ },
37
+ "tool_calls": {
38
+ "x-regex-iterator": "<\\|tool_call>(.*?)<tool_call\\|>",
39
+ "type": "array",
40
+ "items": {
41
+ "type": "object",
42
+ "properties": {
43
+ "type": {
44
+ "const": "function"
45
+ },
46
+ "function": {
47
+ "type": "object",
48
+ "x-regex": "call\\:(?P<name>\\w+)(?P<arguments>\\{.*\\})",
49
+ "properties": {
50
+ "name": {
51
+ "type": "string"
52
+ },
53
+ "arguments": {
54
+ "type": "object",
55
+ "x-parser": "gemma4-tool-call",
56
+ "additionalProperties": {}
57
+ }
58
+ }
59
+ }
60
+ }
61
+ }
62
+ }
63
+ },
64
+ "x-regex": "(\\<\\|channel\\>thought\\n(?P<thinking>.*?)\\<channel\\|\\>)?(?P<content>(?:(?!\\<\\|tool_call\\>)(?!\\<turn\\|\\>).)+)?(?P<tool_calls>\\<\\|tool_call\\>.*\\<tool_call\\|\\>)?(?:\\<turn\\|\\>)?"
65
+ },
66
+ "soc_token": "<|channel>",
67
+ "sot_token": "<|turn>",
68
+ "stc_token": "<|tool_call>",
69
+ "std_token": "<|tool>",
70
+ "str_token": "<|tool_response>",
71
+ "think_token": "<|think|>",
72
+ "tokenizer_class": "GemmaTokenizer",
73
+ "unk_token": "<unk>"
74
+ }
model_config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_name": "gemma4-e2b-swa-ple",
3
+ "architecture": "gemma4",
4
+ "hidden_size": 1536,
5
+ "num_hidden_layers": 35,
6
+ "context_length": 2048,
7
+ "sliding_window": 512,
8
+ "vocab_size": 262144,
9
+ "bos_token_id": 2,
10
+ "eos_token_id": 1,
11
+ "per_layer_dim": 256,
12
+ "max_head_dim": 512,
13
+ "embed_scale": 39.191835884530846,
14
+ "per_layer_model_projection_scale": 0.02551551815399144,
15
+ "per_layer_input_scale": 0.7071067811865476,
16
+ "per_layer_embed_scale": 16.0,
17
+ "external_embeddings": true,
18
+ "has_multimodal": true,
19
+ "stateless": true,
20
+ "sliding_window_attention": true,
21
+ "ple_inside_chunk1": true,
22
+ "num_chunks": 4
23
+ }
per_layer_norm_weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc90ef48ae4554761ff0c3a802ac0c6f08f4818fca7a7ecb7409a5a20da92a77
3
+ size 1024
per_layer_projection.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d3db373cb38470e868fbf45bbddb13198fd61bbe57073340cfd9035028bb062
3
+ size 27525120
sin_full.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79b947cd56c5043e1cae19d914dfc13051743229c95a6ed29a5ce03f4091e26f
3
+ size 8388736
sin_sliding.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:26fd591b8da63bf361713be334ec64dbd1d9809643fa8e74c359415d458f159c
3
+ size 4194432