sp00ktober committed on
Commit
6ada18d
·
1 Parent(s): d3f9a42

Remove old lut6 models and .mlpackage files

Browse files

Cleanup to reduce download size and prevent memory issues:
- Removed lut6 .mlmodelc files (replaced by lut8)
- Removed all .mlpackage files (only .mlmodelc needed for inference)
- Removed meta_progress.yaml (not needed)

Files changed (26) hide show
  1. llama_FFN_PF_lut6_chunk_01of01.mlmodelc/analytics/coremldata.bin +0 -3
  2. llama_FFN_PF_lut6_chunk_01of01.mlmodelc/coremldata.bin +0 -3
  3. llama_FFN_PF_lut6_chunk_01of01.mlmodelc/metadata.json +0 -333
  4. llama_FFN_PF_lut6_chunk_01of01.mlmodelc/model.mil +0 -0
  5. llama_FFN_PF_lut6_chunk_01of01.mlmodelc/weights/weight.bin +0 -3
  6. llama_FFN_PF_lut6_chunk_01of01.mlpackage/Data/com.apple.CoreML/model.mlmodel +0 -3
  7. llama_FFN_PF_lut6_chunk_01of01.mlpackage/Data/com.apple.CoreML/weights/weight.bin +0 -3
  8. llama_FFN_PF_lut6_chunk_01of01.mlpackage/Manifest.json +0 -18
  9. llama_FFN_lut6_chunk_01of01.mlpackage/Data/com.apple.CoreML/model.mlmodel +0 -3
  10. llama_FFN_lut6_chunk_01of01.mlpackage/Data/com.apple.CoreML/weights/weight.bin +0 -3
  11. llama_FFN_lut6_chunk_01of01.mlpackage/Manifest.json +0 -18
  12. llama_embeddings.mlpackage/Data/com.apple.CoreML/model.mlmodel +0 -3
  13. llama_embeddings.mlpackage/Data/com.apple.CoreML/weights/weight.bin +0 -3
  14. llama_embeddings.mlpackage/Manifest.json +0 -18
  15. llama_lm_head_lut6.mlmodelc/analytics/coremldata.bin +0 -3
  16. llama_lm_head_lut6.mlmodelc/coremldata.bin +0 -3
  17. llama_lm_head_lut6.mlmodelc/metadata.json +0 -143
  18. llama_lm_head_lut6.mlmodelc/model.mil +0 -98
  19. llama_lm_head_lut6.mlmodelc/weights/weight.bin +0 -3
  20. llama_lm_head_lut6.mlpackage/Data/com.apple.CoreML/model.mlmodel +0 -3
  21. llama_lm_head_lut6.mlpackage/Data/com.apple.CoreML/weights/weight.bin +0 -3
  22. llama_lm_head_lut6.mlpackage/Manifest.json +0 -18
  23. llama_prefill_lut6_chunk_01of01.mlpackage/Data/com.apple.CoreML/model.mlmodel +0 -3
  24. llama_prefill_lut6_chunk_01of01.mlpackage/Data/com.apple.CoreML/weights/weight.bin +0 -3
  25. llama_prefill_lut6_chunk_01of01.mlpackage/Manifest.json +0 -18
  26. meta_progress.yaml +0 -51
llama_FFN_PF_lut6_chunk_01of01.mlmodelc/analytics/coremldata.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:e53b9306ddaf9d32f54326b70d13396584a1cd78c5a4bcf99b4b3cdc4011e2a7
3
- size 243
 
 
 
 
llama_FFN_PF_lut6_chunk_01of01.mlmodelc/coremldata.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:9421991cfae970be73591364d1b1d6b7b41cdd5f508b597bee227cf92c8dd386
3
- size 981
 
 
 
 
llama_FFN_PF_lut6_chunk_01of01.mlmodelc/metadata.json DELETED
@@ -1,333 +0,0 @@
1
- [
2
- {
3
- "metadataOutputVersion" : "3.0",
4
- "userDefinedMetadata" : {
5
- "com.anemll.chunk_no" : "1",
6
- "com.github.apple.coremltools.source" : "torch==2.5.0",
7
- "com.anemll.context_length" : "2048",
8
- "com.github.apple.coremltools.version" : "9.0",
9
- "com.github.apple.coremltools.source_dialect" : "TorchScript",
10
- "com.anemll.num_chunks" : "1",
11
- "com.anemll.info" : "Converted with Anemll v0.1.1",
12
- "com.anemll.batch_size" : "64",
13
- "com.anemll.lut_bits" : "6"
14
- },
15
- "availability" : {
16
- "macOS" : "15.0",
17
- "tvOS" : "18.0",
18
- "visionOS" : "2.0",
19
- "watchOS" : "11.0",
20
- "iOS" : "18.0",
21
- "macCatalyst" : "18.0"
22
- },
23
- "inputSchema" : [
24
- {
25
- "hasShapeFlexibility" : "0",
26
- "isOptional" : "0",
27
- "dataType" : "Float16",
28
- "formattedType" : "MultiArray (Float16 1 × 1 × 2048)",
29
- "shortDescription" : "",
30
- "shape" : "[1, 1, 2048]",
31
- "name" : "hidden_states",
32
- "type" : "MultiArray"
33
- },
34
- {
35
- "hasShapeFlexibility" : "0",
36
- "isOptional" : "0",
37
- "dataType" : "Int32",
38
- "formattedType" : "MultiArray (Int32 1)",
39
- "shortDescription" : "",
40
- "shape" : "[1]",
41
- "name" : "position_ids",
42
- "type" : "MultiArray"
43
- },
44
- {
45
- "hasShapeFlexibility" : "0",
46
- "isOptional" : "0",
47
- "dataType" : "Float16",
48
- "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 2048)",
49
- "shortDescription" : "",
50
- "shape" : "[1, 1, 1, 2048]",
51
- "name" : "causal_mask",
52
- "type" : "MultiArray"
53
- },
54
- {
55
- "hasShapeFlexibility" : "0",
56
- "isOptional" : "0",
57
- "dataType" : "Int32",
58
- "formattedType" : "MultiArray (Int32 1)",
59
- "shortDescription" : "",
60
- "shape" : "[1]",
61
- "name" : "current_pos",
62
- "type" : "MultiArray"
63
- }
64
- ],
65
- "outputSchema" : [
66
- {
67
- "hasShapeFlexibility" : "0",
68
- "isOptional" : "0",
69
- "dataType" : "Float16",
70
- "formattedType" : "MultiArray (Float16 1 × 1 × 2048)",
71
- "shortDescription" : "",
72
- "shape" : "[1, 1, 2048]",
73
- "name" : "output_hidden_states",
74
- "type" : "MultiArray"
75
- }
76
- ],
77
- "modelParameters" : [
78
-
79
- ],
80
- "storagePrecision" : "Mixed (Float16, Palettized (14 bits), Palettized (16 bits), UInt6)",
81
- "method" : "predict",
82
- "functions" : [
83
- {
84
- "inputSchema" : [
85
- {
86
- "hasShapeFlexibility" : "0",
87
- "isOptional" : "0",
88
- "dataType" : "Float16",
89
- "formattedType" : "MultiArray (Float16 1 × 1 × 2048)",
90
- "shortDescription" : "",
91
- "shape" : "[1, 1, 2048]",
92
- "name" : "hidden_states",
93
- "type" : "MultiArray"
94
- },
95
- {
96
- "hasShapeFlexibility" : "0",
97
- "isOptional" : "0",
98
- "dataType" : "Int32",
99
- "formattedType" : "MultiArray (Int32 1)",
100
- "shortDescription" : "",
101
- "shape" : "[1]",
102
- "name" : "position_ids",
103
- "type" : "MultiArray"
104
- },
105
- {
106
- "hasShapeFlexibility" : "0",
107
- "isOptional" : "0",
108
- "dataType" : "Float16",
109
- "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 2048)",
110
- "shortDescription" : "",
111
- "shape" : "[1, 1, 1, 2048]",
112
- "name" : "causal_mask",
113
- "type" : "MultiArray"
114
- },
115
- {
116
- "hasShapeFlexibility" : "0",
117
- "isOptional" : "0",
118
- "dataType" : "Int32",
119
- "formattedType" : "MultiArray (Int32 1)",
120
- "shortDescription" : "",
121
- "shape" : "[1]",
122
- "name" : "current_pos",
123
- "type" : "MultiArray"
124
- }
125
- ],
126
- "computePrecision" : "Mixed (Float16, Int16, Int32, UInt16)",
127
- "storagePrecision" : "Mixed (Float16, Palettized (14 bits), Palettized (16 bits), UInt6)",
128
- "stateSchema" : [
129
- {
130
- "dataType" : "Float16",
131
- "isOptional" : "0",
132
- "formattedType" : "State (Float16 48 × 16 × 2048 × 128)",
133
- "shortDescription" : "",
134
- "shape" : "[48, 16, 2048, 128]",
135
- "name" : "model_model_kv_cache_0",
136
- "type" : "State"
137
- }
138
- ],
139
- "outputSchema" : [
140
- {
141
- "hasShapeFlexibility" : "0",
142
- "isOptional" : "0",
143
- "dataType" : "Float16",
144
- "formattedType" : "MultiArray (Float16 1 × 1 × 2048)",
145
- "shortDescription" : "",
146
- "shape" : "[1, 1, 2048]",
147
- "name" : "output_hidden_states",
148
- "type" : "MultiArray"
149
- }
150
- ],
151
- "name" : "infer",
152
- "mlProgramOperationTypeHistogram" : {
153
- "Ios18.expandDims" : 96,
154
- "Ios18.mul" : 338,
155
- "Ios18.matmul" : 48,
156
- "Identity" : 1,
157
- "Ios18.exp" : 24,
158
- "Ios18.realDiv" : 24,
159
- "Ios18.greaterEqual" : 2,
160
- "Select" : 2,
161
- "Ios18.readState" : 49,
162
- "Ios16.reduceMax" : 24,
163
- "Ios18.gather" : 2,
164
- "Ios18.add" : 123,
165
- "Ios18.layerNorm" : 49,
166
- "Ios18.sliceUpdate" : 48,
167
- "Ios18.writeState" : 48,
168
- "Ios18.reshape" : 146,
169
- "Ios16.reduceSum" : 24,
170
- "Ios18.constexprLutToDense" : 168,
171
- "Ios18.conv" : 144,
172
- "Ios18.concat" : 193,
173
- "Ios18.transpose" : 96,
174
- "Ios18.sub" : 72,
175
- "Ios18.cast" : 5,
176
- "Ios18.linear" : 24,
177
- "Ios18.silu" : 24,
178
- "Ios18.sliceByIndex" : 195,
179
- "Ios18.squeeze" : 72
180
- }
181
- },
182
- {
183
- "inputSchema" : [
184
- {
185
- "hasShapeFlexibility" : "0",
186
- "isOptional" : "0",
187
- "dataType" : "Float16",
188
- "formattedType" : "MultiArray (Float16 1 × 64 × 2048)",
189
- "shortDescription" : "",
190
- "shape" : "[1, 64, 2048]",
191
- "name" : "hidden_states",
192
- "type" : "MultiArray"
193
- },
194
- {
195
- "hasShapeFlexibility" : "0",
196
- "isOptional" : "0",
197
- "dataType" : "Int32",
198
- "formattedType" : "MultiArray (Int32 64)",
199
- "shortDescription" : "",
200
- "shape" : "[64]",
201
- "name" : "position_ids",
202
- "type" : "MultiArray"
203
- },
204
- {
205
- "hasShapeFlexibility" : "0",
206
- "isOptional" : "0",
207
- "dataType" : "Float16",
208
- "formattedType" : "MultiArray (Float16 1 × 1 × 64 × 2048)",
209
- "shortDescription" : "",
210
- "shape" : "[1, 1, 64, 2048]",
211
- "name" : "causal_mask",
212
- "type" : "MultiArray"
213
- },
214
- {
215
- "hasShapeFlexibility" : "0",
216
- "isOptional" : "0",
217
- "dataType" : "Int32",
218
- "formattedType" : "MultiArray (Int32 1)",
219
- "shortDescription" : "",
220
- "shape" : "[1]",
221
- "name" : "current_pos",
222
- "type" : "MultiArray"
223
- }
224
- ],
225
- "computePrecision" : "Mixed (Float16, Int16, Int32, UInt16)",
226
- "storagePrecision" : "Mixed (Float16, Palettized (14 bits), Palettized (16 bits), UInt6)",
227
- "stateSchema" : [
228
- {
229
- "dataType" : "Float16",
230
- "isOptional" : "0",
231
- "formattedType" : "State (Float16 48 × 16 × 2048 × 128)",
232
- "shortDescription" : "",
233
- "shape" : "[48, 16, 2048, 128]",
234
- "name" : "model_model_kv_cache_0",
235
- "type" : "State"
236
- }
237
- ],
238
- "outputSchema" : [
239
- {
240
- "hasShapeFlexibility" : "0",
241
- "isOptional" : "0",
242
- "dataType" : "Float16",
243
- "formattedType" : "MultiArray (Float16 1 × 1 × 2048)",
244
- "shortDescription" : "",
245
- "shape" : "[1, 1, 2048]",
246
- "name" : "output_hidden_states",
247
- "type" : "MultiArray"
248
- }
249
- ],
250
- "name" : "prefill",
251
- "mlProgramOperationTypeHistogram" : {
252
- "Ios18.expandDims" : 95,
253
- "Ios18.mul" : 333,
254
- "Ios18.matmul" : 48,
255
- "Ios18.exp" : 24,
256
- "Ios18.realDiv" : 24,
257
- "Ios18.greaterEqual" : 2,
258
- "Select" : 2,
259
- "Ios18.readState" : 49,
260
- "Ios16.reduceMax" : 24,
261
- "Ios18.gather" : 2,
262
- "Ios18.add" : 122,
263
- "Ios18.layerNorm" : 47,
264
- "Ios18.sliceUpdate" : 48,
265
- "Ios18.writeState" : 48,
266
- "Ios18.reshape" : 194,
267
- "Ios16.reduceSum" : 24,
268
- "Ios18.constexprLutToDense" : 165,
269
- "Ios18.conv" : 141,
270
- "Ios18.concat" : 191,
271
- "Ios18.transpose" : 168,
272
- "Ios18.sub" : 72,
273
- "Ios18.cast" : 5,
274
- "Ios18.linear" : 24,
275
- "Ios18.silu" : 23,
276
- "Ios18.sliceByIndex" : 194,
277
- "Ios18.squeeze" : 71
278
- }
279
- }
280
- ],
281
- "version" : "0.1.1",
282
- "isUpdatable" : "0",
283
- "defaultFunctionName" : "infer",
284
- "specificationVersion" : 9,
285
- "stateSchema" : [
286
- {
287
- "dataType" : "Float16",
288
- "isOptional" : "0",
289
- "formattedType" : "State (Float16 48 × 16 × 2048 × 128)",
290
- "shortDescription" : "",
291
- "shape" : "[48, 16, 2048, 128]",
292
- "name" : "model_model_kv_cache_0",
293
- "type" : "State"
294
- }
295
- ],
296
- "computePrecision" : "Mixed (Float16, Int16, Int32, UInt16)",
297
- "mlProgramOperationTypeHistogram" : {
298
- "Ios18.expandDims" : 96,
299
- "Ios18.mul" : 338,
300
- "Ios18.matmul" : 48,
301
- "Identity" : 1,
302
- "Ios18.exp" : 24,
303
- "Ios18.realDiv" : 24,
304
- "Ios18.greaterEqual" : 2,
305
- "Select" : 2,
306
- "Ios18.readState" : 49,
307
- "Ios16.reduceMax" : 24,
308
- "Ios18.gather" : 2,
309
- "Ios18.add" : 123,
310
- "Ios18.layerNorm" : 49,
311
- "Ios18.sliceUpdate" : 48,
312
- "Ios18.writeState" : 48,
313
- "Ios18.reshape" : 146,
314
- "Ios16.reduceSum" : 24,
315
- "Ios18.constexprLutToDense" : 168,
316
- "Ios18.conv" : 144,
317
- "Ios18.concat" : 193,
318
- "Ios18.transpose" : 96,
319
- "Ios18.sub" : 72,
320
- "Ios18.cast" : 5,
321
- "Ios18.linear" : 24,
322
- "Ios18.silu" : 24,
323
- "Ios18.sliceByIndex" : 195,
324
- "Ios18.squeeze" : 72
325
- },
326
- "shortDescription" : "Anemll Model: Multifunction FFN+Prefill",
327
- "generatedClassName" : "llama_FFN_PF_lut6_chunk_01of01",
328
- "author" : "Converted with Anemll v0.1.1",
329
- "modelType" : {
330
- "name" : "MLModelType_mlProgram"
331
- }
332
- }
333
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
llama_FFN_PF_lut6_chunk_01of01.mlmodelc/model.mil DELETED
The diff for this file is too large to render. See raw diff
 
llama_FFN_PF_lut6_chunk_01of01.mlmodelc/weights/weight.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:b0659bdab44ee574404cb208e209af0f6c56eabd9256114562844c1964c23355
3
- size 921174336
 
 
 
 
llama_FFN_PF_lut6_chunk_01of01.mlpackage/Data/com.apple.CoreML/model.mlmodel DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:abc54c5b07b17c136e4ab1ceea4abd524f7e1fc6e4b705648911a6302b0f32e6
3
- size 1614653
 
 
 
 
llama_FFN_PF_lut6_chunk_01of01.mlpackage/Data/com.apple.CoreML/weights/weight.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:b0659bdab44ee574404cb208e209af0f6c56eabd9256114562844c1964c23355
3
- size 921174336
 
 
 
 
llama_FFN_PF_lut6_chunk_01of01.mlpackage/Manifest.json DELETED
@@ -1,18 +0,0 @@
1
- {
2
- "fileFormatVersion": "1.0.0",
3
- "itemInfoEntries": {
4
- "364C6207-AD1B-4B33-A195-BF5B841ABF74": {
5
- "author": "com.apple.CoreML",
6
- "description": "CoreML Model Weights",
7
- "name": "weights",
8
- "path": "com.apple.CoreML/weights"
9
- },
10
- "B22A275C-8BF9-4736-B0C1-DA2C81FDEF1D": {
11
- "author": "com.apple.CoreML",
12
- "description": "CoreML Model Specification",
13
- "name": "model.mlmodel",
14
- "path": "com.apple.CoreML/model.mlmodel"
15
- }
16
- },
17
- "rootModelIdentifier": "B22A275C-8BF9-4736-B0C1-DA2C81FDEF1D"
18
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
llama_FFN_lut6_chunk_01of01.mlpackage/Data/com.apple.CoreML/model.mlmodel DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:14d4c5586b55eb3202a31ee84dacd16a76a58dbc0a97aee400a75ba19bd9a9c8
3
- size 799512
 
 
 
 
llama_FFN_lut6_chunk_01of01.mlpackage/Data/com.apple.CoreML/weights/weight.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:b0659bdab44ee574404cb208e209af0f6c56eabd9256114562844c1964c23355
3
- size 921174336
 
 
 
 
llama_FFN_lut6_chunk_01of01.mlpackage/Manifest.json DELETED
@@ -1,18 +0,0 @@
1
- {
2
- "fileFormatVersion": "1.0.0",
3
- "itemInfoEntries": {
4
- "53F9A283-E453-4A93-ABBB-513F65C9575B": {
5
- "author": "com.apple.CoreML",
6
- "description": "CoreML Model Weights",
7
- "name": "weights",
8
- "path": "com.apple.CoreML/weights"
9
- },
10
- "DDDD6E01-E6E6-411A-83B9-93FBB2D79385": {
11
- "author": "com.apple.CoreML",
12
- "description": "CoreML Model Specification",
13
- "name": "model.mlmodel",
14
- "path": "com.apple.CoreML/model.mlmodel"
15
- }
16
- },
17
- "rootModelIdentifier": "DDDD6E01-E6E6-411A-83B9-93FBB2D79385"
18
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
llama_embeddings.mlpackage/Data/com.apple.CoreML/model.mlmodel DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:e9d75ab28c2cde20f1ad2517130b3d465c2bcad497502fd9922ee9f474096ede
3
- size 3061
 
 
 
 
llama_embeddings.mlpackage/Data/com.apple.CoreML/weights/weight.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:852a384d0c577ec63eb8faf2f7d7afa24aaca03b1d3e373fb5bb46fc07af283b
3
- size 132120704
 
 
 
 
llama_embeddings.mlpackage/Manifest.json DELETED
@@ -1,18 +0,0 @@
1
- {
2
- "fileFormatVersion": "1.0.0",
3
- "itemInfoEntries": {
4
- "85625E63-BF8B-467B-957B-DF5327D861BA": {
5
- "author": "com.apple.CoreML",
6
- "description": "CoreML Model Weights",
7
- "name": "weights",
8
- "path": "com.apple.CoreML/weights"
9
- },
10
- "ED954E18-4E2C-435F-98E4-D9B190F21DF3": {
11
- "author": "com.apple.CoreML",
12
- "description": "CoreML Model Specification",
13
- "name": "model.mlmodel",
14
- "path": "com.apple.CoreML/model.mlmodel"
15
- }
16
- },
17
- "rootModelIdentifier": "ED954E18-4E2C-435F-98E4-D9B190F21DF3"
18
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
llama_lm_head_lut6.mlmodelc/analytics/coremldata.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:3c882753e4853952ee503752cc3509b7343777c48e1fa6189b67a093ada772c7
3
- size 243
 
 
 
 
llama_lm_head_lut6.mlmodelc/coremldata.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:4a088381332bc8837634b408eb51f8acc6c63381326aa92211a406ee56c1ba97
3
- size 859
 
 
 
 
llama_lm_head_lut6.mlmodelc/metadata.json DELETED
@@ -1,143 +0,0 @@
1
- [
2
- {
3
- "shortDescription" : "Anemll Model (LM Head) converted to CoreML",
4
- "metadataOutputVersion" : "3.0",
5
- "outputSchema" : [
6
- {
7
- "hasShapeFlexibility" : "0",
8
- "isOptional" : "0",
9
- "dataType" : "Float16",
10
- "formattedType" : "MultiArray (Float16 1 × 1 × 4032)",
11
- "shortDescription" : "",
12
- "shape" : "[1, 1, 4032]",
13
- "name" : "logits1",
14
- "type" : "MultiArray"
15
- },
16
- {
17
- "hasShapeFlexibility" : "0",
18
- "isOptional" : "0",
19
- "dataType" : "Float16",
20
- "formattedType" : "MultiArray (Float16 1 × 1 × 4032)",
21
- "shortDescription" : "",
22
- "shape" : "[1, 1, 4032]",
23
- "name" : "logits2",
24
- "type" : "MultiArray"
25
- },
26
- {
27
- "hasShapeFlexibility" : "0",
28
- "isOptional" : "0",
29
- "dataType" : "Float16",
30
- "formattedType" : "MultiArray (Float16 1 × 1 × 4032)",
31
- "shortDescription" : "",
32
- "shape" : "[1, 1, 4032]",
33
- "name" : "logits3",
34
- "type" : "MultiArray"
35
- },
36
- {
37
- "hasShapeFlexibility" : "0",
38
- "isOptional" : "0",
39
- "dataType" : "Float16",
40
- "formattedType" : "MultiArray (Float16 1 × 1 × 4032)",
41
- "shortDescription" : "",
42
- "shape" : "[1, 1, 4032]",
43
- "name" : "logits4",
44
- "type" : "MultiArray"
45
- },
46
- {
47
- "hasShapeFlexibility" : "0",
48
- "isOptional" : "0",
49
- "dataType" : "Float16",
50
- "formattedType" : "MultiArray (Float16 1 × 1 × 4032)",
51
- "shortDescription" : "",
52
- "shape" : "[1, 1, 4032]",
53
- "name" : "logits5",
54
- "type" : "MultiArray"
55
- },
56
- {
57
- "hasShapeFlexibility" : "0",
58
- "isOptional" : "0",
59
- "dataType" : "Float16",
60
- "formattedType" : "MultiArray (Float16 1 × 1 × 4032)",
61
- "shortDescription" : "",
62
- "shape" : "[1, 1, 4032]",
63
- "name" : "logits6",
64
- "type" : "MultiArray"
65
- },
66
- {
67
- "hasShapeFlexibility" : "0",
68
- "isOptional" : "0",
69
- "dataType" : "Float16",
70
- "formattedType" : "MultiArray (Float16 1 × 1 × 4032)",
71
- "shortDescription" : "",
72
- "shape" : "[1, 1, 4032]",
73
- "name" : "logits7",
74
- "type" : "MultiArray"
75
- },
76
- {
77
- "hasShapeFlexibility" : "0",
78
- "isOptional" : "0",
79
- "dataType" : "Float16",
80
- "formattedType" : "MultiArray (Float16 1 × 1 × 4032)",
81
- "shortDescription" : "",
82
- "shape" : "[1, 1, 4032]",
83
- "name" : "logits8",
84
- "type" : "MultiArray"
85
- }
86
- ],
87
- "version" : "0.1.1",
88
- "modelParameters" : [
89
-
90
- ],
91
- "author" : "Converted with Anemll v0.1.1",
92
- "specificationVersion" : 9,
93
- "storagePrecision" : "Mixed (Float16, Palettized (15 bits), UInt6)",
94
- "mlProgramOperationTypeHistogram" : {
95
- "Ios18.transpose" : 9,
96
- "Ios18.constexprLutToDense" : 8,
97
- "Ios18.expandDims" : 1,
98
- "Ios18.conv" : 8,
99
- "Ios18.squeeze" : 8
100
- },
101
- "computePrecision" : "Mixed (Float16, Int32)",
102
- "stateSchema" : [
103
-
104
- ],
105
- "isUpdatable" : "0",
106
- "availability" : {
107
- "macOS" : "15.0",
108
- "tvOS" : "18.0",
109
- "visionOS" : "2.0",
110
- "watchOS" : "11.0",
111
- "iOS" : "18.0",
112
- "macCatalyst" : "18.0"
113
- },
114
- "modelType" : {
115
- "name" : "MLModelType_mlProgram"
116
- },
117
- "inputSchema" : [
118
- {
119
- "hasShapeFlexibility" : "0",
120
- "isOptional" : "0",
121
- "dataType" : "Float16",
122
- "formattedType" : "MultiArray (Float16 1 × 1 × 2048)",
123
- "shortDescription" : "",
124
- "shape" : "[1, 1, 2048]",
125
- "name" : "hidden_states",
126
- "type" : "MultiArray"
127
- }
128
- ],
129
- "userDefinedMetadata" : {
130
- "com.github.apple.coremltools.version" : "9.0",
131
- "com.github.apple.coremltools.source_dialect" : "TorchScript",
132
- "com.github.apple.coremltools.conversion_date" : "2026-03-17",
133
- "com.anemll.context_length" : "2048",
134
- "com.anemll.lm_head_chunk_sizes" : "4032,4032,4032,4032,4032,4032,4032,4032",
135
- "com.github.apple.coremltools.source" : "torch==2.5.0",
136
- "com.anemll.vocab_size" : "32256",
137
- "com.anemll.info" : "Converted with Anemll v0.1.1",
138
- "com.anemll.lut_bits" : "6"
139
- },
140
- "generatedClassName" : "llama_lm_head_lut6",
141
- "method" : "predict"
142
- }
143
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
llama_lm_head_lut6.mlmodelc/model.mil DELETED
@@ -1,98 +0,0 @@
1
- program(1.3)
2
- [buildInfo = dict<string, string>({{"coremlc-component-MIL", "3510.2.1"}, {"coremlc-version", "3500.32.1"}})]
3
- {
4
- func main<ios18>(tensor<fp16, [1, 1, 2048]> hidden_states) {
5
- tensor<int32, [3]> var_5 = const()[name = string("op_5"), val = tensor<int32, [3]>([0, 2, 1])];
6
- tensor<int32, [1]> input_axes_0 = const()[name = string("input_axes_0"), val = tensor<int32, [1]>([2])];
7
- tensor<fp16, [1, 2048, 1]> var_6_cast_fp16 = transpose(perm = var_5, x = hidden_states)[name = string("transpose_8")];
8
- tensor<fp16, [1, 2048, 1, 1]> input_cast_fp16 = expand_dims(axes = input_axes_0, x = var_6_cast_fp16)[name = string("input_cast_fp16")];
9
- string var_29_pad_type_0 = const()[name = string("op_29_pad_type_0"), val = string("valid")];
10
- tensor<int32, [2]> var_29_strides_0 = const()[name = string("op_29_strides_0"), val = tensor<int32, [2]>([1, 1])];
11
- tensor<int32, [4]> var_29_pad_0 = const()[name = string("op_29_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
12
- tensor<int32, [2]> var_29_dilations_0 = const()[name = string("op_29_dilations_0"), val = tensor<int32, [2]>([1, 1])];
13
- int32 var_29_groups_0 = const()[name = string("op_29_groups_0"), val = int32(1)];
14
- tensor<fp16, [4032, 2048, 1, 1]> op_9_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint6, [4032, 2048, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), lut = tensor<fp16, [504, 1, 1, 1, 64, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6193280))))[name = string("op_9_promoted_to_fp16_palettized")];
15
- tensor<fp16, [1, 4032, 1, 1]> var_29_cast_fp16 = conv(dilations = var_29_dilations_0, groups = var_29_groups_0, pad = var_29_pad_0, pad_type = var_29_pad_type_0, strides = var_29_strides_0, weight = op_9_promoted_to_fp16_palettized, x = input_cast_fp16)[name = string("op_29_cast_fp16")];
16
- tensor<int32, [1]> var_31_axes_0 = const()[name = string("op_31_axes_0"), val = tensor<int32, [1]>([2])];
17
- tensor<fp16, [1, 4032, 1]> var_31_cast_fp16 = squeeze(axes = var_31_axes_0, x = var_29_cast_fp16)[name = string("op_31_cast_fp16")];
18
- tensor<int32, [3]> var_34_perm_0 = const()[name = string("op_34_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
19
- string var_55_pad_type_0 = const()[name = string("op_55_pad_type_0"), val = string("valid")];
20
- tensor<int32, [2]> var_55_strides_0 = const()[name = string("op_55_strides_0"), val = tensor<int32, [2]>([1, 1])];
21
- tensor<int32, [4]> var_55_pad_0 = const()[name = string("op_55_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
22
- tensor<int32, [2]> var_55_dilations_0 = const()[name = string("op_55_dilations_0"), val = tensor<int32, [2]>([1, 1])];
23
- int32 var_55_groups_0 = const()[name = string("op_55_groups_0"), val = int32(1)];
24
- tensor<fp16, [4032, 2048, 1, 1]> op_35_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint6, [4032, 2048, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6257856))), lut = tensor<fp16, [504, 1, 1, 1, 64, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12451072))))[name = string("op_35_promoted_to_fp16_palettized")];
25
- tensor<fp16, [1, 4032, 1, 1]> var_55_cast_fp16 = conv(dilations = var_55_dilations_0, groups = var_55_groups_0, pad = var_55_pad_0, pad_type = var_55_pad_type_0, strides = var_55_strides_0, weight = op_35_promoted_to_fp16_palettized, x = input_cast_fp16)[name = string("op_55_cast_fp16")];
26
- tensor<int32, [1]> var_57_axes_0 = const()[name = string("op_57_axes_0"), val = tensor<int32, [1]>([2])];
27
- tensor<fp16, [1, 4032, 1]> var_57_cast_fp16 = squeeze(axes = var_57_axes_0, x = var_55_cast_fp16)[name = string("op_57_cast_fp16")];
28
- tensor<int32, [3]> var_60_perm_0 = const()[name = string("op_60_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
29
- string var_81_pad_type_0 = const()[name = string("op_81_pad_type_0"), val = string("valid")];
30
- tensor<int32, [2]> var_81_strides_0 = const()[name = string("op_81_strides_0"), val = tensor<int32, [2]>([1, 1])];
31
- tensor<int32, [4]> var_81_pad_0 = const()[name = string("op_81_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
32
- tensor<int32, [2]> var_81_dilations_0 = const()[name = string("op_81_dilations_0"), val = tensor<int32, [2]>([1, 1])];
33
- int32 var_81_groups_0 = const()[name = string("op_81_groups_0"), val = int32(1)];
34
- tensor<fp16, [4032, 2048, 1, 1]> op_61_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint6, [4032, 2048, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12515648))), lut = tensor<fp16, [504, 1, 1, 1, 64, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18708864))))[name = string("op_61_promoted_to_fp16_palettized")];
35
- tensor<fp16, [1, 4032, 1, 1]> var_81_cast_fp16 = conv(dilations = var_81_dilations_0, groups = var_81_groups_0, pad = var_81_pad_0, pad_type = var_81_pad_type_0, strides = var_81_strides_0, weight = op_61_promoted_to_fp16_palettized, x = input_cast_fp16)[name = string("op_81_cast_fp16")];
36
- tensor<int32, [1]> var_83_axes_0 = const()[name = string("op_83_axes_0"), val = tensor<int32, [1]>([2])];
37
- tensor<fp16, [1, 4032, 1]> var_83_cast_fp16 = squeeze(axes = var_83_axes_0, x = var_81_cast_fp16)[name = string("op_83_cast_fp16")];
38
- tensor<int32, [3]> var_86_perm_0 = const()[name = string("op_86_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
39
- string var_107_pad_type_0 = const()[name = string("op_107_pad_type_0"), val = string("valid")];
40
- tensor<int32, [2]> var_107_strides_0 = const()[name = string("op_107_strides_0"), val = tensor<int32, [2]>([1, 1])];
41
- tensor<int32, [4]> var_107_pad_0 = const()[name = string("op_107_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
42
- tensor<int32, [2]> var_107_dilations_0 = const()[name = string("op_107_dilations_0"), val = tensor<int32, [2]>([1, 1])];
43
- int32 var_107_groups_0 = const()[name = string("op_107_groups_0"), val = int32(1)];
44
- tensor<fp16, [4032, 2048, 1, 1]> op_87_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint6, [4032, 2048, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18773440))), lut = tensor<fp16, [504, 1, 1, 1, 64, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(24966656))))[name = string("op_87_promoted_to_fp16_palettized")];
45
- tensor<fp16, [1, 4032, 1, 1]> var_107_cast_fp16 = conv(dilations = var_107_dilations_0, groups = var_107_groups_0, pad = var_107_pad_0, pad_type = var_107_pad_type_0, strides = var_107_strides_0, weight = op_87_promoted_to_fp16_palettized, x = input_cast_fp16)[name = string("op_107_cast_fp16")];
46
- tensor<int32, [1]> var_109_axes_0 = const()[name = string("op_109_axes_0"), val = tensor<int32, [1]>([2])];
47
- tensor<fp16, [1, 4032, 1]> var_109_cast_fp16 = squeeze(axes = var_109_axes_0, x = var_107_cast_fp16)[name = string("op_109_cast_fp16")];
48
- tensor<int32, [3]> var_112_perm_0 = const()[name = string("op_112_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
49
- string var_133_pad_type_0 = const()[name = string("op_133_pad_type_0"), val = string("valid")];
50
- tensor<int32, [2]> var_133_strides_0 = const()[name = string("op_133_strides_0"), val = tensor<int32, [2]>([1, 1])];
51
- tensor<int32, [4]> var_133_pad_0 = const()[name = string("op_133_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
52
- tensor<int32, [2]> var_133_dilations_0 = const()[name = string("op_133_dilations_0"), val = tensor<int32, [2]>([1, 1])];
53
- int32 var_133_groups_0 = const()[name = string("op_133_groups_0"), val = int32(1)];
54
- tensor<fp16, [4032, 2048, 1, 1]> op_113_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint6, [4032, 2048, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25031232))), lut = tensor<fp16, [504, 1, 1, 1, 64, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31224448))))[name = string("op_113_promoted_to_fp16_palettized")];
55
- tensor<fp16, [1, 4032, 1, 1]> var_133_cast_fp16 = conv(dilations = var_133_dilations_0, groups = var_133_groups_0, pad = var_133_pad_0, pad_type = var_133_pad_type_0, strides = var_133_strides_0, weight = op_113_promoted_to_fp16_palettized, x = input_cast_fp16)[name = string("op_133_cast_fp16")];
56
- tensor<int32, [1]> var_135_axes_0 = const()[name = string("op_135_axes_0"), val = tensor<int32, [1]>([2])];
57
- tensor<fp16, [1, 4032, 1]> var_135_cast_fp16 = squeeze(axes = var_135_axes_0, x = var_133_cast_fp16)[name = string("op_135_cast_fp16")];
58
- tensor<int32, [3]> var_138_perm_0 = const()[name = string("op_138_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
59
- string var_159_pad_type_0 = const()[name = string("op_159_pad_type_0"), val = string("valid")];
60
- tensor<int32, [2]> var_159_strides_0 = const()[name = string("op_159_strides_0"), val = tensor<int32, [2]>([1, 1])];
61
- tensor<int32, [4]> var_159_pad_0 = const()[name = string("op_159_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
62
- tensor<int32, [2]> var_159_dilations_0 = const()[name = string("op_159_dilations_0"), val = tensor<int32, [2]>([1, 1])];
63
- int32 var_159_groups_0 = const()[name = string("op_159_groups_0"), val = int32(1)];
64
- tensor<fp16, [4032, 2048, 1, 1]> op_139_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint6, [4032, 2048, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31289024))), lut = tensor<fp16, [504, 1, 1, 1, 64, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37482240))))[name = string("op_139_promoted_to_fp16_palettized")];
65
- tensor<fp16, [1, 4032, 1, 1]> var_159_cast_fp16 = conv(dilations = var_159_dilations_0, groups = var_159_groups_0, pad = var_159_pad_0, pad_type = var_159_pad_type_0, strides = var_159_strides_0, weight = op_139_promoted_to_fp16_palettized, x = input_cast_fp16)[name = string("op_159_cast_fp16")];
66
- tensor<int32, [1]> var_161_axes_0 = const()[name = string("op_161_axes_0"), val = tensor<int32, [1]>([2])];
67
- tensor<fp16, [1, 4032, 1]> var_161_cast_fp16 = squeeze(axes = var_161_axes_0, x = var_159_cast_fp16)[name = string("op_161_cast_fp16")];
68
- tensor<int32, [3]> var_164_perm_0 = const()[name = string("op_164_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
69
- string var_185_pad_type_0 = const()[name = string("op_185_pad_type_0"), val = string("valid")];
70
- tensor<int32, [2]> var_185_strides_0 = const()[name = string("op_185_strides_0"), val = tensor<int32, [2]>([1, 1])];
71
- tensor<int32, [4]> var_185_pad_0 = const()[name = string("op_185_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
72
- tensor<int32, [2]> var_185_dilations_0 = const()[name = string("op_185_dilations_0"), val = tensor<int32, [2]>([1, 1])];
73
- int32 var_185_groups_0 = const()[name = string("op_185_groups_0"), val = int32(1)];
74
- tensor<fp16, [4032, 2048, 1, 1]> op_165_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint6, [4032, 2048, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37546816))), lut = tensor<fp16, [504, 1, 1, 1, 64, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43740032))))[name = string("op_165_promoted_to_fp16_palettized")];
75
- tensor<fp16, [1, 4032, 1, 1]> var_185_cast_fp16 = conv(dilations = var_185_dilations_0, groups = var_185_groups_0, pad = var_185_pad_0, pad_type = var_185_pad_type_0, strides = var_185_strides_0, weight = op_165_promoted_to_fp16_palettized, x = input_cast_fp16)[name = string("op_185_cast_fp16")];
76
- tensor<int32, [1]> var_187_axes_0 = const()[name = string("op_187_axes_0"), val = tensor<int32, [1]>([2])];
77
- tensor<fp16, [1, 4032, 1]> var_187_cast_fp16 = squeeze(axes = var_187_axes_0, x = var_185_cast_fp16)[name = string("op_187_cast_fp16")];
78
- tensor<int32, [3]> var_190_perm_0 = const()[name = string("op_190_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
79
- string var_211_pad_type_0 = const()[name = string("op_211_pad_type_0"), val = string("valid")];
80
- tensor<int32, [2]> var_211_strides_0 = const()[name = string("op_211_strides_0"), val = tensor<int32, [2]>([1, 1])];
81
- tensor<int32, [4]> var_211_pad_0 = const()[name = string("op_211_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
82
- tensor<int32, [2]> var_211_dilations_0 = const()[name = string("op_211_dilations_0"), val = tensor<int32, [2]>([1, 1])];
83
- int32 var_211_groups_0 = const()[name = string("op_211_groups_0"), val = int32(1)];
84
- tensor<fp16, [4032, 2048, 1, 1]> op_191_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint6, [4032, 2048, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43804608))), lut = tensor<fp16, [504, 1, 1, 1, 64, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49997824))))[name = string("op_191_promoted_to_fp16_palettized")];
85
- tensor<fp16, [1, 4032, 1, 1]> var_211_cast_fp16 = conv(dilations = var_211_dilations_0, groups = var_211_groups_0, pad = var_211_pad_0, pad_type = var_211_pad_type_0, strides = var_211_strides_0, weight = op_191_promoted_to_fp16_palettized, x = input_cast_fp16)[name = string("op_211_cast_fp16")];
86
- tensor<int32, [1]> var_213_axes_0 = const()[name = string("op_213_axes_0"), val = tensor<int32, [1]>([2])];
87
- tensor<fp16, [1, 4032, 1]> var_213_cast_fp16 = squeeze(axes = var_213_axes_0, x = var_211_cast_fp16)[name = string("op_213_cast_fp16")];
88
- tensor<int32, [3]> var_216_perm_0 = const()[name = string("op_216_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
89
- tensor<fp16, [1, 1, 4032]> logits1 = transpose(perm = var_34_perm_0, x = var_31_cast_fp16)[name = string("transpose_0")];
90
- tensor<fp16, [1, 1, 4032]> logits2 = transpose(perm = var_60_perm_0, x = var_57_cast_fp16)[name = string("transpose_1")];
91
- tensor<fp16, [1, 1, 4032]> logits3 = transpose(perm = var_86_perm_0, x = var_83_cast_fp16)[name = string("transpose_2")];
92
- tensor<fp16, [1, 1, 4032]> logits4 = transpose(perm = var_112_perm_0, x = var_109_cast_fp16)[name = string("transpose_3")];
93
- tensor<fp16, [1, 1, 4032]> logits5 = transpose(perm = var_138_perm_0, x = var_135_cast_fp16)[name = string("transpose_4")];
94
- tensor<fp16, [1, 1, 4032]> logits6 = transpose(perm = var_164_perm_0, x = var_161_cast_fp16)[name = string("transpose_5")];
95
- tensor<fp16, [1, 1, 4032]> logits7 = transpose(perm = var_190_perm_0, x = var_187_cast_fp16)[name = string("transpose_6")];
96
- tensor<fp16, [1, 1, 4032]> logits8 = transpose(perm = var_216_perm_0, x = var_213_cast_fp16)[name = string("transpose_7")];
97
- } -> (logits1, logits2, logits3, logits4, logits5, logits6, logits7, logits8);
98
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
llama_lm_head_lut6.mlmodelc/weights/weight.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:b8194981a11603b43da42a4d396dc17c54f1c5c637b990c78b38b9bf542b9acf
3
- size 50062400
 
 
 
 
llama_lm_head_lut6.mlpackage/Data/com.apple.CoreML/model.mlmodel DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:b666cbadf27c962fab9d7e421586a134120b334107382eac2f20a88cb474813b
3
- size 15426
 
 
 
 
llama_lm_head_lut6.mlpackage/Data/com.apple.CoreML/weights/weight.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:b8194981a11603b43da42a4d396dc17c54f1c5c637b990c78b38b9bf542b9acf
3
- size 50062400
 
 
 
 
llama_lm_head_lut6.mlpackage/Manifest.json DELETED
@@ -1,18 +0,0 @@
1
- {
2
- "fileFormatVersion": "1.0.0",
3
- "itemInfoEntries": {
4
- "14014917-4E03-4FF7-A25C-7D37CE6A58AA": {
5
- "author": "com.apple.CoreML",
6
- "description": "CoreML Model Weights",
7
- "name": "weights",
8
- "path": "com.apple.CoreML/weights"
9
- },
10
- "9357E2E6-C079-4015-B45E-2FD97B2DAE46": {
11
- "author": "com.apple.CoreML",
12
- "description": "CoreML Model Specification",
13
- "name": "model.mlmodel",
14
- "path": "com.apple.CoreML/model.mlmodel"
15
- }
16
- },
17
- "rootModelIdentifier": "9357E2E6-C079-4015-B45E-2FD97B2DAE46"
18
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
llama_prefill_lut6_chunk_01of01.mlpackage/Data/com.apple.CoreML/model.mlmodel DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:923df0f8f9f36dd15f611332ef4d5ac432f47e801ca5db1ccee3e7a3301ad4e9
3
- size 815469
 
 
 
 
llama_prefill_lut6_chunk_01of01.mlpackage/Data/com.apple.CoreML/weights/weight.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:dfff805ee70f63a0bc87249cfc794d7b652460c385a05877276bed63d5e21f75
3
- size 895594304
 
 
 
 
llama_prefill_lut6_chunk_01of01.mlpackage/Manifest.json DELETED
@@ -1,18 +0,0 @@
1
- {
2
- "fileFormatVersion": "1.0.0",
3
- "itemInfoEntries": {
4
- "46B7EB73-4FFD-4A6A-85A6-70EE30E0419E": {
5
- "author": "com.apple.CoreML",
6
- "description": "CoreML Model Weights",
7
- "name": "weights",
8
- "path": "com.apple.CoreML/weights"
9
- },
10
- "8BA31F55-D521-48BF-B65B-543B036F89D5": {
11
- "author": "com.apple.CoreML",
12
- "description": "CoreML Model Specification",
13
- "name": "model.mlmodel",
14
- "path": "com.apple.CoreML/model.mlmodel"
15
- }
16
- },
17
- "rootModelIdentifier": "8BA31F55-D521-48BF-B65B-543B036F89D5"
18
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
meta_progress.yaml DELETED
@@ -1,51 +0,0 @@
1
- # Conversion in progress - this file is for monitoring only
2
- # Final meta.yaml will be created at step 7
3
- conversion:
4
- status: in_progress
5
- start_time: 2026-03-17T16:30:31Z
6
- model_path: /tmp/ios_models/downloads/Prem-1B-SQL
7
- output_dir: /tmp/ios_models/Prem-1B-SQL-ctx2048
8
- context_length: 2048
9
- batch_size: 64
10
- num_chunks: 1
11
- prefix: llama
12
- architecture: llama
13
- lut_part1: none
14
- lut_part2: 6
15
- lut_part3: 6
16
- fp16_scale: none
17
- argmax: false
18
- split_rotate: false
19
- steps:
20
- - name: embeddings
21
- part: 1
22
- status: pending
23
- - name: lm_head
24
- part: 3
25
- status: pending
26
- - name: ffn
27
- part: 2
28
- status: pending
29
- - name: prefill
30
- part: 2_prefill
31
- status: pending
32
- - name: ffn_rotate
33
- part: 2_rotate
34
- status: pending
35
- gemma3_only: true
36
- - name: prefill_rotate
37
- part: 2_prefill_rotate
38
- status: pending
39
- gemma3_only: true
40
- - name: combine
41
- part: 5
42
- status: pending
43
- - name: compile
44
- part: 6
45
- status: pending
46
- - name: tokenizer
47
- part: 7
48
- status: pending
49
- - name: test
50
- part: 8
51
- status: pending