sp00ktober committed on Mar 16

Commit

6e953d1

verified ·

1 Parent(s): f08c7b2

Upload folder using huggingface_hub

Browse files

Files changed (37) hide show

config.json +4 -0
generation_config.json +6 -0
llama_FFN_PF_lut6_chunk_01of01.mlmodelc/analytics/coremldata.bin +3 -0
llama_FFN_PF_lut6_chunk_01of01.mlmodelc/coremldata.bin +3 -0
llama_FFN_PF_lut6_chunk_01of01.mlmodelc/metadata.json +333 -0
llama_FFN_PF_lut6_chunk_01of01.mlmodelc/model.mil +0 -0
llama_FFN_PF_lut6_chunk_01of01.mlmodelc/weights/weight.bin +3 -0
llama_FFN_PF_lut6_chunk_01of01.mlpackage/Data/com.apple.CoreML/model.mlmodel +3 -0
llama_FFN_PF_lut6_chunk_01of01.mlpackage/Data/com.apple.CoreML/weights/weight.bin +3 -0
llama_FFN_PF_lut6_chunk_01of01.mlpackage/Manifest.json +18 -0
llama_FFN_lut6_chunk_01of01.mlpackage/Data/com.apple.CoreML/model.mlmodel +3 -0
llama_FFN_lut6_chunk_01of01.mlpackage/Data/com.apple.CoreML/weights/weight.bin +3 -0
llama_FFN_lut6_chunk_01of01.mlpackage/Manifest.json +18 -0
llama_embeddings.mlmodelc/analytics/coremldata.bin +3 -0
llama_embeddings.mlmodelc/coremldata.bin +3 -0
llama_embeddings.mlmodelc/metadata.json +72 -0
llama_embeddings.mlmodelc/model.mil +22 -0
llama_embeddings.mlmodelc/weights/weight.bin +3 -0
llama_embeddings.mlpackage/Data/com.apple.CoreML/model.mlmodel +3 -0
llama_embeddings.mlpackage/Data/com.apple.CoreML/weights/weight.bin +3 -0
llama_embeddings.mlpackage/Manifest.json +18 -0
llama_lm_head_lut6.mlmodelc/analytics/coremldata.bin +3 -0
llama_lm_head_lut6.mlmodelc/coremldata.bin +3 -0
llama_lm_head_lut6.mlmodelc/metadata.json +143 -0
llama_lm_head_lut6.mlmodelc/model.mil +98 -0
llama_lm_head_lut6.mlmodelc/weights/weight.bin +3 -0
llama_lm_head_lut6.mlpackage/Data/com.apple.CoreML/model.mlmodel +3 -0
llama_lm_head_lut6.mlpackage/Data/com.apple.CoreML/weights/weight.bin +3 -0
llama_lm_head_lut6.mlpackage/Manifest.json +18 -0
llama_prefill_lut6_chunk_01of01.mlpackage/Data/com.apple.CoreML/model.mlmodel +3 -0
llama_prefill_lut6_chunk_01of01.mlpackage/Data/com.apple.CoreML/weights/weight.bin +3 -0
llama_prefill_lut6_chunk_01of01.mlpackage/Manifest.json +18 -0
meta.yaml +54 -0
meta_progress.yaml +51 -0
special_tokens_map.json +23 -0
tokenizer.json +0 -0
tokenizer_config.json +199 -0

config.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+  "tokenizer_class": "LlamaTokenizer",
+  "model_type": "llama"
+}

generation_config.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 32013,
+  "eos_token_id": 32021,
+  "transformers_version": "4.44.0"
+}

llama_FFN_PF_lut6_chunk_01of01.mlmodelc/analytics/coremldata.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:17938c53011ac313c0025a2c5a965fb4410522ea19bf92333b055f5b502ec584
+size 243

llama_FFN_PF_lut6_chunk_01of01.mlmodelc/coremldata.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e8225ce129cbaa2bce30c233864a2ece535c6ee4b5a43b806836ca37026f7939
+size 981

llama_FFN_PF_lut6_chunk_01of01.mlmodelc/metadata.json ADDED Viewed

	@@ -0,0 +1,333 @@

+[
+  {
+    "metadataOutputVersion" : "3.0",
+    "userDefinedMetadata" : {
+      "com.github.apple.coremltools.source" : "torch==2.5.0",
+      "com.github.apple.coremltools.source_dialect" : "TorchScript",
+      "com.anemll.chunk_no" : "1",
+      "com.anemll.context_length" : "1024",
+      "com.github.apple.coremltools.version" : "9.0",
+      "com.anemll.num_chunks" : "1",
+      "com.anemll.batch_size" : "64",
+      "com.anemll.info" : "Converted with Anemll v0.1.1",
+      "com.anemll.lut_bits" : "6"
+    },
+    "availability" : {
+      "macOS" : "15.0",
+      "tvOS" : "18.0",
+      "visionOS" : "2.0",
+      "watchOS" : "11.0",
+      "iOS" : "18.0",
+      "macCatalyst" : "18.0"
+    },
+    "inputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1 × 2048)",
+        "shortDescription" : "",
+        "shape" : "[1, 1, 2048]",
+        "name" : "hidden_states",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Int32",
+        "formattedType" : "MultiArray (Int32 1)",
+        "shortDescription" : "",
+        "shape" : "[1]",
+        "name" : "position_ids",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 1024)",
+        "shortDescription" : "",
+        "shape" : "[1, 1, 1, 1024]",
+        "name" : "causal_mask",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Int32",
+        "formattedType" : "MultiArray (Int32 1)",
+        "shortDescription" : "",
+        "shape" : "[1]",
+        "name" : "current_pos",
+        "type" : "MultiArray"
+      }
+    ],
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1 × 2048)",
+        "shortDescription" : "",
+        "shape" : "[1, 1, 2048]",
+        "name" : "output_hidden_states",
+        "type" : "MultiArray"
+      }
+    ],
+    "modelParameters" : [
+    ],
+    "storagePrecision" : "Mixed (Float16, Palettized (14 bits), Palettized (16 bits), UInt6)",
+    "method" : "predict",
+    "functions" : [
+      {
+        "inputSchema" : [
+          {
+            "hasShapeFlexibility" : "0",
+            "isOptional" : "0",
+            "dataType" : "Float16",
+            "formattedType" : "MultiArray (Float16 1 × 1 × 2048)",
+            "shortDescription" : "",
+            "shape" : "[1, 1, 2048]",
+            "name" : "hidden_states",
+            "type" : "MultiArray"
+          },
+          {
+            "hasShapeFlexibility" : "0",
+            "isOptional" : "0",
+            "dataType" : "Int32",
+            "formattedType" : "MultiArray (Int32 1)",
+            "shortDescription" : "",
+            "shape" : "[1]",
+            "name" : "position_ids",
+            "type" : "MultiArray"
+          },
+          {
+            "hasShapeFlexibility" : "0",
+            "isOptional" : "0",
+            "dataType" : "Float16",
+            "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 1024)",
+            "shortDescription" : "",
+            "shape" : "[1, 1, 1, 1024]",
+            "name" : "causal_mask",
+            "type" : "MultiArray"
+          },
+          {
+            "hasShapeFlexibility" : "0",
+            "isOptional" : "0",
+            "dataType" : "Int32",
+            "formattedType" : "MultiArray (Int32 1)",
+            "shortDescription" : "",
+            "shape" : "[1]",
+            "name" : "current_pos",
+            "type" : "MultiArray"
+          }
+        ],
+        "computePrecision" : "Mixed (Float16, Int16, Int32, UInt16)",
+        "storagePrecision" : "Mixed (Float16, Palettized (14 bits), Palettized (16 bits), UInt6)",
+        "stateSchema" : [
+          {
+            "dataType" : "Float16",
+            "isOptional" : "0",
+            "formattedType" : "State (Float16 48 × 16 × 1024 × 128)",
+            "shortDescription" : "",
+            "shape" : "[48, 16, 1024, 128]",
+            "name" : "model_model_kv_cache_0",
+            "type" : "State"
+          }
+        ],
+        "outputSchema" : [
+          {
+            "hasShapeFlexibility" : "0",
+            "isOptional" : "0",
+            "dataType" : "Float16",
+            "formattedType" : "MultiArray (Float16 1 × 1 × 2048)",
+            "shortDescription" : "",
+            "shape" : "[1, 1, 2048]",
+            "name" : "output_hidden_states",
+            "type" : "MultiArray"
+          }
+        ],
+        "name" : "infer",
+        "mlProgramOperationTypeHistogram" : {
+          "Ios18.expandDims" : 96,
+          "Ios18.mul" : 338,
+          "Ios18.matmul" : 48,
+          "Identity" : 1,
+          "Ios18.exp" : 24,
+          "Ios18.realDiv" : 24,
+          "Ios18.greaterEqual" : 2,
+          "Select" : 2,
+          "Ios18.readState" : 49,
+          "Ios16.reduceMax" : 24,
+          "Ios18.gather" : 2,
+          "Ios18.add" : 123,
+          "Ios18.layerNorm" : 49,
+          "Ios18.sliceUpdate" : 48,
+          "Ios18.writeState" : 48,
+          "Ios18.reshape" : 146,
+          "Ios16.reduceSum" : 24,
+          "Ios18.constexprLutToDense" : 168,
+          "Ios18.conv" : 144,
+          "Ios18.concat" : 193,
+          "Ios18.transpose" : 96,
+          "Ios18.sub" : 72,
+          "Ios18.cast" : 5,
+          "Ios18.linear" : 24,
+          "Ios18.silu" : 24,
+          "Ios18.sliceByIndex" : 195,
+          "Ios18.squeeze" : 72
+        }
+      },
+      {
+        "inputSchema" : [
+          {
+            "hasShapeFlexibility" : "0",
+            "isOptional" : "0",
+            "dataType" : "Float16",
+            "formattedType" : "MultiArray (Float16 1 × 64 × 2048)",
+            "shortDescription" : "",
+            "shape" : "[1, 64, 2048]",
+            "name" : "hidden_states",
+            "type" : "MultiArray"
+          },
+          {
+            "hasShapeFlexibility" : "0",
+            "isOptional" : "0",
+            "dataType" : "Int32",
+            "formattedType" : "MultiArray (Int32 64)",
+            "shortDescription" : "",
+            "shape" : "[64]",
+            "name" : "position_ids",
+            "type" : "MultiArray"
+          },
+          {
+            "hasShapeFlexibility" : "0",
+            "isOptional" : "0",
+            "dataType" : "Float16",
+            "formattedType" : "MultiArray (Float16 1 × 1 × 64 × 1024)",
+            "shortDescription" : "",
+            "shape" : "[1, 1, 64, 1024]",
+            "name" : "causal_mask",
+            "type" : "MultiArray"
+          },
+          {
+            "hasShapeFlexibility" : "0",
+            "isOptional" : "0",
+            "dataType" : "Int32",
+            "formattedType" : "MultiArray (Int32 1)",
+            "shortDescription" : "",
+            "shape" : "[1]",
+            "name" : "current_pos",
+            "type" : "MultiArray"
+          }
+        ],
+        "computePrecision" : "Mixed (Float16, Int16, Int32, UInt16)",
+        "storagePrecision" : "Mixed (Float16, Palettized (14 bits), Palettized (16 bits), UInt6)",
+        "stateSchema" : [
+          {
+            "dataType" : "Float16",
+            "isOptional" : "0",
+            "formattedType" : "State (Float16 48 × 16 × 1024 × 128)",
+            "shortDescription" : "",
+            "shape" : "[48, 16, 1024, 128]",
+            "name" : "model_model_kv_cache_0",
+            "type" : "State"
+          }
+        ],
+        "outputSchema" : [
+          {
+            "hasShapeFlexibility" : "0",
+            "isOptional" : "0",
+            "dataType" : "Float16",
+            "formattedType" : "MultiArray (Float16 1 × 1 × 2048)",
+            "shortDescription" : "",
+            "shape" : "[1, 1, 2048]",
+            "name" : "output_hidden_states",
+            "type" : "MultiArray"
+          }
+        ],
+        "name" : "prefill",
+        "mlProgramOperationTypeHistogram" : {
+          "Ios18.expandDims" : 95,
+          "Ios18.mul" : 333,
+          "Ios18.matmul" : 48,
+          "Ios18.exp" : 24,
+          "Ios18.realDiv" : 24,
+          "Ios18.greaterEqual" : 2,
+          "Select" : 2,
+          "Ios18.readState" : 49,
+          "Ios16.reduceMax" : 24,
+          "Ios18.gather" : 2,
+          "Ios18.add" : 122,
+          "Ios18.layerNorm" : 47,
+          "Ios18.sliceUpdate" : 48,
+          "Ios18.writeState" : 48,
+          "Ios18.reshape" : 194,
+          "Ios16.reduceSum" : 24,
+          "Ios18.constexprLutToDense" : 165,
+          "Ios18.conv" : 141,
+          "Ios18.concat" : 191,
+          "Ios18.transpose" : 168,
+          "Ios18.sub" : 72,
+          "Ios18.cast" : 5,
+          "Ios18.linear" : 24,
+          "Ios18.silu" : 23,
+          "Ios18.sliceByIndex" : 194,
+          "Ios18.squeeze" : 71
+        }
+      }
+    ],
+    "version" : "0.1.1",
+    "isUpdatable" : "0",
+    "defaultFunctionName" : "infer",
+    "specificationVersion" : 9,
+    "stateSchema" : [
+      {
+        "dataType" : "Float16",
+        "isOptional" : "0",
+        "formattedType" : "State (Float16 48 × 16 × 1024 × 128)",
+        "shortDescription" : "",
+        "shape" : "[48, 16, 1024, 128]",
+        "name" : "model_model_kv_cache_0",
+        "type" : "State"
+      }
+    ],
+    "computePrecision" : "Mixed (Float16, Int16, Int32, UInt16)",
+    "mlProgramOperationTypeHistogram" : {
+      "Ios18.expandDims" : 96,
+      "Ios18.mul" : 338,
+      "Ios18.matmul" : 48,
+      "Identity" : 1,
+      "Ios18.exp" : 24,
+      "Ios18.realDiv" : 24,
+      "Ios18.greaterEqual" : 2,
+      "Select" : 2,
+      "Ios18.readState" : 49,
+      "Ios16.reduceMax" : 24,
+      "Ios18.gather" : 2,
+      "Ios18.add" : 123,
+      "Ios18.layerNorm" : 49,
+      "Ios18.sliceUpdate" : 48,
+      "Ios18.writeState" : 48,
+      "Ios18.reshape" : 146,
+      "Ios16.reduceSum" : 24,
+      "Ios18.constexprLutToDense" : 168,
+      "Ios18.conv" : 144,
+      "Ios18.concat" : 193,
+      "Ios18.transpose" : 96,
+      "Ios18.sub" : 72,
+      "Ios18.cast" : 5,
+      "Ios18.linear" : 24,
+      "Ios18.silu" : 24,
+      "Ios18.sliceByIndex" : 195,
+      "Ios18.squeeze" : 72
+    },
+    "shortDescription" : "Anemll Model: Multifunction FFN+Prefill",
+    "generatedClassName" : "llama_FFN_PF_lut6_chunk_01of01",
+    "author" : "Converted with Anemll v0.1.1",
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    }
+  }
+]

llama_FFN_PF_lut6_chunk_01of01.mlmodelc/model.mil ADDED Viewed

The diff for this file is too large to render. See raw diff

llama_FFN_PF_lut6_chunk_01of01.mlmodelc/weights/weight.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7ab7d9694da65b9cd8a5d99c562a28b0498188ae7e538920a3f406985fcece2c
+size 920125760

llama_FFN_PF_lut6_chunk_01of01.mlpackage/Data/com.apple.CoreML/model.mlmodel ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:52c8672f083aacdae7f87be7e6c5bc73ee01f8a03f5e864fbfc03824cea0d1e3
+size 1614653

llama_FFN_PF_lut6_chunk_01of01.mlpackage/Data/com.apple.CoreML/weights/weight.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7ab7d9694da65b9cd8a5d99c562a28b0498188ae7e538920a3f406985fcece2c
+size 920125760

llama_FFN_PF_lut6_chunk_01of01.mlpackage/Manifest.json ADDED Viewed

	@@ -0,0 +1,18 @@

+{
+    "fileFormatVersion": "1.0.0",
+    "itemInfoEntries": {
+        "5E6563AB-A377-430B-89CB-BE475D879D8A": {
+            "author": "com.apple.CoreML",
+            "description": "CoreML Model Weights",
+            "name": "weights",
+            "path": "com.apple.CoreML/weights"
+        },
+        "AC66F985-E4E6-412A-B887-C17CCC4A5C83": {
+            "author": "com.apple.CoreML",
+            "description": "CoreML Model Specification",
+            "name": "model.mlmodel",
+            "path": "com.apple.CoreML/model.mlmodel"
+        }
+    },
+    "rootModelIdentifier": "AC66F985-E4E6-412A-B887-C17CCC4A5C83"
+}

llama_FFN_lut6_chunk_01of01.mlpackage/Data/com.apple.CoreML/model.mlmodel ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4f28bf2f88c1f443421055e42a1611da8386c8a6e5d900dc9faccf65bd81b260
+size 799512

llama_FFN_lut6_chunk_01of01.mlpackage/Data/com.apple.CoreML/weights/weight.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7ab7d9694da65b9cd8a5d99c562a28b0498188ae7e538920a3f406985fcece2c
+size 920125760

llama_FFN_lut6_chunk_01of01.mlpackage/Manifest.json ADDED Viewed

	@@ -0,0 +1,18 @@

+{
+    "fileFormatVersion": "1.0.0",
+    "itemInfoEntries": {
+        "17709A97-98C6-4B44-A8F2-956BB64BDDC2": {
+            "author": "com.apple.CoreML",
+            "description": "CoreML Model Specification",
+            "name": "model.mlmodel",
+            "path": "com.apple.CoreML/model.mlmodel"
+        },
+        "CC80579D-0AFC-4EF2-A2F5-0DB15E3A99C6": {
+            "author": "com.apple.CoreML",
+            "description": "CoreML Model Weights",
+            "name": "weights",
+            "path": "com.apple.CoreML/weights"
+        }
+    },
+    "rootModelIdentifier": "17709A97-98C6-4B44-A8F2-956BB64BDDC2"
+}

llama_embeddings.mlmodelc/analytics/coremldata.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ac37b9d556cd88e169770a2f1c53b97f74d632bdaa80bdb4e0ab0a0ca2212e8c
+size 243

llama_embeddings.mlmodelc/coremldata.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:26456d242e64e2bc27cf5ba609e9b49f77b8a5d174068db67b74d1187b469d24
+size 560

llama_embeddings.mlmodelc/metadata.json ADDED Viewed

	@@ -0,0 +1,72 @@

+[
+  {
+    "shortDescription" : "Anemll Model (Embeddings) converted to CoreML",
+    "metadataOutputVersion" : "3.0",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16)",
+        "shortDescription" : "",
+        "shape" : "[]",
+        "name" : "hidden_states",
+        "type" : "MultiArray"
+      }
+    ],
+    "version" : "0.1.1",
+    "modelParameters" : [
+    ],
+    "author" : "Converted with Anemll v0.1.1",
+    "specificationVersion" : 9,
+    "storagePrecision" : "Float16",
+    "mlProgramOperationTypeHistogram" : {
+      "Ios18.greaterEqual" : 1,
+      "Ios18.add" : 1,
+      "Ios18.cast" : 3,
+      "Select" : 1,
+      "Ios18.gather" : 1
+    },
+    "computePrecision" : "Mixed (Float16, Int16, Int32)",
+    "stateSchema" : [
+    ],
+    "isUpdatable" : "0",
+    "availability" : {
+      "macOS" : "15.0",
+      "tvOS" : "18.0",
+      "visionOS" : "2.0",
+      "watchOS" : "11.0",
+      "iOS" : "18.0",
+      "macCatalyst" : "18.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "inputSchema" : [
+      {
+        "shortDescription" : "",
+        "dataType" : "Int32",
+        "hasShapeFlexibility" : "1",
+        "isOptional" : "0",
+        "shapeFlexibility" : "1 × 1 | 1 × 64",
+        "formattedType" : "MultiArray (Int32 1 × 1)",
+        "type" : "MultiArray",
+        "shape" : "[1, 1]",
+        "name" : "input_ids",
+        "enumeratedShapes" : "[[1, 1], [1, 64]]"
+      }
+    ],
+    "userDefinedMetadata" : {
+      "com.anemll.info" : "Converted with Anemll v0.1.1",
+      "com.github.apple.coremltools.source_dialect" : "TorchScript",
+      "com.github.apple.coremltools.conversion_date" : "2026-03-16",
+      "com.github.apple.coremltools.source" : "torch==2.5.0",
+      "com.github.apple.coremltools.version" : "9.0",
+      "com.anemll.context_length" : "1024"
+    },
+    "generatedClassName" : "llama_embeddings",
+    "method" : "predict"
+  }
+]

llama_embeddings.mlmodelc/model.mil ADDED Viewed

	@@ -0,0 +1,22 @@

+program(1.3)
+[buildInfo = dict<string, string>({{"coremlc-component-MIL", "3510.2.1"}, {"coremlc-version", "3500.32.1"}, {"coremltools-component-torch", "2.5.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "9.0"}})]
+{
+    func main<ios18>(tensor<int32, [1, ?]> input_ids) [FlexibleShapeInformation = tuple<tuple<string, dict<string, tensor<int32, [?]>>>, tuple<string, dict<string, dict<string, tensor<int32, [?]>>>>>((("DefaultShapes", {{"input_ids", [1, 1]}}), ("EnumeratedShapes", {{"79ae981e", {{"input_ids", [1, 1]}}}, {"ed9b58c8", {{"input_ids", [1, 64]}}}})))] {
+            int32 hidden_states_batch_dims_0 = const()[name = string("hidden_states_batch_dims_0"), val = int32(0)];
+            bool hidden_states_validate_indices_0 = const()[name = string("hidden_states_validate_indices_0"), val = bool(false)];
+            tensor<fp16, [32256, 2048]> embed_tokens_weight_to_fp16 = const()[name = string("embed_tokens_weight_to_fp16"), val = tensor<fp16, [32256, 2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))];
+            string input_ids_to_int16_dtype_0 = const()[name = string("input_ids_to_int16_dtype_0"), val = string("int16")];
+            string cast_1_dtype_0 = const()[name = string("cast_1_dtype_0"), val = string("int32")];
+            int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)];
+            tensor<int16, [1, ?]> input_ids_to_int16 = cast(dtype = input_ids_to_int16_dtype_0, x = input_ids)[name = string("cast_4")];
+            tensor<int32, [1, ?]> cast_1 = cast(dtype = cast_1_dtype_0, x = input_ids_to_int16)[name = string("cast_3")];
+            tensor<bool, [1, ?]> greater_equal_0 = greater_equal(x = cast_1, y = greater_equal_0_y_0)[name = string("greater_equal_0")];
+            int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(32256)];
+            tensor<int32, [1, ?]> add_0 = add(x = cast_1, y = slice_by_index_0)[name = string("add_0")];
+            tensor<int32, [1, ?]> select_0 = select(a = cast_1, b = add_0, cond = greater_equal_0)[name = string("select_0")];
+            int32 hidden_states_cast_fp16_cast_uint16_axis_0 = const()[name = string("hidden_states_cast_fp16_cast_uint16_axis_0"), val = int32(0)];
+            string select_0_to_int16_dtype_0 = const()[name = string("select_0_to_int16_dtype_0"), val = string("int16")];
+            tensor<int16, [1, ?]> select_0_to_int16 = cast(dtype = select_0_to_int16_dtype_0, x = select_0)[name = string("cast_2")];
+            tensor<fp16, [1, ?, 2048]> hidden_states = gather(axis = hidden_states_cast_fp16_cast_uint16_axis_0, batch_dims = hidden_states_batch_dims_0, indices = select_0_to_int16, validate_indices = hidden_states_validate_indices_0, x = embed_tokens_weight_to_fp16)[name = string("hidden_states_cast_fp16_cast_uint16_cast_uint16")];
+        } -> (hidden_states);
+}

llama_embeddings.mlmodelc/weights/weight.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:852a384d0c577ec63eb8faf2f7d7afa24aaca03b1d3e373fb5bb46fc07af283b
+size 132120704

llama_embeddings.mlpackage/Data/com.apple.CoreML/model.mlmodel ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:add2c2ca8fc5c414a622ea3c4edcf95c1b76826cdf3967b181a52522a979f1f8
+size 3061

llama_embeddings.mlpackage/Data/com.apple.CoreML/weights/weight.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:852a384d0c577ec63eb8faf2f7d7afa24aaca03b1d3e373fb5bb46fc07af283b
+size 132120704

llama_embeddings.mlpackage/Manifest.json ADDED Viewed

	@@ -0,0 +1,18 @@

+{
+    "fileFormatVersion": "1.0.0",
+    "itemInfoEntries": {
+        "268450C4-E730-4EC7-A687-801422F8A707": {
+            "author": "com.apple.CoreML",
+            "description": "CoreML Model Weights",
+            "name": "weights",
+            "path": "com.apple.CoreML/weights"
+        },
+        "A9E16A8E-F35B-4363-A969-610989B4D6BD": {
+            "author": "com.apple.CoreML",
+            "description": "CoreML Model Specification",
+            "name": "model.mlmodel",
+            "path": "com.apple.CoreML/model.mlmodel"
+        }
+    },
+    "rootModelIdentifier": "A9E16A8E-F35B-4363-A969-610989B4D6BD"
+}

llama_lm_head_lut6.mlmodelc/analytics/coremldata.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:623184d69c0a04ba476276153492df1b10e3462375b86ced39ad1136ae5aace0
+size 243

llama_lm_head_lut6.mlmodelc/coremldata.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:339f31ecd0d721647b8812000e0fd986fa01cd24f95b3db3ff45d2b0a90f240e
+size 859

llama_lm_head_lut6.mlmodelc/metadata.json ADDED Viewed

	@@ -0,0 +1,143 @@

+[
+  {
+    "shortDescription" : "Anemll Model (LM Head) converted to CoreML",
+    "metadataOutputVersion" : "3.0",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1 × 4032)",
+        "shortDescription" : "",
+        "shape" : "[1, 1, 4032]",
+        "name" : "logits1",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1 × 4032)",
+        "shortDescription" : "",
+        "shape" : "[1, 1, 4032]",
+        "name" : "logits2",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1 × 4032)",
+        "shortDescription" : "",
+        "shape" : "[1, 1, 4032]",
+        "name" : "logits3",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1 × 4032)",
+        "shortDescription" : "",
+        "shape" : "[1, 1, 4032]",
+        "name" : "logits4",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1 × 4032)",
+        "shortDescription" : "",
+        "shape" : "[1, 1, 4032]",
+        "name" : "logits5",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1 × 4032)",
+        "shortDescription" : "",
+        "shape" : "[1, 1, 4032]",
+        "name" : "logits6",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1 × 4032)",
+        "shortDescription" : "",
+        "shape" : "[1, 1, 4032]",
+        "name" : "logits7",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1 × 4032)",
+        "shortDescription" : "",
+        "shape" : "[1, 1, 4032]",
+        "name" : "logits8",
+        "type" : "MultiArray"
+      }
+    ],
+    "version" : "0.1.1",
+    "modelParameters" : [
+    ],
+    "author" : "Converted with Anemll v0.1.1",
+    "specificationVersion" : 9,
+    "storagePrecision" : "Mixed (Float16, Palettized (15 bits), UInt6)",
+    "mlProgramOperationTypeHistogram" : {
+      "Ios18.transpose" : 9,
+      "Ios18.constexprLutToDense" : 8,
+      "Ios18.expandDims" : 1,
+      "Ios18.conv" : 8,
+      "Ios18.squeeze" : 8
+    },
+    "computePrecision" : "Mixed (Float16, Int32)",
+    "stateSchema" : [
+    ],
+    "isUpdatable" : "0",
+    "availability" : {
+      "macOS" : "15.0",
+      "tvOS" : "18.0",
+      "visionOS" : "2.0",
+      "watchOS" : "11.0",
+      "iOS" : "18.0",
+      "macCatalyst" : "18.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "inputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1 × 2048)",
+        "shortDescription" : "",
+        "shape" : "[1, 1, 2048]",
+        "name" : "hidden_states",
+        "type" : "MultiArray"
+      }
+    ],
+    "userDefinedMetadata" : {
+      "com.github.apple.coremltools.source" : "torch==2.5.0",
+      "com.github.apple.coremltools.version" : "9.0",
+      "com.anemll.context_length" : "1024",
+      "com.github.apple.coremltools.source_dialect" : "TorchScript",
+      "com.anemll.lm_head_chunk_sizes" : "4032,4032,4032,4032,4032,4032,4032,4032",
+      "com.github.apple.coremltools.conversion_date" : "2026-03-16",
+      "com.anemll.vocab_size" : "32256",
+      "com.anemll.info" : "Converted with Anemll v0.1.1",
+      "com.anemll.lut_bits" : "6"
+    },
+    "generatedClassName" : "llama_lm_head_lut6",
+    "method" : "predict"
+  }
+]

llama_lm_head_lut6.mlmodelc/model.mil ADDED Viewed

	@@ -0,0 +1,98 @@

+program(1.3)
+[buildInfo = dict<string, string>({{"coremlc-component-MIL", "3510.2.1"}, {"coremlc-version", "3500.32.1"}})]
+{
+    func main<ios18>(tensor<fp16, [1, 1, 2048]> hidden_states) {
+            tensor<int32, [3]> var_5 = const()[name = string("op_5"), val = tensor<int32, [3]>([0, 2, 1])];
+            tensor<int32, [1]> input_axes_0 = const()[name = string("input_axes_0"), val = tensor<int32, [1]>([2])];
+            tensor<fp16, [1, 2048, 1]> var_6_cast_fp16 = transpose(perm = var_5, x = hidden_states)[name = string("transpose_8")];
+            tensor<fp16, [1, 2048, 1, 1]> input_cast_fp16 = expand_dims(axes = input_axes_0, x = var_6_cast_fp16)[name = string("input_cast_fp16")];
+            string var_29_pad_type_0 = const()[name = string("op_29_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_29_strides_0 = const()[name = string("op_29_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_29_pad_0 = const()[name = string("op_29_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_29_dilations_0 = const()[name = string("op_29_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_29_groups_0 = const()[name = string("op_29_groups_0"), val = int32(1)];
+            tensor<fp16, [4032, 2048, 1, 1]> op_9_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint6, [4032, 2048, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), lut = tensor<fp16, [504, 1, 1, 1, 64, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6193280))))[name = string("op_9_promoted_to_fp16_palettized")];
+            tensor<fp16, [1, 4032, 1, 1]> var_29_cast_fp16 = conv(dilations = var_29_dilations_0, groups = var_29_groups_0, pad = var_29_pad_0, pad_type = var_29_pad_type_0, strides = var_29_strides_0, weight = op_9_promoted_to_fp16_palettized, x = input_cast_fp16)[name = string("op_29_cast_fp16")];
+            tensor<int32, [1]> var_31_axes_0 = const()[name = string("op_31_axes_0"), val = tensor<int32, [1]>([2])];
+            tensor<fp16, [1, 4032, 1]> var_31_cast_fp16 = squeeze(axes = var_31_axes_0, x = var_29_cast_fp16)[name = string("op_31_cast_fp16")];
+            tensor<int32, [3]> var_34_perm_0 = const()[name = string("op_34_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
+            string var_55_pad_type_0 = const()[name = string("op_55_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_55_strides_0 = const()[name = string("op_55_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_55_pad_0 = const()[name = string("op_55_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_55_dilations_0 = const()[name = string("op_55_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_55_groups_0 = const()[name = string("op_55_groups_0"), val = int32(1)];
+            tensor<fp16, [4032, 2048, 1, 1]> op_35_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint6, [4032, 2048, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6257856))), lut = tensor<fp16, [504, 1, 1, 1, 64, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12451072))))[name = string("op_35_promoted_to_fp16_palettized")];
+            tensor<fp16, [1, 4032, 1, 1]> var_55_cast_fp16 = conv(dilations = var_55_dilations_0, groups = var_55_groups_0, pad = var_55_pad_0, pad_type = var_55_pad_type_0, strides = var_55_strides_0, weight = op_35_promoted_to_fp16_palettized, x = input_cast_fp16)[name = string("op_55_cast_fp16")];
+            tensor<int32, [1]> var_57_axes_0 = const()[name = string("op_57_axes_0"), val = tensor<int32, [1]>([2])];
+            tensor<fp16, [1, 4032, 1]> var_57_cast_fp16 = squeeze(axes = var_57_axes_0, x = var_55_cast_fp16)[name = string("op_57_cast_fp16")];
+            tensor<int32, [3]> var_60_perm_0 = const()[name = string("op_60_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
+            string var_81_pad_type_0 = const()[name = string("op_81_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_81_strides_0 = const()[name = string("op_81_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_81_pad_0 = const()[name = string("op_81_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_81_dilations_0 = const()[name = string("op_81_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_81_groups_0 = const()[name = string("op_81_groups_0"), val = int32(1)];
+            tensor<fp16, [4032, 2048, 1, 1]> op_61_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint6, [4032, 2048, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12515648))), lut = tensor<fp16, [504, 1, 1, 1, 64, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18708864))))[name = string("op_61_promoted_to_fp16_palettized")];
+            tensor<fp16, [1, 4032, 1, 1]> var_81_cast_fp16 = conv(dilations = var_81_dilations_0, groups = var_81_groups_0, pad = var_81_pad_0, pad_type = var_81_pad_type_0, strides = var_81_strides_0, weight = op_61_promoted_to_fp16_palettized, x = input_cast_fp16)[name = string("op_81_cast_fp16")];
+            tensor<int32, [1]> var_83_axes_0 = const()[name = string("op_83_axes_0"), val = tensor<int32, [1]>([2])];
+            tensor<fp16, [1, 4032, 1]> var_83_cast_fp16 = squeeze(axes = var_83_axes_0, x = var_81_cast_fp16)[name = string("op_83_cast_fp16")];
+            tensor<int32, [3]> var_86_perm_0 = const()[name = string("op_86_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
+            string var_107_pad_type_0 = const()[name = string("op_107_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_107_strides_0 = const()[name = string("op_107_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_107_pad_0 = const()[name = string("op_107_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_107_dilations_0 = const()[name = string("op_107_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_107_groups_0 = const()[name = string("op_107_groups_0"), val = int32(1)];
+            tensor<fp16, [4032, 2048, 1, 1]> op_87_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint6, [4032, 2048, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18773440))), lut = tensor<fp16, [504, 1, 1, 1, 64, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(24966656))))[name = string("op_87_promoted_to_fp16_palettized")];
+            tensor<fp16, [1, 4032, 1, 1]> var_107_cast_fp16 = conv(dilations = var_107_dilations_0, groups = var_107_groups_0, pad = var_107_pad_0, pad_type = var_107_pad_type_0, strides = var_107_strides_0, weight = op_87_promoted_to_fp16_palettized, x = input_cast_fp16)[name = string("op_107_cast_fp16")];
+            tensor<int32, [1]> var_109_axes_0 = const()[name = string("op_109_axes_0"), val = tensor<int32, [1]>([2])];
+            tensor<fp16, [1, 4032, 1]> var_109_cast_fp16 = squeeze(axes = var_109_axes_0, x = var_107_cast_fp16)[name = string("op_109_cast_fp16")];
+            tensor<int32, [3]> var_112_perm_0 = const()[name = string("op_112_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
+            string var_133_pad_type_0 = const()[name = string("op_133_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_133_strides_0 = const()[name = string("op_133_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_133_pad_0 = const()[name = string("op_133_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_133_dilations_0 = const()[name = string("op_133_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_133_groups_0 = const()[name = string("op_133_groups_0"), val = int32(1)];
+            tensor<fp16, [4032, 2048, 1, 1]> op_113_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint6, [4032, 2048, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25031232))), lut = tensor<fp16, [504, 1, 1, 1, 64, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31224448))))[name = string("op_113_promoted_to_fp16_palettized")];
+            tensor<fp16, [1, 4032, 1, 1]> var_133_cast_fp16 = conv(dilations = var_133_dilations_0, groups = var_133_groups_0, pad = var_133_pad_0, pad_type = var_133_pad_type_0, strides = var_133_strides_0, weight = op_113_promoted_to_fp16_palettized, x = input_cast_fp16)[name = string("op_133_cast_fp16")];
+            tensor<int32, [1]> var_135_axes_0 = const()[name = string("op_135_axes_0"), val = tensor<int32, [1]>([2])];
+            tensor<fp16, [1, 4032, 1]> var_135_cast_fp16 = squeeze(axes = var_135_axes_0, x = var_133_cast_fp16)[name = string("op_135_cast_fp16")];
+            tensor<int32, [3]> var_138_perm_0 = const()[name = string("op_138_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
+            string var_159_pad_type_0 = const()[name = string("op_159_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_159_strides_0 = const()[name = string("op_159_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_159_pad_0 = const()[name = string("op_159_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_159_dilations_0 = const()[name = string("op_159_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_159_groups_0 = const()[name = string("op_159_groups_0"), val = int32(1)];
+            tensor<fp16, [4032, 2048, 1, 1]> op_139_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint6, [4032, 2048, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31289024))), lut = tensor<fp16, [504, 1, 1, 1, 64, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37482240))))[name = string("op_139_promoted_to_fp16_palettized")];
+            tensor<fp16, [1, 4032, 1, 1]> var_159_cast_fp16 = conv(dilations = var_159_dilations_0, groups = var_159_groups_0, pad = var_159_pad_0, pad_type = var_159_pad_type_0, strides = var_159_strides_0, weight = op_139_promoted_to_fp16_palettized, x = input_cast_fp16)[name = string("op_159_cast_fp16")];
+            tensor<int32, [1]> var_161_axes_0 = const()[name = string("op_161_axes_0"), val = tensor<int32, [1]>([2])];
+            tensor<fp16, [1, 4032, 1]> var_161_cast_fp16 = squeeze(axes = var_161_axes_0, x = var_159_cast_fp16)[name = string("op_161_cast_fp16")];
+            tensor<int32, [3]> var_164_perm_0 = const()[name = string("op_164_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
+            string var_185_pad_type_0 = const()[name = string("op_185_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_185_strides_0 = const()[name = string("op_185_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_185_pad_0 = const()[name = string("op_185_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_185_dilations_0 = const()[name = string("op_185_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_185_groups_0 = const()[name = string("op_185_groups_0"), val = int32(1)];
+            tensor<fp16, [4032, 2048, 1, 1]> op_165_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint6, [4032, 2048, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37546816))), lut = tensor<fp16, [504, 1, 1, 1, 64, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43740032))))[name = string("op_165_promoted_to_fp16_palettized")];
+            tensor<fp16, [1, 4032, 1, 1]> var_185_cast_fp16 = conv(dilations = var_185_dilations_0, groups = var_185_groups_0, pad = var_185_pad_0, pad_type = var_185_pad_type_0, strides = var_185_strides_0, weight = op_165_promoted_to_fp16_palettized, x = input_cast_fp16)[name = string("op_185_cast_fp16")];
+            tensor<int32, [1]> var_187_axes_0 = const()[name = string("op_187_axes_0"), val = tensor<int32, [1]>([2])];
+            tensor<fp16, [1, 4032, 1]> var_187_cast_fp16 = squeeze(axes = var_187_axes_0, x = var_185_cast_fp16)[name = string("op_187_cast_fp16")];
+            tensor<int32, [3]> var_190_perm_0 = const()[name = string("op_190_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
+            string var_211_pad_type_0 = const()[name = string("op_211_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_211_strides_0 = const()[name = string("op_211_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_211_pad_0 = const()[name = string("op_211_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_211_dilations_0 = const()[name = string("op_211_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_211_groups_0 = const()[name = string("op_211_groups_0"), val = int32(1)];
+            tensor<fp16, [4032, 2048, 1, 1]> op_191_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint6, [4032, 2048, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43804608))), lut = tensor<fp16, [504, 1, 1, 1, 64, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49997824))))[name = string("op_191_promoted_to_fp16_palettized")];
+            tensor<fp16, [1, 4032, 1, 1]> var_211_cast_fp16 = conv(dilations = var_211_dilations_0, groups = var_211_groups_0, pad = var_211_pad_0, pad_type = var_211_pad_type_0, strides = var_211_strides_0, weight = op_191_promoted_to_fp16_palettized, x = input_cast_fp16)[name = string("op_211_cast_fp16")];
+            tensor<int32, [1]> var_213_axes_0 = const()[name = string("op_213_axes_0"), val = tensor<int32, [1]>([2])];
+            tensor<fp16, [1, 4032, 1]> var_213_cast_fp16 = squeeze(axes = var_213_axes_0, x = var_211_cast_fp16)[name = string("op_213_cast_fp16")];
+            tensor<int32, [3]> var_216_perm_0 = const()[name = string("op_216_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
+            tensor<fp16, [1, 1, 4032]> logits1 = transpose(perm = var_34_perm_0, x = var_31_cast_fp16)[name = string("transpose_0")];
+            tensor<fp16, [1, 1, 4032]> logits2 = transpose(perm = var_60_perm_0, x = var_57_cast_fp16)[name = string("transpose_1")];
+            tensor<fp16, [1, 1, 4032]> logits3 = transpose(perm = var_86_perm_0, x = var_83_cast_fp16)[name = string("transpose_2")];
+            tensor<fp16, [1, 1, 4032]> logits4 = transpose(perm = var_112_perm_0, x = var_109_cast_fp16)[name = string("transpose_3")];
+            tensor<fp16, [1, 1, 4032]> logits5 = transpose(perm = var_138_perm_0, x = var_135_cast_fp16)[name = string("transpose_4")];
+            tensor<fp16, [1, 1, 4032]> logits6 = transpose(perm = var_164_perm_0, x = var_161_cast_fp16)[name = string("transpose_5")];
+            tensor<fp16, [1, 1, 4032]> logits7 = transpose(perm = var_190_perm_0, x = var_187_cast_fp16)[name = string("transpose_6")];
+            tensor<fp16, [1, 1, 4032]> logits8 = transpose(perm = var_216_perm_0, x = var_213_cast_fp16)[name = string("transpose_7")];
+        } -> (logits1, logits2, logits3, logits4, logits5, logits6, logits7, logits8);
+}

llama_lm_head_lut6.mlmodelc/weights/weight.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b8194981a11603b43da42a4d396dc17c54f1c5c637b990c78b38b9bf542b9acf
+size 50062400

llama_lm_head_lut6.mlpackage/Data/com.apple.CoreML/model.mlmodel ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:65606ee6e0e0a887b8569deccba6febe7778206768d6d3685a921729d585ec13
+size 15426

llama_lm_head_lut6.mlpackage/Data/com.apple.CoreML/weights/weight.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b8194981a11603b43da42a4d396dc17c54f1c5c637b990c78b38b9bf542b9acf
+size 50062400

llama_lm_head_lut6.mlpackage/Manifest.json ADDED Viewed

	@@ -0,0 +1,18 @@

+{
+    "fileFormatVersion": "1.0.0",
+    "itemInfoEntries": {
+        "4321CC1B-5826-4BD9-8829-91AF793943AB": {
+            "author": "com.apple.CoreML",
+            "description": "CoreML Model Weights",
+            "name": "weights",
+            "path": "com.apple.CoreML/weights"
+        },
+        "EE098C18-E716-451B-AD13-2BB790200715": {
+            "author": "com.apple.CoreML",
+            "description": "CoreML Model Specification",
+            "name": "model.mlmodel",
+            "path": "com.apple.CoreML/model.mlmodel"
+        }
+    },
+    "rootModelIdentifier": "EE098C18-E716-451B-AD13-2BB790200715"
+}

llama_prefill_lut6_chunk_01of01.mlpackage/Data/com.apple.CoreML/model.mlmodel ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:77e3bb2cc6c422f9bfb6967cfb411da41b53bb8f4395966916d3b7110f827a1d
+size 815469

llama_prefill_lut6_chunk_01of01.mlpackage/Data/com.apple.CoreML/weights/weight.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7adf027ba3d13d28df912eccaf78d1c131fac5f95eca6c8e2f44c299d3370d26
+size 894545728

llama_prefill_lut6_chunk_01of01.mlpackage/Manifest.json ADDED Viewed

	@@ -0,0 +1,18 @@

+{
+    "fileFormatVersion": "1.0.0",
+    "itemInfoEntries": {
+        "66205458-4F89-4B4F-B7B7-199ECD4384FF": {
+            "author": "com.apple.CoreML",
+            "description": "CoreML Model Weights",
+            "name": "weights",
+            "path": "com.apple.CoreML/weights"
+        },
+        "6CD10062-0555-40A2-A11B-0ED3A96265F4": {
+            "author": "com.apple.CoreML",
+            "description": "CoreML Model Specification",
+            "name": "model.mlmodel",
+            "path": "com.apple.CoreML/model.mlmodel"
+        }
+    },
+    "rootModelIdentifier": "6CD10062-0555-40A2-A11B-0ED3A96265F4"
+}

meta.yaml ADDED Viewed

	@@ -0,0 +1,54 @@

+model_info:
+  name: anemll-Prem-1B-SQL-ctx1024
+  version: 0.3.5
+  description: |
+    Demonstrates running Prem-1B-SQL on Apple Neural Engine
+    Context length: 1024
+    Batch size: 64
+    Chunks: 1
+  license: MIT
+  author: Anemll
+  framework: Core ML
+  language: Python
+  architecture: llama
+  parameters:
+    context_length: 1024
+    batch_size: 64
+    lut_embeddings: none
+    lut_ffn: 6
+    lut_lmhead: 6
+    num_chunks: 1
+    model_prefix: llama
+    embeddings: llama_embeddings.mlmodelc
+    lm_head: llama_lm_head_lut6.mlmodelc
+    ffn: llama_FFN_PF_lut6_chunk_01of01.mlmodelc
+    split_lm_head: 8
+    vocab_size: 32256
+    lm_head_chunk_sizes: [4032, 4032, 4032, 4032, 4032, 4032, 4032, 4032]
+    prefill_dynamic_slice: true
+# =============================================================================
+# Conversion Parameters (for troubleshooting)
+# =============================================================================
+# Generated: 2026-03-16 19:55:12
+#
+# model_path: /tmp/ios_models/downloads/Prem-1B-SQL
+# output_dir: /tmp/ios_models/Prem-1B-SQL-ctx1024
+# command_line: ./anemll/utils/convert_model.sh --model /tmp/ios_models/downloads/Prem-1B-SQL --output /tmp/ios_models/Prem-1B-SQL-ctx1024 --context 1024 --batch 64 --chunk 1 --lut2 6 --lut3 6 --prefix llama
+# context_length: 1024
+# batch_size: 64
+# num_chunks: 1
+# lut_part1: none
+# lut_part2: 6
+# lut_part3: 6
+# prefix: llama
+# architecture: llama
+# argmax_in_model: false
+# split_rotate: false
+# single_cache: false
+# dynamic_prefill_slice: true
+# monolithic: false
+# anemll_version: 0.3.5
+# vocab_size: 32256
+# lm_head_chunk_sizes: "[4032, 4032, 4032, 4032, 4032, 4032, 4032, 4032]"
+# =============================================================================

meta_progress.yaml ADDED Viewed

	@@ -0,0 +1,51 @@

+# Conversion in progress - this file is for monitoring only
+# Final meta.yaml will be created at step 7
+conversion:
+  status: in_progress
+  start_time: 2026-03-16T16:14:15Z
+  model_path: /tmp/ios_models/downloads/Prem-1B-SQL
+  output_dir: /tmp/ios_models/Prem-1B-SQL-ctx1024
+  context_length: 1024
+  batch_size: 64
+  num_chunks: 1
+  prefix: llama
+  architecture: llama
+  lut_part1: none
+  lut_part2: 6
+  lut_part3: 6
+  fp16_scale: none
+  argmax: false
+  split_rotate: false
+steps:
+  - name: embeddings
+    part: 1
+    status: pending
+  - name: lm_head
+    part: 3
+    status: pending
+  - name: ffn
+    part: 2
+    status: pending
+  - name: prefill
+    part: 2_prefill
+    status: pending
+  - name: ffn_rotate
+    part: 2_rotate
+    status: pending
+    gemma3_only: true
+  - name: prefill_rotate
+    part: 2_prefill_rotate
+    status: pending
+    gemma3_only: true
+  - name: combine
+    part: 5
+    status: pending
+  - name: compile
+    part: 6
+    status: pending
+  - name: tokenizer
+    part: 7
+    status: pending
+  - name: test
+    part: 8
+    status: pending

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,23 @@

+{
+  "bos_token": {
+    "content": "<｜begin▁of▁sentence｜>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<|EOT|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<｜end▁of▁sentence｜>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,199 @@

+{
+  "add_bos_token": true,
+  "add_eos_token": false,
+  "add_prefix_space": null,
+  "added_tokens_decoder": {
+    "32000": {
+      "content": "õ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "32001": {
+      "content": "÷",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "32002": {
+      "content": "Á",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "32003": {
+      "content": "ý",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "32004": {
+      "content": "À",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "32005": {
+      "content": "ÿ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "32006": {
+      "content": "ø",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "32007": {
+      "content": "ú",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "32008": {
+      "content": "þ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "32009": {
+      "content": "ü",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "32010": {
+      "content": "ù",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "32011": {
+      "content": "ö",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "32012": {
+      "content": "û",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "32013": {
+      "content": "<｜begin▁of▁sentence｜>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32014": {
+      "content": "<｜end▁of▁sentence｜>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32015": {
+      "content": "<｜fim▁hole｜>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "32016": {
+      "content": "<｜fim▁begin｜>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "32017": {
+      "content": "<｜fim▁end｜>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "32018": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "32019": {
+      "content": "<|User|>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "32020": {
+      "content": "<|Assistant|>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "32021": {
+      "content": "<|EOT|>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<｜begin▁of▁sentence｜>",
+  "chat_template": "{% if not add_generation_prompt is defined %}\n{% set add_generation_prompt = false %}\n{% endif %}\n{%- set ns = namespace(found=false) -%}\n{%- for message in messages -%}\n    {%- if message['role'] == 'system' -%}\n        {%- set ns.found = true -%}\n    {%- endif -%}\n{%- endfor -%}\n{{bos_token}}{%- if not ns.found -%}\n{{'You are an AI programming assistant, utilizing the Deepseek Coder model, developed by Deepseek Company, and you only answer questions related to computer science. For politically sensitive questions, security and privacy issues, and other non-computer science questions, you will refuse to answer\\n'}}\n{%- endif %}\n{%- for message in messages %}\n    {%- if message['role'] == 'system' %}\n{{ message['content'] }}\n    {%- else %}\n        {%- if message['role'] == 'user' %}\n{{'### Instruction:\\n' + message['content'] + '\\n'}}\n        {%- else %}\n{{'### Response:\\n' + message['content'] + '\\n<|EOT|>\\n'}}\n        {%- endif %}\n    {%- endif %}\n{%- endfor %}\n{% if add_generation_prompt %}\n{{'### Response:'}}\n{% endif %}",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|EOT|>",
+  "legacy": true,
+  "max_length": null,
+  "model_max_length": 16384,
+  "pad_to_multiple_of": null,
+  "pad_token": "<｜end▁of▁sentence｜>",
+  "pad_token_type_id": 0,
+  "padding_side": "left",
+  "padding_size": "right",
+  "sp_model_kwargs": {},
+  "tokenizer_class": "LlamaTokenizer",
+  "unk_token": null,
+  "use_default_system_prompt": false
+}