mlboydaisuke committed on
Commit
385d64e
·
verified ·
1 Parent(s): a3d3bad

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ hf_model/tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ tags:
4
+ - coreml
5
+ - gemma4
6
+ - multimodal
7
+ - vision
8
+ - on-device
9
+ - ane
10
+ base_model: google/gemma-4-E2B-it
11
+ pipeline_tag: image-text-to-text
12
+ ---
13
+
14
+ # Gemma 4 E2B — CoreML (ANE+GPU Optimized)
15
+
16
+ Converted from [google/gemma-4-E2B-it](https://huggingface.co/google/gemma-4-E2B-it) for on-device inference on Apple devices via CoreML.
17
+
18
+ ## Models
19
+
20
+ | File | Size | Description |
21
+ |------|------|-------------|
22
+ | `model.mlpackage` | 2.4 GB | Text decoder with stateful KV cache (int4 quantized) |
23
+ | `vision.mlpackage` | 322 MB | Vision encoder (SigLIP-based, 16 transformer layers) |
24
+ | `model_config.json` | — | Model configuration |
25
+ | `hf_model/tokenizer.json` | 31 MB | Tokenizer |
26
+
27
+ ## Features
28
+
29
+ - **Multimodal**: Image + text input → text output
30
+ - **ANE-optimized**: Conv2d linear layers, ANE RMSNorm, in-model argmax
31
+ - **Stateful KV cache**: MLState API (iOS 18+)
32
+ - **Int4 quantized**: Block-wise palettization (group_size=32)
33
+ - **HF-exact match**: "solid red square centered on white background" ✅
34
+
35
+ ## Usage
36
+
37
+ ```python
38
+ import coremltools as ct
39
+ import numpy as np
40
+
41
+ # Load models
42
+ vision = ct.models.MLModel('vision.mlpackage')
43
+ decoder = ct.models.MLModel('model.mlpackage')
44
+ state = decoder.make_state()
45
+
46
+ # Process image → vision features → text generation
47
+ ```
48
+
49
+ See [CoreML-LLM](https://github.com/john-rocky/CoreML-LLM) for the full conversion pipeline and iOS sample app.
50
+
51
+ ## Conversion
52
+
53
+ ```bash
54
+ git clone https://github.com/john-rocky/CoreML-LLM
55
+ cd CoreML-LLM/conversion
56
+ pip install -r requirements.txt
57
+ python convert.py --model gemma4-e2b --context-length 512 --output ./output/gemma4-e2b
58
+ ```
hf_model/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc8d3a0ce36466ccc1278bf987df5f71db1719b9ca6b4118264f45cb627bfe0f
3
+ size 32169626
hf_model/tokenizer_config.json ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "audio_token": "<|audio|>",
3
+ "backend": "tokenizers",
4
+ "boa_token": "<|audio>",
5
+ "boi_token": "<|image>",
6
+ "bos_token": "<bos>",
7
+ "eoa_token": "<audio|>",
8
+ "eoc_token": "<channel|>",
9
+ "eoi_token": "<image|>",
10
+ "eos_token": "<eos>",
11
+ "eot_token": "<turn|>",
12
+ "escape_token": "<|\"|>",
13
+ "etc_token": "<tool_call|>",
14
+ "etd_token": "<tool|>",
15
+ "etr_token": "<tool_response|>",
16
+ "extra_special_tokens": [
17
+ "<|video|>"
18
+ ],
19
+ "image_token": "<|image|>",
20
+ "mask_token": "<mask>",
21
+ "model_max_length": 1000000000000000019884624838656,
22
+ "pad_token": "<pad>",
23
+ "padding_side": "left",
24
+ "processor_class": "Gemma4Processor",
25
+ "response_schema": {
26
+ "type": "object",
27
+ "properties": {
28
+ "role": {
29
+ "const": "assistant"
30
+ },
31
+ "thinking": {
32
+ "type": "string"
33
+ },
34
+ "content": {
35
+ "type": "string"
36
+ },
37
+ "tool_calls": {
38
+ "x-regex-iterator": "<\\|tool_call>(.*?)<tool_call\\|>",
39
+ "type": "array",
40
+ "items": {
41
+ "type": "object",
42
+ "properties": {
43
+ "type": {
44
+ "const": "function"
45
+ },
46
+ "function": {
47
+ "type": "object",
48
+ "x-regex": "call\\:(?P<name>\\w+)(?P<arguments>\\{.*\\})",
49
+ "properties": {
50
+ "name": {
51
+ "type": "string"
52
+ },
53
+ "arguments": {
54
+ "type": "object",
55
+ "x-parser": "gemma4-tool-call",
56
+ "additionalProperties": {}
57
+ }
58
+ }
59
+ }
60
+ }
61
+ }
62
+ }
63
+ },
64
+ "x-regex": "(\\<\\|channel\\>thought\\n(?P<thinking>.*?)\\<channel\\|\\>)?(?P<content>(?:(?!\\<\\|tool_call\\>)(?!\\<turn\\|\\>).)+)?(?P<tool_calls>\\<\\|tool_call\\>.*\\<tool_call\\|\\>)?(?:\\<turn\\|\\>)?"
65
+ },
66
+ "soc_token": "<|channel>",
67
+ "sot_token": "<|turn>",
68
+ "stc_token": "<|tool_call>",
69
+ "std_token": "<|tool>",
70
+ "str_token": "<|tool_response>",
71
+ "think_token": "<|think|>",
72
+ "tokenizer_class": "GemmaTokenizer",
73
+ "unk_token": "<unk>"
74
+ }
model.mlpackage/Data/com.apple.CoreML/model.mlmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f789a84c767c3b5fa8f3b294c881ed7c992219def201a437a75885e91cb157a6
3
+ size 1381053
model.mlpackage/Data/com.apple.CoreML/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e574be929a357f1fdf0e2e25ec784eeeb93c520441349d1177b56fd6e871b3d
3
+ size 2518622912
model.mlpackage/Manifest.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "fileFormatVersion": "1.0.0",
3
+ "itemInfoEntries": {
4
+ "7A93380B-5D08-43C0-8B8D-68D2ACD08A4E": {
5
+ "author": "com.apple.CoreML",
6
+ "description": "CoreML Model Weights",
7
+ "name": "weights",
8
+ "path": "com.apple.CoreML/weights"
9
+ },
10
+ "D3C832CC-AC75-4898-B6F6-136858D00041": {
11
+ "author": "com.apple.CoreML",
12
+ "description": "CoreML Model Specification",
13
+ "name": "model.mlmodel",
14
+ "path": "com.apple.CoreML/model.mlmodel"
15
+ }
16
+ },
17
+ "rootModelIdentifier": "D3C832CC-AC75-4898-B6F6-136858D00041"
18
+ }
model_config.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_name": "gemma4-e2b",
3
+ "architecture": "gemma4",
4
+ "hidden_size": 1536,
5
+ "num_hidden_layers": 35,
6
+ "num_attention_heads": 8,
7
+ "num_key_value_heads": 1,
8
+ "head_dim": 256,
9
+ "vocab_size": 262144,
10
+ "context_length": 512,
11
+ "rms_norm_eps": 1e-06,
12
+ "bos_token_id": 2,
13
+ "eos_token_id": 1,
14
+ "quantization": "int4",
15
+ "compute_units": "ALL",
16
+ "parts": {
17
+ "model": "model.mlpackage"
18
+ },
19
+ "tokenizer_repo": "google/gemma-4-E2B-it"
20
+ }
vision.mlpackage/Data/com.apple.CoreML/model.mlmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a664e7f50ad4677227b09a71ae66569942a383e31920a7dbd6e763444f55edb7
3
+ size 592021
vision.mlpackage/Data/com.apple.CoreML/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28f26bc5854a2412b3313d3a7d66e34b90aa268c2ad8a169a51f5a2e3e8e54a8
3
+ size 337549248
vision.mlpackage/Manifest.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "fileFormatVersion": "1.0.0",
3
+ "itemInfoEntries": {
4
+ "31C47271-B7C9-4C3A-94CB-9A69D952F033": {
5
+ "author": "com.apple.CoreML",
6
+ "description": "CoreML Model Weights",
7
+ "name": "weights",
8
+ "path": "com.apple.CoreML/weights"
9
+ },
10
+ "BF78F538-2727-4BD4-A199-6EA781CEA323": {
11
+ "author": "com.apple.CoreML",
12
+ "description": "CoreML Model Specification",
13
+ "name": "model.mlmodel",
14
+ "path": "com.apple.CoreML/model.mlmodel"
15
+ }
16
+ },
17
+ "rootModelIdentifier": "BF78F538-2727-4BD4-A199-6EA781CEA323"
18
+ }