Upload folder using huggingface_hub
Browse files- .gitattributes +1 -0
- README.md +58 -0
- hf_model/tokenizer.json +3 -0
- hf_model/tokenizer_config.json +74 -0
- model.mlpackage/Data/com.apple.CoreML/model.mlmodel +3 -0
- model.mlpackage/Data/com.apple.CoreML/weights/weight.bin +3 -0
- model.mlpackage/Manifest.json +18 -0
- model_config.json +20 -0
- vision.mlpackage/Data/com.apple.CoreML/model.mlmodel +3 -0
- vision.mlpackage/Data/com.apple.CoreML/weights/weight.bin +3 -0
- vision.mlpackage/Manifest.json +18 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
hf_model/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
README.md
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
license: apache-2.0
|
| 3 |
+
tags:
|
| 4 |
+
- coreml
|
| 5 |
+
- gemma4
|
| 6 |
+
- multimodal
|
| 7 |
+
- vision
|
| 8 |
+
- on-device
|
| 9 |
+
- ane
|
| 10 |
+
base_model: google/gemma-4-E2B-it
|
| 11 |
+
pipeline_tag: image-text-to-text
|
| 12 |
+
---
|
| 13 |
+
|
| 14 |
+
# Gemma 4 E2B — CoreML (ANE+GPU Optimized)
|
| 15 |
+
|
| 16 |
+
Converted from [google/gemma-4-E2B-it](https://huggingface.co/google/gemma-4-E2B-it) for on-device inference on Apple devices via CoreML.
|
| 17 |
+
|
| 18 |
+
## Models
|
| 19 |
+
|
| 20 |
+
| File | Size | Description |
|
| 21 |
+
|------|------|-------------|
|
| 22 |
+
| `model.mlpackage` | 2.5 GB | Text decoder with stateful KV cache (int4 quantized) |
|
| 23 |
+
| `vision.mlpackage` | 338 MB | Vision encoder (SigLIP-based, 16 transformer layers) |
|
| 24 |
+
| `model_config.json` | — | Model configuration |
|
| 25 |
+
| `hf_model/tokenizer.json` | 32 MB | Tokenizer |
|
| 26 |
+
|
| 27 |
+
## Features
|
| 28 |
+
|
| 29 |
+
- **Multimodal**: Image + text input → text output
|
| 30 |
+
- **ANE-optimized**: Linear layers implemented as Conv2d, ANE-friendly RMSNorm, in-model argmax
|
| 31 |
+
- **Stateful KV cache**: MLState API (iOS 18+ / macOS 15+)
|
| 32 |
+
- **Int4 quantized**: Block-wise palettization (group_size=32)
|
| 33 |
+
- **HF-exact match**: Output matches the Hugging Face reference model on a test image ("solid red square centered on white background") ✅
|
| 34 |
+
|
| 35 |
+
## Usage
|
| 36 |
+
|
| 37 |
+
```python
|
| 38 |
+
import coremltools as ct
|
| 39 |
+
import numpy as np
|
| 40 |
+
|
| 41 |
+
# Load models
|
| 42 |
+
vision = ct.models.MLModel('vision.mlpackage')
|
| 43 |
+
decoder = ct.models.MLModel('model.mlpackage')
|
| 44 |
+
state = decoder.make_state()
|
| 45 |
+
|
| 46 |
+
# Pipeline (image preprocessing and the generation loop are omitted here): image → vision features → text generation
|
| 47 |
+
```
|
| 48 |
+
|
| 49 |
+
See [CoreML-LLM](https://github.com/john-rocky/CoreML-LLM) for the full conversion pipeline and iOS sample app.
|
| 50 |
+
|
| 51 |
+
## Conversion
|
| 52 |
+
|
| 53 |
+
```bash
|
| 54 |
+
git clone https://github.com/john-rocky/CoreML-LLM
|
| 55 |
+
cd CoreML-LLM/conversion
|
| 56 |
+
pip install -r requirements.txt
|
| 57 |
+
python convert.py --model gemma4-e2b --context-length 512 --output ./output/gemma4-e2b
|
| 58 |
+
```
|
hf_model/tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cc8d3a0ce36466ccc1278bf987df5f71db1719b9ca6b4118264f45cb627bfe0f
|
| 3 |
+
size 32169626
|
hf_model/tokenizer_config.json
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"audio_token": "<|audio|>",
|
| 3 |
+
"backend": "tokenizers",
|
| 4 |
+
"boa_token": "<|audio>",
|
| 5 |
+
"boi_token": "<|image>",
|
| 6 |
+
"bos_token": "<bos>",
|
| 7 |
+
"eoa_token": "<audio|>",
|
| 8 |
+
"eoc_token": "<channel|>",
|
| 9 |
+
"eoi_token": "<image|>",
|
| 10 |
+
"eos_token": "<eos>",
|
| 11 |
+
"eot_token": "<turn|>",
|
| 12 |
+
"escape_token": "<|\"|>",
|
| 13 |
+
"etc_token": "<tool_call|>",
|
| 14 |
+
"etd_token": "<tool|>",
|
| 15 |
+
"etr_token": "<tool_response|>",
|
| 16 |
+
"extra_special_tokens": [
|
| 17 |
+
"<|video|>"
|
| 18 |
+
],
|
| 19 |
+
"image_token": "<|image|>",
|
| 20 |
+
"mask_token": "<mask>",
|
| 21 |
+
"model_max_length": 1000000000000000019884624838656,
|
| 22 |
+
"pad_token": "<pad>",
|
| 23 |
+
"padding_side": "left",
|
| 24 |
+
"processor_class": "Gemma4Processor",
|
| 25 |
+
"response_schema": {
|
| 26 |
+
"type": "object",
|
| 27 |
+
"properties": {
|
| 28 |
+
"role": {
|
| 29 |
+
"const": "assistant"
|
| 30 |
+
},
|
| 31 |
+
"thinking": {
|
| 32 |
+
"type": "string"
|
| 33 |
+
},
|
| 34 |
+
"content": {
|
| 35 |
+
"type": "string"
|
| 36 |
+
},
|
| 37 |
+
"tool_calls": {
|
| 38 |
+
"x-regex-iterator": "<\\|tool_call>(.*?)<tool_call\\|>",
|
| 39 |
+
"type": "array",
|
| 40 |
+
"items": {
|
| 41 |
+
"type": "object",
|
| 42 |
+
"properties": {
|
| 43 |
+
"type": {
|
| 44 |
+
"const": "function"
|
| 45 |
+
},
|
| 46 |
+
"function": {
|
| 47 |
+
"type": "object",
|
| 48 |
+
"x-regex": "call\\:(?P<name>\\w+)(?P<arguments>\\{.*\\})",
|
| 49 |
+
"properties": {
|
| 50 |
+
"name": {
|
| 51 |
+
"type": "string"
|
| 52 |
+
},
|
| 53 |
+
"arguments": {
|
| 54 |
+
"type": "object",
|
| 55 |
+
"x-parser": "gemma4-tool-call",
|
| 56 |
+
"additionalProperties": {}
|
| 57 |
+
}
|
| 58 |
+
}
|
| 59 |
+
}
|
| 60 |
+
}
|
| 61 |
+
}
|
| 62 |
+
}
|
| 63 |
+
},
|
| 64 |
+
"x-regex": "(\\<\\|channel\\>thought\\n(?P<thinking>.*?)\\<channel\\|\\>)?(?P<content>(?:(?!\\<\\|tool_call\\>)(?!\\<turn\\|\\>).)+)?(?P<tool_calls>\\<\\|tool_call\\>.*\\<tool_call\\|\\>)?(?:\\<turn\\|\\>)?"
|
| 65 |
+
},
|
| 66 |
+
"soc_token": "<|channel>",
|
| 67 |
+
"sot_token": "<|turn>",
|
| 68 |
+
"stc_token": "<|tool_call>",
|
| 69 |
+
"std_token": "<|tool>",
|
| 70 |
+
"str_token": "<|tool_response>",
|
| 71 |
+
"think_token": "<|think|>",
|
| 72 |
+
"tokenizer_class": "GemmaTokenizer",
|
| 73 |
+
"unk_token": "<unk>"
|
| 74 |
+
}
|
model.mlpackage/Data/com.apple.CoreML/model.mlmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f789a84c767c3b5fa8f3b294c881ed7c992219def201a437a75885e91cb157a6
|
| 3 |
+
size 1381053
|
model.mlpackage/Data/com.apple.CoreML/weights/weight.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3e574be929a357f1fdf0e2e25ec784eeeb93c520441349d1177b56fd6e871b3d
|
| 3 |
+
size 2518622912
|
model.mlpackage/Manifest.json
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"fileFormatVersion": "1.0.0",
|
| 3 |
+
"itemInfoEntries": {
|
| 4 |
+
"7A93380B-5D08-43C0-8B8D-68D2ACD08A4E": {
|
| 5 |
+
"author": "com.apple.CoreML",
|
| 6 |
+
"description": "CoreML Model Weights",
|
| 7 |
+
"name": "weights",
|
| 8 |
+
"path": "com.apple.CoreML/weights"
|
| 9 |
+
},
|
| 10 |
+
"D3C832CC-AC75-4898-B6F6-136858D00041": {
|
| 11 |
+
"author": "com.apple.CoreML",
|
| 12 |
+
"description": "CoreML Model Specification",
|
| 13 |
+
"name": "model.mlmodel",
|
| 14 |
+
"path": "com.apple.CoreML/model.mlmodel"
|
| 15 |
+
}
|
| 16 |
+
},
|
| 17 |
+
"rootModelIdentifier": "D3C832CC-AC75-4898-B6F6-136858D00041"
|
| 18 |
+
}
|
model_config.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model_name": "gemma4-e2b",
|
| 3 |
+
"architecture": "gemma4",
|
| 4 |
+
"hidden_size": 1536,
|
| 5 |
+
"num_hidden_layers": 35,
|
| 6 |
+
"num_attention_heads": 8,
|
| 7 |
+
"num_key_value_heads": 1,
|
| 8 |
+
"head_dim": 256,
|
| 9 |
+
"vocab_size": 262144,
|
| 10 |
+
"context_length": 512,
|
| 11 |
+
"rms_norm_eps": 1e-06,
|
| 12 |
+
"bos_token_id": 2,
|
| 13 |
+
"eos_token_id": 1,
|
| 14 |
+
"quantization": "int4",
|
| 15 |
+
"compute_units": "ALL",
|
| 16 |
+
"parts": {
|
| 17 |
+
"model": "model.mlpackage"
|
| 18 |
+
},
|
| 19 |
+
"tokenizer_repo": "google/gemma-4-E2B-it"
|
| 20 |
+
}
|
vision.mlpackage/Data/com.apple.CoreML/model.mlmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a664e7f50ad4677227b09a71ae66569942a383e31920a7dbd6e763444f55edb7
|
| 3 |
+
size 592021
|
vision.mlpackage/Data/com.apple.CoreML/weights/weight.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:28f26bc5854a2412b3313d3a7d66e34b90aa268c2ad8a169a51f5a2e3e8e54a8
|
| 3 |
+
size 337549248
|
vision.mlpackage/Manifest.json
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"fileFormatVersion": "1.0.0",
|
| 3 |
+
"itemInfoEntries": {
|
| 4 |
+
"31C47271-B7C9-4C3A-94CB-9A69D952F033": {
|
| 5 |
+
"author": "com.apple.CoreML",
|
| 6 |
+
"description": "CoreML Model Weights",
|
| 7 |
+
"name": "weights",
|
| 8 |
+
"path": "com.apple.CoreML/weights"
|
| 9 |
+
},
|
| 10 |
+
"BF78F538-2727-4BD4-A199-6EA781CEA323": {
|
| 11 |
+
"author": "com.apple.CoreML",
|
| 12 |
+
"description": "CoreML Model Specification",
|
| 13 |
+
"name": "model.mlmodel",
|
| 14 |
+
"path": "com.apple.CoreML/model.mlmodel"
|
| 15 |
+
}
|
| 16 |
+
},
|
| 17 |
+
"rootModelIdentifier": "BF78F538-2727-4BD4-A199-6EA781CEA323"
|
| 18 |
+
}
|