mlboydaisuke committed on
Commit
385d64e
·
verified ·
1 Parent(s): a3d3bad

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ hf_model/tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ tags:
4
+ - coreml
5
+ - gemma4
6
+ - multimodal
7
+ - vision
8
+ - on-device
9
+ - ane
10
+ base_model: google/gemma-4-E2B-it
11
+ pipeline_tag: image-text-to-text
12
+ ---
13
+
14
+ # Gemma 4 E2B — CoreML (ANE+GPU Optimized)
15
+
16
+ Converted from [google/gemma-4-E2B-it](https://huggingface.co/google/gemma-4-E2B-it) for on-device inference on Apple devices via CoreML.
17
+
18
+ ## Models
19
+
20
+ | File | Size | Description |
21
+ |------|------|-------------|
22
+ | `model.mlpackage` | 2.4 GB | Text decoder with stateful KV cache (int4 quantized) |
23
+ | `vision.mlpackage` | 322 MB | Vision encoder (SigLIP-based, 16 transformer layers) |
24
+ | `model_config.json` | — | Model configuration |
25
+ | `hf_model/tokenizer.json` | 31 MB | Tokenizer |
26
+
27
+ ## Features
28
+
29
+ - **Multimodal**: Image + text input → text output
30
+ - **ANE-optimized**: Conv2d linear layers, ANE RMSNorm, in-model argmax
31
+ - **Stateful KV cache**: MLState API (iOS 18+)
32
+ - **Int4 quantized**: Block-wise palettization (group_size=32)
33
+ - **HF-exact match**: "solid red square centered on white background" ✅
34
+
35
+ ## Usage
36
+
37
+ ```python
38
+ import coremltools as ct
39
+ import numpy as np
40
+
41
+ # Load models
42
+ vision = ct.models.MLModel('vision.mlpackage')
43
+ decoder = ct.models.MLModel('model.mlpackage')
44
+ state = decoder.make_state()
45
+
46
+ # Process image → vision features → text generation
47
+ ```
48
+
49
+ See [CoreML-LLM](https://github.com/john-rocky/CoreML-LLM) for the full conversion pipeline and iOS sample app.
50
+
51
+ ## Conversion
52
+
53
+ ```bash
54
+ git clone https://github.com/john-rocky/CoreML-LLM
55
+ cd CoreML-LLM/conversion
56
+ pip install -r requirements.txt
57
+ python convert.py --model gemma4-e2b --context-length 512 --output ./output/gemma4-e2b
58
+ ```
hf_model/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc8d3a0ce36466ccc1278bf987df5f71db1719b9ca6b4118264f45cb627bfe0f
3
+ size 32169626
hf_model/tokenizer_config.json ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "audio_token": "<|audio|>",
3
+ "backend": "tokenizers",
4
+ "boa_token": "<|audio>",
5
+ "boi_token": "<|image>",
6
+ "bos_token": "<bos>",
7
+ "eoa_token": "<audio|>",
8
+ "eoc_token": "<channel|>",
9
+ "eoi_token": "<image|>",
10
+ "eos_token": "<eos>",
11
+ "eot_token": "<turn|>",
12
+ "escape_token": "<|\"|>",
13
+ "etc_token": "<tool_call|>",
14
+ "etd_token": "<tool|>",
15
+ "etr_token": "<tool_response|>",
16
+ "extra_special_tokens": [
17
+ "<|video|>"
18
+ ],
19
+ "image_token": "<|image|>",
20
+ "mask_token": "<mask>",
21
+ "model_max_length": 1000000000000000019884624838656,
22
+ "pad_token": "<pad>",
23
+ "padding_side": "left",
24
+ "processor_class": "Gemma4Processor",
25
+ "response_schema": {
26
+ "type": "object",
27
+ "properties": {
28
+ "role": {
29
+ "const": "assistant"
30
+ },
31
+ "thinking": {
32
+ "type": "string"
33
+ },
34
+ "content": {
35
+ "type": "string"
36
+ },
37
+ "tool_calls": {
38
+ "x-regex-iterator": "<\\|tool_call>(.*?)<tool_call\\|>",
39
+ "type": "array",
40
+ "items": {
41
+ "type": "object",
42
+ "properties": {
43
+ "type": {
44
+ "const": "function"
45
+ },
46
+ "function": {
47
+ "type": "object",
48
+ "x-regex": "call\\:(?P<name>\\w+)(?P<arguments>\\{.*\\})",
49
+ "properties": {
50
+ "name": {
51
+ "type": "string"
52
+ },
53
+ "arguments": {
54
+ "type": "object",
55
+ "x-parser": "gemma4-tool-call",
56
+ "additionalProperties": {}
57
+ }
58
+ }
59
+ }
60
+ }
61
+ }
62
+ }
63
+ },
64
+ "x-regex": "(\\<\\|channel\\>thought\\n(?P<thinking>.*?)\\<channel\\|\\>)?(?P<content>(?:(?!\\<\\|tool_call\\>)(?!\\<turn\\|\\>).)+)?(?P<tool_calls>\\<\\|tool_call\\>.*\\<tool_call\\|\\>)?(?:\\<turn\\|\\>)?"
65
+ },
66
+ "soc_token": "<|channel>",
67
+ "sot_token": "<|turn>",
68
+ "stc_token": "<|tool_call>",
69
+ "std_token": "<|tool>",
70
+ "str_token": "<|tool_response>",
71
+ "think_token": "<|think|>",
72
+ "tokenizer_class": "GemmaTokenizer",
73
+ "unk_token": "<unk>"
74
+ }
model.mlpackage/Data/com.apple.CoreML/model.mlmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f789a84c767c3b5fa8f3b294c881ed7c992219def201a437a75885e91cb157a6
3
+ size 1381053
model.mlpackage/Data/com.apple.CoreML/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e574be929a357f1fdf0e2e25ec784eeeb93c520441349d1177b56fd6e871b3d
3
+ size 2518622912
model.mlpackage/Manifest.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "fileFormatVersion": "1.0.0",
3
+ "itemInfoEntries": {
4
+ "7A93380B-5D08-43C0-8B8D-68D2ACD08A4E": {
5
+ "author": "com.apple.CoreML",
6
+ "description": "CoreML Model Weights",
7
+ "name": "weights",
8
+ "path": "com.apple.CoreML/weights"
9
+ },
10
+ "D3C832CC-AC75-4898-B6F6-136858D00041": {
11
+ "author": "com.apple.CoreML",
12
+ "description": "CoreML Model Specification",
13
+ "name": "model.mlmodel",
14
+ "path": "com.apple.CoreML/model.mlmodel"
15
+ }
16
+ },
17
+ "rootModelIdentifier": "D3C832CC-AC75-4898-B6F6-136858D00041"
18
+ }
model_config.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_name": "gemma4-e2b",
3
+ "architecture": "gemma4",
4
+ "hidden_size": 1536,
5
+ "num_hidden_layers": 35,
6
+ "num_attention_heads": 8,
7
+ "num_key_value_heads": 1,
8
+ "head_dim": 256,
9
+ "vocab_size": 262144,
10
+ "context_length": 512,
11
+ "rms_norm_eps": 1e-06,
12
+ "bos_token_id": 2,
13
+ "eos_token_id": 1,
14
+ "quantization": "int4",
15
+ "compute_units": "ALL",
16
+ "parts": {
17
+ "model": "model.mlpackage"
18
+ },
19
+ "tokenizer_repo": "google/gemma-4-E2B-it"
20
+ }
vision.mlpackage/Data/com.apple.CoreML/model.mlmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a664e7f50ad4677227b09a71ae66569942a383e31920a7dbd6e763444f55edb7
3
+ size 592021
vision.mlpackage/Data/com.apple.CoreML/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28f26bc5854a2412b3313d3a7d66e34b90aa268c2ad8a169a51f5a2e3e8e54a8
3
+ size 337549248
vision.mlpackage/Manifest.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "fileFormatVersion": "1.0.0",
3
+ "itemInfoEntries": {
4
+ "31C47271-B7C9-4C3A-94CB-9A69D952F033": {
5
+ "author": "com.apple.CoreML",
6
+ "description": "CoreML Model Weights",
7
+ "name": "weights",
8
+ "path": "com.apple.CoreML/weights"
9
+ },
10
+ "BF78F538-2727-4BD4-A199-6EA781CEA323": {
11
+ "author": "com.apple.CoreML",
12
+ "description": "CoreML Model Specification",
13
+ "name": "model.mlmodel",
14
+ "path": "com.apple.CoreML/model.mlmodel"
15
+ }
16
+ },
17
+ "rootModelIdentifier": "BF78F538-2727-4BD4-A199-6EA781CEA323"
18
+ }