mlboydaisuke committed on
Commit
2196468
·
verified ·
1 Parent(s): a307e93

Add Gemma 4 E4B, Qwen3.5 2B/0.8B, Qwen3-VL 2B (CoreML-LLM v1.3); refresh Gemma 4 E2B (audio/video, 3.1 GB)

Browse files
Files changed (1) hide show
  1. models.json +173 -5
models.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "manifest_version": 1,
3
- "updated_at": "2026-04-20T04:18:56Z",
4
  "min_app_version": "1.0",
5
  "categories": [
6
  {
@@ -88,7 +88,7 @@
88
  "name": "Gemma 4 E2B",
89
  "subtitle": "Google DeepMind, 2025",
90
  "category_id": "llm",
91
- "description_md": "Google's latest on-device multimodal LLM. 2.3B effective parameters with Per-Layer Embeddings. Text + image input, streaming text output. Runs on Apple Neural Engine at ~31 tok/s decode. Supports multi-turn conversations, image understanding, and reasoning.",
92
  "thumbnail_url": "https://huggingface.co/mlboydaisuke/coreml-zoo/resolve/main/thumbnails/gemma4.jpg",
93
  "demo": {
94
  "template": "chat",
@@ -102,7 +102,7 @@
102
  "name": "gemma4-e2b-coreml.zip",
103
  "url": "https://huggingface.co/mlboydaisuke/gemma-4-E2B-coreml/resolve/main/gemma4-e2b-coreml.zip",
104
  "archive": "zip",
105
- "size_bytes": 2700000000,
106
  "sha256": "TODO",
107
  "compute_units": "cpuAndNeuralEngine",
108
  "kind": "model"
@@ -120,8 +120,176 @@
120
  "url": "https://ai.google.dev/gemma/terms"
121
  },
122
  "upstream": {
123
- "name": "google/gemma-4-e2b",
124
- "url": "https://huggingface.co/google/gemma-4-e2b",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
  "year": 2025
126
  }
127
  },
 
1
  {
2
  "manifest_version": 1,
3
+ "updated_at": "2026-04-24T10:44:41Z",
4
  "min_app_version": "1.0",
5
  "categories": [
6
  {
 
88
  "name": "Gemma 4 E2B",
89
  "subtitle": "Google DeepMind, 2025",
90
  "category_id": "llm",
91
+ "description_md": "Google's on-device multimodal LLM (Gemma 3n E2B, 2.3B effective params with Per-Layer Embeddings). **Text + image + audio + video** input, streaming text output. Runs on Apple Neural Engine at ~31 tok/s decode on iPhone 17 Pro (34 tok/s in 3-chunk mode). Native 384x384 vision encoder (64 tokens/frame) handles video; 12-layer Conformer encoder handles audio. Powered by [CoreML-LLM](https://github.com/john-rocky/CoreML-LLM) v1.3.",
92
  "thumbnail_url": "https://huggingface.co/mlboydaisuke/coreml-zoo/resolve/main/thumbnails/gemma4.jpg",
93
  "demo": {
94
  "template": "chat",
 
102
  "name": "gemma4-e2b-coreml.zip",
103
  "url": "https://huggingface.co/mlboydaisuke/gemma-4-E2B-coreml/resolve/main/gemma4-e2b-coreml.zip",
104
  "archive": "zip",
105
+ "size_bytes": 3100000000,
106
  "sha256": "TODO",
107
  "compute_units": "cpuAndNeuralEngine",
108
  "kind": "model"
 
120
  "url": "https://ai.google.dev/gemma/terms"
121
  },
122
  "upstream": {
123
+ "name": "google/gemma-3n-E2B-it",
124
+ "url": "https://huggingface.co/google/gemma-3n-E2B-it",
125
+ "year": 2025
126
+ }
127
+ },
128
+ {
129
+ "id": "gemma4_e4b",
130
+ "name": "Gemma 4 E4B",
131
+ "subtitle": "Google DeepMind, 2025",
132
+ "category_id": "llm",
133
+ "description_md": "Larger Gemma 4 variant: 42-layer text decoder, ~4B effective params, 100% ANE-resident. Text-only (no vision/audio). ~14 tok/s decode on iPhone 17 Pro at 2048-token context. Use when you want maximum text quality and have the storage budget. Powered by [CoreML-LLM](https://github.com/john-rocky/CoreML-LLM) v1.3.",
134
+ "thumbnail_url": "https://huggingface.co/mlboydaisuke/coreml-zoo/resolve/main/thumbnails/gemma4.jpg",
135
+ "demo": {
136
+ "template": "chat",
137
+ "config": {
138
+ "max_tokens": 1024,
139
+ "multimodal": false
140
+ }
141
+ },
142
+ "files": [
143
+ {
144
+ "name": "gemma4-e4b-coreml.zip",
145
+ "url": "https://huggingface.co/mlboydaisuke/gemma-4-E4B-coreml/resolve/main/gemma4-e4b-coreml.zip",
146
+ "archive": "zip",
147
+ "size_bytes": 5500000000,
148
+ "sha256": "TODO",
149
+ "compute_units": "cpuAndNeuralEngine",
150
+ "kind": "model"
151
+ }
152
+ ],
153
+ "requirements": {
154
+ "min_ios": "18.0",
155
+ "min_ram_mb": 3000,
156
+ "device_capabilities": [
157
+ "arm64"
158
+ ]
159
+ },
160
+ "license": {
161
+ "name": "Gemma",
162
+ "url": "https://ai.google.dev/gemma/terms"
163
+ },
164
+ "upstream": {
165
+ "name": "google/gemma-3n-E4B-it",
166
+ "url": "https://huggingface.co/google/gemma-3n-E4B-it",
167
+ "year": 2025
168
+ }
169
+ },
170
+ {
171
+ "id": "qwen3.5-2b",
172
+ "name": "Qwen3.5 2B",
173
+ "subtitle": "Alibaba Qwen, 2025",
174
+ "category_id": "llm",
175
+ "description_md": "Hybrid Gated-DeltaNet SSM + attention architecture, shipped as 4 INT8 chunks to fit the iPhone ANE single-mlprogram budget. ~17 tok/s decode on iPhone 17 Pro with ~200 MB RSS — exceptional memory efficiency for a 2B-param model. Text-only. Powered by [CoreML-LLM](https://github.com/john-rocky/CoreML-LLM) v1.3.",
176
+ "thumbnail_url": "https://huggingface.co/mlboydaisuke/coreml-zoo/resolve/main/thumbnails/qwen3_5.jpg",
177
+ "demo": {
178
+ "template": "chat",
179
+ "config": {
180
+ "max_tokens": 1024,
181
+ "multimodal": false
182
+ }
183
+ },
184
+ "files": [
185
+ {
186
+ "name": "qwen3.5-2B-CoreML.zip",
187
+ "url": "https://huggingface.co/mlboydaisuke/qwen3.5-2B-CoreML/resolve/main/qwen3.5-2B-CoreML.zip",
188
+ "archive": "zip",
189
+ "size_bytes": 2400000000,
190
+ "sha256": "TODO",
191
+ "compute_units": "cpuAndNeuralEngine",
192
+ "kind": "model"
193
+ }
194
+ ],
195
+ "requirements": {
196
+ "min_ios": "18.0",
197
+ "min_ram_mb": 1200,
198
+ "device_capabilities": [
199
+ "arm64"
200
+ ]
201
+ },
202
+ "license": {
203
+ "name": "Apache-2.0",
204
+ "url": "https://www.apache.org/licenses/LICENSE-2.0"
205
+ },
206
+ "upstream": {
207
+ "name": "Qwen/Qwen3.5-2B",
208
+ "url": "https://huggingface.co/Qwen/Qwen3.5-2B",
209
+ "year": 2025
210
+ }
211
+ },
212
+ {
213
+ "id": "qwen3.5-0.8b",
214
+ "name": "Qwen3.5 0.8B",
215
+ "subtitle": "Alibaba Qwen, 2025",
216
+ "category_id": "llm",
217
+ "description_md": "Compact hybrid SSM+attention model, INT8 palettized (same semantic precision as fp16 — 100% top-3 parity vs fp32 oracle). ~20 tok/s decode on iPhone 17 Pro. Smallest and fastest option in the CoreML-LLM lineup. Text-only. Powered by [CoreML-LLM](https://github.com/john-rocky/CoreML-LLM) v1.3.",
218
+ "thumbnail_url": "https://huggingface.co/mlboydaisuke/coreml-zoo/resolve/main/thumbnails/qwen3_5.jpg",
219
+ "demo": {
220
+ "template": "chat",
221
+ "config": {
222
+ "max_tokens": 1024,
223
+ "multimodal": false
224
+ }
225
+ },
226
+ "files": [
227
+ {
228
+ "name": "qwen3.5-0.8B-CoreML.zip",
229
+ "url": "https://huggingface.co/mlboydaisuke/qwen3.5-0.8B-CoreML/resolve/main/qwen3.5-0.8B-CoreML.zip",
230
+ "archive": "zip",
231
+ "size_bytes": 754000000,
232
+ "sha256": "TODO",
233
+ "compute_units": "cpuAndNeuralEngine",
234
+ "kind": "model"
235
+ }
236
+ ],
237
+ "requirements": {
238
+ "min_ios": "18.0",
239
+ "min_ram_mb": 600,
240
+ "device_capabilities": [
241
+ "arm64"
242
+ ]
243
+ },
244
+ "license": {
245
+ "name": "Apache-2.0",
246
+ "url": "https://www.apache.org/licenses/LICENSE-2.0"
247
+ },
248
+ "upstream": {
249
+ "name": "Qwen/Qwen3.5-0.8B",
250
+ "url": "https://huggingface.co/Qwen/Qwen3.5-0.8B",
251
+ "year": 2025
252
+ }
253
+ },
254
+ {
255
+ "id": "qwen3-vl-2b",
256
+ "name": "Qwen3-VL 2B",
257
+ "subtitle": "Alibaba Qwen, 2025",
258
+ "category_id": "llm",
259
+ "description_md": "Qwen3-VL multimodal model — **text + image** input (DeepStack injection at L0/1/2, interleaved mRoPE for image tokens, 196 image tokens). 28-layer GQA text backbone, 6 INT8 body chunks + fp16 embed sidecar. ~7.5 tok/s decode on iPhone 17 Pro. Powered by [CoreML-LLM](https://github.com/john-rocky/CoreML-LLM) v1.3.",
260
+ "thumbnail_url": "https://huggingface.co/mlboydaisuke/coreml-zoo/resolve/main/thumbnails/qwen3_vl.jpg",
261
+ "demo": {
262
+ "template": "chat",
263
+ "config": {
264
+ "max_tokens": 1024,
265
+ "multimodal": true
266
+ }
267
+ },
268
+ "files": [
269
+ {
270
+ "name": "qwen3-vl-2b-coreml.zip",
271
+ "url": "https://huggingface.co/mlboydaisuke/qwen3-vl-2b-coreml/resolve/main/qwen3-vl-2b-coreml.zip",
272
+ "archive": "zip",
273
+ "size_bytes": 4700000000,
274
+ "sha256": "TODO",
275
+ "compute_units": "cpuAndNeuralEngine",
276
+ "kind": "model"
277
+ }
278
+ ],
279
+ "requirements": {
280
+ "min_ios": "18.0",
281
+ "min_ram_mb": 2500,
282
+ "device_capabilities": [
283
+ "arm64"
284
+ ]
285
+ },
286
+ "license": {
287
+ "name": "Apache-2.0",
288
+ "url": "https://www.apache.org/licenses/LICENSE-2.0"
289
+ },
290
+ "upstream": {
291
+ "name": "Qwen/Qwen3-VL-2B-Instruct",
292
+ "url": "https://huggingface.co/Qwen/Qwen3-VL-2B-Instruct",
293
  "year": 2025
294
  }
295
  },