mlboydaisuke
/

coreml-zoo

Model card Files Files and versions

xet

Community

mlboydaisuke committed on Apr 24

Commit

2196468

verified ·

1 Parent(s): a307e93

Add Gemma 4 E4B, Qwen3.5 2B/0.8B, Qwen3-VL 2B (CoreML-LLM v1.3); refresh Gemma 4 E2B (audio/video, 3.1 GB)

Browse files

Files changed (1) hide show

models.json +173 -5

models.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "manifest_version": 1,
-  "updated_at": "2026-04-20T04:18:56Z",
   "min_app_version": "1.0",
   "categories": [
     {
@@ -88,7 +88,7 @@
       "name": "Gemma 4 E2B",
       "subtitle": "Google DeepMind, 2025",
       "category_id": "llm",
-      "description_md": "Google's latest on-device multimodal LLM. 2.3B effective parameters with Per-Layer Embeddings. Text + image input, streaming text output. Runs on Apple Neural Engine at ~31 tok/s decode. Supports multi-turn conversations, image understanding, and reasoning.",
       "thumbnail_url": "https://huggingface.co/mlboydaisuke/coreml-zoo/resolve/main/thumbnails/gemma4.jpg",
       "demo": {
         "template": "chat",
@@ -102,7 +102,7 @@
           "name": "gemma4-e2b-coreml.zip",
           "url": "https://huggingface.co/mlboydaisuke/gemma-4-E2B-coreml/resolve/main/gemma4-e2b-coreml.zip",
           "archive": "zip",
-          "size_bytes": 2700000000,
           "sha256": "TODO",
           "compute_units": "cpuAndNeuralEngine",
           "kind": "model"
@@ -120,8 +120,176 @@
         "url": "https://ai.google.dev/gemma/terms"
       },
       "upstream": {
-        "name": "google/gemma-4-e2b",
-        "url": "https://huggingface.co/google/gemma-4-e2b",
         "year": 2025
       }
     },

 {
   "manifest_version": 1,
+  "updated_at": "2026-04-24T10:44:41Z",
   "min_app_version": "1.0",
   "categories": [
     {
       "name": "Gemma 4 E2B",
       "subtitle": "Google DeepMind, 2025",
       "category_id": "llm",
+      "description_md": "Google's on-device multimodal LLM (Gemma 3n E2B, 2.3B effective params with Per-Layer Embeddings). **Text + image + audio + video** input, streaming text output. Runs on Apple Neural Engine at ~31 tok/s decode on iPhone 17 Pro (34 tok/s in 3-chunk mode). Native 384x384 vision encoder (64 tokens/frame) handles video; 12-layer Conformer encoder handles audio. Powered by [CoreML-LLM](https://github.com/john-rocky/CoreML-LLM) v1.3.",
       "thumbnail_url": "https://huggingface.co/mlboydaisuke/coreml-zoo/resolve/main/thumbnails/gemma4.jpg",
       "demo": {
         "template": "chat",
           "name": "gemma4-e2b-coreml.zip",
           "url": "https://huggingface.co/mlboydaisuke/gemma-4-E2B-coreml/resolve/main/gemma4-e2b-coreml.zip",
           "archive": "zip",
+          "size_bytes": 3100000000,
           "sha256": "TODO",
           "compute_units": "cpuAndNeuralEngine",
           "kind": "model"
         "url": "https://ai.google.dev/gemma/terms"
       },
       "upstream": {
+        "name": "google/gemma-3n-E2B-it",
+        "url": "https://huggingface.co/google/gemma-3n-E2B-it",
+        "year": 2025
+      }
+    },
+    {
+      "id": "gemma4_e4b",
+      "name": "Gemma 4 E4B",
+      "subtitle": "Google DeepMind, 2025",
+      "category_id": "llm",
+      "description_md": "Larger Gemma 4 variant: 42-layer text decoder, ~4B effective params, 100% ANE-resident. Text-only (no vision/audio). ~14 tok/s decode on iPhone 17 Pro at 2048-token context. Use when you want maximum text quality and have the storage budget. Powered by [CoreML-LLM](https://github.com/john-rocky/CoreML-LLM) v1.3.",
+      "thumbnail_url": "https://huggingface.co/mlboydaisuke/coreml-zoo/resolve/main/thumbnails/gemma4.jpg",
+      "demo": {
+        "template": "chat",
+        "config": {
+          "max_tokens": 1024,
+          "multimodal": false
+        }
+      },
+      "files": [
+        {
+          "name": "gemma4-e4b-coreml.zip",
+          "url": "https://huggingface.co/mlboydaisuke/gemma-4-E4B-coreml/resolve/main/gemma4-e4b-coreml.zip",
+          "archive": "zip",
+          "size_bytes": 5500000000,
+          "sha256": "TODO",
+          "compute_units": "cpuAndNeuralEngine",
+          "kind": "model"
+        }
+      ],
+      "requirements": {
+        "min_ios": "18.0",
+        "min_ram_mb": 3000,
+        "device_capabilities": [
+          "arm64"
+        ]
+      },
+      "license": {
+        "name": "Gemma",
+        "url": "https://ai.google.dev/gemma/terms"
+      },
+      "upstream": {
+        "name": "google/gemma-3n-E4B-it",
+        "url": "https://huggingface.co/google/gemma-3n-E4B-it",
+        "year": 2025
+      }
+    },
+    {
+      "id": "qwen3.5-2b",
+      "name": "Qwen3.5 2B",
+      "subtitle": "Alibaba Qwen, 2025",
+      "category_id": "llm",
+      "description_md": "Hybrid Gated-DeltaNet SSM + attention architecture, shipped as 4 INT8 chunks to fit the iPhone ANE single-mlprogram budget. ~17 tok/s decode on iPhone 17 Pro with ~200 MB RSS — exceptional memory efficiency for a 2B-param model. Text-only. Powered by [CoreML-LLM](https://github.com/john-rocky/CoreML-LLM) v1.3.",
+      "thumbnail_url": "https://huggingface.co/mlboydaisuke/coreml-zoo/resolve/main/thumbnails/qwen3_5.jpg",
+      "demo": {
+        "template": "chat",
+        "config": {
+          "max_tokens": 1024,
+          "multimodal": false
+        }
+      },
+      "files": [
+        {
+          "name": "qwen3.5-2B-CoreML.zip",
+          "url": "https://huggingface.co/mlboydaisuke/qwen3.5-2B-CoreML/resolve/main/qwen3.5-2B-CoreML.zip",
+          "archive": "zip",
+          "size_bytes": 2400000000,
+          "sha256": "TODO",
+          "compute_units": "cpuAndNeuralEngine",
+          "kind": "model"
+        }
+      ],
+      "requirements": {
+        "min_ios": "18.0",
+        "min_ram_mb": 1200,
+        "device_capabilities": [
+          "arm64"
+        ]
+      },
+      "license": {
+        "name": "Apache-2.0",
+        "url": "https://www.apache.org/licenses/LICENSE-2.0"
+      },
+      "upstream": {
+        "name": "Qwen/Qwen3.5-2B",
+        "url": "https://huggingface.co/Qwen/Qwen3.5-2B",
+        "year": 2025
+      }
+    },
+    {
+      "id": "qwen3.5-0.8b",
+      "name": "Qwen3.5 0.8B",
+      "subtitle": "Alibaba Qwen, 2025",
+      "category_id": "llm",
+      "description_md": "Compact hybrid SSM+attention model, INT8 palettized (same semantic precision as fp16 — 100% top-3 parity vs fp32 oracle). ~20 tok/s decode on iPhone 17 Pro. Smallest and fastest option in the CoreML-LLM lineup. Text-only. Powered by [CoreML-LLM](https://github.com/john-rocky/CoreML-LLM) v1.3.",
+      "thumbnail_url": "https://huggingface.co/mlboydaisuke/coreml-zoo/resolve/main/thumbnails/qwen3_5.jpg",
+      "demo": {
+        "template": "chat",
+        "config": {
+          "max_tokens": 1024,
+          "multimodal": false
+        }
+      },
+      "files": [
+        {
+          "name": "qwen3.5-0.8B-CoreML.zip",
+          "url": "https://huggingface.co/mlboydaisuke/qwen3.5-0.8B-CoreML/resolve/main/qwen3.5-0.8B-CoreML.zip",
+          "archive": "zip",
+          "size_bytes": 754000000,
+          "sha256": "TODO",
+          "compute_units": "cpuAndNeuralEngine",
+          "kind": "model"
+        }
+      ],
+      "requirements": {
+        "min_ios": "18.0",
+        "min_ram_mb": 600,
+        "device_capabilities": [
+          "arm64"
+        ]
+      },
+      "license": {
+        "name": "Apache-2.0",
+        "url": "https://www.apache.org/licenses/LICENSE-2.0"
+      },
+      "upstream": {
+        "name": "Qwen/Qwen3.5-0.8B",
+        "url": "https://huggingface.co/Qwen/Qwen3.5-0.8B",
+        "year": 2025
+      }
+    },
+    {
+      "id": "qwen3-vl-2b",
+      "name": "Qwen3-VL 2B",
+      "subtitle": "Alibaba Qwen, 2025",
+      "category_id": "llm",
+      "description_md": "Qwen3-VL multimodal model — **text + image** input (DeepStack injection at L0/1/2, interleaved mRoPE for image tokens, 196 image tokens). 28-layer GQA text backbone, 6 INT8 body chunks + fp16 embed sidecar. ~7.5 tok/s decode on iPhone 17 Pro. Powered by [CoreML-LLM](https://github.com/john-rocky/CoreML-LLM) v1.3.",
+      "thumbnail_url": "https://huggingface.co/mlboydaisuke/coreml-zoo/resolve/main/thumbnails/qwen3_vl.jpg",
+      "demo": {
+        "template": "chat",
+        "config": {
+          "max_tokens": 1024,
+          "multimodal": true
+        }
+      },
+      "files": [
+        {
+          "name": "qwen3-vl-2b-coreml.zip",
+          "url": "https://huggingface.co/mlboydaisuke/qwen3-vl-2b-coreml/resolve/main/qwen3-vl-2b-coreml.zip",
+          "archive": "zip",
+          "size_bytes": 4700000000,
+          "sha256": "TODO",
+          "compute_units": "cpuAndNeuralEngine",
+          "kind": "model"
+        }
+      ],
+      "requirements": {
+        "min_ios": "18.0",
+        "min_ram_mb": 2500,
+        "device_capabilities": [
+          "arm64"
+        ]
+      },
+      "license": {
+        "name": "Apache-2.0",
+        "url": "https://www.apache.org/licenses/LICENSE-2.0"
+      },
+      "upstream": {
+        "name": "Qwen/Qwen3-VL-2B-Instruct",
+        "url": "https://huggingface.co/Qwen/Qwen3-VL-2B-Instruct",
         "year": 2025
       }
     },