mlboydaisuke committed on May 3

Commit

d3bc0fd

verified ·

1 Parent(s): c4e5e2c

Upload folder using huggingface_hub

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitattributes +1 -0
README.md +148 -0
audio.mlmodelc/analytics/coremldata.bin +3 -0
audio.mlmodelc/coremldata.bin +3 -0
audio.mlmodelc/metadata.json +84 -0
audio.mlmodelc/model.mil +0 -0
audio.mlmodelc/weights/weight.bin +3 -0
audio_config.json +19 -0
chunk1.mlmodelc/analytics/coremldata.bin +3 -0
chunk1.mlmodelc/coremldata.bin +3 -0
chunk1.mlmodelc/model.mil +0 -0
chunk1.mlmodelc/weights/weight.bin +3 -0
chunk2.mlmodelc/analytics/coremldata.bin +3 -0
chunk2.mlmodelc/coremldata.bin +3 -0
chunk2.mlmodelc/model.mil +0 -0
chunk2.mlmodelc/weights/weight.bin +3 -0
chunk2_3way.mlmodelc/analytics/coremldata.bin +3 -0
chunk2_3way.mlmodelc/coremldata.bin +3 -0
chunk2_3way.mlmodelc/metadata.json +285 -0
chunk2_3way.mlmodelc/model.mil +0 -0
chunk2_3way.mlmodelc/weights/weight.bin +3 -0
chunk3.mlmodelc/analytics/coremldata.bin +3 -0
chunk3.mlmodelc/coremldata.bin +3 -0
chunk3.mlmodelc/model.mil +0 -0
chunk3.mlmodelc/weights/weight.bin +3 -0
chunk3_3way.mlmodelc/analytics/coremldata.bin +3 -0
chunk3_3way.mlmodelc/coremldata.bin +3 -0
chunk3_3way.mlmodelc/metadata.json +224 -0
chunk3_3way.mlmodelc/model.mil +0 -0
chunk3_3way.mlmodelc/weights/weight.bin +3 -0
chunk4.mlmodelc/analytics/coremldata.bin +3 -0
chunk4.mlmodelc/coremldata.bin +3 -0
chunk4.mlmodelc/model.mil +0 -0
chunk4.mlmodelc/weights/weight.bin +3 -0
cos_full.npy +3 -0
cos_sliding.npy +3 -0
embed_proj_weight.npy +3 -0
embed_tokens_per_layer_q8.bin +3 -0
embed_tokens_per_layer_scales.bin +3 -0
embed_tokens_q8.bin +3 -0
embed_tokens_scales.bin +3 -0
hf_model/config.json +197 -0
hf_model/generation_config.json +14 -0
hf_model/tokenizer.json +3 -0
hf_model/tokenizer_config.json +74 -0
mel_filterbank.bin +3 -0
model_config.json +26 -0
output_proj_bias.npy +3 -0
output_proj_weight.npy +3 -0
per_layer_norm_weight.bin +3 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+hf_model/tokenizer.json filter=lfs diff=lfs merge=lfs -text

README.md ADDED Viewed

	@@ -0,0 +1,148 @@

+---
+license: gemma
+library_name: coreml
+base_model: google/gemma-4-E4B-it
+tags:
+  - coreml
+  - apple-silicon
+  - ane
+  - on-device
+  - gemma-4
+  - multimodal
+  - vision
+  - audio
+pipeline_tag: image-text-to-text
+---
+## Use it from Swift
+<!-- swift-usage-begin -->
+### Add the package
+`Package.swift`:
+```swift
+.package(url: "https://github.com/john-rocky/CoreML-LLM", branch: "main"),
+// In your target:
+.product(name: "CoreMLLLM", package: "CoreML-LLM"),
+```
+Platforms: iOS 18+ / macOS 15+.
+### Download + chat (one call, text + image + audio)
+```swift
+import CoreMLLLM
+// First call pulls the bundle from this repo to Documents/Models/.
+let llm = try await CoreMLLLM.load(repo: "mlboydaisuke/gemma-4-E4B-multimodal-coreml")
+// Text-only
+let stream = try await llm.generate(
+    [CoreMLLLM.Message(role: .user, content: "Hello!")],
+    maxTokens: 256
+)
+for await chunk in stream { print(chunk, terminator: "") }
+// Image + text
+let image: CGImage = // ... your image
+let stream2 = try await llm.generate(
+    [CoreMLLLM.Message(role: .user, content: "Describe this picture.")],
+    image: image, maxTokens: 256)
+// Audio + text (16 kHz mono PCM Float)
+let pcm: [Float] = // ... your audio samples
+let stream3 = try await llm.generate(
+    [CoreMLLLM.Message(role: .user, content: "What language is this?")],
+    audio: pcm, maxTokens: 256)
+```
+Set the Xcode scheme env var `LLM_VISION_FORCE_ANE=1` to route the vision encoder through the Apple Neural Engine (built ANE-targeted, 256 tokens per image at the LM hidden dim).
+<!-- swift-usage-end -->
+# Gemma 4 E4B (multimodal) — Core ML (INT4, Apple Neural Engine)
+Core ML port of [`google/gemma-4-E4B-it`](https://huggingface.co/google/gemma-4-E4B-it) with vision (still image), video, and audio (Conformer) encoders. The sliding-window-attention chunks target the Apple Neural Engine, the vision encoder is ANE-targeted, and audio runs on the GPU plus a small Swift/Accelerate projection sidecar.
+**iPhone 17 Pro validated 2026-05-03** — text decode **15.7 tok/s** with correct outputs across all four input modalities (text / image / video / audio).
+Built from [`john-rocky/CoreML-LLM`](https://github.com/john-rocky/CoreML-LLM); see [`docs/E4B_MULTIMODAL_BUILD.md`](https://github.com/john-rocky/CoreML-LLM/blob/main/docs/E4B_MULTIMODAL_BUILD.md) for the full reproduction guide and [`scripts/assemble_gemma4_e4b_multimodal.sh`](https://github.com/john-rocky/CoreML-LLM/blob/main/scripts/assemble_gemma4_e4b_multimodal.sh) for the assembly script.
+## Files
+```
+# Decode chunks (3-chunk Topology II — auto-detected by ChunkedEngine)
+chunk1.mlmodelc/                # L0-11   — own KV
+chunk2_3way.mlmodelc/           # L12-32  — merged 21 layers (own + KV-shared internal)
+chunk3_3way.mlmodelc/           # L33-41 + lm_head + argmax
+# Prefill chunks (legacy 4-chunk with prefill_b8 multifunction inside)
+chunk2.mlmodelc/                # L12-22  prefill (own KV writes via recurrent shift)
+chunk3.mlmodelc/                # L23-32  prefill (KV-shared)
+chunk4.mlmodelc/                # L33-41  prefill + lm_head
+# Vision encoder (ANE-targeted)
+vision.ane.mlmodelc/            # SigLIP, output [1, 256, 2560]
+# Audio encoder + Swift projection sidecars
+audio.mlmodelc/                 # Conformer, output [1, 50, 1024]
+audio_config.json
+mel_filterbank.bin
+output_proj_weight.npy          # 1024 -> 1536 (audio_soft_token_size)
+output_proj_bias.npy
+embed_proj_weight.npy           # 1536 -> 2560 (LM hidden) — E4B-specific shape
+# Token / per-layer embeddings (mmap'd, dequantized on demand by Swift)
+embed_tokens_q8.bin             640 MB  — INT8 token embeddings (262144 x 2560)
+embed_tokens_scales.bin         512 KB
+embed_tokens_per_layer_q8.bin   2.6 GB  — INT8 per-layer embeddings (PLE)
+embed_tokens_per_layer_scales.bin 512 KB
+per_layer_projection.bin        53 MB
+per_layer_norm_weight.bin       512 B
+# RoPE cos/sin tables (pre-baked, mmap'd)
+cos_sliding.npy / sin_sliding.npy
+cos_full.npy    / sin_full.npy
+# Tokenizer + runtime config
+hf_model/
+  tokenizer.json, tokenizer_config.json, config.json, generation_config.json
+model_config.json
+```
+Total bundle size: **~7.6 GB**.
+## Engine path on iPhone (what runs where)
+| Stage | Compute | Files used |
+|---|---|---|
+| Token / PLE embed lookup | Swift CPU (mmap) | `embed_tokens*.bin`, `per_layer_*.bin` |
+| Decode (T=1) | ANE | `chunk1` + `chunk2_3way` + `chunk3_3way` |
+| Prefill (batched, T=8) | ANE | `chunk1` + `chunk2` + `chunk3` + `chunk4` (`prefill_b8` multifunction) |
+| Vision encoder | ANE | `vision.ane.mlmodelc` (with `LLM_VISION_FORCE_ANE=1`) |
+| Audio encoder | GPU | `audio.mlmodelc` |
+| Audio projection (1024 → 1536 → 2560) | Swift / Accelerate | `output_proj_*.npy`, `embed_proj_weight.npy` |
+The Swift runtime auto-detects Topology II by the presence of `chunk2_3way` + `chunk3_3way` and routes prefill through the legacy 4-chunk `prefill_b8` multifunction (the engine's `fillBatchMasksVisionAware` keeps bidirectional within-image attention working at `T=8` batches).
+## Why so many sidecars (vs a single `model.mlpackage`)?
+Gemma 4 E-series uses a per-layer embedding (PLE) bank that's much larger than the token embedding (2.6 GB vs 640 MB for E4B). Loading PLE through Core ML would dequantize the entire bank into the CPU heap and blow up `phys_footprint`. We mmap the raw INT8 + scale `.bin` files instead, dequantize the few rows touched per token in pure Swift, and feed the result to the chunks. The chunks themselves are pure transformer bodies and stay ANE-resident.
+The `.npy` RoPE tables are pre-baked at conversion-time so Swift doesn't need to ship a `cos`/`sin` builder.
+The audio Swift projection (`output_proj_*` / `embed_proj_weight`) lives outside the Core ML audio model because of a Core ML GPU runtime bug with `RMSNorm(with_scale=False)` that produces all-zero outputs. SGEMM in Accelerate is fast enough on the CPU for this step.
+## Tokenizer
+The Gemma 4 SentencePiece tokenizer ships in `hf_model/`. Three multimodal placeholder token IDs:
+- `<|image|>` = 258880 — image-pad span (256 per still image)
+- `<|audio|>` = 258881 — audio-pad span (50 per 2-second window, i.e. 40 ms per token; see `audio_config.json`)
+- `<|video|>` = 258884 — video-pad span (64 per frame)
+Vision encoder output rows replace `<|image|>`/`<|video|>` rows during prefill (and per-token at decode for tail spans). Audio output rows replace `<|audio|>`. `per_layer_raw` is forced to zero at multimodal positions — the chunks compute `per_layer_combined` entirely from the spliced hidden state.
+## License
+This is a derivative work of `google/gemma-4-E4B-it`. Use is governed by the [Gemma Terms of Use](https://ai.google.dev/gemma/terms). Vision / audio extensions inherit the same license.

audio.mlmodelc/analytics/coremldata.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ea515c53f416101ef42bce8f1a9ac1be59d838914747ecba22b70ead41039ee5
+size 243

audio.mlmodelc/coremldata.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cda961a2bf20e093c8fe82e55240512265ee10f1cf48078033ebc972298750b9
+size 390

audio.mlmodelc/metadata.json ADDED Viewed

	@@ -0,0 +1,84 @@

+[
+  {
+    "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Mixed (Float16, Int32, Palettized (10 bits), Palettized (11 bits), Palettized (4 bits), Palettized (9 bits), UInt4)",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 50 × 1024)",
+        "shortDescription" : "",
+        "shape" : "[1, 50, 1024]",
+        "name" : "hidden_states",
+        "type" : "MultiArray"
+      }
+    ],
+    "modelParameters" : [
+    ],
+    "specificationVersion" : 9,
+    "mlProgramOperationTypeHistogram" : {
+      "Ios18.expandDims" : 1,
+      "Ios18.mul" : 312,
+      "Ios18.softmax" : 12,
+      "Ios18.matmul" : 36,
+      "Ios16.reduceMean" : 108,
+      "Ios18.sigmoid" : 12,
+      "Split" : 12,
+      "Select" : 12,
+      "Ios18.add" : 168,
+      "Ios18.layerNorm" : 2,
+      "Ios18.reshape" : 109,
+      "Pad" : 60,
+      "Ios18.constexprLutToDense" : 134,
+      "Ios18.linear" : 121,
+      "Ios18.conv" : 14,
+      "Ios18.relu" : 2,
+      "Ios18.clip" : 312,
+      "Ios18.silu" : 36,
+      "Stack" : 24,
+      "Ios18.pow" : 216,
+      "Ios18.cast" : 540,
+      "Ios18.transpose" : 75,
+      "Ios18.tanh" : 12,
+      "Ios18.sliceByIndex" : 144
+    },
+    "computePrecision" : "Mixed (Float16, Float32, Int32)",
+    "isUpdatable" : "0",
+    "stateSchema" : [
+    ],
+    "availability" : {
+      "macOS" : "15.0",
+      "tvOS" : "18.0",
+      "visionOS" : "2.0",
+      "watchOS" : "11.0",
+      "iOS" : "18.0",
+      "macCatalyst" : "18.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "userDefinedMetadata" : {
+      "com.github.apple.coremltools.conversion_date" : "2026-04-30",
+      "com.github.apple.coremltools.source" : "torch==2.11.0",
+      "com.github.apple.coremltools.version" : "9.0",
+      "com.github.apple.coremltools.source_dialect" : "TorchScript"
+    },
+    "inputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 200 × 128)",
+        "shortDescription" : "",
+        "shape" : "[1, 200, 128]",
+        "name" : "input_features",
+        "type" : "MultiArray"
+      }
+    ],
+    "generatedClassName" : "audio",
+    "method" : "predict"
+  }
+]

audio.mlmodelc/model.mil ADDED Viewed

The diff for this file is too large to render. See raw diff

audio.mlmodelc/weights/weight.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:37da916ac6ac7911266a9c7532a681e4039aea7ce13bf570d80636b705dc6163
+size 146087488

audio_config.json ADDED Viewed

	@@ -0,0 +1,19 @@

+{
+  "sampling_rate": 16000,
+  "feature_size": 128,
+  "frame_length": 320,
+  "hop_length": 160,
+  "fft_length": 512,
+  "mel_floor": 1e-05,
+  "min_frequency": 0,
+  "max_frequency": 8000,
+  "log_offset": 0.001,
+  "preemphasis": 0.97,
+  "mel_frames": 200,
+  "num_tokens": 50,
+  "audio_token_id": 258881,
+  "boa_token_id": 256000,
+  "eoa_token_id": 258883,
+  "ms_per_token": 40,
+  "quantization": "int4"
+}

chunk1.mlmodelc/analytics/coremldata.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:eb7d0c074925f5e2b23d70754135276d3b38e5bb2ebf89df153a401e37ef2f57
+size 243

chunk1.mlmodelc/coremldata.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5b53fc92f6d11bf88eb63b9a7af4a7211180e3c031115a9abfa20655814727d4
+size 1333

chunk1.mlmodelc/model.mil ADDED Viewed

The diff for this file is too large to render. See raw diff

chunk1.mlmodelc/weights/weight.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:67c868123b2e7b2182d97a0aaca1d4e33f861ee446eac5b038bdb9f0e2c6e787
+size 585970432

chunk2.mlmodelc/analytics/coremldata.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0c032a454d2eaeea9fd5bfdfe3a2caf53e1a79d401fe8007c986abcc44469a19
+size 243

chunk2.mlmodelc/coremldata.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8046d35bb019573a7541c830b6b712d5176c199f51e2b1fec3a17342d86a3ac6
+size 1471

chunk2.mlmodelc/model.mil ADDED Viewed

The diff for this file is too large to render. See raw diff

chunk2.mlmodelc/weights/weight.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c6cd92e3945e5e809a15df7a1d9e648fb651e859d733d9589eba817805e2d96d
+size 572196992

chunk2_3way.mlmodelc/analytics/coremldata.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:223a79744041af35a291271aca045883b40f5cc88ad1fb9040a2ee0a2a5b25b9
+size 243

chunk2_3way.mlmodelc/coremldata.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:476686c14666d2a23a5e04271a9bb1f2ce006c76ac085370b9f10fe90a05c810
+size 979

chunk2_3way.mlmodelc/metadata.json ADDED Viewed

	@@ -0,0 +1,285 @@

+[
+  {
+    "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Mixed (Float16, Palettized (10 bits), Palettized (11 bits), Palettized (13 bits), Palettized (7 bits), Palettized (8 bits), Palettized (9 bits), UInt4)",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1 × 2560)",
+        "shortDescription" : "",
+        "shape" : "[1, 1, 2560]",
+        "name" : "hidden_states_out",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 10 × 2 × 512 × 512)",
+        "shortDescription" : "",
+        "shape" : "[10, 2, 512, 512]",
+        "name" : "K_sliding_out",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 10 × 2 × 512 × 512)",
+        "shortDescription" : "",
+        "shape" : "[10, 2, 512, 512]",
+        "name" : "V_sliding_out",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 2 × 2 × 2048 × 512)",
+        "shortDescription" : "",
+        "shape" : "[2, 2, 2048, 512]",
+        "name" : "K_full_out",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 2 × 2 × 2048 × 512)",
+        "shortDescription" : "",
+        "shape" : "[2, 2, 2048, 512]",
+        "name" : "V_full_out",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 2 × 512 × 256)",
+        "shortDescription" : "",
+        "shape" : "[1, 2, 512, 256]",
+        "name" : "kv13_k",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 2 × 512 × 256)",
+        "shortDescription" : "",
+        "shape" : "[1, 2, 512, 256]",
+        "name" : "kv13_v",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 2 × 2048 × 512)",
+        "shortDescription" : "",
+        "shape" : "[1, 2, 2048, 512]",
+        "name" : "kv14_k",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 2 × 2048 × 512)",
+        "shortDescription" : "",
+        "shape" : "[1, 2, 2048, 512]",
+        "name" : "kv14_v",
+        "type" : "MultiArray"
+      }
+    ],
+    "modelParameters" : [
+    ],
+    "specificationVersion" : 9,
+    "mlProgramOperationTypeHistogram" : {
+      "Ios18.expandDims" : 108,
+      "Ios18.mul" : 457,
+      "Ios18.matmul" : 42,
+      "Ios18.rsqrt" : 12,
+      "Ios18.exp" : 21,
+      "Ios16.reduceMean" : 12,
+      "Ios18.realDiv" : 21,
+      "Split" : 171,
+      "Ios16.reduceMax" : 21,
+      "Tile" : 28,
+      "Ios18.add" : 133,
+      "Ios16.reduceSum" : 21,
+      "Ios18.layerNorm" : 138,
+      "Ios18.reshape" : 180,
+      "Pad" : 20,
+      "Ios18.constexprLutToDense" : 171,
+      "Ios18.conv" : 171,
+      "Ios18.concat" : 191,
+      "Ios18.transpose" : 306,
+      "Ios18.sub" : 22,
+      "Ios18.pow" : 12,
+      "Ios18.gelu" : 42,
+      "Stack" : 4,
+      "Ios18.sliceByIndex" : 85,
+      "Ios18.squeeze" : 66
+    },
+    "computePrecision" : "Mixed (Float16, Float32, Int32)",
+    "isUpdatable" : "0",
+    "stateSchema" : [
+    ],
+    "availability" : {
+      "macOS" : "15.0",
+      "tvOS" : "18.0",
+      "visionOS" : "2.0",
+      "watchOS" : "11.0",
+      "iOS" : "18.0",
+      "macCatalyst" : "18.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "userDefinedMetadata" : {
+      "com.github.apple.coremltools.conversion_date" : "2026-04-30",
+      "com.github.apple.coremltools.source" : "torch==2.11.0",
+      "com.github.apple.coremltools.version" : "9.0",
+      "com.github.apple.coremltools.source_dialect" : "TorchScript"
+    },
+    "inputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1 × 2560)",
+        "shortDescription" : "",
+        "shape" : "[1, 1, 2560]",
+        "name" : "hidden_states",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 2048)",
+        "shortDescription" : "",
+        "shape" : "[1, 1, 1, 2048]",
+        "name" : "causal_mask_full",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 512)",
+        "shortDescription" : "",
+        "shape" : "[1, 1, 1, 512]",
+        "name" : "causal_mask_sliding",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1 × 2048 × 1)",
+        "shortDescription" : "",
+        "shape" : "[1, 1, 2048, 1]",
+        "name" : "update_mask",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1 × 10752)",
+        "shortDescription" : "",
+        "shape" : "[1, 1, 10752]",
+        "name" : "per_layer_combined",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 256)",
+        "shortDescription" : "",
+        "shape" : "[1, 1, 1, 256]",
+        "name" : "cos_s",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 256)",
+        "shortDescription" : "",
+        "shape" : "[1, 1, 1, 256]",
+        "name" : "sin_s",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 512)",
+        "shortDescription" : "",
+        "shape" : "[1, 1, 1, 512]",
+        "name" : "cos_f",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 512)",
+        "shortDescription" : "",
+        "shape" : "[1, 1, 1, 512]",
+        "name" : "sin_f",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 10 × 2 × 512 × 512)",
+        "shortDescription" : "",
+        "shape" : "[10, 2, 512, 512]",
+        "name" : "K_sliding_in",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 10 × 2 × 512 × 512)",
+        "shortDescription" : "",
+        "shape" : "[10, 2, 512, 512]",
+        "name" : "V_sliding_in",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 2 × 2 × 2048 × 512)",
+        "shortDescription" : "",
+        "shape" : "[2, 2, 2048, 512]",
+        "name" : "K_full_in",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 2 × 2 × 2048 × 512)",
+        "shortDescription" : "",
+        "shape" : "[2, 2, 2048, 512]",
+        "name" : "V_full_in",
+        "type" : "MultiArray"
+      }
+    ],
+    "generatedClassName" : "chunk2_3way",
+    "method" : "predict"
+  }
+]

chunk2_3way.mlmodelc/model.mil ADDED Viewed

The diff for this file is too large to render. See raw diff

chunk2_3way.mlmodelc/weights/weight.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2dfcddee4de0905bab42c9dbf6b4d03ec1fc888d76de762dee5153423081b838
+size 984936000

chunk3.mlmodelc/analytics/coremldata.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:83e107e7d0f531fa20c6861ea0483120a4246adc98daba2bdc9ec015f77bc7ac
+size 243

chunk3.mlmodelc/coremldata.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d43e7d6694b27bec11ece3eb5bb8b3b5b185fa8bc0b668980e154952fd36bf0b
+size 940

chunk3.mlmodelc/model.mil ADDED Viewed

The diff for this file is too large to render. See raw diff

chunk3.mlmodelc/weights/weight.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:db45f9ce7443de57765ba412a0158ac2ad46a9d2f735fba9376bbdb0aa357b88
+size 412740736

chunk3_3way.mlmodelc/analytics/coremldata.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:83149a33c6c49a2607a6e038d88d42ea6829bd7ae99bbc67bbba085b983cff48
+size 243

chunk3_3way.mlmodelc/coremldata.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:346fab1c61528cc8fccc0b5f0d65bb8943b6ebafb0f983a1d6cb1361047195d5
+size 780

chunk3_3way.mlmodelc/metadata.json ADDED Viewed

	@@ -0,0 +1,224 @@

+[
+  {
+    "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Mixed (Float16, Palettized (10 bits), Palettized (11 bits), Palettized (13 bits), Palettized (17 bits), Palettized (7 bits), UInt4)",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Int32",
+        "formattedType" : "MultiArray (Int32 1)",
+        "shortDescription" : "",
+        "shape" : "[1]",
+        "name" : "token_id",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1)",
+        "shortDescription" : "",
+        "shape" : "[1]",
+        "name" : "token_logit",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1 × 2560)",
+        "shortDescription" : "",
+        "shape" : "[1, 1, 2560]",
+        "name" : "hidden_states_out",
+        "type" : "MultiArray"
+      }
+    ],
+    "modelParameters" : [
+    ],
+    "specificationVersion" : 9,
+    "mlProgramOperationTypeHistogram" : {
+      "Ios18.expandDims" : 37,
+      "Ios18.mul" : 166,
+      "Ios18.matmul" : 18,
+      "Identity" : 1,
+      "Ios18.exp" : 9,
+      "Ios18.realDiv" : 9,
+      "Split" : 64,
+      "Ios18.gatherAlongAxis" : 1,
+      "Ios16.reduceMax" : 9,
+      "Tile" : 4,
+      "Ios16.reduceSum" : 9,
+      "Ios18.add" : 45,
+      "Ios18.layerNorm" : 55,
+      "Ios18.reduceArgmax" : 1,
+      "Ios18.reshape" : 44,
+      "Ios18.constexprLutToDense" : 64,
+      "Ios18.conv" : 64,
+      "Ios18.gelu" : 18,
+      "Ios18.concat" : 64,
+      "Ios18.sub" : 9,
+      "Ios18.transpose" : 104,
+      "Ios18.tanh" : 1,
+      "Ios18.squeeze" : 20,
+      "Ios18.sliceByIndex" : 9
+    },
+    "computePrecision" : "Mixed (Float16, Int32)",
+    "isUpdatable" : "0",
+    "stateSchema" : [
+    ],
+    "availability" : {
+      "macOS" : "15.0",
+      "tvOS" : "18.0",
+      "visionOS" : "2.0",
+      "watchOS" : "11.0",
+      "iOS" : "18.0",
+      "macCatalyst" : "18.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "userDefinedMetadata" : {
+      "com.github.apple.coremltools.conversion_date" : "2026-04-30",
+      "com.github.apple.coremltools.source" : "torch==2.11.0",
+      "com.github.apple.coremltools.version" : "9.0",
+      "com.github.apple.coremltools.source_dialect" : "TorchScript"
+    },
+    "inputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1 × 2560)",
+        "shortDescription" : "",
+        "shape" : "[1, 1, 2560]",
+        "name" : "hidden_states",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 2048)",
+        "shortDescription" : "",
+        "shape" : "[1, 1, 1, 2048]",
+        "name" : "causal_mask_full",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 512)",
+        "shortDescription" : "",
+        "shape" : "[1, 1, 1, 512]",
+        "name" : "causal_mask_sliding",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1 × 2048 × 1)",
+        "shortDescription" : "",
+        "shape" : "[1, 1, 2048, 1]",
+        "name" : "update_mask",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1 × 10752)",
+        "shortDescription" : "",
+        "shape" : "[1, 1, 10752]",
+        "name" : "per_layer_combined",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 256)",
+        "shortDescription" : "",
+        "shape" : "[1, 1, 1, 256]",
+        "name" : "cos_s",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 256)",
+        "shortDescription" : "",
+        "shape" : "[1, 1, 1, 256]",
+        "name" : "sin_s",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 512)",
+        "shortDescription" : "",
+        "shape" : "[1, 1, 1, 512]",
+        "name" : "cos_f",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 512)",
+        "shortDescription" : "",
+        "shape" : "[1, 1, 1, 512]",
+        "name" : "sin_f",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 2 × 512 × 256)",
+        "shortDescription" : "",
+        "shape" : "[1, 2, 512, 256]",
+        "name" : "kv13_k",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 2 × 512 × 256)",
+        "shortDescription" : "",
+        "shape" : "[1, 2, 512, 256]",
+        "name" : "kv13_v",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 2 × 2048 × 512)",
+        "shortDescription" : "",
+        "shape" : "[1, 2, 2048, 512]",
+        "name" : "kv14_k",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 2 × 2048 × 512)",
+        "shortDescription" : "",
+        "shape" : "[1, 2, 2048, 512]",
+        "name" : "kv14_v",
+        "type" : "MultiArray"
+      }
+    ],
+    "generatedClassName" : "chunk3_3way",
+    "method" : "predict"
+  }
+]

chunk3_3way.mlmodelc/model.mil ADDED Viewed

The diff for this file is too large to render. See raw diff

chunk3_3way.mlmodelc/weights/weight.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:155bcb0a818cb9f95184346c2cc319980d33f6acf5ec4b14fec14abc61888cd9
+size 753797440

chunk4.mlmodelc/analytics/coremldata.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:58c1fb89f6c05774b2ac875839fcc1e5c153cc195cfe223ad9ffb42d2d30ea48
+size 243

chunk4.mlmodelc/coremldata.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b78c0ef3198782c4e4060bdf45f682dcb566666e05fee15858a5c2467c05965b
+size 1014

chunk4.mlmodelc/model.mil ADDED Viewed

The diff for this file is too large to render. See raw diff

chunk4.mlmodelc/weights/weight.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8f044d109750fec5d781baada3c070cd3d524b674ea68a00c1c99b5e8015cfbb
+size 753797440

cos_full.npy ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:767b3a21305a67e3a3fd22e256f2e7385014b32374442b6103fb820c7d9ef1fc
+size 4194432

cos_sliding.npy ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a27afac2d0282008c59736cb498f0b49e6f775e0b3847811fdd06be09c6df4a1
+size 2097280

embed_proj_weight.npy ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9cae010a79030ee666136bf5317af76019a87a686bc93947ca1e20535f4a9109
+size 7864448

embed_tokens_per_layer_q8.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:269eb54aa366e2d73474d7521b376025fa53bd2d72c6d3bc0301b0882c6ae681
+size 2818572288

embed_tokens_per_layer_scales.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bc32925fb678b96d96cf804b77c1d137f157a6136b76e6f8a003d69f4e976fea
+size 524288

embed_tokens_q8.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b8921fcbb6d8a79c7b304b929f357cdb41905fec75a90bad00dfed071c76fb82
+size 671088640

embed_tokens_scales.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a5c20211cbfbb5e5a1059d91c5c1e0493e7630505028459e7843a9b1b41ee854
+size 524288

hf_model/config.json ADDED Viewed

	@@ -0,0 +1,197 @@

+{
+  "architectures": [
+    "Gemma4ForConditionalGeneration"
+  ],
+  "audio_config": {
+    "_name_or_path": "",
+    "architectures": null,
+    "attention_chunk_size": 12,
+    "attention_context_left": 13,
+    "attention_context_right": 0,
+    "attention_invalid_logits_value": -1000000000.0,
+    "attention_logit_cap": 50.0,
+    "chunk_size_feed_forward": 0,
+    "conv_kernel_size": 5,
+    "dtype": "bfloat16",
+    "gradient_clipping": 10000000000.0,
+    "hidden_act": "silu",
+    "hidden_size": 1024,
+    "id2label": {
+      "0": "LABEL_0",
+      "1": "LABEL_1"
+    },
+    "initializer_range": 0.02,
+    "is_encoder_decoder": false,
+    "label2id": {
+      "LABEL_0": 0,
+      "LABEL_1": 1
+    },
+    "model_type": "gemma4_audio",
+    "num_attention_heads": 8,
+    "num_hidden_layers": 12,
+    "output_attentions": false,
+    "output_hidden_states": false,
+    "output_proj_dims": 1536,
+    "problem_type": null,
+    "residual_weight": 0.5,
+    "return_dict": true,
+    "rms_norm_eps": 1e-06,
+    "subsampling_conv_channels": [
+      128,
+      32
+    ],
+    "use_clipped_linears": true
+  },
+  "audio_token_id": 258881,
+  "boa_token_id": 256000,
+  "boi_token_id": 255999,
+  "dtype": "bfloat16",
+  "eoa_token_id": 258883,
+  "eoa_token_index": 258883,
+  "eoi_token_id": 258882,
+  "eos_token_id": [
+    1,
+    106
+  ],
+  "image_token_id": 258880,
+  "initializer_range": 0.02,
+  "model_type": "gemma4",
+  "text_config": {
+    "attention_bias": false,
+    "attention_dropout": 0.0,
+    "attention_k_eq_v": false,
+    "bos_token_id": 2,
+    "dtype": "bfloat16",
+    "enable_moe_block": false,
+    "eos_token_id": 1,
+    "expert_intermediate_size": null,
+    "final_logit_softcapping": 30.0,
+    "global_head_dim": 512,
+    "head_dim": 256,
+    "hidden_activation": "gelu_pytorch_tanh",
+    "hidden_size": 2560,
+    "hidden_size_per_layer_input": 256,
+    "initializer_range": 0.02,
+    "intermediate_size": 10240,
+    "layer_types": [
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "full_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "full_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "full_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "full_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "full_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "full_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "full_attention"
+    ],
+    "max_position_embeddings": 131072,
+    "model_type": "gemma4_text",
+    "num_attention_heads": 8,
+    "num_experts": null,
+    "num_global_key_value_heads": null,
+    "num_hidden_layers": 42,
+    "num_key_value_heads": 2,
+    "num_kv_shared_layers": 18,
+    "pad_token_id": 0,
+    "rms_norm_eps": 1e-06,
+    "rope_parameters": {
+      "full_attention": {
+        "partial_rotary_factor": 0.25,
+        "rope_theta": 1000000.0,
+        "rope_type": "proportional"
+      },
+      "sliding_attention": {
+        "rope_theta": 10000.0,
+        "rope_type": "default"
+      }
+    },
+    "sliding_window": 512,
+    "tie_word_embeddings": true,
+    "top_k_experts": null,
+    "use_bidirectional_attention": null,
+    "use_cache": true,
+    "use_double_wide_mlp": false,
+    "vocab_size": 262144,
+    "vocab_size_per_layer_input": 262144
+  },
+  "tie_word_embeddings": true,
+  "transformers_version": "5.5.0.dev0",
+  "video_token_id": 258884,
+  "vision_config": {
+    "_name_or_path": "",
+    "architectures": null,
+    "attention_bias": false,
+    "attention_dropout": 0.0,
+    "chunk_size_feed_forward": 0,
+    "default_output_length": 280,
+    "dtype": "bfloat16",
+    "global_head_dim": 64,
+    "head_dim": 64,
+    "hidden_activation": "gelu_pytorch_tanh",
+    "hidden_size": 768,
+    "id2label": {
+      "0": "LABEL_0",
+      "1": "LABEL_1"
+    },
+    "initializer_range": 0.02,
+    "intermediate_size": 3072,
+    "is_encoder_decoder": false,
+    "label2id": {
+      "LABEL_0": 0,
+      "LABEL_1": 1
+    },
+    "max_position_embeddings": 131072,
+    "model_type": "gemma4_vision",
+    "num_attention_heads": 12,
+    "num_hidden_layers": 16,
+    "num_key_value_heads": 12,
+    "output_attentions": false,
+    "output_hidden_states": false,
+    "patch_size": 16,
+    "pooling_kernel_size": 3,
+    "position_embedding_size": 10240,
+    "problem_type": null,
+    "return_dict": true,
+    "rms_norm_eps": 1e-06,
+    "rope_parameters": {
+      "rope_theta": 100.0,
+      "rope_type": "default"
+    },
+    "standardize": false,
+    "use_clipped_linears": true
+  },
+  "vision_soft_tokens_per_image": 280
+}

hf_model/generation_config.json ADDED Viewed

	@@ -0,0 +1,14 @@

+{
+  "bos_token_id": 2,
+  "do_sample": true,
+  "eos_token_id": [
+    1,
+    106,
+    50
+  ],
+  "pad_token_id": 0,
+  "temperature": 1.0,
+  "top_k": 64,
+  "top_p": 0.95,
+  "transformers_version": "5.5.0.dev0"
+}

hf_model/tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cc8d3a0ce36466ccc1278bf987df5f71db1719b9ca6b4118264f45cb627bfe0f
+size 32169626

hf_model/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,74 @@

+{
+  "audio_token": "<|audio|>",
+  "backend": "tokenizers",
+  "boa_token": "<|audio>",
+  "boi_token": "<|image>",
+  "bos_token": "<bos>",
+  "eoa_token": "<audio|>",
+  "eoc_token": "<channel|>",
+  "eoi_token": "<image|>",
+  "eos_token": "<eos>",
+  "eot_token": "<turn|>",
+  "escape_token": "<|\"|>",
+  "etc_token": "<tool_call|>",
+  "etd_token": "<tool|>",
+  "etr_token": "<tool_response|>",
+  "extra_special_tokens": [
+    "<|video|>"
+  ],
+  "image_token": "<|image|>",
+  "mask_token": "<mask>",
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "<pad>",
+  "padding_side": "left",
+  "processor_class": "Gemma4Processor",
+  "response_schema": {
+    "type": "object",
+    "properties": {
+      "role": {
+        "const": "assistant"
+      },
+      "thinking": {
+        "type": "string"
+      },
+      "content": {
+        "type": "string"
+      },
+      "tool_calls": {
+        "x-regex-iterator": "<\\|tool_call>(.*?)<tool_call\\|>",
+        "type": "array",
+        "items": {
+          "type": "object",
+          "properties": {
+            "type": {
+              "const": "function"
+            },
+            "function": {
+              "type": "object",
+              "x-regex": "call\\:(?P<name>\\w+)(?P<arguments>\\{.*\\})",
+              "properties": {
+                "name": {
+                  "type": "string"
+                },
+                "arguments": {
+                  "type": "object",
+                  "x-parser": "gemma4-tool-call",
+                  "additionalProperties": {}
+                }
+              }
+            }
+          }
+        }
+      }
+    },
+    "x-regex": "(\\<\\|channel\\>thought\\n(?P<thinking>.*?)\\<channel\\|\\>)?(?P<tool_calls>\\<\\|tool_call\\>.*\\<tool_call\\|\\>)?(?P<content>(?:(?!\\<turn\\|\\>)(?!\\<\\|tool_response\\>).)+)?(?:\\<turn\\|\\>|\\<\\|tool_response\\>)?"
+  },
+  "soc_token": "<|channel>",
+  "sot_token": "<|turn>",
+  "stc_token": "<|tool_call>",
+  "std_token": "<|tool>",
+  "str_token": "<|tool_response>",
+  "think_token": "<|think|>",
+  "tokenizer_class": "GemmaTokenizer",
+  "unk_token": "<unk>"
+}

mel_filterbank.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:427860b9a9429175f0e450512def4224f46ced89960dfb1d9cf7479d7e485e2b
+size 131584

model_config.json ADDED Viewed

	@@ -0,0 +1,26 @@

+{
+  "model_name": "gemma4-e4b",
+  "architecture": "gemma4",
+  "hidden_size": 2560,
+  "num_hidden_layers": 42,
+  "num_attention_heads": 8,
+  "num_key_value_heads": 2,
+  "head_dim": 256,
+  "global_head_dim": 512,
+  "vocab_size": 262144,
+  "context_length": 2048,
+  "sliding_window": 512,
+  "per_layer_dim": 256,
+  "num_layers": 42,
+  "embed_scale": 50.59644256269407,
+  "per_layer_embed_scale": 16.0,
+  "per_layer_model_projection_scale": 0.01976423537605237,
+  "per_layer_input_scale": 0.7071067811865475,
+  "rms_norm_eps": 1e-06,
+  "bos_token_id": 2,
+  "eos_token_id": 1,
+  "final_logit_softcapping": 30.0,
+  "quantization": "int4",
+  "compute_units": "CPU_AND_NE",
+  "tokenizer_repo": "google/gemma-4-E4B-it"
+}

output_proj_bias.npy ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b5f564508f25bb143480d3dd9071c5deb8340f7c9b35582fefd55815ab355597
+size 3200

output_proj_weight.npy ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:074449599eed804bc8fe0e83e96d535109c1f9f9a3c51df7c3a232d67502e285
+size 3145856

per_layer_norm_weight.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ff67a6ed2e1ac597c28467653c4d67ecd4018f668b1d667af95e564539bd4c10
+size 512