mlboydaisuke committed on
Commit
d3bc0fd
·
verified ·
1 Parent(s): c4e5e2c

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +1 -0
  2. README.md +148 -0
  3. audio.mlmodelc/analytics/coremldata.bin +3 -0
  4. audio.mlmodelc/coremldata.bin +3 -0
  5. audio.mlmodelc/metadata.json +84 -0
  6. audio.mlmodelc/model.mil +0 -0
  7. audio.mlmodelc/weights/weight.bin +3 -0
  8. audio_config.json +19 -0
  9. chunk1.mlmodelc/analytics/coremldata.bin +3 -0
  10. chunk1.mlmodelc/coremldata.bin +3 -0
  11. chunk1.mlmodelc/model.mil +0 -0
  12. chunk1.mlmodelc/weights/weight.bin +3 -0
  13. chunk2.mlmodelc/analytics/coremldata.bin +3 -0
  14. chunk2.mlmodelc/coremldata.bin +3 -0
  15. chunk2.mlmodelc/model.mil +0 -0
  16. chunk2.mlmodelc/weights/weight.bin +3 -0
  17. chunk2_3way.mlmodelc/analytics/coremldata.bin +3 -0
  18. chunk2_3way.mlmodelc/coremldata.bin +3 -0
  19. chunk2_3way.mlmodelc/metadata.json +285 -0
  20. chunk2_3way.mlmodelc/model.mil +0 -0
  21. chunk2_3way.mlmodelc/weights/weight.bin +3 -0
  22. chunk3.mlmodelc/analytics/coremldata.bin +3 -0
  23. chunk3.mlmodelc/coremldata.bin +3 -0
  24. chunk3.mlmodelc/model.mil +0 -0
  25. chunk3.mlmodelc/weights/weight.bin +3 -0
  26. chunk3_3way.mlmodelc/analytics/coremldata.bin +3 -0
  27. chunk3_3way.mlmodelc/coremldata.bin +3 -0
  28. chunk3_3way.mlmodelc/metadata.json +224 -0
  29. chunk3_3way.mlmodelc/model.mil +0 -0
  30. chunk3_3way.mlmodelc/weights/weight.bin +3 -0
  31. chunk4.mlmodelc/analytics/coremldata.bin +3 -0
  32. chunk4.mlmodelc/coremldata.bin +3 -0
  33. chunk4.mlmodelc/model.mil +0 -0
  34. chunk4.mlmodelc/weights/weight.bin +3 -0
  35. cos_full.npy +3 -0
  36. cos_sliding.npy +3 -0
  37. embed_proj_weight.npy +3 -0
  38. embed_tokens_per_layer_q8.bin +3 -0
  39. embed_tokens_per_layer_scales.bin +3 -0
  40. embed_tokens_q8.bin +3 -0
  41. embed_tokens_scales.bin +3 -0
  42. hf_model/config.json +197 -0
  43. hf_model/generation_config.json +14 -0
  44. hf_model/tokenizer.json +3 -0
  45. hf_model/tokenizer_config.json +74 -0
  46. mel_filterbank.bin +3 -0
  47. model_config.json +26 -0
  48. output_proj_bias.npy +3 -0
  49. output_proj_weight.npy +3 -0
  50. per_layer_norm_weight.bin +3 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ hf_model/tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,148 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: gemma
3
+ library_name: coreml
4
+ base_model: google/gemma-4-E4B-it
5
+ tags:
6
+ - coreml
7
+ - apple-silicon
8
+ - ane
9
+ - on-device
10
+ - gemma-4
11
+ - multimodal
12
+ - vision
13
+ - audio
14
+ pipeline_tag: image-text-to-text
15
+ ---
16
+
17
+ ## Use it from Swift
18
+
19
+ <!-- swift-usage-begin -->
20
+ ### Add the package
21
+
22
+ `Package.swift`:
23
+
24
+ ```swift
25
+ .package(url: "https://github.com/john-rocky/CoreML-LLM", branch: "main"),
26
+
27
+ // In your target:
28
+ .product(name: "CoreMLLLM", package: "CoreML-LLM"),
29
+ ```
30
+
31
+ Platforms: iOS 18+ / macOS 15+.
32
+
33
+ ### Download + chat (one call, text + image + audio)
34
+
35
+ ```swift
36
+ import CoreMLLLM
37
+
38
+ // First call pulls the bundle from this repo to Documents/Models/.
39
+ let llm = try await CoreMLLLM.load(repo: "mlboydaisuke/gemma-4-E4B-multimodal-coreml")
40
+
41
+ // Text-only
42
+ let stream = try await llm.generate(
43
+ [CoreMLLLM.Message(role: .user, content: "Hello!")],
44
+ maxTokens: 256
45
+ )
46
+ for await chunk in stream { print(chunk, terminator: "") }
47
+
48
+ // Image + text
49
+ let image: CGImage = // ... your image
50
+ let stream2 = try await llm.generate(
51
+ [CoreMLLLM.Message(role: .user, content: "Describe this picture.")],
52
+ image: image, maxTokens: 256)
53
+
54
+ // Audio + text (16 kHz mono PCM Float)
55
+ let pcm: [Float] = // ... your audio samples
56
+ let stream3 = try await llm.generate(
57
+ [CoreMLLLM.Message(role: .user, content: "What language is this?")],
58
+ audio: pcm, maxTokens: 256)
59
+ ```
60
+
61
+ Set the environment variable `LLM_VISION_FORCE_ANE=1` in your Xcode scheme to route the vision encoder through the Apple Neural Engine. The encoder is built to target the ANE and emits 256 tokens per image at the LM hidden dimension.
62
+ <!-- swift-usage-end -->
63
+
64
+ # Gemma 4 E4B (multimodal) — Core ML (INT4, Apple Neural Engine)
65
+
66
+ Core ML port of [`google/gemma-4-E4B-it`](https://huggingface.co/google/gemma-4-E4B-it) with vision (still image), video, and audio (Conformer) encoders. Sliding-window-attention chunks targeting Apple Neural Engine; vision encoder is ANE-targeted; audio runs on GPU + a small Swift/Accelerate projection sidecar.
67
+
68
+ **iPhone 17 Pro validated 2026-05-03** — text decode **15.7 tok/s** with correct outputs across all four input modalities (text / image / video / audio).
69
+
70
+ Built from [`john-rocky/CoreML-LLM`](https://github.com/john-rocky/CoreML-LLM); see [`docs/E4B_MULTIMODAL_BUILD.md`](https://github.com/john-rocky/CoreML-LLM/blob/main/docs/E4B_MULTIMODAL_BUILD.md) for the full reproduction guide and [`scripts/assemble_gemma4_e4b_multimodal.sh`](https://github.com/john-rocky/CoreML-LLM/blob/main/scripts/assemble_gemma4_e4b_multimodal.sh) for the assembly script.
71
+
72
+ ## Files
73
+
74
+ ```
75
+ # Decode chunks (3-chunk Topology II — auto-detected by ChunkedEngine)
76
+ chunk1.mlmodelc/ # L0-11 — own KV
77
+ chunk2_3way.mlmodelc/ # L12-32 — merged 21 layers (own + KV-shared internal)
78
+ chunk3_3way.mlmodelc/ # L33-41 + lm_head + argmax
79
+
80
+ # Prefill chunks (legacy 4-chunk with prefill_b8 multifunction inside)
81
+ chunk2.mlmodelc/ # L12-22 prefill (own KV writes via recurrent shift)
82
+ chunk3.mlmodelc/ # L23-32 prefill (KV-shared)
83
+ chunk4.mlmodelc/ # L33-41 prefill + lm_head
84
+
85
+ # Vision encoder (ANE-targeted)
86
+ vision.ane.mlmodelc/ # SigLIP, output [1, 256, 2560]
87
+
88
+ # Audio encoder + Swift projection sidecars
89
+ audio.mlmodelc/ # Conformer, output [1, 50, 1024]
90
+ audio_config.json
91
+ mel_filterbank.bin
92
+ output_proj_weight.npy # 1024 -> 1536 (audio_soft_token_size)
93
+ output_proj_bias.npy
94
+ embed_proj_weight.npy # 1536 -> 2560 (LM hidden) — E4B-specific shape
95
+
96
+ # Token / per-layer embeddings (mmap'd, dequantised on demand by Swift)
97
+ embed_tokens_q8.bin 640 MB — INT8 token embeddings (262144 x 2560)
98
+ embed_tokens_scales.bin 512 KB
99
+ embed_tokens_per_layer_q8.bin 2.6 GB — INT8 per-layer embeddings (PLE)
100
+ embed_tokens_per_layer_scales.bin 512 KB
101
+ per_layer_projection.bin 53 MB
102
+ per_layer_norm_weight.bin 512 B
103
+
104
+ # RoPE cos/sin tables (pre-baked, mmap'd)
105
+ cos_sliding.npy / sin_sliding.npy
106
+ cos_full.npy / sin_full.npy
107
+
108
+ # Tokenizer + runtime config
109
+ hf_model/
110
+ tokenizer.json, tokenizer_config.json, config.json, generation_config.json
111
+ model_config.json
112
+ ```
113
+
114
+ Total bundle size: **~7.6 GB**.
115
+
116
+ ## Engine path on iPhone (what runs where)
117
+
118
+ | Stage | Compute | Files used |
119
+ |---|---|---|
120
+ | Token / PLE embed lookup | Swift CPU (mmap) | `embed_tokens*.bin`, `per_layer_*.bin` |
121
+ | Decode (T=1) | ANE | `chunk1` + `chunk2_3way` + `chunk3_3way` |
122
+ | Prefill (batched, T=8) | ANE | `chunk1` + `chunk2` + `chunk3` + `chunk4` (`prefill_b8` multifunction) |
123
+ | Vision encoder | ANE | `vision.ane.mlmodelc` (with `LLM_VISION_FORCE_ANE=1`) |
124
+ | Audio encoder | GPU | `audio.mlmodelc` |
125
+ | Audio projection (1024 → 1536 → 2560) | Swift / Accelerate | `output_proj_*.npy`, `embed_proj_weight.npy` |
126
+
127
+ The Swift runtime auto-detects Topology II by the presence of `chunk2_3way` + `chunk3_3way` and routes prefill through the legacy 4-chunk `prefill_b8` multifunction (the engine's `fillBatchMasksVisionAware` keeps bidirectional within-image attention working at `T=8` batches).
128
+
129
+ ## Why so many sidecars (vs a single `model.mlpackage`)?
130
+
131
+ Gemma 4 E-series uses a per-layer embedding (PLE) bank that's much larger than the token embedding (2.6 GB vs 640 MB for E4B). Loading PLE through Core ML would dequantize the entire bank into the CPU heap and blow up `phys_footprint`. We mmap the raw INT8 + scale `.bin` files instead, dequantize the few rows touched per token in pure Swift, and feed the result to the chunks. The chunks themselves are pure transformer bodies and stay ANE-resident.
132
+
133
+ The `.npy` RoPE tables are pre-baked at conversion time so Swift doesn't need to ship a `cos`/`sin` builder.
134
+
135
+ The audio projection (`output_proj_*` / `embed_proj_weight`) runs in Swift on the CPU rather than inside the Core ML audio model because of a Core ML GPU runtime bug with `RMSNorm(with_scale=False)` that produces all-zero outputs. SGEMM in Accelerate is fast enough on CPU.
136
+
137
+ ## Tokenizer
138
+
139
+ The Gemma 4 SentencePiece tokenizer ships in `hf_model/`. Three multimodal placeholder token IDs:
140
+ - `<|image|>` = 258880 — image-pad span (256 per still image)
141
+ - `<|audio|>` = 258881 — audio-pad span (50 per 2 sec window, i.e. 40 ms per token; see `audio_config.json`)
142
+ - `<|video|>` = 258884 — video-pad span (64 per frame)
143
+
144
+ Vision encoder output rows replace `<|image|>`/`<|video|>` rows during prefill (and per-token at decode for tail spans). Audio output rows replace `<|audio|>`. `per_layer_raw` is forced to zero at multimodal positions — the chunks compute `per_layer_combined` entirely from the spliced hidden state.
145
+
146
+ ## License
147
+
148
+ This is a derivative work of `google/gemma-4-E4B-it`. Use is governed by the [Gemma Terms of Use](https://ai.google.dev/gemma/terms). Vision / audio extensions inherit the same license.
audio.mlmodelc/analytics/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea515c53f416101ef42bce8f1a9ac1be59d838914747ecba22b70ead41039ee5
3
+ size 243
audio.mlmodelc/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cda961a2bf20e093c8fe82e55240512265ee10f1cf48078033ebc972298750b9
3
+ size 390
audio.mlmodelc/metadata.json ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "metadataOutputVersion" : "3.0",
4
+ "storagePrecision" : "Mixed (Float16, Int32, Palettized (10 bits), Palettized (11 bits), Palettized (4 bits), Palettized (9 bits), UInt4)",
5
+ "outputSchema" : [
6
+ {
7
+ "hasShapeFlexibility" : "0",
8
+ "isOptional" : "0",
9
+ "dataType" : "Float16",
10
+ "formattedType" : "MultiArray (Float16 1 × 50 × 1024)",
11
+ "shortDescription" : "",
12
+ "shape" : "[1, 50, 1024]",
13
+ "name" : "hidden_states",
14
+ "type" : "MultiArray"
15
+ }
16
+ ],
17
+ "modelParameters" : [
18
+
19
+ ],
20
+ "specificationVersion" : 9,
21
+ "mlProgramOperationTypeHistogram" : {
22
+ "Ios18.expandDims" : 1,
23
+ "Ios18.mul" : 312,
24
+ "Ios18.softmax" : 12,
25
+ "Ios18.matmul" : 36,
26
+ "Ios16.reduceMean" : 108,
27
+ "Ios18.sigmoid" : 12,
28
+ "Split" : 12,
29
+ "Select" : 12,
30
+ "Ios18.add" : 168,
31
+ "Ios18.layerNorm" : 2,
32
+ "Ios18.reshape" : 109,
33
+ "Pad" : 60,
34
+ "Ios18.constexprLutToDense" : 134,
35
+ "Ios18.linear" : 121,
36
+ "Ios18.conv" : 14,
37
+ "Ios18.relu" : 2,
38
+ "Ios18.clip" : 312,
39
+ "Ios18.silu" : 36,
40
+ "Stack" : 24,
41
+ "Ios18.pow" : 216,
42
+ "Ios18.cast" : 540,
43
+ "Ios18.transpose" : 75,
44
+ "Ios18.tanh" : 12,
45
+ "Ios18.sliceByIndex" : 144
46
+ },
47
+ "computePrecision" : "Mixed (Float16, Float32, Int32)",
48
+ "isUpdatable" : "0",
49
+ "stateSchema" : [
50
+
51
+ ],
52
+ "availability" : {
53
+ "macOS" : "15.0",
54
+ "tvOS" : "18.0",
55
+ "visionOS" : "2.0",
56
+ "watchOS" : "11.0",
57
+ "iOS" : "18.0",
58
+ "macCatalyst" : "18.0"
59
+ },
60
+ "modelType" : {
61
+ "name" : "MLModelType_mlProgram"
62
+ },
63
+ "userDefinedMetadata" : {
64
+ "com.github.apple.coremltools.conversion_date" : "2026-04-30",
65
+ "com.github.apple.coremltools.source" : "torch==2.11.0",
66
+ "com.github.apple.coremltools.version" : "9.0",
67
+ "com.github.apple.coremltools.source_dialect" : "TorchScript"
68
+ },
69
+ "inputSchema" : [
70
+ {
71
+ "hasShapeFlexibility" : "0",
72
+ "isOptional" : "0",
73
+ "dataType" : "Float16",
74
+ "formattedType" : "MultiArray (Float16 1 × 200 × 128)",
75
+ "shortDescription" : "",
76
+ "shape" : "[1, 200, 128]",
77
+ "name" : "input_features",
78
+ "type" : "MultiArray"
79
+ }
80
+ ],
81
+ "generatedClassName" : "audio",
82
+ "method" : "predict"
83
+ }
84
+ ]
audio.mlmodelc/model.mil ADDED
The diff for this file is too large to render. See raw diff
 
audio.mlmodelc/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37da916ac6ac7911266a9c7532a681e4039aea7ce13bf570d80636b705dc6163
3
+ size 146087488
audio_config.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "sampling_rate": 16000,
3
+ "feature_size": 128,
4
+ "frame_length": 320,
5
+ "hop_length": 160,
6
+ "fft_length": 512,
7
+ "mel_floor": 1e-05,
8
+ "min_frequency": 0,
9
+ "max_frequency": 8000,
10
+ "log_offset": 0.001,
11
+ "preemphasis": 0.97,
12
+ "mel_frames": 200,
13
+ "num_tokens": 50,
14
+ "audio_token_id": 258881,
15
+ "boa_token_id": 256000,
16
+ "eoa_token_id": 258883,
17
+ "ms_per_token": 40,
18
+ "quantization": "int4"
19
+ }
chunk1.mlmodelc/analytics/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb7d0c074925f5e2b23d70754135276d3b38e5bb2ebf89df153a401e37ef2f57
3
+ size 243
chunk1.mlmodelc/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b53fc92f6d11bf88eb63b9a7af4a7211180e3c031115a9abfa20655814727d4
3
+ size 1333
chunk1.mlmodelc/model.mil ADDED
The diff for this file is too large to render. See raw diff
 
chunk1.mlmodelc/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67c868123b2e7b2182d97a0aaca1d4e33f861ee446eac5b038bdb9f0e2c6e787
3
+ size 585970432
chunk2.mlmodelc/analytics/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c032a454d2eaeea9fd5bfdfe3a2caf53e1a79d401fe8007c986abcc44469a19
3
+ size 243
chunk2.mlmodelc/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8046d35bb019573a7541c830b6b712d5176c199f51e2b1fec3a17342d86a3ac6
3
+ size 1471
chunk2.mlmodelc/model.mil ADDED
The diff for this file is too large to render. See raw diff
 
chunk2.mlmodelc/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6cd92e3945e5e809a15df7a1d9e648fb651e859d733d9589eba817805e2d96d
3
+ size 572196992
chunk2_3way.mlmodelc/analytics/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:223a79744041af35a291271aca045883b40f5cc88ad1fb9040a2ee0a2a5b25b9
3
+ size 243
chunk2_3way.mlmodelc/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:476686c14666d2a23a5e04271a9bb1f2ce006c76ac085370b9f10fe90a05c810
3
+ size 979
chunk2_3way.mlmodelc/metadata.json ADDED
@@ -0,0 +1,285 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "metadataOutputVersion" : "3.0",
4
+ "storagePrecision" : "Mixed (Float16, Palettized (10 bits), Palettized (11 bits), Palettized (13 bits), Palettized (7 bits), Palettized (8 bits), Palettized (9 bits), UInt4)",
5
+ "outputSchema" : [
6
+ {
7
+ "hasShapeFlexibility" : "0",
8
+ "isOptional" : "0",
9
+ "dataType" : "Float16",
10
+ "formattedType" : "MultiArray (Float16 1 × 1 × 2560)",
11
+ "shortDescription" : "",
12
+ "shape" : "[1, 1, 2560]",
13
+ "name" : "hidden_states_out",
14
+ "type" : "MultiArray"
15
+ },
16
+ {
17
+ "hasShapeFlexibility" : "0",
18
+ "isOptional" : "0",
19
+ "dataType" : "Float16",
20
+ "formattedType" : "MultiArray (Float16 10 × 2 × 512 × 512)",
21
+ "shortDescription" : "",
22
+ "shape" : "[10, 2, 512, 512]",
23
+ "name" : "K_sliding_out",
24
+ "type" : "MultiArray"
25
+ },
26
+ {
27
+ "hasShapeFlexibility" : "0",
28
+ "isOptional" : "0",
29
+ "dataType" : "Float16",
30
+ "formattedType" : "MultiArray (Float16 10 × 2 × 512 × 512)",
31
+ "shortDescription" : "",
32
+ "shape" : "[10, 2, 512, 512]",
33
+ "name" : "V_sliding_out",
34
+ "type" : "MultiArray"
35
+ },
36
+ {
37
+ "hasShapeFlexibility" : "0",
38
+ "isOptional" : "0",
39
+ "dataType" : "Float16",
40
+ "formattedType" : "MultiArray (Float16 2 × 2 × 2048 × 512)",
41
+ "shortDescription" : "",
42
+ "shape" : "[2, 2, 2048, 512]",
43
+ "name" : "K_full_out",
44
+ "type" : "MultiArray"
45
+ },
46
+ {
47
+ "hasShapeFlexibility" : "0",
48
+ "isOptional" : "0",
49
+ "dataType" : "Float16",
50
+ "formattedType" : "MultiArray (Float16 2 × 2 × 2048 × 512)",
51
+ "shortDescription" : "",
52
+ "shape" : "[2, 2, 2048, 512]",
53
+ "name" : "V_full_out",
54
+ "type" : "MultiArray"
55
+ },
56
+ {
57
+ "hasShapeFlexibility" : "0",
58
+ "isOptional" : "0",
59
+ "dataType" : "Float16",
60
+ "formattedType" : "MultiArray (Float16 1 × 2 × 512 × 256)",
61
+ "shortDescription" : "",
62
+ "shape" : "[1, 2, 512, 256]",
63
+ "name" : "kv13_k",
64
+ "type" : "MultiArray"
65
+ },
66
+ {
67
+ "hasShapeFlexibility" : "0",
68
+ "isOptional" : "0",
69
+ "dataType" : "Float16",
70
+ "formattedType" : "MultiArray (Float16 1 × 2 × 512 × 256)",
71
+ "shortDescription" : "",
72
+ "shape" : "[1, 2, 512, 256]",
73
+ "name" : "kv13_v",
74
+ "type" : "MultiArray"
75
+ },
76
+ {
77
+ "hasShapeFlexibility" : "0",
78
+ "isOptional" : "0",
79
+ "dataType" : "Float16",
80
+ "formattedType" : "MultiArray (Float16 1 × 2 × 2048 × 512)",
81
+ "shortDescription" : "",
82
+ "shape" : "[1, 2, 2048, 512]",
83
+ "name" : "kv14_k",
84
+ "type" : "MultiArray"
85
+ },
86
+ {
87
+ "hasShapeFlexibility" : "0",
88
+ "isOptional" : "0",
89
+ "dataType" : "Float16",
90
+ "formattedType" : "MultiArray (Float16 1 × 2 × 2048 × 512)",
91
+ "shortDescription" : "",
92
+ "shape" : "[1, 2, 2048, 512]",
93
+ "name" : "kv14_v",
94
+ "type" : "MultiArray"
95
+ }
96
+ ],
97
+ "modelParameters" : [
98
+
99
+ ],
100
+ "specificationVersion" : 9,
101
+ "mlProgramOperationTypeHistogram" : {
102
+ "Ios18.expandDims" : 108,
103
+ "Ios18.mul" : 457,
104
+ "Ios18.matmul" : 42,
105
+ "Ios18.rsqrt" : 12,
106
+ "Ios18.exp" : 21,
107
+ "Ios16.reduceMean" : 12,
108
+ "Ios18.realDiv" : 21,
109
+ "Split" : 171,
110
+ "Ios16.reduceMax" : 21,
111
+ "Tile" : 28,
112
+ "Ios18.add" : 133,
113
+ "Ios16.reduceSum" : 21,
114
+ "Ios18.layerNorm" : 138,
115
+ "Ios18.reshape" : 180,
116
+ "Pad" : 20,
117
+ "Ios18.constexprLutToDense" : 171,
118
+ "Ios18.conv" : 171,
119
+ "Ios18.concat" : 191,
120
+ "Ios18.transpose" : 306,
121
+ "Ios18.sub" : 22,
122
+ "Ios18.pow" : 12,
123
+ "Ios18.gelu" : 42,
124
+ "Stack" : 4,
125
+ "Ios18.sliceByIndex" : 85,
126
+ "Ios18.squeeze" : 66
127
+ },
128
+ "computePrecision" : "Mixed (Float16, Float32, Int32)",
129
+ "isUpdatable" : "0",
130
+ "stateSchema" : [
131
+
132
+ ],
133
+ "availability" : {
134
+ "macOS" : "15.0",
135
+ "tvOS" : "18.0",
136
+ "visionOS" : "2.0",
137
+ "watchOS" : "11.0",
138
+ "iOS" : "18.0",
139
+ "macCatalyst" : "18.0"
140
+ },
141
+ "modelType" : {
142
+ "name" : "MLModelType_mlProgram"
143
+ },
144
+ "userDefinedMetadata" : {
145
+ "com.github.apple.coremltools.conversion_date" : "2026-04-30",
146
+ "com.github.apple.coremltools.source" : "torch==2.11.0",
147
+ "com.github.apple.coremltools.version" : "9.0",
148
+ "com.github.apple.coremltools.source_dialect" : "TorchScript"
149
+ },
150
+ "inputSchema" : [
151
+ {
152
+ "hasShapeFlexibility" : "0",
153
+ "isOptional" : "0",
154
+ "dataType" : "Float16",
155
+ "formattedType" : "MultiArray (Float16 1 × 1 × 2560)",
156
+ "shortDescription" : "",
157
+ "shape" : "[1, 1, 2560]",
158
+ "name" : "hidden_states",
159
+ "type" : "MultiArray"
160
+ },
161
+ {
162
+ "hasShapeFlexibility" : "0",
163
+ "isOptional" : "0",
164
+ "dataType" : "Float16",
165
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 2048)",
166
+ "shortDescription" : "",
167
+ "shape" : "[1, 1, 1, 2048]",
168
+ "name" : "causal_mask_full",
169
+ "type" : "MultiArray"
170
+ },
171
+ {
172
+ "hasShapeFlexibility" : "0",
173
+ "isOptional" : "0",
174
+ "dataType" : "Float16",
175
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 512)",
176
+ "shortDescription" : "",
177
+ "shape" : "[1, 1, 1, 512]",
178
+ "name" : "causal_mask_sliding",
179
+ "type" : "MultiArray"
180
+ },
181
+ {
182
+ "hasShapeFlexibility" : "0",
183
+ "isOptional" : "0",
184
+ "dataType" : "Float16",
185
+ "formattedType" : "MultiArray (Float16 1 × 1 × 2048 × 1)",
186
+ "shortDescription" : "",
187
+ "shape" : "[1, 1, 2048, 1]",
188
+ "name" : "update_mask",
189
+ "type" : "MultiArray"
190
+ },
191
+ {
192
+ "hasShapeFlexibility" : "0",
193
+ "isOptional" : "0",
194
+ "dataType" : "Float16",
195
+ "formattedType" : "MultiArray (Float16 1 × 1 × 10752)",
196
+ "shortDescription" : "",
197
+ "shape" : "[1, 1, 10752]",
198
+ "name" : "per_layer_combined",
199
+ "type" : "MultiArray"
200
+ },
201
+ {
202
+ "hasShapeFlexibility" : "0",
203
+ "isOptional" : "0",
204
+ "dataType" : "Float16",
205
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 256)",
206
+ "shortDescription" : "",
207
+ "shape" : "[1, 1, 1, 256]",
208
+ "name" : "cos_s",
209
+ "type" : "MultiArray"
210
+ },
211
+ {
212
+ "hasShapeFlexibility" : "0",
213
+ "isOptional" : "0",
214
+ "dataType" : "Float16",
215
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 256)",
216
+ "shortDescription" : "",
217
+ "shape" : "[1, 1, 1, 256]",
218
+ "name" : "sin_s",
219
+ "type" : "MultiArray"
220
+ },
221
+ {
222
+ "hasShapeFlexibility" : "0",
223
+ "isOptional" : "0",
224
+ "dataType" : "Float16",
225
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 512)",
226
+ "shortDescription" : "",
227
+ "shape" : "[1, 1, 1, 512]",
228
+ "name" : "cos_f",
229
+ "type" : "MultiArray"
230
+ },
231
+ {
232
+ "hasShapeFlexibility" : "0",
233
+ "isOptional" : "0",
234
+ "dataType" : "Float16",
235
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 512)",
236
+ "shortDescription" : "",
237
+ "shape" : "[1, 1, 1, 512]",
238
+ "name" : "sin_f",
239
+ "type" : "MultiArray"
240
+ },
241
+ {
242
+ "hasShapeFlexibility" : "0",
243
+ "isOptional" : "0",
244
+ "dataType" : "Float16",
245
+ "formattedType" : "MultiArray (Float16 10 × 2 × 512 × 512)",
246
+ "shortDescription" : "",
247
+ "shape" : "[10, 2, 512, 512]",
248
+ "name" : "K_sliding_in",
249
+ "type" : "MultiArray"
250
+ },
251
+ {
252
+ "hasShapeFlexibility" : "0",
253
+ "isOptional" : "0",
254
+ "dataType" : "Float16",
255
+ "formattedType" : "MultiArray (Float16 10 × 2 × 512 × 512)",
256
+ "shortDescription" : "",
257
+ "shape" : "[10, 2, 512, 512]",
258
+ "name" : "V_sliding_in",
259
+ "type" : "MultiArray"
260
+ },
261
+ {
262
+ "hasShapeFlexibility" : "0",
263
+ "isOptional" : "0",
264
+ "dataType" : "Float16",
265
+ "formattedType" : "MultiArray (Float16 2 × 2 × 2048 × 512)",
266
+ "shortDescription" : "",
267
+ "shape" : "[2, 2, 2048, 512]",
268
+ "name" : "K_full_in",
269
+ "type" : "MultiArray"
270
+ },
271
+ {
272
+ "hasShapeFlexibility" : "0",
273
+ "isOptional" : "0",
274
+ "dataType" : "Float16",
275
+ "formattedType" : "MultiArray (Float16 2 × 2 × 2048 × 512)",
276
+ "shortDescription" : "",
277
+ "shape" : "[2, 2, 2048, 512]",
278
+ "name" : "V_full_in",
279
+ "type" : "MultiArray"
280
+ }
281
+ ],
282
+ "generatedClassName" : "chunk2_3way",
283
+ "method" : "predict"
284
+ }
285
+ ]
chunk2_3way.mlmodelc/model.mil ADDED
The diff for this file is too large to render. See raw diff
 
chunk2_3way.mlmodelc/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2dfcddee4de0905bab42c9dbf6b4d03ec1fc888d76de762dee5153423081b838
3
+ size 984936000
chunk3.mlmodelc/analytics/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83e107e7d0f531fa20c6861ea0483120a4246adc98daba2bdc9ec015f77bc7ac
3
+ size 243
chunk3.mlmodelc/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d43e7d6694b27bec11ece3eb5bb8b3b5b185fa8bc0b668980e154952fd36bf0b
3
+ size 940
chunk3.mlmodelc/model.mil ADDED
The diff for this file is too large to render. See raw diff
 
chunk3.mlmodelc/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db45f9ce7443de57765ba412a0158ac2ad46a9d2f735fba9376bbdb0aa357b88
3
+ size 412740736
chunk3_3way.mlmodelc/analytics/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83149a33c6c49a2607a6e038d88d42ea6829bd7ae99bbc67bbba085b983cff48
3
+ size 243
chunk3_3way.mlmodelc/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:346fab1c61528cc8fccc0b5f0d65bb8943b6ebafb0f983a1d6cb1361047195d5
3
+ size 780
chunk3_3way.mlmodelc/metadata.json ADDED
@@ -0,0 +1,224 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "metadataOutputVersion" : "3.0",
4
+ "storagePrecision" : "Mixed (Float16, Palettized (10 bits), Palettized (11 bits), Palettized (13 bits), Palettized (17 bits), Palettized (7 bits), UInt4)",
5
+ "outputSchema" : [
6
+ {
7
+ "hasShapeFlexibility" : "0",
8
+ "isOptional" : "0",
9
+ "dataType" : "Int32",
10
+ "formattedType" : "MultiArray (Int32 1)",
11
+ "shortDescription" : "",
12
+ "shape" : "[1]",
13
+ "name" : "token_id",
14
+ "type" : "MultiArray"
15
+ },
16
+ {
17
+ "hasShapeFlexibility" : "0",
18
+ "isOptional" : "0",
19
+ "dataType" : "Float16",
20
+ "formattedType" : "MultiArray (Float16 1)",
21
+ "shortDescription" : "",
22
+ "shape" : "[1]",
23
+ "name" : "token_logit",
24
+ "type" : "MultiArray"
25
+ },
26
+ {
27
+ "hasShapeFlexibility" : "0",
28
+ "isOptional" : "0",
29
+ "dataType" : "Float16",
30
+ "formattedType" : "MultiArray (Float16 1 × 1 × 2560)",
31
+ "shortDescription" : "",
32
+ "shape" : "[1, 1, 2560]",
33
+ "name" : "hidden_states_out",
34
+ "type" : "MultiArray"
35
+ }
36
+ ],
37
+ "modelParameters" : [
38
+
39
+ ],
40
+ "specificationVersion" : 9,
41
+ "mlProgramOperationTypeHistogram" : {
42
+ "Ios18.expandDims" : 37,
43
+ "Ios18.mul" : 166,
44
+ "Ios18.matmul" : 18,
45
+ "Identity" : 1,
46
+ "Ios18.exp" : 9,
47
+ "Ios18.realDiv" : 9,
48
+ "Split" : 64,
49
+ "Ios18.gatherAlongAxis" : 1,
50
+ "Ios16.reduceMax" : 9,
51
+ "Tile" : 4,
52
+ "Ios16.reduceSum" : 9,
53
+ "Ios18.add" : 45,
54
+ "Ios18.layerNorm" : 55,
55
+ "Ios18.reduceArgmax" : 1,
56
+ "Ios18.reshape" : 44,
57
+ "Ios18.constexprLutToDense" : 64,
58
+ "Ios18.conv" : 64,
59
+ "Ios18.gelu" : 18,
60
+ "Ios18.concat" : 64,
61
+ "Ios18.sub" : 9,
62
+ "Ios18.transpose" : 104,
63
+ "Ios18.tanh" : 1,
64
+ "Ios18.squeeze" : 20,
65
+ "Ios18.sliceByIndex" : 9
66
+ },
67
+ "computePrecision" : "Mixed (Float16, Int32)",
68
+ "isUpdatable" : "0",
69
+ "stateSchema" : [
70
+
71
+ ],
72
+ "availability" : {
73
+ "macOS" : "15.0",
74
+ "tvOS" : "18.0",
75
+ "visionOS" : "2.0",
76
+ "watchOS" : "11.0",
77
+ "iOS" : "18.0",
78
+ "macCatalyst" : "18.0"
79
+ },
80
+ "modelType" : {
81
+ "name" : "MLModelType_mlProgram"
82
+ },
83
+ "userDefinedMetadata" : {
84
+ "com.github.apple.coremltools.conversion_date" : "2026-04-30",
85
+ "com.github.apple.coremltools.source" : "torch==2.11.0",
86
+ "com.github.apple.coremltools.version" : "9.0",
87
+ "com.github.apple.coremltools.source_dialect" : "TorchScript"
88
+ },
89
+ "inputSchema" : [
90
+ {
91
+ "hasShapeFlexibility" : "0",
92
+ "isOptional" : "0",
93
+ "dataType" : "Float16",
94
+ "formattedType" : "MultiArray (Float16 1 × 1 × 2560)",
95
+ "shortDescription" : "",
96
+ "shape" : "[1, 1, 2560]",
97
+ "name" : "hidden_states",
98
+ "type" : "MultiArray"
99
+ },
100
+ {
101
+ "hasShapeFlexibility" : "0",
102
+ "isOptional" : "0",
103
+ "dataType" : "Float16",
104
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 2048)",
105
+ "shortDescription" : "",
106
+ "shape" : "[1, 1, 1, 2048]",
107
+ "name" : "causal_mask_full",
108
+ "type" : "MultiArray"
109
+ },
110
+ {
111
+ "hasShapeFlexibility" : "0",
112
+ "isOptional" : "0",
113
+ "dataType" : "Float16",
114
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 512)",
115
+ "shortDescription" : "",
116
+ "shape" : "[1, 1, 1, 512]",
117
+ "name" : "causal_mask_sliding",
118
+ "type" : "MultiArray"
119
+ },
120
+ {
121
+ "hasShapeFlexibility" : "0",
122
+ "isOptional" : "0",
123
+ "dataType" : "Float16",
124
+ "formattedType" : "MultiArray (Float16 1 × 1 × 2048 × 1)",
125
+ "shortDescription" : "",
126
+ "shape" : "[1, 1, 2048, 1]",
127
+ "name" : "update_mask",
128
+ "type" : "MultiArray"
129
+ },
130
+ {
131
+ "hasShapeFlexibility" : "0",
132
+ "isOptional" : "0",
133
+ "dataType" : "Float16",
134
+ "formattedType" : "MultiArray (Float16 1 × 1 × 10752)",
135
+ "shortDescription" : "",
136
+ "shape" : "[1, 1, 10752]",
137
+ "name" : "per_layer_combined",
138
+ "type" : "MultiArray"
139
+ },
140
+ {
141
+ "hasShapeFlexibility" : "0",
142
+ "isOptional" : "0",
143
+ "dataType" : "Float16",
144
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 256)",
145
+ "shortDescription" : "",
146
+ "shape" : "[1, 1, 1, 256]",
147
+ "name" : "cos_s",
148
+ "type" : "MultiArray"
149
+ },
150
+ {
151
+ "hasShapeFlexibility" : "0",
152
+ "isOptional" : "0",
153
+ "dataType" : "Float16",
154
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 256)",
155
+ "shortDescription" : "",
156
+ "shape" : "[1, 1, 1, 256]",
157
+ "name" : "sin_s",
158
+ "type" : "MultiArray"
159
+ },
160
+ {
161
+ "hasShapeFlexibility" : "0",
162
+ "isOptional" : "0",
163
+ "dataType" : "Float16",
164
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 512)",
165
+ "shortDescription" : "",
166
+ "shape" : "[1, 1, 1, 512]",
167
+ "name" : "cos_f",
168
+ "type" : "MultiArray"
169
+ },
170
+ {
171
+ "hasShapeFlexibility" : "0",
172
+ "isOptional" : "0",
173
+ "dataType" : "Float16",
174
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 512)",
175
+ "shortDescription" : "",
176
+ "shape" : "[1, 1, 1, 512]",
177
+ "name" : "sin_f",
178
+ "type" : "MultiArray"
179
+ },
180
+ {
181
+ "hasShapeFlexibility" : "0",
182
+ "isOptional" : "0",
183
+ "dataType" : "Float16",
184
+ "formattedType" : "MultiArray (Float16 1 × 2 × 512 × 256)",
185
+ "shortDescription" : "",
186
+ "shape" : "[1, 2, 512, 256]",
187
+ "name" : "kv13_k",
188
+ "type" : "MultiArray"
189
+ },
190
+ {
191
+ "hasShapeFlexibility" : "0",
192
+ "isOptional" : "0",
193
+ "dataType" : "Float16",
194
+ "formattedType" : "MultiArray (Float16 1 × 2 × 512 × 256)",
195
+ "shortDescription" : "",
196
+ "shape" : "[1, 2, 512, 256]",
197
+ "name" : "kv13_v",
198
+ "type" : "MultiArray"
199
+ },
200
+ {
201
+ "hasShapeFlexibility" : "0",
202
+ "isOptional" : "0",
203
+ "dataType" : "Float16",
204
+ "formattedType" : "MultiArray (Float16 1 × 2 × 2048 × 512)",
205
+ "shortDescription" : "",
206
+ "shape" : "[1, 2, 2048, 512]",
207
+ "name" : "kv14_k",
208
+ "type" : "MultiArray"
209
+ },
210
+ {
211
+ "hasShapeFlexibility" : "0",
212
+ "isOptional" : "0",
213
+ "dataType" : "Float16",
214
+ "formattedType" : "MultiArray (Float16 1 × 2 × 2048 × 512)",
215
+ "shortDescription" : "",
216
+ "shape" : "[1, 2, 2048, 512]",
217
+ "name" : "kv14_v",
218
+ "type" : "MultiArray"
219
+ }
220
+ ],
221
+ "generatedClassName" : "chunk3_3way",
222
+ "method" : "predict"
223
+ }
224
+ ]
chunk3_3way.mlmodelc/model.mil ADDED
The diff for this file is too large to render. See raw diff
 
chunk3_3way.mlmodelc/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:155bcb0a818cb9f95184346c2cc319980d33f6acf5ec4b14fec14abc61888cd9
3
+ size 753797440
chunk4.mlmodelc/analytics/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58c1fb89f6c05774b2ac875839fcc1e5c153cc195cfe223ad9ffb42d2d30ea48
3
+ size 243
chunk4.mlmodelc/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b78c0ef3198782c4e4060bdf45f682dcb566666e05fee15858a5c2467c05965b
3
+ size 1014
chunk4.mlmodelc/model.mil ADDED
The diff for this file is too large to render. See raw diff
 
chunk4.mlmodelc/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f044d109750fec5d781baada3c070cd3d524b674ea68a00c1c99b5e8015cfbb
3
+ size 753797440
cos_full.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:767b3a21305a67e3a3fd22e256f2e7385014b32374442b6103fb820c7d9ef1fc
3
+ size 4194432
cos_sliding.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a27afac2d0282008c59736cb498f0b49e6f775e0b3847811fdd06be09c6df4a1
3
+ size 2097280
embed_proj_weight.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9cae010a79030ee666136bf5317af76019a87a686bc93947ca1e20535f4a9109
3
+ size 7864448
embed_tokens_per_layer_q8.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:269eb54aa366e2d73474d7521b376025fa53bd2d72c6d3bc0301b0882c6ae681
3
+ size 2818572288
embed_tokens_per_layer_scales.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc32925fb678b96d96cf804b77c1d137f157a6136b76e6f8a003d69f4e976fea
3
+ size 524288
embed_tokens_q8.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8921fcbb6d8a79c7b304b929f357cdb41905fec75a90bad00dfed071c76fb82
3
+ size 671088640
embed_tokens_scales.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5c20211cbfbb5e5a1059d91c5c1e0493e7630505028459e7843a9b1b41ee854
3
+ size 524288
hf_model/config.json ADDED
@@ -0,0 +1,197 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Gemma4ForConditionalGeneration"
4
+ ],
5
+ "audio_config": {
6
+ "_name_or_path": "",
7
+ "architectures": null,
8
+ "attention_chunk_size": 12,
9
+ "attention_context_left": 13,
10
+ "attention_context_right": 0,
11
+ "attention_invalid_logits_value": -1000000000.0,
12
+ "attention_logit_cap": 50.0,
13
+ "chunk_size_feed_forward": 0,
14
+ "conv_kernel_size": 5,
15
+ "dtype": "bfloat16",
16
+ "gradient_clipping": 10000000000.0,
17
+ "hidden_act": "silu",
18
+ "hidden_size": 1024,
19
+ "id2label": {
20
+ "0": "LABEL_0",
21
+ "1": "LABEL_1"
22
+ },
23
+ "initializer_range": 0.02,
24
+ "is_encoder_decoder": false,
25
+ "label2id": {
26
+ "LABEL_0": 0,
27
+ "LABEL_1": 1
28
+ },
29
+ "model_type": "gemma4_audio",
30
+ "num_attention_heads": 8,
31
+ "num_hidden_layers": 12,
32
+ "output_attentions": false,
33
+ "output_hidden_states": false,
34
+ "output_proj_dims": 1536,
35
+ "problem_type": null,
36
+ "residual_weight": 0.5,
37
+ "return_dict": true,
38
+ "rms_norm_eps": 1e-06,
39
+ "subsampling_conv_channels": [
40
+ 128,
41
+ 32
42
+ ],
43
+ "use_clipped_linears": true
44
+ },
45
+ "audio_token_id": 258881,
46
+ "boa_token_id": 256000,
47
+ "boi_token_id": 255999,
48
+ "dtype": "bfloat16",
49
+ "eoa_token_id": 258883,
50
+ "eoa_token_index": 258883,
51
+ "eoi_token_id": 258882,
52
+ "eos_token_id": [
53
+ 1,
54
+ 106
55
+ ],
56
+ "image_token_id": 258880,
57
+ "initializer_range": 0.02,
58
+ "model_type": "gemma4",
59
+ "text_config": {
60
+ "attention_bias": false,
61
+ "attention_dropout": 0.0,
62
+ "attention_k_eq_v": false,
63
+ "bos_token_id": 2,
64
+ "dtype": "bfloat16",
65
+ "enable_moe_block": false,
66
+ "eos_token_id": 1,
67
+ "expert_intermediate_size": null,
68
+ "final_logit_softcapping": 30.0,
69
+ "global_head_dim": 512,
70
+ "head_dim": 256,
71
+ "hidden_activation": "gelu_pytorch_tanh",
72
+ "hidden_size": 2560,
73
+ "hidden_size_per_layer_input": 256,
74
+ "initializer_range": 0.02,
75
+ "intermediate_size": 10240,
76
+ "layer_types": [
77
+ "sliding_attention",
78
+ "sliding_attention",
79
+ "sliding_attention",
80
+ "sliding_attention",
81
+ "sliding_attention",
82
+ "full_attention",
83
+ "sliding_attention",
84
+ "sliding_attention",
85
+ "sliding_attention",
86
+ "sliding_attention",
87
+ "sliding_attention",
88
+ "full_attention",
89
+ "sliding_attention",
90
+ "sliding_attention",
91
+ "sliding_attention",
92
+ "sliding_attention",
93
+ "sliding_attention",
94
+ "full_attention",
95
+ "sliding_attention",
96
+ "sliding_attention",
97
+ "sliding_attention",
98
+ "sliding_attention",
99
+ "sliding_attention",
100
+ "full_attention",
101
+ "sliding_attention",
102
+ "sliding_attention",
103
+ "sliding_attention",
104
+ "sliding_attention",
105
+ "sliding_attention",
106
+ "full_attention",
107
+ "sliding_attention",
108
+ "sliding_attention",
109
+ "sliding_attention",
110
+ "sliding_attention",
111
+ "sliding_attention",
112
+ "full_attention",
113
+ "sliding_attention",
114
+ "sliding_attention",
115
+ "sliding_attention",
116
+ "sliding_attention",
117
+ "sliding_attention",
118
+ "full_attention"
119
+ ],
120
+ "max_position_embeddings": 131072,
121
+ "model_type": "gemma4_text",
122
+ "num_attention_heads": 8,
123
+ "num_experts": null,
124
+ "num_global_key_value_heads": null,
125
+ "num_hidden_layers": 42,
126
+ "num_key_value_heads": 2,
127
+ "num_kv_shared_layers": 18,
128
+ "pad_token_id": 0,
129
+ "rms_norm_eps": 1e-06,
130
+ "rope_parameters": {
131
+ "full_attention": {
132
+ "partial_rotary_factor": 0.25,
133
+ "rope_theta": 1000000.0,
134
+ "rope_type": "proportional"
135
+ },
136
+ "sliding_attention": {
137
+ "rope_theta": 10000.0,
138
+ "rope_type": "default"
139
+ }
140
+ },
141
+ "sliding_window": 512,
142
+ "tie_word_embeddings": true,
143
+ "top_k_experts": null,
144
+ "use_bidirectional_attention": null,
145
+ "use_cache": true,
146
+ "use_double_wide_mlp": false,
147
+ "vocab_size": 262144,
148
+ "vocab_size_per_layer_input": 262144
149
+ },
150
+ "tie_word_embeddings": true,
151
+ "transformers_version": "5.5.0.dev0",
152
+ "video_token_id": 258884,
153
+ "vision_config": {
154
+ "_name_or_path": "",
155
+ "architectures": null,
156
+ "attention_bias": false,
157
+ "attention_dropout": 0.0,
158
+ "chunk_size_feed_forward": 0,
159
+ "default_output_length": 280,
160
+ "dtype": "bfloat16",
161
+ "global_head_dim": 64,
162
+ "head_dim": 64,
163
+ "hidden_activation": "gelu_pytorch_tanh",
164
+ "hidden_size": 768,
165
+ "id2label": {
166
+ "0": "LABEL_0",
167
+ "1": "LABEL_1"
168
+ },
169
+ "initializer_range": 0.02,
170
+ "intermediate_size": 3072,
171
+ "is_encoder_decoder": false,
172
+ "label2id": {
173
+ "LABEL_0": 0,
174
+ "LABEL_1": 1
175
+ },
176
+ "max_position_embeddings": 131072,
177
+ "model_type": "gemma4_vision",
178
+ "num_attention_heads": 12,
179
+ "num_hidden_layers": 16,
180
+ "num_key_value_heads": 12,
181
+ "output_attentions": false,
182
+ "output_hidden_states": false,
183
+ "patch_size": 16,
184
+ "pooling_kernel_size": 3,
185
+ "position_embedding_size": 10240,
186
+ "problem_type": null,
187
+ "return_dict": true,
188
+ "rms_norm_eps": 1e-06,
189
+ "rope_parameters": {
190
+ "rope_theta": 100.0,
191
+ "rope_type": "default"
192
+ },
193
+ "standardize": false,
194
+ "use_clipped_linears": true
195
+ },
196
+ "vision_soft_tokens_per_image": 280
197
+ }
hf_model/generation_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 2,
3
+ "do_sample": true,
4
+ "eos_token_id": [
5
+ 1,
6
+ 106,
7
+ 50
8
+ ],
9
+ "pad_token_id": 0,
10
+ "temperature": 1.0,
11
+ "top_k": 64,
12
+ "top_p": 0.95,
13
+ "transformers_version": "5.5.0.dev0"
14
+ }
hf_model/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc8d3a0ce36466ccc1278bf987df5f71db1719b9ca6b4118264f45cb627bfe0f
3
+ size 32169626
hf_model/tokenizer_config.json ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "audio_token": "<|audio|>",
3
+ "backend": "tokenizers",
4
+ "boa_token": "<|audio>",
5
+ "boi_token": "<|image>",
6
+ "bos_token": "<bos>",
7
+ "eoa_token": "<audio|>",
8
+ "eoc_token": "<channel|>",
9
+ "eoi_token": "<image|>",
10
+ "eos_token": "<eos>",
11
+ "eot_token": "<turn|>",
12
+ "escape_token": "<|\"|>",
13
+ "etc_token": "<tool_call|>",
14
+ "etd_token": "<tool|>",
15
+ "etr_token": "<tool_response|>",
16
+ "extra_special_tokens": [
17
+ "<|video|>"
18
+ ],
19
+ "image_token": "<|image|>",
20
+ "mask_token": "<mask>",
21
+ "model_max_length": 1000000000000000019884624838656,
22
+ "pad_token": "<pad>",
23
+ "padding_side": "left",
24
+ "processor_class": "Gemma4Processor",
25
+ "response_schema": {
26
+ "type": "object",
27
+ "properties": {
28
+ "role": {
29
+ "const": "assistant"
30
+ },
31
+ "thinking": {
32
+ "type": "string"
33
+ },
34
+ "content": {
35
+ "type": "string"
36
+ },
37
+ "tool_calls": {
38
+ "x-regex-iterator": "<\\|tool_call>(.*?)<tool_call\\|>",
39
+ "type": "array",
40
+ "items": {
41
+ "type": "object",
42
+ "properties": {
43
+ "type": {
44
+ "const": "function"
45
+ },
46
+ "function": {
47
+ "type": "object",
48
+ "x-regex": "call\\:(?P<name>\\w+)(?P<arguments>\\{.*\\})",
49
+ "properties": {
50
+ "name": {
51
+ "type": "string"
52
+ },
53
+ "arguments": {
54
+ "type": "object",
55
+ "x-parser": "gemma4-tool-call",
56
+ "additionalProperties": {}
57
+ }
58
+ }
59
+ }
60
+ }
61
+ }
62
+ }
63
+ },
64
+ "x-regex": "(\\<\\|channel\\>thought\\n(?P<thinking>.*?)\\<channel\\|\\>)?(?P<tool_calls>\\<\\|tool_call\\>.*\\<tool_call\\|\\>)?(?P<content>(?:(?!\\<turn\\|\\>)(?!\\<\\|tool_response\\>).)+)?(?:\\<turn\\|\\>|\\<\\|tool_response\\>)?"
65
+ },
66
+ "soc_token": "<|channel>",
67
+ "sot_token": "<|turn>",
68
+ "stc_token": "<|tool_call>",
69
+ "std_token": "<|tool>",
70
+ "str_token": "<|tool_response>",
71
+ "think_token": "<|think|>",
72
+ "tokenizer_class": "GemmaTokenizer",
73
+ "unk_token": "<unk>"
74
+ }
mel_filterbank.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:427860b9a9429175f0e450512def4224f46ced89960dfb1d9cf7479d7e485e2b
3
+ size 131584
model_config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_name": "gemma4-e4b",
3
+ "architecture": "gemma4",
4
+ "hidden_size": 2560,
5
+ "num_hidden_layers": 42,
6
+ "num_attention_heads": 8,
7
+ "num_key_value_heads": 2,
8
+ "head_dim": 256,
9
+ "global_head_dim": 512,
10
+ "vocab_size": 262144,
11
+ "context_length": 2048,
12
+ "sliding_window": 512,
13
+ "per_layer_dim": 256,
14
+ "num_layers": 42,
15
+ "embed_scale": 50.59644256269407,
16
+ "per_layer_embed_scale": 16.0,
17
+ "per_layer_model_projection_scale": 0.01976423537605237,
18
+ "per_layer_input_scale": 0.7071067811865475,
19
+ "rms_norm_eps": 1e-06,
20
+ "bos_token_id": 2,
21
+ "eos_token_id": 1,
22
+ "final_logit_softcapping": 30.0,
23
+ "quantization": "int4",
24
+ "compute_units": "CPU_AND_NE",
25
+ "tokenizer_repo": "google/gemma-4-E4B-it"
26
+ }
output_proj_bias.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5f564508f25bb143480d3dd9071c5deb8340f7c9b35582fefd55815ab355597
3
+ size 3200
output_proj_weight.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:074449599eed804bc8fe0e83e96d535109c1f9f9a3c51df7c3a232d67502e285
3
+ size 3145856
per_layer_norm_weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff67a6ed2e1ac597c28467653c4d67ecd4018f668b1d667af95e564539bd4c10
3
+ size 512