aoiandroid mlboydaisuke commited on
Commit
71c57fe
·
0 Parent(s):

Duplicate from mlboydaisuke/gemma-4-E2B-coreml

Browse files

Co-authored-by: Majima <mlboydaisuke@users.noreply.huggingface.co>

This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +44 -0
  2. README.md +176 -0
  3. audio.mlmodelc/analytics/coremldata.bin +3 -0
  4. audio.mlmodelc/coremldata.bin +3 -0
  5. audio.mlmodelc/metadata.json +84 -0
  6. audio.mlmodelc/model.mil +0 -0
  7. audio.mlmodelc/weights/weight.bin +3 -0
  8. audio_config.json +19 -0
  9. chunk1.mlmodelc/analytics/coremldata.bin +3 -0
  10. chunk1.mlmodelc/coremldata.bin +3 -0
  11. chunk1.mlmodelc/metadata.json +162 -0
  12. chunk1.mlmodelc/model.mil +0 -0
  13. chunk1.mlmodelc/weights/weight.bin +3 -0
  14. chunk2.mlmodelc/analytics/coremldata.bin +3 -0
  15. chunk2.mlmodelc/coremldata.bin +3 -0
  16. chunk2.mlmodelc/metadata.json +202 -0
  17. chunk2.mlmodelc/model.mil +0 -0
  18. chunk2.mlmodelc/weights/weight.bin +3 -0
  19. chunk3.mlmodelc/analytics/coremldata.bin +3 -0
  20. chunk3.mlmodelc/coremldata.bin +3 -0
  21. chunk3.mlmodelc/metadata.json +190 -0
  22. chunk3.mlmodelc/model.mil +0 -0
  23. chunk3.mlmodelc/weights/weight.bin +3 -0
  24. cos_full.npy +3 -0
  25. cos_sliding.npy +3 -0
  26. embed_proj_weight.npy +3 -0
  27. embed_tokens_per_layer_q8.bin +3 -0
  28. embed_tokens_per_layer_scales.bin +3 -0
  29. embed_tokens_q8.bin +3 -0
  30. embed_tokens_scales.bin +3 -0
  31. hf_model/config.json +190 -0
  32. hf_model/tokenizer.json +3 -0
  33. hf_model/tokenizer_config.json +74 -0
  34. lite-chunks/chunk1.mlmodelc/analytics/coremldata.bin +3 -0
  35. lite-chunks/chunk1.mlmodelc/coremldata.bin +3 -0
  36. lite-chunks/chunk1.mlmodelc/model.mil +0 -0
  37. lite-chunks/chunk1.mlmodelc/weights/weight.bin +3 -0
  38. lite-chunks/chunk2.mlmodelc/analytics/coremldata.bin +3 -0
  39. lite-chunks/chunk2.mlmodelc/coremldata.bin +3 -0
  40. lite-chunks/chunk2.mlmodelc/model.mil +0 -0
  41. lite-chunks/chunk2.mlmodelc/weights/weight.bin +3 -0
  42. lite-chunks/model_config.json +18 -0
  43. lite/model.mlmodelc/analytics/coremldata.bin +3 -0
  44. lite/model.mlmodelc/coremldata.bin +3 -0
  45. lite/model.mlmodelc/model.mil +0 -0
  46. lite/model.mlmodelc/weights/weight.bin +3 -0
  47. lite/model_config.json +17 -0
  48. mel_filterbank.bin +3 -0
  49. mf/chunk1.mlmodelc/analytics/coremldata.bin +3 -0
  50. mf/chunk1.mlmodelc/coremldata.bin +3 -0
.gitattributes ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ hf_model/tokenizer.json filter=lfs diff=lfs merge=lfs -text
37
+ sdpa-8k/swa/chunk1.mlmodelc/model.mil filter=lfs diff=lfs merge=lfs -text
38
+ sdpa-8k/swa/chunk2.mlmodelc/model.mil filter=lfs diff=lfs merge=lfs -text
39
+ sdpa-8k/swa/chunk3.mlmodelc/model.mil filter=lfs diff=lfs merge=lfs -text
40
+ sdpa-8k/swa/chunk4.mlmodelc/model.mil filter=lfs diff=lfs merge=lfs -text
41
+ sdpa-8k/prefill/prefill_chunk1.mlmodelc/model.mil filter=lfs diff=lfs merge=lfs -text
42
+ sdpa-8k/prefill/prefill_chunk2.mlmodelc/model.mil filter=lfs diff=lfs merge=lfs -text
43
+ sdpa-8k/prefill/prefill_chunk3.mlmodelc/model.mil filter=lfs diff=lfs merge=lfs -text
44
+ sdpa-8k/prefill/prefill_chunk4.mlmodelc/model.mil filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,176 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: gemma
3
+ library_name: coreml
4
+ base_model: google/gemma-4-E2B-it
5
+ tags:
6
+ - coreml
7
+ - apple-silicon
8
+ - ane
9
+ - on-device
10
+ - gemma-4
11
+ - gemma-3n
12
+ - multimodal
13
+ - vision
14
+ - audio
15
+ pipeline_tag: image-text-to-text
16
+ ---
17
+
18
+ ## Use it from Swift
19
+
20
+ <!-- swift-usage-begin -->
21
+ ### Add the package
22
+
23
+ `Package.swift`:
24
+
25
+ ```swift
26
+ .package(url: "https://github.com/john-rocky/CoreML-LLM", branch: "main"),
27
+
28
+ // In your target:
29
+ .product(name: "CoreMLLLM", package: "CoreML-LLM"),
30
+ ```
31
+
32
+ Platforms: iOS 18+ / macOS 15+.
33
+
34
+ ### Download + chat (one call)
35
+
36
+ ```swift
37
+ import CoreMLLLM
38
+
39
+ let llm = try await CoreMLLLM.load(repo: "mlboydaisuke/gemma-4-E2B-coreml")
40
+
41
+ let stream = try await llm.generate(
42
+ [CoreMLLLM.Message(role: .user, content: "Hello!")],
43
+ maxTokens: 256
44
+ )
45
+ for await chunk in stream { print(chunk, terminator: "") }
46
+ ```
47
+
48
+ ### Image / video / audio
49
+
50
+ ```swift
51
+ // Image
52
+ let stream = try await llm.generate(
53
+ [CoreMLLLM.Message(role: .user,
54
+ content: "Describe this image")],
55
+ image: cgImage)
56
+
57
+ // Video (frames + audio extracted internally)
58
+ let stream = try await llm.generate(
59
+ [CoreMLLLM.Message(role: .user,
60
+ content: "What happens in this clip?")],
61
+ videoURL: localFileURL)
62
+ ```
63
+
64
+ Audio-only and other variants are exposed via the same
65
+ `generate(_:)` overloads — see the Swift file
66
+ [`CoreMLLLM.swift`](https://github.com/john-rocky/CoreML-LLM/blob/main/Sources/CoreMLLLM/CoreMLLLM.swift)
67
+ for the full surface.
68
+ <!-- swift-usage-end -->
69
+
70
+
71
+
72
+ # Gemma 4 E2B — Core ML (ANE multimodal)
73
+
74
+ Core ML port of [`google/gemma-4-E2B-it`](https://huggingface.co/google/gemma-4-E2B-it) (the 2B-effective Gemma 4 / Gemma 3n decoder), optimized for Apple Neural Engine. Text + image + audio + short video, INT4 weights.
75
+
76
+ > **Branches:** `main` is the long-running 4-chunk text+vision+audio bundle. The default ship target for [`CoreMLLLMChat`](https://github.com/john-rocky/CoreML-LLM) v1.6+ is the **`n1024`** branch (3-chunk merged decoder, slightly faster prefill). Both ship the same architecture — only the chunk topology and tokenizer artifacts differ. Pick whichever matches the Swift runtime you're using; if in doubt, use `n1024`.
77
+
78
+ ## Files (root, `n1024` branch — recommended)
79
+
80
+ ```
81
+ chunk1.mlmodelc/ # L0–7 — INT4 palettized
82
+ chunk2_3way.mlmodelc/ # L8–24 — merged middle (3-chunk decoder)
83
+ chunk3_3way.mlmodelc/ # L25–34 + lm_head — multifunction
84
+ prefill_chunk{1..4}.mlmodelc/ # T=N prefill bodies (mlmodelc, weights shared
85
+ # with decode chunks via hardlink)
86
+ vision.mlmodelc/ # SigLIP encoder, 322 MB
87
+ vision_video.mlmodelc/ # video frame encoder (64 tok/frame)
88
+ audio.mlmodelc/ # 282 MB Whisper-style audio encoder
89
+
90
+ embed_tokens_q8.bin 402 MB — INT8 token embeddings (262144 × 1536)
91
+ embed_tokens_scales.bin 512 KB
92
+ embed_tokens_per_layer_q8.bin 2.19 GB — INT8 PLE
93
+ embed_tokens_per_layer_scales.bin 512 KB
94
+ per_layer_projection.bin 26 MB
95
+ per_layer_norm_weight.bin 1 KB
96
+ cos_{full,sliding}.npy 8 MB / 4 MB — precomputed RoPE cos
97
+ sin_{full,sliding}.npy 8 MB / 4 MB — precomputed RoPE sin
98
+ mel_filterbank.bin 129 KB — for audio path
99
+ embed_proj_weight.npy 4.5 MB — vision/audio → text embed projection
100
+ output_proj_{weight,bias}.npy 3 MB / 3 KB — audio output projection
101
+
102
+ model_config.json 434 B — runtime config (hidden=1536, layers=35, …)
103
+ audio_config.json 402 B — audio path config
104
+ hf_model/{tokenizer.json, tokenizer_config.json, config.json}
105
+ ```
106
+
107
+ The `main` branch additionally carries the older 4-chunk topology (`chunk2.mlmodelc` + `chunk3.mlmodelc` + `chunk4.mlmodelc`) and several legacy variant directories (`sdpa/`, `sdpa-8k/`, `swa/`, `stateless/`, `stateless-ctx2048/`, `lite/`, `lite-chunks/`, `mf/`, `w8a8-8k/`, `model.mlmodelc`, `model.mlpackage`). These are research builds — only the `chunk*.mlmodelc` (or `chunk{1,2_3way,3_3way}.mlmodelc`) family is the shipping path.
108
+
109
+ ## Why so many sidecars
110
+
111
+ Gemma 4 / 3n uses a per-layer embedding (PLE) bank that dwarfs the token embedding. Loading the PLE bank through Core ML would dequantize the whole 2.19 GB into the CPU heap. Instead, the raw INT8 + scale files are memory-mapped (`mmap`) in Swift and only the rows actually touched are dequantized on the fly. The chunks themselves stay ANE-resident.
112
+
113
+ `cos`/`sin` `.npy` are pre-baked so the Swift side doesn't ship a RoPE builder.
114
+
115
+ ## Tokenizer
116
+
117
+ Already in `hf_model/`. Or pull from upstream:
118
+
119
+ ```python
120
+ from transformers import AutoTokenizer
121
+ tok = AutoTokenizer.from_pretrained("google/gemma-4-E2B-it")
122
+ ```
123
+
124
+ ## Standalone usage (Python / Mac)
125
+
126
+ ```python
127
+ from huggingface_hub import snapshot_download
128
+ import coremltools as ct, json
129
+
130
+ local = snapshot_download(
131
+ "mlboydaisuke/gemma-4-E2B-coreml", revision="n1024",
132
+ allow_patterns=[
133
+ "chunk1.mlmodelc/*", "chunk2_3way.mlmodelc/*", "chunk3_3way.mlmodelc/*",
134
+ "prefill_chunk*.mlmodelc/*",
135
+ "embed_tokens*.bin", "per_layer_*.bin",
136
+ "cos_*.npy", "sin_*.npy",
137
+ "model_config.json", "hf_model/*",
138
+ ],
139
+ )
140
+ cfg = json.load(open(f"{local}/model_config.json"))
141
+ chunks = [
142
+ ct.models.CompiledMLModel(f"{local}/chunk1.mlmodelc"),
143
+ ct.models.CompiledMLModel(f"{local}/chunk2_3way.mlmodelc"),
144
+ ct.models.CompiledMLModel(f"{local}/chunk3_3way.mlmodelc"),
145
+ ]
146
+ ```
147
+
148
+ For a working end-to-end loop (PLE dequant, vision/audio injection, KV alias plumbing), see [`Sources/CoreMLLLM/ChunkedEngine.swift`](https://github.com/john-rocky/CoreML-LLM/blob/main/Sources/CoreMLLLM/ChunkedEngine.swift) — the canonical reference.
149
+
150
+ ## Vision / Audio
151
+
152
+ - `vision.mlmodelc` expects `pixel_values (1, 3, 256, 256)` fp16, outputs `(1, 256, 1536)` text-aligned tokens.
153
+ - `audio.mlmodelc` expects mel-spectrogram features `input_features (1, 1000, 128)` fp16 (use `mel_filterbank.bin` for the front-end), outputs `audio_features (1, 250, 1536)` — an audio token stream injected into the same text decoder.
154
+ - `vision_video.mlmodelc` packs 64 tokens per frame for short video.
155
+
156
+ ## iOS / Mac app
157
+
158
+ Pick **Gemma 4 E2B** in [`CoreMLLLMChat`](https://github.com/john-rocky/CoreML-LLM/tree/main/Examples/CoreMLLLMChat) — it auto-downloads this repo (the picker fetches the `n1024` branch by default) and runs it via `ChunkedEngine`.
159
+
160
+ ## Architecture
161
+
162
+ | | value |
163
+ |---|---:|
164
+ | `num_hidden_layers` | 35 |
165
+ | `hidden_size` | 1536 |
166
+ | `num_key_value_heads` | 1 |
167
+ | `intermediate_size` | 6144 |
168
+ | `num_kv_shared_layers` | 20 |
169
+ | KV producers (sliding/full) | L13 / L14 |
170
+ | sliding window | 512 |
171
+ | context length (shipping) | 1024 (n1024) / 2048 (main) |
172
+ | vocab | 262144 |
173
+
174
+ ## License
175
+
176
+ Inherits the [Gemma terms of use](https://ai.google.dev/gemma/terms).
audio.mlmodelc/analytics/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:787af9d90589dada22454417de6cd75a7dd63d782ea231dc88acc1d37213184b
3
+ size 243
audio.mlmodelc/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40f22e4537a4a60b0fa8843657072b2d5d9751b9027366167fc32ccf3137d916
3
+ size 392
audio.mlmodelc/metadata.json ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "metadataOutputVersion" : "3.0",
4
+ "storagePrecision" : "Mixed (Float16, Int32, Int8)",
5
+ "outputSchema" : [
6
+ {
7
+ "hasShapeFlexibility" : "0",
8
+ "isOptional" : "0",
9
+ "dataType" : "Float16",
10
+ "formattedType" : "MultiArray (Float16 1 × 250 × 1536)",
11
+ "shortDescription" : "",
12
+ "shape" : "[1, 250, 1536]",
13
+ "name" : "audio_features",
14
+ "type" : "MultiArray"
15
+ }
16
+ ],
17
+ "modelParameters" : [
18
+
19
+ ],
20
+ "specificationVersion" : 9,
21
+ "mlProgramOperationTypeHistogram" : {
22
+ "Ios18.expandDims" : 1,
23
+ "Ios18.mul" : 313,
24
+ "Ios18.softmax" : 12,
25
+ "Ios18.matmul" : 36,
26
+ "Ios16.reduceMean" : 109,
27
+ "Ios18.sigmoid" : 12,
28
+ "Split" : 12,
29
+ "Select" : 12,
30
+ "Ios18.add" : 169,
31
+ "Ios18.layerNorm" : 2,
32
+ "Ios18.reshape" : 109,
33
+ "Pad" : 60,
34
+ "Ios18.linear" : 123,
35
+ "Ios18.conv" : 14,
36
+ "Ios18.relu" : 2,
37
+ "Ios18.clip" : 312,
38
+ "Ios18.silu" : 36,
39
+ "Stack" : 24,
40
+ "Ios18.pow" : 218,
41
+ "Ios18.transpose" : 75,
42
+ "Ios18.cast" : 144,
43
+ "Ios18.tanh" : 12,
44
+ "Ios18.sliceByIndex" : 528,
45
+ "Ios18.constexprBlockwiseShiftScale" : 148
46
+ },
47
+ "computePrecision" : "Mixed (Float16, Float32, Int32)",
48
+ "isUpdatable" : "0",
49
+ "stateSchema" : [
50
+
51
+ ],
52
+ "availability" : {
53
+ "macOS" : "15.0",
54
+ "tvOS" : "18.0",
55
+ "visionOS" : "2.0",
56
+ "watchOS" : "11.0",
57
+ "iOS" : "18.0",
58
+ "macCatalyst" : "18.0"
59
+ },
60
+ "modelType" : {
61
+ "name" : "MLModelType_mlProgram"
62
+ },
63
+ "userDefinedMetadata" : {
64
+ "com.github.apple.coremltools.conversion_date" : "2026-04-11",
65
+ "com.github.apple.coremltools.source" : "torch==2.11.0",
66
+ "com.github.apple.coremltools.version" : "9.0",
67
+ "com.github.apple.coremltools.source_dialect" : "TorchScript"
68
+ },
69
+ "inputSchema" : [
70
+ {
71
+ "hasShapeFlexibility" : "0",
72
+ "isOptional" : "0",
73
+ "dataType" : "Float16",
74
+ "formattedType" : "MultiArray (Float16 1 × 1000 × 128)",
75
+ "shortDescription" : "",
76
+ "shape" : "[1, 1000, 128]",
77
+ "name" : "input_features",
78
+ "type" : "MultiArray"
79
+ }
80
+ ],
81
+ "generatedClassName" : "audio",
82
+ "method" : "predict"
83
+ }
84
+ ]
audio.mlmodelc/model.mil ADDED
The diff for this file is too large to render. See raw diff
 
audio.mlmodelc/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54e067cc94f6f826d3d8f150cea14ba086b7a3555bffa72ffb76a0743ff98038
3
+ size 295373248
audio_config.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "sampling_rate": 16000,
3
+ "feature_size": 128,
4
+ "frame_length": 320,
5
+ "hop_length": 160,
6
+ "fft_length": 512,
7
+ "mel_floor": 1e-05,
8
+ "min_frequency": 0,
9
+ "max_frequency": 8000,
10
+ "log_offset": 0.001,
11
+ "preemphasis": 0.97,
12
+ "mel_frames": 1000,
13
+ "num_tokens": 250,
14
+ "audio_token_id": 258881,
15
+ "boa_token_id": 256000,
16
+ "eoa_token_id": 258883,
17
+ "ms_per_token": 40,
18
+ "quantization": "int8"
19
+ }
chunk1.mlmodelc/analytics/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd907c451c760d6a5bd6cbb2e2aea4d4385b452435f53b7bdec2dcb76a6f65fc
3
+ size 243
chunk1.mlmodelc/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c581491e5aef9c16869f261f5ccc31a404a2152054b71459531d19908b0766da
3
+ size 616
chunk1.mlmodelc/metadata.json ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "metadataOutputVersion" : "3.0",
4
+ "storagePrecision" : "Mixed (Float16, Palettized (10 bits), Palettized (11 bits), Palettized (12 bits), Palettized (7 bits), Palettized (8 bits), UInt4)",
5
+ "outputSchema" : [
6
+ {
7
+ "hasShapeFlexibility" : "0",
8
+ "isOptional" : "0",
9
+ "dataType" : "Float16",
10
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1536)",
11
+ "shortDescription" : "",
12
+ "shape" : "[1, 1, 1536]",
13
+ "name" : "hidden_states_out",
14
+ "type" : "MultiArray"
15
+ }
16
+ ],
17
+ "modelParameters" : [
18
+
19
+ ],
20
+ "specificationVersion" : 9,
21
+ "mlProgramOperationTypeHistogram" : {
22
+ "Ios18.expandDims" : 48,
23
+ "Ios18.mul" : 335,
24
+ "Ios18.softmax" : 12,
25
+ "Ios18.matmul" : 24,
26
+ "Ios16.reduceMean" : 12,
27
+ "Split" : 108,
28
+ "Ios18.readState" : 24,
29
+ "Tile" : 48,
30
+ "Ios18.add" : 108,
31
+ "Ios18.writeState" : 24,
32
+ "Ios18.sliceUpdate" : 24,
33
+ "Ios18.layerNorm" : 84,
34
+ "Pad" : 20,
35
+ "Ios18.reshape" : 144,
36
+ "Ios18.constexprLutToDense" : 108,
37
+ "Ios18.conv" : 84,
38
+ "Ios18.concat" : 108,
39
+ "Ios18.transpose" : 180,
40
+ "Ios18.sub" : 1,
41
+ "Ios18.pow" : 24,
42
+ "Ios18.gelu" : 24,
43
+ "Ios18.linear" : 24,
44
+ "Ios18.sliceByIndex" : 56,
45
+ "Ios18.squeeze" : 36
46
+ },
47
+ "computePrecision" : "Mixed (Float16, Int32)",
48
+ "isUpdatable" : "0",
49
+ "stateSchema" : [
50
+ {
51
+ "dataType" : "Float16",
52
+ "isOptional" : "0",
53
+ "formattedType" : "State (Float16 24 × 1 × 512 × 512)",
54
+ "shortDescription" : "",
55
+ "shape" : "[24, 1, 512, 512]",
56
+ "name" : "kv_cache_0",
57
+ "type" : "State"
58
+ }
59
+ ],
60
+ "availability" : {
61
+ "macOS" : "15.0",
62
+ "tvOS" : "18.0",
63
+ "visionOS" : "2.0",
64
+ "watchOS" : "11.0",
65
+ "iOS" : "18.0",
66
+ "macCatalyst" : "18.0"
67
+ },
68
+ "modelType" : {
69
+ "name" : "MLModelType_mlProgram"
70
+ },
71
+ "userDefinedMetadata" : {
72
+ "com.github.apple.coremltools.conversion_date" : "2026-04-09",
73
+ "com.github.apple.coremltools.source" : "torch==2.11.0",
74
+ "com.github.apple.coremltools.version" : "9.0",
75
+ "com.github.apple.coremltools.source_dialect" : "TorchScript"
76
+ },
77
+ "inputSchema" : [
78
+ {
79
+ "hasShapeFlexibility" : "0",
80
+ "isOptional" : "0",
81
+ "dataType" : "Float16",
82
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1536)",
83
+ "shortDescription" : "",
84
+ "shape" : "[1, 1, 1536]",
85
+ "name" : "hidden_states",
86
+ "type" : "MultiArray"
87
+ },
88
+ {
89
+ "hasShapeFlexibility" : "0",
90
+ "isOptional" : "0",
91
+ "dataType" : "Float16",
92
+ "formattedType" : "MultiArray (Float16 1 × 1 × 8960)",
93
+ "shortDescription" : "",
94
+ "shape" : "[1, 1, 8960]",
95
+ "name" : "per_layer_combined",
96
+ "type" : "MultiArray"
97
+ },
98
+ {
99
+ "hasShapeFlexibility" : "0",
100
+ "isOptional" : "0",
101
+ "dataType" : "Float16",
102
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 256)",
103
+ "shortDescription" : "",
104
+ "shape" : "[1, 1, 1, 256]",
105
+ "name" : "cos_s",
106
+ "type" : "MultiArray"
107
+ },
108
+ {
109
+ "hasShapeFlexibility" : "0",
110
+ "isOptional" : "0",
111
+ "dataType" : "Float16",
112
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 256)",
113
+ "shortDescription" : "",
114
+ "shape" : "[1, 1, 1, 256]",
115
+ "name" : "sin_s",
116
+ "type" : "MultiArray"
117
+ },
118
+ {
119
+ "hasShapeFlexibility" : "0",
120
+ "isOptional" : "0",
121
+ "dataType" : "Float16",
122
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 512)",
123
+ "shortDescription" : "",
124
+ "shape" : "[1, 1, 1, 512]",
125
+ "name" : "cos_f",
126
+ "type" : "MultiArray"
127
+ },
128
+ {
129
+ "hasShapeFlexibility" : "0",
130
+ "isOptional" : "0",
131
+ "dataType" : "Float16",
132
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 512)",
133
+ "shortDescription" : "",
134
+ "shape" : "[1, 1, 1, 512]",
135
+ "name" : "sin_f",
136
+ "type" : "MultiArray"
137
+ },
138
+ {
139
+ "hasShapeFlexibility" : "0",
140
+ "isOptional" : "0",
141
+ "dataType" : "Float16",
142
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 512)",
143
+ "shortDescription" : "",
144
+ "shape" : "[1, 1, 1, 512]",
145
+ "name" : "causal_mask",
146
+ "type" : "MultiArray"
147
+ },
148
+ {
149
+ "hasShapeFlexibility" : "0",
150
+ "isOptional" : "0",
151
+ "dataType" : "Float16",
152
+ "formattedType" : "MultiArray (Float16 1 × 1 × 512 × 1)",
153
+ "shortDescription" : "",
154
+ "shape" : "[1, 1, 512, 1]",
155
+ "name" : "update_mask",
156
+ "type" : "MultiArray"
157
+ }
158
+ ],
159
+ "generatedClassName" : "chunk1",
160
+ "method" : "predict"
161
+ }
162
+ ]
chunk1.mlmodelc/model.mil ADDED
The diff for this file is too large to render. See raw diff
 
chunk1.mlmodelc/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aef2217c8492f349cee65f12f53a6fcced7410d00844b119209c8567c03cadac
3
+ size 224593536
chunk2.mlmodelc/analytics/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:515f00967033a2e15bc723d7713d5008382ae7600841855da258a2525c712818
3
+ size 243
chunk2.mlmodelc/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a017df2d9900f0a32b1b949e3af4d144c056383f12772be20fd6c12986eb638
3
+ size 720
chunk2.mlmodelc/metadata.json ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "metadataOutputVersion" : "3.0",
4
+ "storagePrecision" : "Mixed (Float16, Palettized (10 bits), Palettized (11 bits), Palettized (12 bits), Palettized (13 bits), Palettized (7 bits), Palettized (8 bits), UInt4)",
5
+ "outputSchema" : [
6
+ {
7
+ "hasShapeFlexibility" : "0",
8
+ "isOptional" : "0",
9
+ "dataType" : "Float16",
10
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1536)",
11
+ "shortDescription" : "",
12
+ "shape" : "[1, 1, 1536]",
13
+ "name" : "hidden_states_out",
14
+ "type" : "MultiArray"
15
+ },
16
+ {
17
+ "hasShapeFlexibility" : "0",
18
+ "isOptional" : "0",
19
+ "dataType" : "Float16",
20
+ "formattedType" : "MultiArray (Float16 1 × 1 × 512 × 256)",
21
+ "shortDescription" : "",
22
+ "shape" : "[1, 1, 512, 256]",
23
+ "name" : "kv13_k",
24
+ "type" : "MultiArray"
25
+ },
26
+ {
27
+ "hasShapeFlexibility" : "0",
28
+ "isOptional" : "0",
29
+ "dataType" : "Float16",
30
+ "formattedType" : "MultiArray (Float16 1 × 1 × 512 × 256)",
31
+ "shortDescription" : "",
32
+ "shape" : "[1, 1, 512, 256]",
33
+ "name" : "kv13_v",
34
+ "type" : "MultiArray"
35
+ },
36
+ {
37
+ "hasShapeFlexibility" : "0",
38
+ "isOptional" : "0",
39
+ "dataType" : "Float16",
40
+ "formattedType" : "MultiArray (Float16 1 × 1 × 512 × 512)",
41
+ "shortDescription" : "",
42
+ "shape" : "[1, 1, 512, 512]",
43
+ "name" : "kv14_k",
44
+ "type" : "MultiArray"
45
+ },
46
+ {
47
+ "hasShapeFlexibility" : "0",
48
+ "isOptional" : "0",
49
+ "dataType" : "Float16",
50
+ "formattedType" : "MultiArray (Float16 1 × 1 × 512 × 512)",
51
+ "shortDescription" : "",
52
+ "shape" : "[1, 1, 512, 512]",
53
+ "name" : "kv14_v",
54
+ "type" : "MultiArray"
55
+ }
56
+ ],
57
+ "modelParameters" : [
58
+
59
+ ],
60
+ "specificationVersion" : 9,
61
+ "mlProgramOperationTypeHistogram" : {
62
+ "Ios18.expandDims" : 30,
63
+ "Ios18.mul" : 246,
64
+ "Ios18.softmax" : 12,
65
+ "Ios18.matmul" : 24,
66
+ "Ios16.reduceMean" : 3,
67
+ "Split" : 90,
68
+ "Ios18.readState" : 6,
69
+ "Tile" : 12,
70
+ "Ios18.add" : 72,
71
+ "Ios18.writeState" : 6,
72
+ "Ios18.sliceUpdate" : 6,
73
+ "Ios18.layerNorm" : 75,
74
+ "Pad" : 4,
75
+ "Ios18.reshape" : 72,
76
+ "Ios18.constexprLutToDense" : 90,
77
+ "Ios18.conv" : 66,
78
+ "Ios18.concat" : 90,
79
+ "Ios18.transpose" : 108,
80
+ "Ios18.sub" : 1,
81
+ "Ios18.pow" : 6,
82
+ "Ios18.gelu" : 24,
83
+ "Ios18.linear" : 24,
84
+ "Ios18.sliceByIndex" : 22,
85
+ "Ios18.squeeze" : 18
86
+ },
87
+ "computePrecision" : "Mixed (Float16, Int32)",
88
+ "isUpdatable" : "0",
89
+ "stateSchema" : [
90
+ {
91
+ "dataType" : "Float16",
92
+ "isOptional" : "0",
93
+ "formattedType" : "State (Float16 24 × 1 × 512 × 512)",
94
+ "shortDescription" : "",
95
+ "shape" : "[24, 1, 512, 512]",
96
+ "name" : "kv_cache_0",
97
+ "type" : "State"
98
+ }
99
+ ],
100
+ "availability" : {
101
+ "macOS" : "15.0",
102
+ "tvOS" : "18.0",
103
+ "visionOS" : "2.0",
104
+ "watchOS" : "11.0",
105
+ "iOS" : "18.0",
106
+ "macCatalyst" : "18.0"
107
+ },
108
+ "modelType" : {
109
+ "name" : "MLModelType_mlProgram"
110
+ },
111
+ "userDefinedMetadata" : {
112
+ "com.github.apple.coremltools.conversion_date" : "2026-04-09",
113
+ "com.github.apple.coremltools.source" : "torch==2.11.0",
114
+ "com.github.apple.coremltools.version" : "9.0",
115
+ "com.github.apple.coremltools.source_dialect" : "TorchScript"
116
+ },
117
+ "inputSchema" : [
118
+ {
119
+ "hasShapeFlexibility" : "0",
120
+ "isOptional" : "0",
121
+ "dataType" : "Float16",
122
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1536)",
123
+ "shortDescription" : "",
124
+ "shape" : "[1, 1, 1536]",
125
+ "name" : "hidden_states",
126
+ "type" : "MultiArray"
127
+ },
128
+ {
129
+ "hasShapeFlexibility" : "0",
130
+ "isOptional" : "0",
131
+ "dataType" : "Float16",
132
+ "formattedType" : "MultiArray (Float16 1 × 1 × 8960)",
133
+ "shortDescription" : "",
134
+ "shape" : "[1, 1, 8960]",
135
+ "name" : "per_layer_combined",
136
+ "type" : "MultiArray"
137
+ },
138
+ {
139
+ "hasShapeFlexibility" : "0",
140
+ "isOptional" : "0",
141
+ "dataType" : "Float16",
142
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 256)",
143
+ "shortDescription" : "",
144
+ "shape" : "[1, 1, 1, 256]",
145
+ "name" : "cos_s",
146
+ "type" : "MultiArray"
147
+ },
148
+ {
149
+ "hasShapeFlexibility" : "0",
150
+ "isOptional" : "0",
151
+ "dataType" : "Float16",
152
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 256)",
153
+ "shortDescription" : "",
154
+ "shape" : "[1, 1, 1, 256]",
155
+ "name" : "sin_s",
156
+ "type" : "MultiArray"
157
+ },
158
+ {
159
+ "hasShapeFlexibility" : "0",
160
+ "isOptional" : "0",
161
+ "dataType" : "Float16",
162
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 512)",
163
+ "shortDescription" : "",
164
+ "shape" : "[1, 1, 1, 512]",
165
+ "name" : "cos_f",
166
+ "type" : "MultiArray"
167
+ },
168
+ {
169
+ "hasShapeFlexibility" : "0",
170
+ "isOptional" : "0",
171
+ "dataType" : "Float16",
172
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 512)",
173
+ "shortDescription" : "",
174
+ "shape" : "[1, 1, 1, 512]",
175
+ "name" : "sin_f",
176
+ "type" : "MultiArray"
177
+ },
178
+ {
179
+ "hasShapeFlexibility" : "0",
180
+ "isOptional" : "0",
181
+ "dataType" : "Float16",
182
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 512)",
183
+ "shortDescription" : "",
184
+ "shape" : "[1, 1, 1, 512]",
185
+ "name" : "causal_mask",
186
+ "type" : "MultiArray"
187
+ },
188
+ {
189
+ "hasShapeFlexibility" : "0",
190
+ "isOptional" : "0",
191
+ "dataType" : "Float16",
192
+ "formattedType" : "MultiArray (Float16 1 × 1 × 512 × 1)",
193
+ "shortDescription" : "",
194
+ "shape" : "[1, 1, 512, 1]",
195
+ "name" : "update_mask",
196
+ "type" : "MultiArray"
197
+ }
198
+ ],
199
+ "generatedClassName" : "chunk2",
200
+ "method" : "predict"
201
+ }
202
+ ]
chunk2.mlmodelc/model.mil ADDED
The diff for this file is too large to render. See raw diff
 
chunk2.mlmodelc/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b30f8e0ff131ba884c980f3731b67a55b021f3f1b58194cb8a36320cf8bbf81b
3
+ size 348158016
chunk3.mlmodelc/analytics/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:433b8d279ee1280cff500e24de8f31c7f10881a49cd34c3f64137511704613ae
3
+ size 243
chunk3.mlmodelc/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8de33ff562cb4083c21f249f283ba21da97e8ce9f970a6dee338680915c46d8
3
+ size 672
chunk3.mlmodelc/metadata.json ADDED
@@ -0,0 +1,190 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "metadataOutputVersion" : "3.0",
4
+ "storagePrecision" : "Mixed (Float16, Palettized (10 bits), Palettized (11 bits), Palettized (13 bits), Palettized (17 bits), Palettized (7 bits), UInt4)",
5
+ "outputSchema" : [
6
+ {
7
+ "hasShapeFlexibility" : "0",
8
+ "isOptional" : "0",
9
+ "dataType" : "Int32",
10
+ "formattedType" : "MultiArray (Int32 1)",
11
+ "shortDescription" : "",
12
+ "shape" : "[1]",
13
+ "name" : "token_id",
14
+ "type" : "MultiArray"
15
+ },
16
+ {
17
+ "hasShapeFlexibility" : "0",
18
+ "isOptional" : "0",
19
+ "dataType" : "Float16",
20
+ "formattedType" : "MultiArray (Float16 1)",
21
+ "shortDescription" : "",
22
+ "shape" : "[1]",
23
+ "name" : "token_logit",
24
+ "type" : "MultiArray"
25
+ }
26
+ ],
27
+ "modelParameters" : [
28
+
29
+ ],
30
+ "specificationVersion" : 9,
31
+ "mlProgramOperationTypeHistogram" : {
32
+ "Ios18.expandDims" : 23,
33
+ "Ios18.softmax" : 11,
34
+ "Ios18.mul" : 202,
35
+ "Ios18.matmul" : 22,
36
+ "Split" : 78,
37
+ "Ios18.gatherAlongAxis" : 1,
38
+ "Tile" : 4,
39
+ "Ios18.add" : 55,
40
+ "Ios18.reduceArgmax" : 1,
41
+ "Ios18.layerNorm" : 67,
42
+ "Ios18.reshape" : 52,
43
+ "Ios18.constexprLutToDense" : 78,
44
+ "Ios18.linear" : 22,
45
+ "Ios18.conv" : 56,
46
+ "Ios18.gelu" : 22,
47
+ "Ios18.concat" : 78,
48
+ "Ios18.tanh" : 1,
49
+ "Ios18.transpose" : 91,
50
+ "Ios18.sliceByIndex" : 11,
51
+ "Ios18.squeeze" : 13
52
+ },
53
+ "computePrecision" : "Mixed (Float16, Int32)",
54
+ "isUpdatable" : "0",
55
+ "stateSchema" : [
56
+
57
+ ],
58
+ "availability" : {
59
+ "macOS" : "15.0",
60
+ "tvOS" : "18.0",
61
+ "visionOS" : "2.0",
62
+ "watchOS" : "11.0",
63
+ "iOS" : "18.0",
64
+ "macCatalyst" : "18.0"
65
+ },
66
+ "modelType" : {
67
+ "name" : "MLModelType_mlProgram"
68
+ },
69
+ "userDefinedMetadata" : {
70
+ "com.github.apple.coremltools.conversion_date" : "2026-04-09",
71
+ "com.github.apple.coremltools.source" : "torch==2.11.0",
72
+ "com.github.apple.coremltools.version" : "9.0",
73
+ "com.github.apple.coremltools.source_dialect" : "TorchScript"
74
+ },
75
+ "inputSchema" : [
76
+ {
77
+ "hasShapeFlexibility" : "0",
78
+ "isOptional" : "0",
79
+ "dataType" : "Float16",
80
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1536)",
81
+ "shortDescription" : "",
82
+ "shape" : "[1, 1, 1536]",
83
+ "name" : "hidden_states",
84
+ "type" : "MultiArray"
85
+ },
86
+ {
87
+ "hasShapeFlexibility" : "0",
88
+ "isOptional" : "0",
89
+ "dataType" : "Float16",
90
+ "formattedType" : "MultiArray (Float16 1 × 1 × 8960)",
91
+ "shortDescription" : "",
92
+ "shape" : "[1, 1, 8960]",
93
+ "name" : "per_layer_combined",
94
+ "type" : "MultiArray"
95
+ },
96
+ {
97
+ "hasShapeFlexibility" : "0",
98
+ "isOptional" : "0",
99
+ "dataType" : "Float16",
100
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 256)",
101
+ "shortDescription" : "",
102
+ "shape" : "[1, 1, 1, 256]",
103
+ "name" : "cos_s",
104
+ "type" : "MultiArray"
105
+ },
106
+ {
107
+ "hasShapeFlexibility" : "0",
108
+ "isOptional" : "0",
109
+ "dataType" : "Float16",
110
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 256)",
111
+ "shortDescription" : "",
112
+ "shape" : "[1, 1, 1, 256]",
113
+ "name" : "sin_s",
114
+ "type" : "MultiArray"
115
+ },
116
+ {
117
+ "hasShapeFlexibility" : "0",
118
+ "isOptional" : "0",
119
+ "dataType" : "Float16",
120
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 512)",
121
+ "shortDescription" : "",
122
+ "shape" : "[1, 1, 1, 512]",
123
+ "name" : "cos_f",
124
+ "type" : "MultiArray"
125
+ },
126
+ {
127
+ "hasShapeFlexibility" : "0",
128
+ "isOptional" : "0",
129
+ "dataType" : "Float16",
130
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 512)",
131
+ "shortDescription" : "",
132
+ "shape" : "[1, 1, 1, 512]",
133
+ "name" : "sin_f",
134
+ "type" : "MultiArray"
135
+ },
136
+ {
137
+ "hasShapeFlexibility" : "0",
138
+ "isOptional" : "0",
139
+ "dataType" : "Float16",
140
+ "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 512)",
141
+ "shortDescription" : "",
142
+ "shape" : "[1, 1, 1, 512]",
143
+ "name" : "causal_mask",
144
+ "type" : "MultiArray"
145
+ },
146
+ {
147
+ "hasShapeFlexibility" : "0",
148
+ "isOptional" : "0",
149
+ "dataType" : "Float16",
150
+ "formattedType" : "MultiArray (Float16 1 × 1 × 512 × 256)",
151
+ "shortDescription" : "",
152
+ "shape" : "[1, 1, 512, 256]",
153
+ "name" : "kv13_k",
154
+ "type" : "MultiArray"
155
+ },
156
+ {
157
+ "hasShapeFlexibility" : "0",
158
+ "isOptional" : "0",
159
+ "dataType" : "Float16",
160
+ "formattedType" : "MultiArray (Float16 1 × 1 × 512 × 256)",
161
+ "shortDescription" : "",
162
+ "shape" : "[1, 1, 512, 256]",
163
+ "name" : "kv13_v",
164
+ "type" : "MultiArray"
165
+ },
166
+ {
167
+ "hasShapeFlexibility" : "0",
168
+ "isOptional" : "0",
169
+ "dataType" : "Float16",
170
+ "formattedType" : "MultiArray (Float16 1 × 1 × 512 × 512)",
171
+ "shortDescription" : "",
172
+ "shape" : "[1, 1, 512, 512]",
173
+ "name" : "kv14_k",
174
+ "type" : "MultiArray"
175
+ },
176
+ {
177
+ "hasShapeFlexibility" : "0",
178
+ "isOptional" : "0",
179
+ "dataType" : "Float16",
180
+ "formattedType" : "MultiArray (Float16 1 × 1 × 512 × 512)",
181
+ "shortDescription" : "",
182
+ "shape" : "[1, 1, 512, 512]",
183
+ "name" : "kv14_v",
184
+ "type" : "MultiArray"
185
+ }
186
+ ],
187
+ "generatedClassName" : "chunk3",
188
+ "method" : "predict"
189
+ }
190
+ ]
chunk3.mlmodelc/model.mil ADDED
The diff for this file is too large to render. See raw diff
 
chunk3.mlmodelc/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ba86daad260a6c05877145540541fd7a87823fb618d15e8cc6ec0d9ab4e570e
3
+ size 561924928
cos_full.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41fa3ab781c1ec56286864598a66e5f8ddaffdd4d570948fb03cc8984c70ecca
3
+ size 1048704
cos_sliding.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:490c6b33d055d55c7ea1153f69945da4c83c1a3bd0da158d7ed3d02bde189a0d
3
+ size 524416
embed_proj_weight.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d4e61ba9b02186427d6e20bb20bc40661e6fd21b6bb0527829fb954405e7dbf
3
+ size 4718720
embed_tokens_per_layer_q8.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd1af367cde6fcb67a48925c0c566b0aac11946248d8e3cfebc4027550947cd8
3
+ size 2348810240
embed_tokens_per_layer_scales.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a439606004e3372158eed3efd4c671cefea056122afb93c546108411ea41b057
3
+ size 524288
embed_tokens_q8.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2286db227b17e71fb3908983999876625c811295055457b6f798e0f09969797a
3
+ size 402653184
embed_tokens_scales.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db47a8808d782d3c2b1ab722833b425e0bb4755360409d766acb89a4a8443461
3
+ size 524288
hf_model/config.json ADDED
@@ -0,0 +1,190 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Gemma4ForConditionalGeneration"
4
+ ],
5
+ "audio_config": {
6
+ "_name_or_path": "",
7
+ "architectures": null,
8
+ "attention_chunk_size": 12,
9
+ "attention_context_left": 13,
10
+ "attention_context_right": 0,
11
+ "attention_invalid_logits_value": -1000000000.0,
12
+ "attention_logit_cap": 50.0,
13
+ "chunk_size_feed_forward": 0,
14
+ "conv_kernel_size": 5,
15
+ "dtype": "bfloat16",
16
+ "gradient_clipping": 10000000000.0,
17
+ "hidden_act": "silu",
18
+ "hidden_size": 1024,
19
+ "id2label": {
20
+ "0": "LABEL_0",
21
+ "1": "LABEL_1"
22
+ },
23
+ "initializer_range": 0.02,
24
+ "is_encoder_decoder": false,
25
+ "label2id": {
26
+ "LABEL_0": 0,
27
+ "LABEL_1": 1
28
+ },
29
+ "model_type": "gemma4_audio",
30
+ "num_attention_heads": 8,
31
+ "num_hidden_layers": 12,
32
+ "output_attentions": false,
33
+ "output_hidden_states": false,
34
+ "output_proj_dims": 1536,
35
+ "problem_type": null,
36
+ "residual_weight": 0.5,
37
+ "return_dict": true,
38
+ "rms_norm_eps": 1e-06,
39
+ "subsampling_conv_channels": [
40
+ 128,
41
+ 32
42
+ ],
43
+ "use_clipped_linears": true
44
+ },
45
+ "audio_token_id": 258881,
46
+ "boa_token_id": 256000,
47
+ "boi_token_id": 255999,
48
+ "dtype": "bfloat16",
49
+ "eoa_token_id": 258883,
50
+ "eoa_token_index": 258883,
51
+ "eoi_token_id": 258882,
52
+ "eos_token_id": [
53
+ 1,
54
+ 106
55
+ ],
56
+ "image_token_id": 258880,
57
+ "initializer_range": 0.02,
58
+ "model_type": "gemma4",
59
+ "text_config": {
60
+ "attention_bias": false,
61
+ "attention_dropout": 0.0,
62
+ "attention_k_eq_v": false,
63
+ "bos_token_id": 2,
64
+ "dtype": "bfloat16",
65
+ "enable_moe_block": false,
66
+ "eos_token_id": 1,
67
+ "expert_intermediate_size": null,
68
+ "final_logit_softcapping": 30.0,
69
+ "global_head_dim": 512,
70
+ "head_dim": 256,
71
+ "hidden_activation": "gelu_pytorch_tanh",
72
+ "hidden_size": 1536,
73
+ "hidden_size_per_layer_input": 256,
74
+ "initializer_range": 0.02,
75
+ "intermediate_size": 6144,
76
+ "layer_types": [
77
+ "sliding_attention",
78
+ "sliding_attention",
79
+ "sliding_attention",
80
+ "sliding_attention",
81
+ "full_attention",
82
+ "sliding_attention",
83
+ "sliding_attention",
84
+ "sliding_attention",
85
+ "sliding_attention",
86
+ "full_attention",
87
+ "sliding_attention",
88
+ "sliding_attention",
89
+ "sliding_attention",
90
+ "sliding_attention",
91
+ "full_attention",
92
+ "sliding_attention",
93
+ "sliding_attention",
94
+ "sliding_attention",
95
+ "sliding_attention",
96
+ "full_attention",
97
+ "sliding_attention",
98
+ "sliding_attention",
99
+ "sliding_attention",
100
+ "sliding_attention",
101
+ "full_attention",
102
+ "sliding_attention",
103
+ "sliding_attention",
104
+ "sliding_attention",
105
+ "sliding_attention",
106
+ "full_attention",
107
+ "sliding_attention",
108
+ "sliding_attention",
109
+ "sliding_attention",
110
+ "sliding_attention",
111
+ "full_attention"
112
+ ],
113
+ "max_position_embeddings": 131072,
114
+ "model_type": "gemma4_text",
115
+ "num_attention_heads": 8,
116
+ "num_experts": null,
117
+ "num_global_key_value_heads": null,
118
+ "num_hidden_layers": 35,
119
+ "num_key_value_heads": 1,
120
+ "num_kv_shared_layers": 20,
121
+ "pad_token_id": 0,
122
+ "rms_norm_eps": 1e-06,
123
+ "rope_parameters": {
124
+ "full_attention": {
125
+ "partial_rotary_factor": 0.25,
126
+ "rope_theta": 1000000.0,
127
+ "rope_type": "proportional"
128
+ },
129
+ "sliding_attention": {
130
+ "rope_theta": 10000.0,
131
+ "rope_type": "default"
132
+ }
133
+ },
134
+ "sliding_window": 512,
135
+ "tie_word_embeddings": true,
136
+ "top_k_experts": null,
137
+ "use_bidirectional_attention": null,
138
+ "use_cache": true,
139
+ "use_double_wide_mlp": true,
140
+ "vocab_size": 262144,
141
+ "vocab_size_per_layer_input": 262144
142
+ },
143
+ "tie_word_embeddings": true,
144
+ "transformers_version": "5.5.0.dev0",
145
+ "video_token_id": 258884,
146
+ "vision_config": {
147
+ "_name_or_path": "",
148
+ "architectures": null,
149
+ "attention_bias": false,
150
+ "attention_dropout": 0.0,
151
+ "chunk_size_feed_forward": 0,
152
+ "default_output_length": 280,
153
+ "dtype": "bfloat16",
154
+ "global_head_dim": 64,
155
+ "head_dim": 64,
156
+ "hidden_activation": "gelu_pytorch_tanh",
157
+ "hidden_size": 768,
158
+ "id2label": {
159
+ "0": "LABEL_0",
160
+ "1": "LABEL_1"
161
+ },
162
+ "initializer_range": 0.02,
163
+ "intermediate_size": 3072,
164
+ "is_encoder_decoder": false,
165
+ "label2id": {
166
+ "LABEL_0": 0,
167
+ "LABEL_1": 1
168
+ },
169
+ "max_position_embeddings": 131072,
170
+ "model_type": "gemma4_vision",
171
+ "num_attention_heads": 12,
172
+ "num_hidden_layers": 16,
173
+ "num_key_value_heads": 12,
174
+ "output_attentions": false,
175
+ "output_hidden_states": false,
176
+ "patch_size": 16,
177
+ "pooling_kernel_size": 3,
178
+ "position_embedding_size": 10240,
179
+ "problem_type": null,
180
+ "return_dict": true,
181
+ "rms_norm_eps": 1e-06,
182
+ "rope_parameters": {
183
+ "rope_theta": 100.0,
184
+ "rope_type": "default"
185
+ },
186
+ "standardize": false,
187
+ "use_clipped_linears": true
188
+ },
189
+ "vision_soft_tokens_per_image": 280
190
+ }
hf_model/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc8d3a0ce36466ccc1278bf987df5f71db1719b9ca6b4118264f45cb627bfe0f
3
+ size 32169626
hf_model/tokenizer_config.json ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "audio_token": "<|audio|>",
3
+ "backend": "tokenizers",
4
+ "boa_token": "<|audio>",
5
+ "boi_token": "<|image>",
6
+ "bos_token": "<bos>",
7
+ "eoa_token": "<audio|>",
8
+ "eoc_token": "<channel|>",
9
+ "eoi_token": "<image|>",
10
+ "eos_token": "<eos>",
11
+ "eot_token": "<turn|>",
12
+ "escape_token": "<|\"|>",
13
+ "etc_token": "<tool_call|>",
14
+ "etd_token": "<tool|>",
15
+ "etr_token": "<tool_response|>",
16
+ "extra_special_tokens": [
17
+ "<|video|>"
18
+ ],
19
+ "image_token": "<|image|>",
20
+ "mask_token": "<mask>",
21
+ "model_max_length": 1000000000000000019884624838656,
22
+ "pad_token": "<pad>",
23
+ "padding_side": "left",
24
+ "processor_class": "Gemma4Processor",
25
+ "response_schema": {
26
+ "type": "object",
27
+ "properties": {
28
+ "role": {
29
+ "const": "assistant"
30
+ },
31
+ "thinking": {
32
+ "type": "string"
33
+ },
34
+ "content": {
35
+ "type": "string"
36
+ },
37
+ "tool_calls": {
38
+ "x-regex-iterator": "<\\|tool_call>(.*?)<tool_call\\|>",
39
+ "type": "array",
40
+ "items": {
41
+ "type": "object",
42
+ "properties": {
43
+ "type": {
44
+ "const": "function"
45
+ },
46
+ "function": {
47
+ "type": "object",
48
+ "x-regex": "call\\:(?P<name>\\w+)(?P<arguments>\\{.*\\})",
49
+ "properties": {
50
+ "name": {
51
+ "type": "string"
52
+ },
53
+ "arguments": {
54
+ "type": "object",
55
+ "x-parser": "gemma4-tool-call",
56
+ "additionalProperties": {}
57
+ }
58
+ }
59
+ }
60
+ }
61
+ }
62
+ }
63
+ },
64
+ "x-regex": "(\\<\\|channel\\>thought\\n(?P<thinking>.*?)\\<channel\\|\\>)?(?P<content>(?:(?!\\<\\|tool_call\\>)(?!\\<turn\\|\\>).)+)?(?P<tool_calls>\\<\\|tool_call\\>.*\\<tool_call\\|\\>)?(?:\\<turn\\|\\>)?"
65
+ },
66
+ "soc_token": "<|channel>",
67
+ "sot_token": "<|turn>",
68
+ "stc_token": "<|tool_call>",
69
+ "std_token": "<|tool>",
70
+ "str_token": "<|tool_response>",
71
+ "think_token": "<|think|>",
72
+ "tokenizer_class": "GemmaTokenizer",
73
+ "unk_token": "<unk>"
74
+ }
lite-chunks/chunk1.mlmodelc/analytics/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f43848234c83768f456ced9021d89bf8a176269e93e6d24917b2cea30b0344e
3
+ size 243
lite-chunks/chunk1.mlmodelc/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f153053e142c607b1998c1871ea3cb2dfbb166783093145bee663c4ca84ad28f
3
+ size 678
lite-chunks/chunk1.mlmodelc/model.mil ADDED
The diff for this file is too large to render. See raw diff
 
lite-chunks/chunk1.mlmodelc/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:edcdea405378e1c413487cc23eae7f0d88644ce003b634bcdd3a0bedf9546a86
3
+ size 484892288
lite-chunks/chunk2.mlmodelc/analytics/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:105d47ff96b357c5a4e44f3fa75fdd2119ae3dcf916beb0c3c26be58d2ff32a6
3
+ size 243
lite-chunks/chunk2.mlmodelc/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c6c80ff346337915cd3b66231c73db9381c06db2665dd370ef59767c0d9afbb
3
+ size 633
lite-chunks/chunk2.mlmodelc/model.mil ADDED
The diff for this file is too large to render. See raw diff
 
lite-chunks/chunk2.mlmodelc/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ade112884788cfa7723fa54f03a4e2f751197ba40441343c63beba5ec08aa3aa
3
+ size 852950784
lite-chunks/model_config.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_name": "gemma4-e2b-lite-chunks",
3
+ "architecture": "gemma4",
4
+ "hidden_size": 1536,
5
+ "num_hidden_layers": 35,
6
+ "context_length": 512,
7
+ "vocab_size": 262144,
8
+ "bos_token_id": 2,
9
+ "eos_token_id": 1,
10
+ "per_layer_dim": 256,
11
+ "embed_scale": 39.191835884530846,
12
+ "per_layer_model_projection_scale": 0.02551551815399144,
13
+ "per_layer_input_scale": 0.7071067811865476,
14
+ "per_layer_embed_scale": 16.0,
15
+ "external_embeddings": true,
16
+ "has_multimodal": true,
17
+ "chunked": true
18
+ }
lite/model.mlmodelc/analytics/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d76da9c8e80ee8fa80650037f75b23ef97873de075fc4aeeca8355d1f24fe77
3
+ size 243
lite/model.mlmodelc/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:926c0207bac21850898edb761b030cfa21743319dbae7b4d4d04e5f0c6b3aebb
3
+ size 588
lite/model.mlmodelc/model.mil ADDED
The diff for this file is too large to render. See raw diff
 
lite/model.mlmodelc/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a90518966541e73dc4e2477973391dbebc7efa3f8b6f931e4d74362dbedeab1
3
+ size 1337046592
lite/model_config.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_name": "gemma4-e2b-lite",
3
+ "architecture": "gemma4",
4
+ "hidden_size": 1536,
5
+ "num_hidden_layers": 35,
6
+ "context_length": 512,
7
+ "vocab_size": 262144,
8
+ "bos_token_id": 2,
9
+ "eos_token_id": 1,
10
+ "per_layer_dim": 256,
11
+ "embed_scale": 39.191835884530846,
12
+ "per_layer_model_projection_scale": 0.02551551815399144,
13
+ "per_layer_input_scale": 0.7071067811865476,
14
+ "per_layer_embed_scale": 16.0,
15
+ "external_embeddings": true,
16
+ "has_multimodal": true
17
+ }
mel_filterbank.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:427860b9a9429175f0e450512def4224f46ced89960dfb1d9cf7479d7e485e2b
3
+ size 131584
mf/chunk1.mlmodelc/analytics/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8db31b52905612845c8825d4e104673bded13be17a6473905b939241f880646b
3
+ size 243
mf/chunk1.mlmodelc/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b145aa3b71ec1af8e83efd69905de1cb606e9d75a51cf0e6ae5481bb0e516bf
3
+ size 1360