Snider Cladius Maximus committed on
Commit
e9d378b
·
1 Parent(s): a935485

fix: apply proven configs from mlx-community/unsloth references

Browse files

Fresh weights from the full multimodal Google base model.
Configs matched against repos with millions of downloads.

Co-Authored-By: Cladius Maximus <cladius@lethean.io>

README.md CHANGED
@@ -1,7 +1,9 @@
1
  ---
2
- language: en
3
- tags:
4
- - mlx
5
  library_name: mlx
 
 
6
  pipeline_tag: text-generation
 
 
 
7
  ---
 
1
  ---
 
 
 
2
  library_name: mlx
3
+ license: apache-2.0
4
+ license_link: https://ai.google.dev/gemma/docs/gemma_4_license
5
  pipeline_tag: text-generation
6
+ tags:
7
+ - mlx
8
+ base_model: google/gemma-4-26b-a4b-it
9
  ---
config.json CHANGED
@@ -10,11 +10,7 @@
10
  "eoa_token_id": 258883,
11
  "eoa_token_index": 258883,
12
  "eoi_token_id": 258882,
13
- "eos_token_id": [
14
- 1,
15
- 106,
16
- 50
17
- ],
18
  "image_token_id": 258880,
19
  "initializer_range": 0.02,
20
  "model_type": "gemma4",
@@ -22,164 +18,644 @@
22
  "group_size": 64,
23
  "bits": 8,
24
  "mode": "affine",
25
- "language_model.model.layers.0.router.proj": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  "group_size": 64,
27
  "bits": 8
28
  },
29
- "language_model.model.layers.1.router.proj": {
30
  "group_size": 64,
31
  "bits": 8
32
  },
33
- "language_model.model.layers.2.router.proj": {
34
  "group_size": 64,
35
  "bits": 8
36
  },
37
- "language_model.model.layers.3.router.proj": {
38
  "group_size": 64,
39
  "bits": 8
40
  },
41
- "language_model.model.layers.4.router.proj": {
42
  "group_size": 64,
43
  "bits": 8
44
  },
45
- "language_model.model.layers.5.router.proj": {
46
  "group_size": 64,
47
  "bits": 8
48
  },
49
- "language_model.model.layers.6.router.proj": {
50
  "group_size": 64,
51
  "bits": 8
52
  },
53
- "language_model.model.layers.7.router.proj": {
54
  "group_size": 64,
55
  "bits": 8
56
  },
57
- "language_model.model.layers.8.router.proj": {
58
  "group_size": 64,
59
  "bits": 8
60
  },
61
- "language_model.model.layers.9.router.proj": {
62
  "group_size": 64,
63
  "bits": 8
64
  },
65
- "language_model.model.layers.10.router.proj": {
66
  "group_size": 64,
67
  "bits": 8
68
  },
69
- "language_model.model.layers.11.router.proj": {
70
  "group_size": 64,
71
  "bits": 8
72
  },
73
- "language_model.model.layers.12.router.proj": {
74
  "group_size": 64,
75
  "bits": 8
76
  },
77
- "language_model.model.layers.13.router.proj": {
78
  "group_size": 64,
79
  "bits": 8
80
  },
81
- "language_model.model.layers.14.router.proj": {
82
  "group_size": 64,
83
  "bits": 8
84
  },
85
- "language_model.model.layers.15.router.proj": {
86
  "group_size": 64,
87
  "bits": 8
88
  },
89
- "language_model.model.layers.16.router.proj": {
90
  "group_size": 64,
91
  "bits": 8
92
  },
93
- "language_model.model.layers.17.router.proj": {
94
  "group_size": 64,
95
  "bits": 8
96
  },
97
- "language_model.model.layers.18.router.proj": {
98
  "group_size": 64,
99
  "bits": 8
100
  },
101
- "language_model.model.layers.19.router.proj": {
102
  "group_size": 64,
103
  "bits": 8
104
  },
105
- "language_model.model.layers.20.router.proj": {
106
  "group_size": 64,
107
  "bits": 8
108
  },
109
- "language_model.model.layers.21.router.proj": {
110
  "group_size": 64,
111
  "bits": 8
112
  },
113
- "language_model.model.layers.22.router.proj": {
114
  "group_size": 64,
115
  "bits": 8
116
  },
117
- "language_model.model.layers.23.router.proj": {
118
  "group_size": 64,
119
  "bits": 8
120
  },
121
- "language_model.model.layers.24.router.proj": {
122
  "group_size": 64,
123
  "bits": 8
124
  },
125
- "language_model.model.layers.25.router.proj": {
126
  "group_size": 64,
127
  "bits": 8
128
  },
129
- "language_model.model.layers.26.router.proj": {
130
  "group_size": 64,
131
  "bits": 8
132
  },
133
- "language_model.model.layers.27.router.proj": {
134
  "group_size": 64,
135
  "bits": 8
136
  },
137
- "language_model.model.layers.28.router.proj": {
138
  "group_size": 64,
139
  "bits": 8
140
  },
141
- "language_model.model.layers.29.router.proj": {
142
  "group_size": 64,
143
  "bits": 8
144
- }
145
- },
146
- "quantization_config": {
147
- "group_size": 64,
148
- "bits": 8,
149
- "mode": "affine",
150
- "language_model.model.layers.0.router.proj": {
151
  "group_size": 64,
152
  "bits": 8
153
  },
154
- "language_model.model.layers.1.router.proj": {
155
  "group_size": 64,
156
  "bits": 8
157
  },
158
- "language_model.model.layers.2.router.proj": {
159
  "group_size": 64,
160
  "bits": 8
161
  },
162
- "language_model.model.layers.3.router.proj": {
163
  "group_size": 64,
164
  "bits": 8
165
  },
166
- "language_model.model.layers.4.router.proj": {
167
  "group_size": 64,
168
  "bits": 8
169
  },
170
- "language_model.model.layers.5.router.proj": {
171
  "group_size": 64,
172
  "bits": 8
173
  },
174
- "language_model.model.layers.6.router.proj": {
175
  "group_size": 64,
176
  "bits": 8
177
  },
178
- "language_model.model.layers.7.router.proj": {
179
  "group_size": 64,
180
  "bits": 8
181
  },
182
- "language_model.model.layers.8.router.proj": {
183
  "group_size": 64,
184
  "bits": 8
185
  },
@@ -187,82 +663,322 @@
187
  "group_size": 64,
188
  "bits": 8
189
  },
 
 
 
 
 
 
 
 
 
 
 
 
190
  "language_model.model.layers.10.router.proj": {
191
  "group_size": 64,
192
  "bits": 8
193
  },
 
 
 
 
 
 
 
 
 
 
 
 
194
  "language_model.model.layers.11.router.proj": {
195
  "group_size": 64,
196
  "bits": 8
197
  },
 
 
 
 
 
 
 
 
 
 
 
 
198
  "language_model.model.layers.12.router.proj": {
199
  "group_size": 64,
200
  "bits": 8
201
  },
 
 
 
 
 
 
 
 
 
 
 
 
202
  "language_model.model.layers.13.router.proj": {
203
  "group_size": 64,
204
  "bits": 8
205
  },
 
 
 
 
 
 
 
 
 
 
 
 
206
  "language_model.model.layers.14.router.proj": {
207
  "group_size": 64,
208
  "bits": 8
209
  },
 
 
 
 
 
 
 
 
 
 
 
 
210
  "language_model.model.layers.15.router.proj": {
211
  "group_size": 64,
212
  "bits": 8
213
  },
 
 
 
 
 
 
 
 
 
 
 
 
214
  "language_model.model.layers.16.router.proj": {
215
  "group_size": 64,
216
  "bits": 8
217
  },
 
 
 
 
 
 
 
 
 
 
 
 
218
  "language_model.model.layers.17.router.proj": {
219
  "group_size": 64,
220
  "bits": 8
221
  },
 
 
 
 
 
 
 
 
 
 
 
 
222
  "language_model.model.layers.18.router.proj": {
223
  "group_size": 64,
224
  "bits": 8
225
  },
 
 
 
 
 
 
 
 
 
 
 
 
226
  "language_model.model.layers.19.router.proj": {
227
  "group_size": 64,
228
  "bits": 8
229
  },
 
 
 
 
 
 
 
 
 
 
 
 
230
  "language_model.model.layers.20.router.proj": {
231
  "group_size": 64,
232
  "bits": 8
233
  },
 
 
 
 
 
 
 
 
 
 
 
 
234
  "language_model.model.layers.21.router.proj": {
235
  "group_size": 64,
236
  "bits": 8
237
  },
 
 
 
 
 
 
 
 
 
 
 
 
238
  "language_model.model.layers.22.router.proj": {
239
  "group_size": 64,
240
  "bits": 8
241
  },
 
 
 
 
 
 
 
 
 
 
 
 
242
  "language_model.model.layers.23.router.proj": {
243
  "group_size": 64,
244
  "bits": 8
245
  },
 
 
 
 
 
 
 
 
 
 
 
 
246
  "language_model.model.layers.24.router.proj": {
247
  "group_size": 64,
248
  "bits": 8
249
  },
 
 
 
 
 
 
 
 
 
 
 
 
250
  "language_model.model.layers.25.router.proj": {
251
  "group_size": 64,
252
  "bits": 8
253
  },
 
 
 
 
 
 
 
 
 
 
 
 
254
  "language_model.model.layers.26.router.proj": {
255
  "group_size": 64,
256
  "bits": 8
257
  },
 
 
 
 
 
 
 
 
 
 
 
 
258
  "language_model.model.layers.27.router.proj": {
259
  "group_size": 64,
260
  "bits": 8
261
  },
 
 
 
 
 
 
 
 
 
 
 
 
262
  "language_model.model.layers.28.router.proj": {
263
  "group_size": 64,
264
  "bits": 8
265
  },
 
 
 
 
 
 
 
 
 
 
 
 
266
  "language_model.model.layers.29.router.proj": {
267
  "group_size": 64,
268
  "bits": 8
@@ -350,5 +1066,48 @@
350
  "tie_word_embeddings": true,
351
  "transformers_version": "5.5.0.dev0",
352
  "video_token_id": 258884,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
353
  "vision_soft_tokens_per_image": 280
354
- }
 
10
  "eoa_token_id": 258883,
11
  "eoa_token_index": 258883,
12
  "eoi_token_id": 258882,
13
+ "eos_token_id": 1,
 
 
 
 
14
  "image_token_id": 258880,
15
  "initializer_range": 0.02,
16
  "model_type": "gemma4",
 
18
  "group_size": 64,
19
  "bits": 8,
20
  "mode": "affine",
21
+ "language_model.model.layers.0.mlp.gate_proj": {
22
+ "group_size": 64,
23
+ "bits": 8
24
+ },
25
+ "language_model.model.layers.0.mlp.down_proj": {
26
+ "group_size": 64,
27
+ "bits": 8
28
+ },
29
+ "language_model.model.layers.0.mlp.up_proj": {
30
+ "group_size": 64,
31
+ "bits": 8
32
+ },
33
+ "language_model.model.layers.0.router.proj": {
34
+ "group_size": 64,
35
+ "bits": 8
36
+ },
37
+ "language_model.model.layers.1.mlp.gate_proj": {
38
+ "group_size": 64,
39
+ "bits": 8
40
+ },
41
+ "language_model.model.layers.1.mlp.down_proj": {
42
+ "group_size": 64,
43
+ "bits": 8
44
+ },
45
+ "language_model.model.layers.1.mlp.up_proj": {
46
+ "group_size": 64,
47
+ "bits": 8
48
+ },
49
+ "language_model.model.layers.1.router.proj": {
50
+ "group_size": 64,
51
+ "bits": 8
52
+ },
53
+ "language_model.model.layers.2.mlp.gate_proj": {
54
+ "group_size": 64,
55
+ "bits": 8
56
+ },
57
+ "language_model.model.layers.2.mlp.down_proj": {
58
+ "group_size": 64,
59
+ "bits": 8
60
+ },
61
+ "language_model.model.layers.2.mlp.up_proj": {
62
+ "group_size": 64,
63
+ "bits": 8
64
+ },
65
+ "language_model.model.layers.2.router.proj": {
66
+ "group_size": 64,
67
+ "bits": 8
68
+ },
69
+ "language_model.model.layers.3.mlp.gate_proj": {
70
+ "group_size": 64,
71
+ "bits": 8
72
+ },
73
+ "language_model.model.layers.3.mlp.down_proj": {
74
+ "group_size": 64,
75
+ "bits": 8
76
+ },
77
+ "language_model.model.layers.3.mlp.up_proj": {
78
+ "group_size": 64,
79
+ "bits": 8
80
+ },
81
+ "language_model.model.layers.3.router.proj": {
82
+ "group_size": 64,
83
+ "bits": 8
84
+ },
85
+ "language_model.model.layers.4.mlp.gate_proj": {
86
+ "group_size": 64,
87
+ "bits": 8
88
+ },
89
+ "language_model.model.layers.4.mlp.down_proj": {
90
+ "group_size": 64,
91
+ "bits": 8
92
+ },
93
+ "language_model.model.layers.4.mlp.up_proj": {
94
+ "group_size": 64,
95
+ "bits": 8
96
+ },
97
+ "language_model.model.layers.4.router.proj": {
98
+ "group_size": 64,
99
+ "bits": 8
100
+ },
101
+ "language_model.model.layers.5.mlp.gate_proj": {
102
+ "group_size": 64,
103
+ "bits": 8
104
+ },
105
+ "language_model.model.layers.5.mlp.down_proj": {
106
+ "group_size": 64,
107
+ "bits": 8
108
+ },
109
+ "language_model.model.layers.5.mlp.up_proj": {
110
+ "group_size": 64,
111
+ "bits": 8
112
+ },
113
+ "language_model.model.layers.5.router.proj": {
114
+ "group_size": 64,
115
+ "bits": 8
116
+ },
117
+ "language_model.model.layers.6.mlp.gate_proj": {
118
+ "group_size": 64,
119
+ "bits": 8
120
+ },
121
+ "language_model.model.layers.6.mlp.down_proj": {
122
+ "group_size": 64,
123
+ "bits": 8
124
+ },
125
+ "language_model.model.layers.6.mlp.up_proj": {
126
+ "group_size": 64,
127
+ "bits": 8
128
+ },
129
+ "language_model.model.layers.6.router.proj": {
130
+ "group_size": 64,
131
+ "bits": 8
132
+ },
133
+ "language_model.model.layers.7.mlp.gate_proj": {
134
+ "group_size": 64,
135
+ "bits": 8
136
+ },
137
+ "language_model.model.layers.7.mlp.down_proj": {
138
+ "group_size": 64,
139
+ "bits": 8
140
+ },
141
+ "language_model.model.layers.7.mlp.up_proj": {
142
+ "group_size": 64,
143
+ "bits": 8
144
+ },
145
+ "language_model.model.layers.7.router.proj": {
146
+ "group_size": 64,
147
+ "bits": 8
148
+ },
149
+ "language_model.model.layers.8.mlp.gate_proj": {
150
+ "group_size": 64,
151
+ "bits": 8
152
+ },
153
+ "language_model.model.layers.8.mlp.down_proj": {
154
+ "group_size": 64,
155
+ "bits": 8
156
+ },
157
+ "language_model.model.layers.8.mlp.up_proj": {
158
+ "group_size": 64,
159
+ "bits": 8
160
+ },
161
+ "language_model.model.layers.8.router.proj": {
162
+ "group_size": 64,
163
+ "bits": 8
164
+ },
165
+ "language_model.model.layers.9.mlp.gate_proj": {
166
+ "group_size": 64,
167
+ "bits": 8
168
+ },
169
+ "language_model.model.layers.9.mlp.down_proj": {
170
+ "group_size": 64,
171
+ "bits": 8
172
+ },
173
+ "language_model.model.layers.9.mlp.up_proj": {
174
+ "group_size": 64,
175
+ "bits": 8
176
+ },
177
+ "language_model.model.layers.9.router.proj": {
178
+ "group_size": 64,
179
+ "bits": 8
180
+ },
181
+ "language_model.model.layers.10.mlp.gate_proj": {
182
+ "group_size": 64,
183
+ "bits": 8
184
+ },
185
+ "language_model.model.layers.10.mlp.down_proj": {
186
+ "group_size": 64,
187
+ "bits": 8
188
+ },
189
+ "language_model.model.layers.10.mlp.up_proj": {
190
+ "group_size": 64,
191
+ "bits": 8
192
+ },
193
+ "language_model.model.layers.10.router.proj": {
194
+ "group_size": 64,
195
+ "bits": 8
196
+ },
197
+ "language_model.model.layers.11.mlp.gate_proj": {
198
+ "group_size": 64,
199
+ "bits": 8
200
+ },
201
+ "language_model.model.layers.11.mlp.down_proj": {
202
+ "group_size": 64,
203
+ "bits": 8
204
+ },
205
+ "language_model.model.layers.11.mlp.up_proj": {
206
+ "group_size": 64,
207
+ "bits": 8
208
+ },
209
+ "language_model.model.layers.11.router.proj": {
210
+ "group_size": 64,
211
+ "bits": 8
212
+ },
213
+ "language_model.model.layers.12.mlp.gate_proj": {
214
+ "group_size": 64,
215
+ "bits": 8
216
+ },
217
+ "language_model.model.layers.12.mlp.down_proj": {
218
+ "group_size": 64,
219
+ "bits": 8
220
+ },
221
+ "language_model.model.layers.12.mlp.up_proj": {
222
+ "group_size": 64,
223
+ "bits": 8
224
+ },
225
+ "language_model.model.layers.12.router.proj": {
226
+ "group_size": 64,
227
+ "bits": 8
228
+ },
229
+ "language_model.model.layers.13.mlp.gate_proj": {
230
+ "group_size": 64,
231
+ "bits": 8
232
+ },
233
+ "language_model.model.layers.13.mlp.down_proj": {
234
+ "group_size": 64,
235
+ "bits": 8
236
+ },
237
+ "language_model.model.layers.13.mlp.up_proj": {
238
+ "group_size": 64,
239
+ "bits": 8
240
+ },
241
+ "language_model.model.layers.13.router.proj": {
242
+ "group_size": 64,
243
+ "bits": 8
244
+ },
245
+ "language_model.model.layers.14.mlp.gate_proj": {
246
+ "group_size": 64,
247
+ "bits": 8
248
+ },
249
+ "language_model.model.layers.14.mlp.down_proj": {
250
+ "group_size": 64,
251
+ "bits": 8
252
+ },
253
+ "language_model.model.layers.14.mlp.up_proj": {
254
+ "group_size": 64,
255
+ "bits": 8
256
+ },
257
+ "language_model.model.layers.14.router.proj": {
258
+ "group_size": 64,
259
+ "bits": 8
260
+ },
261
+ "language_model.model.layers.15.mlp.gate_proj": {
262
+ "group_size": 64,
263
+ "bits": 8
264
+ },
265
+ "language_model.model.layers.15.mlp.down_proj": {
266
+ "group_size": 64,
267
+ "bits": 8
268
+ },
269
+ "language_model.model.layers.15.mlp.up_proj": {
270
+ "group_size": 64,
271
+ "bits": 8
272
+ },
273
+ "language_model.model.layers.15.router.proj": {
274
+ "group_size": 64,
275
+ "bits": 8
276
+ },
277
+ "language_model.model.layers.16.mlp.gate_proj": {
278
+ "group_size": 64,
279
+ "bits": 8
280
+ },
281
+ "language_model.model.layers.16.mlp.down_proj": {
282
+ "group_size": 64,
283
+ "bits": 8
284
+ },
285
+ "language_model.model.layers.16.mlp.up_proj": {
286
+ "group_size": 64,
287
+ "bits": 8
288
+ },
289
+ "language_model.model.layers.16.router.proj": {
290
+ "group_size": 64,
291
+ "bits": 8
292
+ },
293
+ "language_model.model.layers.17.mlp.gate_proj": {
294
+ "group_size": 64,
295
+ "bits": 8
296
+ },
297
+ "language_model.model.layers.17.mlp.down_proj": {
298
+ "group_size": 64,
299
+ "bits": 8
300
+ },
301
+ "language_model.model.layers.17.mlp.up_proj": {
302
+ "group_size": 64,
303
+ "bits": 8
304
+ },
305
+ "language_model.model.layers.17.router.proj": {
306
+ "group_size": 64,
307
+ "bits": 8
308
+ },
309
+ "language_model.model.layers.18.mlp.gate_proj": {
310
+ "group_size": 64,
311
+ "bits": 8
312
+ },
313
+ "language_model.model.layers.18.mlp.down_proj": {
314
+ "group_size": 64,
315
+ "bits": 8
316
+ },
317
+ "language_model.model.layers.18.mlp.up_proj": {
318
+ "group_size": 64,
319
+ "bits": 8
320
+ },
321
+ "language_model.model.layers.18.router.proj": {
322
+ "group_size": 64,
323
+ "bits": 8
324
+ },
325
+ "language_model.model.layers.19.mlp.gate_proj": {
326
+ "group_size": 64,
327
+ "bits": 8
328
+ },
329
+ "language_model.model.layers.19.mlp.down_proj": {
330
+ "group_size": 64,
331
+ "bits": 8
332
+ },
333
+ "language_model.model.layers.19.mlp.up_proj": {
334
+ "group_size": 64,
335
+ "bits": 8
336
+ },
337
+ "language_model.model.layers.19.router.proj": {
338
+ "group_size": 64,
339
+ "bits": 8
340
+ },
341
+ "language_model.model.layers.20.mlp.gate_proj": {
342
+ "group_size": 64,
343
+ "bits": 8
344
+ },
345
+ "language_model.model.layers.20.mlp.down_proj": {
346
+ "group_size": 64,
347
+ "bits": 8
348
+ },
349
+ "language_model.model.layers.20.mlp.up_proj": {
350
+ "group_size": 64,
351
+ "bits": 8
352
+ },
353
+ "language_model.model.layers.20.router.proj": {
354
+ "group_size": 64,
355
+ "bits": 8
356
+ },
357
+ "language_model.model.layers.21.mlp.gate_proj": {
358
+ "group_size": 64,
359
+ "bits": 8
360
+ },
361
+ "language_model.model.layers.21.mlp.down_proj": {
362
+ "group_size": 64,
363
+ "bits": 8
364
+ },
365
+ "language_model.model.layers.21.mlp.up_proj": {
366
+ "group_size": 64,
367
+ "bits": 8
368
+ },
369
+ "language_model.model.layers.21.router.proj": {
370
+ "group_size": 64,
371
+ "bits": 8
372
+ },
373
+ "language_model.model.layers.22.mlp.gate_proj": {
374
+ "group_size": 64,
375
+ "bits": 8
376
+ },
377
+ "language_model.model.layers.22.mlp.down_proj": {
378
+ "group_size": 64,
379
+ "bits": 8
380
+ },
381
+ "language_model.model.layers.22.mlp.up_proj": {
382
+ "group_size": 64,
383
+ "bits": 8
384
+ },
385
+ "language_model.model.layers.22.router.proj": {
386
+ "group_size": 64,
387
+ "bits": 8
388
+ },
389
+ "language_model.model.layers.23.mlp.gate_proj": {
390
+ "group_size": 64,
391
+ "bits": 8
392
+ },
393
+ "language_model.model.layers.23.mlp.down_proj": {
394
+ "group_size": 64,
395
+ "bits": 8
396
+ },
397
+ "language_model.model.layers.23.mlp.up_proj": {
398
+ "group_size": 64,
399
+ "bits": 8
400
+ },
401
+ "language_model.model.layers.23.router.proj": {
402
+ "group_size": 64,
403
+ "bits": 8
404
+ },
405
+ "language_model.model.layers.24.mlp.gate_proj": {
406
+ "group_size": 64,
407
+ "bits": 8
408
+ },
409
+ "language_model.model.layers.24.mlp.down_proj": {
410
+ "group_size": 64,
411
+ "bits": 8
412
+ },
413
+ "language_model.model.layers.24.mlp.up_proj": {
414
+ "group_size": 64,
415
+ "bits": 8
416
+ },
417
+ "language_model.model.layers.24.router.proj": {
418
+ "group_size": 64,
419
+ "bits": 8
420
+ },
421
+ "language_model.model.layers.25.mlp.gate_proj": {
422
+ "group_size": 64,
423
+ "bits": 8
424
+ },
425
+ "language_model.model.layers.25.mlp.down_proj": {
426
+ "group_size": 64,
427
+ "bits": 8
428
+ },
429
+ "language_model.model.layers.25.mlp.up_proj": {
430
+ "group_size": 64,
431
+ "bits": 8
432
+ },
433
+ "language_model.model.layers.25.router.proj": {
434
+ "group_size": 64,
435
+ "bits": 8
436
+ },
437
+ "language_model.model.layers.26.mlp.gate_proj": {
438
+ "group_size": 64,
439
+ "bits": 8
440
+ },
441
+ "language_model.model.layers.26.mlp.down_proj": {
442
+ "group_size": 64,
443
+ "bits": 8
444
+ },
445
+ "language_model.model.layers.26.mlp.up_proj": {
446
+ "group_size": 64,
447
+ "bits": 8
448
+ },
449
+ "language_model.model.layers.26.router.proj": {
450
+ "group_size": 64,
451
+ "bits": 8
452
+ },
453
+ "language_model.model.layers.27.mlp.gate_proj": {
454
+ "group_size": 64,
455
+ "bits": 8
456
+ },
457
+ "language_model.model.layers.27.mlp.down_proj": {
458
+ "group_size": 64,
459
+ "bits": 8
460
+ },
461
+ "language_model.model.layers.27.mlp.up_proj": {
462
+ "group_size": 64,
463
+ "bits": 8
464
+ },
465
+ "language_model.model.layers.27.router.proj": {
466
+ "group_size": 64,
467
+ "bits": 8
468
+ },
469
+ "language_model.model.layers.28.mlp.gate_proj": {
470
+ "group_size": 64,
471
+ "bits": 8
472
+ },
473
+ "language_model.model.layers.28.mlp.down_proj": {
474
+ "group_size": 64,
475
+ "bits": 8
476
+ },
477
+ "language_model.model.layers.28.mlp.up_proj": {
478
+ "group_size": 64,
479
+ "bits": 8
480
+ },
481
+ "language_model.model.layers.28.router.proj": {
482
+ "group_size": 64,
483
+ "bits": 8
484
+ },
485
+ "language_model.model.layers.29.mlp.gate_proj": {
486
+ "group_size": 64,
487
+ "bits": 8
488
+ },
489
+ "language_model.model.layers.29.mlp.down_proj": {
490
+ "group_size": 64,
491
+ "bits": 8
492
+ },
493
+ "language_model.model.layers.29.mlp.up_proj": {
494
+ "group_size": 64,
495
+ "bits": 8
496
+ },
497
+ "language_model.model.layers.29.router.proj": {
498
+ "group_size": 64,
499
+ "bits": 8
500
+ }
501
+ },
502
+ "quantization_config": {
503
+ "group_size": 64,
504
+ "bits": 8,
505
+ "mode": "affine",
506
+ "language_model.model.layers.0.mlp.gate_proj": {
507
  "group_size": 64,
508
  "bits": 8
509
  },
510
+ "language_model.model.layers.0.mlp.down_proj": {
511
  "group_size": 64,
512
  "bits": 8
513
  },
514
+ "language_model.model.layers.0.mlp.up_proj": {
515
  "group_size": 64,
516
  "bits": 8
517
  },
518
+ "language_model.model.layers.0.router.proj": {
519
  "group_size": 64,
520
  "bits": 8
521
  },
522
+ "language_model.model.layers.1.mlp.gate_proj": {
523
  "group_size": 64,
524
  "bits": 8
525
  },
526
+ "language_model.model.layers.1.mlp.down_proj": {
527
  "group_size": 64,
528
  "bits": 8
529
  },
530
+ "language_model.model.layers.1.mlp.up_proj": {
531
  "group_size": 64,
532
  "bits": 8
533
  },
534
+ "language_model.model.layers.1.router.proj": {
535
  "group_size": 64,
536
  "bits": 8
537
  },
538
+ "language_model.model.layers.2.mlp.gate_proj": {
539
  "group_size": 64,
540
  "bits": 8
541
  },
542
+ "language_model.model.layers.2.mlp.down_proj": {
543
  "group_size": 64,
544
  "bits": 8
545
  },
546
+ "language_model.model.layers.2.mlp.up_proj": {
547
  "group_size": 64,
548
  "bits": 8
549
  },
550
+ "language_model.model.layers.2.router.proj": {
551
  "group_size": 64,
552
  "bits": 8
553
  },
554
+ "language_model.model.layers.3.mlp.gate_proj": {
555
  "group_size": 64,
556
  "bits": 8
557
  },
558
+ "language_model.model.layers.3.mlp.down_proj": {
559
  "group_size": 64,
560
  "bits": 8
561
  },
562
+ "language_model.model.layers.3.mlp.up_proj": {
563
  "group_size": 64,
564
  "bits": 8
565
  },
566
+ "language_model.model.layers.3.router.proj": {
567
  "group_size": 64,
568
  "bits": 8
569
  },
570
+ "language_model.model.layers.4.mlp.gate_proj": {
571
  "group_size": 64,
572
  "bits": 8
573
  },
574
+ "language_model.model.layers.4.mlp.down_proj": {
575
  "group_size": 64,
576
  "bits": 8
577
  },
578
+ "language_model.model.layers.4.mlp.up_proj": {
579
  "group_size": 64,
580
  "bits": 8
581
  },
582
+ "language_model.model.layers.4.router.proj": {
583
  "group_size": 64,
584
  "bits": 8
585
  },
586
+ "language_model.model.layers.5.mlp.gate_proj": {
587
  "group_size": 64,
588
  "bits": 8
589
  },
590
+ "language_model.model.layers.5.mlp.down_proj": {
591
  "group_size": 64,
592
  "bits": 8
593
  },
594
+ "language_model.model.layers.5.mlp.up_proj": {
595
  "group_size": 64,
596
  "bits": 8
597
  },
598
+ "language_model.model.layers.5.router.proj": {
599
  "group_size": 64,
600
  "bits": 8
601
  },
602
+ "language_model.model.layers.6.mlp.gate_proj": {
603
  "group_size": 64,
604
  "bits": 8
605
  },
606
+ "language_model.model.layers.6.mlp.down_proj": {
607
  "group_size": 64,
608
  "bits": 8
609
  },
610
+ "language_model.model.layers.6.mlp.up_proj": {
611
  "group_size": 64,
612
  "bits": 8
613
  },
614
+ "language_model.model.layers.6.router.proj": {
615
  "group_size": 64,
616
  "bits": 8
617
  },
618
+ "language_model.model.layers.7.mlp.gate_proj": {
619
  "group_size": 64,
620
  "bits": 8
621
  },
622
+ "language_model.model.layers.7.mlp.down_proj": {
623
  "group_size": 64,
624
  "bits": 8
625
+ },
626
+ "language_model.model.layers.7.mlp.up_proj": {
 
 
 
 
 
627
  "group_size": 64,
628
  "bits": 8
629
  },
630
+ "language_model.model.layers.7.router.proj": {
631
  "group_size": 64,
632
  "bits": 8
633
  },
634
+ "language_model.model.layers.8.mlp.gate_proj": {
635
  "group_size": 64,
636
  "bits": 8
637
  },
638
+ "language_model.model.layers.8.mlp.down_proj": {
639
  "group_size": 64,
640
  "bits": 8
641
  },
642
+ "language_model.model.layers.8.mlp.up_proj": {
643
  "group_size": 64,
644
  "bits": 8
645
  },
646
+ "language_model.model.layers.8.router.proj": {
647
  "group_size": 64,
648
  "bits": 8
649
  },
650
+ "language_model.model.layers.9.mlp.gate_proj": {
651
  "group_size": 64,
652
  "bits": 8
653
  },
654
+ "language_model.model.layers.9.mlp.down_proj": {
655
  "group_size": 64,
656
  "bits": 8
657
  },
658
+ "language_model.model.layers.9.mlp.up_proj": {
659
  "group_size": 64,
660
  "bits": 8
661
  },
 
663
  "group_size": 64,
664
  "bits": 8
665
  },
666
+ "language_model.model.layers.10.mlp.gate_proj": {
667
+ "group_size": 64,
668
+ "bits": 8
669
+ },
670
+ "language_model.model.layers.10.mlp.down_proj": {
671
+ "group_size": 64,
672
+ "bits": 8
673
+ },
674
+ "language_model.model.layers.10.mlp.up_proj": {
675
+ "group_size": 64,
676
+ "bits": 8
677
+ },
678
  "language_model.model.layers.10.router.proj": {
679
  "group_size": 64,
680
  "bits": 8
681
  },
682
+ "language_model.model.layers.11.mlp.gate_proj": {
683
+ "group_size": 64,
684
+ "bits": 8
685
+ },
686
+ "language_model.model.layers.11.mlp.down_proj": {
687
+ "group_size": 64,
688
+ "bits": 8
689
+ },
690
+ "language_model.model.layers.11.mlp.up_proj": {
691
+ "group_size": 64,
692
+ "bits": 8
693
+ },
694
  "language_model.model.layers.11.router.proj": {
695
  "group_size": 64,
696
  "bits": 8
697
  },
698
+ "language_model.model.layers.12.mlp.gate_proj": {
699
+ "group_size": 64,
700
+ "bits": 8
701
+ },
702
+ "language_model.model.layers.12.mlp.down_proj": {
703
+ "group_size": 64,
704
+ "bits": 8
705
+ },
706
+ "language_model.model.layers.12.mlp.up_proj": {
707
+ "group_size": 64,
708
+ "bits": 8
709
+ },
710
  "language_model.model.layers.12.router.proj": {
711
  "group_size": 64,
712
  "bits": 8
713
  },
714
+ "language_model.model.layers.13.mlp.gate_proj": {
715
+ "group_size": 64,
716
+ "bits": 8
717
+ },
718
+ "language_model.model.layers.13.mlp.down_proj": {
719
+ "group_size": 64,
720
+ "bits": 8
721
+ },
722
+ "language_model.model.layers.13.mlp.up_proj": {
723
+ "group_size": 64,
724
+ "bits": 8
725
+ },
726
  "language_model.model.layers.13.router.proj": {
727
  "group_size": 64,
728
  "bits": 8
729
  },
730
+ "language_model.model.layers.14.mlp.gate_proj": {
731
+ "group_size": 64,
732
+ "bits": 8
733
+ },
734
+ "language_model.model.layers.14.mlp.down_proj": {
735
+ "group_size": 64,
736
+ "bits": 8
737
+ },
738
+ "language_model.model.layers.14.mlp.up_proj": {
739
+ "group_size": 64,
740
+ "bits": 8
741
+ },
742
  "language_model.model.layers.14.router.proj": {
743
  "group_size": 64,
744
  "bits": 8
745
  },
746
+ "language_model.model.layers.15.mlp.gate_proj": {
747
+ "group_size": 64,
748
+ "bits": 8
749
+ },
750
+ "language_model.model.layers.15.mlp.down_proj": {
751
+ "group_size": 64,
752
+ "bits": 8
753
+ },
754
+ "language_model.model.layers.15.mlp.up_proj": {
755
+ "group_size": 64,
756
+ "bits": 8
757
+ },
758
  "language_model.model.layers.15.router.proj": {
759
  "group_size": 64,
760
  "bits": 8
761
  },
762
+ "language_model.model.layers.16.mlp.gate_proj": {
763
+ "group_size": 64,
764
+ "bits": 8
765
+ },
766
+ "language_model.model.layers.16.mlp.down_proj": {
767
+ "group_size": 64,
768
+ "bits": 8
769
+ },
770
+ "language_model.model.layers.16.mlp.up_proj": {
771
+ "group_size": 64,
772
+ "bits": 8
773
+ },
774
  "language_model.model.layers.16.router.proj": {
775
  "group_size": 64,
776
  "bits": 8
777
  },
778
+ "language_model.model.layers.17.mlp.gate_proj": {
779
+ "group_size": 64,
780
+ "bits": 8
781
+ },
782
+ "language_model.model.layers.17.mlp.down_proj": {
783
+ "group_size": 64,
784
+ "bits": 8
785
+ },
786
+ "language_model.model.layers.17.mlp.up_proj": {
787
+ "group_size": 64,
788
+ "bits": 8
789
+ },
790
  "language_model.model.layers.17.router.proj": {
791
  "group_size": 64,
792
  "bits": 8
793
  },
794
+ "language_model.model.layers.18.mlp.gate_proj": {
795
+ "group_size": 64,
796
+ "bits": 8
797
+ },
798
+ "language_model.model.layers.18.mlp.down_proj": {
799
+ "group_size": 64,
800
+ "bits": 8
801
+ },
802
+ "language_model.model.layers.18.mlp.up_proj": {
803
+ "group_size": 64,
804
+ "bits": 8
805
+ },
806
  "language_model.model.layers.18.router.proj": {
807
  "group_size": 64,
808
  "bits": 8
809
  },
810
+ "language_model.model.layers.19.mlp.gate_proj": {
811
+ "group_size": 64,
812
+ "bits": 8
813
+ },
814
+ "language_model.model.layers.19.mlp.down_proj": {
815
+ "group_size": 64,
816
+ "bits": 8
817
+ },
818
+ "language_model.model.layers.19.mlp.up_proj": {
819
+ "group_size": 64,
820
+ "bits": 8
821
+ },
822
  "language_model.model.layers.19.router.proj": {
823
  "group_size": 64,
824
  "bits": 8
825
  },
826
+ "language_model.model.layers.20.mlp.gate_proj": {
827
+ "group_size": 64,
828
+ "bits": 8
829
+ },
830
+ "language_model.model.layers.20.mlp.down_proj": {
831
+ "group_size": 64,
832
+ "bits": 8
833
+ },
834
+ "language_model.model.layers.20.mlp.up_proj": {
835
+ "group_size": 64,
836
+ "bits": 8
837
+ },
838
  "language_model.model.layers.20.router.proj": {
839
  "group_size": 64,
840
  "bits": 8
841
  },
842
+ "language_model.model.layers.21.mlp.gate_proj": {
843
+ "group_size": 64,
844
+ "bits": 8
845
+ },
846
+ "language_model.model.layers.21.mlp.down_proj": {
847
+ "group_size": 64,
848
+ "bits": 8
849
+ },
850
+ "language_model.model.layers.21.mlp.up_proj": {
851
+ "group_size": 64,
852
+ "bits": 8
853
+ },
854
  "language_model.model.layers.21.router.proj": {
855
  "group_size": 64,
856
  "bits": 8
857
  },
858
+ "language_model.model.layers.22.mlp.gate_proj": {
859
+ "group_size": 64,
860
+ "bits": 8
861
+ },
862
+ "language_model.model.layers.22.mlp.down_proj": {
863
+ "group_size": 64,
864
+ "bits": 8
865
+ },
866
+ "language_model.model.layers.22.mlp.up_proj": {
867
+ "group_size": 64,
868
+ "bits": 8
869
+ },
870
  "language_model.model.layers.22.router.proj": {
871
  "group_size": 64,
872
  "bits": 8
873
  },
874
+ "language_model.model.layers.23.mlp.gate_proj": {
875
+ "group_size": 64,
876
+ "bits": 8
877
+ },
878
+ "language_model.model.layers.23.mlp.down_proj": {
879
+ "group_size": 64,
880
+ "bits": 8
881
+ },
882
+ "language_model.model.layers.23.mlp.up_proj": {
883
+ "group_size": 64,
884
+ "bits": 8
885
+ },
886
  "language_model.model.layers.23.router.proj": {
887
  "group_size": 64,
888
  "bits": 8
889
  },
890
+ "language_model.model.layers.24.mlp.gate_proj": {
891
+ "group_size": 64,
892
+ "bits": 8
893
+ },
894
+ "language_model.model.layers.24.mlp.down_proj": {
895
+ "group_size": 64,
896
+ "bits": 8
897
+ },
898
+ "language_model.model.layers.24.mlp.up_proj": {
899
+ "group_size": 64,
900
+ "bits": 8
901
+ },
902
  "language_model.model.layers.24.router.proj": {
903
  "group_size": 64,
904
  "bits": 8
905
  },
906
+ "language_model.model.layers.25.mlp.gate_proj": {
907
+ "group_size": 64,
908
+ "bits": 8
909
+ },
910
+ "language_model.model.layers.25.mlp.down_proj": {
911
+ "group_size": 64,
912
+ "bits": 8
913
+ },
914
+ "language_model.model.layers.25.mlp.up_proj": {
915
+ "group_size": 64,
916
+ "bits": 8
917
+ },
918
  "language_model.model.layers.25.router.proj": {
919
  "group_size": 64,
920
  "bits": 8
921
  },
922
+ "language_model.model.layers.26.mlp.gate_proj": {
923
+ "group_size": 64,
924
+ "bits": 8
925
+ },
926
+ "language_model.model.layers.26.mlp.down_proj": {
927
+ "group_size": 64,
928
+ "bits": 8
929
+ },
930
+ "language_model.model.layers.26.mlp.up_proj": {
931
+ "group_size": 64,
932
+ "bits": 8
933
+ },
934
  "language_model.model.layers.26.router.proj": {
935
  "group_size": 64,
936
  "bits": 8
937
  },
938
+ "language_model.model.layers.27.mlp.gate_proj": {
939
+ "group_size": 64,
940
+ "bits": 8
941
+ },
942
+ "language_model.model.layers.27.mlp.down_proj": {
943
+ "group_size": 64,
944
+ "bits": 8
945
+ },
946
+ "language_model.model.layers.27.mlp.up_proj": {
947
+ "group_size": 64,
948
+ "bits": 8
949
+ },
950
  "language_model.model.layers.27.router.proj": {
951
  "group_size": 64,
952
  "bits": 8
953
  },
954
+ "language_model.model.layers.28.mlp.gate_proj": {
955
+ "group_size": 64,
956
+ "bits": 8
957
+ },
958
+ "language_model.model.layers.28.mlp.down_proj": {
959
+ "group_size": 64,
960
+ "bits": 8
961
+ },
962
+ "language_model.model.layers.28.mlp.up_proj": {
963
+ "group_size": 64,
964
+ "bits": 8
965
+ },
966
  "language_model.model.layers.28.router.proj": {
967
  "group_size": 64,
968
  "bits": 8
969
  },
970
+ "language_model.model.layers.29.mlp.gate_proj": {
971
+ "group_size": 64,
972
+ "bits": 8
973
+ },
974
+ "language_model.model.layers.29.mlp.down_proj": {
975
+ "group_size": 64,
976
+ "bits": 8
977
+ },
978
+ "language_model.model.layers.29.mlp.up_proj": {
979
+ "group_size": 64,
980
+ "bits": 8
981
+ },
982
  "language_model.model.layers.29.router.proj": {
983
  "group_size": 64,
984
  "bits": 8
 
1066
  "tie_word_embeddings": true,
1067
  "transformers_version": "5.5.0.dev0",
1068
  "video_token_id": 258884,
1069
+ "vision_config": {
1070
+ "_name_or_path": "",
1071
+ "architectures": null,
1072
+ "attention_bias": false,
1073
+ "attention_dropout": 0.0,
1074
+ "chunk_size_feed_forward": 0,
1075
+ "default_output_length": 280,
1076
+ "dtype": "bfloat16",
1077
+ "global_head_dim": 72,
1078
+ "head_dim": 72,
1079
+ "hidden_activation": "gelu_pytorch_tanh",
1080
+ "hidden_size": 1152,
1081
+ "id2label": {
1082
+ "0": "LABEL_0",
1083
+ "1": "LABEL_1"
1084
+ },
1085
+ "initializer_range": 0.02,
1086
+ "intermediate_size": 4304,
1087
+ "is_encoder_decoder": false,
1088
+ "label2id": {
1089
+ "LABEL_0": 0,
1090
+ "LABEL_1": 1
1091
+ },
1092
+ "max_position_embeddings": 131072,
1093
+ "model_type": "gemma4_vision",
1094
+ "num_attention_heads": 16,
1095
+ "num_hidden_layers": 27,
1096
+ "num_key_value_heads": 16,
1097
+ "output_attentions": false,
1098
+ "output_hidden_states": false,
1099
+ "patch_size": 16,
1100
+ "pooling_kernel_size": 3,
1101
+ "position_embedding_size": 10240,
1102
+ "problem_type": null,
1103
+ "return_dict": true,
1104
+ "rms_norm_eps": 1e-06,
1105
+ "rope_parameters": {
1106
+ "rope_theta": 100.0,
1107
+ "rope_type": "default"
1108
+ },
1109
+ "standardize": true,
1110
+ "use_clipped_linears": false
1111
+ },
1112
  "vision_soft_tokens_per_image": 280
1113
+ }
generation_config.json CHANGED
@@ -1,11 +1,7 @@
1
  {
2
  "bos_token_id": 2,
3
  "do_sample": true,
4
- "eos_token_id": [
5
- 1,
6
- 106,
7
- 50
8
- ],
9
  "pad_token_id": 0,
10
  "temperature": 1.0,
11
  "top_k": 64,
 
1
  {
2
  "bos_token_id": 2,
3
  "do_sample": true,
4
+ "eos_token_id": 1,
 
 
 
 
5
  "pad_token_id": 0,
6
  "temperature": 1.0,
7
  "top_k": 64,
model-00001-of-00006.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d215a185bbb369c692e1d03d84ae4ac1c3ebdc3a59e551aa961319ab0120b608
3
  size 5180812050
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fdf66cb7cec3616751e2162a7581bd9390fd7ab35eb4fbecb749e3b2dcd319f6
3
  size 5180812050
model-00002-of-00006.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a17068b12b2fc4118084b243224eeda5a3d9045bcaa924aaeb8c5c4f3e01fbb3
3
  size 5205340944
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54ec0f76c9514f2576f908a86923af092dd81d0b8eae44fa2d4919e5a131d4e2
3
  size 5205340944
model-00003-of-00006.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:faf30ad0d18df25edc5d8404dcf7469d95cee6d6aa5e3b00dc52ba235f85eedd
3
  size 5205341183
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8fbffdbdf0a768892a84c8ee5c827f66e131ad40568ba4c202bae9626d627aa9
3
  size 5205341183
model-00004-of-00006.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b5800c7aaeac6aa994df57f498b26fe555ef1faf2b9fe1fbbea3681d37990d4d
3
  size 5205341191
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b89800b466a26de4bb4eca33d5970fd6d14255fe6d22feddaf3d1b8d82431df8
3
  size 5205341191
model-00005-of-00006.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b504fd28e4dc2e3e07d116f530c833dd44974f398fc8ea7be8c48a4bfcd8848e
3
  size 5205341163
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:790699d63d35744aafb4b3dd3da5f33cc0daf759e5800d23e99838a142916082
3
  size 5205341163
model-00006-of-00006.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:912ee7998df06ac63996ddc41dbb5e094d7d54f33f5f2204f69ea2bab6944852
3
  size 808869767
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56a803e9bfee237850ea839531b0b000527ce24d55de1e97853c8d2c465e12ac
3
  size 808869767
processor_config.json CHANGED
@@ -1,27 +1,5 @@
1
  {
2
- "audio_ms_per_token": 40,
3
  "audio_seq_length": 750,
4
- "feature_extractor": {
5
- "dither": 0.0,
6
- "feature_extractor_type": "Gemma4AudioFeatureExtractor",
7
- "feature_size": 128,
8
- "fft_length": 512,
9
- "fft_overdrive": false,
10
- "frame_length": 320,
11
- "hop_length": 160,
12
- "input_scale_factor": 1.0,
13
- "max_frequency": 8000.0,
14
- "mel_floor": 0.001,
15
- "min_frequency": 0.0,
16
- "padding_side": "right",
17
- "padding_value": 0.0,
18
- "per_bin_mean": null,
19
- "per_bin_stddev": null,
20
- "preemphasis": 0.0,
21
- "preemphasis_htk_flavor": true,
22
- "return_attention_mask": true,
23
- "sampling_rate": 16000
24
- },
25
  "image_processor": {
26
  "do_convert_rgb": true,
27
  "do_normalize": false,
@@ -43,33 +21,12 @@
43
  "patch_size": 16,
44
  "pooling_kernel_size": 3,
45
  "resample": 3,
46
- "rescale_factor": 0.00392156862745098
 
 
 
 
47
  },
48
  "image_seq_length": 280,
49
- "processor_class": "Gemma4Processor",
50
- "video_processor": {
51
- "do_convert_rgb": true,
52
- "do_normalize": true,
53
- "do_rescale": true,
54
- "do_resize": true,
55
- "do_sample_frames": true,
56
- "image_mean": [
57
- 0.0,
58
- 0.0,
59
- 0.0
60
- ],
61
- "image_std": [
62
- 1.0,
63
- 1.0,
64
- 1.0
65
- ],
66
- "max_soft_tokens": 70,
67
- "num_frames": 32,
68
- "patch_size": 16,
69
- "pooling_kernel_size": 3,
70
- "resample": 3,
71
- "rescale_factor": 0.00392156862745098,
72
- "return_metadata": false,
73
- "video_processor_type": "Gemma4VideoProcessor"
74
- }
75
  }
 
1
  {
 
2
  "audio_seq_length": 750,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  "image_processor": {
4
  "do_convert_rgb": true,
5
  "do_normalize": false,
 
21
  "patch_size": 16,
22
  "pooling_kernel_size": 3,
23
  "resample": 3,
24
+ "rescale_factor": 0.00392156862745098,
25
+ "size": {
26
+ "height": 224,
27
+ "width": 224
28
+ }
29
  },
30
  "image_seq_length": 280,
31
+ "processor_class": "Gemma4Processor"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  }
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cc8d3a0ce36466ccc1278bf987df5f71db1719b9ca6b4118264f45cb627bfe0f
3
- size 32169626
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12bac982b793c44b03d52a250a9f0d0b666813da566b910c24a6da0695fd11e6
3
+ size 32170070
tokenizer_config.json CHANGED
@@ -17,52 +17,32 @@
17
  "<|video|>"
18
  ],
19
  "image_token": "<|image|>",
 
20
  "mask_token": "<mask>",
21
  "model_max_length": 1000000000000000019884624838656,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  "pad_token": "<pad>",
23
  "padding_side": "left",
24
  "processor_class": "Gemma4Processor",
25
- "response_schema": {
26
- "type": "object",
27
- "properties": {
28
- "role": {
29
- "const": "assistant"
30
- },
31
- "thinking": {
32
- "type": "string"
33
- },
34
- "content": {
35
- "type": "string"
36
- },
37
- "tool_calls": {
38
- "x-regex-iterator": "<\\|tool_call>(.*?)<tool_call\\|>",
39
- "type": "array",
40
- "items": {
41
- "type": "object",
42
- "properties": {
43
- "type": {
44
- "const": "function"
45
- },
46
- "function": {
47
- "type": "object",
48
- "x-regex": "call\\:(?P<name>\\w+)(?P<arguments>\\{.*\\})",
49
- "properties": {
50
- "name": {
51
- "type": "string"
52
- },
53
- "arguments": {
54
- "type": "object",
55
- "x-parser": "gemma4-tool-call",
56
- "additionalProperties": {}
57
- }
58
- }
59
- }
60
- }
61
- }
62
- }
63
- },
64
- "x-regex": "(\\<\\|channel\\>thought\\n(?P<thinking>.*?)\\<channel\\|\\>)?(?P<content>(?:(?!\\<\\|tool_call\\>)(?!\\<turn\\|\\>).)+)?(?P<tool_calls>\\<\\|tool_call\\>.*\\<tool_call\\|\\>)?(?:\\<turn\\|\\>)?"
65
- },
66
  "soc_token": "<|channel>",
67
  "sot_token": "<|turn>",
68
  "stc_token": "<|tool_call>",
 
17
  "<|video|>"
18
  ],
19
  "image_token": "<|image|>",
20
+ "is_local": true,
21
  "mask_token": "<mask>",
22
  "model_max_length": 1000000000000000019884624838656,
23
+ "model_specific_special_tokens": {
24
+ "audio_token": "<|audio|>",
25
+ "boa_token": "<|audio>",
26
+ "boi_token": "<|image>",
27
+ "eoa_token": "<audio|>",
28
+ "eoc_token": "<channel|>",
29
+ "eoi_token": "<image|>",
30
+ "eot_token": "<turn|>",
31
+ "escape_token": "<|\"|>",
32
+ "etc_token": "<tool_call|>",
33
+ "etd_token": "<tool|>",
34
+ "etr_token": "<tool_response|>",
35
+ "image_token": "<|image|>",
36
+ "soc_token": "<|channel>",
37
+ "sot_token": "<|turn>",
38
+ "stc_token": "<|tool_call>",
39
+ "std_token": "<|tool>",
40
+ "str_token": "<|tool_response>",
41
+ "think_token": "<|think|>"
42
+ },
43
  "pad_token": "<pad>",
44
  "padding_side": "left",
45
  "processor_class": "Gemma4Processor",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  "soc_token": "<|channel>",
47
  "sot_token": "<|turn>",
48
  "stc_token": "<|tool_call>",