lthn committed on
Commit
8ca451e
·
verified ·
1 Parent(s): fc64826

Upload config.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. config.json +347 -348
config.json CHANGED
@@ -1,354 +1,353 @@
1
  {
2
- "architectures": [
3
- "Gemma4ForConditionalGeneration"
4
- ],
5
- "audio_config": null,
6
- "audio_token_id": 258881,
7
- "boa_token_id": 256000,
8
- "boi_token_id": 255999,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  "dtype": "bfloat16",
10
- "eoa_token_id": 258883,
11
- "eoa_token_index": 258883,
12
- "eoi_token_id": 258882,
13
- "eos_token_id": [
14
- 1,
15
- 106,
16
- 50
17
- ],
18
- "image_token_id": 258880,
19
  "initializer_range": 0.02,
20
- "model_type": "gemma4",
21
- "quantization": {
22
- "group_size": 64,
23
- "bits": 8,
24
- "mode": "affine",
25
- "language_model.model.layers.0.router.proj": {
26
- "group_size": 64,
27
- "bits": 8
28
- },
29
- "language_model.model.layers.1.router.proj": {
30
- "group_size": 64,
31
- "bits": 8
32
- },
33
- "language_model.model.layers.2.router.proj": {
34
- "group_size": 64,
35
- "bits": 8
36
- },
37
- "language_model.model.layers.3.router.proj": {
38
- "group_size": 64,
39
- "bits": 8
40
- },
41
- "language_model.model.layers.4.router.proj": {
42
- "group_size": 64,
43
- "bits": 8
44
- },
45
- "language_model.model.layers.5.router.proj": {
46
- "group_size": 64,
47
- "bits": 8
48
- },
49
- "language_model.model.layers.6.router.proj": {
50
- "group_size": 64,
51
- "bits": 8
52
- },
53
- "language_model.model.layers.7.router.proj": {
54
- "group_size": 64,
55
- "bits": 8
56
- },
57
- "language_model.model.layers.8.router.proj": {
58
- "group_size": 64,
59
- "bits": 8
60
- },
61
- "language_model.model.layers.9.router.proj": {
62
- "group_size": 64,
63
- "bits": 8
64
- },
65
- "language_model.model.layers.10.router.proj": {
66
- "group_size": 64,
67
- "bits": 8
68
- },
69
- "language_model.model.layers.11.router.proj": {
70
- "group_size": 64,
71
- "bits": 8
72
- },
73
- "language_model.model.layers.12.router.proj": {
74
- "group_size": 64,
75
- "bits": 8
76
- },
77
- "language_model.model.layers.13.router.proj": {
78
- "group_size": 64,
79
- "bits": 8
80
- },
81
- "language_model.model.layers.14.router.proj": {
82
- "group_size": 64,
83
- "bits": 8
84
- },
85
- "language_model.model.layers.15.router.proj": {
86
- "group_size": 64,
87
- "bits": 8
88
- },
89
- "language_model.model.layers.16.router.proj": {
90
- "group_size": 64,
91
- "bits": 8
92
- },
93
- "language_model.model.layers.17.router.proj": {
94
- "group_size": 64,
95
- "bits": 8
96
- },
97
- "language_model.model.layers.18.router.proj": {
98
- "group_size": 64,
99
- "bits": 8
100
- },
101
- "language_model.model.layers.19.router.proj": {
102
- "group_size": 64,
103
- "bits": 8
104
- },
105
- "language_model.model.layers.20.router.proj": {
106
- "group_size": 64,
107
- "bits": 8
108
- },
109
- "language_model.model.layers.21.router.proj": {
110
- "group_size": 64,
111
- "bits": 8
112
- },
113
- "language_model.model.layers.22.router.proj": {
114
- "group_size": 64,
115
- "bits": 8
116
- },
117
- "language_model.model.layers.23.router.proj": {
118
- "group_size": 64,
119
- "bits": 8
120
- },
121
- "language_model.model.layers.24.router.proj": {
122
- "group_size": 64,
123
- "bits": 8
124
- },
125
- "language_model.model.layers.25.router.proj": {
126
- "group_size": 64,
127
- "bits": 8
128
- },
129
- "language_model.model.layers.26.router.proj": {
130
- "group_size": 64,
131
- "bits": 8
132
- },
133
- "language_model.model.layers.27.router.proj": {
134
- "group_size": 64,
135
- "bits": 8
136
- },
137
- "language_model.model.layers.28.router.proj": {
138
- "group_size": 64,
139
- "bits": 8
140
- },
141
- "language_model.model.layers.29.router.proj": {
142
- "group_size": 64,
143
- "bits": 8
144
- }
145
- },
146
- "quantization_config": {
147
- "group_size": 64,
148
- "bits": 8,
149
- "mode": "affine",
150
- "language_model.model.layers.0.router.proj": {
151
- "group_size": 64,
152
- "bits": 8
153
- },
154
- "language_model.model.layers.1.router.proj": {
155
- "group_size": 64,
156
- "bits": 8
157
- },
158
- "language_model.model.layers.2.router.proj": {
159
- "group_size": 64,
160
- "bits": 8
161
- },
162
- "language_model.model.layers.3.router.proj": {
163
- "group_size": 64,
164
- "bits": 8
165
- },
166
- "language_model.model.layers.4.router.proj": {
167
- "group_size": 64,
168
- "bits": 8
169
- },
170
- "language_model.model.layers.5.router.proj": {
171
- "group_size": 64,
172
- "bits": 8
173
- },
174
- "language_model.model.layers.6.router.proj": {
175
- "group_size": 64,
176
- "bits": 8
177
- },
178
- "language_model.model.layers.7.router.proj": {
179
- "group_size": 64,
180
- "bits": 8
181
- },
182
- "language_model.model.layers.8.router.proj": {
183
- "group_size": 64,
184
- "bits": 8
185
- },
186
- "language_model.model.layers.9.router.proj": {
187
- "group_size": 64,
188
- "bits": 8
189
- },
190
- "language_model.model.layers.10.router.proj": {
191
- "group_size": 64,
192
- "bits": 8
193
- },
194
- "language_model.model.layers.11.router.proj": {
195
- "group_size": 64,
196
- "bits": 8
197
- },
198
- "language_model.model.layers.12.router.proj": {
199
- "group_size": 64,
200
- "bits": 8
201
- },
202
- "language_model.model.layers.13.router.proj": {
203
- "group_size": 64,
204
- "bits": 8
205
- },
206
- "language_model.model.layers.14.router.proj": {
207
- "group_size": 64,
208
- "bits": 8
209
- },
210
- "language_model.model.layers.15.router.proj": {
211
- "group_size": 64,
212
- "bits": 8
213
- },
214
- "language_model.model.layers.16.router.proj": {
215
- "group_size": 64,
216
- "bits": 8
217
- },
218
- "language_model.model.layers.17.router.proj": {
219
- "group_size": 64,
220
- "bits": 8
221
- },
222
- "language_model.model.layers.18.router.proj": {
223
- "group_size": 64,
224
- "bits": 8
225
- },
226
- "language_model.model.layers.19.router.proj": {
227
- "group_size": 64,
228
- "bits": 8
229
- },
230
- "language_model.model.layers.20.router.proj": {
231
- "group_size": 64,
232
- "bits": 8
233
- },
234
- "language_model.model.layers.21.router.proj": {
235
- "group_size": 64,
236
- "bits": 8
237
- },
238
- "language_model.model.layers.22.router.proj": {
239
- "group_size": 64,
240
- "bits": 8
241
- },
242
- "language_model.model.layers.23.router.proj": {
243
- "group_size": 64,
244
- "bits": 8
245
- },
246
- "language_model.model.layers.24.router.proj": {
247
- "group_size": 64,
248
- "bits": 8
249
- },
250
- "language_model.model.layers.25.router.proj": {
251
- "group_size": 64,
252
- "bits": 8
253
- },
254
- "language_model.model.layers.26.router.proj": {
255
- "group_size": 64,
256
- "bits": 8
257
- },
258
- "language_model.model.layers.27.router.proj": {
259
- "group_size": 64,
260
- "bits": 8
261
- },
262
- "language_model.model.layers.28.router.proj": {
263
- "group_size": 64,
264
- "bits": 8
265
- },
266
- "language_model.model.layers.29.router.proj": {
267
- "group_size": 64,
268
- "bits": 8
269
- }
270
- },
271
- "text_config": {
272
- "attention_bias": false,
273
- "attention_dropout": 0.0,
274
- "attention_k_eq_v": true,
275
- "bos_token_id": 2,
276
- "dtype": "bfloat16",
277
- "enable_moe_block": true,
278
- "eos_token_id": 1,
279
- "final_logit_softcapping": 30.0,
280
- "global_head_dim": 512,
281
- "head_dim": 256,
282
- "hidden_activation": "gelu_pytorch_tanh",
283
- "hidden_size": 2816,
284
- "hidden_size_per_layer_input": 0,
285
- "initializer_range": 0.02,
286
- "intermediate_size": 2112,
287
- "layer_types": [
288
- "sliding_attention",
289
- "sliding_attention",
290
- "sliding_attention",
291
- "sliding_attention",
292
- "sliding_attention",
293
- "full_attention",
294
- "sliding_attention",
295
- "sliding_attention",
296
- "sliding_attention",
297
- "sliding_attention",
298
- "sliding_attention",
299
- "full_attention",
300
- "sliding_attention",
301
- "sliding_attention",
302
- "sliding_attention",
303
- "sliding_attention",
304
- "sliding_attention",
305
- "full_attention",
306
- "sliding_attention",
307
- "sliding_attention",
308
- "sliding_attention",
309
- "sliding_attention",
310
- "sliding_attention",
311
- "full_attention",
312
- "sliding_attention",
313
- "sliding_attention",
314
- "sliding_attention",
315
- "sliding_attention",
316
- "sliding_attention",
317
- "full_attention"
318
- ],
319
- "max_position_embeddings": 262144,
320
- "model_type": "gemma4_text",
321
- "moe_intermediate_size": 704,
322
- "num_attention_heads": 16,
323
- "num_experts": 128,
324
- "num_global_key_value_heads": 2,
325
- "num_hidden_layers": 30,
326
- "num_key_value_heads": 8,
327
- "num_kv_shared_layers": 0,
328
- "pad_token_id": 0,
329
- "rms_norm_eps": 1e-06,
330
- "rope_parameters": {
331
- "full_attention": {
332
- "partial_rotary_factor": 0.25,
333
- "rope_theta": 1000000.0,
334
- "rope_type": "proportional"
335
- },
336
- "sliding_attention": {
337
- "rope_theta": 10000.0,
338
- "rope_type": "default"
339
- }
340
- },
341
- "sliding_window": 1024,
342
- "tie_word_embeddings": true,
343
- "top_k_experts": 8,
344
- "use_bidirectional_attention": "vision",
345
- "use_cache": true,
346
- "use_double_wide_mlp": false,
347
- "vocab_size": 262144,
348
- "vocab_size_per_layer_input": 262144
349
  },
 
350
  "tie_word_embeddings": true,
351
- "transformers_version": "5.5.0.dev0",
352
- "video_token_id": 258884,
353
- "vision_soft_tokens_per_image": 280
 
 
 
 
 
 
 
 
354
  }
 
1
  {
2
+ "architectures": [
3
+ "Gemma4ForConditionalGeneration"
4
+ ],
5
+ "audio_config": null,
6
+ "audio_token_id": 258881,
7
+ "boa_token_id": 256000,
8
+ "boi_token_id": 255999,
9
+ "dtype": "bfloat16",
10
+ "eoa_token_id": 258883,
11
+ "eoa_token_index": 258883,
12
+ "eoi_token_id": 258882,
13
+ "eos_token_id": [
14
+ 1,
15
+ 106
16
+ ],
17
+ "image_token_id": 258880,
18
+ "initializer_range": 0.02,
19
+ "model_type": "gemma4",
20
+ "quantization": {
21
+ "group_size": 64,
22
+ "bits": 4,
23
+ "mode": "affine",
24
+ "language_model.model.layers.0.router.proj": {
25
+ "group_size": 64,
26
+ "bits": 8
27
+ },
28
+ "language_model.model.layers.1.router.proj": {
29
+ "group_size": 64,
30
+ "bits": 8
31
+ },
32
+ "language_model.model.layers.2.router.proj": {
33
+ "group_size": 64,
34
+ "bits": 8
35
+ },
36
+ "language_model.model.layers.3.router.proj": {
37
+ "group_size": 64,
38
+ "bits": 8
39
+ },
40
+ "language_model.model.layers.4.router.proj": {
41
+ "group_size": 64,
42
+ "bits": 8
43
+ },
44
+ "language_model.model.layers.5.router.proj": {
45
+ "group_size": 64,
46
+ "bits": 8
47
+ },
48
+ "language_model.model.layers.6.router.proj": {
49
+ "group_size": 64,
50
+ "bits": 8
51
+ },
52
+ "language_model.model.layers.7.router.proj": {
53
+ "group_size": 64,
54
+ "bits": 8
55
+ },
56
+ "language_model.model.layers.8.router.proj": {
57
+ "group_size": 64,
58
+ "bits": 8
59
+ },
60
+ "language_model.model.layers.9.router.proj": {
61
+ "group_size": 64,
62
+ "bits": 8
63
+ },
64
+ "language_model.model.layers.10.router.proj": {
65
+ "group_size": 64,
66
+ "bits": 8
67
+ },
68
+ "language_model.model.layers.11.router.proj": {
69
+ "group_size": 64,
70
+ "bits": 8
71
+ },
72
+ "language_model.model.layers.12.router.proj": {
73
+ "group_size": 64,
74
+ "bits": 8
75
+ },
76
+ "language_model.model.layers.13.router.proj": {
77
+ "group_size": 64,
78
+ "bits": 8
79
+ },
80
+ "language_model.model.layers.14.router.proj": {
81
+ "group_size": 64,
82
+ "bits": 8
83
+ },
84
+ "language_model.model.layers.15.router.proj": {
85
+ "group_size": 64,
86
+ "bits": 8
87
+ },
88
+ "language_model.model.layers.16.router.proj": {
89
+ "group_size": 64,
90
+ "bits": 8
91
+ },
92
+ "language_model.model.layers.17.router.proj": {
93
+ "group_size": 64,
94
+ "bits": 8
95
+ },
96
+ "language_model.model.layers.18.router.proj": {
97
+ "group_size": 64,
98
+ "bits": 8
99
+ },
100
+ "language_model.model.layers.19.router.proj": {
101
+ "group_size": 64,
102
+ "bits": 8
103
+ },
104
+ "language_model.model.layers.20.router.proj": {
105
+ "group_size": 64,
106
+ "bits": 8
107
+ },
108
+ "language_model.model.layers.21.router.proj": {
109
+ "group_size": 64,
110
+ "bits": 8
111
+ },
112
+ "language_model.model.layers.22.router.proj": {
113
+ "group_size": 64,
114
+ "bits": 8
115
+ },
116
+ "language_model.model.layers.23.router.proj": {
117
+ "group_size": 64,
118
+ "bits": 8
119
+ },
120
+ "language_model.model.layers.24.router.proj": {
121
+ "group_size": 64,
122
+ "bits": 8
123
+ },
124
+ "language_model.model.layers.25.router.proj": {
125
+ "group_size": 64,
126
+ "bits": 8
127
+ },
128
+ "language_model.model.layers.26.router.proj": {
129
+ "group_size": 64,
130
+ "bits": 8
131
+ },
132
+ "language_model.model.layers.27.router.proj": {
133
+ "group_size": 64,
134
+ "bits": 8
135
+ },
136
+ "language_model.model.layers.28.router.proj": {
137
+ "group_size": 64,
138
+ "bits": 8
139
+ },
140
+ "language_model.model.layers.29.router.proj": {
141
+ "group_size": 64,
142
+ "bits": 8
143
+ }
144
+ },
145
+ "quantization_config": {
146
+ "group_size": 64,
147
+ "bits": 4,
148
+ "mode": "affine",
149
+ "language_model.model.layers.0.router.proj": {
150
+ "group_size": 64,
151
+ "bits": 8
152
+ },
153
+ "language_model.model.layers.1.router.proj": {
154
+ "group_size": 64,
155
+ "bits": 8
156
+ },
157
+ "language_model.model.layers.2.router.proj": {
158
+ "group_size": 64,
159
+ "bits": 8
160
+ },
161
+ "language_model.model.layers.3.router.proj": {
162
+ "group_size": 64,
163
+ "bits": 8
164
+ },
165
+ "language_model.model.layers.4.router.proj": {
166
+ "group_size": 64,
167
+ "bits": 8
168
+ },
169
+ "language_model.model.layers.5.router.proj": {
170
+ "group_size": 64,
171
+ "bits": 8
172
+ },
173
+ "language_model.model.layers.6.router.proj": {
174
+ "group_size": 64,
175
+ "bits": 8
176
+ },
177
+ "language_model.model.layers.7.router.proj": {
178
+ "group_size": 64,
179
+ "bits": 8
180
+ },
181
+ "language_model.model.layers.8.router.proj": {
182
+ "group_size": 64,
183
+ "bits": 8
184
+ },
185
+ "language_model.model.layers.9.router.proj": {
186
+ "group_size": 64,
187
+ "bits": 8
188
+ },
189
+ "language_model.model.layers.10.router.proj": {
190
+ "group_size": 64,
191
+ "bits": 8
192
+ },
193
+ "language_model.model.layers.11.router.proj": {
194
+ "group_size": 64,
195
+ "bits": 8
196
+ },
197
+ "language_model.model.layers.12.router.proj": {
198
+ "group_size": 64,
199
+ "bits": 8
200
+ },
201
+ "language_model.model.layers.13.router.proj": {
202
+ "group_size": 64,
203
+ "bits": 8
204
+ },
205
+ "language_model.model.layers.14.router.proj": {
206
+ "group_size": 64,
207
+ "bits": 8
208
+ },
209
+ "language_model.model.layers.15.router.proj": {
210
+ "group_size": 64,
211
+ "bits": 8
212
+ },
213
+ "language_model.model.layers.16.router.proj": {
214
+ "group_size": 64,
215
+ "bits": 8
216
+ },
217
+ "language_model.model.layers.17.router.proj": {
218
+ "group_size": 64,
219
+ "bits": 8
220
+ },
221
+ "language_model.model.layers.18.router.proj": {
222
+ "group_size": 64,
223
+ "bits": 8
224
+ },
225
+ "language_model.model.layers.19.router.proj": {
226
+ "group_size": 64,
227
+ "bits": 8
228
+ },
229
+ "language_model.model.layers.20.router.proj": {
230
+ "group_size": 64,
231
+ "bits": 8
232
+ },
233
+ "language_model.model.layers.21.router.proj": {
234
+ "group_size": 64,
235
+ "bits": 8
236
+ },
237
+ "language_model.model.layers.22.router.proj": {
238
+ "group_size": 64,
239
+ "bits": 8
240
+ },
241
+ "language_model.model.layers.23.router.proj": {
242
+ "group_size": 64,
243
+ "bits": 8
244
+ },
245
+ "language_model.model.layers.24.router.proj": {
246
+ "group_size": 64,
247
+ "bits": 8
248
+ },
249
+ "language_model.model.layers.25.router.proj": {
250
+ "group_size": 64,
251
+ "bits": 8
252
+ },
253
+ "language_model.model.layers.26.router.proj": {
254
+ "group_size": 64,
255
+ "bits": 8
256
+ },
257
+ "language_model.model.layers.27.router.proj": {
258
+ "group_size": 64,
259
+ "bits": 8
260
+ },
261
+ "language_model.model.layers.28.router.proj": {
262
+ "group_size": 64,
263
+ "bits": 8
264
+ },
265
+ "language_model.model.layers.29.router.proj": {
266
+ "group_size": 64,
267
+ "bits": 8
268
+ }
269
+ },
270
+ "text_config": {
271
+ "attention_bias": false,
272
+ "attention_dropout": 0.0,
273
+ "attention_k_eq_v": true,
274
+ "bos_token_id": 2,
275
  "dtype": "bfloat16",
276
+ "enable_moe_block": true,
277
+ "eos_token_id": 1,
278
+ "final_logit_softcapping": 30.0,
279
+ "global_head_dim": 512,
280
+ "head_dim": 256,
281
+ "hidden_activation": "gelu_pytorch_tanh",
282
+ "hidden_size": 2816,
283
+ "hidden_size_per_layer_input": 0,
 
284
  "initializer_range": 0.02,
285
+ "intermediate_size": 2112,
286
+ "layer_types": [
287
+ "sliding_attention",
288
+ "sliding_attention",
289
+ "sliding_attention",
290
+ "sliding_attention",
291
+ "sliding_attention",
292
+ "full_attention",
293
+ "sliding_attention",
294
+ "sliding_attention",
295
+ "sliding_attention",
296
+ "sliding_attention",
297
+ "sliding_attention",
298
+ "full_attention",
299
+ "sliding_attention",
300
+ "sliding_attention",
301
+ "sliding_attention",
302
+ "sliding_attention",
303
+ "sliding_attention",
304
+ "full_attention",
305
+ "sliding_attention",
306
+ "sliding_attention",
307
+ "sliding_attention",
308
+ "sliding_attention",
309
+ "sliding_attention",
310
+ "full_attention",
311
+ "sliding_attention",
312
+ "sliding_attention",
313
+ "sliding_attention",
314
+ "sliding_attention",
315
+ "sliding_attention",
316
+ "full_attention"
317
+ ],
318
+ "max_position_embeddings": 262144,
319
+ "model_type": "gemma4_text",
320
+ "moe_intermediate_size": 704,
321
+ "num_attention_heads": 16,
322
+ "num_experts": 128,
323
+ "num_global_key_value_heads": 2,
324
+ "num_hidden_layers": 30,
325
+ "num_key_value_heads": 8,
326
+ "num_kv_shared_layers": 0,
327
+ "pad_token_id": 0,
328
+ "rms_norm_eps": 1e-06,
329
+ "rope_parameters": {
330
+ "full_attention": {
331
+ "partial_rotary_factor": 0.25,
332
+ "rope_theta": 1000000.0,
333
+ "rope_type": "proportional"
334
+ },
335
+ "sliding_attention": {
336
+ "rope_theta": 10000.0,
337
+ "rope_type": "default"
338
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
339
  },
340
+ "sliding_window": 1024,
341
  "tie_word_embeddings": true,
342
+ "top_k_experts": 8,
343
+ "use_bidirectional_attention": "vision",
344
+ "use_cache": true,
345
+ "use_double_wide_mlp": false,
346
+ "vocab_size": 262144,
347
+ "vocab_size_per_layer_input": 262144
348
+ },
349
+ "tie_word_embeddings": true,
350
+ "transformers_version": "5.5.0.dev0",
351
+ "video_token_id": 258884,
352
+ "vision_soft_tokens_per_image": 280
353
  }