lthn committed on
Commit
f9aae13
·
verified ·
1 Parent(s): 5bea3ed

fix(config): sync with upstream Google Gemma 4 day-1 fixes (b446025, b4a6011)

Browse files

- Restore vision_config block; clean eos_token_id to [1, 106]
- Align generation_config.json transformers_version field

Preserves local tokenizer_config.json (model_specific_special_tokens embedded into model during LEK training).

Files changed (1) hide show
  1. config.json +189 -1208
config.json CHANGED
@@ -1,1216 +1,197 @@
1
  {
2
- "architectures": [
3
- "Gemma4ForConditionalGeneration"
4
- ],
5
- "audio_config": {
6
- "_name_or_path": "",
7
- "architectures": null,
8
- "attention_chunk_size": 12,
9
- "attention_context_left": 13,
10
- "attention_context_right": 0,
11
- "attention_invalid_logits_value": -1000000000.0,
12
- "attention_logit_cap": 50.0,
13
- "chunk_size_feed_forward": 0,
14
- "conv_kernel_size": 5,
15
- "dtype": "bfloat16",
16
- "gradient_clipping": 10000000000.0,
17
- "hidden_act": "silu",
18
- "hidden_size": 1024,
19
- "id2label": {
20
- "0": "LABEL_0",
21
- "1": "LABEL_1"
22
- },
23
- "initializer_range": 0.02,
24
- "is_encoder_decoder": false,
25
- "label2id": {
26
- "LABEL_0": 0,
27
- "LABEL_1": 1
28
- },
29
- "model_type": "gemma4_audio",
30
- "num_attention_heads": 8,
31
- "num_hidden_layers": 12,
32
- "output_attentions": false,
33
- "output_hidden_states": false,
34
- "output_proj_dims": 1536,
35
- "problem_type": null,
36
- "residual_weight": 0.5,
37
- "return_dict": true,
38
- "rms_norm_eps": 1e-06,
39
- "subsampling_conv_channels": [
40
- 128,
41
- 32
42
- ],
43
- "use_clipped_linears": true
44
- },
45
- "audio_token_id": 258881,
46
- "boa_token_id": 256000,
47
- "boi_token_id": 255999,
48
  "dtype": "bfloat16",
49
- "eoa_token_id": 258883,
50
- "eoa_token_index": 258883,
51
- "eoi_token_id": 258882,
52
- "eos_token_id": [
53
- 1,
54
- 106,
55
- 50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  ],
57
- "image_token_id": 258880,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  "initializer_range": 0.02,
59
- "model_type": "gemma4",
60
- "quantization": {
61
- "group_size": 64,
62
- "bits": 4,
63
- "mode": "affine",
64
- "language_model.model.layers.0.mlp.gate_proj": {
65
- "group_size": 64,
66
- "bits": 8
67
- },
68
- "language_model.model.layers.0.mlp.down_proj": {
69
- "group_size": 64,
70
- "bits": 8
71
- },
72
- "language_model.model.layers.0.mlp.up_proj": {
73
- "group_size": 64,
74
- "bits": 8
75
- },
76
- "language_model.model.layers.1.mlp.gate_proj": {
77
- "group_size": 64,
78
- "bits": 8
79
- },
80
- "language_model.model.layers.1.mlp.down_proj": {
81
- "group_size": 64,
82
- "bits": 8
83
- },
84
- "language_model.model.layers.1.mlp.up_proj": {
85
- "group_size": 64,
86
- "bits": 8
87
- },
88
- "language_model.model.layers.2.mlp.gate_proj": {
89
- "group_size": 64,
90
- "bits": 8
91
- },
92
- "language_model.model.layers.2.mlp.down_proj": {
93
- "group_size": 64,
94
- "bits": 8
95
- },
96
- "language_model.model.layers.2.mlp.up_proj": {
97
- "group_size": 64,
98
- "bits": 8
99
- },
100
- "language_model.model.layers.3.mlp.gate_proj": {
101
- "group_size": 64,
102
- "bits": 8
103
- },
104
- "language_model.model.layers.3.mlp.down_proj": {
105
- "group_size": 64,
106
- "bits": 8
107
- },
108
- "language_model.model.layers.3.mlp.up_proj": {
109
- "group_size": 64,
110
- "bits": 8
111
- },
112
- "language_model.model.layers.4.mlp.gate_proj": {
113
- "group_size": 64,
114
- "bits": 8
115
- },
116
- "language_model.model.layers.4.mlp.down_proj": {
117
- "group_size": 64,
118
- "bits": 8
119
- },
120
- "language_model.model.layers.4.mlp.up_proj": {
121
- "group_size": 64,
122
- "bits": 8
123
- },
124
- "language_model.model.layers.5.mlp.gate_proj": {
125
- "group_size": 64,
126
- "bits": 8
127
- },
128
- "language_model.model.layers.5.mlp.down_proj": {
129
- "group_size": 64,
130
- "bits": 8
131
- },
132
- "language_model.model.layers.5.mlp.up_proj": {
133
- "group_size": 64,
134
- "bits": 8
135
- },
136
- "language_model.model.layers.6.mlp.gate_proj": {
137
- "group_size": 64,
138
- "bits": 8
139
- },
140
- "language_model.model.layers.6.mlp.down_proj": {
141
- "group_size": 64,
142
- "bits": 8
143
- },
144
- "language_model.model.layers.6.mlp.up_proj": {
145
- "group_size": 64,
146
- "bits": 8
147
- },
148
- "language_model.model.layers.7.mlp.gate_proj": {
149
- "group_size": 64,
150
- "bits": 8
151
- },
152
- "language_model.model.layers.7.mlp.down_proj": {
153
- "group_size": 64,
154
- "bits": 8
155
- },
156
- "language_model.model.layers.7.mlp.up_proj": {
157
- "group_size": 64,
158
- "bits": 8
159
- },
160
- "language_model.model.layers.8.mlp.gate_proj": {
161
- "group_size": 64,
162
- "bits": 8
163
- },
164
- "language_model.model.layers.8.mlp.down_proj": {
165
- "group_size": 64,
166
- "bits": 8
167
- },
168
- "language_model.model.layers.8.mlp.up_proj": {
169
- "group_size": 64,
170
- "bits": 8
171
- },
172
- "language_model.model.layers.9.mlp.gate_proj": {
173
- "group_size": 64,
174
- "bits": 8
175
- },
176
- "language_model.model.layers.9.mlp.down_proj": {
177
- "group_size": 64,
178
- "bits": 8
179
- },
180
- "language_model.model.layers.9.mlp.up_proj": {
181
- "group_size": 64,
182
- "bits": 8
183
- },
184
- "language_model.model.layers.10.mlp.gate_proj": {
185
- "group_size": 64,
186
- "bits": 8
187
- },
188
- "language_model.model.layers.10.mlp.down_proj": {
189
- "group_size": 64,
190
- "bits": 8
191
- },
192
- "language_model.model.layers.10.mlp.up_proj": {
193
- "group_size": 64,
194
- "bits": 8
195
- },
196
- "language_model.model.layers.11.mlp.gate_proj": {
197
- "group_size": 64,
198
- "bits": 8
199
- },
200
- "language_model.model.layers.11.mlp.down_proj": {
201
- "group_size": 64,
202
- "bits": 8
203
- },
204
- "language_model.model.layers.11.mlp.up_proj": {
205
- "group_size": 64,
206
- "bits": 8
207
- },
208
- "language_model.model.layers.12.mlp.gate_proj": {
209
- "group_size": 64,
210
- "bits": 8
211
- },
212
- "language_model.model.layers.12.mlp.down_proj": {
213
- "group_size": 64,
214
- "bits": 8
215
- },
216
- "language_model.model.layers.12.mlp.up_proj": {
217
- "group_size": 64,
218
- "bits": 8
219
- },
220
- "language_model.model.layers.13.mlp.gate_proj": {
221
- "group_size": 64,
222
- "bits": 8
223
- },
224
- "language_model.model.layers.13.mlp.down_proj": {
225
- "group_size": 64,
226
- "bits": 8
227
- },
228
- "language_model.model.layers.13.mlp.up_proj": {
229
- "group_size": 64,
230
- "bits": 8
231
- },
232
- "language_model.model.layers.14.mlp.gate_proj": {
233
- "group_size": 64,
234
- "bits": 8
235
- },
236
- "language_model.model.layers.14.mlp.down_proj": {
237
- "group_size": 64,
238
- "bits": 8
239
- },
240
- "language_model.model.layers.14.mlp.up_proj": {
241
- "group_size": 64,
242
- "bits": 8
243
- },
244
- "language_model.model.layers.15.mlp.gate_proj": {
245
- "group_size": 64,
246
- "bits": 8
247
- },
248
- "language_model.model.layers.15.mlp.down_proj": {
249
- "group_size": 64,
250
- "bits": 8
251
- },
252
- "language_model.model.layers.15.mlp.up_proj": {
253
- "group_size": 64,
254
- "bits": 8
255
- },
256
- "language_model.model.layers.16.mlp.gate_proj": {
257
- "group_size": 64,
258
- "bits": 8
259
- },
260
- "language_model.model.layers.16.mlp.down_proj": {
261
- "group_size": 64,
262
- "bits": 8
263
- },
264
- "language_model.model.layers.16.mlp.up_proj": {
265
- "group_size": 64,
266
- "bits": 8
267
- },
268
- "language_model.model.layers.17.mlp.gate_proj": {
269
- "group_size": 64,
270
- "bits": 8
271
- },
272
- "language_model.model.layers.17.mlp.down_proj": {
273
- "group_size": 64,
274
- "bits": 8
275
- },
276
- "language_model.model.layers.17.mlp.up_proj": {
277
- "group_size": 64,
278
- "bits": 8
279
- },
280
- "language_model.model.layers.18.mlp.gate_proj": {
281
- "group_size": 64,
282
- "bits": 8
283
- },
284
- "language_model.model.layers.18.mlp.down_proj": {
285
- "group_size": 64,
286
- "bits": 8
287
- },
288
- "language_model.model.layers.18.mlp.up_proj": {
289
- "group_size": 64,
290
- "bits": 8
291
- },
292
- "language_model.model.layers.19.mlp.gate_proj": {
293
- "group_size": 64,
294
- "bits": 8
295
- },
296
- "language_model.model.layers.19.mlp.down_proj": {
297
- "group_size": 64,
298
- "bits": 8
299
- },
300
- "language_model.model.layers.19.mlp.up_proj": {
301
- "group_size": 64,
302
- "bits": 8
303
- },
304
- "language_model.model.layers.20.mlp.gate_proj": {
305
- "group_size": 64,
306
- "bits": 8
307
- },
308
- "language_model.model.layers.20.mlp.down_proj": {
309
- "group_size": 64,
310
- "bits": 8
311
- },
312
- "language_model.model.layers.20.mlp.up_proj": {
313
- "group_size": 64,
314
- "bits": 8
315
- },
316
- "language_model.model.layers.21.mlp.gate_proj": {
317
- "group_size": 64,
318
- "bits": 8
319
- },
320
- "language_model.model.layers.21.mlp.down_proj": {
321
- "group_size": 64,
322
- "bits": 8
323
- },
324
- "language_model.model.layers.21.mlp.up_proj": {
325
- "group_size": 64,
326
- "bits": 8
327
- },
328
- "language_model.model.layers.22.mlp.gate_proj": {
329
- "group_size": 64,
330
- "bits": 8
331
- },
332
- "language_model.model.layers.22.mlp.down_proj": {
333
- "group_size": 64,
334
- "bits": 8
335
- },
336
- "language_model.model.layers.22.mlp.up_proj": {
337
- "group_size": 64,
338
- "bits": 8
339
- },
340
- "language_model.model.layers.23.mlp.gate_proj": {
341
- "group_size": 64,
342
- "bits": 8
343
- },
344
- "language_model.model.layers.23.mlp.down_proj": {
345
- "group_size": 64,
346
- "bits": 8
347
- },
348
- "language_model.model.layers.23.mlp.up_proj": {
349
- "group_size": 64,
350
- "bits": 8
351
- },
352
- "language_model.model.layers.24.mlp.gate_proj": {
353
- "group_size": 64,
354
- "bits": 8
355
- },
356
- "language_model.model.layers.24.mlp.down_proj": {
357
- "group_size": 64,
358
- "bits": 8
359
- },
360
- "language_model.model.layers.24.mlp.up_proj": {
361
- "group_size": 64,
362
- "bits": 8
363
- },
364
- "language_model.model.layers.25.mlp.gate_proj": {
365
- "group_size": 64,
366
- "bits": 8
367
- },
368
- "language_model.model.layers.25.mlp.down_proj": {
369
- "group_size": 64,
370
- "bits": 8
371
- },
372
- "language_model.model.layers.25.mlp.up_proj": {
373
- "group_size": 64,
374
- "bits": 8
375
- },
376
- "language_model.model.layers.26.mlp.gate_proj": {
377
- "group_size": 64,
378
- "bits": 8
379
- },
380
- "language_model.model.layers.26.mlp.down_proj": {
381
- "group_size": 64,
382
- "bits": 8
383
- },
384
- "language_model.model.layers.26.mlp.up_proj": {
385
- "group_size": 64,
386
- "bits": 8
387
- },
388
- "language_model.model.layers.27.mlp.gate_proj": {
389
- "group_size": 64,
390
- "bits": 8
391
- },
392
- "language_model.model.layers.27.mlp.down_proj": {
393
- "group_size": 64,
394
- "bits": 8
395
- },
396
- "language_model.model.layers.27.mlp.up_proj": {
397
- "group_size": 64,
398
- "bits": 8
399
- },
400
- "language_model.model.layers.28.mlp.gate_proj": {
401
- "group_size": 64,
402
- "bits": 8
403
- },
404
- "language_model.model.layers.28.mlp.down_proj": {
405
- "group_size": 64,
406
- "bits": 8
407
- },
408
- "language_model.model.layers.28.mlp.up_proj": {
409
- "group_size": 64,
410
- "bits": 8
411
- },
412
- "language_model.model.layers.29.mlp.gate_proj": {
413
- "group_size": 64,
414
- "bits": 8
415
- },
416
- "language_model.model.layers.29.mlp.down_proj": {
417
- "group_size": 64,
418
- "bits": 8
419
- },
420
- "language_model.model.layers.29.mlp.up_proj": {
421
- "group_size": 64,
422
- "bits": 8
423
- },
424
- "language_model.model.layers.30.mlp.gate_proj": {
425
- "group_size": 64,
426
- "bits": 8
427
- },
428
- "language_model.model.layers.30.mlp.down_proj": {
429
- "group_size": 64,
430
- "bits": 8
431
- },
432
- "language_model.model.layers.30.mlp.up_proj": {
433
- "group_size": 64,
434
- "bits": 8
435
- },
436
- "language_model.model.layers.31.mlp.gate_proj": {
437
- "group_size": 64,
438
- "bits": 8
439
- },
440
- "language_model.model.layers.31.mlp.down_proj": {
441
- "group_size": 64,
442
- "bits": 8
443
- },
444
- "language_model.model.layers.31.mlp.up_proj": {
445
- "group_size": 64,
446
- "bits": 8
447
- },
448
- "language_model.model.layers.32.mlp.gate_proj": {
449
- "group_size": 64,
450
- "bits": 8
451
- },
452
- "language_model.model.layers.32.mlp.down_proj": {
453
- "group_size": 64,
454
- "bits": 8
455
- },
456
- "language_model.model.layers.32.mlp.up_proj": {
457
- "group_size": 64,
458
- "bits": 8
459
- },
460
- "language_model.model.layers.33.mlp.gate_proj": {
461
- "group_size": 64,
462
- "bits": 8
463
- },
464
- "language_model.model.layers.33.mlp.down_proj": {
465
- "group_size": 64,
466
- "bits": 8
467
- },
468
- "language_model.model.layers.33.mlp.up_proj": {
469
- "group_size": 64,
470
- "bits": 8
471
- },
472
- "language_model.model.layers.34.mlp.gate_proj": {
473
- "group_size": 64,
474
- "bits": 8
475
- },
476
- "language_model.model.layers.34.mlp.down_proj": {
477
- "group_size": 64,
478
- "bits": 8
479
- },
480
- "language_model.model.layers.34.mlp.up_proj": {
481
- "group_size": 64,
482
- "bits": 8
483
- },
484
- "language_model.model.layers.35.mlp.gate_proj": {
485
- "group_size": 64,
486
- "bits": 8
487
- },
488
- "language_model.model.layers.35.mlp.down_proj": {
489
- "group_size": 64,
490
- "bits": 8
491
- },
492
- "language_model.model.layers.35.mlp.up_proj": {
493
- "group_size": 64,
494
- "bits": 8
495
- },
496
- "language_model.model.layers.36.mlp.gate_proj": {
497
- "group_size": 64,
498
- "bits": 8
499
- },
500
- "language_model.model.layers.36.mlp.down_proj": {
501
- "group_size": 64,
502
- "bits": 8
503
- },
504
- "language_model.model.layers.36.mlp.up_proj": {
505
- "group_size": 64,
506
- "bits": 8
507
- },
508
- "language_model.model.layers.37.mlp.gate_proj": {
509
- "group_size": 64,
510
- "bits": 8
511
- },
512
- "language_model.model.layers.37.mlp.down_proj": {
513
- "group_size": 64,
514
- "bits": 8
515
- },
516
- "language_model.model.layers.37.mlp.up_proj": {
517
- "group_size": 64,
518
- "bits": 8
519
- },
520
- "language_model.model.layers.38.mlp.gate_proj": {
521
- "group_size": 64,
522
- "bits": 8
523
- },
524
- "language_model.model.layers.38.mlp.down_proj": {
525
- "group_size": 64,
526
- "bits": 8
527
- },
528
- "language_model.model.layers.38.mlp.up_proj": {
529
- "group_size": 64,
530
- "bits": 8
531
- },
532
- "language_model.model.layers.39.mlp.gate_proj": {
533
- "group_size": 64,
534
- "bits": 8
535
- },
536
- "language_model.model.layers.39.mlp.down_proj": {
537
- "group_size": 64,
538
- "bits": 8
539
- },
540
- "language_model.model.layers.39.mlp.up_proj": {
541
- "group_size": 64,
542
- "bits": 8
543
- },
544
- "language_model.model.layers.40.mlp.gate_proj": {
545
- "group_size": 64,
546
- "bits": 8
547
- },
548
- "language_model.model.layers.40.mlp.down_proj": {
549
- "group_size": 64,
550
- "bits": 8
551
- },
552
- "language_model.model.layers.40.mlp.up_proj": {
553
- "group_size": 64,
554
- "bits": 8
555
- },
556
- "language_model.model.layers.41.mlp.gate_proj": {
557
- "group_size": 64,
558
- "bits": 8
559
- },
560
- "language_model.model.layers.41.mlp.down_proj": {
561
- "group_size": 64,
562
- "bits": 8
563
- },
564
- "language_model.model.layers.41.mlp.up_proj": {
565
- "group_size": 64,
566
- "bits": 8
567
- }
568
  },
569
- "quantization_config": {
570
- "group_size": 64,
571
- "bits": 4,
572
- "mode": "affine",
573
- "language_model.model.layers.0.mlp.gate_proj": {
574
- "group_size": 64,
575
- "bits": 8
576
- },
577
- "language_model.model.layers.0.mlp.down_proj": {
578
- "group_size": 64,
579
- "bits": 8
580
- },
581
- "language_model.model.layers.0.mlp.up_proj": {
582
- "group_size": 64,
583
- "bits": 8
584
- },
585
- "language_model.model.layers.1.mlp.gate_proj": {
586
- "group_size": 64,
587
- "bits": 8
588
- },
589
- "language_model.model.layers.1.mlp.down_proj": {
590
- "group_size": 64,
591
- "bits": 8
592
- },
593
- "language_model.model.layers.1.mlp.up_proj": {
594
- "group_size": 64,
595
- "bits": 8
596
- },
597
- "language_model.model.layers.2.mlp.gate_proj": {
598
- "group_size": 64,
599
- "bits": 8
600
- },
601
- "language_model.model.layers.2.mlp.down_proj": {
602
- "group_size": 64,
603
- "bits": 8
604
- },
605
- "language_model.model.layers.2.mlp.up_proj": {
606
- "group_size": 64,
607
- "bits": 8
608
- },
609
- "language_model.model.layers.3.mlp.gate_proj": {
610
- "group_size": 64,
611
- "bits": 8
612
- },
613
- "language_model.model.layers.3.mlp.down_proj": {
614
- "group_size": 64,
615
- "bits": 8
616
- },
617
- "language_model.model.layers.3.mlp.up_proj": {
618
- "group_size": 64,
619
- "bits": 8
620
- },
621
- "language_model.model.layers.4.mlp.gate_proj": {
622
- "group_size": 64,
623
- "bits": 8
624
- },
625
- "language_model.model.layers.4.mlp.down_proj": {
626
- "group_size": 64,
627
- "bits": 8
628
- },
629
- "language_model.model.layers.4.mlp.up_proj": {
630
- "group_size": 64,
631
- "bits": 8
632
- },
633
- "language_model.model.layers.5.mlp.gate_proj": {
634
- "group_size": 64,
635
- "bits": 8
636
- },
637
- "language_model.model.layers.5.mlp.down_proj": {
638
- "group_size": 64,
639
- "bits": 8
640
- },
641
- "language_model.model.layers.5.mlp.up_proj": {
642
- "group_size": 64,
643
- "bits": 8
644
- },
645
- "language_model.model.layers.6.mlp.gate_proj": {
646
- "group_size": 64,
647
- "bits": 8
648
- },
649
- "language_model.model.layers.6.mlp.down_proj": {
650
- "group_size": 64,
651
- "bits": 8
652
- },
653
- "language_model.model.layers.6.mlp.up_proj": {
654
- "group_size": 64,
655
- "bits": 8
656
- },
657
- "language_model.model.layers.7.mlp.gate_proj": {
658
- "group_size": 64,
659
- "bits": 8
660
- },
661
- "language_model.model.layers.7.mlp.down_proj": {
662
- "group_size": 64,
663
- "bits": 8
664
- },
665
- "language_model.model.layers.7.mlp.up_proj": {
666
- "group_size": 64,
667
- "bits": 8
668
- },
669
- "language_model.model.layers.8.mlp.gate_proj": {
670
- "group_size": 64,
671
- "bits": 8
672
- },
673
- "language_model.model.layers.8.mlp.down_proj": {
674
- "group_size": 64,
675
- "bits": 8
676
- },
677
- "language_model.model.layers.8.mlp.up_proj": {
678
- "group_size": 64,
679
- "bits": 8
680
- },
681
- "language_model.model.layers.9.mlp.gate_proj": {
682
- "group_size": 64,
683
- "bits": 8
684
- },
685
- "language_model.model.layers.9.mlp.down_proj": {
686
- "group_size": 64,
687
- "bits": 8
688
- },
689
- "language_model.model.layers.9.mlp.up_proj": {
690
- "group_size": 64,
691
- "bits": 8
692
- },
693
- "language_model.model.layers.10.mlp.gate_proj": {
694
- "group_size": 64,
695
- "bits": 8
696
- },
697
- "language_model.model.layers.10.mlp.down_proj": {
698
- "group_size": 64,
699
- "bits": 8
700
- },
701
- "language_model.model.layers.10.mlp.up_proj": {
702
- "group_size": 64,
703
- "bits": 8
704
- },
705
- "language_model.model.layers.11.mlp.gate_proj": {
706
- "group_size": 64,
707
- "bits": 8
708
- },
709
- "language_model.model.layers.11.mlp.down_proj": {
710
- "group_size": 64,
711
- "bits": 8
712
- },
713
- "language_model.model.layers.11.mlp.up_proj": {
714
- "group_size": 64,
715
- "bits": 8
716
- },
717
- "language_model.model.layers.12.mlp.gate_proj": {
718
- "group_size": 64,
719
- "bits": 8
720
- },
721
- "language_model.model.layers.12.mlp.down_proj": {
722
- "group_size": 64,
723
- "bits": 8
724
- },
725
- "language_model.model.layers.12.mlp.up_proj": {
726
- "group_size": 64,
727
- "bits": 8
728
- },
729
- "language_model.model.layers.13.mlp.gate_proj": {
730
- "group_size": 64,
731
- "bits": 8
732
- },
733
- "language_model.model.layers.13.mlp.down_proj": {
734
- "group_size": 64,
735
- "bits": 8
736
- },
737
- "language_model.model.layers.13.mlp.up_proj": {
738
- "group_size": 64,
739
- "bits": 8
740
- },
741
- "language_model.model.layers.14.mlp.gate_proj": {
742
- "group_size": 64,
743
- "bits": 8
744
- },
745
- "language_model.model.layers.14.mlp.down_proj": {
746
- "group_size": 64,
747
- "bits": 8
748
- },
749
- "language_model.model.layers.14.mlp.up_proj": {
750
- "group_size": 64,
751
- "bits": 8
752
- },
753
- "language_model.model.layers.15.mlp.gate_proj": {
754
- "group_size": 64,
755
- "bits": 8
756
- },
757
- "language_model.model.layers.15.mlp.down_proj": {
758
- "group_size": 64,
759
- "bits": 8
760
- },
761
- "language_model.model.layers.15.mlp.up_proj": {
762
- "group_size": 64,
763
- "bits": 8
764
- },
765
- "language_model.model.layers.16.mlp.gate_proj": {
766
- "group_size": 64,
767
- "bits": 8
768
- },
769
- "language_model.model.layers.16.mlp.down_proj": {
770
- "group_size": 64,
771
- "bits": 8
772
- },
773
- "language_model.model.layers.16.mlp.up_proj": {
774
- "group_size": 64,
775
- "bits": 8
776
- },
777
- "language_model.model.layers.17.mlp.gate_proj": {
778
- "group_size": 64,
779
- "bits": 8
780
- },
781
- "language_model.model.layers.17.mlp.down_proj": {
782
- "group_size": 64,
783
- "bits": 8
784
- },
785
- "language_model.model.layers.17.mlp.up_proj": {
786
- "group_size": 64,
787
- "bits": 8
788
- },
789
- "language_model.model.layers.18.mlp.gate_proj": {
790
- "group_size": 64,
791
- "bits": 8
792
- },
793
- "language_model.model.layers.18.mlp.down_proj": {
794
- "group_size": 64,
795
- "bits": 8
796
- },
797
- "language_model.model.layers.18.mlp.up_proj": {
798
- "group_size": 64,
799
- "bits": 8
800
- },
801
- "language_model.model.layers.19.mlp.gate_proj": {
802
- "group_size": 64,
803
- "bits": 8
804
- },
805
- "language_model.model.layers.19.mlp.down_proj": {
806
- "group_size": 64,
807
- "bits": 8
808
- },
809
- "language_model.model.layers.19.mlp.up_proj": {
810
- "group_size": 64,
811
- "bits": 8
812
- },
813
- "language_model.model.layers.20.mlp.gate_proj": {
814
- "group_size": 64,
815
- "bits": 8
816
- },
817
- "language_model.model.layers.20.mlp.down_proj": {
818
- "group_size": 64,
819
- "bits": 8
820
- },
821
- "language_model.model.layers.20.mlp.up_proj": {
822
- "group_size": 64,
823
- "bits": 8
824
- },
825
- "language_model.model.layers.21.mlp.gate_proj": {
826
- "group_size": 64,
827
- "bits": 8
828
- },
829
- "language_model.model.layers.21.mlp.down_proj": {
830
- "group_size": 64,
831
- "bits": 8
832
- },
833
- "language_model.model.layers.21.mlp.up_proj": {
834
- "group_size": 64,
835
- "bits": 8
836
- },
837
- "language_model.model.layers.22.mlp.gate_proj": {
838
- "group_size": 64,
839
- "bits": 8
840
- },
841
- "language_model.model.layers.22.mlp.down_proj": {
842
- "group_size": 64,
843
- "bits": 8
844
- },
845
- "language_model.model.layers.22.mlp.up_proj": {
846
- "group_size": 64,
847
- "bits": 8
848
- },
849
- "language_model.model.layers.23.mlp.gate_proj": {
850
- "group_size": 64,
851
- "bits": 8
852
- },
853
- "language_model.model.layers.23.mlp.down_proj": {
854
- "group_size": 64,
855
- "bits": 8
856
- },
857
- "language_model.model.layers.23.mlp.up_proj": {
858
- "group_size": 64,
859
- "bits": 8
860
- },
861
- "language_model.model.layers.24.mlp.gate_proj": {
862
- "group_size": 64,
863
- "bits": 8
864
- },
865
- "language_model.model.layers.24.mlp.down_proj": {
866
- "group_size": 64,
867
- "bits": 8
868
- },
869
- "language_model.model.layers.24.mlp.up_proj": {
870
- "group_size": 64,
871
- "bits": 8
872
- },
873
- "language_model.model.layers.25.mlp.gate_proj": {
874
- "group_size": 64,
875
- "bits": 8
876
- },
877
- "language_model.model.layers.25.mlp.down_proj": {
878
- "group_size": 64,
879
- "bits": 8
880
- },
881
- "language_model.model.layers.25.mlp.up_proj": {
882
- "group_size": 64,
883
- "bits": 8
884
- },
885
- "language_model.model.layers.26.mlp.gate_proj": {
886
- "group_size": 64,
887
- "bits": 8
888
- },
889
- "language_model.model.layers.26.mlp.down_proj": {
890
- "group_size": 64,
891
- "bits": 8
892
- },
893
- "language_model.model.layers.26.mlp.up_proj": {
894
- "group_size": 64,
895
- "bits": 8
896
- },
897
- "language_model.model.layers.27.mlp.gate_proj": {
898
- "group_size": 64,
899
- "bits": 8
900
- },
901
- "language_model.model.layers.27.mlp.down_proj": {
902
- "group_size": 64,
903
- "bits": 8
904
- },
905
- "language_model.model.layers.27.mlp.up_proj": {
906
- "group_size": 64,
907
- "bits": 8
908
- },
909
- "language_model.model.layers.28.mlp.gate_proj": {
910
- "group_size": 64,
911
- "bits": 8
912
- },
913
- "language_model.model.layers.28.mlp.down_proj": {
914
- "group_size": 64,
915
- "bits": 8
916
- },
917
- "language_model.model.layers.28.mlp.up_proj": {
918
- "group_size": 64,
919
- "bits": 8
920
- },
921
- "language_model.model.layers.29.mlp.gate_proj": {
922
- "group_size": 64,
923
- "bits": 8
924
- },
925
- "language_model.model.layers.29.mlp.down_proj": {
926
- "group_size": 64,
927
- "bits": 8
928
- },
929
- "language_model.model.layers.29.mlp.up_proj": {
930
- "group_size": 64,
931
- "bits": 8
932
- },
933
- "language_model.model.layers.30.mlp.gate_proj": {
934
- "group_size": 64,
935
- "bits": 8
936
- },
937
- "language_model.model.layers.30.mlp.down_proj": {
938
- "group_size": 64,
939
- "bits": 8
940
- },
941
- "language_model.model.layers.30.mlp.up_proj": {
942
- "group_size": 64,
943
- "bits": 8
944
- },
945
- "language_model.model.layers.31.mlp.gate_proj": {
946
- "group_size": 64,
947
- "bits": 8
948
- },
949
- "language_model.model.layers.31.mlp.down_proj": {
950
- "group_size": 64,
951
- "bits": 8
952
- },
953
- "language_model.model.layers.31.mlp.up_proj": {
954
- "group_size": 64,
955
- "bits": 8
956
- },
957
- "language_model.model.layers.32.mlp.gate_proj": {
958
- "group_size": 64,
959
- "bits": 8
960
- },
961
- "language_model.model.layers.32.mlp.down_proj": {
962
- "group_size": 64,
963
- "bits": 8
964
- },
965
- "language_model.model.layers.32.mlp.up_proj": {
966
- "group_size": 64,
967
- "bits": 8
968
- },
969
- "language_model.model.layers.33.mlp.gate_proj": {
970
- "group_size": 64,
971
- "bits": 8
972
- },
973
- "language_model.model.layers.33.mlp.down_proj": {
974
- "group_size": 64,
975
- "bits": 8
976
- },
977
- "language_model.model.layers.33.mlp.up_proj": {
978
- "group_size": 64,
979
- "bits": 8
980
- },
981
- "language_model.model.layers.34.mlp.gate_proj": {
982
- "group_size": 64,
983
- "bits": 8
984
- },
985
- "language_model.model.layers.34.mlp.down_proj": {
986
- "group_size": 64,
987
- "bits": 8
988
- },
989
- "language_model.model.layers.34.mlp.up_proj": {
990
- "group_size": 64,
991
- "bits": 8
992
- },
993
- "language_model.model.layers.35.mlp.gate_proj": {
994
- "group_size": 64,
995
- "bits": 8
996
- },
997
- "language_model.model.layers.35.mlp.down_proj": {
998
- "group_size": 64,
999
- "bits": 8
1000
- },
1001
- "language_model.model.layers.35.mlp.up_proj": {
1002
- "group_size": 64,
1003
- "bits": 8
1004
- },
1005
- "language_model.model.layers.36.mlp.gate_proj": {
1006
- "group_size": 64,
1007
- "bits": 8
1008
- },
1009
- "language_model.model.layers.36.mlp.down_proj": {
1010
- "group_size": 64,
1011
- "bits": 8
1012
- },
1013
- "language_model.model.layers.36.mlp.up_proj": {
1014
- "group_size": 64,
1015
- "bits": 8
1016
- },
1017
- "language_model.model.layers.37.mlp.gate_proj": {
1018
- "group_size": 64,
1019
- "bits": 8
1020
- },
1021
- "language_model.model.layers.37.mlp.down_proj": {
1022
- "group_size": 64,
1023
- "bits": 8
1024
- },
1025
- "language_model.model.layers.37.mlp.up_proj": {
1026
- "group_size": 64,
1027
- "bits": 8
1028
- },
1029
- "language_model.model.layers.38.mlp.gate_proj": {
1030
- "group_size": 64,
1031
- "bits": 8
1032
- },
1033
- "language_model.model.layers.38.mlp.down_proj": {
1034
- "group_size": 64,
1035
- "bits": 8
1036
- },
1037
- "language_model.model.layers.38.mlp.up_proj": {
1038
- "group_size": 64,
1039
- "bits": 8
1040
- },
1041
- "language_model.model.layers.39.mlp.gate_proj": {
1042
- "group_size": 64,
1043
- "bits": 8
1044
- },
1045
- "language_model.model.layers.39.mlp.down_proj": {
1046
- "group_size": 64,
1047
- "bits": 8
1048
- },
1049
- "language_model.model.layers.39.mlp.up_proj": {
1050
- "group_size": 64,
1051
- "bits": 8
1052
- },
1053
- "language_model.model.layers.40.mlp.gate_proj": {
1054
- "group_size": 64,
1055
- "bits": 8
1056
- },
1057
- "language_model.model.layers.40.mlp.down_proj": {
1058
- "group_size": 64,
1059
- "bits": 8
1060
- },
1061
- "language_model.model.layers.40.mlp.up_proj": {
1062
- "group_size": 64,
1063
- "bits": 8
1064
- },
1065
- "language_model.model.layers.41.mlp.gate_proj": {
1066
- "group_size": 64,
1067
- "bits": 8
1068
- },
1069
- "language_model.model.layers.41.mlp.down_proj": {
1070
- "group_size": 64,
1071
- "bits": 8
1072
- },
1073
- "language_model.model.layers.41.mlp.up_proj": {
1074
- "group_size": 64,
1075
- "bits": 8
1076
- }
1077
  },
1078
- "text_config": {
1079
- "attention_bias": false,
1080
- "attention_dropout": 0.0,
1081
- "attention_k_eq_v": false,
1082
- "bos_token_id": 2,
1083
- "dtype": "bfloat16",
1084
- "enable_moe_block": false,
1085
- "eos_token_id": 1,
1086
- "expert_intermediate_size": null,
1087
- "final_logit_softcapping": 30.0,
1088
- "global_head_dim": 512,
1089
- "head_dim": 256,
1090
- "hidden_activation": "gelu_pytorch_tanh",
1091
- "hidden_size": 2560,
1092
- "hidden_size_per_layer_input": 256,
1093
- "initializer_range": 0.02,
1094
- "intermediate_size": 10240,
1095
- "layer_types": [
1096
- "sliding_attention",
1097
- "sliding_attention",
1098
- "sliding_attention",
1099
- "sliding_attention",
1100
- "sliding_attention",
1101
- "full_attention",
1102
- "sliding_attention",
1103
- "sliding_attention",
1104
- "sliding_attention",
1105
- "sliding_attention",
1106
- "sliding_attention",
1107
- "full_attention",
1108
- "sliding_attention",
1109
- "sliding_attention",
1110
- "sliding_attention",
1111
- "sliding_attention",
1112
- "sliding_attention",
1113
- "full_attention",
1114
- "sliding_attention",
1115
- "sliding_attention",
1116
- "sliding_attention",
1117
- "sliding_attention",
1118
- "sliding_attention",
1119
- "full_attention",
1120
- "sliding_attention",
1121
- "sliding_attention",
1122
- "sliding_attention",
1123
- "sliding_attention",
1124
- "sliding_attention",
1125
- "full_attention",
1126
- "sliding_attention",
1127
- "sliding_attention",
1128
- "sliding_attention",
1129
- "sliding_attention",
1130
- "sliding_attention",
1131
- "full_attention",
1132
- "sliding_attention",
1133
- "sliding_attention",
1134
- "sliding_attention",
1135
- "sliding_attention",
1136
- "sliding_attention",
1137
- "full_attention"
1138
- ],
1139
- "max_position_embeddings": 131072,
1140
- "model_type": "gemma4_text",
1141
- "num_attention_heads": 8,
1142
- "num_experts": null,
1143
- "num_global_key_value_heads": null,
1144
- "num_hidden_layers": 42,
1145
- "num_key_value_heads": 2,
1146
- "num_kv_shared_layers": 18,
1147
- "pad_token_id": 0,
1148
- "rms_norm_eps": 1e-06,
1149
- "rope_parameters": {
1150
- "full_attention": {
1151
- "partial_rotary_factor": 0.25,
1152
- "rope_theta": 1000000.0,
1153
- "rope_type": "proportional"
1154
- },
1155
- "sliding_attention": {
1156
- "rope_theta": 10000.0,
1157
- "rope_type": "default"
1158
- }
1159
- },
1160
- "sliding_window": 512,
1161
- "tie_word_embeddings": true,
1162
- "top_k_experts": null,
1163
- "use_bidirectional_attention": null,
1164
- "use_cache": true,
1165
- "use_double_wide_mlp": false,
1166
- "vocab_size": 262144,
1167
- "vocab_size_per_layer_input": 262144
1168
  },
1169
- "tie_word_embeddings": true,
1170
- "transformers_version": "5.5.0.dev0",
1171
- "video_token_id": 258884,
1172
- "vision_config": {
1173
- "_name_or_path": "",
1174
- "architectures": null,
1175
- "attention_bias": false,
1176
- "attention_dropout": 0.0,
1177
- "chunk_size_feed_forward": 0,
1178
- "default_output_length": 280,
1179
- "dtype": "bfloat16",
1180
- "global_head_dim": 64,
1181
- "head_dim": 64,
1182
- "hidden_activation": "gelu_pytorch_tanh",
1183
- "hidden_size": 768,
1184
- "id2label": {
1185
- "0": "LABEL_0",
1186
- "1": "LABEL_1"
1187
- },
1188
- "initializer_range": 0.02,
1189
- "intermediate_size": 3072,
1190
- "is_encoder_decoder": false,
1191
- "label2id": {
1192
- "LABEL_0": 0,
1193
- "LABEL_1": 1
1194
- },
1195
- "max_position_embeddings": 131072,
1196
- "model_type": "gemma4_vision",
1197
- "num_attention_heads": 12,
1198
- "num_hidden_layers": 16,
1199
- "num_key_value_heads": 12,
1200
- "output_attentions": false,
1201
- "output_hidden_states": false,
1202
- "patch_size": 16,
1203
- "pooling_kernel_size": 3,
1204
- "position_embedding_size": 10240,
1205
- "problem_type": null,
1206
- "return_dict": true,
1207
- "rms_norm_eps": 1e-06,
1208
- "rope_parameters": {
1209
- "rope_theta": 100.0,
1210
- "rope_type": "default"
1211
- },
1212
- "standardize": false,
1213
- "use_clipped_linears": true
1214
  },
1215
- "vision_soft_tokens_per_image": 280
1216
- }
 
 
 
 
1
  {
2
+ "architectures": [
3
+ "Gemma4ForConditionalGeneration"
4
+ ],
5
+ "audio_config": {
6
+ "_name_or_path": "",
7
+ "architectures": null,
8
+ "attention_chunk_size": 12,
9
+ "attention_context_left": 13,
10
+ "attention_context_right": 0,
11
+ "attention_invalid_logits_value": -1000000000.0,
12
+ "attention_logit_cap": 50.0,
13
+ "chunk_size_feed_forward": 0,
14
+ "conv_kernel_size": 5,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  "dtype": "bfloat16",
16
+ "gradient_clipping": 10000000000.0,
17
+ "hidden_act": "silu",
18
+ "hidden_size": 1024,
19
+ "id2label": {
20
+ "0": "LABEL_0",
21
+ "1": "LABEL_1"
22
+ },
23
+ "initializer_range": 0.02,
24
+ "is_encoder_decoder": false,
25
+ "label2id": {
26
+ "LABEL_0": 0,
27
+ "LABEL_1": 1
28
+ },
29
+ "model_type": "gemma4_audio",
30
+ "num_attention_heads": 8,
31
+ "num_hidden_layers": 12,
32
+ "output_attentions": false,
33
+ "output_hidden_states": false,
34
+ "output_proj_dims": 1536,
35
+ "problem_type": null,
36
+ "residual_weight": 0.5,
37
+ "return_dict": true,
38
+ "rms_norm_eps": 1e-06,
39
+ "subsampling_conv_channels": [
40
+ 128,
41
+ 32
42
  ],
43
+ "use_clipped_linears": true
44
+ },
45
+ "audio_token_id": 258881,
46
+ "boa_token_id": 256000,
47
+ "boi_token_id": 255999,
48
+ "dtype": "bfloat16",
49
+ "eoa_token_id": 258883,
50
+ "eoa_token_index": 258883,
51
+ "eoi_token_id": 258882,
52
+ "eos_token_id": [
53
+ 1,
54
+ 106
55
+ ],
56
+ "image_token_id": 258880,
57
+ "initializer_range": 0.02,
58
+ "model_type": "gemma4",
59
+ "text_config": {
60
+ "attention_bias": false,
61
+ "attention_dropout": 0.0,
62
+ "attention_k_eq_v": false,
63
+ "bos_token_id": 2,
64
+ "dtype": "bfloat16",
65
+ "enable_moe_block": false,
66
+ "eos_token_id": 1,
67
+ "expert_intermediate_size": null,
68
+ "final_logit_softcapping": 30.0,
69
+ "global_head_dim": 512,
70
+ "head_dim": 256,
71
+ "hidden_activation": "gelu_pytorch_tanh",
72
+ "hidden_size": 2560,
73
+ "hidden_size_per_layer_input": 256,
74
  "initializer_range": 0.02,
75
+ "intermediate_size": 10240,
76
+ "layer_types": [
77
+ "sliding_attention",
78
+ "sliding_attention",
79
+ "sliding_attention",
80
+ "sliding_attention",
81
+ "sliding_attention",
82
+ "full_attention",
83
+ "sliding_attention",
84
+ "sliding_attention",
85
+ "sliding_attention",
86
+ "sliding_attention",
87
+ "sliding_attention",
88
+ "full_attention",
89
+ "sliding_attention",
90
+ "sliding_attention",
91
+ "sliding_attention",
92
+ "sliding_attention",
93
+ "sliding_attention",
94
+ "full_attention",
95
+ "sliding_attention",
96
+ "sliding_attention",
97
+ "sliding_attention",
98
+ "sliding_attention",
99
+ "sliding_attention",
100
+ "full_attention",
101
+ "sliding_attention",
102
+ "sliding_attention",
103
+ "sliding_attention",
104
+ "sliding_attention",
105
+ "sliding_attention",
106
+ "full_attention",
107
+ "sliding_attention",
108
+ "sliding_attention",
109
+ "sliding_attention",
110
+ "sliding_attention",
111
+ "sliding_attention",
112
+ "full_attention",
113
+ "sliding_attention",
114
+ "sliding_attention",
115
+ "sliding_attention",
116
+ "sliding_attention",
117
+ "sliding_attention",
118
+ "full_attention"
119
+ ],
120
+ "max_position_embeddings": 131072,
121
+ "model_type": "gemma4_text",
122
+ "num_attention_heads": 8,
123
+ "num_experts": null,
124
+ "num_global_key_value_heads": null,
125
+ "num_hidden_layers": 42,
126
+ "num_key_value_heads": 2,
127
+ "num_kv_shared_layers": 18,
128
+ "pad_token_id": 0,
129
+ "rms_norm_eps": 1e-06,
130
+ "rope_parameters": {
131
+ "full_attention": {
132
+ "partial_rotary_factor": 0.25,
133
+ "rope_theta": 1000000.0,
134
+ "rope_type": "proportional"
135
+ },
136
+ "sliding_attention": {
137
+ "rope_theta": 10000.0,
138
+ "rope_type": "default"
139
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
140
  },
141
+ "sliding_window": 512,
142
+ "tie_word_embeddings": true,
143
+ "top_k_experts": null,
144
+ "use_bidirectional_attention": null,
145
+ "use_cache": true,
146
+ "use_double_wide_mlp": false,
147
+ "vocab_size": 262144,
148
+ "vocab_size_per_layer_input": 262144
149
+ },
150
+ "tie_word_embeddings": true,
151
+ "transformers_version": "5.5.0.dev0",
152
+ "video_token_id": 258884,
153
+ "vision_config": {
154
+ "_name_or_path": "",
155
+ "architectures": null,
156
+ "attention_bias": false,
157
+ "attention_dropout": 0.0,
158
+ "chunk_size_feed_forward": 0,
159
+ "default_output_length": 280,
160
+ "dtype": "bfloat16",
161
+ "global_head_dim": 64,
162
+ "head_dim": 64,
163
+ "hidden_activation": "gelu_pytorch_tanh",
164
+ "hidden_size": 768,
165
+ "id2label": {
166
+ "0": "LABEL_0",
167
+ "1": "LABEL_1"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
  },
169
+ "initializer_range": 0.02,
170
+ "intermediate_size": 3072,
171
+ "is_encoder_decoder": false,
172
+ "label2id": {
173
+ "LABEL_0": 0,
174
+ "LABEL_1": 1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
175
  },
176
+ "max_position_embeddings": 131072,
177
+ "model_type": "gemma4_vision",
178
+ "num_attention_heads": 12,
179
+ "num_hidden_layers": 16,
180
+ "num_key_value_heads": 12,
181
+ "output_attentions": false,
182
+ "output_hidden_states": false,
183
+ "patch_size": 16,
184
+ "pooling_kernel_size": 3,
185
+ "position_embedding_size": 10240,
186
+ "problem_type": null,
187
+ "return_dict": true,
188
+ "rms_norm_eps": 1e-06,
189
+ "rope_parameters": {
190
+ "rope_theta": 100.0,
191
+ "rope_type": "default"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
192
  },
193
+ "standardize": false,
194
+ "use_clipped_linears": true
195
+ },
196
+ "vision_soft_tokens_per_image": 280
197
+ }