3v324v23 committed on
Commit
b16a95e
·
1 Parent(s): b41d1de

fix vision

Browse files
config.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "architectures": [
3
- "Qwen3_5ForCausalLM"
4
  ],
5
  "attention_bias": false,
6
  "attention_dropout": 0.0,
@@ -57,7 +57,7 @@
57
  "max_position_embeddings": 262144,
58
  "mlp_only_layers": [],
59
  "model_name": "./zora-dpo-v2-merged",
60
- "model_type": "qwen3_5_text",
61
  "mtp_num_hidden_layers": 1,
62
  "mtp_use_dedicated_embeddings": false,
63
  "num_attention_heads": 16,
@@ -82,4 +82,4 @@
82
  "unsloth_version": "2026.3.4",
83
  "use_cache": true,
84
  "vocab_size": 248320
85
- }
 
1
  {
2
  "architectures": [
3
+ "Qwen3_5ForConditionalGeneration"
4
  ],
5
  "attention_bias": false,
6
  "attention_dropout": 0.0,
 
57
  "max_position_embeddings": 262144,
58
  "mlp_only_layers": [],
59
  "model_name": "./zora-dpo-v2-merged",
60
+ "model_type": "qwen3_5",
61
  "mtp_num_hidden_layers": 1,
62
  "mtp_use_dedicated_embeddings": false,
63
  "num_attention_heads": 16,
 
82
  "unsloth_version": "2026.3.4",
83
  "use_cache": true,
84
  "vocab_size": 248320
85
+ }
model-00006-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21cbc389b5f199f015ec197b0271831c4bf3f04d8f9cba1a62434f8540ac94de
3
+ size 1398640840
model.safetensors.index.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "metadata": {
3
  "total_parameters": 8953803264,
4
- "total_size": 17907606528
5
  },
6
  "weight_map": {
7
  "lm_head.weight": "model-00001-of-00005.safetensors",
@@ -430,6 +430,354 @@
430
  "model.language_model.layers.9.mlp.gate_proj.weight": "model-00003-of-00005.safetensors",
431
  "model.language_model.layers.9.mlp.up_proj.weight": "model-00003-of-00005.safetensors",
432
  "model.language_model.layers.9.post_attention_layernorm.weight": "model-00003-of-00005.safetensors",
433
- "model.language_model.norm.weight": "model-00005-of-00005.safetensors"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
434
  }
435
- }
 
1
  {
2
  "metadata": {
3
  "total_parameters": 8953803264,
4
+ "total_size": 19306208736
5
  },
6
  "weight_map": {
7
  "lm_head.weight": "model-00001-of-00005.safetensors",
 
430
  "model.language_model.layers.9.mlp.gate_proj.weight": "model-00003-of-00005.safetensors",
431
  "model.language_model.layers.9.mlp.up_proj.weight": "model-00003-of-00005.safetensors",
432
  "model.language_model.layers.9.post_attention_layernorm.weight": "model-00003-of-00005.safetensors",
433
+ "model.language_model.norm.weight": "model-00005-of-00005.safetensors",
434
+ "model.visual.blocks.4.norm1.bias": "model-00006-of-00006.safetensors",
435
+ "model.visual.blocks.4.norm2.bias": "model-00006-of-00006.safetensors",
436
+ "model.visual.blocks.6.norm2.weight": "model-00006-of-00006.safetensors",
437
+ "model.visual.blocks.5.norm2.weight": "model-00006-of-00006.safetensors",
438
+ "model.visual.blocks.20.mlp.linear_fc2.bias": "model-00006-of-00006.safetensors",
439
+ "model.visual.blocks.24.norm1.bias": "model-00006-of-00006.safetensors",
440
+ "model.visual.blocks.23.mlp.linear_fc1.weight": "model-00006-of-00006.safetensors",
441
+ "model.visual.blocks.18.attn.proj.bias": "model-00006-of-00006.safetensors",
442
+ "model.visual.blocks.1.norm1.bias": "model-00006-of-00006.safetensors",
443
+ "model.visual.blocks.15.norm2.bias": "model-00006-of-00006.safetensors",
444
+ "model.visual.blocks.7.attn.qkv.bias": "model-00006-of-00006.safetensors",
445
+ "model.visual.blocks.26.attn.qkv.weight": "model-00006-of-00006.safetensors",
446
+ "model.visual.blocks.0.norm1.bias": "model-00006-of-00006.safetensors",
447
+ "model.visual.blocks.17.attn.proj.bias": "model-00006-of-00006.safetensors",
448
+ "model.visual.blocks.11.mlp.linear_fc2.weight": "model-00006-of-00006.safetensors",
449
+ "model.visual.blocks.20.attn.proj.bias": "model-00006-of-00006.safetensors",
450
+ "model.visual.blocks.0.attn.qkv.weight": "model-00006-of-00006.safetensors",
451
+ "model.visual.blocks.0.norm1.weight": "model-00006-of-00006.safetensors",
452
+ "model.visual.blocks.11.norm1.bias": "model-00006-of-00006.safetensors",
453
+ "model.visual.blocks.11.mlp.linear_fc1.weight": "model-00006-of-00006.safetensors",
454
+ "model.visual.blocks.6.mlp.linear_fc2.weight": "model-00006-of-00006.safetensors",
455
+ "model.visual.blocks.14.mlp.linear_fc2.weight": "model-00006-of-00006.safetensors",
456
+ "model.visual.blocks.24.attn.qkv.bias": "model-00006-of-00006.safetensors",
457
+ "model.visual.blocks.16.mlp.linear_fc1.bias": "model-00006-of-00006.safetensors",
458
+ "model.visual.blocks.12.attn.proj.bias": "model-00006-of-00006.safetensors",
459
+ "model.visual.blocks.20.attn.proj.weight": "model-00006-of-00006.safetensors",
460
+ "model.visual.blocks.20.mlp.linear_fc2.weight": "model-00006-of-00006.safetensors",
461
+ "mtp.layers.0.self_attn.k_norm.weight": "model-00006-of-00006.safetensors",
462
+ "model.visual.blocks.0.mlp.linear_fc1.weight": "model-00006-of-00006.safetensors",
463
+ "model.visual.blocks.3.attn.qkv.weight": "model-00006-of-00006.safetensors",
464
+ "model.visual.blocks.25.mlp.linear_fc2.bias": "model-00006-of-00006.safetensors",
465
+ "model.visual.blocks.2.norm1.weight": "model-00006-of-00006.safetensors",
466
+ "model.visual.blocks.5.attn.proj.weight": "model-00006-of-00006.safetensors",
467
+ "model.visual.blocks.7.norm2.weight": "model-00006-of-00006.safetensors",
468
+ "mtp.layers.0.self_attn.o_proj.weight": "model-00006-of-00006.safetensors",
469
+ "model.visual.blocks.24.mlp.linear_fc2.weight": "model-00006-of-00006.safetensors",
470
+ "model.visual.blocks.12.attn.qkv.weight": "model-00006-of-00006.safetensors",
471
+ "model.visual.blocks.2.norm1.bias": "model-00006-of-00006.safetensors",
472
+ "model.visual.blocks.4.mlp.linear_fc2.bias": "model-00006-of-00006.safetensors",
473
+ "model.visual.blocks.24.mlp.linear_fc1.weight": "model-00006-of-00006.safetensors",
474
+ "model.visual.blocks.20.mlp.linear_fc1.weight": "model-00006-of-00006.safetensors",
475
+ "model.visual.blocks.15.norm1.weight": "model-00006-of-00006.safetensors",
476
+ "model.visual.blocks.9.attn.proj.bias": "model-00006-of-00006.safetensors",
477
+ "model.visual.blocks.24.attn.qkv.weight": "model-00006-of-00006.safetensors",
478
+ "model.visual.blocks.16.attn.qkv.bias": "model-00006-of-00006.safetensors",
479
+ "model.visual.blocks.13.attn.qkv.bias": "model-00006-of-00006.safetensors",
480
+ "model.visual.merger.linear_fc1.weight": "model-00006-of-00006.safetensors",
481
+ "mtp.norm.weight": "model-00006-of-00006.safetensors",
482
+ "model.visual.blocks.2.mlp.linear_fc1.weight": "model-00006-of-00006.safetensors",
483
+ "model.visual.blocks.15.mlp.linear_fc1.bias": "model-00006-of-00006.safetensors",
484
+ "model.visual.blocks.1.mlp.linear_fc1.bias": "model-00006-of-00006.safetensors",
485
+ "model.visual.blocks.25.attn.qkv.weight": "model-00006-of-00006.safetensors",
486
+ "model.visual.blocks.12.mlp.linear_fc2.bias": "model-00006-of-00006.safetensors",
487
+ "model.visual.blocks.9.mlp.linear_fc2.weight": "model-00006-of-00006.safetensors",
488
+ "model.visual.blocks.13.attn.proj.bias": "model-00006-of-00006.safetensors",
489
+ "model.visual.blocks.22.norm2.bias": "model-00006-of-00006.safetensors",
490
+ "model.visual.blocks.7.norm1.weight": "model-00006-of-00006.safetensors",
491
+ "model.visual.blocks.0.mlp.linear_fc1.bias": "model-00006-of-00006.safetensors",
492
+ "mtp.layers.0.mlp.down_proj.weight": "model-00006-of-00006.safetensors",
493
+ "model.visual.blocks.11.attn.proj.weight": "model-00006-of-00006.safetensors",
494
+ "mtp.layers.0.self_attn.v_proj.weight": "model-00006-of-00006.safetensors",
495
+ "model.visual.blocks.4.attn.proj.bias": "model-00006-of-00006.safetensors",
496
+ "model.visual.blocks.25.mlp.linear_fc1.bias": "model-00006-of-00006.safetensors",
497
+ "model.visual.blocks.18.norm2.bias": "model-00006-of-00006.safetensors",
498
+ "model.visual.blocks.13.norm2.bias": "model-00006-of-00006.safetensors",
499
+ "model.visual.blocks.4.mlp.linear_fc2.weight": "model-00006-of-00006.safetensors",
500
+ "model.visual.blocks.7.mlp.linear_fc2.weight": "model-00006-of-00006.safetensors",
501
+ "model.visual.blocks.24.norm2.bias": "model-00006-of-00006.safetensors",
502
+ "model.visual.blocks.14.mlp.linear_fc1.weight": "model-00006-of-00006.safetensors",
503
+ "model.visual.blocks.2.mlp.linear_fc2.weight": "model-00006-of-00006.safetensors",
504
+ "model.visual.blocks.21.norm1.weight": "model-00006-of-00006.safetensors",
505
+ "model.visual.blocks.17.norm1.weight": "model-00006-of-00006.safetensors",
506
+ "model.visual.blocks.15.norm2.weight": "model-00006-of-00006.safetensors",
507
+ "mtp.layers.0.self_attn.q_proj.weight": "model-00006-of-00006.safetensors",
508
+ "model.visual.blocks.19.attn.qkv.bias": "model-00006-of-00006.safetensors",
509
+ "model.visual.blocks.23.norm1.bias": "model-00006-of-00006.safetensors",
510
+ "model.visual.blocks.17.attn.qkv.weight": "model-00006-of-00006.safetensors",
511
+ "model.visual.blocks.0.mlp.linear_fc2.weight": "model-00006-of-00006.safetensors",
512
+ "model.visual.blocks.15.attn.proj.bias": "model-00006-of-00006.safetensors",
513
+ "model.visual.blocks.23.attn.proj.bias": "model-00006-of-00006.safetensors",
514
+ "model.visual.blocks.13.mlp.linear_fc2.weight": "model-00006-of-00006.safetensors",
515
+ "model.visual.blocks.12.norm2.bias": "model-00006-of-00006.safetensors",
516
+ "model.visual.blocks.15.norm1.bias": "model-00006-of-00006.safetensors",
517
+ "model.visual.blocks.4.attn.qkv.weight": "model-00006-of-00006.safetensors",
518
+ "model.visual.blocks.14.attn.qkv.weight": "model-00006-of-00006.safetensors",
519
+ "model.visual.blocks.26.mlp.linear_fc2.bias": "model-00006-of-00006.safetensors",
520
+ "model.visual.blocks.16.mlp.linear_fc2.weight": "model-00006-of-00006.safetensors",
521
+ "model.visual.blocks.5.mlp.linear_fc2.weight": "model-00006-of-00006.safetensors",
522
+ "model.visual.blocks.26.norm2.bias": "model-00006-of-00006.safetensors",
523
+ "model.visual.blocks.3.norm2.weight": "model-00006-of-00006.safetensors",
524
+ "model.visual.blocks.14.norm1.bias": "model-00006-of-00006.safetensors",
525
+ "model.visual.blocks.5.attn.qkv.weight": "model-00006-of-00006.safetensors",
526
+ "model.visual.blocks.10.mlp.linear_fc1.bias": "model-00006-of-00006.safetensors",
527
+ "model.visual.blocks.20.norm2.bias": "model-00006-of-00006.safetensors",
528
+ "model.visual.blocks.9.mlp.linear_fc1.bias": "model-00006-of-00006.safetensors",
529
+ "model.visual.blocks.22.norm2.weight": "model-00006-of-00006.safetensors",
530
+ "model.visual.blocks.13.attn.qkv.weight": "model-00006-of-00006.safetensors",
531
+ "model.visual.blocks.9.attn.qkv.bias": "model-00006-of-00006.safetensors",
532
+ "model.visual.blocks.9.attn.proj.weight": "model-00006-of-00006.safetensors",
533
+ "model.visual.blocks.1.attn.proj.weight": "model-00006-of-00006.safetensors",
534
+ "model.visual.blocks.8.mlp.linear_fc2.weight": "model-00006-of-00006.safetensors",
535
+ "model.visual.blocks.12.norm1.weight": "model-00006-of-00006.safetensors",
536
+ "model.visual.blocks.18.norm1.weight": "model-00006-of-00006.safetensors",
537
+ "model.visual.blocks.7.attn.proj.weight": "model-00006-of-00006.safetensors",
538
+ "model.visual.blocks.22.mlp.linear_fc2.bias": "model-00006-of-00006.safetensors",
539
+ "model.visual.blocks.23.attn.qkv.bias": "model-00006-of-00006.safetensors",
540
+ "model.visual.blocks.8.attn.proj.bias": "model-00006-of-00006.safetensors",
541
+ "model.visual.blocks.8.mlp.linear_fc2.bias": "model-00006-of-00006.safetensors",
542
+ "model.visual.blocks.13.mlp.linear_fc1.bias": "model-00006-of-00006.safetensors",
543
+ "model.visual.blocks.2.mlp.linear_fc1.bias": "model-00006-of-00006.safetensors",
544
+ "model.visual.blocks.17.norm2.bias": "model-00006-of-00006.safetensors",
545
+ "model.visual.blocks.19.attn.proj.weight": "model-00006-of-00006.safetensors",
546
+ "model.visual.blocks.5.mlp.linear_fc2.bias": "model-00006-of-00006.safetensors",
547
+ "model.visual.blocks.18.attn.qkv.bias": "model-00006-of-00006.safetensors",
548
+ "model.visual.blocks.9.mlp.linear_fc2.bias": "model-00006-of-00006.safetensors",
549
+ "model.visual.blocks.10.attn.proj.weight": "model-00006-of-00006.safetensors",
550
+ "model.visual.blocks.9.norm2.weight": "model-00006-of-00006.safetensors",
551
+ "model.visual.blocks.26.norm1.weight": "model-00006-of-00006.safetensors",
552
+ "model.visual.blocks.26.norm1.bias": "model-00006-of-00006.safetensors",
553
+ "model.visual.blocks.20.norm1.weight": "model-00006-of-00006.safetensors",
554
+ "model.visual.blocks.8.norm2.weight": "model-00006-of-00006.safetensors",
555
+ "model.visual.blocks.1.attn.proj.bias": "model-00006-of-00006.safetensors",
556
+ "mtp.layers.0.mlp.gate_proj.weight": "model-00006-of-00006.safetensors",
557
+ "model.visual.blocks.9.norm1.bias": "model-00006-of-00006.safetensors",
558
+ "model.visual.blocks.14.attn.qkv.bias": "model-00006-of-00006.safetensors",
559
+ "model.visual.blocks.17.mlp.linear_fc2.bias": "model-00006-of-00006.safetensors",
560
+ "model.visual.blocks.10.mlp.linear_fc2.bias": "model-00006-of-00006.safetensors",
561
+ "model.visual.blocks.17.mlp.linear_fc1.bias": "model-00006-of-00006.safetensors",
562
+ "model.visual.blocks.10.attn.qkv.bias": "model-00006-of-00006.safetensors",
563
+ "model.visual.blocks.16.norm2.bias": "model-00006-of-00006.safetensors",
564
+ "model.visual.blocks.12.mlp.linear_fc1.bias": "model-00006-of-00006.safetensors",
565
+ "model.visual.blocks.0.attn.proj.weight": "model-00006-of-00006.safetensors",
566
+ "model.visual.blocks.8.attn.qkv.bias": "model-00006-of-00006.safetensors",
567
+ "model.visual.blocks.26.mlp.linear_fc1.weight": "model-00006-of-00006.safetensors",
568
+ "model.visual.blocks.12.attn.proj.weight": "model-00006-of-00006.safetensors",
569
+ "model.visual.blocks.24.mlp.linear_fc1.bias": "model-00006-of-00006.safetensors",
570
+ "model.visual.merger.linear_fc2.bias": "model-00006-of-00006.safetensors",
571
+ "model.visual.pos_embed.weight": "model-00006-of-00006.safetensors",
572
+ "model.visual.blocks.11.mlp.linear_fc1.bias": "model-00006-of-00006.safetensors",
573
+ "model.visual.blocks.15.mlp.linear_fc1.weight": "model-00006-of-00006.safetensors",
574
+ "model.visual.blocks.7.mlp.linear_fc1.weight": "model-00006-of-00006.safetensors",
575
+ "model.visual.blocks.3.mlp.linear_fc2.weight": "model-00006-of-00006.safetensors",
576
+ "model.visual.blocks.10.attn.proj.bias": "model-00006-of-00006.safetensors",
577
+ "model.visual.blocks.16.attn.proj.bias": "model-00006-of-00006.safetensors",
578
+ "model.visual.blocks.19.norm1.bias": "model-00006-of-00006.safetensors",
579
+ "model.visual.blocks.22.mlp.linear_fc2.weight": "model-00006-of-00006.safetensors",
580
+ "model.visual.blocks.23.attn.qkv.weight": "model-00006-of-00006.safetensors",
581
+ "model.visual.blocks.18.mlp.linear_fc2.bias": "model-00006-of-00006.safetensors",
582
+ "model.visual.blocks.22.attn.qkv.bias": "model-00006-of-00006.safetensors",
583
+ "model.visual.blocks.26.attn.qkv.bias": "model-00006-of-00006.safetensors",
584
+ "model.visual.blocks.23.mlp.linear_fc2.bias": "model-00006-of-00006.safetensors",
585
+ "model.visual.blocks.0.attn.qkv.bias": "model-00006-of-00006.safetensors",
586
+ "model.visual.blocks.0.norm2.weight": "model-00006-of-00006.safetensors",
587
+ "model.visual.blocks.14.attn.proj.weight": "model-00006-of-00006.safetensors",
588
+ "model.visual.blocks.2.mlp.linear_fc2.bias": "model-00006-of-00006.safetensors",
589
+ "model.visual.blocks.2.norm2.weight": "model-00006-of-00006.safetensors",
590
+ "model.visual.blocks.22.attn.qkv.weight": "model-00006-of-00006.safetensors",
591
+ "model.visual.blocks.4.attn.proj.weight": "model-00006-of-00006.safetensors",
592
+ "model.visual.blocks.3.attn.qkv.bias": "model-00006-of-00006.safetensors",
593
+ "model.visual.blocks.22.attn.proj.weight": "model-00006-of-00006.safetensors",
594
+ "model.visual.blocks.20.attn.qkv.weight": "model-00006-of-00006.safetensors",
595
+ "model.visual.blocks.15.mlp.linear_fc2.bias": "model-00006-of-00006.safetensors",
596
+ "model.visual.blocks.4.mlp.linear_fc1.weight": "model-00006-of-00006.safetensors",
597
+ "model.visual.blocks.5.norm1.weight": "model-00006-of-00006.safetensors",
598
+ "mtp.fc.weight": "model-00006-of-00006.safetensors",
599
+ "model.visual.blocks.2.attn.proj.weight": "model-00006-of-00006.safetensors",
600
+ "model.visual.blocks.15.mlp.linear_fc2.weight": "model-00006-of-00006.safetensors",
601
+ "model.visual.blocks.8.attn.qkv.weight": "model-00006-of-00006.safetensors",
602
+ "model.visual.blocks.21.attn.proj.bias": "model-00006-of-00006.safetensors",
603
+ "model.visual.merger.norm.weight": "model-00006-of-00006.safetensors",
604
+ "model.visual.blocks.22.mlp.linear_fc1.bias": "model-00006-of-00006.safetensors",
605
+ "model.visual.blocks.21.norm1.bias": "model-00006-of-00006.safetensors",
606
+ "model.visual.blocks.13.norm1.weight": "model-00006-of-00006.safetensors",
607
+ "model.visual.blocks.18.mlp.linear_fc2.weight": "model-00006-of-00006.safetensors",
608
+ "model.visual.blocks.3.norm2.bias": "model-00006-of-00006.safetensors",
609
+ "model.visual.blocks.17.norm1.bias": "model-00006-of-00006.safetensors",
610
+ "model.visual.blocks.11.attn.proj.bias": "model-00006-of-00006.safetensors",
611
+ "model.visual.blocks.19.attn.qkv.weight": "model-00006-of-00006.safetensors",
612
+ "model.visual.blocks.4.norm1.weight": "model-00006-of-00006.safetensors",
613
+ "model.visual.blocks.26.norm2.weight": "model-00006-of-00006.safetensors",
614
+ "model.visual.blocks.20.mlp.linear_fc1.bias": "model-00006-of-00006.safetensors",
615
+ "model.visual.blocks.7.norm2.bias": "model-00006-of-00006.safetensors",
616
+ "model.visual.blocks.14.attn.proj.bias": "model-00006-of-00006.safetensors",
617
+ "model.visual.blocks.11.norm2.weight": "model-00006-of-00006.safetensors",
618
+ "model.visual.blocks.19.mlp.linear_fc1.bias": "model-00006-of-00006.safetensors",
619
+ "model.visual.blocks.6.attn.proj.weight": "model-00006-of-00006.safetensors",
620
+ "model.visual.blocks.22.mlp.linear_fc1.weight": "model-00006-of-00006.safetensors",
621
+ "model.visual.blocks.26.attn.proj.weight": "model-00006-of-00006.safetensors",
622
+ "model.visual.blocks.17.attn.proj.weight": "model-00006-of-00006.safetensors",
623
+ "model.visual.blocks.26.attn.proj.bias": "model-00006-of-00006.safetensors",
624
+ "model.visual.blocks.19.norm2.bias": "model-00006-of-00006.safetensors",
625
+ "model.visual.blocks.21.mlp.linear_fc1.bias": "model-00006-of-00006.safetensors",
626
+ "model.visual.blocks.19.mlp.linear_fc1.weight": "model-00006-of-00006.safetensors",
627
+ "model.visual.blocks.13.mlp.linear_fc1.weight": "model-00006-of-00006.safetensors",
628
+ "model.visual.blocks.5.norm2.bias": "model-00006-of-00006.safetensors",
629
+ "model.visual.blocks.16.norm2.weight": "model-00006-of-00006.safetensors",
630
+ "model.visual.blocks.21.mlp.linear_fc2.weight": "model-00006-of-00006.safetensors",
631
+ "model.visual.blocks.16.attn.proj.weight": "model-00006-of-00006.safetensors",
632
+ "model.visual.blocks.1.norm2.weight": "model-00006-of-00006.safetensors",
633
+ "model.visual.blocks.6.attn.qkv.weight": "model-00006-of-00006.safetensors",
634
+ "model.visual.blocks.16.mlp.linear_fc2.bias": "model-00006-of-00006.safetensors",
635
+ "model.visual.blocks.18.norm1.bias": "model-00006-of-00006.safetensors",
636
+ "model.visual.blocks.6.attn.proj.bias": "model-00006-of-00006.safetensors",
637
+ "model.visual.blocks.5.attn.qkv.bias": "model-00006-of-00006.safetensors",
638
+ "model.visual.blocks.17.mlp.linear_fc2.weight": "model-00006-of-00006.safetensors",
639
+ "mtp.pre_fc_norm_embedding.weight": "model-00006-of-00006.safetensors",
640
+ "model.visual.blocks.0.attn.proj.bias": "model-00006-of-00006.safetensors",
641
+ "model.visual.blocks.1.attn.qkv.bias": "model-00006-of-00006.safetensors",
642
+ "model.visual.blocks.21.norm2.bias": "model-00006-of-00006.safetensors",
643
+ "model.visual.blocks.23.attn.proj.weight": "model-00006-of-00006.safetensors",
644
+ "model.visual.blocks.9.norm2.bias": "model-00006-of-00006.safetensors",
645
+ "model.visual.blocks.10.norm2.bias": "model-00006-of-00006.safetensors",
646
+ "model.visual.blocks.1.mlp.linear_fc2.bias": "model-00006-of-00006.safetensors",
647
+ "model.visual.blocks.22.norm1.weight": "model-00006-of-00006.safetensors",
648
+ "model.visual.blocks.11.attn.qkv.bias": "model-00006-of-00006.safetensors",
649
+ "model.visual.blocks.20.norm1.bias": "model-00006-of-00006.safetensors",
650
+ "model.visual.blocks.18.attn.qkv.weight": "model-00006-of-00006.safetensors",
651
+ "model.visual.blocks.3.norm1.bias": "model-00006-of-00006.safetensors",
652
+ "model.visual.merger.norm.bias": "model-00006-of-00006.safetensors",
653
+ "model.visual.blocks.19.mlp.linear_fc2.weight": "model-00006-of-00006.safetensors",
654
+ "model.visual.blocks.10.norm1.bias": "model-00006-of-00006.safetensors",
655
+ "model.visual.patch_embed.proj.weight": "model-00006-of-00006.safetensors",
656
+ "model.visual.blocks.17.mlp.linear_fc1.weight": "model-00006-of-00006.safetensors",
657
+ "model.visual.blocks.16.attn.qkv.weight": "model-00006-of-00006.safetensors",
658
+ "model.visual.blocks.21.attn.proj.weight": "model-00006-of-00006.safetensors",
659
+ "model.visual.blocks.6.norm1.bias": "model-00006-of-00006.safetensors",
660
+ "model.visual.blocks.8.norm2.bias": "model-00006-of-00006.safetensors",
661
+ "model.visual.blocks.2.norm2.bias": "model-00006-of-00006.safetensors",
662
+ "model.visual.blocks.5.norm1.bias": "model-00006-of-00006.safetensors",
663
+ "model.visual.blocks.8.attn.proj.weight": "model-00006-of-00006.safetensors",
664
+ "model.visual.blocks.25.mlp.linear_fc1.weight": "model-00006-of-00006.safetensors",
665
+ "model.visual.blocks.23.mlp.linear_fc1.bias": "model-00006-of-00006.safetensors",
666
+ "model.visual.blocks.22.attn.proj.bias": "model-00006-of-00006.safetensors",
667
+ "model.visual.blocks.26.mlp.linear_fc1.bias": "model-00006-of-00006.safetensors",
668
+ "model.visual.blocks.9.attn.qkv.weight": "model-00006-of-00006.safetensors",
669
+ "model.visual.blocks.7.attn.qkv.weight": "model-00006-of-00006.safetensors",
670
+ "model.visual.blocks.22.norm1.bias": "model-00006-of-00006.safetensors",
671
+ "mtp.pre_fc_norm_hidden.weight": "model-00006-of-00006.safetensors",
672
+ "model.visual.blocks.8.mlp.linear_fc1.bias": "model-00006-of-00006.safetensors",
673
+ "model.visual.blocks.10.attn.qkv.weight": "model-00006-of-00006.safetensors",
674
+ "model.visual.blocks.11.norm2.bias": "model-00006-of-00006.safetensors",
675
+ "mtp.layers.0.input_layernorm.weight": "model-00006-of-00006.safetensors",
676
+ "model.visual.blocks.24.attn.proj.weight": "model-00006-of-00006.safetensors",
677
+ "model.visual.blocks.25.norm1.weight": "model-00006-of-00006.safetensors",
678
+ "model.visual.blocks.9.norm1.weight": "model-00006-of-00006.safetensors",
679
+ "mtp.layers.0.self_attn.k_proj.weight": "model-00006-of-00006.safetensors",
680
+ "model.visual.blocks.8.norm1.weight": "model-00006-of-00006.safetensors",
681
+ "model.visual.blocks.14.norm2.bias": "model-00006-of-00006.safetensors",
682
+ "model.visual.blocks.8.norm1.bias": "model-00006-of-00006.safetensors",
683
+ "model.visual.blocks.14.mlp.linear_fc2.bias": "model-00006-of-00006.safetensors",
684
+ "model.visual.blocks.16.mlp.linear_fc1.weight": "model-00006-of-00006.safetensors",
685
+ "model.visual.blocks.25.norm2.weight": "model-00006-of-00006.safetensors",
686
+ "model.visual.blocks.0.norm2.bias": "model-00006-of-00006.safetensors",
687
+ "model.visual.blocks.25.mlp.linear_fc2.weight": "model-00006-of-00006.safetensors",
688
+ "model.visual.blocks.23.norm2.weight": "model-00006-of-00006.safetensors",
689
+ "model.visual.blocks.4.attn.qkv.bias": "model-00006-of-00006.safetensors",
690
+ "model.visual.blocks.12.mlp.linear_fc2.weight": "model-00006-of-00006.safetensors",
691
+ "model.visual.blocks.24.mlp.linear_fc2.bias": "model-00006-of-00006.safetensors",
692
+ "model.visual.patch_embed.proj.bias": "model-00006-of-00006.safetensors",
693
+ "model.visual.blocks.4.norm2.weight": "model-00006-of-00006.safetensors",
694
+ "mtp.layers.0.mlp.up_proj.weight": "model-00006-of-00006.safetensors",
695
+ "model.visual.blocks.2.attn.qkv.weight": "model-00006-of-00006.safetensors",
696
+ "model.visual.blocks.6.mlp.linear_fc1.weight": "model-00006-of-00006.safetensors",
697
+ "model.visual.blocks.23.norm2.bias": "model-00006-of-00006.safetensors",
698
+ "model.visual.blocks.1.norm2.bias": "model-00006-of-00006.safetensors",
699
+ "model.visual.blocks.11.mlp.linear_fc2.bias": "model-00006-of-00006.safetensors",
700
+ "model.visual.blocks.19.attn.proj.bias": "model-00006-of-00006.safetensors",
701
+ "model.visual.blocks.6.mlp.linear_fc1.bias": "model-00006-of-00006.safetensors",
702
+ "mtp.layers.0.post_attention_layernorm.weight": "model-00006-of-00006.safetensors",
703
+ "model.visual.blocks.23.norm1.weight": "model-00006-of-00006.safetensors",
704
+ "model.visual.blocks.6.norm1.weight": "model-00006-of-00006.safetensors",
705
+ "model.visual.blocks.2.attn.qkv.bias": "model-00006-of-00006.safetensors",
706
+ "model.visual.blocks.7.attn.proj.bias": "model-00006-of-00006.safetensors",
707
+ "model.visual.merger.linear_fc1.bias": "model-00006-of-00006.safetensors",
708
+ "model.visual.blocks.15.attn.qkv.bias": "model-00006-of-00006.safetensors",
709
+ "model.visual.blocks.12.attn.qkv.bias": "model-00006-of-00006.safetensors",
710
+ "model.visual.blocks.25.attn.proj.bias": "model-00006-of-00006.safetensors",
711
+ "model.visual.blocks.18.norm2.weight": "model-00006-of-00006.safetensors",
712
+ "model.visual.blocks.1.mlp.linear_fc2.weight": "model-00006-of-00006.safetensors",
713
+ "model.visual.blocks.10.mlp.linear_fc2.weight": "model-00006-of-00006.safetensors",
714
+ "model.visual.blocks.10.norm2.weight": "model-00006-of-00006.safetensors",
715
+ "model.visual.blocks.0.mlp.linear_fc2.bias": "model-00006-of-00006.safetensors",
716
+ "model.visual.blocks.20.norm2.weight": "model-00006-of-00006.safetensors",
717
+ "model.visual.blocks.13.norm2.weight": "model-00006-of-00006.safetensors",
718
+ "model.visual.blocks.19.mlp.linear_fc2.bias": "model-00006-of-00006.safetensors",
719
+ "model.visual.blocks.7.mlp.linear_fc2.bias": "model-00006-of-00006.safetensors",
720
+ "model.visual.blocks.1.norm1.weight": "model-00006-of-00006.safetensors",
721
+ "model.visual.blocks.19.norm2.weight": "model-00006-of-00006.safetensors",
722
+ "model.visual.blocks.17.attn.qkv.bias": "model-00006-of-00006.safetensors",
723
+ "model.visual.blocks.15.attn.qkv.weight": "model-00006-of-00006.safetensors",
724
+ "model.visual.blocks.13.norm1.bias": "model-00006-of-00006.safetensors",
725
+ "model.visual.blocks.25.attn.qkv.bias": "model-00006-of-00006.safetensors",
726
+ "model.visual.blocks.25.norm2.bias": "model-00006-of-00006.safetensors",
727
+ "model.visual.blocks.3.attn.proj.weight": "model-00006-of-00006.safetensors",
728
+ "model.visual.blocks.19.norm1.weight": "model-00006-of-00006.safetensors",
729
+ "mtp.layers.0.self_attn.q_norm.weight": "model-00006-of-00006.safetensors",
730
+ "model.visual.blocks.26.mlp.linear_fc2.weight": "model-00006-of-00006.safetensors",
731
+ "model.visual.blocks.24.attn.proj.bias": "model-00006-of-00006.safetensors",
732
+ "model.visual.blocks.7.norm1.bias": "model-00006-of-00006.safetensors",
733
+ "model.visual.blocks.12.norm2.weight": "model-00006-of-00006.safetensors",
734
+ "model.visual.blocks.10.mlp.linear_fc1.weight": "model-00006-of-00006.safetensors",
735
+ "model.visual.blocks.13.mlp.linear_fc2.bias": "model-00006-of-00006.safetensors",
736
+ "model.visual.blocks.3.mlp.linear_fc1.weight": "model-00006-of-00006.safetensors",
737
+ "model.visual.blocks.21.attn.qkv.bias": "model-00006-of-00006.safetensors",
738
+ "model.visual.blocks.5.mlp.linear_fc1.weight": "model-00006-of-00006.safetensors",
739
+ "model.visual.blocks.11.norm1.weight": "model-00006-of-00006.safetensors",
740
+ "model.visual.blocks.20.attn.qkv.bias": "model-00006-of-00006.safetensors",
741
+ "model.visual.blocks.10.norm1.weight": "model-00006-of-00006.safetensors",
742
+ "model.visual.blocks.9.mlp.linear_fc1.weight": "model-00006-of-00006.safetensors",
743
+ "model.visual.blocks.25.norm1.bias": "model-00006-of-00006.safetensors",
744
+ "model.visual.blocks.1.mlp.linear_fc1.weight": "model-00006-of-00006.safetensors",
745
+ "model.visual.blocks.21.attn.qkv.weight": "model-00006-of-00006.safetensors",
746
+ "model.visual.merger.linear_fc2.weight": "model-00006-of-00006.safetensors",
747
+ "model.visual.blocks.2.attn.proj.bias": "model-00006-of-00006.safetensors",
748
+ "model.visual.blocks.25.attn.proj.weight": "model-00006-of-00006.safetensors",
749
+ "model.visual.blocks.8.mlp.linear_fc1.weight": "model-00006-of-00006.safetensors",
750
+ "model.visual.blocks.13.attn.proj.weight": "model-00006-of-00006.safetensors",
751
+ "model.visual.blocks.16.norm1.weight": "model-00006-of-00006.safetensors",
752
+ "model.visual.blocks.17.norm2.weight": "model-00006-of-00006.safetensors",
753
+ "model.visual.blocks.16.norm1.bias": "model-00006-of-00006.safetensors",
754
+ "model.visual.blocks.21.mlp.linear_fc1.weight": "model-00006-of-00006.safetensors",
755
+ "model.visual.blocks.3.mlp.linear_fc2.bias": "model-00006-of-00006.safetensors",
756
+ "model.visual.blocks.15.attn.proj.weight": "model-00006-of-00006.safetensors",
757
+ "model.visual.blocks.18.mlp.linear_fc1.bias": "model-00006-of-00006.safetensors",
758
+ "model.visual.blocks.3.mlp.linear_fc1.bias": "model-00006-of-00006.safetensors",
759
+ "model.visual.blocks.14.norm2.weight": "model-00006-of-00006.safetensors",
760
+ "model.visual.blocks.21.mlp.linear_fc2.bias": "model-00006-of-00006.safetensors",
761
+ "model.visual.blocks.11.attn.qkv.weight": "model-00006-of-00006.safetensors",
762
+ "model.visual.blocks.3.attn.proj.bias": "model-00006-of-00006.safetensors",
763
+ "model.visual.blocks.23.mlp.linear_fc2.weight": "model-00006-of-00006.safetensors",
764
+ "model.visual.blocks.21.norm2.weight": "model-00006-of-00006.safetensors",
765
+ "model.visual.blocks.6.attn.qkv.bias": "model-00006-of-00006.safetensors",
766
+ "model.visual.blocks.18.attn.proj.weight": "model-00006-of-00006.safetensors",
767
+ "model.visual.blocks.5.mlp.linear_fc1.bias": "model-00006-of-00006.safetensors",
768
+ "model.visual.blocks.12.norm1.bias": "model-00006-of-00006.safetensors",
769
+ "model.visual.blocks.14.mlp.linear_fc1.bias": "model-00006-of-00006.safetensors",
770
+ "model.visual.blocks.5.attn.proj.bias": "model-00006-of-00006.safetensors",
771
+ "model.visual.blocks.6.norm2.bias": "model-00006-of-00006.safetensors",
772
+ "model.visual.blocks.18.mlp.linear_fc1.weight": "model-00006-of-00006.safetensors",
773
+ "model.visual.blocks.6.mlp.linear_fc2.bias": "model-00006-of-00006.safetensors",
774
+ "model.visual.blocks.24.norm2.weight": "model-00006-of-00006.safetensors",
775
+ "model.visual.blocks.12.mlp.linear_fc1.weight": "model-00006-of-00006.safetensors",
776
+ "model.visual.blocks.1.attn.qkv.weight": "model-00006-of-00006.safetensors",
777
+ "model.visual.blocks.4.mlp.linear_fc1.bias": "model-00006-of-00006.safetensors",
778
+ "model.visual.blocks.3.norm1.weight": "model-00006-of-00006.safetensors",
779
+ "model.visual.blocks.7.mlp.linear_fc1.bias": "model-00006-of-00006.safetensors",
780
+ "model.visual.blocks.24.norm1.weight": "model-00006-of-00006.safetensors",
781
+ "model.visual.blocks.14.norm1.weight": "model-00006-of-00006.safetensors"
782
  }
783
+ }