RyanMullins commited on
Commit
ec856cd
·
verified ·
1 Parent(s): 11d45b1

Upload folder using huggingface_hub

Browse files
config.json CHANGED
@@ -147,35 +147,6 @@
147
  "hidden_activation": "gelu_pytorch_tanh",
148
  "hidden_size": 1152,
149
  "intermediate_size": 4304,
150
- "layer_types": [
151
- "full_attention",
152
- "full_attention",
153
- "full_attention",
154
- "full_attention",
155
- "full_attention",
156
- "full_attention",
157
- "full_attention",
158
- "full_attention",
159
- "full_attention",
160
- "full_attention",
161
- "full_attention",
162
- "full_attention",
163
- "full_attention",
164
- "full_attention",
165
- "full_attention",
166
- "full_attention",
167
- "full_attention",
168
- "full_attention",
169
- "full_attention",
170
- "full_attention",
171
- "full_attention",
172
- "full_attention",
173
- "full_attention",
174
- "full_attention",
175
- "full_attention",
176
- "full_attention",
177
- "full_attention"
178
- ],
179
  "max_position_embeddings": 131072,
180
  "model_type": "gemma4_vision",
181
  "num_attention_heads": 16,
@@ -186,12 +157,10 @@
186
  "position_embedding_size": 10240,
187
  "rms_norm_eps": 1e-06,
188
  "rope_parameters": {
189
- "full_attention": {
190
- "rope_theta": 100.0,
191
- "rope_type": "default"
192
- }
193
  },
194
- "use_bidirectional_attention": "vision",
195
  "use_clipped_linears": false
196
  },
197
  "vision_soft_tokens_per_image": 280
 
147
  "hidden_activation": "gelu_pytorch_tanh",
148
  "hidden_size": 1152,
149
  "intermediate_size": 4304,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
150
  "max_position_embeddings": 131072,
151
  "model_type": "gemma4_vision",
152
  "num_attention_heads": 16,
 
157
  "position_embedding_size": 10240,
158
  "rms_norm_eps": 1e-06,
159
  "rope_parameters": {
160
+ "rope_theta": 100.0,
161
+ "rope_type": "default"
 
 
162
  },
163
+ "standardize": true,
164
  "use_clipped_linears": false
165
  },
166
  "vision_soft_tokens_per_image": 280
model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a001086872f8e5d237251df64f490f8bf5c3cdc6c29f6cbd16fe1a045a233dcc
3
- size 49784782228
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dcdca8aebc489295a267a97a1b368d10be542f4b057f19b17bf42341d4b16532
3
+ size 49784788364
model.safetensors.index.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "metadata": {
3
  "total_parameters": 32682372656,
4
- "total_size": 62546173144
5
  },
6
  "weight_map": {
7
  "model.embed_vision.embedding_projection.weight": "model-00001-of-00002.safetensors",
@@ -838,357 +838,359 @@
838
  "model.language_model.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
839
  "model.language_model.norm.weight": "model-00001-of-00002.safetensors",
840
  "model.vision_tower.encoder.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors",
841
- "model.vision_tower.encoder.layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
842
- "model.vision_tower.encoder.layers.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
843
- "model.vision_tower.encoder.layers.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
844
  "model.vision_tower.encoder.layers.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
845
  "model.vision_tower.encoder.layers.0.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
846
  "model.vision_tower.encoder.layers.0.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
847
  "model.vision_tower.encoder.layers.0.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
848
- "model.vision_tower.encoder.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
849
- "model.vision_tower.encoder.layers.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
850
  "model.vision_tower.encoder.layers.0.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
851
- "model.vision_tower.encoder.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
852
- "model.vision_tower.encoder.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
853
  "model.vision_tower.encoder.layers.1.input_layernorm.weight": "model-00001-of-00002.safetensors",
854
- "model.vision_tower.encoder.layers.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
855
- "model.vision_tower.encoder.layers.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
856
- "model.vision_tower.encoder.layers.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
857
  "model.vision_tower.encoder.layers.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
858
  "model.vision_tower.encoder.layers.1.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
859
  "model.vision_tower.encoder.layers.1.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
860
  "model.vision_tower.encoder.layers.1.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
861
- "model.vision_tower.encoder.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
862
- "model.vision_tower.encoder.layers.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
863
  "model.vision_tower.encoder.layers.1.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
864
- "model.vision_tower.encoder.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
865
- "model.vision_tower.encoder.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
866
  "model.vision_tower.encoder.layers.10.input_layernorm.weight": "model-00001-of-00002.safetensors",
867
- "model.vision_tower.encoder.layers.10.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
868
- "model.vision_tower.encoder.layers.10.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
869
- "model.vision_tower.encoder.layers.10.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
870
  "model.vision_tower.encoder.layers.10.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
871
  "model.vision_tower.encoder.layers.10.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
872
  "model.vision_tower.encoder.layers.10.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
873
  "model.vision_tower.encoder.layers.10.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
874
- "model.vision_tower.encoder.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
875
- "model.vision_tower.encoder.layers.10.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
876
  "model.vision_tower.encoder.layers.10.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
877
- "model.vision_tower.encoder.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
878
- "model.vision_tower.encoder.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
879
  "model.vision_tower.encoder.layers.11.input_layernorm.weight": "model-00001-of-00002.safetensors",
880
- "model.vision_tower.encoder.layers.11.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
881
- "model.vision_tower.encoder.layers.11.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
882
- "model.vision_tower.encoder.layers.11.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
883
  "model.vision_tower.encoder.layers.11.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
884
  "model.vision_tower.encoder.layers.11.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
885
  "model.vision_tower.encoder.layers.11.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
886
  "model.vision_tower.encoder.layers.11.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
887
- "model.vision_tower.encoder.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
888
- "model.vision_tower.encoder.layers.11.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
889
  "model.vision_tower.encoder.layers.11.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
890
- "model.vision_tower.encoder.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
891
- "model.vision_tower.encoder.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
892
  "model.vision_tower.encoder.layers.12.input_layernorm.weight": "model-00001-of-00002.safetensors",
893
- "model.vision_tower.encoder.layers.12.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
894
- "model.vision_tower.encoder.layers.12.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
895
- "model.vision_tower.encoder.layers.12.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
896
  "model.vision_tower.encoder.layers.12.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
897
  "model.vision_tower.encoder.layers.12.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
898
  "model.vision_tower.encoder.layers.12.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
899
  "model.vision_tower.encoder.layers.12.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
900
- "model.vision_tower.encoder.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
901
- "model.vision_tower.encoder.layers.12.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
902
  "model.vision_tower.encoder.layers.12.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
903
- "model.vision_tower.encoder.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
904
- "model.vision_tower.encoder.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
905
  "model.vision_tower.encoder.layers.13.input_layernorm.weight": "model-00001-of-00002.safetensors",
906
- "model.vision_tower.encoder.layers.13.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
907
- "model.vision_tower.encoder.layers.13.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
908
- "model.vision_tower.encoder.layers.13.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
909
  "model.vision_tower.encoder.layers.13.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
910
  "model.vision_tower.encoder.layers.13.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
911
  "model.vision_tower.encoder.layers.13.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
912
  "model.vision_tower.encoder.layers.13.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
913
- "model.vision_tower.encoder.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
914
- "model.vision_tower.encoder.layers.13.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
915
  "model.vision_tower.encoder.layers.13.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
916
- "model.vision_tower.encoder.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
917
- "model.vision_tower.encoder.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
918
  "model.vision_tower.encoder.layers.14.input_layernorm.weight": "model-00001-of-00002.safetensors",
919
- "model.vision_tower.encoder.layers.14.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
920
- "model.vision_tower.encoder.layers.14.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
921
- "model.vision_tower.encoder.layers.14.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
922
  "model.vision_tower.encoder.layers.14.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
923
  "model.vision_tower.encoder.layers.14.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
924
  "model.vision_tower.encoder.layers.14.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
925
  "model.vision_tower.encoder.layers.14.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
926
- "model.vision_tower.encoder.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
927
- "model.vision_tower.encoder.layers.14.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
928
  "model.vision_tower.encoder.layers.14.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
929
- "model.vision_tower.encoder.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
930
- "model.vision_tower.encoder.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
931
  "model.vision_tower.encoder.layers.15.input_layernorm.weight": "model-00001-of-00002.safetensors",
932
- "model.vision_tower.encoder.layers.15.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
933
- "model.vision_tower.encoder.layers.15.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
934
- "model.vision_tower.encoder.layers.15.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
935
  "model.vision_tower.encoder.layers.15.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
936
  "model.vision_tower.encoder.layers.15.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
937
  "model.vision_tower.encoder.layers.15.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
938
  "model.vision_tower.encoder.layers.15.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
939
- "model.vision_tower.encoder.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
940
- "model.vision_tower.encoder.layers.15.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
941
  "model.vision_tower.encoder.layers.15.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
942
- "model.vision_tower.encoder.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
943
- "model.vision_tower.encoder.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
944
  "model.vision_tower.encoder.layers.16.input_layernorm.weight": "model-00001-of-00002.safetensors",
945
- "model.vision_tower.encoder.layers.16.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
946
- "model.vision_tower.encoder.layers.16.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
947
- "model.vision_tower.encoder.layers.16.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
948
  "model.vision_tower.encoder.layers.16.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
949
  "model.vision_tower.encoder.layers.16.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
950
  "model.vision_tower.encoder.layers.16.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
951
  "model.vision_tower.encoder.layers.16.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
952
- "model.vision_tower.encoder.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
953
- "model.vision_tower.encoder.layers.16.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
954
  "model.vision_tower.encoder.layers.16.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
955
- "model.vision_tower.encoder.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
956
- "model.vision_tower.encoder.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
957
  "model.vision_tower.encoder.layers.17.input_layernorm.weight": "model-00001-of-00002.safetensors",
958
- "model.vision_tower.encoder.layers.17.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
959
- "model.vision_tower.encoder.layers.17.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
960
- "model.vision_tower.encoder.layers.17.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
961
  "model.vision_tower.encoder.layers.17.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
962
  "model.vision_tower.encoder.layers.17.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
963
  "model.vision_tower.encoder.layers.17.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
964
  "model.vision_tower.encoder.layers.17.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
965
- "model.vision_tower.encoder.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
966
- "model.vision_tower.encoder.layers.17.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
967
  "model.vision_tower.encoder.layers.17.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
968
- "model.vision_tower.encoder.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
969
- "model.vision_tower.encoder.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
970
  "model.vision_tower.encoder.layers.18.input_layernorm.weight": "model-00001-of-00002.safetensors",
971
- "model.vision_tower.encoder.layers.18.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
972
- "model.vision_tower.encoder.layers.18.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
973
- "model.vision_tower.encoder.layers.18.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
974
  "model.vision_tower.encoder.layers.18.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
975
  "model.vision_tower.encoder.layers.18.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
976
  "model.vision_tower.encoder.layers.18.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
977
  "model.vision_tower.encoder.layers.18.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
978
- "model.vision_tower.encoder.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
979
- "model.vision_tower.encoder.layers.18.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
980
  "model.vision_tower.encoder.layers.18.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
981
- "model.vision_tower.encoder.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
982
- "model.vision_tower.encoder.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
983
  "model.vision_tower.encoder.layers.19.input_layernorm.weight": "model-00001-of-00002.safetensors",
984
- "model.vision_tower.encoder.layers.19.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
985
- "model.vision_tower.encoder.layers.19.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
986
- "model.vision_tower.encoder.layers.19.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
987
  "model.vision_tower.encoder.layers.19.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
988
  "model.vision_tower.encoder.layers.19.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
989
  "model.vision_tower.encoder.layers.19.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
990
  "model.vision_tower.encoder.layers.19.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
991
- "model.vision_tower.encoder.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
992
- "model.vision_tower.encoder.layers.19.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
993
  "model.vision_tower.encoder.layers.19.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
994
- "model.vision_tower.encoder.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
995
- "model.vision_tower.encoder.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
996
  "model.vision_tower.encoder.layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors",
997
- "model.vision_tower.encoder.layers.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
998
- "model.vision_tower.encoder.layers.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
999
- "model.vision_tower.encoder.layers.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
1000
  "model.vision_tower.encoder.layers.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
1001
  "model.vision_tower.encoder.layers.2.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1002
  "model.vision_tower.encoder.layers.2.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1003
  "model.vision_tower.encoder.layers.2.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
1004
- "model.vision_tower.encoder.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
1005
- "model.vision_tower.encoder.layers.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
1006
  "model.vision_tower.encoder.layers.2.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
1007
- "model.vision_tower.encoder.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
1008
- "model.vision_tower.encoder.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
1009
  "model.vision_tower.encoder.layers.20.input_layernorm.weight": "model-00001-of-00002.safetensors",
1010
- "model.vision_tower.encoder.layers.20.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
1011
- "model.vision_tower.encoder.layers.20.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
1012
- "model.vision_tower.encoder.layers.20.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
1013
  "model.vision_tower.encoder.layers.20.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
1014
  "model.vision_tower.encoder.layers.20.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1015
  "model.vision_tower.encoder.layers.20.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1016
  "model.vision_tower.encoder.layers.20.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
1017
- "model.vision_tower.encoder.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
1018
- "model.vision_tower.encoder.layers.20.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
1019
  "model.vision_tower.encoder.layers.20.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
1020
- "model.vision_tower.encoder.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
1021
- "model.vision_tower.encoder.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
1022
  "model.vision_tower.encoder.layers.21.input_layernorm.weight": "model-00001-of-00002.safetensors",
1023
- "model.vision_tower.encoder.layers.21.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
1024
- "model.vision_tower.encoder.layers.21.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
1025
- "model.vision_tower.encoder.layers.21.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
1026
  "model.vision_tower.encoder.layers.21.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
1027
  "model.vision_tower.encoder.layers.21.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1028
  "model.vision_tower.encoder.layers.21.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1029
  "model.vision_tower.encoder.layers.21.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
1030
- "model.vision_tower.encoder.layers.21.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
1031
- "model.vision_tower.encoder.layers.21.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
1032
  "model.vision_tower.encoder.layers.21.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
1033
- "model.vision_tower.encoder.layers.21.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
1034
- "model.vision_tower.encoder.layers.21.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
1035
  "model.vision_tower.encoder.layers.22.input_layernorm.weight": "model-00001-of-00002.safetensors",
1036
- "model.vision_tower.encoder.layers.22.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
1037
- "model.vision_tower.encoder.layers.22.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
1038
- "model.vision_tower.encoder.layers.22.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
1039
  "model.vision_tower.encoder.layers.22.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
1040
  "model.vision_tower.encoder.layers.22.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1041
  "model.vision_tower.encoder.layers.22.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1042
  "model.vision_tower.encoder.layers.22.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
1043
- "model.vision_tower.encoder.layers.22.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
1044
- "model.vision_tower.encoder.layers.22.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
1045
  "model.vision_tower.encoder.layers.22.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
1046
- "model.vision_tower.encoder.layers.22.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
1047
- "model.vision_tower.encoder.layers.22.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
1048
  "model.vision_tower.encoder.layers.23.input_layernorm.weight": "model-00001-of-00002.safetensors",
1049
- "model.vision_tower.encoder.layers.23.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
1050
- "model.vision_tower.encoder.layers.23.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
1051
- "model.vision_tower.encoder.layers.23.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
1052
  "model.vision_tower.encoder.layers.23.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
1053
  "model.vision_tower.encoder.layers.23.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1054
  "model.vision_tower.encoder.layers.23.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1055
  "model.vision_tower.encoder.layers.23.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
1056
- "model.vision_tower.encoder.layers.23.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
1057
- "model.vision_tower.encoder.layers.23.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
1058
  "model.vision_tower.encoder.layers.23.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
1059
- "model.vision_tower.encoder.layers.23.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
1060
- "model.vision_tower.encoder.layers.23.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
1061
  "model.vision_tower.encoder.layers.24.input_layernorm.weight": "model-00001-of-00002.safetensors",
1062
- "model.vision_tower.encoder.layers.24.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
1063
- "model.vision_tower.encoder.layers.24.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
1064
- "model.vision_tower.encoder.layers.24.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
1065
  "model.vision_tower.encoder.layers.24.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
1066
  "model.vision_tower.encoder.layers.24.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1067
  "model.vision_tower.encoder.layers.24.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1068
  "model.vision_tower.encoder.layers.24.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
1069
- "model.vision_tower.encoder.layers.24.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
1070
- "model.vision_tower.encoder.layers.24.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
1071
  "model.vision_tower.encoder.layers.24.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
1072
- "model.vision_tower.encoder.layers.24.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
1073
- "model.vision_tower.encoder.layers.24.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
1074
  "model.vision_tower.encoder.layers.25.input_layernorm.weight": "model-00001-of-00002.safetensors",
1075
- "model.vision_tower.encoder.layers.25.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
1076
- "model.vision_tower.encoder.layers.25.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
1077
- "model.vision_tower.encoder.layers.25.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
1078
  "model.vision_tower.encoder.layers.25.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
1079
  "model.vision_tower.encoder.layers.25.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1080
  "model.vision_tower.encoder.layers.25.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1081
  "model.vision_tower.encoder.layers.25.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
1082
- "model.vision_tower.encoder.layers.25.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
1083
- "model.vision_tower.encoder.layers.25.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
1084
  "model.vision_tower.encoder.layers.25.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
1085
- "model.vision_tower.encoder.layers.25.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
1086
- "model.vision_tower.encoder.layers.25.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
1087
  "model.vision_tower.encoder.layers.26.input_layernorm.weight": "model-00001-of-00002.safetensors",
1088
- "model.vision_tower.encoder.layers.26.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
1089
- "model.vision_tower.encoder.layers.26.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
1090
- "model.vision_tower.encoder.layers.26.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
1091
  "model.vision_tower.encoder.layers.26.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
1092
  "model.vision_tower.encoder.layers.26.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1093
  "model.vision_tower.encoder.layers.26.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1094
  "model.vision_tower.encoder.layers.26.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
1095
- "model.vision_tower.encoder.layers.26.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
1096
- "model.vision_tower.encoder.layers.26.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
1097
  "model.vision_tower.encoder.layers.26.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
1098
- "model.vision_tower.encoder.layers.26.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
1099
- "model.vision_tower.encoder.layers.26.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
1100
  "model.vision_tower.encoder.layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors",
1101
- "model.vision_tower.encoder.layers.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
1102
- "model.vision_tower.encoder.layers.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
1103
- "model.vision_tower.encoder.layers.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
1104
  "model.vision_tower.encoder.layers.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
1105
  "model.vision_tower.encoder.layers.3.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1106
  "model.vision_tower.encoder.layers.3.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1107
  "model.vision_tower.encoder.layers.3.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
1108
- "model.vision_tower.encoder.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
1109
- "model.vision_tower.encoder.layers.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
1110
  "model.vision_tower.encoder.layers.3.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
1111
- "model.vision_tower.encoder.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
1112
- "model.vision_tower.encoder.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
1113
  "model.vision_tower.encoder.layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors",
1114
- "model.vision_tower.encoder.layers.4.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
1115
- "model.vision_tower.encoder.layers.4.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
1116
- "model.vision_tower.encoder.layers.4.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
1117
  "model.vision_tower.encoder.layers.4.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
1118
  "model.vision_tower.encoder.layers.4.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1119
  "model.vision_tower.encoder.layers.4.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1120
  "model.vision_tower.encoder.layers.4.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
1121
- "model.vision_tower.encoder.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
1122
- "model.vision_tower.encoder.layers.4.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
1123
  "model.vision_tower.encoder.layers.4.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
1124
- "model.vision_tower.encoder.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
1125
- "model.vision_tower.encoder.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
1126
  "model.vision_tower.encoder.layers.5.input_layernorm.weight": "model-00001-of-00002.safetensors",
1127
- "model.vision_tower.encoder.layers.5.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
1128
- "model.vision_tower.encoder.layers.5.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
1129
- "model.vision_tower.encoder.layers.5.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
1130
  "model.vision_tower.encoder.layers.5.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
1131
  "model.vision_tower.encoder.layers.5.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1132
  "model.vision_tower.encoder.layers.5.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1133
  "model.vision_tower.encoder.layers.5.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
1134
- "model.vision_tower.encoder.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
1135
- "model.vision_tower.encoder.layers.5.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
1136
  "model.vision_tower.encoder.layers.5.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
1137
- "model.vision_tower.encoder.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
1138
- "model.vision_tower.encoder.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
1139
  "model.vision_tower.encoder.layers.6.input_layernorm.weight": "model-00001-of-00002.safetensors",
1140
- "model.vision_tower.encoder.layers.6.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
1141
- "model.vision_tower.encoder.layers.6.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
1142
- "model.vision_tower.encoder.layers.6.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
1143
  "model.vision_tower.encoder.layers.6.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
1144
  "model.vision_tower.encoder.layers.6.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1145
  "model.vision_tower.encoder.layers.6.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1146
  "model.vision_tower.encoder.layers.6.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
1147
- "model.vision_tower.encoder.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
1148
- "model.vision_tower.encoder.layers.6.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
1149
  "model.vision_tower.encoder.layers.6.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
1150
- "model.vision_tower.encoder.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
1151
- "model.vision_tower.encoder.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
1152
  "model.vision_tower.encoder.layers.7.input_layernorm.weight": "model-00001-of-00002.safetensors",
1153
- "model.vision_tower.encoder.layers.7.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
1154
- "model.vision_tower.encoder.layers.7.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
1155
- "model.vision_tower.encoder.layers.7.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
1156
  "model.vision_tower.encoder.layers.7.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
1157
  "model.vision_tower.encoder.layers.7.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1158
  "model.vision_tower.encoder.layers.7.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1159
  "model.vision_tower.encoder.layers.7.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
1160
- "model.vision_tower.encoder.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
1161
- "model.vision_tower.encoder.layers.7.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
1162
  "model.vision_tower.encoder.layers.7.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
1163
- "model.vision_tower.encoder.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
1164
- "model.vision_tower.encoder.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
1165
  "model.vision_tower.encoder.layers.8.input_layernorm.weight": "model-00001-of-00002.safetensors",
1166
- "model.vision_tower.encoder.layers.8.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
1167
- "model.vision_tower.encoder.layers.8.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
1168
- "model.vision_tower.encoder.layers.8.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
1169
  "model.vision_tower.encoder.layers.8.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
1170
  "model.vision_tower.encoder.layers.8.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1171
  "model.vision_tower.encoder.layers.8.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1172
  "model.vision_tower.encoder.layers.8.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
1173
- "model.vision_tower.encoder.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
1174
- "model.vision_tower.encoder.layers.8.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
1175
  "model.vision_tower.encoder.layers.8.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
1176
- "model.vision_tower.encoder.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
1177
- "model.vision_tower.encoder.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
1178
  "model.vision_tower.encoder.layers.9.input_layernorm.weight": "model-00001-of-00002.safetensors",
1179
- "model.vision_tower.encoder.layers.9.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
1180
- "model.vision_tower.encoder.layers.9.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
1181
- "model.vision_tower.encoder.layers.9.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
1182
  "model.vision_tower.encoder.layers.9.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
1183
  "model.vision_tower.encoder.layers.9.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1184
  "model.vision_tower.encoder.layers.9.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1185
  "model.vision_tower.encoder.layers.9.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
1186
- "model.vision_tower.encoder.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
1187
- "model.vision_tower.encoder.layers.9.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
1188
  "model.vision_tower.encoder.layers.9.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
1189
- "model.vision_tower.encoder.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
1190
- "model.vision_tower.encoder.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
1191
  "model.vision_tower.patch_embedder.input_proj.weight": "model-00001-of-00002.safetensors",
1192
- "model.vision_tower.patch_embedder.position_embedding_table": "model-00001-of-00002.safetensors"
 
 
1193
  }
1194
  }
 
1
  {
2
  "metadata": {
3
  "total_parameters": 32682372656,
4
+ "total_size": 62546177752
5
  },
6
  "weight_map": {
7
  "model.embed_vision.embedding_projection.weight": "model-00001-of-00002.safetensors",
 
838
  "model.language_model.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
839
  "model.language_model.norm.weight": "model-00001-of-00002.safetensors",
840
  "model.vision_tower.encoder.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors",
841
+ "model.vision_tower.encoder.layers.0.mlp.down_proj.linear.weight": "model-00001-of-00002.safetensors",
842
+ "model.vision_tower.encoder.layers.0.mlp.gate_proj.linear.weight": "model-00001-of-00002.safetensors",
843
+ "model.vision_tower.encoder.layers.0.mlp.up_proj.linear.weight": "model-00001-of-00002.safetensors",
844
  "model.vision_tower.encoder.layers.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
845
  "model.vision_tower.encoder.layers.0.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
846
  "model.vision_tower.encoder.layers.0.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
847
  "model.vision_tower.encoder.layers.0.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
848
+ "model.vision_tower.encoder.layers.0.self_attn.k_proj.linear.weight": "model-00001-of-00002.safetensors",
849
+ "model.vision_tower.encoder.layers.0.self_attn.o_proj.linear.weight": "model-00001-of-00002.safetensors",
850
  "model.vision_tower.encoder.layers.0.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
851
+ "model.vision_tower.encoder.layers.0.self_attn.q_proj.linear.weight": "model-00001-of-00002.safetensors",
852
+ "model.vision_tower.encoder.layers.0.self_attn.v_proj.linear.weight": "model-00001-of-00002.safetensors",
853
  "model.vision_tower.encoder.layers.1.input_layernorm.weight": "model-00001-of-00002.safetensors",
854
+ "model.vision_tower.encoder.layers.1.mlp.down_proj.linear.weight": "model-00001-of-00002.safetensors",
855
+ "model.vision_tower.encoder.layers.1.mlp.gate_proj.linear.weight": "model-00001-of-00002.safetensors",
856
+ "model.vision_tower.encoder.layers.1.mlp.up_proj.linear.weight": "model-00001-of-00002.safetensors",
857
  "model.vision_tower.encoder.layers.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
858
  "model.vision_tower.encoder.layers.1.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
859
  "model.vision_tower.encoder.layers.1.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
860
  "model.vision_tower.encoder.layers.1.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
861
+ "model.vision_tower.encoder.layers.1.self_attn.k_proj.linear.weight": "model-00001-of-00002.safetensors",
862
+ "model.vision_tower.encoder.layers.1.self_attn.o_proj.linear.weight": "model-00001-of-00002.safetensors",
863
  "model.vision_tower.encoder.layers.1.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
864
+ "model.vision_tower.encoder.layers.1.self_attn.q_proj.linear.weight": "model-00001-of-00002.safetensors",
865
+ "model.vision_tower.encoder.layers.1.self_attn.v_proj.linear.weight": "model-00001-of-00002.safetensors",
866
  "model.vision_tower.encoder.layers.10.input_layernorm.weight": "model-00001-of-00002.safetensors",
867
+ "model.vision_tower.encoder.layers.10.mlp.down_proj.linear.weight": "model-00001-of-00002.safetensors",
868
+ "model.vision_tower.encoder.layers.10.mlp.gate_proj.linear.weight": "model-00001-of-00002.safetensors",
869
+ "model.vision_tower.encoder.layers.10.mlp.up_proj.linear.weight": "model-00001-of-00002.safetensors",
870
  "model.vision_tower.encoder.layers.10.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
871
  "model.vision_tower.encoder.layers.10.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
872
  "model.vision_tower.encoder.layers.10.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
873
  "model.vision_tower.encoder.layers.10.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
874
+ "model.vision_tower.encoder.layers.10.self_attn.k_proj.linear.weight": "model-00001-of-00002.safetensors",
875
+ "model.vision_tower.encoder.layers.10.self_attn.o_proj.linear.weight": "model-00001-of-00002.safetensors",
876
  "model.vision_tower.encoder.layers.10.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
877
+ "model.vision_tower.encoder.layers.10.self_attn.q_proj.linear.weight": "model-00001-of-00002.safetensors",
878
+ "model.vision_tower.encoder.layers.10.self_attn.v_proj.linear.weight": "model-00001-of-00002.safetensors",
879
  "model.vision_tower.encoder.layers.11.input_layernorm.weight": "model-00001-of-00002.safetensors",
880
+ "model.vision_tower.encoder.layers.11.mlp.down_proj.linear.weight": "model-00001-of-00002.safetensors",
881
+ "model.vision_tower.encoder.layers.11.mlp.gate_proj.linear.weight": "model-00001-of-00002.safetensors",
882
+ "model.vision_tower.encoder.layers.11.mlp.up_proj.linear.weight": "model-00001-of-00002.safetensors",
883
  "model.vision_tower.encoder.layers.11.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
884
  "model.vision_tower.encoder.layers.11.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
885
  "model.vision_tower.encoder.layers.11.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
886
  "model.vision_tower.encoder.layers.11.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
887
+ "model.vision_tower.encoder.layers.11.self_attn.k_proj.linear.weight": "model-00001-of-00002.safetensors",
888
+ "model.vision_tower.encoder.layers.11.self_attn.o_proj.linear.weight": "model-00001-of-00002.safetensors",
889
  "model.vision_tower.encoder.layers.11.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
890
+ "model.vision_tower.encoder.layers.11.self_attn.q_proj.linear.weight": "model-00001-of-00002.safetensors",
891
+ "model.vision_tower.encoder.layers.11.self_attn.v_proj.linear.weight": "model-00001-of-00002.safetensors",
892
  "model.vision_tower.encoder.layers.12.input_layernorm.weight": "model-00001-of-00002.safetensors",
893
+ "model.vision_tower.encoder.layers.12.mlp.down_proj.linear.weight": "model-00001-of-00002.safetensors",
894
+ "model.vision_tower.encoder.layers.12.mlp.gate_proj.linear.weight": "model-00001-of-00002.safetensors",
895
+ "model.vision_tower.encoder.layers.12.mlp.up_proj.linear.weight": "model-00001-of-00002.safetensors",
896
  "model.vision_tower.encoder.layers.12.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
897
  "model.vision_tower.encoder.layers.12.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
898
  "model.vision_tower.encoder.layers.12.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
899
  "model.vision_tower.encoder.layers.12.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
900
+ "model.vision_tower.encoder.layers.12.self_attn.k_proj.linear.weight": "model-00001-of-00002.safetensors",
901
+ "model.vision_tower.encoder.layers.12.self_attn.o_proj.linear.weight": "model-00001-of-00002.safetensors",
902
  "model.vision_tower.encoder.layers.12.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
903
+ "model.vision_tower.encoder.layers.12.self_attn.q_proj.linear.weight": "model-00001-of-00002.safetensors",
904
+ "model.vision_tower.encoder.layers.12.self_attn.v_proj.linear.weight": "model-00001-of-00002.safetensors",
905
  "model.vision_tower.encoder.layers.13.input_layernorm.weight": "model-00001-of-00002.safetensors",
906
+ "model.vision_tower.encoder.layers.13.mlp.down_proj.linear.weight": "model-00001-of-00002.safetensors",
907
+ "model.vision_tower.encoder.layers.13.mlp.gate_proj.linear.weight": "model-00001-of-00002.safetensors",
908
+ "model.vision_tower.encoder.layers.13.mlp.up_proj.linear.weight": "model-00001-of-00002.safetensors",
909
  "model.vision_tower.encoder.layers.13.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
910
  "model.vision_tower.encoder.layers.13.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
911
  "model.vision_tower.encoder.layers.13.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
912
  "model.vision_tower.encoder.layers.13.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
913
+ "model.vision_tower.encoder.layers.13.self_attn.k_proj.linear.weight": "model-00001-of-00002.safetensors",
914
+ "model.vision_tower.encoder.layers.13.self_attn.o_proj.linear.weight": "model-00001-of-00002.safetensors",
915
  "model.vision_tower.encoder.layers.13.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
916
+ "model.vision_tower.encoder.layers.13.self_attn.q_proj.linear.weight": "model-00001-of-00002.safetensors",
917
+ "model.vision_tower.encoder.layers.13.self_attn.v_proj.linear.weight": "model-00001-of-00002.safetensors",
918
  "model.vision_tower.encoder.layers.14.input_layernorm.weight": "model-00001-of-00002.safetensors",
919
+ "model.vision_tower.encoder.layers.14.mlp.down_proj.linear.weight": "model-00001-of-00002.safetensors",
920
+ "model.vision_tower.encoder.layers.14.mlp.gate_proj.linear.weight": "model-00001-of-00002.safetensors",
921
+ "model.vision_tower.encoder.layers.14.mlp.up_proj.linear.weight": "model-00001-of-00002.safetensors",
922
  "model.vision_tower.encoder.layers.14.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
923
  "model.vision_tower.encoder.layers.14.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
924
  "model.vision_tower.encoder.layers.14.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
925
  "model.vision_tower.encoder.layers.14.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
926
+ "model.vision_tower.encoder.layers.14.self_attn.k_proj.linear.weight": "model-00001-of-00002.safetensors",
927
+ "model.vision_tower.encoder.layers.14.self_attn.o_proj.linear.weight": "model-00001-of-00002.safetensors",
928
  "model.vision_tower.encoder.layers.14.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
929
+ "model.vision_tower.encoder.layers.14.self_attn.q_proj.linear.weight": "model-00001-of-00002.safetensors",
930
+ "model.vision_tower.encoder.layers.14.self_attn.v_proj.linear.weight": "model-00001-of-00002.safetensors",
931
  "model.vision_tower.encoder.layers.15.input_layernorm.weight": "model-00001-of-00002.safetensors",
932
+ "model.vision_tower.encoder.layers.15.mlp.down_proj.linear.weight": "model-00001-of-00002.safetensors",
933
+ "model.vision_tower.encoder.layers.15.mlp.gate_proj.linear.weight": "model-00001-of-00002.safetensors",
934
+ "model.vision_tower.encoder.layers.15.mlp.up_proj.linear.weight": "model-00001-of-00002.safetensors",
935
  "model.vision_tower.encoder.layers.15.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
936
  "model.vision_tower.encoder.layers.15.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
937
  "model.vision_tower.encoder.layers.15.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
938
  "model.vision_tower.encoder.layers.15.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
939
+ "model.vision_tower.encoder.layers.15.self_attn.k_proj.linear.weight": "model-00001-of-00002.safetensors",
940
+ "model.vision_tower.encoder.layers.15.self_attn.o_proj.linear.weight": "model-00001-of-00002.safetensors",
941
  "model.vision_tower.encoder.layers.15.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
942
+ "model.vision_tower.encoder.layers.15.self_attn.q_proj.linear.weight": "model-00001-of-00002.safetensors",
943
+ "model.vision_tower.encoder.layers.15.self_attn.v_proj.linear.weight": "model-00001-of-00002.safetensors",
944
  "model.vision_tower.encoder.layers.16.input_layernorm.weight": "model-00001-of-00002.safetensors",
945
+ "model.vision_tower.encoder.layers.16.mlp.down_proj.linear.weight": "model-00001-of-00002.safetensors",
946
+ "model.vision_tower.encoder.layers.16.mlp.gate_proj.linear.weight": "model-00001-of-00002.safetensors",
947
+ "model.vision_tower.encoder.layers.16.mlp.up_proj.linear.weight": "model-00001-of-00002.safetensors",
948
  "model.vision_tower.encoder.layers.16.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
949
  "model.vision_tower.encoder.layers.16.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
950
  "model.vision_tower.encoder.layers.16.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
951
  "model.vision_tower.encoder.layers.16.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
952
+ "model.vision_tower.encoder.layers.16.self_attn.k_proj.linear.weight": "model-00001-of-00002.safetensors",
953
+ "model.vision_tower.encoder.layers.16.self_attn.o_proj.linear.weight": "model-00001-of-00002.safetensors",
954
  "model.vision_tower.encoder.layers.16.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
955
+ "model.vision_tower.encoder.layers.16.self_attn.q_proj.linear.weight": "model-00001-of-00002.safetensors",
956
+ "model.vision_tower.encoder.layers.16.self_attn.v_proj.linear.weight": "model-00001-of-00002.safetensors",
957
  "model.vision_tower.encoder.layers.17.input_layernorm.weight": "model-00001-of-00002.safetensors",
958
+ "model.vision_tower.encoder.layers.17.mlp.down_proj.linear.weight": "model-00001-of-00002.safetensors",
959
+ "model.vision_tower.encoder.layers.17.mlp.gate_proj.linear.weight": "model-00001-of-00002.safetensors",
960
+ "model.vision_tower.encoder.layers.17.mlp.up_proj.linear.weight": "model-00001-of-00002.safetensors",
961
  "model.vision_tower.encoder.layers.17.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
962
  "model.vision_tower.encoder.layers.17.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
963
  "model.vision_tower.encoder.layers.17.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
964
  "model.vision_tower.encoder.layers.17.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
965
+ "model.vision_tower.encoder.layers.17.self_attn.k_proj.linear.weight": "model-00001-of-00002.safetensors",
966
+ "model.vision_tower.encoder.layers.17.self_attn.o_proj.linear.weight": "model-00001-of-00002.safetensors",
967
  "model.vision_tower.encoder.layers.17.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
968
+ "model.vision_tower.encoder.layers.17.self_attn.q_proj.linear.weight": "model-00001-of-00002.safetensors",
969
+ "model.vision_tower.encoder.layers.17.self_attn.v_proj.linear.weight": "model-00001-of-00002.safetensors",
970
  "model.vision_tower.encoder.layers.18.input_layernorm.weight": "model-00001-of-00002.safetensors",
971
+ "model.vision_tower.encoder.layers.18.mlp.down_proj.linear.weight": "model-00001-of-00002.safetensors",
972
+ "model.vision_tower.encoder.layers.18.mlp.gate_proj.linear.weight": "model-00001-of-00002.safetensors",
973
+ "model.vision_tower.encoder.layers.18.mlp.up_proj.linear.weight": "model-00001-of-00002.safetensors",
974
  "model.vision_tower.encoder.layers.18.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
975
  "model.vision_tower.encoder.layers.18.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
976
  "model.vision_tower.encoder.layers.18.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
977
  "model.vision_tower.encoder.layers.18.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
978
+ "model.vision_tower.encoder.layers.18.self_attn.k_proj.linear.weight": "model-00001-of-00002.safetensors",
979
+ "model.vision_tower.encoder.layers.18.self_attn.o_proj.linear.weight": "model-00001-of-00002.safetensors",
980
  "model.vision_tower.encoder.layers.18.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
981
+ "model.vision_tower.encoder.layers.18.self_attn.q_proj.linear.weight": "model-00001-of-00002.safetensors",
982
+ "model.vision_tower.encoder.layers.18.self_attn.v_proj.linear.weight": "model-00001-of-00002.safetensors",
983
  "model.vision_tower.encoder.layers.19.input_layernorm.weight": "model-00001-of-00002.safetensors",
984
+ "model.vision_tower.encoder.layers.19.mlp.down_proj.linear.weight": "model-00001-of-00002.safetensors",
985
+ "model.vision_tower.encoder.layers.19.mlp.gate_proj.linear.weight": "model-00001-of-00002.safetensors",
986
+ "model.vision_tower.encoder.layers.19.mlp.up_proj.linear.weight": "model-00001-of-00002.safetensors",
987
  "model.vision_tower.encoder.layers.19.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
988
  "model.vision_tower.encoder.layers.19.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
989
  "model.vision_tower.encoder.layers.19.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
990
  "model.vision_tower.encoder.layers.19.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
991
+ "model.vision_tower.encoder.layers.19.self_attn.k_proj.linear.weight": "model-00001-of-00002.safetensors",
992
+ "model.vision_tower.encoder.layers.19.self_attn.o_proj.linear.weight": "model-00001-of-00002.safetensors",
993
  "model.vision_tower.encoder.layers.19.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
994
+ "model.vision_tower.encoder.layers.19.self_attn.q_proj.linear.weight": "model-00001-of-00002.safetensors",
995
+ "model.vision_tower.encoder.layers.19.self_attn.v_proj.linear.weight": "model-00001-of-00002.safetensors",
996
  "model.vision_tower.encoder.layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors",
997
+ "model.vision_tower.encoder.layers.2.mlp.down_proj.linear.weight": "model-00001-of-00002.safetensors",
998
+ "model.vision_tower.encoder.layers.2.mlp.gate_proj.linear.weight": "model-00001-of-00002.safetensors",
999
+ "model.vision_tower.encoder.layers.2.mlp.up_proj.linear.weight": "model-00001-of-00002.safetensors",
1000
  "model.vision_tower.encoder.layers.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
1001
  "model.vision_tower.encoder.layers.2.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1002
  "model.vision_tower.encoder.layers.2.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1003
  "model.vision_tower.encoder.layers.2.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
1004
+ "model.vision_tower.encoder.layers.2.self_attn.k_proj.linear.weight": "model-00001-of-00002.safetensors",
1005
+ "model.vision_tower.encoder.layers.2.self_attn.o_proj.linear.weight": "model-00001-of-00002.safetensors",
1006
  "model.vision_tower.encoder.layers.2.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
1007
+ "model.vision_tower.encoder.layers.2.self_attn.q_proj.linear.weight": "model-00001-of-00002.safetensors",
1008
+ "model.vision_tower.encoder.layers.2.self_attn.v_proj.linear.weight": "model-00001-of-00002.safetensors",
1009
  "model.vision_tower.encoder.layers.20.input_layernorm.weight": "model-00001-of-00002.safetensors",
1010
+ "model.vision_tower.encoder.layers.20.mlp.down_proj.linear.weight": "model-00001-of-00002.safetensors",
1011
+ "model.vision_tower.encoder.layers.20.mlp.gate_proj.linear.weight": "model-00001-of-00002.safetensors",
1012
+ "model.vision_tower.encoder.layers.20.mlp.up_proj.linear.weight": "model-00001-of-00002.safetensors",
1013
  "model.vision_tower.encoder.layers.20.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
1014
  "model.vision_tower.encoder.layers.20.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1015
  "model.vision_tower.encoder.layers.20.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1016
  "model.vision_tower.encoder.layers.20.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
1017
+ "model.vision_tower.encoder.layers.20.self_attn.k_proj.linear.weight": "model-00001-of-00002.safetensors",
1018
+ "model.vision_tower.encoder.layers.20.self_attn.o_proj.linear.weight": "model-00001-of-00002.safetensors",
1019
  "model.vision_tower.encoder.layers.20.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
1020
+ "model.vision_tower.encoder.layers.20.self_attn.q_proj.linear.weight": "model-00001-of-00002.safetensors",
1021
+ "model.vision_tower.encoder.layers.20.self_attn.v_proj.linear.weight": "model-00001-of-00002.safetensors",
1022
  "model.vision_tower.encoder.layers.21.input_layernorm.weight": "model-00001-of-00002.safetensors",
1023
+ "model.vision_tower.encoder.layers.21.mlp.down_proj.linear.weight": "model-00001-of-00002.safetensors",
1024
+ "model.vision_tower.encoder.layers.21.mlp.gate_proj.linear.weight": "model-00001-of-00002.safetensors",
1025
+ "model.vision_tower.encoder.layers.21.mlp.up_proj.linear.weight": "model-00001-of-00002.safetensors",
1026
  "model.vision_tower.encoder.layers.21.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
1027
  "model.vision_tower.encoder.layers.21.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1028
  "model.vision_tower.encoder.layers.21.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1029
  "model.vision_tower.encoder.layers.21.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
1030
+ "model.vision_tower.encoder.layers.21.self_attn.k_proj.linear.weight": "model-00001-of-00002.safetensors",
1031
+ "model.vision_tower.encoder.layers.21.self_attn.o_proj.linear.weight": "model-00001-of-00002.safetensors",
1032
  "model.vision_tower.encoder.layers.21.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
1033
+ "model.vision_tower.encoder.layers.21.self_attn.q_proj.linear.weight": "model-00001-of-00002.safetensors",
1034
+ "model.vision_tower.encoder.layers.21.self_attn.v_proj.linear.weight": "model-00001-of-00002.safetensors",
1035
  "model.vision_tower.encoder.layers.22.input_layernorm.weight": "model-00001-of-00002.safetensors",
1036
+ "model.vision_tower.encoder.layers.22.mlp.down_proj.linear.weight": "model-00001-of-00002.safetensors",
1037
+ "model.vision_tower.encoder.layers.22.mlp.gate_proj.linear.weight": "model-00001-of-00002.safetensors",
1038
+ "model.vision_tower.encoder.layers.22.mlp.up_proj.linear.weight": "model-00001-of-00002.safetensors",
1039
  "model.vision_tower.encoder.layers.22.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
1040
  "model.vision_tower.encoder.layers.22.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1041
  "model.vision_tower.encoder.layers.22.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1042
  "model.vision_tower.encoder.layers.22.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
1043
+ "model.vision_tower.encoder.layers.22.self_attn.k_proj.linear.weight": "model-00001-of-00002.safetensors",
1044
+ "model.vision_tower.encoder.layers.22.self_attn.o_proj.linear.weight": "model-00001-of-00002.safetensors",
1045
  "model.vision_tower.encoder.layers.22.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
1046
+ "model.vision_tower.encoder.layers.22.self_attn.q_proj.linear.weight": "model-00001-of-00002.safetensors",
1047
+ "model.vision_tower.encoder.layers.22.self_attn.v_proj.linear.weight": "model-00001-of-00002.safetensors",
1048
  "model.vision_tower.encoder.layers.23.input_layernorm.weight": "model-00001-of-00002.safetensors",
1049
+ "model.vision_tower.encoder.layers.23.mlp.down_proj.linear.weight": "model-00001-of-00002.safetensors",
1050
+ "model.vision_tower.encoder.layers.23.mlp.gate_proj.linear.weight": "model-00001-of-00002.safetensors",
1051
+ "model.vision_tower.encoder.layers.23.mlp.up_proj.linear.weight": "model-00001-of-00002.safetensors",
1052
  "model.vision_tower.encoder.layers.23.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
1053
  "model.vision_tower.encoder.layers.23.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1054
  "model.vision_tower.encoder.layers.23.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1055
  "model.vision_tower.encoder.layers.23.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
1056
+ "model.vision_tower.encoder.layers.23.self_attn.k_proj.linear.weight": "model-00001-of-00002.safetensors",
1057
+ "model.vision_tower.encoder.layers.23.self_attn.o_proj.linear.weight": "model-00001-of-00002.safetensors",
1058
  "model.vision_tower.encoder.layers.23.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
1059
+ "model.vision_tower.encoder.layers.23.self_attn.q_proj.linear.weight": "model-00001-of-00002.safetensors",
1060
+ "model.vision_tower.encoder.layers.23.self_attn.v_proj.linear.weight": "model-00001-of-00002.safetensors",
1061
  "model.vision_tower.encoder.layers.24.input_layernorm.weight": "model-00001-of-00002.safetensors",
1062
+ "model.vision_tower.encoder.layers.24.mlp.down_proj.linear.weight": "model-00001-of-00002.safetensors",
1063
+ "model.vision_tower.encoder.layers.24.mlp.gate_proj.linear.weight": "model-00001-of-00002.safetensors",
1064
+ "model.vision_tower.encoder.layers.24.mlp.up_proj.linear.weight": "model-00001-of-00002.safetensors",
1065
  "model.vision_tower.encoder.layers.24.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
1066
  "model.vision_tower.encoder.layers.24.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1067
  "model.vision_tower.encoder.layers.24.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1068
  "model.vision_tower.encoder.layers.24.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
1069
+ "model.vision_tower.encoder.layers.24.self_attn.k_proj.linear.weight": "model-00001-of-00002.safetensors",
1070
+ "model.vision_tower.encoder.layers.24.self_attn.o_proj.linear.weight": "model-00001-of-00002.safetensors",
1071
  "model.vision_tower.encoder.layers.24.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
1072
+ "model.vision_tower.encoder.layers.24.self_attn.q_proj.linear.weight": "model-00001-of-00002.safetensors",
1073
+ "model.vision_tower.encoder.layers.24.self_attn.v_proj.linear.weight": "model-00001-of-00002.safetensors",
1074
  "model.vision_tower.encoder.layers.25.input_layernorm.weight": "model-00001-of-00002.safetensors",
1075
+ "model.vision_tower.encoder.layers.25.mlp.down_proj.linear.weight": "model-00001-of-00002.safetensors",
1076
+ "model.vision_tower.encoder.layers.25.mlp.gate_proj.linear.weight": "model-00001-of-00002.safetensors",
1077
+ "model.vision_tower.encoder.layers.25.mlp.up_proj.linear.weight": "model-00001-of-00002.safetensors",
1078
  "model.vision_tower.encoder.layers.25.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
1079
  "model.vision_tower.encoder.layers.25.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1080
  "model.vision_tower.encoder.layers.25.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1081
  "model.vision_tower.encoder.layers.25.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
1082
+ "model.vision_tower.encoder.layers.25.self_attn.k_proj.linear.weight": "model-00001-of-00002.safetensors",
1083
+ "model.vision_tower.encoder.layers.25.self_attn.o_proj.linear.weight": "model-00001-of-00002.safetensors",
1084
  "model.vision_tower.encoder.layers.25.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
1085
+ "model.vision_tower.encoder.layers.25.self_attn.q_proj.linear.weight": "model-00001-of-00002.safetensors",
1086
+ "model.vision_tower.encoder.layers.25.self_attn.v_proj.linear.weight": "model-00001-of-00002.safetensors",
1087
  "model.vision_tower.encoder.layers.26.input_layernorm.weight": "model-00001-of-00002.safetensors",
1088
+ "model.vision_tower.encoder.layers.26.mlp.down_proj.linear.weight": "model-00001-of-00002.safetensors",
1089
+ "model.vision_tower.encoder.layers.26.mlp.gate_proj.linear.weight": "model-00001-of-00002.safetensors",
1090
+ "model.vision_tower.encoder.layers.26.mlp.up_proj.linear.weight": "model-00001-of-00002.safetensors",
1091
  "model.vision_tower.encoder.layers.26.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
1092
  "model.vision_tower.encoder.layers.26.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1093
  "model.vision_tower.encoder.layers.26.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1094
  "model.vision_tower.encoder.layers.26.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
1095
+ "model.vision_tower.encoder.layers.26.self_attn.k_proj.linear.weight": "model-00001-of-00002.safetensors",
1096
+ "model.vision_tower.encoder.layers.26.self_attn.o_proj.linear.weight": "model-00001-of-00002.safetensors",
1097
  "model.vision_tower.encoder.layers.26.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
1098
+ "model.vision_tower.encoder.layers.26.self_attn.q_proj.linear.weight": "model-00001-of-00002.safetensors",
1099
+ "model.vision_tower.encoder.layers.26.self_attn.v_proj.linear.weight": "model-00001-of-00002.safetensors",
1100
  "model.vision_tower.encoder.layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors",
1101
+ "model.vision_tower.encoder.layers.3.mlp.down_proj.linear.weight": "model-00001-of-00002.safetensors",
1102
+ "model.vision_tower.encoder.layers.3.mlp.gate_proj.linear.weight": "model-00001-of-00002.safetensors",
1103
+ "model.vision_tower.encoder.layers.3.mlp.up_proj.linear.weight": "model-00001-of-00002.safetensors",
1104
  "model.vision_tower.encoder.layers.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
1105
  "model.vision_tower.encoder.layers.3.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1106
  "model.vision_tower.encoder.layers.3.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1107
  "model.vision_tower.encoder.layers.3.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
1108
+ "model.vision_tower.encoder.layers.3.self_attn.k_proj.linear.weight": "model-00001-of-00002.safetensors",
1109
+ "model.vision_tower.encoder.layers.3.self_attn.o_proj.linear.weight": "model-00001-of-00002.safetensors",
1110
  "model.vision_tower.encoder.layers.3.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
1111
+ "model.vision_tower.encoder.layers.3.self_attn.q_proj.linear.weight": "model-00001-of-00002.safetensors",
1112
+ "model.vision_tower.encoder.layers.3.self_attn.v_proj.linear.weight": "model-00001-of-00002.safetensors",
1113
  "model.vision_tower.encoder.layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors",
1114
+ "model.vision_tower.encoder.layers.4.mlp.down_proj.linear.weight": "model-00001-of-00002.safetensors",
1115
+ "model.vision_tower.encoder.layers.4.mlp.gate_proj.linear.weight": "model-00001-of-00002.safetensors",
1116
+ "model.vision_tower.encoder.layers.4.mlp.up_proj.linear.weight": "model-00001-of-00002.safetensors",
1117
  "model.vision_tower.encoder.layers.4.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
1118
  "model.vision_tower.encoder.layers.4.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1119
  "model.vision_tower.encoder.layers.4.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1120
  "model.vision_tower.encoder.layers.4.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
1121
+ "model.vision_tower.encoder.layers.4.self_attn.k_proj.linear.weight": "model-00001-of-00002.safetensors",
1122
+ "model.vision_tower.encoder.layers.4.self_attn.o_proj.linear.weight": "model-00001-of-00002.safetensors",
1123
  "model.vision_tower.encoder.layers.4.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
1124
+ "model.vision_tower.encoder.layers.4.self_attn.q_proj.linear.weight": "model-00001-of-00002.safetensors",
1125
+ "model.vision_tower.encoder.layers.4.self_attn.v_proj.linear.weight": "model-00001-of-00002.safetensors",
1126
  "model.vision_tower.encoder.layers.5.input_layernorm.weight": "model-00001-of-00002.safetensors",
1127
+ "model.vision_tower.encoder.layers.5.mlp.down_proj.linear.weight": "model-00001-of-00002.safetensors",
1128
+ "model.vision_tower.encoder.layers.5.mlp.gate_proj.linear.weight": "model-00001-of-00002.safetensors",
1129
+ "model.vision_tower.encoder.layers.5.mlp.up_proj.linear.weight": "model-00001-of-00002.safetensors",
1130
  "model.vision_tower.encoder.layers.5.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
1131
  "model.vision_tower.encoder.layers.5.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1132
  "model.vision_tower.encoder.layers.5.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1133
  "model.vision_tower.encoder.layers.5.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
1134
+ "model.vision_tower.encoder.layers.5.self_attn.k_proj.linear.weight": "model-00001-of-00002.safetensors",
1135
+ "model.vision_tower.encoder.layers.5.self_attn.o_proj.linear.weight": "model-00001-of-00002.safetensors",
1136
  "model.vision_tower.encoder.layers.5.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
1137
+ "model.vision_tower.encoder.layers.5.self_attn.q_proj.linear.weight": "model-00001-of-00002.safetensors",
1138
+ "model.vision_tower.encoder.layers.5.self_attn.v_proj.linear.weight": "model-00001-of-00002.safetensors",
1139
  "model.vision_tower.encoder.layers.6.input_layernorm.weight": "model-00001-of-00002.safetensors",
1140
+ "model.vision_tower.encoder.layers.6.mlp.down_proj.linear.weight": "model-00001-of-00002.safetensors",
1141
+ "model.vision_tower.encoder.layers.6.mlp.gate_proj.linear.weight": "model-00001-of-00002.safetensors",
1142
+ "model.vision_tower.encoder.layers.6.mlp.up_proj.linear.weight": "model-00001-of-00002.safetensors",
1143
  "model.vision_tower.encoder.layers.6.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
1144
  "model.vision_tower.encoder.layers.6.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1145
  "model.vision_tower.encoder.layers.6.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1146
  "model.vision_tower.encoder.layers.6.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
1147
+ "model.vision_tower.encoder.layers.6.self_attn.k_proj.linear.weight": "model-00001-of-00002.safetensors",
1148
+ "model.vision_tower.encoder.layers.6.self_attn.o_proj.linear.weight": "model-00001-of-00002.safetensors",
1149
  "model.vision_tower.encoder.layers.6.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
1150
+ "model.vision_tower.encoder.layers.6.self_attn.q_proj.linear.weight": "model-00001-of-00002.safetensors",
1151
+ "model.vision_tower.encoder.layers.6.self_attn.v_proj.linear.weight": "model-00001-of-00002.safetensors",
1152
  "model.vision_tower.encoder.layers.7.input_layernorm.weight": "model-00001-of-00002.safetensors",
1153
+ "model.vision_tower.encoder.layers.7.mlp.down_proj.linear.weight": "model-00001-of-00002.safetensors",
1154
+ "model.vision_tower.encoder.layers.7.mlp.gate_proj.linear.weight": "model-00001-of-00002.safetensors",
1155
+ "model.vision_tower.encoder.layers.7.mlp.up_proj.linear.weight": "model-00001-of-00002.safetensors",
1156
  "model.vision_tower.encoder.layers.7.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
1157
  "model.vision_tower.encoder.layers.7.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1158
  "model.vision_tower.encoder.layers.7.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1159
  "model.vision_tower.encoder.layers.7.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
1160
+ "model.vision_tower.encoder.layers.7.self_attn.k_proj.linear.weight": "model-00001-of-00002.safetensors",
1161
+ "model.vision_tower.encoder.layers.7.self_attn.o_proj.linear.weight": "model-00001-of-00002.safetensors",
1162
  "model.vision_tower.encoder.layers.7.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
1163
+ "model.vision_tower.encoder.layers.7.self_attn.q_proj.linear.weight": "model-00001-of-00002.safetensors",
1164
+ "model.vision_tower.encoder.layers.7.self_attn.v_proj.linear.weight": "model-00001-of-00002.safetensors",
1165
  "model.vision_tower.encoder.layers.8.input_layernorm.weight": "model-00001-of-00002.safetensors",
1166
+ "model.vision_tower.encoder.layers.8.mlp.down_proj.linear.weight": "model-00001-of-00002.safetensors",
1167
+ "model.vision_tower.encoder.layers.8.mlp.gate_proj.linear.weight": "model-00001-of-00002.safetensors",
1168
+ "model.vision_tower.encoder.layers.8.mlp.up_proj.linear.weight": "model-00001-of-00002.safetensors",
1169
  "model.vision_tower.encoder.layers.8.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
1170
  "model.vision_tower.encoder.layers.8.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1171
  "model.vision_tower.encoder.layers.8.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1172
  "model.vision_tower.encoder.layers.8.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
1173
+ "model.vision_tower.encoder.layers.8.self_attn.k_proj.linear.weight": "model-00001-of-00002.safetensors",
1174
+ "model.vision_tower.encoder.layers.8.self_attn.o_proj.linear.weight": "model-00001-of-00002.safetensors",
1175
  "model.vision_tower.encoder.layers.8.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
1176
+ "model.vision_tower.encoder.layers.8.self_attn.q_proj.linear.weight": "model-00001-of-00002.safetensors",
1177
+ "model.vision_tower.encoder.layers.8.self_attn.v_proj.linear.weight": "model-00001-of-00002.safetensors",
1178
  "model.vision_tower.encoder.layers.9.input_layernorm.weight": "model-00001-of-00002.safetensors",
1179
+ "model.vision_tower.encoder.layers.9.mlp.down_proj.linear.weight": "model-00001-of-00002.safetensors",
1180
+ "model.vision_tower.encoder.layers.9.mlp.gate_proj.linear.weight": "model-00001-of-00002.safetensors",
1181
+ "model.vision_tower.encoder.layers.9.mlp.up_proj.linear.weight": "model-00001-of-00002.safetensors",
1182
  "model.vision_tower.encoder.layers.9.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
1183
  "model.vision_tower.encoder.layers.9.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1184
  "model.vision_tower.encoder.layers.9.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1185
  "model.vision_tower.encoder.layers.9.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
1186
+ "model.vision_tower.encoder.layers.9.self_attn.k_proj.linear.weight": "model-00001-of-00002.safetensors",
1187
+ "model.vision_tower.encoder.layers.9.self_attn.o_proj.linear.weight": "model-00001-of-00002.safetensors",
1188
  "model.vision_tower.encoder.layers.9.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
1189
+ "model.vision_tower.encoder.layers.9.self_attn.q_proj.linear.weight": "model-00001-of-00002.safetensors",
1190
+ "model.vision_tower.encoder.layers.9.self_attn.v_proj.linear.weight": "model-00001-of-00002.safetensors",
1191
  "model.vision_tower.patch_embedder.input_proj.weight": "model-00001-of-00002.safetensors",
1192
+ "model.vision_tower.patch_embedder.position_embedding_table": "model-00001-of-00002.safetensors",
1193
+ "model.vision_tower.std_bias": "model-00001-of-00002.safetensors",
1194
+ "model.vision_tower.std_scale": "model-00001-of-00002.safetensors"
1195
  }
1196
  }
processor_config.json CHANGED
@@ -28,26 +28,22 @@
28
  "do_rescale": true,
29
  "do_resize": true,
30
  "image_mean": [
31
- 0.5,
32
- 0.5,
33
- 0.5
34
  ],
35
  "image_processor_type": "Gemma4ImageProcessor",
36
  "image_seq_length": 280,
37
  "image_std": [
38
- 0.5,
39
- 0.5,
40
- 0.5
41
  ],
42
  "max_soft_tokens": 280,
43
  "patch_size": 16,
44
  "pooling_kernel_size": 3,
45
  "resample": 3,
46
- "rescale_factor": 0.00392156862745098,
47
- "size": {
48
- "height": 224,
49
- "width": 224
50
- }
51
  },
52
  "image_seq_length": 280,
53
  "processor_class": "Gemma4Processor"
 
28
  "do_rescale": true,
29
  "do_resize": true,
30
  "image_mean": [
31
+ 0.0,
32
+ 0.0,
33
+ 0.0
34
  ],
35
  "image_processor_type": "Gemma4ImageProcessor",
36
  "image_seq_length": 280,
37
  "image_std": [
38
+ 1.0,
39
+ 1.0,
40
+ 1.0
41
  ],
42
  "max_soft_tokens": 280,
43
  "patch_size": 16,
44
  "pooling_kernel_size": 3,
45
  "resample": 3,
46
+ "rescale_factor": 0.00392156862745098
 
 
 
 
47
  },
48
  "image_seq_length": 280,
49
  "processor_class": "Gemma4Processor"