Upload folder using huggingface_hub
- convert_molmo_point_to_hf.py +2 -2
- model-00001-of-00008.safetensors +2 -2
- model-00002-of-00008.safetensors +2 -2
- model-00003-of-00008.safetensors +2 -2
- model-00004-of-00008.safetensors +1 -1
- model-00005-of-00008.safetensors +1 -1
- model-00006-of-00008.safetensors +1 -1
- model-00007-of-00008.safetensors +2 -2
- model-00008-of-00008.safetensors +2 -2
- model.safetensors.index.json +436 -436
- modeling_molmo_point.py +37 -21
convert_molmo_point_to_hf.py CHANGED
@@ -204,9 +204,9 @@ def convert_molmo2(
     new_state_dict = {}
     for key, val in state_dict.items():
         if key == "transformer.ff_out.new_weight":
-            new_key = "new_output_embeddings"
+            new_key = "lm_head.new_output_embeddings"
         elif key == "transformer.ff_out.weight":
-            new_key = "output_embeddings"
+            new_key = "lm_head.output_embeddings"
         else:
             new_key = f"{base_model_prefix}.{key}"
         new_state_dict[new_key] = val
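The change touches only the two output-embedding keys; every other tensor still receives the base_model_prefix. A minimal sketch of the same remapping, assuming a state dict that was already converted with the pre-change key names (the helper name is illustrative, not part of the script):

RENAMES = {
    "new_output_embeddings": "lm_head.new_output_embeddings",
    "output_embeddings": "lm_head.output_embeddings",
}

def remap_output_embeddings(state_dict):
    # Keys not listed in RENAMES pass through unchanged.
    return {RENAMES.get(key, key): value for key, value in state_dict.items()}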
model-00001-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:19ab63a529d72b101000396a03df34f2dc32e8d744b0c5cec50f0542eea6b8db
+size 4974567112
model-00002-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:19fbea500ac2cb32175131e3009b4aebfc3c373e48b07ea51783b1049df34761
+size 4630720272
model-00003-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:c66ec2e00cd832d67a5f24ace0e4330e13627b82444784986062df6a2a973ac4
+size 4630720296
model-00004-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:20b4298b55a145d9ccdef463ffe3c344cbfd90e7fa67d0778d50f66688dacd27
 size 4630720320
model-00005-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:eeac368f9670bad559e45aceea971ff78db86fafee604f17e28c1e58932a27de
 size 4630720320
model-00006-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:3738d142536fdc8e9217e7a853240006aff3fe5c157d982cff3e52addb05f011
 size 4630720320
model-00007-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:30485f5c86cefbfccb5eef21be09adf1149e4ec6f9cc75072c162900e6972226
+size 4091924852
model-00008-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:b87b59f8beb9a114e4867e522e3ec5116d05442f199901b48b1beb4fda6962d1
+size 2491416816
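Each pointer file records the shard's Git LFS oid (the sha256 of the blob) and its byte size. A small sketch for checking a downloaded shard against its pointer, assuming the shard sits in the working directory:

import hashlib
import os

def verify_lfs_pointer(path, expected_oid, expected_size):
    # The LFS oid (spec v1) is the sha256 of the file contents; size is in bytes.
    if os.path.getsize(path) != expected_size:
        return False
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == expected_oid

# For the first shard in this commit:
# verify_lfs_pointer("model-00001-of-00008.safetensors",
#                    "19ab63a529d72b101000396a03df34f2dc32e8d744b0c5cec50f0542eea6b8db",
#                    4974567112)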
model.safetensors.index.json CHANGED
@@ -4,44 +4,46 @@
     "total_size": 34711420260
   },
   "weight_map": {
-    "
-    "
-    "model.
-    "model.
-    "model.
-    "model.connector.image_pooling_2d.
-    "model.connector.image_pooling_2d.
-    "model.connector.image_pooling_2d.
-    "model.connector.image_pooling_2d.
-    "model.connector.
-    "model.connector.
-    "model.connector.image_projector.
-    "model.
-    "model.
-    "model.
-    "model.
-    "model.
-    "model.
-    "model.
-    "model.
-    "model.
-    "model.
-    "model.
-    "model.
-    "model.transformer.blocks.0.
-    "model.transformer.blocks.0.
-    "model.transformer.blocks.0.
-    "model.transformer.blocks.0.
-    "model.transformer.blocks.0.self_attn.
-    "model.transformer.blocks.0.self_attn.
-    "model.transformer.blocks.
-    "model.transformer.blocks.
-    "model.transformer.blocks.1.
-    "model.transformer.blocks.1.
-    "model.transformer.blocks.1.
-    "model.transformer.blocks.1.
-    "model.transformer.blocks.1.self_attn.
-    "model.transformer.blocks.1.self_attn.
+    "lm_head.new_output_embeddings": "model-00008-of-00008.safetensors",
+    "lm_head.output_embeddings": "model-00008-of-00008.safetensors",
+    "model.add_no_point_class_embed.vector": "model-00007-of-00008.safetensors",
+    "model.build_vit_embedding.bias": "model-00007-of-00008.safetensors",
+    "model.build_vit_embedding.weight": "model-00007-of-00008.safetensors",
+    "model.connector.image_pooling_2d.wk.bias": "model-00007-of-00008.safetensors",
+    "model.connector.image_pooling_2d.wk.weight": "model-00007-of-00008.safetensors",
+    "model.connector.image_pooling_2d.wq.bias": "model-00007-of-00008.safetensors",
+    "model.connector.image_pooling_2d.wq.weight": "model-00007-of-00008.safetensors",
+    "model.connector.image_pooling_2d.wv.bias": "model-00007-of-00008.safetensors",
+    "model.connector.image_pooling_2d.wv.weight": "model-00007-of-00008.safetensors",
+    "model.connector.image_projector.w1.weight": "model-00007-of-00008.safetensors",
+    "model.connector.image_projector.w2.weight": "model-00007-of-00008.safetensors",
+    "model.connector.image_projector.w3.weight": "model-00007-of-00008.safetensors",
+    "model.patch_k.bias": "model-00007-of-00008.safetensors",
+    "model.patch_k.weight": "model-00007-of-00008.safetensors",
+    "model.patch_q.bias": "model-00007-of-00008.safetensors",
+    "model.patch_q.weight": "model-00007-of-00008.safetensors",
+    "model.subpatch_k.bias": "model-00007-of-00008.safetensors",
+    "model.subpatch_k.weight": "model-00007-of-00008.safetensors",
+    "model.subpatch_loc_k.bias": "model-00007-of-00008.safetensors",
+    "model.subpatch_loc_k.weight": "model-00007-of-00008.safetensors",
+    "model.subpatch_q.bias": "model-00007-of-00008.safetensors",
+    "model.subpatch_q.weight": "model-00007-of-00008.safetensors",
+    "model.transformer.blocks.0.attn_norm.weight": "model-00001-of-00008.safetensors",
+    "model.transformer.blocks.0.ff_norm.weight": "model-00001-of-00008.safetensors",
+    "model.transformer.blocks.0.mlp.ff_out.weight": "model-00001-of-00008.safetensors",
+    "model.transformer.blocks.0.mlp.ff_proj.weight": "model-00001-of-00008.safetensors",
+    "model.transformer.blocks.0.self_attn.att_proj.weight": "model-00001-of-00008.safetensors",
+    "model.transformer.blocks.0.self_attn.attn_out.weight": "model-00001-of-00008.safetensors",
+    "model.transformer.blocks.0.self_attn.k_norm.weight": "model-00001-of-00008.safetensors",
+    "model.transformer.blocks.0.self_attn.q_norm.weight": "model-00001-of-00008.safetensors",
+    "model.transformer.blocks.1.attn_norm.weight": "model-00001-of-00008.safetensors",
+    "model.transformer.blocks.1.ff_norm.weight": "model-00001-of-00008.safetensors",
+    "model.transformer.blocks.1.mlp.ff_out.weight": "model-00001-of-00008.safetensors",
+    "model.transformer.blocks.1.mlp.ff_proj.weight": "model-00001-of-00008.safetensors",
+    "model.transformer.blocks.1.self_attn.att_proj.weight": "model-00001-of-00008.safetensors",
+    "model.transformer.blocks.1.self_attn.attn_out.weight": "model-00001-of-00008.safetensors",
+    "model.transformer.blocks.1.self_attn.k_norm.weight": "model-00001-of-00008.safetensors",
+    "model.transformer.blocks.1.self_attn.q_norm.weight": "model-00001-of-00008.safetensors",
     "model.transformer.blocks.10.attn_norm.weight": "model-00003-of-00008.safetensors",
     "model.transformer.blocks.10.ff_norm.weight": "model-00003-of-00008.safetensors",
     "model.transformer.blocks.10.mlp.ff_out.weight": "model-00003-of-00008.safetensors",
@@ -59,37 +61,37 @@
     "model.transformer.blocks.11.self_attn.k_norm.weight": "model-00003-of-00008.safetensors",
     "model.transformer.blocks.11.self_attn.q_norm.weight": "model-00003-of-00008.safetensors",
     "model.transformer.blocks.12.attn_norm.weight": "model-00003-of-00008.safetensors",
-    "model.transformer.blocks.12.ff_norm.weight": "model-
-    "model.transformer.blocks.12.mlp.ff_out.weight": "model-
-    "model.transformer.blocks.12.mlp.ff_proj.weight": "model-
+    "model.transformer.blocks.12.ff_norm.weight": "model-00003-of-00008.safetensors",
+    "model.transformer.blocks.12.mlp.ff_out.weight": "model-00003-of-00008.safetensors",
+    "model.transformer.blocks.12.mlp.ff_proj.weight": "model-00003-of-00008.safetensors",
     "model.transformer.blocks.12.self_attn.att_proj.weight": "model-00003-of-00008.safetensors",
     "model.transformer.blocks.12.self_attn.attn_out.weight": "model-00003-of-00008.safetensors",
     "model.transformer.blocks.12.self_attn.k_norm.weight": "model-00003-of-00008.safetensors",
     "model.transformer.blocks.12.self_attn.q_norm.weight": "model-00003-of-00008.safetensors",
-    "model.transformer.blocks.13.attn_norm.weight": "model-
-    "model.transformer.blocks.13.ff_norm.weight": "model-
-    "model.transformer.blocks.13.mlp.ff_out.weight": "model-
-    "model.transformer.blocks.13.mlp.ff_proj.weight": "model-
-    "model.transformer.blocks.13.self_attn.att_proj.weight": "model-
-    "model.transformer.blocks.13.self_attn.attn_out.weight": "model-
-    "model.transformer.blocks.13.self_attn.k_norm.weight": "model-
-    "model.transformer.blocks.13.self_attn.q_norm.weight": "model-
-    "model.transformer.blocks.14.attn_norm.weight": "model-
-    "model.transformer.blocks.14.ff_norm.weight": "model-
-    "model.transformer.blocks.14.mlp.ff_out.weight": "model-
-    "model.transformer.blocks.14.mlp.ff_proj.weight": "model-
-    "model.transformer.blocks.14.self_attn.att_proj.weight": "model-
-    "model.transformer.blocks.14.self_attn.attn_out.weight": "model-
-    "model.transformer.blocks.14.self_attn.k_norm.weight": "model-
-    "model.transformer.blocks.14.self_attn.q_norm.weight": "model-
-    "model.transformer.blocks.15.attn_norm.weight": "model-
+    "model.transformer.blocks.13.attn_norm.weight": "model-00003-of-00008.safetensors",
+    "model.transformer.blocks.13.ff_norm.weight": "model-00003-of-00008.safetensors",
+    "model.transformer.blocks.13.mlp.ff_out.weight": "model-00003-of-00008.safetensors",
+    "model.transformer.blocks.13.mlp.ff_proj.weight": "model-00003-of-00008.safetensors",
+    "model.transformer.blocks.13.self_attn.att_proj.weight": "model-00003-of-00008.safetensors",
+    "model.transformer.blocks.13.self_attn.attn_out.weight": "model-00003-of-00008.safetensors",
+    "model.transformer.blocks.13.self_attn.k_norm.weight": "model-00003-of-00008.safetensors",
+    "model.transformer.blocks.13.self_attn.q_norm.weight": "model-00003-of-00008.safetensors",
+    "model.transformer.blocks.14.attn_norm.weight": "model-00003-of-00008.safetensors",
+    "model.transformer.blocks.14.ff_norm.weight": "model-00003-of-00008.safetensors",
+    "model.transformer.blocks.14.mlp.ff_out.weight": "model-00003-of-00008.safetensors",
+    "model.transformer.blocks.14.mlp.ff_proj.weight": "model-00003-of-00008.safetensors",
+    "model.transformer.blocks.14.self_attn.att_proj.weight": "model-00003-of-00008.safetensors",
+    "model.transformer.blocks.14.self_attn.attn_out.weight": "model-00003-of-00008.safetensors",
+    "model.transformer.blocks.14.self_attn.k_norm.weight": "model-00003-of-00008.safetensors",
+    "model.transformer.blocks.14.self_attn.q_norm.weight": "model-00003-of-00008.safetensors",
+    "model.transformer.blocks.15.attn_norm.weight": "model-00003-of-00008.safetensors",
     "model.transformer.blocks.15.ff_norm.weight": "model-00004-of-00008.safetensors",
     "model.transformer.blocks.15.mlp.ff_out.weight": "model-00004-of-00008.safetensors",
     "model.transformer.blocks.15.mlp.ff_proj.weight": "model-00004-of-00008.safetensors",
-    "model.transformer.blocks.15.self_attn.att_proj.weight": "model-
-    "model.transformer.blocks.15.self_attn.attn_out.weight": "model-
-    "model.transformer.blocks.15.self_attn.k_norm.weight": "model-
-    "model.transformer.blocks.15.self_attn.q_norm.weight": "model-
+    "model.transformer.blocks.15.self_attn.att_proj.weight": "model-00003-of-00008.safetensors",
+    "model.transformer.blocks.15.self_attn.attn_out.weight": "model-00003-of-00008.safetensors",
+    "model.transformer.blocks.15.self_attn.k_norm.weight": "model-00003-of-00008.safetensors",
+    "model.transformer.blocks.15.self_attn.q_norm.weight": "model-00003-of-00008.safetensors",
     "model.transformer.blocks.16.attn_norm.weight": "model-00004-of-00008.safetensors",
     "model.transformer.blocks.16.ff_norm.weight": "model-00004-of-00008.safetensors",
     "model.transformer.blocks.16.mlp.ff_out.weight": "model-00004-of-00008.safetensors",
@@ -107,45 +109,45 @@
     "model.transformer.blocks.17.self_attn.k_norm.weight": "model-00004-of-00008.safetensors",
     "model.transformer.blocks.17.self_attn.q_norm.weight": "model-00004-of-00008.safetensors",
     "model.transformer.blocks.18.attn_norm.weight": "model-00004-of-00008.safetensors",
-    "model.transformer.blocks.18.ff_norm.weight": "model-
-    "model.transformer.blocks.18.mlp.ff_out.weight": "model-
-    "model.transformer.blocks.18.mlp.ff_proj.weight": "model-
+    "model.transformer.blocks.18.ff_norm.weight": "model-00004-of-00008.safetensors",
+    "model.transformer.blocks.18.mlp.ff_out.weight": "model-00004-of-00008.safetensors",
+    "model.transformer.blocks.18.mlp.ff_proj.weight": "model-00004-of-00008.safetensors",
     "model.transformer.blocks.18.self_attn.att_proj.weight": "model-00004-of-00008.safetensors",
     "model.transformer.blocks.18.self_attn.attn_out.weight": "model-00004-of-00008.safetensors",
     "model.transformer.blocks.18.self_attn.k_norm.weight": "model-00004-of-00008.safetensors",
     "model.transformer.blocks.18.self_attn.q_norm.weight": "model-00004-of-00008.safetensors",
-    "model.transformer.blocks.19.attn_norm.weight": "model-
-    "model.transformer.blocks.19.ff_norm.weight": "model-
-    "model.transformer.blocks.19.mlp.ff_out.weight": "model-
-    "model.transformer.blocks.19.mlp.ff_proj.weight": "model-
-    "model.transformer.blocks.19.self_attn.att_proj.weight": "model-
-    "model.transformer.blocks.19.self_attn.attn_out.weight": "model-
-    "model.transformer.blocks.19.self_attn.k_norm.weight": "model-
-    "model.transformer.blocks.19.self_attn.q_norm.weight": "model-
-    "model.transformer.blocks.2.attn_norm.weight": "model-
-    "model.transformer.blocks.2.ff_norm.weight": "model-
-    "model.transformer.blocks.2.mlp.ff_out.weight": "model-
-    "model.transformer.blocks.2.mlp.ff_proj.weight": "model-
-    "model.transformer.blocks.2.self_attn.att_proj.weight": "model-
-    "model.transformer.blocks.2.self_attn.attn_out.weight": "model-
-    "model.transformer.blocks.2.self_attn.k_norm.weight": "model-
-    "model.transformer.blocks.2.self_attn.q_norm.weight": "model-
-    "model.transformer.blocks.20.attn_norm.weight": "model-
-    "model.transformer.blocks.20.ff_norm.weight": "model-
-    "model.transformer.blocks.20.mlp.ff_out.weight": "model-
-    "model.transformer.blocks.20.mlp.ff_proj.weight": "model-
-    "model.transformer.blocks.20.self_attn.att_proj.weight": "model-
-    "model.transformer.blocks.20.self_attn.attn_out.weight": "model-
-    "model.transformer.blocks.20.self_attn.k_norm.weight": "model-
-    "model.transformer.blocks.20.self_attn.q_norm.weight": "model-
-    "model.transformer.blocks.21.attn_norm.weight": "model-
+    "model.transformer.blocks.19.attn_norm.weight": "model-00004-of-00008.safetensors",
+    "model.transformer.blocks.19.ff_norm.weight": "model-00004-of-00008.safetensors",
+    "model.transformer.blocks.19.mlp.ff_out.weight": "model-00004-of-00008.safetensors",
+    "model.transformer.blocks.19.mlp.ff_proj.weight": "model-00004-of-00008.safetensors",
+    "model.transformer.blocks.19.self_attn.att_proj.weight": "model-00004-of-00008.safetensors",
+    "model.transformer.blocks.19.self_attn.attn_out.weight": "model-00004-of-00008.safetensors",
+    "model.transformer.blocks.19.self_attn.k_norm.weight": "model-00004-of-00008.safetensors",
+    "model.transformer.blocks.19.self_attn.q_norm.weight": "model-00004-of-00008.safetensors",
+    "model.transformer.blocks.2.attn_norm.weight": "model-00001-of-00008.safetensors",
+    "model.transformer.blocks.2.ff_norm.weight": "model-00001-of-00008.safetensors",
+    "model.transformer.blocks.2.mlp.ff_out.weight": "model-00001-of-00008.safetensors",
+    "model.transformer.blocks.2.mlp.ff_proj.weight": "model-00001-of-00008.safetensors",
+    "model.transformer.blocks.2.self_attn.att_proj.weight": "model-00001-of-00008.safetensors",
+    "model.transformer.blocks.2.self_attn.attn_out.weight": "model-00001-of-00008.safetensors",
+    "model.transformer.blocks.2.self_attn.k_norm.weight": "model-00001-of-00008.safetensors",
+    "model.transformer.blocks.2.self_attn.q_norm.weight": "model-00001-of-00008.safetensors",
+    "model.transformer.blocks.20.attn_norm.weight": "model-00004-of-00008.safetensors",
+    "model.transformer.blocks.20.ff_norm.weight": "model-00004-of-00008.safetensors",
+    "model.transformer.blocks.20.mlp.ff_out.weight": "model-00004-of-00008.safetensors",
+    "model.transformer.blocks.20.mlp.ff_proj.weight": "model-00004-of-00008.safetensors",
+    "model.transformer.blocks.20.self_attn.att_proj.weight": "model-00004-of-00008.safetensors",
+    "model.transformer.blocks.20.self_attn.attn_out.weight": "model-00004-of-00008.safetensors",
+    "model.transformer.blocks.20.self_attn.k_norm.weight": "model-00004-of-00008.safetensors",
+    "model.transformer.blocks.20.self_attn.q_norm.weight": "model-00004-of-00008.safetensors",
+    "model.transformer.blocks.21.attn_norm.weight": "model-00004-of-00008.safetensors",
     "model.transformer.blocks.21.ff_norm.weight": "model-00005-of-00008.safetensors",
     "model.transformer.blocks.21.mlp.ff_out.weight": "model-00005-of-00008.safetensors",
     "model.transformer.blocks.21.mlp.ff_proj.weight": "model-00005-of-00008.safetensors",
-    "model.transformer.blocks.21.self_attn.att_proj.weight": "model-
-    "model.transformer.blocks.21.self_attn.attn_out.weight": "model-
-    "model.transformer.blocks.21.self_attn.k_norm.weight": "model-
-    "model.transformer.blocks.21.self_attn.q_norm.weight": "model-
+    "model.transformer.blocks.21.self_attn.att_proj.weight": "model-00004-of-00008.safetensors",
+    "model.transformer.blocks.21.self_attn.attn_out.weight": "model-00004-of-00008.safetensors",
+    "model.transformer.blocks.21.self_attn.k_norm.weight": "model-00004-of-00008.safetensors",
+    "model.transformer.blocks.21.self_attn.q_norm.weight": "model-00004-of-00008.safetensors",
     "model.transformer.blocks.22.attn_norm.weight": "model-00005-of-00008.safetensors",
     "model.transformer.blocks.22.ff_norm.weight": "model-00005-of-00008.safetensors",
     "model.transformer.blocks.22.mlp.ff_out.weight": "model-00005-of-00008.safetensors",
@@ -163,37 +165,37 @@
     "model.transformer.blocks.23.self_attn.k_norm.weight": "model-00005-of-00008.safetensors",
     "model.transformer.blocks.23.self_attn.q_norm.weight": "model-00005-of-00008.safetensors",
     "model.transformer.blocks.24.attn_norm.weight": "model-00005-of-00008.safetensors",
-    "model.transformer.blocks.24.ff_norm.weight": "model-
-    "model.transformer.blocks.24.mlp.ff_out.weight": "model-
-    "model.transformer.blocks.24.mlp.ff_proj.weight": "model-
+    "model.transformer.blocks.24.ff_norm.weight": "model-00005-of-00008.safetensors",
+    "model.transformer.blocks.24.mlp.ff_out.weight": "model-00005-of-00008.safetensors",
+    "model.transformer.blocks.24.mlp.ff_proj.weight": "model-00005-of-00008.safetensors",
     "model.transformer.blocks.24.self_attn.att_proj.weight": "model-00005-of-00008.safetensors",
     "model.transformer.blocks.24.self_attn.attn_out.weight": "model-00005-of-00008.safetensors",
     "model.transformer.blocks.24.self_attn.k_norm.weight": "model-00005-of-00008.safetensors",
     "model.transformer.blocks.24.self_attn.q_norm.weight": "model-00005-of-00008.safetensors",
-    "model.transformer.blocks.25.attn_norm.weight": "model-
-    "model.transformer.blocks.25.ff_norm.weight": "model-
-    "model.transformer.blocks.25.mlp.ff_out.weight": "model-
-    "model.transformer.blocks.25.mlp.ff_proj.weight": "model-
-    "model.transformer.blocks.25.self_attn.att_proj.weight": "model-
-    "model.transformer.blocks.25.self_attn.attn_out.weight": "model-
-    "model.transformer.blocks.25.self_attn.k_norm.weight": "model-
-    "model.transformer.blocks.25.self_attn.q_norm.weight": "model-
-    "model.transformer.blocks.26.attn_norm.weight": "model-
-    "model.transformer.blocks.26.ff_norm.weight": "model-
-    "model.transformer.blocks.26.mlp.ff_out.weight": "model-
-    "model.transformer.blocks.26.mlp.ff_proj.weight": "model-
-    "model.transformer.blocks.26.self_attn.att_proj.weight": "model-
-    "model.transformer.blocks.26.self_attn.attn_out.weight": "model-
-    "model.transformer.blocks.26.self_attn.k_norm.weight": "model-
-    "model.transformer.blocks.26.self_attn.q_norm.weight": "model-
-    "model.transformer.blocks.27.attn_norm.weight": "model-
+    "model.transformer.blocks.25.attn_norm.weight": "model-00005-of-00008.safetensors",
+    "model.transformer.blocks.25.ff_norm.weight": "model-00005-of-00008.safetensors",
+    "model.transformer.blocks.25.mlp.ff_out.weight": "model-00005-of-00008.safetensors",
+    "model.transformer.blocks.25.mlp.ff_proj.weight": "model-00005-of-00008.safetensors",
+    "model.transformer.blocks.25.self_attn.att_proj.weight": "model-00005-of-00008.safetensors",
+    "model.transformer.blocks.25.self_attn.attn_out.weight": "model-00005-of-00008.safetensors",
+    "model.transformer.blocks.25.self_attn.k_norm.weight": "model-00005-of-00008.safetensors",
+    "model.transformer.blocks.25.self_attn.q_norm.weight": "model-00005-of-00008.safetensors",
+    "model.transformer.blocks.26.attn_norm.weight": "model-00005-of-00008.safetensors",
+    "model.transformer.blocks.26.ff_norm.weight": "model-00005-of-00008.safetensors",
+    "model.transformer.blocks.26.mlp.ff_out.weight": "model-00005-of-00008.safetensors",
+    "model.transformer.blocks.26.mlp.ff_proj.weight": "model-00005-of-00008.safetensors",
+    "model.transformer.blocks.26.self_attn.att_proj.weight": "model-00005-of-00008.safetensors",
+    "model.transformer.blocks.26.self_attn.attn_out.weight": "model-00005-of-00008.safetensors",
+    "model.transformer.blocks.26.self_attn.k_norm.weight": "model-00005-of-00008.safetensors",
+    "model.transformer.blocks.26.self_attn.q_norm.weight": "model-00005-of-00008.safetensors",
+    "model.transformer.blocks.27.attn_norm.weight": "model-00005-of-00008.safetensors",
     "model.transformer.blocks.27.ff_norm.weight": "model-00006-of-00008.safetensors",
     "model.transformer.blocks.27.mlp.ff_out.weight": "model-00006-of-00008.safetensors",
     "model.transformer.blocks.27.mlp.ff_proj.weight": "model-00006-of-00008.safetensors",
-    "model.transformer.blocks.27.self_attn.att_proj.weight": "model-
-    "model.transformer.blocks.27.self_attn.attn_out.weight": "model-
-    "model.transformer.blocks.27.self_attn.k_norm.weight": "model-
-    "model.transformer.blocks.27.self_attn.q_norm.weight": "model-
+    "model.transformer.blocks.27.self_attn.att_proj.weight": "model-00005-of-00008.safetensors",
+    "model.transformer.blocks.27.self_attn.attn_out.weight": "model-00005-of-00008.safetensors",
+    "model.transformer.blocks.27.self_attn.k_norm.weight": "model-00005-of-00008.safetensors",
+    "model.transformer.blocks.27.self_attn.q_norm.weight": "model-00005-of-00008.safetensors",
     "model.transformer.blocks.28.attn_norm.weight": "model-00006-of-00008.safetensors",
     "model.transformer.blocks.28.ff_norm.weight": "model-00006-of-00008.safetensors",
     "model.transformer.blocks.28.mlp.ff_out.weight": "model-00006-of-00008.safetensors",
@@ -210,46 +212,46 @@
     "model.transformer.blocks.29.self_attn.attn_out.weight": "model-00006-of-00008.safetensors",
     "model.transformer.blocks.29.self_attn.k_norm.weight": "model-00006-of-00008.safetensors",
     "model.transformer.blocks.29.self_attn.q_norm.weight": "model-00006-of-00008.safetensors",
-    "model.transformer.blocks.3.attn_norm.weight": "model-
+    "model.transformer.blocks.3.attn_norm.weight": "model-00001-of-00008.safetensors",
     "model.transformer.blocks.3.ff_norm.weight": "model-00002-of-00008.safetensors",
     "model.transformer.blocks.3.mlp.ff_out.weight": "model-00002-of-00008.safetensors",
     "model.transformer.blocks.3.mlp.ff_proj.weight": "model-00002-of-00008.safetensors",
-    "model.transformer.blocks.3.self_attn.att_proj.weight": "model-
-    "model.transformer.blocks.3.self_attn.attn_out.weight": "model-
-    "model.transformer.blocks.3.self_attn.k_norm.weight": "model-
-    "model.transformer.blocks.3.self_attn.q_norm.weight": "model-
     "model.transformer.blocks.30.attn_norm.weight": "model-00006-of-00008.safetensors",
-    "model.transformer.blocks.30.ff_norm.weight": "model-
-    "model.transformer.blocks.30.mlp.ff_out.weight": "model-
-    "model.transformer.blocks.30.mlp.ff_proj.weight": "model-
     "model.transformer.blocks.30.self_attn.att_proj.weight": "model-00006-of-00008.safetensors",
     "model.transformer.blocks.30.self_attn.attn_out.weight": "model-00006-of-00008.safetensors",
     "model.transformer.blocks.30.self_attn.k_norm.weight": "model-00006-of-00008.safetensors",
     "model.transformer.blocks.30.self_attn.q_norm.weight": "model-00006-of-00008.safetensors",
-    "model.transformer.blocks.31.attn_norm.weight": "model-
-    "model.transformer.blocks.31.ff_norm.weight": "model-
-    "model.transformer.blocks.31.mlp.ff_out.weight": "model-
-    "model.transformer.blocks.31.mlp.ff_proj.weight": "model-
-    "model.transformer.blocks.31.self_attn.att_proj.weight": "model-
-    "model.transformer.blocks.31.self_attn.attn_out.weight": "model-
-    "model.transformer.blocks.31.self_attn.k_norm.weight": "model-
-    "model.transformer.blocks.31.self_attn.q_norm.weight": "model-
-    "model.transformer.blocks.32.attn_norm.weight": "model-
-    "model.transformer.blocks.32.ff_norm.weight": "model-
-    "model.transformer.blocks.32.mlp.ff_out.weight": "model-
-    "model.transformer.blocks.32.mlp.ff_proj.weight": "model-
-    "model.transformer.blocks.32.self_attn.att_proj.weight": "model-
-    "model.transformer.blocks.32.self_attn.attn_out.weight": "model-
-    "model.transformer.blocks.32.self_attn.k_norm.weight": "model-
-    "model.transformer.blocks.32.self_attn.q_norm.weight": "model-
-    "model.transformer.blocks.33.attn_norm.weight": "model-
     "model.transformer.blocks.33.ff_norm.weight": "model-00007-of-00008.safetensors",
     "model.transformer.blocks.33.mlp.ff_out.weight": "model-00007-of-00008.safetensors",
     "model.transformer.blocks.33.mlp.ff_proj.weight": "model-00007-of-00008.safetensors",
-    "model.transformer.blocks.33.self_attn.att_proj.weight": "model-
-    "model.transformer.blocks.33.self_attn.attn_out.weight": "model-
-    "model.transformer.blocks.33.self_attn.k_norm.weight": "model-
-    "model.transformer.blocks.33.self_attn.q_norm.weight": "model-
     "model.transformer.blocks.34.attn_norm.weight": "model-00007-of-00008.safetensors",
     "model.transformer.blocks.34.ff_norm.weight": "model-00007-of-00008.safetensors",
     "model.transformer.blocks.34.mlp.ff_out.weight": "model-00007-of-00008.safetensors",
@@ -283,37 +285,37 @@
     "model.transformer.blocks.5.self_attn.k_norm.weight": "model-00002-of-00008.safetensors",
     "model.transformer.blocks.5.self_attn.q_norm.weight": "model-00002-of-00008.safetensors",
     "model.transformer.blocks.6.attn_norm.weight": "model-00002-of-00008.safetensors",
-    "model.transformer.blocks.6.ff_norm.weight": "model-
-    "model.transformer.blocks.6.mlp.ff_out.weight": "model-
-    "model.transformer.blocks.6.mlp.ff_proj.weight": "model-
     "model.transformer.blocks.6.self_attn.att_proj.weight": "model-00002-of-00008.safetensors",
     "model.transformer.blocks.6.self_attn.attn_out.weight": "model-00002-of-00008.safetensors",
     "model.transformer.blocks.6.self_attn.k_norm.weight": "model-00002-of-00008.safetensors",
     "model.transformer.blocks.6.self_attn.q_norm.weight": "model-00002-of-00008.safetensors",
-    "model.transformer.blocks.7.attn_norm.weight": "model-
-    "model.transformer.blocks.7.ff_norm.weight": "model-
-    "model.transformer.blocks.7.mlp.ff_out.weight": "model-
-    "model.transformer.blocks.7.mlp.ff_proj.weight": "model-
-    "model.transformer.blocks.7.self_attn.att_proj.weight": "model-
-    "model.transformer.blocks.7.self_attn.attn_out.weight": "model-
-    "model.transformer.blocks.7.self_attn.k_norm.weight": "model-
-    "model.transformer.blocks.7.self_attn.q_norm.weight": "model-
-    "model.transformer.blocks.8.attn_norm.weight": "model-
-    "model.transformer.blocks.8.ff_norm.weight": "model-
-    "model.transformer.blocks.8.mlp.ff_out.weight": "model-
-    "model.transformer.blocks.8.mlp.ff_proj.weight": "model-
-    "model.transformer.blocks.8.self_attn.att_proj.weight": "model-
-    "model.transformer.blocks.8.self_attn.attn_out.weight": "model-
-    "model.transformer.blocks.8.self_attn.k_norm.weight": "model-
-    "model.transformer.blocks.8.self_attn.q_norm.weight": "model-
-    "model.transformer.blocks.9.attn_norm.weight": "model-
     "model.transformer.blocks.9.ff_norm.weight": "model-00003-of-00008.safetensors",
     "model.transformer.blocks.9.mlp.ff_out.weight": "model-00003-of-00008.safetensors",
     "model.transformer.blocks.9.mlp.ff_proj.weight": "model-00003-of-00008.safetensors",
-    "model.transformer.blocks.9.self_attn.att_proj.weight": "model-
-    "model.transformer.blocks.9.self_attn.attn_out.weight": "model-
-    "model.transformer.blocks.9.self_attn.k_norm.weight": "model-
-    "model.transformer.blocks.9.self_attn.q_norm.weight": "model-
     "model.transformer.ln_f.weight": "model-00007-of-00008.safetensors",
     "model.transformer.wte.embedding": "model-00001-of-00008.safetensors",
     "model.transformer.wte.new_embedding": "model-00001-of-00008.safetensors",
@@ -352,166 +354,166 @@
     "model.vit.transformer.resblocks.1.feed_forward.w2.weight": "model-00007-of-00008.safetensors",
     "model.vit.transformer.resblocks.1.ffn_norm.bias": "model-00007-of-00008.safetensors",
     "model.vit.transformer.resblocks.1.ffn_norm.weight": "model-00007-of-00008.safetensors",
-    "model.vit.transformer.resblocks.10.attention.wk.bias": "model-
-    "model.vit.transformer.resblocks.10.attention.wk.weight": "model-
-    "model.vit.transformer.resblocks.10.attention.wo.bias": "model-
-    "model.vit.transformer.resblocks.10.attention.wo.weight": "model-
-    "model.vit.transformer.resblocks.10.attention.wq.bias": "model-
-    "model.vit.transformer.resblocks.10.attention.wq.weight": "model-
-    "model.vit.transformer.resblocks.10.attention.wv.bias": "model-
-    "model.vit.transformer.resblocks.10.attention.wv.weight": "model-
-    "model.vit.transformer.resblocks.10.attention_norm.bias": "model-
-    "model.vit.transformer.resblocks.10.attention_norm.weight": "model-
-    "model.vit.transformer.resblocks.10.feed_forward.w1.bias": "model-
-    "model.vit.transformer.resblocks.10.feed_forward.w1.weight": "model-
-    "model.vit.transformer.resblocks.10.feed_forward.w2.bias": "model-
-    "model.vit.transformer.resblocks.10.feed_forward.w2.weight": "model-
-    "model.vit.transformer.resblocks.10.ffn_norm.bias": "model-
-    "model.vit.transformer.resblocks.10.ffn_norm.weight": "model-
-    "model.vit.transformer.resblocks.11.attention.wk.bias": "model-
-    "model.vit.transformer.resblocks.11.attention.wk.weight": "model-
-    "model.vit.transformer.resblocks.11.attention.wo.bias": "model-
-    "model.vit.transformer.resblocks.11.attention.wo.weight": "model-
-    "model.vit.transformer.resblocks.11.attention.wq.bias": "model-
-    "model.vit.transformer.resblocks.11.attention.wq.weight": "model-
-    "model.vit.transformer.resblocks.11.attention.wv.bias": "model-
-    "model.vit.transformer.resblocks.11.attention.wv.weight": "model-
-    "model.vit.transformer.resblocks.11.attention_norm.bias": "model-
-    "model.vit.transformer.resblocks.11.attention_norm.weight": "model-
-    "model.vit.transformer.resblocks.11.feed_forward.w1.bias": "model-
-    "model.vit.transformer.resblocks.11.feed_forward.w1.weight": "model-
-    "model.vit.transformer.resblocks.11.feed_forward.w2.bias": "model-
-    "model.vit.transformer.resblocks.11.feed_forward.w2.weight": "model-
-    "model.vit.transformer.resblocks.11.ffn_norm.bias": "model-
-    "model.vit.transformer.resblocks.11.ffn_norm.weight": "model-
-    "model.vit.transformer.resblocks.12.attention.wk.bias": "model-
-    "model.vit.transformer.resblocks.12.attention.wk.weight": "model-
-    "model.vit.transformer.resblocks.12.attention.wo.bias": "model-
-    "model.vit.transformer.resblocks.12.attention.wo.weight": "model-
-    "model.vit.transformer.resblocks.12.attention.wq.bias": "model-
-    "model.vit.transformer.resblocks.12.attention.wq.weight": "model-
-    "model.vit.transformer.resblocks.12.attention.wv.bias": "model-
-    "model.vit.transformer.resblocks.12.attention.wv.weight": "model-
-    "model.vit.transformer.resblocks.12.attention_norm.bias": "model-
-    "model.vit.transformer.resblocks.12.attention_norm.weight": "model-
-    "model.vit.transformer.resblocks.12.feed_forward.w1.bias": "model-
-    "model.vit.transformer.resblocks.12.feed_forward.w1.weight": "model-
-    "model.vit.transformer.resblocks.12.feed_forward.w2.bias": "model-
-    "model.vit.transformer.resblocks.12.feed_forward.w2.weight": "model-
-    "model.vit.transformer.resblocks.12.ffn_norm.bias": "model-
-    "model.vit.transformer.resblocks.12.ffn_norm.weight": "model-
-    "model.vit.transformer.resblocks.13.attention.wk.bias": "model-
-    "model.vit.transformer.resblocks.13.attention.wk.weight": "model-
-    "model.vit.transformer.resblocks.13.attention.wo.bias": "model-
-    "model.vit.transformer.resblocks.13.attention.wo.weight": "model-
-    "model.vit.transformer.resblocks.13.attention.wq.bias": "model-
-    "model.vit.transformer.resblocks.13.attention.wq.weight": "model-
-    "model.vit.transformer.resblocks.13.attention.wv.bias": "model-
-    "model.vit.transformer.resblocks.13.attention.wv.weight": "model-
-    "model.vit.transformer.resblocks.13.attention_norm.bias": "model-
-    "model.vit.transformer.resblocks.13.attention_norm.weight": "model-
-    "model.vit.transformer.resblocks.13.feed_forward.w1.bias": "model-
-    "model.vit.transformer.resblocks.13.feed_forward.w1.weight": "model-
-    "model.vit.transformer.resblocks.13.feed_forward.w2.bias": "model-
-    "model.vit.transformer.resblocks.13.feed_forward.w2.weight": "model-
-    "model.vit.transformer.resblocks.13.ffn_norm.bias": "model-
-    "model.vit.transformer.resblocks.13.ffn_norm.weight": "model-
-    "model.vit.transformer.resblocks.14.attention.wk.bias": "model-
-    "model.vit.transformer.resblocks.14.attention.wk.weight": "model-
-    "model.vit.transformer.resblocks.14.attention.wo.bias": "model-
-    "model.vit.transformer.resblocks.14.attention.wo.weight": "model-
-    "model.vit.transformer.resblocks.14.attention.wq.bias": "model-
-    "model.vit.transformer.resblocks.14.attention.wq.weight": "model-
-    "model.vit.transformer.resblocks.14.attention.wv.bias": "model-
-    "model.vit.transformer.resblocks.14.attention.wv.weight": "model-
-    "model.vit.transformer.resblocks.14.attention_norm.bias": "model-
-    "model.vit.transformer.resblocks.14.attention_norm.weight": "model-
-    "model.vit.transformer.resblocks.14.feed_forward.w1.bias": "model-
-    "model.vit.transformer.resblocks.14.feed_forward.w1.weight": "model-
-    "model.vit.transformer.resblocks.14.feed_forward.w2.bias": "model-
-    "model.vit.transformer.resblocks.14.feed_forward.w2.weight": "model-
-    "model.vit.transformer.resblocks.14.ffn_norm.bias": "model-
-    "model.vit.transformer.resblocks.14.ffn_norm.weight": "model-
-    "model.vit.transformer.resblocks.15.attention.wk.bias": "model-
-    "model.vit.transformer.resblocks.15.attention.wk.weight": "model-
-    "model.vit.transformer.resblocks.15.attention.wo.bias": "model-
-    "model.vit.transformer.resblocks.15.attention.wo.weight": "model-
-    "model.vit.transformer.resblocks.15.attention.wq.bias": "model-
-    "model.vit.transformer.resblocks.15.attention.wq.weight": "model-
-    "model.vit.transformer.resblocks.15.attention.wv.bias": "model-
-    "model.vit.transformer.resblocks.15.attention.wv.weight": "model-
-    "model.vit.transformer.resblocks.15.attention_norm.bias": "model-
-    "model.vit.transformer.resblocks.15.attention_norm.weight": "model-
-    "model.vit.transformer.resblocks.15.feed_forward.w1.bias": "model-
-    "model.vit.transformer.resblocks.15.feed_forward.w1.weight": "model-
-    "model.vit.transformer.resblocks.15.feed_forward.w2.bias": "model-
-    "model.vit.transformer.resblocks.15.feed_forward.w2.weight": "model-
-    "model.vit.transformer.resblocks.15.ffn_norm.bias": "model-
-    "model.vit.transformer.resblocks.15.ffn_norm.weight": "model-
-    "model.vit.transformer.resblocks.16.attention.wk.bias": "model-
-    "model.vit.transformer.resblocks.16.attention.wk.weight": "model-
-    "model.vit.transformer.resblocks.16.attention.wo.bias": "model-
-    "model.vit.transformer.resblocks.16.attention.wo.weight": "model-
-    "model.vit.transformer.resblocks.16.attention.wq.bias": "model-
-    "model.vit.transformer.resblocks.16.attention.wq.weight": "model-
-    "model.vit.transformer.resblocks.16.attention.wv.bias": "model-
-    "model.vit.transformer.resblocks.16.attention.wv.weight": "model-
-    "model.vit.transformer.resblocks.16.attention_norm.bias": "model-
-    "model.vit.transformer.resblocks.16.attention_norm.weight": "model-
-    "model.vit.transformer.resblocks.16.feed_forward.w1.bias": "model-
-    "model.vit.transformer.resblocks.16.feed_forward.w1.weight": "model-
-    "model.vit.transformer.resblocks.16.feed_forward.w2.bias": "model-
-    "model.vit.transformer.resblocks.16.feed_forward.w2.weight": "model-
-    "model.vit.transformer.resblocks.16.ffn_norm.bias": "model-
-    "model.vit.transformer.resblocks.16.ffn_norm.weight": "model-
-    "model.vit.transformer.resblocks.17.attention.wk.bias": "model-
-    "model.vit.transformer.resblocks.17.attention.wk.weight": "model-
-    "model.vit.transformer.resblocks.17.attention.wo.bias": "model-
-    "model.vit.transformer.resblocks.17.attention.wo.weight": "model-
-    "model.vit.transformer.resblocks.17.attention.wq.bias": "model-
-    "model.vit.transformer.resblocks.17.attention.wq.weight": "model-
-    "model.vit.transformer.resblocks.17.attention.wv.bias": "model-
-    "model.vit.transformer.resblocks.17.attention.wv.weight": "model-
-    "model.vit.transformer.resblocks.17.attention_norm.bias": "model-
-    "model.vit.transformer.resblocks.17.attention_norm.weight": "model-
-    "model.vit.transformer.resblocks.17.feed_forward.w1.bias": "model-
-    "model.vit.transformer.resblocks.17.feed_forward.w1.weight": "model-
-    "model.vit.transformer.resblocks.17.feed_forward.w2.bias": "model-
-    "model.vit.transformer.resblocks.17.feed_forward.w2.weight": "model-
-    "model.vit.transformer.resblocks.17.ffn_norm.bias": "model-
-    "model.vit.transformer.resblocks.17.ffn_norm.weight": "model-
-    "model.vit.transformer.resblocks.18.attention.wk.bias": "model-
-    "model.vit.transformer.resblocks.18.attention.wk.weight": "model-
-    "model.vit.transformer.resblocks.18.attention.wo.bias": "model-
-    "model.vit.transformer.resblocks.18.attention.wo.weight": "model-
-    "model.vit.transformer.resblocks.18.attention.wq.bias": "model-
-    "model.vit.transformer.resblocks.18.attention.wq.weight": "model-
-    "model.vit.transformer.resblocks.18.attention.wv.bias": "model-
-    "model.vit.transformer.resblocks.18.attention.wv.weight": "model-
-    "model.vit.transformer.resblocks.18.attention_norm.bias": "model-
-    "model.vit.transformer.resblocks.18.attention_norm.weight": "model-
-    "model.vit.transformer.resblocks.18.feed_forward.w1.bias": "model-
-    "model.vit.transformer.resblocks.18.feed_forward.w1.weight": "model-
-    "model.vit.transformer.resblocks.18.feed_forward.w2.bias": "model-
-    "model.vit.transformer.resblocks.18.feed_forward.w2.weight": "model-
-    "model.vit.transformer.resblocks.18.ffn_norm.bias": "model-
-    "model.vit.transformer.resblocks.18.ffn_norm.weight": "model-
-    "model.vit.transformer.resblocks.19.attention.wk.bias": "model-
-    "model.vit.transformer.resblocks.19.attention.wk.weight": "model-
-    "model.vit.transformer.resblocks.19.attention.wo.bias": "model-
-    "model.vit.transformer.resblocks.19.attention.wo.weight": "model-
-    "model.vit.transformer.resblocks.19.attention.wq.bias": "model-
-    "model.vit.transformer.resblocks.19.attention.wq.weight": "model-
-    "model.vit.transformer.resblocks.19.attention.wv.bias": "model-
-    "model.vit.transformer.resblocks.19.attention.wv.weight": "model-
-    "model.vit.transformer.resblocks.19.attention_norm.bias": "model-
-    "model.vit.transformer.resblocks.19.attention_norm.weight": "model-
-    "model.vit.transformer.resblocks.19.feed_forward.w1.bias": "model-
-    "model.vit.transformer.resblocks.19.feed_forward.w1.weight": "model-
-    "model.vit.transformer.resblocks.19.feed_forward.w2.bias": "model-
-    "model.vit.transformer.resblocks.19.feed_forward.w2.weight": "model-
-    "model.vit.transformer.resblocks.19.ffn_norm.bias": "model-
-    "model.vit.transformer.resblocks.19.ffn_norm.weight": "model-
     "model.vit.transformer.resblocks.2.attention.wk.bias": "model-00007-of-00008.safetensors",
     "model.vit.transformer.resblocks.2.attention.wk.weight": "model-00007-of-00008.safetensors",
     "model.vit.transformer.resblocks.2.attention.wo.bias": "model-00007-of-00008.safetensors",
@@ -528,86 +530,86 @@
     "model.vit.transformer.resblocks.2.feed_forward.w2.weight": "model-00007-of-00008.safetensors",
     "model.vit.transformer.resblocks.2.ffn_norm.bias": "model-00007-of-00008.safetensors",
     "model.vit.transformer.resblocks.2.ffn_norm.weight": "model-00007-of-00008.safetensors",
-    "model.vit.transformer.resblocks.20.attention.wk.bias": "model-
-    "model.vit.transformer.resblocks.20.attention.wk.weight": "model-
-    "model.vit.transformer.resblocks.20.attention.wo.bias": "model-
-    "model.vit.transformer.resblocks.20.attention.wo.weight": "model-
-    "model.vit.transformer.resblocks.20.attention.wq.bias": "model-
-    "model.vit.transformer.resblocks.20.attention.wq.weight": "model-
-    "model.vit.transformer.resblocks.20.attention.wv.bias": "model-
-    "model.vit.transformer.resblocks.20.attention.wv.weight": "model-
-    "model.vit.transformer.resblocks.20.attention_norm.bias": "model-
-    "model.vit.transformer.resblocks.20.attention_norm.weight": "model-
-    "model.vit.transformer.resblocks.20.feed_forward.w1.bias": "model-
-    "model.vit.transformer.resblocks.20.feed_forward.w1.weight": "model-
-    "model.vit.transformer.resblocks.20.feed_forward.w2.bias": "model-
-    "model.vit.transformer.resblocks.20.feed_forward.w2.weight": "model-
-    "model.vit.transformer.resblocks.20.ffn_norm.bias": "model-
-    "model.vit.transformer.resblocks.20.ffn_norm.weight": "model-
-    "model.vit.transformer.resblocks.21.attention.wk.bias": "model-
-    "model.vit.transformer.resblocks.21.attention.wk.weight": "model-
-    "model.vit.transformer.resblocks.21.attention.wo.bias": "model-
-    "model.vit.transformer.resblocks.21.attention.wo.weight": "model-
-    "model.vit.transformer.resblocks.21.attention.wq.bias": "model-
-    "model.vit.transformer.resblocks.21.attention.wq.weight": "model-
-    "model.vit.transformer.resblocks.21.attention.wv.bias": "model-
-    "model.vit.transformer.resblocks.21.attention.wv.weight": "model-
-    "model.vit.transformer.resblocks.21.attention_norm.bias": "model-
-    "model.vit.transformer.resblocks.21.attention_norm.weight": "model-
-    "model.vit.transformer.resblocks.21.feed_forward.w1.bias": "model-
-    "model.vit.transformer.resblocks.21.feed_forward.w1.weight": "model-
-    "model.vit.transformer.resblocks.21.feed_forward.w2.bias": "model-
-    "model.vit.transformer.resblocks.21.feed_forward.w2.weight": "model-
-    "model.vit.transformer.resblocks.21.ffn_norm.bias": "model-
-    "model.vit.transformer.resblocks.21.ffn_norm.weight": "model-
-    "model.vit.transformer.resblocks.22.attention.wk.bias": "model-
-    "model.vit.transformer.resblocks.22.attention.wk.weight": "model-
-    "model.vit.transformer.resblocks.22.attention.wo.bias": "model-
-    "model.vit.transformer.resblocks.22.attention.wo.weight": "model-
-    "model.vit.transformer.resblocks.22.attention.wq.bias": "model-
-    "model.vit.transformer.resblocks.22.attention.wq.weight": "model-
-    "model.vit.transformer.resblocks.22.attention.wv.bias": "model-
-    "model.vit.transformer.resblocks.22.attention.wv.weight": "model-
-    "model.vit.transformer.resblocks.22.attention_norm.bias": "model-
-    "model.vit.transformer.resblocks.22.attention_norm.weight": "model-
-    "model.vit.transformer.resblocks.22.feed_forward.w1.bias": "model-
-    "model.vit.transformer.resblocks.22.feed_forward.w1.weight": "model-
-    "model.vit.transformer.resblocks.22.feed_forward.w2.bias": "model-
-    "model.vit.transformer.resblocks.22.feed_forward.w2.weight": "model-
-    "model.vit.transformer.resblocks.22.ffn_norm.bias": "model-
-    "model.vit.transformer.resblocks.22.ffn_norm.weight": "model-
-    "model.vit.transformer.resblocks.23.attention.wk.bias": "model-
-    "model.vit.transformer.resblocks.23.attention.wk.weight": "model-
-    "model.vit.transformer.resblocks.23.attention.wo.bias": "model-
-    "model.vit.transformer.resblocks.23.attention.wo.weight": "model-
-    "model.vit.transformer.resblocks.23.attention.wq.bias": "model-
-    "model.vit.transformer.resblocks.23.attention.wq.weight": "model-
-    "model.vit.transformer.resblocks.23.attention.wv.bias": "model-
-    "model.vit.transformer.resblocks.23.attention.wv.weight": "model-
-    "model.vit.transformer.resblocks.23.attention_norm.bias": "model-
-    "model.vit.transformer.resblocks.23.attention_norm.weight": "model-
-    "model.vit.transformer.resblocks.23.feed_forward.w1.bias": "model-
-    "model.vit.transformer.resblocks.23.feed_forward.w1.weight": "model-
-    "model.vit.transformer.resblocks.23.feed_forward.w2.bias": "model-
-    "model.vit.transformer.resblocks.23.feed_forward.w2.weight": "model-
-    "model.vit.transformer.resblocks.23.ffn_norm.bias": "model-
-    "model.vit.transformer.resblocks.23.ffn_norm.weight": "model-
-    "model.vit.transformer.resblocks.24.attention.wk.bias": "model-
-    "model.vit.transformer.resblocks.24.attention.wk.weight": "model-
-    "model.vit.transformer.resblocks.24.attention.wo.bias": "model-
-    "model.vit.transformer.resblocks.24.attention.wo.weight": "model-
-    "model.vit.transformer.resblocks.24.attention.wq.bias": "model-
-    "model.vit.transformer.resblocks.24.attention.wq.weight": "model-
-    "model.vit.transformer.resblocks.24.attention.wv.bias": "model-
-    "model.vit.transformer.resblocks.24.attention.wv.weight": "model-
-    "model.vit.transformer.resblocks.24.attention_norm.bias": "model-
-    "model.vit.transformer.resblocks.24.attention_norm.weight": "model-
-    "model.vit.transformer.resblocks.24.feed_forward.w1.bias": "model-
-    "model.vit.transformer.resblocks.24.feed_forward.w1.weight": "model-
-    "model.vit.transformer.resblocks.24.feed_forward.w2.bias": "model-
-    "model.vit.transformer.resblocks.24.feed_forward.w2.weight": "model-
-    "model.vit.transformer.resblocks.24.ffn_norm.bias": "model-
-    "model.vit.transformer.resblocks.24.ffn_norm.weight": "model-
     "model.vit.transformer.resblocks.3.attention.wk.bias": "model-00007-of-00008.safetensors",
     "model.vit.transformer.resblocks.3.attention.wk.weight": "model-00007-of-00008.safetensors",
     "model.vit.transformer.resblocks.3.attention.wo.bias": "model-00007-of-00008.safetensors",
@@ -696,32 +698,30 @@
     "model.vit.transformer.resblocks.8.attention.wq.weight": "model-00007-of-00008.safetensors",
     "model.vit.transformer.resblocks.8.attention.wv.bias": "model-00007-of-00008.safetensors",
     "model.vit.transformer.resblocks.8.attention.wv.weight": "model-00007-of-00008.safetensors",
-    "model.vit.transformer.resblocks.8.attention_norm.bias": "model-
-    "model.vit.transformer.resblocks.8.attention_norm.weight": "model-
     "model.vit.transformer.resblocks.8.feed_forward.w1.bias": "model-00007-of-00008.safetensors",
     "model.vit.transformer.resblocks.8.feed_forward.w1.weight": "model-00007-of-00008.safetensors",
-    "model.vit.transformer.resblocks.8.feed_forward.w2.bias": "model-
-    "model.vit.transformer.resblocks.8.feed_forward.w2.weight": "model-
-    "model.vit.transformer.resblocks.8.ffn_norm.bias": "model-
-    "model.vit.transformer.resblocks.8.ffn_norm.weight": "model-
-    "model.vit.transformer.resblocks.9.attention.wk.bias": "model-
-    "model.vit.transformer.resblocks.9.attention.wk.weight": "model-
-    "model.vit.transformer.resblocks.9.attention.wo.bias": "model-
-    "model.vit.transformer.resblocks.9.attention.wo.weight": "model-
-    "model.vit.transformer.resblocks.9.attention.wq.bias": "model-
-    "model.vit.transformer.resblocks.9.attention.wq.weight": "model-
-    "model.vit.transformer.resblocks.9.attention.wv.bias": "model-
-    "model.vit.transformer.resblocks.9.attention.wv.weight": "model-
-    "model.vit.transformer.resblocks.9.attention_norm.bias": "model-
-    "model.vit.transformer.resblocks.9.attention_norm.weight": "model-
-    "model.vit.transformer.resblocks.9.feed_forward.w1.bias": "model-
-    "model.vit.transformer.resblocks.9.feed_forward.w1.weight": "model-
-    "model.vit.transformer.resblocks.9.feed_forward.w2.bias": "model-
-    "model.vit.transformer.resblocks.9.feed_forward.w2.weight": "model-
-    "model.vit.transformer.resblocks.9.ffn_norm.bias": "model-
-    "model.vit.transformer.resblocks.9.ffn_norm.weight": "model-
-    "model.x_norm.weight": "model-
-    "new_output_embeddings": "model-00001-of-00008.safetensors",
-    "output_embeddings": "model-00001-of-00008.safetensors"
   }
 }
|
| 219 |
+
"model.transformer.blocks.3.self_attn.att_proj.weight": "model-00001-of-00008.safetensors",
|
| 220 |
+
"model.transformer.blocks.3.self_attn.attn_out.weight": "model-00001-of-00008.safetensors",
|
| 221 |
+
"model.transformer.blocks.3.self_attn.k_norm.weight": "model-00001-of-00008.safetensors",
|
| 222 |
+
"model.transformer.blocks.3.self_attn.q_norm.weight": "model-00001-of-00008.safetensors",
|
| 223 |
"model.transformer.blocks.30.attn_norm.weight": "model-00006-of-00008.safetensors",
|
| 224 |
+
"model.transformer.blocks.30.ff_norm.weight": "model-00006-of-00008.safetensors",
|
| 225 |
+
"model.transformer.blocks.30.mlp.ff_out.weight": "model-00006-of-00008.safetensors",
|
| 226 |
+
"model.transformer.blocks.30.mlp.ff_proj.weight": "model-00006-of-00008.safetensors",
|
| 227 |
"model.transformer.blocks.30.self_attn.att_proj.weight": "model-00006-of-00008.safetensors",
|
| 228 |
"model.transformer.blocks.30.self_attn.attn_out.weight": "model-00006-of-00008.safetensors",
|
| 229 |
"model.transformer.blocks.30.self_attn.k_norm.weight": "model-00006-of-00008.safetensors",
|
| 230 |
"model.transformer.blocks.30.self_attn.q_norm.weight": "model-00006-of-00008.safetensors",
|
| 231 |
+
"model.transformer.blocks.31.attn_norm.weight": "model-00006-of-00008.safetensors",
|
| 232 |
+
"model.transformer.blocks.31.ff_norm.weight": "model-00006-of-00008.safetensors",
|
| 233 |
+
"model.transformer.blocks.31.mlp.ff_out.weight": "model-00006-of-00008.safetensors",
|
| 234 |
+
"model.transformer.blocks.31.mlp.ff_proj.weight": "model-00006-of-00008.safetensors",
|
| 235 |
+
"model.transformer.blocks.31.self_attn.att_proj.weight": "model-00006-of-00008.safetensors",
|
| 236 |
+
"model.transformer.blocks.31.self_attn.attn_out.weight": "model-00006-of-00008.safetensors",
|
| 237 |
+
"model.transformer.blocks.31.self_attn.k_norm.weight": "model-00006-of-00008.safetensors",
|
| 238 |
+
"model.transformer.blocks.31.self_attn.q_norm.weight": "model-00006-of-00008.safetensors",
|
| 239 |
+
"model.transformer.blocks.32.attn_norm.weight": "model-00006-of-00008.safetensors",
|
| 240 |
+
"model.transformer.blocks.32.ff_norm.weight": "model-00006-of-00008.safetensors",
|
| 241 |
+
"model.transformer.blocks.32.mlp.ff_out.weight": "model-00006-of-00008.safetensors",
|
| 242 |
+
"model.transformer.blocks.32.mlp.ff_proj.weight": "model-00006-of-00008.safetensors",
|
| 243 |
+
"model.transformer.blocks.32.self_attn.att_proj.weight": "model-00006-of-00008.safetensors",
|
| 244 |
+
"model.transformer.blocks.32.self_attn.attn_out.weight": "model-00006-of-00008.safetensors",
|
| 245 |
+
"model.transformer.blocks.32.self_attn.k_norm.weight": "model-00006-of-00008.safetensors",
|
| 246 |
+
"model.transformer.blocks.32.self_attn.q_norm.weight": "model-00006-of-00008.safetensors",
|
| 247 |
+
"model.transformer.blocks.33.attn_norm.weight": "model-00006-of-00008.safetensors",
|
| 248 |
"model.transformer.blocks.33.ff_norm.weight": "model-00007-of-00008.safetensors",
|
| 249 |
"model.transformer.blocks.33.mlp.ff_out.weight": "model-00007-of-00008.safetensors",
|
| 250 |
"model.transformer.blocks.33.mlp.ff_proj.weight": "model-00007-of-00008.safetensors",
|
| 251 |
+
"model.transformer.blocks.33.self_attn.att_proj.weight": "model-00006-of-00008.safetensors",
|
| 252 |
+
"model.transformer.blocks.33.self_attn.attn_out.weight": "model-00006-of-00008.safetensors",
|
| 253 |
+
"model.transformer.blocks.33.self_attn.k_norm.weight": "model-00006-of-00008.safetensors",
|
| 254 |
+
"model.transformer.blocks.33.self_attn.q_norm.weight": "model-00006-of-00008.safetensors",
|
| 255 |
"model.transformer.blocks.34.attn_norm.weight": "model-00007-of-00008.safetensors",
|
| 256 |
"model.transformer.blocks.34.ff_norm.weight": "model-00007-of-00008.safetensors",
|
| 257 |
"model.transformer.blocks.34.mlp.ff_out.weight": "model-00007-of-00008.safetensors",
|
|
|
|
| 285 |
"model.transformer.blocks.5.self_attn.k_norm.weight": "model-00002-of-00008.safetensors",
|
| 286 |
"model.transformer.blocks.5.self_attn.q_norm.weight": "model-00002-of-00008.safetensors",
|
| 287 |
"model.transformer.blocks.6.attn_norm.weight": "model-00002-of-00008.safetensors",
|
| 288 |
+
"model.transformer.blocks.6.ff_norm.weight": "model-00002-of-00008.safetensors",
|
| 289 |
+
"model.transformer.blocks.6.mlp.ff_out.weight": "model-00002-of-00008.safetensors",
|
| 290 |
+
"model.transformer.blocks.6.mlp.ff_proj.weight": "model-00002-of-00008.safetensors",
|
| 291 |
"model.transformer.blocks.6.self_attn.att_proj.weight": "model-00002-of-00008.safetensors",
|
| 292 |
"model.transformer.blocks.6.self_attn.attn_out.weight": "model-00002-of-00008.safetensors",
|
| 293 |
"model.transformer.blocks.6.self_attn.k_norm.weight": "model-00002-of-00008.safetensors",
|
| 294 |
"model.transformer.blocks.6.self_attn.q_norm.weight": "model-00002-of-00008.safetensors",
|
| 295 |
+
"model.transformer.blocks.7.attn_norm.weight": "model-00002-of-00008.safetensors",
|
| 296 |
+
"model.transformer.blocks.7.ff_norm.weight": "model-00002-of-00008.safetensors",
|
| 297 |
+
"model.transformer.blocks.7.mlp.ff_out.weight": "model-00002-of-00008.safetensors",
|
| 298 |
+
"model.transformer.blocks.7.mlp.ff_proj.weight": "model-00002-of-00008.safetensors",
|
| 299 |
+
"model.transformer.blocks.7.self_attn.att_proj.weight": "model-00002-of-00008.safetensors",
|
| 300 |
+
"model.transformer.blocks.7.self_attn.attn_out.weight": "model-00002-of-00008.safetensors",
|
| 301 |
+
"model.transformer.blocks.7.self_attn.k_norm.weight": "model-00002-of-00008.safetensors",
|
| 302 |
+
"model.transformer.blocks.7.self_attn.q_norm.weight": "model-00002-of-00008.safetensors",
|
| 303 |
+
"model.transformer.blocks.8.attn_norm.weight": "model-00002-of-00008.safetensors",
|
| 304 |
+
"model.transformer.blocks.8.ff_norm.weight": "model-00002-of-00008.safetensors",
|
| 305 |
+
"model.transformer.blocks.8.mlp.ff_out.weight": "model-00002-of-00008.safetensors",
|
| 306 |
+
"model.transformer.blocks.8.mlp.ff_proj.weight": "model-00002-of-00008.safetensors",
|
| 307 |
+
"model.transformer.blocks.8.self_attn.att_proj.weight": "model-00002-of-00008.safetensors",
|
| 308 |
+
"model.transformer.blocks.8.self_attn.attn_out.weight": "model-00002-of-00008.safetensors",
|
| 309 |
+
"model.transformer.blocks.8.self_attn.k_norm.weight": "model-00002-of-00008.safetensors",
|
| 310 |
+
"model.transformer.blocks.8.self_attn.q_norm.weight": "model-00002-of-00008.safetensors",
|
| 311 |
+
"model.transformer.blocks.9.attn_norm.weight": "model-00002-of-00008.safetensors",
|
| 312 |
"model.transformer.blocks.9.ff_norm.weight": "model-00003-of-00008.safetensors",
|
| 313 |
"model.transformer.blocks.9.mlp.ff_out.weight": "model-00003-of-00008.safetensors",
|
| 314 |
"model.transformer.blocks.9.mlp.ff_proj.weight": "model-00003-of-00008.safetensors",
|
| 315 |
+
"model.transformer.blocks.9.self_attn.att_proj.weight": "model-00002-of-00008.safetensors",
|
| 316 |
+
"model.transformer.blocks.9.self_attn.attn_out.weight": "model-00002-of-00008.safetensors",
|
| 317 |
+
"model.transformer.blocks.9.self_attn.k_norm.weight": "model-00002-of-00008.safetensors",
|
| 318 |
+
"model.transformer.blocks.9.self_attn.q_norm.weight": "model-00002-of-00008.safetensors",
|
| 319 |
"model.transformer.ln_f.weight": "model-00007-of-00008.safetensors",
|
| 320 |
"model.transformer.wte.embedding": "model-00001-of-00008.safetensors",
|
| 321 |
"model.transformer.wte.new_embedding": "model-00001-of-00008.safetensors",
|
|
|
|
| 354 |
"model.vit.transformer.resblocks.1.feed_forward.w2.weight": "model-00007-of-00008.safetensors",
|
| 355 |
"model.vit.transformer.resblocks.1.ffn_norm.bias": "model-00007-of-00008.safetensors",
|
| 356 |
"model.vit.transformer.resblocks.1.ffn_norm.weight": "model-00007-of-00008.safetensors",
|
| 357 |
+
"model.vit.transformer.resblocks.10.attention.wk.bias": "model-00007-of-00008.safetensors",
|
| 358 |
+
"model.vit.transformer.resblocks.10.attention.wk.weight": "model-00007-of-00008.safetensors",
|
| 359 |
+
"model.vit.transformer.resblocks.10.attention.wo.bias": "model-00007-of-00008.safetensors",
|
| 360 |
+
"model.vit.transformer.resblocks.10.attention.wo.weight": "model-00007-of-00008.safetensors",
|
| 361 |
+
"model.vit.transformer.resblocks.10.attention.wq.bias": "model-00007-of-00008.safetensors",
|
| 362 |
+
"model.vit.transformer.resblocks.10.attention.wq.weight": "model-00007-of-00008.safetensors",
|
| 363 |
+
"model.vit.transformer.resblocks.10.attention.wv.bias": "model-00007-of-00008.safetensors",
|
| 364 |
+
"model.vit.transformer.resblocks.10.attention.wv.weight": "model-00007-of-00008.safetensors",
|
| 365 |
+
"model.vit.transformer.resblocks.10.attention_norm.bias": "model-00007-of-00008.safetensors",
|
| 366 |
+
"model.vit.transformer.resblocks.10.attention_norm.weight": "model-00007-of-00008.safetensors",
|
| 367 |
+
"model.vit.transformer.resblocks.10.feed_forward.w1.bias": "model-00007-of-00008.safetensors",
|
| 368 |
+
"model.vit.transformer.resblocks.10.feed_forward.w1.weight": "model-00007-of-00008.safetensors",
|
| 369 |
+
"model.vit.transformer.resblocks.10.feed_forward.w2.bias": "model-00007-of-00008.safetensors",
|
| 370 |
+
"model.vit.transformer.resblocks.10.feed_forward.w2.weight": "model-00007-of-00008.safetensors",
|
| 371 |
+
"model.vit.transformer.resblocks.10.ffn_norm.bias": "model-00007-of-00008.safetensors",
|
| 372 |
+
"model.vit.transformer.resblocks.10.ffn_norm.weight": "model-00007-of-00008.safetensors",
|
| 373 |
+
"model.vit.transformer.resblocks.11.attention.wk.bias": "model-00007-of-00008.safetensors",
|
| 374 |
+
"model.vit.transformer.resblocks.11.attention.wk.weight": "model-00007-of-00008.safetensors",
|
| 375 |
+
"model.vit.transformer.resblocks.11.attention.wo.bias": "model-00007-of-00008.safetensors",
|
| 376 |
+
"model.vit.transformer.resblocks.11.attention.wo.weight": "model-00007-of-00008.safetensors",
|
| 377 |
+
"model.vit.transformer.resblocks.11.attention.wq.bias": "model-00007-of-00008.safetensors",
|
| 378 |
+
"model.vit.transformer.resblocks.11.attention.wq.weight": "model-00007-of-00008.safetensors",
|
| 379 |
+
"model.vit.transformer.resblocks.11.attention.wv.bias": "model-00007-of-00008.safetensors",
|
| 380 |
+
"model.vit.transformer.resblocks.11.attention.wv.weight": "model-00007-of-00008.safetensors",
|
| 381 |
+
"model.vit.transformer.resblocks.11.attention_norm.bias": "model-00007-of-00008.safetensors",
|
| 382 |
+
"model.vit.transformer.resblocks.11.attention_norm.weight": "model-00007-of-00008.safetensors",
|
| 383 |
+
"model.vit.transformer.resblocks.11.feed_forward.w1.bias": "model-00007-of-00008.safetensors",
|
| 384 |
+
"model.vit.transformer.resblocks.11.feed_forward.w1.weight": "model-00007-of-00008.safetensors",
|
| 385 |
+
"model.vit.transformer.resblocks.11.feed_forward.w2.bias": "model-00007-of-00008.safetensors",
|
| 386 |
+
"model.vit.transformer.resblocks.11.feed_forward.w2.weight": "model-00007-of-00008.safetensors",
|
| 387 |
+
"model.vit.transformer.resblocks.11.ffn_norm.bias": "model-00007-of-00008.safetensors",
|
| 388 |
+
"model.vit.transformer.resblocks.11.ffn_norm.weight": "model-00007-of-00008.safetensors",
|
| 389 |
+
"model.vit.transformer.resblocks.12.attention.wk.bias": "model-00007-of-00008.safetensors",
|
| 390 |
+
"model.vit.transformer.resblocks.12.attention.wk.weight": "model-00007-of-00008.safetensors",
|
| 391 |
+
"model.vit.transformer.resblocks.12.attention.wo.bias": "model-00007-of-00008.safetensors",
|
| 392 |
+
"model.vit.transformer.resblocks.12.attention.wo.weight": "model-00007-of-00008.safetensors",
|
| 393 |
+
"model.vit.transformer.resblocks.12.attention.wq.bias": "model-00007-of-00008.safetensors",
|
| 394 |
+
"model.vit.transformer.resblocks.12.attention.wq.weight": "model-00007-of-00008.safetensors",
|
| 395 |
+
"model.vit.transformer.resblocks.12.attention.wv.bias": "model-00007-of-00008.safetensors",
|
| 396 |
+
"model.vit.transformer.resblocks.12.attention.wv.weight": "model-00007-of-00008.safetensors",
|
| 397 |
+
"model.vit.transformer.resblocks.12.attention_norm.bias": "model-00007-of-00008.safetensors",
|
| 398 |
+
"model.vit.transformer.resblocks.12.attention_norm.weight": "model-00007-of-00008.safetensors",
|
| 399 |
+
"model.vit.transformer.resblocks.12.feed_forward.w1.bias": "model-00007-of-00008.safetensors",
|
| 400 |
+
"model.vit.transformer.resblocks.12.feed_forward.w1.weight": "model-00007-of-00008.safetensors",
|
| 401 |
+
"model.vit.transformer.resblocks.12.feed_forward.w2.bias": "model-00007-of-00008.safetensors",
|
| 402 |
+
"model.vit.transformer.resblocks.12.feed_forward.w2.weight": "model-00007-of-00008.safetensors",
|
| 403 |
+
"model.vit.transformer.resblocks.12.ffn_norm.bias": "model-00007-of-00008.safetensors",
|
| 404 |
+
"model.vit.transformer.resblocks.12.ffn_norm.weight": "model-00007-of-00008.safetensors",
|
| 405 |
+
"model.vit.transformer.resblocks.13.attention.wk.bias": "model-00007-of-00008.safetensors",
|
| 406 |
+
"model.vit.transformer.resblocks.13.attention.wk.weight": "model-00007-of-00008.safetensors",
|
| 407 |
+
"model.vit.transformer.resblocks.13.attention.wo.bias": "model-00007-of-00008.safetensors",
|
| 408 |
+
"model.vit.transformer.resblocks.13.attention.wo.weight": "model-00007-of-00008.safetensors",
|
| 409 |
+
"model.vit.transformer.resblocks.13.attention.wq.bias": "model-00007-of-00008.safetensors",
|
| 410 |
+
"model.vit.transformer.resblocks.13.attention.wq.weight": "model-00007-of-00008.safetensors",
|
| 411 |
+
"model.vit.transformer.resblocks.13.attention.wv.bias": "model-00007-of-00008.safetensors",
|
| 412 |
+
"model.vit.transformer.resblocks.13.attention.wv.weight": "model-00007-of-00008.safetensors",
|
| 413 |
+
"model.vit.transformer.resblocks.13.attention_norm.bias": "model-00007-of-00008.safetensors",
|
| 414 |
+
"model.vit.transformer.resblocks.13.attention_norm.weight": "model-00007-of-00008.safetensors",
|
| 415 |
+
"model.vit.transformer.resblocks.13.feed_forward.w1.bias": "model-00007-of-00008.safetensors",
|
| 416 |
+
"model.vit.transformer.resblocks.13.feed_forward.w1.weight": "model-00007-of-00008.safetensors",
|
| 417 |
+
"model.vit.transformer.resblocks.13.feed_forward.w2.bias": "model-00007-of-00008.safetensors",
|
| 418 |
+
"model.vit.transformer.resblocks.13.feed_forward.w2.weight": "model-00007-of-00008.safetensors",
|
| 419 |
+
"model.vit.transformer.resblocks.13.ffn_norm.bias": "model-00007-of-00008.safetensors",
|
| 420 |
+
"model.vit.transformer.resblocks.13.ffn_norm.weight": "model-00007-of-00008.safetensors",
|
| 421 |
+
"model.vit.transformer.resblocks.14.attention.wk.bias": "model-00007-of-00008.safetensors",
|
| 422 |
+
"model.vit.transformer.resblocks.14.attention.wk.weight": "model-00007-of-00008.safetensors",
|
| 423 |
+
"model.vit.transformer.resblocks.14.attention.wo.bias": "model-00007-of-00008.safetensors",
|
| 424 |
+
"model.vit.transformer.resblocks.14.attention.wo.weight": "model-00007-of-00008.safetensors",
|
| 425 |
+
"model.vit.transformer.resblocks.14.attention.wq.bias": "model-00007-of-00008.safetensors",
|
| 426 |
+
"model.vit.transformer.resblocks.14.attention.wq.weight": "model-00007-of-00008.safetensors",
|
| 427 |
+
"model.vit.transformer.resblocks.14.attention.wv.bias": "model-00007-of-00008.safetensors",
|
| 428 |
+
"model.vit.transformer.resblocks.14.attention.wv.weight": "model-00007-of-00008.safetensors",
|
| 429 |
+
"model.vit.transformer.resblocks.14.attention_norm.bias": "model-00007-of-00008.safetensors",
|
| 430 |
+
"model.vit.transformer.resblocks.14.attention_norm.weight": "model-00007-of-00008.safetensors",
|
| 431 |
+
"model.vit.transformer.resblocks.14.feed_forward.w1.bias": "model-00007-of-00008.safetensors",
|
| 432 |
+
"model.vit.transformer.resblocks.14.feed_forward.w1.weight": "model-00007-of-00008.safetensors",
|
| 433 |
+
"model.vit.transformer.resblocks.14.feed_forward.w2.bias": "model-00007-of-00008.safetensors",
|
| 434 |
+
"model.vit.transformer.resblocks.14.feed_forward.w2.weight": "model-00007-of-00008.safetensors",
|
| 435 |
+
"model.vit.transformer.resblocks.14.ffn_norm.bias": "model-00007-of-00008.safetensors",
|
| 436 |
+
"model.vit.transformer.resblocks.14.ffn_norm.weight": "model-00007-of-00008.safetensors",
|
| 437 |
+
"model.vit.transformer.resblocks.15.attention.wk.bias": "model-00007-of-00008.safetensors",
|
| 438 |
+
"model.vit.transformer.resblocks.15.attention.wk.weight": "model-00007-of-00008.safetensors",
|
| 439 |
+
"model.vit.transformer.resblocks.15.attention.wo.bias": "model-00007-of-00008.safetensors",
|
| 440 |
+
"model.vit.transformer.resblocks.15.attention.wo.weight": "model-00007-of-00008.safetensors",
|
| 441 |
+
"model.vit.transformer.resblocks.15.attention.wq.bias": "model-00007-of-00008.safetensors",
|
| 442 |
+
"model.vit.transformer.resblocks.15.attention.wq.weight": "model-00007-of-00008.safetensors",
|
| 443 |
+
"model.vit.transformer.resblocks.15.attention.wv.bias": "model-00007-of-00008.safetensors",
|
| 444 |
+
"model.vit.transformer.resblocks.15.attention.wv.weight": "model-00007-of-00008.safetensors",
|
| 445 |
+
"model.vit.transformer.resblocks.15.attention_norm.bias": "model-00007-of-00008.safetensors",
|
| 446 |
+
"model.vit.transformer.resblocks.15.attention_norm.weight": "model-00007-of-00008.safetensors",
|
| 447 |
+
"model.vit.transformer.resblocks.15.feed_forward.w1.bias": "model-00007-of-00008.safetensors",
|
| 448 |
+
"model.vit.transformer.resblocks.15.feed_forward.w1.weight": "model-00007-of-00008.safetensors",
|
| 449 |
+
"model.vit.transformer.resblocks.15.feed_forward.w2.bias": "model-00007-of-00008.safetensors",
|
| 450 |
+
"model.vit.transformer.resblocks.15.feed_forward.w2.weight": "model-00007-of-00008.safetensors",
|
| 451 |
+
"model.vit.transformer.resblocks.15.ffn_norm.bias": "model-00007-of-00008.safetensors",
|
| 452 |
+
"model.vit.transformer.resblocks.15.ffn_norm.weight": "model-00007-of-00008.safetensors",
|
| 453 |
+
"model.vit.transformer.resblocks.16.attention.wk.bias": "model-00007-of-00008.safetensors",
|
| 454 |
+
"model.vit.transformer.resblocks.16.attention.wk.weight": "model-00007-of-00008.safetensors",
|
| 455 |
+
"model.vit.transformer.resblocks.16.attention.wo.bias": "model-00007-of-00008.safetensors",
|
| 456 |
+
"model.vit.transformer.resblocks.16.attention.wo.weight": "model-00007-of-00008.safetensors",
|
| 457 |
+
"model.vit.transformer.resblocks.16.attention.wq.bias": "model-00007-of-00008.safetensors",
|
| 458 |
+
"model.vit.transformer.resblocks.16.attention.wq.weight": "model-00007-of-00008.safetensors",
|
| 459 |
+
"model.vit.transformer.resblocks.16.attention.wv.bias": "model-00007-of-00008.safetensors",
|
| 460 |
+
"model.vit.transformer.resblocks.16.attention.wv.weight": "model-00007-of-00008.safetensors",
|
| 461 |
+
"model.vit.transformer.resblocks.16.attention_norm.bias": "model-00007-of-00008.safetensors",
|
| 462 |
+
"model.vit.transformer.resblocks.16.attention_norm.weight": "model-00007-of-00008.safetensors",
|
| 463 |
+
"model.vit.transformer.resblocks.16.feed_forward.w1.bias": "model-00007-of-00008.safetensors",
|
| 464 |
+
"model.vit.transformer.resblocks.16.feed_forward.w1.weight": "model-00007-of-00008.safetensors",
|
| 465 |
+
"model.vit.transformer.resblocks.16.feed_forward.w2.bias": "model-00007-of-00008.safetensors",
|
| 466 |
+
"model.vit.transformer.resblocks.16.feed_forward.w2.weight": "model-00007-of-00008.safetensors",
|
| 467 |
+
"model.vit.transformer.resblocks.16.ffn_norm.bias": "model-00007-of-00008.safetensors",
|
| 468 |
+
"model.vit.transformer.resblocks.16.ffn_norm.weight": "model-00007-of-00008.safetensors",
|
| 469 |
+
"model.vit.transformer.resblocks.17.attention.wk.bias": "model-00007-of-00008.safetensors",
|
| 470 |
+
"model.vit.transformer.resblocks.17.attention.wk.weight": "model-00007-of-00008.safetensors",
|
| 471 |
+
"model.vit.transformer.resblocks.17.attention.wo.bias": "model-00007-of-00008.safetensors",
|
| 472 |
+
"model.vit.transformer.resblocks.17.attention.wo.weight": "model-00007-of-00008.safetensors",
|
| 473 |
+
"model.vit.transformer.resblocks.17.attention.wq.bias": "model-00007-of-00008.safetensors",
|
| 474 |
+
"model.vit.transformer.resblocks.17.attention.wq.weight": "model-00007-of-00008.safetensors",
|
| 475 |
+
"model.vit.transformer.resblocks.17.attention.wv.bias": "model-00007-of-00008.safetensors",
|
| 476 |
+
"model.vit.transformer.resblocks.17.attention.wv.weight": "model-00007-of-00008.safetensors",
|
| 477 |
+
"model.vit.transformer.resblocks.17.attention_norm.bias": "model-00007-of-00008.safetensors",
|
| 478 |
+
"model.vit.transformer.resblocks.17.attention_norm.weight": "model-00007-of-00008.safetensors",
|
| 479 |
+
"model.vit.transformer.resblocks.17.feed_forward.w1.bias": "model-00007-of-00008.safetensors",
|
| 480 |
+
"model.vit.transformer.resblocks.17.feed_forward.w1.weight": "model-00007-of-00008.safetensors",
|
| 481 |
+
"model.vit.transformer.resblocks.17.feed_forward.w2.bias": "model-00007-of-00008.safetensors",
|
| 482 |
+
"model.vit.transformer.resblocks.17.feed_forward.w2.weight": "model-00007-of-00008.safetensors",
|
| 483 |
+
"model.vit.transformer.resblocks.17.ffn_norm.bias": "model-00007-of-00008.safetensors",
|
| 484 |
+
"model.vit.transformer.resblocks.17.ffn_norm.weight": "model-00007-of-00008.safetensors",
|
| 485 |
+
"model.vit.transformer.resblocks.18.attention.wk.bias": "model-00007-of-00008.safetensors",
|
| 486 |
+
"model.vit.transformer.resblocks.18.attention.wk.weight": "model-00007-of-00008.safetensors",
|
| 487 |
+
"model.vit.transformer.resblocks.18.attention.wo.bias": "model-00007-of-00008.safetensors",
|
| 488 |
+
"model.vit.transformer.resblocks.18.attention.wo.weight": "model-00007-of-00008.safetensors",
|
| 489 |
+
"model.vit.transformer.resblocks.18.attention.wq.bias": "model-00007-of-00008.safetensors",
|
| 490 |
+
"model.vit.transformer.resblocks.18.attention.wq.weight": "model-00007-of-00008.safetensors",
|
| 491 |
+
"model.vit.transformer.resblocks.18.attention.wv.bias": "model-00007-of-00008.safetensors",
|
| 492 |
+
"model.vit.transformer.resblocks.18.attention.wv.weight": "model-00007-of-00008.safetensors",
|
| 493 |
+
"model.vit.transformer.resblocks.18.attention_norm.bias": "model-00007-of-00008.safetensors",
|
| 494 |
+
"model.vit.transformer.resblocks.18.attention_norm.weight": "model-00007-of-00008.safetensors",
|
| 495 |
+
"model.vit.transformer.resblocks.18.feed_forward.w1.bias": "model-00007-of-00008.safetensors",
|
| 496 |
+
"model.vit.transformer.resblocks.18.feed_forward.w1.weight": "model-00007-of-00008.safetensors",
|
| 497 |
+
"model.vit.transformer.resblocks.18.feed_forward.w2.bias": "model-00007-of-00008.safetensors",
|
| 498 |
+
"model.vit.transformer.resblocks.18.feed_forward.w2.weight": "model-00007-of-00008.safetensors",
|
| 499 |
+
"model.vit.transformer.resblocks.18.ffn_norm.bias": "model-00007-of-00008.safetensors",
|
| 500 |
+
"model.vit.transformer.resblocks.18.ffn_norm.weight": "model-00007-of-00008.safetensors",
|
| 501 |
+
"model.vit.transformer.resblocks.19.attention.wk.bias": "model-00007-of-00008.safetensors",
|
| 502 |
+
"model.vit.transformer.resblocks.19.attention.wk.weight": "model-00007-of-00008.safetensors",
|
| 503 |
+
"model.vit.transformer.resblocks.19.attention.wo.bias": "model-00007-of-00008.safetensors",
|
| 504 |
+
"model.vit.transformer.resblocks.19.attention.wo.weight": "model-00007-of-00008.safetensors",
|
| 505 |
+
"model.vit.transformer.resblocks.19.attention.wq.bias": "model-00007-of-00008.safetensors",
|
| 506 |
+
"model.vit.transformer.resblocks.19.attention.wq.weight": "model-00007-of-00008.safetensors",
|
| 507 |
+
"model.vit.transformer.resblocks.19.attention.wv.bias": "model-00007-of-00008.safetensors",
|
| 508 |
+
"model.vit.transformer.resblocks.19.attention.wv.weight": "model-00007-of-00008.safetensors",
|
| 509 |
+
"model.vit.transformer.resblocks.19.attention_norm.bias": "model-00007-of-00008.safetensors",
|
| 510 |
+
"model.vit.transformer.resblocks.19.attention_norm.weight": "model-00007-of-00008.safetensors",
|
| 511 |
+
"model.vit.transformer.resblocks.19.feed_forward.w1.bias": "model-00007-of-00008.safetensors",
|
| 512 |
+
"model.vit.transformer.resblocks.19.feed_forward.w1.weight": "model-00007-of-00008.safetensors",
|
| 513 |
+
"model.vit.transformer.resblocks.19.feed_forward.w2.bias": "model-00007-of-00008.safetensors",
|
| 514 |
+
"model.vit.transformer.resblocks.19.feed_forward.w2.weight": "model-00007-of-00008.safetensors",
|
| 515 |
+
"model.vit.transformer.resblocks.19.ffn_norm.bias": "model-00007-of-00008.safetensors",
|
| 516 |
+
"model.vit.transformer.resblocks.19.ffn_norm.weight": "model-00007-of-00008.safetensors",
|
| 517 |
"model.vit.transformer.resblocks.2.attention.wk.bias": "model-00007-of-00008.safetensors",
|
| 518 |
"model.vit.transformer.resblocks.2.attention.wk.weight": "model-00007-of-00008.safetensors",
|
| 519 |
"model.vit.transformer.resblocks.2.attention.wo.bias": "model-00007-of-00008.safetensors",
|
|
|
|
| 530 |
"model.vit.transformer.resblocks.2.feed_forward.w2.weight": "model-00007-of-00008.safetensors",
|
| 531 |
"model.vit.transformer.resblocks.2.ffn_norm.bias": "model-00007-of-00008.safetensors",
|
| 532 |
"model.vit.transformer.resblocks.2.ffn_norm.weight": "model-00007-of-00008.safetensors",
|
| 533 |
+
"model.vit.transformer.resblocks.20.attention.wk.bias": "model-00007-of-00008.safetensors",
|
| 534 |
+
"model.vit.transformer.resblocks.20.attention.wk.weight": "model-00007-of-00008.safetensors",
|
| 535 |
+
"model.vit.transformer.resblocks.20.attention.wo.bias": "model-00007-of-00008.safetensors",
|
| 536 |
+
"model.vit.transformer.resblocks.20.attention.wo.weight": "model-00007-of-00008.safetensors",
|
| 537 |
+
"model.vit.transformer.resblocks.20.attention.wq.bias": "model-00007-of-00008.safetensors",
|
| 538 |
+
"model.vit.transformer.resblocks.20.attention.wq.weight": "model-00007-of-00008.safetensors",
|
| 539 |
+
"model.vit.transformer.resblocks.20.attention.wv.bias": "model-00007-of-00008.safetensors",
|
| 540 |
+
"model.vit.transformer.resblocks.20.attention.wv.weight": "model-00007-of-00008.safetensors",
|
| 541 |
+
"model.vit.transformer.resblocks.20.attention_norm.bias": "model-00007-of-00008.safetensors",
|
| 542 |
+
"model.vit.transformer.resblocks.20.attention_norm.weight": "model-00007-of-00008.safetensors",
|
| 543 |
+
"model.vit.transformer.resblocks.20.feed_forward.w1.bias": "model-00007-of-00008.safetensors",
|
| 544 |
+
"model.vit.transformer.resblocks.20.feed_forward.w1.weight": "model-00007-of-00008.safetensors",
|
| 545 |
+
"model.vit.transformer.resblocks.20.feed_forward.w2.bias": "model-00007-of-00008.safetensors",
|
| 546 |
+
"model.vit.transformer.resblocks.20.feed_forward.w2.weight": "model-00007-of-00008.safetensors",
|
| 547 |
+
"model.vit.transformer.resblocks.20.ffn_norm.bias": "model-00007-of-00008.safetensors",
|
| 548 |
+
"model.vit.transformer.resblocks.20.ffn_norm.weight": "model-00007-of-00008.safetensors",
|
| 549 |
+
"model.vit.transformer.resblocks.21.attention.wk.bias": "model-00007-of-00008.safetensors",
|
| 550 |
+
"model.vit.transformer.resblocks.21.attention.wk.weight": "model-00007-of-00008.safetensors",
|
| 551 |
+
"model.vit.transformer.resblocks.21.attention.wo.bias": "model-00007-of-00008.safetensors",
|
| 552 |
+
"model.vit.transformer.resblocks.21.attention.wo.weight": "model-00007-of-00008.safetensors",
|
| 553 |
+
"model.vit.transformer.resblocks.21.attention.wq.bias": "model-00007-of-00008.safetensors",
|
| 554 |
+
"model.vit.transformer.resblocks.21.attention.wq.weight": "model-00007-of-00008.safetensors",
|
| 555 |
+
"model.vit.transformer.resblocks.21.attention.wv.bias": "model-00007-of-00008.safetensors",
|
| 556 |
+
"model.vit.transformer.resblocks.21.attention.wv.weight": "model-00007-of-00008.safetensors",
|
| 557 |
+
"model.vit.transformer.resblocks.21.attention_norm.bias": "model-00007-of-00008.safetensors",
|
| 558 |
+
"model.vit.transformer.resblocks.21.attention_norm.weight": "model-00007-of-00008.safetensors",
|
| 559 |
+
"model.vit.transformer.resblocks.21.feed_forward.w1.bias": "model-00007-of-00008.safetensors",
|
| 560 |
+
"model.vit.transformer.resblocks.21.feed_forward.w1.weight": "model-00007-of-00008.safetensors",
|
| 561 |
+
"model.vit.transformer.resblocks.21.feed_forward.w2.bias": "model-00007-of-00008.safetensors",
|
| 562 |
+
"model.vit.transformer.resblocks.21.feed_forward.w2.weight": "model-00007-of-00008.safetensors",
|
| 563 |
+
"model.vit.transformer.resblocks.21.ffn_norm.bias": "model-00007-of-00008.safetensors",
|
| 564 |
+
"model.vit.transformer.resblocks.21.ffn_norm.weight": "model-00007-of-00008.safetensors",
|
| 565 |
+
"model.vit.transformer.resblocks.22.attention.wk.bias": "model-00007-of-00008.safetensors",
|
| 566 |
+
"model.vit.transformer.resblocks.22.attention.wk.weight": "model-00007-of-00008.safetensors",
|
| 567 |
+
"model.vit.transformer.resblocks.22.attention.wo.bias": "model-00007-of-00008.safetensors",
|
| 568 |
+
"model.vit.transformer.resblocks.22.attention.wo.weight": "model-00007-of-00008.safetensors",
|
| 569 |
+
"model.vit.transformer.resblocks.22.attention.wq.bias": "model-00007-of-00008.safetensors",
|
| 570 |
+
"model.vit.transformer.resblocks.22.attention.wq.weight": "model-00007-of-00008.safetensors",
|
| 571 |
+
"model.vit.transformer.resblocks.22.attention.wv.bias": "model-00007-of-00008.safetensors",
|
| 572 |
+
"model.vit.transformer.resblocks.22.attention.wv.weight": "model-00007-of-00008.safetensors",
|
| 573 |
+
"model.vit.transformer.resblocks.22.attention_norm.bias": "model-00007-of-00008.safetensors",
|
| 574 |
+
"model.vit.transformer.resblocks.22.attention_norm.weight": "model-00007-of-00008.safetensors",
|
| 575 |
+
"model.vit.transformer.resblocks.22.feed_forward.w1.bias": "model-00007-of-00008.safetensors",
|
| 576 |
+
"model.vit.transformer.resblocks.22.feed_forward.w1.weight": "model-00007-of-00008.safetensors",
|
| 577 |
+
"model.vit.transformer.resblocks.22.feed_forward.w2.bias": "model-00007-of-00008.safetensors",
|
| 578 |
+
"model.vit.transformer.resblocks.22.feed_forward.w2.weight": "model-00007-of-00008.safetensors",
|
| 579 |
+
"model.vit.transformer.resblocks.22.ffn_norm.bias": "model-00007-of-00008.safetensors",
|
| 580 |
+
"model.vit.transformer.resblocks.22.ffn_norm.weight": "model-00007-of-00008.safetensors",
|
| 581 |
+
"model.vit.transformer.resblocks.23.attention.wk.bias": "model-00007-of-00008.safetensors",
|
| 582 |
+
"model.vit.transformer.resblocks.23.attention.wk.weight": "model-00007-of-00008.safetensors",
|
| 583 |
+
"model.vit.transformer.resblocks.23.attention.wo.bias": "model-00007-of-00008.safetensors",
|
| 584 |
+
"model.vit.transformer.resblocks.23.attention.wo.weight": "model-00007-of-00008.safetensors",
|
| 585 |
+
"model.vit.transformer.resblocks.23.attention.wq.bias": "model-00007-of-00008.safetensors",
|
| 586 |
+
"model.vit.transformer.resblocks.23.attention.wq.weight": "model-00007-of-00008.safetensors",
|
| 587 |
+
"model.vit.transformer.resblocks.23.attention.wv.bias": "model-00007-of-00008.safetensors",
|
| 588 |
+
"model.vit.transformer.resblocks.23.attention.wv.weight": "model-00007-of-00008.safetensors",
|
| 589 |
+
"model.vit.transformer.resblocks.23.attention_norm.bias": "model-00007-of-00008.safetensors",
|
| 590 |
+
"model.vit.transformer.resblocks.23.attention_norm.weight": "model-00007-of-00008.safetensors",
|
| 591 |
+
"model.vit.transformer.resblocks.23.feed_forward.w1.bias": "model-00007-of-00008.safetensors",
|
| 592 |
+
"model.vit.transformer.resblocks.23.feed_forward.w1.weight": "model-00007-of-00008.safetensors",
|
| 593 |
+
"model.vit.transformer.resblocks.23.feed_forward.w2.bias": "model-00007-of-00008.safetensors",
|
| 594 |
+
"model.vit.transformer.resblocks.23.feed_forward.w2.weight": "model-00007-of-00008.safetensors",
|
| 595 |
+
"model.vit.transformer.resblocks.23.ffn_norm.bias": "model-00007-of-00008.safetensors",
|
| 596 |
+
"model.vit.transformer.resblocks.23.ffn_norm.weight": "model-00007-of-00008.safetensors",
|
| 597 |
+
"model.vit.transformer.resblocks.24.attention.wk.bias": "model-00007-of-00008.safetensors",
|
| 598 |
+
"model.vit.transformer.resblocks.24.attention.wk.weight": "model-00007-of-00008.safetensors",
|
| 599 |
+
"model.vit.transformer.resblocks.24.attention.wo.bias": "model-00007-of-00008.safetensors",
|
| 600 |
+
"model.vit.transformer.resblocks.24.attention.wo.weight": "model-00007-of-00008.safetensors",
|
| 601 |
+
"model.vit.transformer.resblocks.24.attention.wq.bias": "model-00007-of-00008.safetensors",
|
| 602 |
+
"model.vit.transformer.resblocks.24.attention.wq.weight": "model-00007-of-00008.safetensors",
|
| 603 |
+
"model.vit.transformer.resblocks.24.attention.wv.bias": "model-00007-of-00008.safetensors",
|
| 604 |
+
"model.vit.transformer.resblocks.24.attention.wv.weight": "model-00007-of-00008.safetensors",
|
| 605 |
+
"model.vit.transformer.resblocks.24.attention_norm.bias": "model-00007-of-00008.safetensors",
|
| 606 |
+
"model.vit.transformer.resblocks.24.attention_norm.weight": "model-00007-of-00008.safetensors",
|
| 607 |
+
"model.vit.transformer.resblocks.24.feed_forward.w1.bias": "model-00007-of-00008.safetensors",
|
| 608 |
+
"model.vit.transformer.resblocks.24.feed_forward.w1.weight": "model-00007-of-00008.safetensors",
|
| 609 |
+
"model.vit.transformer.resblocks.24.feed_forward.w2.bias": "model-00007-of-00008.safetensors",
|
| 610 |
+
"model.vit.transformer.resblocks.24.feed_forward.w2.weight": "model-00007-of-00008.safetensors",
|
| 611 |
+
"model.vit.transformer.resblocks.24.ffn_norm.bias": "model-00007-of-00008.safetensors",
|
| 612 |
+
"model.vit.transformer.resblocks.24.ffn_norm.weight": "model-00007-of-00008.safetensors",
|
| 613 |
"model.vit.transformer.resblocks.3.attention.wk.bias": "model-00007-of-00008.safetensors",
|
| 614 |
"model.vit.transformer.resblocks.3.attention.wk.weight": "model-00007-of-00008.safetensors",
|
| 615 |
"model.vit.transformer.resblocks.3.attention.wo.bias": "model-00007-of-00008.safetensors",
|
|
|
|
| 698 |
"model.vit.transformer.resblocks.8.attention.wq.weight": "model-00007-of-00008.safetensors",
|
| 699 |
"model.vit.transformer.resblocks.8.attention.wv.bias": "model-00007-of-00008.safetensors",
|
| 700 |
"model.vit.transformer.resblocks.8.attention.wv.weight": "model-00007-of-00008.safetensors",
|
| 701 |
+
"model.vit.transformer.resblocks.8.attention_norm.bias": "model-00007-of-00008.safetensors",
|
| 702 |
+
"model.vit.transformer.resblocks.8.attention_norm.weight": "model-00007-of-00008.safetensors",
|
| 703 |
"model.vit.transformer.resblocks.8.feed_forward.w1.bias": "model-00007-of-00008.safetensors",
|
| 704 |
"model.vit.transformer.resblocks.8.feed_forward.w1.weight": "model-00007-of-00008.safetensors",
|
| 705 |
+
"model.vit.transformer.resblocks.8.feed_forward.w2.bias": "model-00007-of-00008.safetensors",
|
| 706 |
+
"model.vit.transformer.resblocks.8.feed_forward.w2.weight": "model-00007-of-00008.safetensors",
|
| 707 |
+
"model.vit.transformer.resblocks.8.ffn_norm.bias": "model-00007-of-00008.safetensors",
|
| 708 |
+
"model.vit.transformer.resblocks.8.ffn_norm.weight": "model-00007-of-00008.safetensors",
|
| 709 |
+
"model.vit.transformer.resblocks.9.attention.wk.bias": "model-00007-of-00008.safetensors",
|
| 710 |
+
"model.vit.transformer.resblocks.9.attention.wk.weight": "model-00007-of-00008.safetensors",
|
| 711 |
+
"model.vit.transformer.resblocks.9.attention.wo.bias": "model-00007-of-00008.safetensors",
|
| 712 |
+
"model.vit.transformer.resblocks.9.attention.wo.weight": "model-00007-of-00008.safetensors",
|
| 713 |
+
"model.vit.transformer.resblocks.9.attention.wq.bias": "model-00007-of-00008.safetensors",
|
| 714 |
+
"model.vit.transformer.resblocks.9.attention.wq.weight": "model-00007-of-00008.safetensors",
|
| 715 |
+
"model.vit.transformer.resblocks.9.attention.wv.bias": "model-00007-of-00008.safetensors",
|
| 716 |
+
"model.vit.transformer.resblocks.9.attention.wv.weight": "model-00007-of-00008.safetensors",
|
| 717 |
+
"model.vit.transformer.resblocks.9.attention_norm.bias": "model-00007-of-00008.safetensors",
|
| 718 |
+
"model.vit.transformer.resblocks.9.attention_norm.weight": "model-00007-of-00008.safetensors",
|
| 719 |
+
"model.vit.transformer.resblocks.9.feed_forward.w1.bias": "model-00007-of-00008.safetensors",
|
| 720 |
+
"model.vit.transformer.resblocks.9.feed_forward.w1.weight": "model-00007-of-00008.safetensors",
|
| 721 |
+
"model.vit.transformer.resblocks.9.feed_forward.w2.bias": "model-00007-of-00008.safetensors",
|
| 722 |
+
"model.vit.transformer.resblocks.9.feed_forward.w2.weight": "model-00007-of-00008.safetensors",
|
| 723 |
+
"model.vit.transformer.resblocks.9.ffn_norm.bias": "model-00007-of-00008.safetensors",
|
| 724 |
+
"model.vit.transformer.resblocks.9.ffn_norm.weight": "model-00007-of-00008.safetensors",
|
| 725 |
+
"model.x_norm.weight": "model-00007-of-00008.safetensors"
|
|
|
|
|
|
|
| 726 |
}
|
| 727 |
}
|
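The `weight_map` above is what `from_pretrained` consults to route each parameter name to its shard, so after a re-sharding like this it is worth confirming that every key actually exists in the shard it points to. A minimal sketch of that check, assuming it runs from the checkpoint directory (the file paths are assumptions, not part of this commit):

# Sanity-check sketch: confirm every key in the updated weight_map is
# serialized in the shard it points to.
import json
from safetensors import safe_open

with open("model.safetensors.index.json") as f:
    weight_map = json.load(f)["weight_map"]

for shard in sorted(set(weight_map.values())):
    with safe_open(shard, framework="pt") as sf:
        on_disk = set(sf.keys())
    expected = {key for key, loc in weight_map.items() if loc == shard}
    missing = expected - on_disk
    if missing:
        print(shard, "is missing", sorted(missing)[:5], "...")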
modeling_molmo_point.py
CHANGED
@@ -1307,9 +1307,10 @@ class MolmoPointModel(MolmoPointPreTrainedModel):
         input_patch_ids = None
         can_point = False
 
-        x = self.transformer.wte(input_ids)
+        device = input_ids.device
+        x = self.transformer.wte(input_ids).to(device=device)
         batch_size, _, dim = x.shape
-        batch_idx = torch.arange(batch_size, device=x.device)
+        batch_idx = torch.arange(batch_size, device=device)
 
         # TODO update embeddings for patch/subpatch tokens
         vit_features_flat: Optional[torch.FloatTensor] = None
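The recurring change in this file is resolving a single reference device from `input_ids` and explicitly moving each intermediate onto it, which keeps indexing tensors and module outputs aligned when submodules are spread over several devices (e.g. with `device_map="auto"`). A toy, single-file sketch of the pattern; `Toy`, `embed`, and `head` are illustrative names, not Molmo modules:

# Toy sketch of the device-consolidation pattern; runs on CPU as written.
import torch
import torch.nn as nn

class Toy(nn.Module):
    def __init__(self):
        super().__init__()
        self.embed = nn.Embedding(10, 4)  # may be dispatched to one GPU
        self.head = nn.Linear(4, 10)      # ... and this one to another

    def forward(self, input_ids):
        device = input_ids.device                    # single reference device
        x = self.embed(input_ids).to(device=device)  # pull outputs back to it
        batch_idx = torch.arange(x.shape[0], device=device)  # always matches x
        return self.head(x).to(device=device)[batch_idx]

print(Toy()(torch.tensor([1, 2, 3])).shape)  # torch.Size([3, 10])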
@@ -1326,7 +1327,7 @@ class MolmoPointModel(MolmoPointPreTrainedModel):
             features = []
             for layer in self.vit_layers:
                 features.append(vit_image_features[layer])
-            vit_features = torch.cat(features, dim=-1)
+            vit_features = torch.cat(features, dim=-1).to(device=device)
             vit_feature_dim = vit_features.shape[-1]
 
             # Gather the features that should be pooled to build patch embeddings
@@ -1342,7 +1343,7 @@ class MolmoPointModel(MolmoPointPreTrainedModel):
             vit_features_to_flat_mask = vit_features_mask.view(-1, token_pooling.shape[-1])[image_features_mask.view(-1)]
 
             # Finally apply the connector and add to input embeddings
-            image_features = self.connector(vit_features_flat, vit_features_to_flat_mask)
+            image_features = self.connector(vit_features_flat, vit_features_to_flat_mask).to(device=device)
             x = x.clone()
             x.view(-1, dim)[is_image_token.view(-1)] += image_features.view(-1, dim)
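The scatter-add above works because boolean indexing into the flattened `(batch * seq, dim)` view selects exactly one row per `True` in `is_image_token`, so `image_features` must carry one row per image token. A self-contained illustration with toy shapes:

# Self-contained illustration of the masked scatter-add; shapes are toy.
import torch

batch, seq, dim = 2, 5, 4
x = torch.zeros(batch, seq, dim)
is_image_token = torch.tensor([[False, True, True, False, False],
                               [True, True, False, False, False]])
image_features = torch.ones(int(is_image_token.sum()), dim)  # one row per True

x = x.clone()  # as in the diff: avoid writing into a shared tensor
x.view(-1, dim)[is_image_token.view(-1)] += image_features
print(x[0, 1])  # tensor([1., 1., 1., 1.])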
@@ -1350,7 +1351,7 @@ class MolmoPointModel(MolmoPointPreTrainedModel):
             # embeddings
             image_token_indices = torch.cumsum(is_indexable_image_token, dim=-1) - 1
             image_pos_ids_flat = image_token_indices.view(-1)[is_image_token.view(-1)]
-            image_pos_ids = torch.zeros([batch_size, token_pooling.shape[1]], dtype=torch.long, device=x.device)
+            image_pos_ids = torch.zeros([batch_size, token_pooling.shape[1]], dtype=torch.long, device=device)
             image_pos_ids.view(-1)[image_features_mask.view(-1)] = image_pos_ids_flat
             max_image_pos_id = image_pos_ids_flat.max() + 1
         elif image_data is not None:
@@ -1374,7 +1375,7 @@ class MolmoPointModel(MolmoPointPreTrainedModel):
                 assert last_predicted_patch_id is not None, "Patch should always be generated before a subpatch"
                 for_patches = (last_predicted_patch_id.view(batch_size) + image_token_offset)[input_subpatch_ids.view(batch_size) >= 0]
                 vit_features_to_embed = vit_features_flat[for_patches, input_subpatch_ids]
-                x.view(-1, dim)[is_subpatch.view(-1)] = self.build_vit_embedding(vit_features_to_embed).to(x.device)
+                x.view(-1, dim)[is_subpatch.view(-1)] = self.build_vit_embedding(vit_features_to_embed).to(device=device)
 
         # shape: (batch_size, seq_len, d_model)
         x = self.transformer.emb_drop(x)  # type: ignore
@@ -1438,7 +1439,7 @@ class MolmoPointModel(MolmoPointPreTrainedModel):
 
         if images is not None or image_data is not None:
             if self.x_norm:
-                x_norm = self.x_norm(x)
+                x_norm = self.x_norm(x).to(device=device)
             elif self.config.norm_x:
                 x_norm = x / math.sqrt(dim)
             else:
@@ -1452,7 +1453,8 @@ class MolmoPointModel(MolmoPointPreTrainedModel):
             patch_k_flat = self.patch_k(x_norm.view(-1, dim)[is_image_token.view(-1)])
             if self.patch_rotary is not None:
                 patch_k_flat = self.patch_rotary(patch_k_flat, image_pos_ids_flat)
-            patch_k = torch.zeros([batch_size, image_features_mask.shape[1], patch_k_flat.shape[-1]], dtype=x.dtype, device=x.device)
+            patch_k_flat = patch_k_flat.to(device=device)
+            patch_k = torch.zeros([batch_size, image_features_mask.shape[1], patch_k_flat.shape[-1]], dtype=x.dtype, device=device)
             patch_k.view(-1, patch_k_flat.shape[-1])[image_features_mask.flatten()] = patch_k_flat.to(dtype=x.dtype)
 
             patch_k_mask = image_features_mask.clone()
@@ -1460,14 +1462,14 @@ class MolmoPointModel(MolmoPointPreTrainedModel):
                                               is_indexable_image_token.view(-1)[is_image_token.view(-1)])
 
             if self.config.no_more_points_class:
-                patch_k = self.add_no_point_class_embed(patch_k)
+                patch_k = self.add_no_point_class_embed(patch_k).to(device=device)
                 patch_k_mask = F.pad(patch_k_mask, (0, 1), value=True)
 
-            subpatch_k = self.subpatch_k(vit_features)
+            subpatch_k = self.subpatch_k(vit_features).to(device=device)
 
         # Predict patch locations
         if can_point:
-            image_q = self.patch_q(x_norm)
+            image_q = self.patch_q(x_norm).to(device=device)
             if self.patch_rotary is not None and last_predicted_patch_id is not None:
                 rotate_by = image_pos_ids[batch_idx, last_predicted_patch_id]
                 rotate_by = torch.where(last_predicted_patch_id >= 0, rotate_by, 0)
@@ -1475,7 +1477,7 @@ class MolmoPointModel(MolmoPointPreTrainedModel):
                 image_q = self.patch_rotary(
                     image_q.view(-1, image_q.shape[-1]),
                     torch.clamp(rotate_by, min=0),
-                ).reshape(batch_size, -1, image_q.shape[-1])
+                ).reshape(batch_size, -1, image_q.shape[-1]).to(device=device)
 
             dots = torch.matmul(image_q, patch_k.transpose(1, 2))  # [batch, 1, num_images]
             if self.config.norm_logits:
@@ -1487,7 +1489,7 @@ class MolmoPointModel(MolmoPointPreTrainedModel):
         if can_point and torch.any(is_patch):
             if x_norm.shape[1] != 1:
                 raise NotImplementedError()
-            subpatch_point_q = self.subpatch_q(x_norm.squeeze(1))
+            subpatch_point_q = self.subpatch_q(x_norm.squeeze(1)).to(device=device)
             subpatch_k = subpatch_k[batch_idx, input_patch_ids.squeeze(1)]
             subpatch_logits = torch.einsum("pd,pcd->pc", subpatch_point_q, subpatch_k)
             if self.config.norm_logits:
@@ -1497,7 +1499,7 @@ class MolmoPointModel(MolmoPointPreTrainedModel):
             subpatch_logits = subpatch_logits[:, None, :]
 
         if can_point and torch.any(is_subpatch):
-            location_logits = self.subpatch_loc_k(x)
+            location_logits = self.subpatch_loc_k(x).to(device=device)
 
         if is_prefill:
             num_image_tokens = is_image_token.sum(-1)
@@ -1534,6 +1536,17 @@ class MolmoPointModel(MolmoPointPreTrainedModel):
         )
 
 
+class ExtendedLmHead(nn.Module):
+    def __init__(self, config):
+        super().__init__()
+        self.output_embeddings = nn.Parameter(torch.zeros([config.vocab_size, config.hidden_size]))
+        self.new_output_embeddings = nn.Parameter(torch.zeros([128, config.hidden_size]))
+
+    def __call__(self, hidden_states, slice_indices=None):
+        lm_head = torch.concatenate([self.output_embeddings, self.new_output_embeddings], dim=0)
+        return F.linear(hidden_states[:, slice_indices, :], lm_head)
+
+
 class MolmoPointForConditionalGeneration(MolmoPointPreTrainedModel, GenerationMixin):
     _checkpoint_conversion_mapping = {}
     _tied_weights_keys = []  # Weights are not tied
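Keeping the pretrained `output_embeddings` separate from the 128-row `new_output_embeddings` leaves the original vocabulary weights untouched while the concatenation presents them as a single logit matrix; rows from `vocab_size` onward score the newly added tokens. A quick shape check, where `SimpleNamespace` stands in for the real config object (note that callers are expected to pass an explicit slice, since `slice_indices=None` would insert a new axis via `hidden_states[:, None, :]`):

# Quick shape check; assumes the ExtendedLmHead above is in scope
# (with torch.nn as nn and torch.nn.functional as F imported).
import torch
from types import SimpleNamespace

config = SimpleNamespace(vocab_size=32, hidden_size=8)  # stand-in config
head = ExtendedLmHead(config)
hidden = torch.randn(1, 3, config.hidden_size)
logits = head(hidden, slice_indices=slice(None))
print(logits.shape)  # torch.Size([1, 3, 160]): 32 base rows + 128 new rows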
@@ -1545,8 +1558,7 @@ class MolmoPointForConditionalGeneration(MolmoPointPreTrainedModel, GenerationMixin):
         super().__init__(config)
 
         self.model = MolmoPointModel(config)
-        self.output_embeddings = nn.Parameter(torch.zeros([config.vocab_size, config.hidden_size]))
-        self.new_output_embeddings = nn.Parameter(torch.zeros([128, config.hidden_size]))
+        self.lm_head = ExtendedLmHead(config)
         self.vocab_size = config.vocab_size
 
         # Initialize weights and apply final processing
@@ -1675,8 +1687,7 @@ class MolmoPointForConditionalGeneration(MolmoPointPreTrainedModel, GenerationMixin):
         hidden_states = outputs.last_hidden_state
         # Only compute necessary logits, and do not upcast them to float if we are not computing the loss
         slice_indices = slice(-logits_to_keep, None) if isinstance(logits_to_keep, int) else logits_to_keep
-        lm_head = torch.concatenate([self.output_embeddings, self.new_output_embeddings], dim=0)
-        logits = F.linear(hidden_states[:, slice_indices, :], lm_head)
+        logits = self.lm_head(hidden_states, slice_indices=slice_indices)
 
         loss = None
         if labels is not None:
@@ -1697,6 +1708,7 @@ class MolmoPointForConditionalGeneration(MolmoPointPreTrainedModel, GenerationMixin):
             # process is hard to emulate in generation frameworks
             # Our hack here is to assume that, if we generate a TOKEN, we always select the argmax
             # patch. Then we can use PATCH_TOKEN scores as the argmax's patch scores
+            device = logits.device
             predicted_tokens = torch.argmax(logits[:, -1], dim=-1)
             patch_token_logits = torch.clone(logits[:, :, self.config.patch_token_id])
             logits[:, :, self.config.patch_token_id] = small_val
@@ -1705,8 +1717,8 @@ class MolmoPointForConditionalGeneration(MolmoPointPreTrainedModel, GenerationMixin):
             if outputs.patch_logits is not None:
                 selected_patches = torch.argmax(outputs.patch_logits, -1)
                 bs, seq, n_patches = outputs.patch_logits.shape
-                batch_idx = torch.arange(outputs.patch_logits.shape[0], device=logits.device)
-                seq_ix = torch.arange(outputs.patch_logits.shape[1], device=logits.device)
+                batch_idx = torch.arange(outputs.patch_logits.shape[0], device=device)
+                seq_ix = torch.arange(outputs.patch_logits.shape[1], device=device)
                 argmax_patch_logits[batch_idx.view(-1, 1, 1), seq_ix.view(1, -1, 1), selected_patches] = patch_token_logits
 
                 logits[:, :, self.config.subpatch_token_id] = small_val
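The write into `argmax_patch_logits` uses broadcasted advanced indexing: a `(bs, 1, 1)` column of batch indices, a `(1, seq, 1)` row of sequence indices, and the per-position argmax patch id together address one slot per (batch, position). A small sketch of the same pattern, with an explicit trailing axis on the index and value tensors for clarity:

# Toy sketch of broadcasted index assignment; shapes are illustrative.
import torch

bs, seq, n_patches = 2, 3, 4
argmax_patch_logits = torch.full((bs, seq, n_patches), -1e4)
patch_token_logits = torch.randn(bs, seq)
selected_patches = torch.randint(n_patches, (bs, seq))

batch_idx = torch.arange(bs).view(-1, 1, 1)
seq_ix = torch.arange(seq).view(1, -1, 1)
# each (b, s) pair writes its PATCH_TOKEN score into its argmax patch slot
argmax_patch_logits[batch_idx, seq_ix, selected_patches.view(bs, seq, 1)] = \
    patch_token_logits.view(bs, seq, 1)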
@@ -1722,7 +1734,11 @@ class MolmoPointForConditionalGeneration(MolmoPointPreTrainedModel, GenerationMixin):
                 location_logits = torch.full([bs, seq, 9], small_val, dtype=logits.dtype, device=logits.device)
 
             logits = torch.concatenate([
-                logits, argmax_patch_logits, subpatch_logits, location_logits], -1)
+                logits,
+                argmax_patch_logits,
+                subpatch_logits.to(device=device),
+                location_logits.to(device=device)
+            ], -1)
 
         return MolmoPointCausalLMOutputWithPast(
             loss=loss,
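After this concatenation the final axis holds the token scores first, then one score per image patch, then the subpatch scores, then the 9 location scores, so a plain `argmax` over the last dimension can select any of the prediction types during generation. A toy illustration of the layout; all sizes except the 9 location slots are placeholders:

# Toy layout of the augmented logit vector; only the 9 location slots
# match the real model, the other sizes are placeholders.
import torch

bs, seq = 1, 1
vocab_logits = torch.randn(bs, seq, 160)    # token scores (base + new tokens)
patch_logits = torch.randn(bs, seq, 12)     # one score per image patch
subpatch_logits = torch.randn(bs, seq, 25)  # scores within the chosen patch
location_logits = torch.randn(bs, seq, 9)   # sub-patch location grid

logits = torch.concatenate(
    [vocab_logits, patch_logits, subpatch_logits, location_logits], -1)
print(logits.shape)                          # torch.Size([1, 1, 206])
print(torch.argmax(logits[:, -1], dim=-1))   # may land in any segment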