Incrementally upload converted MTP layer
#2
by
Tile-AI-team
- opened
- model.safetensors.index.json +215 -7
model.safetensors.index.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"metadata": {
|
| 3 |
-
"total_size":
|
| 4 |
},
|
| 5 |
"weight_map": {
|
| 6 |
"model.embed_tokens.weight": "model.safetensors-00001-of-00137.safetensors",
|
|
@@ -2800,8 +2800,8 @@
|
|
| 2800 |
"layer_60_exp_upgate_scales_dev_1": "model.safetensors-00035-of-00137.safetensors",
|
| 2801 |
"layer_60_exp_down_weights_dev_1": "model.safetensors-00035-of-00137.safetensors",
|
| 2802 |
"layer_60_exp_down_scales_dev_1": "model.safetensors-00035-of-00137.safetensors",
|
| 2803 |
-
"layer_61_lm_head.weight_dev_1": "model.safetensors-00035-of-00137.safetensors",
|
| 2804 |
"layer_61_model.norm.weight_dev_1": "model.safetensors-00035-of-00137.safetensors",
|
|
|
|
| 2805 |
"layer_0_x_rmsnorm_gamma_dev_2": "model.safetensors-00036-of-00137.safetensors",
|
| 2806 |
"layer_0_qkv_wa_weights_dev_2": "model.safetensors-00036-of-00137.safetensors",
|
| 2807 |
"layer_0_qkv_wa_scales_dev_2": "model.safetensors-00036-of-00137.safetensors",
|
|
@@ -4199,8 +4199,8 @@
|
|
| 4199 |
"layer_60_exp_upgate_scales_dev_2": "model.safetensors-00052-of-00137.safetensors",
|
| 4200 |
"layer_60_exp_down_weights_dev_2": "model.safetensors-00052-of-00137.safetensors",
|
| 4201 |
"layer_60_exp_down_scales_dev_2": "model.safetensors-00052-of-00137.safetensors",
|
| 4202 |
-
"layer_61_lm_head.weight_dev_2": "model.safetensors-00052-of-00137.safetensors",
|
| 4203 |
"layer_61_model.norm.weight_dev_2": "model.safetensors-00052-of-00137.safetensors",
|
|
|
|
| 4204 |
"layer_0_x_rmsnorm_gamma_dev_3": "model.safetensors-00053-of-00137.safetensors",
|
| 4205 |
"layer_0_qkv_wa_weights_dev_3": "model.safetensors-00053-of-00137.safetensors",
|
| 4206 |
"layer_0_qkv_wa_scales_dev_3": "model.safetensors-00053-of-00137.safetensors",
|
|
@@ -5598,8 +5598,8 @@
|
|
| 5598 |
"layer_60_exp_upgate_scales_dev_3": "model.safetensors-00069-of-00137.safetensors",
|
| 5599 |
"layer_60_exp_down_weights_dev_3": "model.safetensors-00069-of-00137.safetensors",
|
| 5600 |
"layer_60_exp_down_scales_dev_3": "model.safetensors-00069-of-00137.safetensors",
|
| 5601 |
-
"layer_61_lm_head.weight_dev_3": "model.safetensors-00069-of-00137.safetensors",
|
| 5602 |
"layer_61_model.norm.weight_dev_3": "model.safetensors-00069-of-00137.safetensors",
|
|
|
|
| 5603 |
"layer_0_x_rmsnorm_gamma_dev_4": "model.safetensors-00070-of-00137.safetensors",
|
| 5604 |
"layer_0_qkv_wa_weights_dev_4": "model.safetensors-00070-of-00137.safetensors",
|
| 5605 |
"layer_0_qkv_wa_scales_dev_4": "model.safetensors-00070-of-00137.safetensors",
|
|
@@ -6997,8 +6997,8 @@
|
|
| 6997 |
"layer_60_exp_upgate_scales_dev_4": "model.safetensors-00086-of-00137.safetensors",
|
| 6998 |
"layer_60_exp_down_weights_dev_4": "model.safetensors-00086-of-00137.safetensors",
|
| 6999 |
"layer_60_exp_down_scales_dev_4": "model.safetensors-00086-of-00137.safetensors",
|
| 7000 |
-
"layer_61_lm_head.weight_dev_4": "model.safetensors-00086-of-00137.safetensors",
|
| 7001 |
"layer_61_model.norm.weight_dev_4": "model.safetensors-00086-of-00137.safetensors",
|
|
|
|
| 7002 |
"layer_0_x_rmsnorm_gamma_dev_5": "model.safetensors-00087-of-00137.safetensors",
|
| 7003 |
"layer_0_qkv_wa_weights_dev_5": "model.safetensors-00087-of-00137.safetensors",
|
| 7004 |
"layer_0_qkv_wa_scales_dev_5": "model.safetensors-00087-of-00137.safetensors",
|
|
@@ -8396,8 +8396,8 @@
|
|
| 8396 |
"layer_60_exp_upgate_scales_dev_5": "model.safetensors-00103-of-00137.safetensors",
|
| 8397 |
"layer_60_exp_down_weights_dev_5": "model.safetensors-00103-of-00137.safetensors",
|
| 8398 |
"layer_60_exp_down_scales_dev_5": "model.safetensors-00103-of-00137.safetensors",
|
| 8399 |
-
"layer_61_lm_head.weight_dev_5": "model.safetensors-00103-of-00137.safetensors",
|
| 8400 |
"layer_61_model.norm.weight_dev_5": "model.safetensors-00103-of-00137.safetensors",
|
|
|
|
| 8401 |
"layer_0_x_rmsnorm_gamma_dev_6": "model.safetensors-00104-of-00137.safetensors",
|
| 8402 |
"layer_0_qkv_wa_weights_dev_6": "model.safetensors-00104-of-00137.safetensors",
|
| 8403 |
"layer_0_qkv_wa_scales_dev_6": "model.safetensors-00104-of-00137.safetensors",
|
|
@@ -11195,6 +11195,214 @@
|
|
| 11195 |
"layer_60_exp_down_weights_dev_7": "model.safetensors-00137-of-00137.safetensors",
|
| 11196 |
"layer_60_exp_down_scales_dev_7": "model.safetensors-00137-of-00137.safetensors",
|
| 11197 |
"layer_61_lm_head.weight_dev_7": "model.safetensors-00137-of-00137.safetensors",
|
| 11198 |
-
"layer_61_model.norm.weight_dev_7": "model.safetensors-00137-of-00137.safetensors"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11199 |
}
|
| 11200 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"metadata": {
|
| 3 |
+
"total_size": 97814217856
|
| 4 |
},
|
| 5 |
"weight_map": {
|
| 6 |
"model.embed_tokens.weight": "model.safetensors-00001-of-00137.safetensors",
|
|
|
|
| 2800 |
"layer_60_exp_upgate_scales_dev_1": "model.safetensors-00035-of-00137.safetensors",
|
| 2801 |
"layer_60_exp_down_weights_dev_1": "model.safetensors-00035-of-00137.safetensors",
|
| 2802 |
"layer_60_exp_down_scales_dev_1": "model.safetensors-00035-of-00137.safetensors",
|
|
|
|
| 2803 |
"layer_61_model.norm.weight_dev_1": "model.safetensors-00035-of-00137.safetensors",
|
| 2804 |
+
"layer_61_lm_head.weight_dev_1": "model.safetensors-00035-of-00137.safetensors",
|
| 2805 |
"layer_0_x_rmsnorm_gamma_dev_2": "model.safetensors-00036-of-00137.safetensors",
|
| 2806 |
"layer_0_qkv_wa_weights_dev_2": "model.safetensors-00036-of-00137.safetensors",
|
| 2807 |
"layer_0_qkv_wa_scales_dev_2": "model.safetensors-00036-of-00137.safetensors",
|
|
|
|
| 4199 |
"layer_60_exp_upgate_scales_dev_2": "model.safetensors-00052-of-00137.safetensors",
|
| 4200 |
"layer_60_exp_down_weights_dev_2": "model.safetensors-00052-of-00137.safetensors",
|
| 4201 |
"layer_60_exp_down_scales_dev_2": "model.safetensors-00052-of-00137.safetensors",
|
|
|
|
| 4202 |
"layer_61_model.norm.weight_dev_2": "model.safetensors-00052-of-00137.safetensors",
|
| 4203 |
+
"layer_61_lm_head.weight_dev_2": "model.safetensors-00052-of-00137.safetensors",
|
| 4204 |
"layer_0_x_rmsnorm_gamma_dev_3": "model.safetensors-00053-of-00137.safetensors",
|
| 4205 |
"layer_0_qkv_wa_weights_dev_3": "model.safetensors-00053-of-00137.safetensors",
|
| 4206 |
"layer_0_qkv_wa_scales_dev_3": "model.safetensors-00053-of-00137.safetensors",
|
|
|
|
| 5598 |
"layer_60_exp_upgate_scales_dev_3": "model.safetensors-00069-of-00137.safetensors",
|
| 5599 |
"layer_60_exp_down_weights_dev_3": "model.safetensors-00069-of-00137.safetensors",
|
| 5600 |
"layer_60_exp_down_scales_dev_3": "model.safetensors-00069-of-00137.safetensors",
|
|
|
|
| 5601 |
"layer_61_model.norm.weight_dev_3": "model.safetensors-00069-of-00137.safetensors",
|
| 5602 |
+
"layer_61_lm_head.weight_dev_3": "model.safetensors-00069-of-00137.safetensors",
|
| 5603 |
"layer_0_x_rmsnorm_gamma_dev_4": "model.safetensors-00070-of-00137.safetensors",
|
| 5604 |
"layer_0_qkv_wa_weights_dev_4": "model.safetensors-00070-of-00137.safetensors",
|
| 5605 |
"layer_0_qkv_wa_scales_dev_4": "model.safetensors-00070-of-00137.safetensors",
|
|
|
|
| 6997 |
"layer_60_exp_upgate_scales_dev_4": "model.safetensors-00086-of-00137.safetensors",
|
| 6998 |
"layer_60_exp_down_weights_dev_4": "model.safetensors-00086-of-00137.safetensors",
|
| 6999 |
"layer_60_exp_down_scales_dev_4": "model.safetensors-00086-of-00137.safetensors",
|
|
|
|
| 7000 |
"layer_61_model.norm.weight_dev_4": "model.safetensors-00086-of-00137.safetensors",
|
| 7001 |
+
"layer_61_lm_head.weight_dev_4": "model.safetensors-00086-of-00137.safetensors",
|
| 7002 |
"layer_0_x_rmsnorm_gamma_dev_5": "model.safetensors-00087-of-00137.safetensors",
|
| 7003 |
"layer_0_qkv_wa_weights_dev_5": "model.safetensors-00087-of-00137.safetensors",
|
| 7004 |
"layer_0_qkv_wa_scales_dev_5": "model.safetensors-00087-of-00137.safetensors",
|
|
|
|
| 8396 |
"layer_60_exp_upgate_scales_dev_5": "model.safetensors-00103-of-00137.safetensors",
|
| 8397 |
"layer_60_exp_down_weights_dev_5": "model.safetensors-00103-of-00137.safetensors",
|
| 8398 |
"layer_60_exp_down_scales_dev_5": "model.safetensors-00103-of-00137.safetensors",
|
|
|
|
| 8399 |
"layer_61_model.norm.weight_dev_5": "model.safetensors-00103-of-00137.safetensors",
|
| 8400 |
+
"layer_61_lm_head.weight_dev_5": "model.safetensors-00103-of-00137.safetensors",
|
| 8401 |
"layer_0_x_rmsnorm_gamma_dev_6": "model.safetensors-00104-of-00137.safetensors",
|
| 8402 |
"layer_0_qkv_wa_weights_dev_6": "model.safetensors-00104-of-00137.safetensors",
|
| 8403 |
"layer_0_qkv_wa_scales_dev_6": "model.safetensors-00104-of-00137.safetensors",
|
|
|
|
| 11195 |
"layer_60_exp_down_weights_dev_7": "model.safetensors-00137-of-00137.safetensors",
|
| 11196 |
"layer_60_exp_down_scales_dev_7": "model.safetensors-00137-of-00137.safetensors",
|
| 11197 |
"layer_61_lm_head.weight_dev_7": "model.safetensors-00137-of-00137.safetensors",
|
| 11198 |
+
"layer_61_model.norm.weight_dev_7": "model.safetensors-00137-of-00137.safetensors",
|
| 11199 |
+
"layer_61_x_rmsnorm_gamma_dev_0": "model_mtp_layer61-00001.safetensors",
|
| 11200 |
+
"layer_61_qkv_wa_weights_dev_0": "model_mtp_layer61-00001.safetensors",
|
| 11201 |
+
"layer_61_qkv_wa_scales_dev_0": "model_mtp_layer61-00001.safetensors",
|
| 11202 |
+
"layer_61_k_weights_dev_0": "model_mtp_layer61-00001.safetensors",
|
| 11203 |
+
"layer_61_k_bias_dev_0": "model_mtp_layer61-00001.safetensors",
|
| 11204 |
+
"layer_61_q_rmsnorm_gamma_dev_0": "model_mtp_layer61-00001.safetensors",
|
| 11205 |
+
"layer_61_q_wb_weights_dev_0": "model_mtp_layer61-00001.safetensors",
|
| 11206 |
+
"layer_61_q_wb_scales_dev_0": "model_mtp_layer61-00001.safetensors",
|
| 11207 |
+
"layer_61_id_score_weights_dev_0": "model_mtp_layer61-00001.safetensors",
|
| 11208 |
+
"layer_61_wkv_b1_weights_dev_0": "model_mtp_layer61-00001.safetensors",
|
| 11209 |
+
"layer_61_wkv_b1_scales_dev_0": "model_mtp_layer61-00001.safetensors",
|
| 11210 |
+
"layer_61_kv_rmsnorm_gamma_dev_0": "model_mtp_layer61-00001.safetensors",
|
| 11211 |
+
"layer_61_wkv_b2_weights_dev_0": "model_mtp_layer61-00001.safetensors",
|
| 11212 |
+
"layer_61_wkv_b2_scales_dev_0": "model_mtp_layer61-00001.safetensors",
|
| 11213 |
+
"layer_61_unproj_weights_dev_0": "model_mtp_layer61-00001.safetensors",
|
| 11214 |
+
"layer_61_unproj_scales_dev_0": "model_mtp_layer61-00001.safetensors",
|
| 11215 |
+
"layer_61_x_rmsnorm_gamma_dev_1": "model_mtp_layer61-00001.safetensors",
|
| 11216 |
+
"layer_61_qkv_wa_weights_dev_1": "model_mtp_layer61-00001.safetensors",
|
| 11217 |
+
"layer_61_qkv_wa_scales_dev_1": "model_mtp_layer61-00001.safetensors",
|
| 11218 |
+
"layer_61_k_weights_dev_1": "model_mtp_layer61-00001.safetensors",
|
| 11219 |
+
"layer_61_k_bias_dev_1": "model_mtp_layer61-00001.safetensors",
|
| 11220 |
+
"layer_61_q_rmsnorm_gamma_dev_1": "model_mtp_layer61-00001.safetensors",
|
| 11221 |
+
"layer_61_q_wb_weights_dev_1": "model_mtp_layer61-00001.safetensors",
|
| 11222 |
+
"layer_61_q_wb_scales_dev_1": "model_mtp_layer61-00001.safetensors",
|
| 11223 |
+
"layer_61_id_score_weights_dev_1": "model_mtp_layer61-00001.safetensors",
|
| 11224 |
+
"layer_61_wkv_b1_weights_dev_1": "model_mtp_layer61-00001.safetensors",
|
| 11225 |
+
"layer_61_wkv_b1_scales_dev_1": "model_mtp_layer61-00001.safetensors",
|
| 11226 |
+
"layer_61_kv_rmsnorm_gamma_dev_1": "model_mtp_layer61-00001.safetensors",
|
| 11227 |
+
"layer_61_wkv_b2_weights_dev_1": "model_mtp_layer61-00001.safetensors",
|
| 11228 |
+
"layer_61_wkv_b2_scales_dev_1": "model_mtp_layer61-00001.safetensors",
|
| 11229 |
+
"layer_61_unproj_weights_dev_1": "model_mtp_layer61-00001.safetensors",
|
| 11230 |
+
"layer_61_unproj_scales_dev_1": "model_mtp_layer61-00001.safetensors",
|
| 11231 |
+
"layer_61_x_rmsnorm_gamma_dev_2": "model_mtp_layer61-00001.safetensors",
|
| 11232 |
+
"layer_61_qkv_wa_weights_dev_2": "model_mtp_layer61-00001.safetensors",
|
| 11233 |
+
"layer_61_qkv_wa_scales_dev_2": "model_mtp_layer61-00001.safetensors",
|
| 11234 |
+
"layer_61_k_weights_dev_2": "model_mtp_layer61-00001.safetensors",
|
| 11235 |
+
"layer_61_k_bias_dev_2": "model_mtp_layer61-00001.safetensors",
|
| 11236 |
+
"layer_61_q_rmsnorm_gamma_dev_2": "model_mtp_layer61-00001.safetensors",
|
| 11237 |
+
"layer_61_q_wb_weights_dev_2": "model_mtp_layer61-00001.safetensors",
|
| 11238 |
+
"layer_61_q_wb_scales_dev_2": "model_mtp_layer61-00001.safetensors",
|
| 11239 |
+
"layer_61_id_score_weights_dev_2": "model_mtp_layer61-00001.safetensors",
|
| 11240 |
+
"layer_61_wkv_b1_weights_dev_2": "model_mtp_layer61-00001.safetensors",
|
| 11241 |
+
"layer_61_wkv_b1_scales_dev_2": "model_mtp_layer61-00001.safetensors",
|
| 11242 |
+
"layer_61_kv_rmsnorm_gamma_dev_2": "model_mtp_layer61-00001.safetensors",
|
| 11243 |
+
"layer_61_wkv_b2_weights_dev_2": "model_mtp_layer61-00001.safetensors",
|
| 11244 |
+
"layer_61_wkv_b2_scales_dev_2": "model_mtp_layer61-00001.safetensors",
|
| 11245 |
+
"layer_61_unproj_weights_dev_2": "model_mtp_layer61-00001.safetensors",
|
| 11246 |
+
"layer_61_unproj_scales_dev_2": "model_mtp_layer61-00001.safetensors",
|
| 11247 |
+
"layer_61_x_rmsnorm_gamma_dev_3": "model_mtp_layer61-00001.safetensors",
|
| 11248 |
+
"layer_61_qkv_wa_weights_dev_3": "model_mtp_layer61-00001.safetensors",
|
| 11249 |
+
"layer_61_qkv_wa_scales_dev_3": "model_mtp_layer61-00001.safetensors",
|
| 11250 |
+
"layer_61_k_weights_dev_3": "model_mtp_layer61-00001.safetensors",
|
| 11251 |
+
"layer_61_k_bias_dev_3": "model_mtp_layer61-00001.safetensors",
|
| 11252 |
+
"layer_61_q_rmsnorm_gamma_dev_3": "model_mtp_layer61-00001.safetensors",
|
| 11253 |
+
"layer_61_q_wb_weights_dev_3": "model_mtp_layer61-00001.safetensors",
|
| 11254 |
+
"layer_61_q_wb_scales_dev_3": "model_mtp_layer61-00001.safetensors",
|
| 11255 |
+
"layer_61_id_score_weights_dev_3": "model_mtp_layer61-00001.safetensors",
|
| 11256 |
+
"layer_61_wkv_b1_weights_dev_3": "model_mtp_layer61-00001.safetensors",
|
| 11257 |
+
"layer_61_wkv_b1_scales_dev_3": "model_mtp_layer61-00001.safetensors",
|
| 11258 |
+
"layer_61_kv_rmsnorm_gamma_dev_3": "model_mtp_layer61-00001.safetensors",
|
| 11259 |
+
"layer_61_wkv_b2_weights_dev_3": "model_mtp_layer61-00001.safetensors",
|
| 11260 |
+
"layer_61_wkv_b2_scales_dev_3": "model_mtp_layer61-00001.safetensors",
|
| 11261 |
+
"layer_61_unproj_weights_dev_3": "model_mtp_layer61-00001.safetensors",
|
| 11262 |
+
"layer_61_unproj_scales_dev_3": "model_mtp_layer61-00001.safetensors",
|
| 11263 |
+
"layer_61_x_rmsnorm_gamma_dev_4": "model_mtp_layer61-00001.safetensors",
|
| 11264 |
+
"layer_61_qkv_wa_weights_dev_4": "model_mtp_layer61-00001.safetensors",
|
| 11265 |
+
"layer_61_qkv_wa_scales_dev_4": "model_mtp_layer61-00001.safetensors",
|
| 11266 |
+
"layer_61_k_weights_dev_4": "model_mtp_layer61-00001.safetensors",
|
| 11267 |
+
"layer_61_k_bias_dev_4": "model_mtp_layer61-00001.safetensors",
|
| 11268 |
+
"layer_61_q_rmsnorm_gamma_dev_4": "model_mtp_layer61-00001.safetensors",
|
| 11269 |
+
"layer_61_q_wb_weights_dev_4": "model_mtp_layer61-00001.safetensors",
|
| 11270 |
+
"layer_61_q_wb_scales_dev_4": "model_mtp_layer61-00001.safetensors",
|
| 11271 |
+
"layer_61_id_score_weights_dev_4": "model_mtp_layer61-00001.safetensors",
|
| 11272 |
+
"layer_61_wkv_b1_weights_dev_4": "model_mtp_layer61-00001.safetensors",
|
| 11273 |
+
"layer_61_wkv_b1_scales_dev_4": "model_mtp_layer61-00001.safetensors",
|
| 11274 |
+
"layer_61_kv_rmsnorm_gamma_dev_4": "model_mtp_layer61-00001.safetensors",
|
| 11275 |
+
"layer_61_wkv_b2_weights_dev_4": "model_mtp_layer61-00001.safetensors",
|
| 11276 |
+
"layer_61_wkv_b2_scales_dev_4": "model_mtp_layer61-00001.safetensors",
|
| 11277 |
+
"layer_61_unproj_weights_dev_4": "model_mtp_layer61-00001.safetensors",
|
| 11278 |
+
"layer_61_unproj_scales_dev_4": "model_mtp_layer61-00001.safetensors",
|
| 11279 |
+
"layer_61_x_rmsnorm_gamma_dev_5": "model_mtp_layer61-00001.safetensors",
|
| 11280 |
+
"layer_61_qkv_wa_weights_dev_5": "model_mtp_layer61-00001.safetensors",
|
| 11281 |
+
"layer_61_qkv_wa_scales_dev_5": "model_mtp_layer61-00001.safetensors",
|
| 11282 |
+
"layer_61_k_weights_dev_5": "model_mtp_layer61-00001.safetensors",
|
| 11283 |
+
"layer_61_k_bias_dev_5": "model_mtp_layer61-00001.safetensors",
|
| 11284 |
+
"layer_61_q_rmsnorm_gamma_dev_5": "model_mtp_layer61-00001.safetensors",
|
| 11285 |
+
"layer_61_q_wb_weights_dev_5": "model_mtp_layer61-00001.safetensors",
|
| 11286 |
+
"layer_61_q_wb_scales_dev_5": "model_mtp_layer61-00001.safetensors",
|
| 11287 |
+
"layer_61_id_score_weights_dev_5": "model_mtp_layer61-00001.safetensors",
|
| 11288 |
+
"layer_61_wkv_b1_weights_dev_5": "model_mtp_layer61-00001.safetensors",
|
| 11289 |
+
"layer_61_wkv_b1_scales_dev_5": "model_mtp_layer61-00001.safetensors",
|
| 11290 |
+
"layer_61_kv_rmsnorm_gamma_dev_5": "model_mtp_layer61-00001.safetensors",
|
| 11291 |
+
"layer_61_wkv_b2_weights_dev_5": "model_mtp_layer61-00001.safetensors",
|
| 11292 |
+
"layer_61_wkv_b2_scales_dev_5": "model_mtp_layer61-00001.safetensors",
|
| 11293 |
+
"layer_61_unproj_weights_dev_5": "model_mtp_layer61-00001.safetensors",
|
| 11294 |
+
"layer_61_unproj_scales_dev_5": "model_mtp_layer61-00001.safetensors",
|
| 11295 |
+
"layer_61_x_rmsnorm_gamma_dev_6": "model_mtp_layer61-00001.safetensors",
|
| 11296 |
+
"layer_61_qkv_wa_weights_dev_6": "model_mtp_layer61-00001.safetensors",
|
| 11297 |
+
"layer_61_qkv_wa_scales_dev_6": "model_mtp_layer61-00001.safetensors",
|
| 11298 |
+
"layer_61_k_weights_dev_6": "model_mtp_layer61-00001.safetensors",
|
| 11299 |
+
"layer_61_k_bias_dev_6": "model_mtp_layer61-00001.safetensors",
|
| 11300 |
+
"layer_61_q_rmsnorm_gamma_dev_6": "model_mtp_layer61-00001.safetensors",
|
| 11301 |
+
"layer_61_q_wb_weights_dev_6": "model_mtp_layer61-00001.safetensors",
|
| 11302 |
+
"layer_61_q_wb_scales_dev_6": "model_mtp_layer61-00001.safetensors",
|
| 11303 |
+
"layer_61_id_score_weights_dev_6": "model_mtp_layer61-00001.safetensors",
|
| 11304 |
+
"layer_61_wkv_b1_weights_dev_6": "model_mtp_layer61-00001.safetensors",
|
| 11305 |
+
"layer_61_wkv_b1_scales_dev_6": "model_mtp_layer61-00001.safetensors",
|
| 11306 |
+
"layer_61_kv_rmsnorm_gamma_dev_6": "model_mtp_layer61-00001.safetensors",
|
| 11307 |
+
"layer_61_wkv_b2_weights_dev_6": "model_mtp_layer61-00001.safetensors",
|
| 11308 |
+
"layer_61_wkv_b2_scales_dev_6": "model_mtp_layer61-00001.safetensors",
|
| 11309 |
+
"layer_61_unproj_weights_dev_6": "model_mtp_layer61-00001.safetensors",
|
| 11310 |
+
"layer_61_unproj_scales_dev_6": "model_mtp_layer61-00001.safetensors",
|
| 11311 |
+
"layer_61_x_rmsnorm_gamma_dev_7": "model_mtp_layer61-00001.safetensors",
|
| 11312 |
+
"layer_61_qkv_wa_weights_dev_7": "model_mtp_layer61-00001.safetensors",
|
| 11313 |
+
"layer_61_qkv_wa_scales_dev_7": "model_mtp_layer61-00001.safetensors",
|
| 11314 |
+
"layer_61_k_weights_dev_7": "model_mtp_layer61-00001.safetensors",
|
| 11315 |
+
"layer_61_k_bias_dev_7": "model_mtp_layer61-00001.safetensors",
|
| 11316 |
+
"layer_61_q_rmsnorm_gamma_dev_7": "model_mtp_layer61-00001.safetensors",
|
| 11317 |
+
"layer_61_q_wb_weights_dev_7": "model_mtp_layer61-00001.safetensors",
|
| 11318 |
+
"layer_61_q_wb_scales_dev_7": "model_mtp_layer61-00001.safetensors",
|
| 11319 |
+
"layer_61_id_score_weights_dev_7": "model_mtp_layer61-00001.safetensors",
|
| 11320 |
+
"layer_61_wkv_b1_weights_dev_7": "model_mtp_layer61-00001.safetensors",
|
| 11321 |
+
"layer_61_wkv_b1_scales_dev_7": "model_mtp_layer61-00001.safetensors",
|
| 11322 |
+
"layer_61_kv_rmsnorm_gamma_dev_7": "model_mtp_layer61-00001.safetensors",
|
| 11323 |
+
"layer_61_wkv_b2_weights_dev_7": "model_mtp_layer61-00001.safetensors",
|
| 11324 |
+
"layer_61_wkv_b2_scales_dev_7": "model_mtp_layer61-00001.safetensors",
|
| 11325 |
+
"layer_61_unproj_weights_dev_7": "model_mtp_layer61-00001.safetensors",
|
| 11326 |
+
"layer_61_unproj_scales_dev_7": "model_mtp_layer61-00001.safetensors",
|
| 11327 |
+
"layer_61_unproj_o_gamma_dev_0": "model_mtp_layer61-00001.safetensors",
|
| 11328 |
+
"layer_61_exp_proj_weights_dev_0": "model_mtp_layer61-00001.safetensors",
|
| 11329 |
+
"layer_61_exp_bias_dev_0": "model_mtp_layer61-00001.safetensors",
|
| 11330 |
+
"layer_61_exp_upgate_weights_dev_0": "model_mtp_layer61-00001.safetensors",
|
| 11331 |
+
"layer_61_exp_upgate_scales_dev_0": "model_mtp_layer61-00001.safetensors",
|
| 11332 |
+
"layer_61_exp_down_weights_dev_0": "model_mtp_layer61-00001.safetensors",
|
| 11333 |
+
"layer_61_exp_down_scales_dev_0": "model_mtp_layer61-00001.safetensors",
|
| 11334 |
+
"layer_61_unproj_o_gamma_dev_1": "model_mtp_layer61-00001.safetensors",
|
| 11335 |
+
"layer_61_exp_proj_weights_dev_1": "model_mtp_layer61-00001.safetensors",
|
| 11336 |
+
"layer_61_exp_bias_dev_1": "model_mtp_layer61-00001.safetensors",
|
| 11337 |
+
"layer_61_exp_upgate_weights_dev_1": "model_mtp_layer61-00001.safetensors",
|
| 11338 |
+
"layer_61_exp_upgate_scales_dev_1": "model_mtp_layer61-00001.safetensors",
|
| 11339 |
+
"layer_61_exp_down_weights_dev_1": "model_mtp_layer61-00001.safetensors",
|
| 11340 |
+
"layer_61_exp_down_scales_dev_1": "model_mtp_layer61-00001.safetensors",
|
| 11341 |
+
"layer_61_unproj_o_gamma_dev_2": "model_mtp_layer61-00001.safetensors",
|
| 11342 |
+
"layer_61_exp_proj_weights_dev_2": "model_mtp_layer61-00001.safetensors",
|
| 11343 |
+
"layer_61_exp_bias_dev_2": "model_mtp_layer61-00001.safetensors",
|
| 11344 |
+
"layer_61_exp_upgate_weights_dev_2": "model_mtp_layer61-00001.safetensors",
|
| 11345 |
+
"layer_61_exp_upgate_scales_dev_2": "model_mtp_layer61-00001.safetensors",
|
| 11346 |
+
"layer_61_exp_down_weights_dev_2": "model_mtp_layer61-00001.safetensors",
|
| 11347 |
+
"layer_61_exp_down_scales_dev_2": "model_mtp_layer61-00001.safetensors",
|
| 11348 |
+
"layer_61_unproj_o_gamma_dev_3": "model_mtp_layer61-00001.safetensors",
|
| 11349 |
+
"layer_61_exp_proj_weights_dev_3": "model_mtp_layer61-00001.safetensors",
|
| 11350 |
+
"layer_61_exp_bias_dev_3": "model_mtp_layer61-00001.safetensors",
|
| 11351 |
+
"layer_61_exp_upgate_weights_dev_3": "model_mtp_layer61-00002.safetensors",
|
| 11352 |
+
"layer_61_exp_upgate_scales_dev_3": "model_mtp_layer61-00002.safetensors",
|
| 11353 |
+
"layer_61_exp_down_weights_dev_3": "model_mtp_layer61-00002.safetensors",
|
| 11354 |
+
"layer_61_exp_down_scales_dev_3": "model_mtp_layer61-00002.safetensors",
|
| 11355 |
+
"layer_61_unproj_o_gamma_dev_4": "model_mtp_layer61-00002.safetensors",
|
| 11356 |
+
"layer_61_exp_proj_weights_dev_4": "model_mtp_layer61-00002.safetensors",
|
| 11357 |
+
"layer_61_exp_bias_dev_4": "model_mtp_layer61-00002.safetensors",
|
| 11358 |
+
"layer_61_exp_upgate_weights_dev_4": "model_mtp_layer61-00002.safetensors",
|
| 11359 |
+
"layer_61_exp_upgate_scales_dev_4": "model_mtp_layer61-00002.safetensors",
|
| 11360 |
+
"layer_61_exp_down_weights_dev_4": "model_mtp_layer61-00002.safetensors",
|
| 11361 |
+
"layer_61_exp_down_scales_dev_4": "model_mtp_layer61-00002.safetensors",
|
| 11362 |
+
"layer_61_unproj_o_gamma_dev_5": "model_mtp_layer61-00002.safetensors",
|
| 11363 |
+
"layer_61_exp_proj_weights_dev_5": "model_mtp_layer61-00002.safetensors",
|
| 11364 |
+
"layer_61_exp_bias_dev_5": "model_mtp_layer61-00002.safetensors",
|
| 11365 |
+
"layer_61_exp_upgate_weights_dev_5": "model_mtp_layer61-00002.safetensors",
|
| 11366 |
+
"layer_61_exp_upgate_scales_dev_5": "model_mtp_layer61-00002.safetensors",
|
| 11367 |
+
"layer_61_exp_down_weights_dev_5": "model_mtp_layer61-00002.safetensors",
|
| 11368 |
+
"layer_61_exp_down_scales_dev_5": "model_mtp_layer61-00002.safetensors",
|
| 11369 |
+
"layer_61_unproj_o_gamma_dev_6": "model_mtp_layer61-00002.safetensors",
|
| 11370 |
+
"layer_61_exp_proj_weights_dev_6": "model_mtp_layer61-00002.safetensors",
|
| 11371 |
+
"layer_61_exp_bias_dev_6": "model_mtp_layer61-00002.safetensors",
|
| 11372 |
+
"layer_61_exp_upgate_weights_dev_6": "model_mtp_layer61-00002.safetensors",
|
| 11373 |
+
"layer_61_exp_upgate_scales_dev_6": "model_mtp_layer61-00002.safetensors",
|
| 11374 |
+
"layer_61_exp_down_weights_dev_6": "model_mtp_layer61-00003.safetensors",
|
| 11375 |
+
"layer_61_exp_down_scales_dev_6": "model_mtp_layer61-00003.safetensors",
|
| 11376 |
+
"layer_61_unproj_o_gamma_dev_7": "model_mtp_layer61-00003.safetensors",
|
| 11377 |
+
"layer_61_exp_proj_weights_dev_7": "model_mtp_layer61-00003.safetensors",
|
| 11378 |
+
"layer_61_exp_bias_dev_7": "model_mtp_layer61-00003.safetensors",
|
| 11379 |
+
"layer_61_exp_upgate_weights_dev_7": "model_mtp_layer61-00003.safetensors",
|
| 11380 |
+
"layer_61_exp_upgate_scales_dev_7": "model_mtp_layer61-00003.safetensors",
|
| 11381 |
+
"layer_61_exp_down_weights_dev_7": "model_mtp_layer61-00003.safetensors",
|
| 11382 |
+
"layer_61_exp_down_scales_dev_7": "model_mtp_layer61-00003.safetensors",
|
| 11383 |
+
"layer_61_embedding_rmsnorm_gamma_dev_0": "model_mtp_layer61-00003.safetensors",
|
| 11384 |
+
"layer_61_hidden_rmsnorm_gamma_dev_0": "model_mtp_layer61-00003.safetensors",
|
| 11385 |
+
"layer_61_eh_proj_weights_dev_0": "model_mtp_layer61-00003.safetensors",
|
| 11386 |
+
"layer_61_embedding_rmsnorm_gamma_dev_1": "model_mtp_layer61-00003.safetensors",
|
| 11387 |
+
"layer_61_hidden_rmsnorm_gamma_dev_1": "model_mtp_layer61-00003.safetensors",
|
| 11388 |
+
"layer_61_eh_proj_weights_dev_1": "model_mtp_layer61-00003.safetensors",
|
| 11389 |
+
"layer_61_embedding_rmsnorm_gamma_dev_2": "model_mtp_layer61-00003.safetensors",
|
| 11390 |
+
"layer_61_hidden_rmsnorm_gamma_dev_2": "model_mtp_layer61-00003.safetensors",
|
| 11391 |
+
"layer_61_eh_proj_weights_dev_2": "model_mtp_layer61-00003.safetensors",
|
| 11392 |
+
"layer_61_embedding_rmsnorm_gamma_dev_3": "model_mtp_layer61-00003.safetensors",
|
| 11393 |
+
"layer_61_hidden_rmsnorm_gamma_dev_3": "model_mtp_layer61-00003.safetensors",
|
| 11394 |
+
"layer_61_eh_proj_weights_dev_3": "model_mtp_layer61-00003.safetensors",
|
| 11395 |
+
"layer_61_embedding_rmsnorm_gamma_dev_4": "model_mtp_layer61-00003.safetensors",
|
| 11396 |
+
"layer_61_hidden_rmsnorm_gamma_dev_4": "model_mtp_layer61-00003.safetensors",
|
| 11397 |
+
"layer_61_eh_proj_weights_dev_4": "model_mtp_layer61-00003.safetensors",
|
| 11398 |
+
"layer_61_embedding_rmsnorm_gamma_dev_5": "model_mtp_layer61-00003.safetensors",
|
| 11399 |
+
"layer_61_hidden_rmsnorm_gamma_dev_5": "model_mtp_layer61-00003.safetensors",
|
| 11400 |
+
"layer_61_eh_proj_weights_dev_5": "model_mtp_layer61-00003.safetensors",
|
| 11401 |
+
"layer_61_embedding_rmsnorm_gamma_dev_6": "model_mtp_layer61-00003.safetensors",
|
| 11402 |
+
"layer_61_hidden_rmsnorm_gamma_dev_6": "model_mtp_layer61-00003.safetensors",
|
| 11403 |
+
"layer_61_eh_proj_weights_dev_6": "model_mtp_layer61-00003.safetensors",
|
| 11404 |
+
"layer_61_embedding_rmsnorm_gamma_dev_7": "model_mtp_layer61-00003.safetensors",
|
| 11405 |
+
"layer_61_hidden_rmsnorm_gamma_dev_7": "model_mtp_layer61-00003.safetensors",
|
| 11406 |
+
"layer_61_eh_proj_weights_dev_7": "model_mtp_layer61-00003.safetensors"
|
| 11407 |
}
|
| 11408 |
}
|