Incrementally upload converted MTP layer

#2
Files changed (1) hide show
  1. model.safetensors.index.json +215 -7
model.safetensors.index.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "metadata": {
3
- "total_size": 688586654720
4
  },
5
  "weight_map": {
6
  "model.embed_tokens.weight": "model.safetensors-00001-of-00137.safetensors",
@@ -2800,8 +2800,8 @@
2800
  "layer_60_exp_upgate_scales_dev_1": "model.safetensors-00035-of-00137.safetensors",
2801
  "layer_60_exp_down_weights_dev_1": "model.safetensors-00035-of-00137.safetensors",
2802
  "layer_60_exp_down_scales_dev_1": "model.safetensors-00035-of-00137.safetensors",
2803
- "layer_61_lm_head.weight_dev_1": "model.safetensors-00035-of-00137.safetensors",
2804
  "layer_61_model.norm.weight_dev_1": "model.safetensors-00035-of-00137.safetensors",
 
2805
  "layer_0_x_rmsnorm_gamma_dev_2": "model.safetensors-00036-of-00137.safetensors",
2806
  "layer_0_qkv_wa_weights_dev_2": "model.safetensors-00036-of-00137.safetensors",
2807
  "layer_0_qkv_wa_scales_dev_2": "model.safetensors-00036-of-00137.safetensors",
@@ -4199,8 +4199,8 @@
4199
  "layer_60_exp_upgate_scales_dev_2": "model.safetensors-00052-of-00137.safetensors",
4200
  "layer_60_exp_down_weights_dev_2": "model.safetensors-00052-of-00137.safetensors",
4201
  "layer_60_exp_down_scales_dev_2": "model.safetensors-00052-of-00137.safetensors",
4202
- "layer_61_lm_head.weight_dev_2": "model.safetensors-00052-of-00137.safetensors",
4203
  "layer_61_model.norm.weight_dev_2": "model.safetensors-00052-of-00137.safetensors",
 
4204
  "layer_0_x_rmsnorm_gamma_dev_3": "model.safetensors-00053-of-00137.safetensors",
4205
  "layer_0_qkv_wa_weights_dev_3": "model.safetensors-00053-of-00137.safetensors",
4206
  "layer_0_qkv_wa_scales_dev_3": "model.safetensors-00053-of-00137.safetensors",
@@ -5598,8 +5598,8 @@
5598
  "layer_60_exp_upgate_scales_dev_3": "model.safetensors-00069-of-00137.safetensors",
5599
  "layer_60_exp_down_weights_dev_3": "model.safetensors-00069-of-00137.safetensors",
5600
  "layer_60_exp_down_scales_dev_3": "model.safetensors-00069-of-00137.safetensors",
5601
- "layer_61_lm_head.weight_dev_3": "model.safetensors-00069-of-00137.safetensors",
5602
  "layer_61_model.norm.weight_dev_3": "model.safetensors-00069-of-00137.safetensors",
 
5603
  "layer_0_x_rmsnorm_gamma_dev_4": "model.safetensors-00070-of-00137.safetensors",
5604
  "layer_0_qkv_wa_weights_dev_4": "model.safetensors-00070-of-00137.safetensors",
5605
  "layer_0_qkv_wa_scales_dev_4": "model.safetensors-00070-of-00137.safetensors",
@@ -6997,8 +6997,8 @@
6997
  "layer_60_exp_upgate_scales_dev_4": "model.safetensors-00086-of-00137.safetensors",
6998
  "layer_60_exp_down_weights_dev_4": "model.safetensors-00086-of-00137.safetensors",
6999
  "layer_60_exp_down_scales_dev_4": "model.safetensors-00086-of-00137.safetensors",
7000
- "layer_61_lm_head.weight_dev_4": "model.safetensors-00086-of-00137.safetensors",
7001
  "layer_61_model.norm.weight_dev_4": "model.safetensors-00086-of-00137.safetensors",
 
7002
  "layer_0_x_rmsnorm_gamma_dev_5": "model.safetensors-00087-of-00137.safetensors",
7003
  "layer_0_qkv_wa_weights_dev_5": "model.safetensors-00087-of-00137.safetensors",
7004
  "layer_0_qkv_wa_scales_dev_5": "model.safetensors-00087-of-00137.safetensors",
@@ -8396,8 +8396,8 @@
8396
  "layer_60_exp_upgate_scales_dev_5": "model.safetensors-00103-of-00137.safetensors",
8397
  "layer_60_exp_down_weights_dev_5": "model.safetensors-00103-of-00137.safetensors",
8398
  "layer_60_exp_down_scales_dev_5": "model.safetensors-00103-of-00137.safetensors",
8399
- "layer_61_lm_head.weight_dev_5": "model.safetensors-00103-of-00137.safetensors",
8400
  "layer_61_model.norm.weight_dev_5": "model.safetensors-00103-of-00137.safetensors",
 
8401
  "layer_0_x_rmsnorm_gamma_dev_6": "model.safetensors-00104-of-00137.safetensors",
8402
  "layer_0_qkv_wa_weights_dev_6": "model.safetensors-00104-of-00137.safetensors",
8403
  "layer_0_qkv_wa_scales_dev_6": "model.safetensors-00104-of-00137.safetensors",
@@ -11195,6 +11195,214 @@
11195
  "layer_60_exp_down_weights_dev_7": "model.safetensors-00137-of-00137.safetensors",
11196
  "layer_60_exp_down_scales_dev_7": "model.safetensors-00137-of-00137.safetensors",
11197
  "layer_61_lm_head.weight_dev_7": "model.safetensors-00137-of-00137.safetensors",
11198
- "layer_61_model.norm.weight_dev_7": "model.safetensors-00137-of-00137.safetensors"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11199
  }
11200
  }
 
1
  {
2
  "metadata": {
3
+ "total_size": 97814217856
4
  },
5
  "weight_map": {
6
  "model.embed_tokens.weight": "model.safetensors-00001-of-00137.safetensors",
 
2800
  "layer_60_exp_upgate_scales_dev_1": "model.safetensors-00035-of-00137.safetensors",
2801
  "layer_60_exp_down_weights_dev_1": "model.safetensors-00035-of-00137.safetensors",
2802
  "layer_60_exp_down_scales_dev_1": "model.safetensors-00035-of-00137.safetensors",
 
2803
  "layer_61_model.norm.weight_dev_1": "model.safetensors-00035-of-00137.safetensors",
2804
+ "layer_61_lm_head.weight_dev_1": "model.safetensors-00035-of-00137.safetensors",
2805
  "layer_0_x_rmsnorm_gamma_dev_2": "model.safetensors-00036-of-00137.safetensors",
2806
  "layer_0_qkv_wa_weights_dev_2": "model.safetensors-00036-of-00137.safetensors",
2807
  "layer_0_qkv_wa_scales_dev_2": "model.safetensors-00036-of-00137.safetensors",
 
4199
  "layer_60_exp_upgate_scales_dev_2": "model.safetensors-00052-of-00137.safetensors",
4200
  "layer_60_exp_down_weights_dev_2": "model.safetensors-00052-of-00137.safetensors",
4201
  "layer_60_exp_down_scales_dev_2": "model.safetensors-00052-of-00137.safetensors",
 
4202
  "layer_61_model.norm.weight_dev_2": "model.safetensors-00052-of-00137.safetensors",
4203
+ "layer_61_lm_head.weight_dev_2": "model.safetensors-00052-of-00137.safetensors",
4204
  "layer_0_x_rmsnorm_gamma_dev_3": "model.safetensors-00053-of-00137.safetensors",
4205
  "layer_0_qkv_wa_weights_dev_3": "model.safetensors-00053-of-00137.safetensors",
4206
  "layer_0_qkv_wa_scales_dev_3": "model.safetensors-00053-of-00137.safetensors",
 
5598
  "layer_60_exp_upgate_scales_dev_3": "model.safetensors-00069-of-00137.safetensors",
5599
  "layer_60_exp_down_weights_dev_3": "model.safetensors-00069-of-00137.safetensors",
5600
  "layer_60_exp_down_scales_dev_3": "model.safetensors-00069-of-00137.safetensors",
 
5601
  "layer_61_model.norm.weight_dev_3": "model.safetensors-00069-of-00137.safetensors",
5602
+ "layer_61_lm_head.weight_dev_3": "model.safetensors-00069-of-00137.safetensors",
5603
  "layer_0_x_rmsnorm_gamma_dev_4": "model.safetensors-00070-of-00137.safetensors",
5604
  "layer_0_qkv_wa_weights_dev_4": "model.safetensors-00070-of-00137.safetensors",
5605
  "layer_0_qkv_wa_scales_dev_4": "model.safetensors-00070-of-00137.safetensors",
 
6997
  "layer_60_exp_upgate_scales_dev_4": "model.safetensors-00086-of-00137.safetensors",
6998
  "layer_60_exp_down_weights_dev_4": "model.safetensors-00086-of-00137.safetensors",
6999
  "layer_60_exp_down_scales_dev_4": "model.safetensors-00086-of-00137.safetensors",
 
7000
  "layer_61_model.norm.weight_dev_4": "model.safetensors-00086-of-00137.safetensors",
7001
+ "layer_61_lm_head.weight_dev_4": "model.safetensors-00086-of-00137.safetensors",
7002
  "layer_0_x_rmsnorm_gamma_dev_5": "model.safetensors-00087-of-00137.safetensors",
7003
  "layer_0_qkv_wa_weights_dev_5": "model.safetensors-00087-of-00137.safetensors",
7004
  "layer_0_qkv_wa_scales_dev_5": "model.safetensors-00087-of-00137.safetensors",
 
8396
  "layer_60_exp_upgate_scales_dev_5": "model.safetensors-00103-of-00137.safetensors",
8397
  "layer_60_exp_down_weights_dev_5": "model.safetensors-00103-of-00137.safetensors",
8398
  "layer_60_exp_down_scales_dev_5": "model.safetensors-00103-of-00137.safetensors",
 
8399
  "layer_61_model.norm.weight_dev_5": "model.safetensors-00103-of-00137.safetensors",
8400
+ "layer_61_lm_head.weight_dev_5": "model.safetensors-00103-of-00137.safetensors",
8401
  "layer_0_x_rmsnorm_gamma_dev_6": "model.safetensors-00104-of-00137.safetensors",
8402
  "layer_0_qkv_wa_weights_dev_6": "model.safetensors-00104-of-00137.safetensors",
8403
  "layer_0_qkv_wa_scales_dev_6": "model.safetensors-00104-of-00137.safetensors",
 
11195
  "layer_60_exp_down_weights_dev_7": "model.safetensors-00137-of-00137.safetensors",
11196
  "layer_60_exp_down_scales_dev_7": "model.safetensors-00137-of-00137.safetensors",
11197
  "layer_61_lm_head.weight_dev_7": "model.safetensors-00137-of-00137.safetensors",
11198
+ "layer_61_model.norm.weight_dev_7": "model.safetensors-00137-of-00137.safetensors",
11199
+ "layer_61_x_rmsnorm_gamma_dev_0": "model_mtp_layer61-00001.safetensors",
11200
+ "layer_61_qkv_wa_weights_dev_0": "model_mtp_layer61-00001.safetensors",
11201
+ "layer_61_qkv_wa_scales_dev_0": "model_mtp_layer61-00001.safetensors",
11202
+ "layer_61_k_weights_dev_0": "model_mtp_layer61-00001.safetensors",
11203
+ "layer_61_k_bias_dev_0": "model_mtp_layer61-00001.safetensors",
11204
+ "layer_61_q_rmsnorm_gamma_dev_0": "model_mtp_layer61-00001.safetensors",
11205
+ "layer_61_q_wb_weights_dev_0": "model_mtp_layer61-00001.safetensors",
11206
+ "layer_61_q_wb_scales_dev_0": "model_mtp_layer61-00001.safetensors",
11207
+ "layer_61_id_score_weights_dev_0": "model_mtp_layer61-00001.safetensors",
11208
+ "layer_61_wkv_b1_weights_dev_0": "model_mtp_layer61-00001.safetensors",
11209
+ "layer_61_wkv_b1_scales_dev_0": "model_mtp_layer61-00001.safetensors",
11210
+ "layer_61_kv_rmsnorm_gamma_dev_0": "model_mtp_layer61-00001.safetensors",
11211
+ "layer_61_wkv_b2_weights_dev_0": "model_mtp_layer61-00001.safetensors",
11212
+ "layer_61_wkv_b2_scales_dev_0": "model_mtp_layer61-00001.safetensors",
11213
+ "layer_61_unproj_weights_dev_0": "model_mtp_layer61-00001.safetensors",
11214
+ "layer_61_unproj_scales_dev_0": "model_mtp_layer61-00001.safetensors",
11215
+ "layer_61_x_rmsnorm_gamma_dev_1": "model_mtp_layer61-00001.safetensors",
11216
+ "layer_61_qkv_wa_weights_dev_1": "model_mtp_layer61-00001.safetensors",
11217
+ "layer_61_qkv_wa_scales_dev_1": "model_mtp_layer61-00001.safetensors",
11218
+ "layer_61_k_weights_dev_1": "model_mtp_layer61-00001.safetensors",
11219
+ "layer_61_k_bias_dev_1": "model_mtp_layer61-00001.safetensors",
11220
+ "layer_61_q_rmsnorm_gamma_dev_1": "model_mtp_layer61-00001.safetensors",
11221
+ "layer_61_q_wb_weights_dev_1": "model_mtp_layer61-00001.safetensors",
11222
+ "layer_61_q_wb_scales_dev_1": "model_mtp_layer61-00001.safetensors",
11223
+ "layer_61_id_score_weights_dev_1": "model_mtp_layer61-00001.safetensors",
11224
+ "layer_61_wkv_b1_weights_dev_1": "model_mtp_layer61-00001.safetensors",
11225
+ "layer_61_wkv_b1_scales_dev_1": "model_mtp_layer61-00001.safetensors",
11226
+ "layer_61_kv_rmsnorm_gamma_dev_1": "model_mtp_layer61-00001.safetensors",
11227
+ "layer_61_wkv_b2_weights_dev_1": "model_mtp_layer61-00001.safetensors",
11228
+ "layer_61_wkv_b2_scales_dev_1": "model_mtp_layer61-00001.safetensors",
11229
+ "layer_61_unproj_weights_dev_1": "model_mtp_layer61-00001.safetensors",
11230
+ "layer_61_unproj_scales_dev_1": "model_mtp_layer61-00001.safetensors",
11231
+ "layer_61_x_rmsnorm_gamma_dev_2": "model_mtp_layer61-00001.safetensors",
11232
+ "layer_61_qkv_wa_weights_dev_2": "model_mtp_layer61-00001.safetensors",
11233
+ "layer_61_qkv_wa_scales_dev_2": "model_mtp_layer61-00001.safetensors",
11234
+ "layer_61_k_weights_dev_2": "model_mtp_layer61-00001.safetensors",
11235
+ "layer_61_k_bias_dev_2": "model_mtp_layer61-00001.safetensors",
11236
+ "layer_61_q_rmsnorm_gamma_dev_2": "model_mtp_layer61-00001.safetensors",
11237
+ "layer_61_q_wb_weights_dev_2": "model_mtp_layer61-00001.safetensors",
11238
+ "layer_61_q_wb_scales_dev_2": "model_mtp_layer61-00001.safetensors",
11239
+ "layer_61_id_score_weights_dev_2": "model_mtp_layer61-00001.safetensors",
11240
+ "layer_61_wkv_b1_weights_dev_2": "model_mtp_layer61-00001.safetensors",
11241
+ "layer_61_wkv_b1_scales_dev_2": "model_mtp_layer61-00001.safetensors",
11242
+ "layer_61_kv_rmsnorm_gamma_dev_2": "model_mtp_layer61-00001.safetensors",
11243
+ "layer_61_wkv_b2_weights_dev_2": "model_mtp_layer61-00001.safetensors",
11244
+ "layer_61_wkv_b2_scales_dev_2": "model_mtp_layer61-00001.safetensors",
11245
+ "layer_61_unproj_weights_dev_2": "model_mtp_layer61-00001.safetensors",
11246
+ "layer_61_unproj_scales_dev_2": "model_mtp_layer61-00001.safetensors",
11247
+ "layer_61_x_rmsnorm_gamma_dev_3": "model_mtp_layer61-00001.safetensors",
11248
+ "layer_61_qkv_wa_weights_dev_3": "model_mtp_layer61-00001.safetensors",
11249
+ "layer_61_qkv_wa_scales_dev_3": "model_mtp_layer61-00001.safetensors",
11250
+ "layer_61_k_weights_dev_3": "model_mtp_layer61-00001.safetensors",
11251
+ "layer_61_k_bias_dev_3": "model_mtp_layer61-00001.safetensors",
11252
+ "layer_61_q_rmsnorm_gamma_dev_3": "model_mtp_layer61-00001.safetensors",
11253
+ "layer_61_q_wb_weights_dev_3": "model_mtp_layer61-00001.safetensors",
11254
+ "layer_61_q_wb_scales_dev_3": "model_mtp_layer61-00001.safetensors",
11255
+ "layer_61_id_score_weights_dev_3": "model_mtp_layer61-00001.safetensors",
11256
+ "layer_61_wkv_b1_weights_dev_3": "model_mtp_layer61-00001.safetensors",
11257
+ "layer_61_wkv_b1_scales_dev_3": "model_mtp_layer61-00001.safetensors",
11258
+ "layer_61_kv_rmsnorm_gamma_dev_3": "model_mtp_layer61-00001.safetensors",
11259
+ "layer_61_wkv_b2_weights_dev_3": "model_mtp_layer61-00001.safetensors",
11260
+ "layer_61_wkv_b2_scales_dev_3": "model_mtp_layer61-00001.safetensors",
11261
+ "layer_61_unproj_weights_dev_3": "model_mtp_layer61-00001.safetensors",
11262
+ "layer_61_unproj_scales_dev_3": "model_mtp_layer61-00001.safetensors",
11263
+ "layer_61_x_rmsnorm_gamma_dev_4": "model_mtp_layer61-00001.safetensors",
11264
+ "layer_61_qkv_wa_weights_dev_4": "model_mtp_layer61-00001.safetensors",
11265
+ "layer_61_qkv_wa_scales_dev_4": "model_mtp_layer61-00001.safetensors",
11266
+ "layer_61_k_weights_dev_4": "model_mtp_layer61-00001.safetensors",
11267
+ "layer_61_k_bias_dev_4": "model_mtp_layer61-00001.safetensors",
11268
+ "layer_61_q_rmsnorm_gamma_dev_4": "model_mtp_layer61-00001.safetensors",
11269
+ "layer_61_q_wb_weights_dev_4": "model_mtp_layer61-00001.safetensors",
11270
+ "layer_61_q_wb_scales_dev_4": "model_mtp_layer61-00001.safetensors",
11271
+ "layer_61_id_score_weights_dev_4": "model_mtp_layer61-00001.safetensors",
11272
+ "layer_61_wkv_b1_weights_dev_4": "model_mtp_layer61-00001.safetensors",
11273
+ "layer_61_wkv_b1_scales_dev_4": "model_mtp_layer61-00001.safetensors",
11274
+ "layer_61_kv_rmsnorm_gamma_dev_4": "model_mtp_layer61-00001.safetensors",
11275
+ "layer_61_wkv_b2_weights_dev_4": "model_mtp_layer61-00001.safetensors",
11276
+ "layer_61_wkv_b2_scales_dev_4": "model_mtp_layer61-00001.safetensors",
11277
+ "layer_61_unproj_weights_dev_4": "model_mtp_layer61-00001.safetensors",
11278
+ "layer_61_unproj_scales_dev_4": "model_mtp_layer61-00001.safetensors",
11279
+ "layer_61_x_rmsnorm_gamma_dev_5": "model_mtp_layer61-00001.safetensors",
11280
+ "layer_61_qkv_wa_weights_dev_5": "model_mtp_layer61-00001.safetensors",
11281
+ "layer_61_qkv_wa_scales_dev_5": "model_mtp_layer61-00001.safetensors",
11282
+ "layer_61_k_weights_dev_5": "model_mtp_layer61-00001.safetensors",
11283
+ "layer_61_k_bias_dev_5": "model_mtp_layer61-00001.safetensors",
11284
+ "layer_61_q_rmsnorm_gamma_dev_5": "model_mtp_layer61-00001.safetensors",
11285
+ "layer_61_q_wb_weights_dev_5": "model_mtp_layer61-00001.safetensors",
11286
+ "layer_61_q_wb_scales_dev_5": "model_mtp_layer61-00001.safetensors",
11287
+ "layer_61_id_score_weights_dev_5": "model_mtp_layer61-00001.safetensors",
11288
+ "layer_61_wkv_b1_weights_dev_5": "model_mtp_layer61-00001.safetensors",
11289
+ "layer_61_wkv_b1_scales_dev_5": "model_mtp_layer61-00001.safetensors",
11290
+ "layer_61_kv_rmsnorm_gamma_dev_5": "model_mtp_layer61-00001.safetensors",
11291
+ "layer_61_wkv_b2_weights_dev_5": "model_mtp_layer61-00001.safetensors",
11292
+ "layer_61_wkv_b2_scales_dev_5": "model_mtp_layer61-00001.safetensors",
11293
+ "layer_61_unproj_weights_dev_5": "model_mtp_layer61-00001.safetensors",
11294
+ "layer_61_unproj_scales_dev_5": "model_mtp_layer61-00001.safetensors",
11295
+ "layer_61_x_rmsnorm_gamma_dev_6": "model_mtp_layer61-00001.safetensors",
11296
+ "layer_61_qkv_wa_weights_dev_6": "model_mtp_layer61-00001.safetensors",
11297
+ "layer_61_qkv_wa_scales_dev_6": "model_mtp_layer61-00001.safetensors",
11298
+ "layer_61_k_weights_dev_6": "model_mtp_layer61-00001.safetensors",
11299
+ "layer_61_k_bias_dev_6": "model_mtp_layer61-00001.safetensors",
11300
+ "layer_61_q_rmsnorm_gamma_dev_6": "model_mtp_layer61-00001.safetensors",
11301
+ "layer_61_q_wb_weights_dev_6": "model_mtp_layer61-00001.safetensors",
11302
+ "layer_61_q_wb_scales_dev_6": "model_mtp_layer61-00001.safetensors",
11303
+ "layer_61_id_score_weights_dev_6": "model_mtp_layer61-00001.safetensors",
11304
+ "layer_61_wkv_b1_weights_dev_6": "model_mtp_layer61-00001.safetensors",
11305
+ "layer_61_wkv_b1_scales_dev_6": "model_mtp_layer61-00001.safetensors",
11306
+ "layer_61_kv_rmsnorm_gamma_dev_6": "model_mtp_layer61-00001.safetensors",
11307
+ "layer_61_wkv_b2_weights_dev_6": "model_mtp_layer61-00001.safetensors",
11308
+ "layer_61_wkv_b2_scales_dev_6": "model_mtp_layer61-00001.safetensors",
11309
+ "layer_61_unproj_weights_dev_6": "model_mtp_layer61-00001.safetensors",
11310
+ "layer_61_unproj_scales_dev_6": "model_mtp_layer61-00001.safetensors",
11311
+ "layer_61_x_rmsnorm_gamma_dev_7": "model_mtp_layer61-00001.safetensors",
11312
+ "layer_61_qkv_wa_weights_dev_7": "model_mtp_layer61-00001.safetensors",
11313
+ "layer_61_qkv_wa_scales_dev_7": "model_mtp_layer61-00001.safetensors",
11314
+ "layer_61_k_weights_dev_7": "model_mtp_layer61-00001.safetensors",
11315
+ "layer_61_k_bias_dev_7": "model_mtp_layer61-00001.safetensors",
11316
+ "layer_61_q_rmsnorm_gamma_dev_7": "model_mtp_layer61-00001.safetensors",
11317
+ "layer_61_q_wb_weights_dev_7": "model_mtp_layer61-00001.safetensors",
11318
+ "layer_61_q_wb_scales_dev_7": "model_mtp_layer61-00001.safetensors",
11319
+ "layer_61_id_score_weights_dev_7": "model_mtp_layer61-00001.safetensors",
11320
+ "layer_61_wkv_b1_weights_dev_7": "model_mtp_layer61-00001.safetensors",
11321
+ "layer_61_wkv_b1_scales_dev_7": "model_mtp_layer61-00001.safetensors",
11322
+ "layer_61_kv_rmsnorm_gamma_dev_7": "model_mtp_layer61-00001.safetensors",
11323
+ "layer_61_wkv_b2_weights_dev_7": "model_mtp_layer61-00001.safetensors",
11324
+ "layer_61_wkv_b2_scales_dev_7": "model_mtp_layer61-00001.safetensors",
11325
+ "layer_61_unproj_weights_dev_7": "model_mtp_layer61-00001.safetensors",
11326
+ "layer_61_unproj_scales_dev_7": "model_mtp_layer61-00001.safetensors",
11327
+ "layer_61_unproj_o_gamma_dev_0": "model_mtp_layer61-00001.safetensors",
11328
+ "layer_61_exp_proj_weights_dev_0": "model_mtp_layer61-00001.safetensors",
11329
+ "layer_61_exp_bias_dev_0": "model_mtp_layer61-00001.safetensors",
11330
+ "layer_61_exp_upgate_weights_dev_0": "model_mtp_layer61-00001.safetensors",
11331
+ "layer_61_exp_upgate_scales_dev_0": "model_mtp_layer61-00001.safetensors",
11332
+ "layer_61_exp_down_weights_dev_0": "model_mtp_layer61-00001.safetensors",
11333
+ "layer_61_exp_down_scales_dev_0": "model_mtp_layer61-00001.safetensors",
11334
+ "layer_61_unproj_o_gamma_dev_1": "model_mtp_layer61-00001.safetensors",
11335
+ "layer_61_exp_proj_weights_dev_1": "model_mtp_layer61-00001.safetensors",
11336
+ "layer_61_exp_bias_dev_1": "model_mtp_layer61-00001.safetensors",
11337
+ "layer_61_exp_upgate_weights_dev_1": "model_mtp_layer61-00001.safetensors",
11338
+ "layer_61_exp_upgate_scales_dev_1": "model_mtp_layer61-00001.safetensors",
11339
+ "layer_61_exp_down_weights_dev_1": "model_mtp_layer61-00001.safetensors",
11340
+ "layer_61_exp_down_scales_dev_1": "model_mtp_layer61-00001.safetensors",
11341
+ "layer_61_unproj_o_gamma_dev_2": "model_mtp_layer61-00001.safetensors",
11342
+ "layer_61_exp_proj_weights_dev_2": "model_mtp_layer61-00001.safetensors",
11343
+ "layer_61_exp_bias_dev_2": "model_mtp_layer61-00001.safetensors",
11344
+ "layer_61_exp_upgate_weights_dev_2": "model_mtp_layer61-00001.safetensors",
11345
+ "layer_61_exp_upgate_scales_dev_2": "model_mtp_layer61-00001.safetensors",
11346
+ "layer_61_exp_down_weights_dev_2": "model_mtp_layer61-00001.safetensors",
11347
+ "layer_61_exp_down_scales_dev_2": "model_mtp_layer61-00001.safetensors",
11348
+ "layer_61_unproj_o_gamma_dev_3": "model_mtp_layer61-00001.safetensors",
11349
+ "layer_61_exp_proj_weights_dev_3": "model_mtp_layer61-00001.safetensors",
11350
+ "layer_61_exp_bias_dev_3": "model_mtp_layer61-00001.safetensors",
11351
+ "layer_61_exp_upgate_weights_dev_3": "model_mtp_layer61-00002.safetensors",
11352
+ "layer_61_exp_upgate_scales_dev_3": "model_mtp_layer61-00002.safetensors",
11353
+ "layer_61_exp_down_weights_dev_3": "model_mtp_layer61-00002.safetensors",
11354
+ "layer_61_exp_down_scales_dev_3": "model_mtp_layer61-00002.safetensors",
11355
+ "layer_61_unproj_o_gamma_dev_4": "model_mtp_layer61-00002.safetensors",
11356
+ "layer_61_exp_proj_weights_dev_4": "model_mtp_layer61-00002.safetensors",
11357
+ "layer_61_exp_bias_dev_4": "model_mtp_layer61-00002.safetensors",
11358
+ "layer_61_exp_upgate_weights_dev_4": "model_mtp_layer61-00002.safetensors",
11359
+ "layer_61_exp_upgate_scales_dev_4": "model_mtp_layer61-00002.safetensors",
11360
+ "layer_61_exp_down_weights_dev_4": "model_mtp_layer61-00002.safetensors",
11361
+ "layer_61_exp_down_scales_dev_4": "model_mtp_layer61-00002.safetensors",
11362
+ "layer_61_unproj_o_gamma_dev_5": "model_mtp_layer61-00002.safetensors",
11363
+ "layer_61_exp_proj_weights_dev_5": "model_mtp_layer61-00002.safetensors",
11364
+ "layer_61_exp_bias_dev_5": "model_mtp_layer61-00002.safetensors",
11365
+ "layer_61_exp_upgate_weights_dev_5": "model_mtp_layer61-00002.safetensors",
11366
+ "layer_61_exp_upgate_scales_dev_5": "model_mtp_layer61-00002.safetensors",
11367
+ "layer_61_exp_down_weights_dev_5": "model_mtp_layer61-00002.safetensors",
11368
+ "layer_61_exp_down_scales_dev_5": "model_mtp_layer61-00002.safetensors",
11369
+ "layer_61_unproj_o_gamma_dev_6": "model_mtp_layer61-00002.safetensors",
11370
+ "layer_61_exp_proj_weights_dev_6": "model_mtp_layer61-00002.safetensors",
11371
+ "layer_61_exp_bias_dev_6": "model_mtp_layer61-00002.safetensors",
11372
+ "layer_61_exp_upgate_weights_dev_6": "model_mtp_layer61-00002.safetensors",
11373
+ "layer_61_exp_upgate_scales_dev_6": "model_mtp_layer61-00002.safetensors",
11374
+ "layer_61_exp_down_weights_dev_6": "model_mtp_layer61-00003.safetensors",
11375
+ "layer_61_exp_down_scales_dev_6": "model_mtp_layer61-00003.safetensors",
11376
+ "layer_61_unproj_o_gamma_dev_7": "model_mtp_layer61-00003.safetensors",
11377
+ "layer_61_exp_proj_weights_dev_7": "model_mtp_layer61-00003.safetensors",
11378
+ "layer_61_exp_bias_dev_7": "model_mtp_layer61-00003.safetensors",
11379
+ "layer_61_exp_upgate_weights_dev_7": "model_mtp_layer61-00003.safetensors",
11380
+ "layer_61_exp_upgate_scales_dev_7": "model_mtp_layer61-00003.safetensors",
11381
+ "layer_61_exp_down_weights_dev_7": "model_mtp_layer61-00003.safetensors",
11382
+ "layer_61_exp_down_scales_dev_7": "model_mtp_layer61-00003.safetensors",
11383
+ "layer_61_embedding_rmsnorm_gamma_dev_0": "model_mtp_layer61-00003.safetensors",
11384
+ "layer_61_hidden_rmsnorm_gamma_dev_0": "model_mtp_layer61-00003.safetensors",
11385
+ "layer_61_eh_proj_weights_dev_0": "model_mtp_layer61-00003.safetensors",
11386
+ "layer_61_embedding_rmsnorm_gamma_dev_1": "model_mtp_layer61-00003.safetensors",
11387
+ "layer_61_hidden_rmsnorm_gamma_dev_1": "model_mtp_layer61-00003.safetensors",
11388
+ "layer_61_eh_proj_weights_dev_1": "model_mtp_layer61-00003.safetensors",
11389
+ "layer_61_embedding_rmsnorm_gamma_dev_2": "model_mtp_layer61-00003.safetensors",
11390
+ "layer_61_hidden_rmsnorm_gamma_dev_2": "model_mtp_layer61-00003.safetensors",
11391
+ "layer_61_eh_proj_weights_dev_2": "model_mtp_layer61-00003.safetensors",
11392
+ "layer_61_embedding_rmsnorm_gamma_dev_3": "model_mtp_layer61-00003.safetensors",
11393
+ "layer_61_hidden_rmsnorm_gamma_dev_3": "model_mtp_layer61-00003.safetensors",
11394
+ "layer_61_eh_proj_weights_dev_3": "model_mtp_layer61-00003.safetensors",
11395
+ "layer_61_embedding_rmsnorm_gamma_dev_4": "model_mtp_layer61-00003.safetensors",
11396
+ "layer_61_hidden_rmsnorm_gamma_dev_4": "model_mtp_layer61-00003.safetensors",
11397
+ "layer_61_eh_proj_weights_dev_4": "model_mtp_layer61-00003.safetensors",
11398
+ "layer_61_embedding_rmsnorm_gamma_dev_5": "model_mtp_layer61-00003.safetensors",
11399
+ "layer_61_hidden_rmsnorm_gamma_dev_5": "model_mtp_layer61-00003.safetensors",
11400
+ "layer_61_eh_proj_weights_dev_5": "model_mtp_layer61-00003.safetensors",
11401
+ "layer_61_embedding_rmsnorm_gamma_dev_6": "model_mtp_layer61-00003.safetensors",
11402
+ "layer_61_hidden_rmsnorm_gamma_dev_6": "model_mtp_layer61-00003.safetensors",
11403
+ "layer_61_eh_proj_weights_dev_6": "model_mtp_layer61-00003.safetensors",
11404
+ "layer_61_embedding_rmsnorm_gamma_dev_7": "model_mtp_layer61-00003.safetensors",
11405
+ "layer_61_hidden_rmsnorm_gamma_dev_7": "model_mtp_layer61-00003.safetensors",
11406
+ "layer_61_eh_proj_weights_dev_7": "model_mtp_layer61-00003.safetensors"
11407
  }
11408
  }