junrushao's picture
Initial commit
e7ce72a
{
"metadata": {
"ParamSize": 325,
"ParamBytes": 3048549376.0,
"BitsPerParam": 3.619307029695688
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 52736000,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
32000,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52736000,
"byteOffset": 0
}
],
"md5sum": "9837c89d545850f3cd0d2637286c72b5"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 18087936,
"records": [
{
"name": "model.layers.24.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 0
}
],
"md5sum": "0b3892fd87005159cae3135feb15481f"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "4eac62fcd0f886e724cdc40cf81cd941"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 29226496,
"records": [
{
"name": "lm_head.q_scale",
"shape": [
32000,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6592000,
"byteOffset": 0
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 6592000
},
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 6600192
},
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 24688128
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 26949120
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 26957312
},
{
"name": "model.layers.24.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 26965504
}
],
"md5sum": "7b53644d7b0c813717f65ebf11235608"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 27325440,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 0
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 4535296
},
{
"name": "model.layers.24.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 4543488
},
{
"name": "model.layers.24.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 24794112
}
],
"md5sum": "ba44c2e9965a65f35d9db6bd925860dc"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "2be4f03707263b27c49924d2eda86299"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.25.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "d9e03bfd02709e8f05d30249ed39536c"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 32494592,
"records": [
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7593984
},
{
"name": "model.layers.25.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 7602176
},
{
"name": "model.layers.25.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 25690112
},
{
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 27951104
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32486400
}
],
"md5sum": "cedf294ee3f2dd7ead342991db8d05b7"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "a03e69c96e83aea5bd7f3252517d7cfa"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 30482432,
"records": [
{
"name": "model.layers.25.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 2531328
},
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 9281536
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 10125312
},
{
"name": "model.layers.26.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 10133504
},
{
"name": "model.layers.26.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 28221440
}
],
"md5sum": "796d81a11fbf19e0728e8e58e9f04a97"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 27325440,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 0
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 4535296
},
{
"name": "model.layers.26.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 4543488
},
{
"name": "model.layers.26.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 24794112
}
],
"md5sum": "9ba26206a1c6b829849edc19f477eb7a"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "aa5ae7d711608d59f910483e621a643c"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.27.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "08b5e4ba67b0b1464c599c672315ff9a"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 32494592,
"records": [
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7593984
},
{
"name": "model.layers.27.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 7602176
},
{
"name": "model.layers.27.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 25690112
},
{
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 27951104
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32486400
}
],
"md5sum": "6c5dfa04a79db8d5aa0c97ea4eb75892"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "736a6ded920d351a0ba6ee593fd33bf7"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 30482432,
"records": [
{
"name": "model.layers.27.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.27.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 2531328
},
{
"name": "model.layers.27.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 9281536
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 10125312
},
{
"name": "model.layers.28.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 10133504
},
{
"name": "model.layers.28.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 28221440
}
],
"md5sum": "df2a12a1071b900820cafdea9474a11e"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 27325440,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 0
},
{
"name": "model.layers.28.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 4535296
},
{
"name": "model.layers.28.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 4543488
},
{
"name": "model.layers.28.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 24794112
}
],
"md5sum": "8454540ba9b5e2f98b22f417e712eebc"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "6bdf3b9ce32e2a28bdbd0ff5396334b8"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.29.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "ae58701b107dd184fcb0877329418995"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 32494592,
"records": [
{
"name": "model.layers.28.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.28.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7593984
},
{
"name": "model.layers.29.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 7602176
},
{
"name": "model.layers.29.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 25690112
},
{
"name": "model.layers.29.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 27951104
},
{
"name": "model.layers.29.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32486400
}
],
"md5sum": "1bd984b6714ebe828f8ac45e45e7b226"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "b4fdd4d7aba0c33b7b24cd296ce52a39"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 30482432,
"records": [
{
"name": "model.layers.29.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.29.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 2531328
},
{
"name": "model.layers.29.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 9281536
},
{
"name": "model.layers.30.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 10125312
},
{
"name": "model.layers.30.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 10133504
},
{
"name": "model.layers.30.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 28221440
}
],
"md5sum": "f6377a6192aad921c9a257f85383493c"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 27325440,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 0
},
{
"name": "model.layers.30.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 4535296
},
{
"name": "model.layers.30.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 4543488
},
{
"name": "model.layers.30.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 24794112
}
],
"md5sum": "537aa38eef35c6850fbf9835344fc320"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "04ee6768eed61211f9fff2a959efdb50"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.31.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "e359ec1f1f82082341d5aeee964483d8"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 32494592,
"records": [
{
"name": "model.layers.30.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.30.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.layers.31.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7593984
},
{
"name": "model.layers.31.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 7602176
},
{
"name": "model.layers.31.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 25690112
},
{
"name": "model.layers.31.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 27951104
},
{
"name": "model.layers.31.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32486400
}
],
"md5sum": "83eeb59c0ba757e7081b390ee21b69b6"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 52736000,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
32000,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52736000,
"byteOffset": 0
}
],
"md5sum": "d01b089f0beed4795ce259f291ef38a2"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 18087936,
"records": [
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 0
}
],
"md5sum": "276facd44f8601842dc48a1da8c4494d"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "073d93ebf6889233637ee1a33bb3bf93"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.0.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "85a1d9b29ef60762dcb84a28c2e65f08"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 32819712,
"records": [
{
"name": "model.layers.31.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.31.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 2531328
},
{
"name": "model.layers.31.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 9281536
},
{
"name": "model.norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 10125312
},
{
"name": "model.embed_tokens.q_scale",
"shape": [
32000,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6592000,
"byteOffset": 10133504
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 16725504
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 16733696
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 18994688
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23529984
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 23538176
},
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 26069504
}
],
"md5sum": "2238fe06b147a60d6774f94c6b63f53b"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "9105ac0fed7cb450442a775ee98f9055"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.1.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "57e1956d6a09f71cb01daf62e7ec6cb7"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 28275712,
"records": [
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 0
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 843776
},
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 851968
},
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 18939904
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 21200896
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25736192
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 25744384
}
],
"md5sum": "2b8799702b69ba201c496c6d52769bed"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "0cfaac8ca06224e04cde47c14f5637c1"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.10.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "ce5307f1afe10f5a589c2b9f723ec05f"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 32494592,
"records": [
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7593984
},
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 7602176
},
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 25690112
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 27951104
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32486400
}
],
"md5sum": "630eda6fee482469ab88c94663af1208"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "73122d378591d4857e7e0ebae5a72556"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.11.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "86814866596ab1fc5088cd19bbb24ec9"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 18087936,
"records": [
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 0
}
],
"md5sum": "83bde830156064662a03172c3b6e13ef"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "9a52675436254a8cdd25d9c02fd5cf62"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.2.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "2bc382a626c4e8f723a09c0d58b56e83"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 31598592,
"records": [
{
"name": "model.layers.10.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 2531328
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 9281536
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 10125312
},
{
"name": "model.layers.11.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 14660608
},
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 17191936
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 23942144
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24785920
},
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 24794112
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 27055104
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 31590400
}
],
"md5sum": "8d51f7d6c12235b2abf387d529c01b7b"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "cbd9f23ad633e37551a3b64a51b4b729"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 30482432,
"records": [
{
"name": "model.layers.2.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 2531328
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 9281536
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 10125312
},
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 10133504
},
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 28221440
}
],
"md5sum": "8bc804e781ae6864b6d5046ddbcb0e5d"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 27325440,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 0
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 4535296
},
{
"name": "model.layers.3.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 4543488
},
{
"name": "model.layers.3.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 24794112
}
],
"md5sum": "c3aedd052c46ad3d90ed4d02d4d0c383"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "9ffb2b8f14577858f7be056df4bc09e5"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.4.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "8295c00c13128a087586c4984671cac8"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 32494592,
"records": [
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7593984
},
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 7602176
},
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 25690112
},
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 27951104
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32486400
}
],
"md5sum": "869157e380854ebd5b9b00c214b05838"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "f0fb9c735bb82546d4becbd8cafe7705"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 30482432,
"records": [
{
"name": "model.layers.4.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 2531328
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 9281536
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 10125312
},
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 10133504
},
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 28221440
}
],
"md5sum": "5aba9e6137383abb728ec6dc01ac010a"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 27325440,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 0
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 4535296
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 4543488
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 24794112
}
],
"md5sum": "304149984b3ccb211fe88e828a395b87"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "64c7c0272ed3d1402fae3adc11d3fd13"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.6.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "22896762997b92f94930c8a2c8d90ff8"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 32494592,
"records": [
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7593984
},
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 7602176
},
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 25690112
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 27951104
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32486400
}
],
"md5sum": "2c1fd7c0a6e995c3f823b4072c76fa49"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "f8259b65afd8eafcbcd9a679e5d6dbbc"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 30482432,
"records": [
{
"name": "model.layers.6.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 2531328
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 9281536
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 10125312
},
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 10133504
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 28221440
}
],
"md5sum": "26f3e6325f953b0b13ba8283e817ab37"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 27325440,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 0
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 4535296
},
{
"name": "model.layers.7.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 4543488
},
{
"name": "model.layers.7.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 24794112
}
],
"md5sum": "33e5e31bdd0b2421ff9b257622f553c6"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "1d165c4fc19f09987ff94a635e169112"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.8.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "de5e55d28850b54d5d26d4d0cd961298"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 32494592,
"records": [
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7593984
},
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 7602176
},
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 25690112
},
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 27951104
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32486400
}
],
"md5sum": "a8a50295579addb7e4c010894124f77a"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "5041c43fd7f3ba1f0460fb76caef7ae9"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 30482432,
"records": [
{
"name": "model.layers.8.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 2531328
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 9281536
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 10125312
},
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 10133504
},
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 28221440
}
],
"md5sum": "84e46244ade810ec313c46a7abe40128"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 27325440,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 0
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 4535296
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 4543488
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 24794112
}
],
"md5sum": "b035209c306d2b46f1fa89cea05a0c1a"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 18087936,
"records": [
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 0
}
],
"md5sum": "efa29a69e6573eeebf11611d37a6fca6"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "089e14f6b0840ff4fd9e222c61245afd"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 30228480,
"records": [
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7593984
},
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 7602176
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 25690112
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 27951104
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 27959296
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 27967488
}
],
"md5sum": "3a66005cf61cc5b0424bb804a8cb3eb9"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 27325440,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 0
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 4535296
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 4543488
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 24794112
}
],
"md5sum": "6304b7818496c0aae10bb4f3379ac64c"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "e804e07c9252309a571ac7c30f700849"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.13.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "117034f73e7aa1fcf86820c617f8373f"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 32494592,
"records": [
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7593984
},
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 7602176
},
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 25690112
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 27951104
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32486400
}
],
"md5sum": "836d7e0622283dc44f62a659d9ed2215"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "f81bfdfcd16051cc6432a25756fbc53c"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 30482432,
"records": [
{
"name": "model.layers.13.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 2531328
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 9281536
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 10125312
},
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 10133504
},
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 28221440
}
],
"md5sum": "4bedbc11f235391afdbf084fb29f80aa"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 27325440,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 0
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 4535296
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 4543488
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 24794112
}
],
"md5sum": "13322be5f2729c3b453b61f70ded2d1b"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "ecc6647459376b03b8c5bce35c6f9150"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.15.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "f5743ebce68a711782ca3d1ecd458743"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 32494592,
"records": [
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7593984
},
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 7602176
},
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 25690112
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 27951104
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32486400
}
],
"md5sum": "e8d61cdf7e88eddd42d2dc867581c2e4"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "683a947bb5ff5c2468695a39cae10e74"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 30482432,
"records": [
{
"name": "model.layers.15.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 2531328
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 9281536
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 10125312
},
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 10133504
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 28221440
}
],
"md5sum": "1b0fabd5e4a27575d11c69939db0140b"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 27325440,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 0
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 4535296
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 4543488
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 24794112
}
],
"md5sum": "a96c228e1921b194e729aeefe21417df"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "31304f9b3905d972e49fa3746ea6d1d2"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.17.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "b5ef1597ecd9c6f5706d9eaf8357aa3f"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 32494592,
"records": [
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7593984
},
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 7602176
},
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 25690112
},
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 27951104
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32486400
}
],
"md5sum": "911f1e93c43f21357910b755765a5553"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "c089f4e50a8212105865e7927ead03f3"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 30482432,
"records": [
{
"name": "model.layers.17.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 2531328
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 9281536
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 10125312
},
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 10133504
},
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 28221440
}
],
"md5sum": "baa160fd29ab4bdc146fb3f24667ceca"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 27325440,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 0
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 4535296
},
{
"name": "model.layers.18.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 4543488
},
{
"name": "model.layers.18.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 24794112
}
],
"md5sum": "8a1792f09eecf2efd6332906524cd2db"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "457099fbb5fca4e1630034c48c4c4cd4"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.19.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "d37ed0690f7e631e6de4f891552cbe97"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 32494592,
"records": [
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7593984
},
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 7602176
},
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 25690112
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 27951104
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32486400
}
],
"md5sum": "d075798d5dcd6d2040e2291032b29c74"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "88104d111cb155de4bdd9f3cb6172667"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 30482432,
"records": [
{
"name": "model.layers.19.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 2531328
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 9281536
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 10125312
},
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 10133504
},
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 28221440
}
],
"md5sum": "bc58f72778b1683dd1655f4f60574831"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 27325440,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 0
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 4535296
},
{
"name": "model.layers.20.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 4543488
},
{
"name": "model.layers.20.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 24794112
}
],
"md5sum": "0589f9cccea49fe3610734b60361a353"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "6bd46ac523c0e4b2a2263f76c0ef69bb"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.21.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "9d7b6c0530910d8a6627af7a9dc7340d"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 32494592,
"records": [
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7593984
},
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 7602176
},
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 25690112
},
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 27951104
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32486400
}
],
"md5sum": "f22827dfcea8f24fcb4addfd328b9cae"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "7070698d3525dcfbb4dd6d02002d934a"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 30482432,
"records": [
{
"name": "model.layers.21.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 2531328
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 9281536
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 10125312
},
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 10133504
},
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 28221440
}
],
"md5sum": "7a87f85958bda94b5343276c281ba26f"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 27325440,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 0
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 4535296
},
{
"name": "model.layers.22.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 4543488
},
{
"name": "model.layers.22.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 24794112
}
],
"md5sum": "11cd68ad01464500faf104ed291a76f2"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "68769a31a8abe08412608e49b4566d4a"
},
{
"dataPath": "params_shard_99.bin",
"format": "raw-shard",
"nbytes": 32379904,
"records": [
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 7593984
},
{
"name": "model.layers.23.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 12129280
}
],
"md5sum": "5c0f5b5d300406d0c219823e92311ad4"
},
{
"dataPath": "params_shard_100.bin",
"format": "raw-shard",
"nbytes": 10125312,
"records": [
{
"name": "model.layers.23.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 2531328
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 9281536
}
],
"md5sum": "e30a83fb4611c0d2556a52dbc086b3f7"
}
]
}