junrushao's picture
Initial commit
210d423
{
"metadata": {
"ParamSize": 325,
"ParamBytes": 3048608704.0,
"BitsPerParam": 3.6193070644493246
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 52762368,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
32016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52762368,
"byteOffset": 0
}
],
"md5sum": "c42f676c9e5f47490cb57a7fa536f694"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 18087936,
"records": [
{
"name": "model.layers.24.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 0
}
],
"md5sum": "c067dcd8780e97d599ca80e753e2c57d"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "9e8a50b5ecc922cce9eb48867534cd16"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 29229792,
"records": [
{
"name": "lm_head.q_scale",
"shape": [
32016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6595296,
"byteOffset": 0
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 6595296
},
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 6603488
},
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 24691424
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 26952416
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 26960608
},
{
"name": "model.layers.24.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 26968800
}
],
"md5sum": "b072a6e5e7080a6cae4005607f58a517"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 27325440,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 0
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 4535296
},
{
"name": "model.layers.24.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 4543488
},
{
"name": "model.layers.24.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 24794112
}
],
"md5sum": "277c16f3c5389153228bb452dd148e10"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "4feabbb67d458a3d150ee1fea6a3a8f2"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.25.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "24a8a99c0dcbe0f2677d1457b512695b"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 32494592,
"records": [
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7593984
},
{
"name": "model.layers.25.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 7602176
},
{
"name": "model.layers.25.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 25690112
},
{
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 27951104
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32486400
}
],
"md5sum": "048a401bc493124ce9f6eec2d832c707"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "26483f534695e5cdf2cced21468c094d"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 30482432,
"records": [
{
"name": "model.layers.25.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 2531328
},
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 9281536
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 10125312
},
{
"name": "model.layers.26.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 10133504
},
{
"name": "model.layers.26.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 28221440
}
],
"md5sum": "f244e12abda99b8809535a949234bd35"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 27325440,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 0
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 4535296
},
{
"name": "model.layers.26.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 4543488
},
{
"name": "model.layers.26.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 24794112
}
],
"md5sum": "4eecff9501161d53f6fce252cee212d1"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "f0f5c7ed15f75b054af873fa63eee509"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.27.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "e97829650f3c8721c388d731b73fbafa"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 32494592,
"records": [
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7593984
},
{
"name": "model.layers.27.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 7602176
},
{
"name": "model.layers.27.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 25690112
},
{
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 27951104
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32486400
}
],
"md5sum": "2050177792a8236e0047b412ee524e4f"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "a08c1bb00d1e2eec0273f552f2cd6faf"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 30482432,
"records": [
{
"name": "model.layers.27.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.27.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 2531328
},
{
"name": "model.layers.27.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 9281536
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 10125312
},
{
"name": "model.layers.28.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 10133504
},
{
"name": "model.layers.28.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 28221440
}
],
"md5sum": "347222f8a54e21d12d9c4971706764aa"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 27325440,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 0
},
{
"name": "model.layers.28.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 4535296
},
{
"name": "model.layers.28.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 4543488
},
{
"name": "model.layers.28.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 24794112
}
],
"md5sum": "21d838e5972d65e57d377cf5b1d829e7"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "962679ab0b2d704986583c2f3f7ea413"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.29.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "82951d1f80bf53c473acc399da7443ad"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 32494592,
"records": [
{
"name": "model.layers.28.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.28.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7593984
},
{
"name": "model.layers.29.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 7602176
},
{
"name": "model.layers.29.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 25690112
},
{
"name": "model.layers.29.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 27951104
},
{
"name": "model.layers.29.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32486400
}
],
"md5sum": "eb68eb89fb4cbd61d6fab0083de4df60"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "885f56f7c9ab3fd45c26ddfd0da53ff0"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 30482432,
"records": [
{
"name": "model.layers.29.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.29.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 2531328
},
{
"name": "model.layers.29.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 9281536
},
{
"name": "model.layers.30.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 10125312
},
{
"name": "model.layers.30.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 10133504
},
{
"name": "model.layers.30.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 28221440
}
],
"md5sum": "930d00de9dbc39e534a7ec8df9ab68de"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 27325440,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 0
},
{
"name": "model.layers.30.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 4535296
},
{
"name": "model.layers.30.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 4543488
},
{
"name": "model.layers.30.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 24794112
}
],
"md5sum": "bbe4ef8aad23c71e94b7b902310c94e0"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "941a59910a086587db11fb763728cdbb"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.31.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "e24567428615d2bf62f1ebfbf0605c12"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 32494592,
"records": [
{
"name": "model.layers.30.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.30.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.layers.31.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7593984
},
{
"name": "model.layers.31.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 7602176
},
{
"name": "model.layers.31.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 25690112
},
{
"name": "model.layers.31.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 27951104
},
{
"name": "model.layers.31.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32486400
}
],
"md5sum": "7336357368bcb4274db82b65e23de17d"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 52762368,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
32016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52762368,
"byteOffset": 0
}
],
"md5sum": "1d1614174bc4a70dc676649ef0cf84ec"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 18087936,
"records": [
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 0
}
],
"md5sum": "6e525d0171f5618e41854582fd485bc7"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "2b5b58653c44540297af00ab24b537c0"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.0.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "603bc99c36aa33fc4ae4047bb56435fe"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 32823008,
"records": [
{
"name": "model.layers.31.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.31.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 2531328
},
{
"name": "model.layers.31.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 9281536
},
{
"name": "model.norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 10125312
},
{
"name": "model.embed_tokens.q_scale",
"shape": [
32016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6595296,
"byteOffset": 10133504
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 16728800
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 16736992
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 18997984
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23533280
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 23541472
},
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 26072800
}
],
"md5sum": "7036177483c594d4da78a290a35c6a52"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "a611babe0770a56968a3c32b3427813b"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.1.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "96c9330bcc4840ba88d7aaf298f1eaa2"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 28275712,
"records": [
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 0
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 843776
},
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 851968
},
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 18939904
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 21200896
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25736192
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 25744384
}
],
"md5sum": "38faa2a8b7e96d8398dd5eda9bb1b706"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "0edb382d54c7d2502ef69164bfec488c"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.10.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "20a9a9260732ae00c75fb2a171630319"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 32494592,
"records": [
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7593984
},
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 7602176
},
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 25690112
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 27951104
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32486400
}
],
"md5sum": "dd558445e29df14d4037d186d3f57f0e"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "c8c99bee6f67c027b25bb4a923e5fb3c"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.11.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "6bacad932c8a4112861e6e21f78ba937"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 18087936,
"records": [
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 0
}
],
"md5sum": "45d44b3b510333c69397d4ca9107186a"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "262bc0c5c936f17b4610e53115140023"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.2.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "04be911667ee2098a1d176bda9886be6"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 31598592,
"records": [
{
"name": "model.layers.10.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 2531328
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 9281536
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 10125312
},
{
"name": "model.layers.11.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 14660608
},
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 17191936
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 23942144
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24785920
},
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 24794112
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 27055104
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 31590400
}
],
"md5sum": "3115dff22dd960fad4384e0e6a3b817f"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "bc78756aaa3ec55bbf470c646bd71ea8"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 30482432,
"records": [
{
"name": "model.layers.2.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 2531328
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 9281536
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 10125312
},
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 10133504
},
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 28221440
}
],
"md5sum": "dc6a89012786d8676efdb810024a8689"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 27325440,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 0
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 4535296
},
{
"name": "model.layers.3.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 4543488
},
{
"name": "model.layers.3.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 24794112
}
],
"md5sum": "ac28d8a035de99b3ad22a8130d57346c"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "0f686f19c2a767b68c1e17aa89eac75e"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.4.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "092745b8bada9c3125e73b3015ea323c"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 32494592,
"records": [
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7593984
},
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 7602176
},
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 25690112
},
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 27951104
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32486400
}
],
"md5sum": "72b13633599fb635840b250f34ee356c"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "5a1b12c6a3361b53b0338b022ee41664"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 30482432,
"records": [
{
"name": "model.layers.4.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 2531328
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 9281536
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 10125312
},
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 10133504
},
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 28221440
}
],
"md5sum": "7b8a775bd17fe337fa1b70a035e431e9"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 27325440,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 0
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 4535296
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 4543488
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 24794112
}
],
"md5sum": "3f45e135040d2b26212afc5be4b652dc"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "e73a714bfa1e330a116ae06db26615f3"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.6.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "174da339b801d0598df115dc0ce6298e"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 32494592,
"records": [
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7593984
},
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 7602176
},
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 25690112
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 27951104
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32486400
}
],
"md5sum": "6b60564cba534ba05e2c7bf6cdcb0934"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "7cd85c1fea22d34bcedc2d2fabe90192"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 30482432,
"records": [
{
"name": "model.layers.6.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 2531328
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 9281536
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 10125312
},
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 10133504
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 28221440
}
],
"md5sum": "f03183185a1cf712d6bfdb5c7aa52ece"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 27325440,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 0
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 4535296
},
{
"name": "model.layers.7.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 4543488
},
{
"name": "model.layers.7.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 24794112
}
],
"md5sum": "aebef4ef4e339a55eae64a7fbd4f3686"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "82aa0d6b0636543fb941fa2fb4ffc77b"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.8.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "a7c8e9b7b375314f9aaca8ebd0f5b3f1"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 32494592,
"records": [
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7593984
},
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 7602176
},
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 25690112
},
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 27951104
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32486400
}
],
"md5sum": "5b387f7572d912149dc686668262b48e"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "0eb95391aa379e869c35cd14556496bf"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 30482432,
"records": [
{
"name": "model.layers.8.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 2531328
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 9281536
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 10125312
},
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 10133504
},
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 28221440
}
],
"md5sum": "abbf6bfda48b80ef50cd9e25ac29ac2e"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 27325440,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 0
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 4535296
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 4543488
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 24794112
}
],
"md5sum": "07b193489ced74929f7fcdc82f958a4a"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 18087936,
"records": [
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 0
}
],
"md5sum": "2e4da4822aeaab318c2d54d9c4643bc1"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "9aca5114ff3a1b357f2b5b05c4af222a"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 30228480,
"records": [
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7593984
},
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 7602176
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 25690112
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 27951104
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 27959296
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 27967488
}
],
"md5sum": "8c9ab6d16819923eccb8090141d51c67"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 27325440,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 0
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 4535296
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 4543488
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 24794112
}
],
"md5sum": "117535dceb8b584a58092e9208ecfbec"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "1f0ef21ca224866bc0924b31e91a0a4c"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.13.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "5d672dec703c9c67b9d7d320ad01877e"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 32494592,
"records": [
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7593984
},
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 7602176
},
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 25690112
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 27951104
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32486400
}
],
"md5sum": "34abe335812521835de8036c0347919e"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "4acc3cbaa0a5745a4156f8f1491f5f8e"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 30482432,
"records": [
{
"name": "model.layers.13.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 2531328
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 9281536
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 10125312
},
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 10133504
},
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 28221440
}
],
"md5sum": "0281707177c4cac3a060bc79d1b1ebaf"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 27325440,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 0
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 4535296
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 4543488
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 24794112
}
],
"md5sum": "fdb361bf93f8aae85591b099be716673"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "7e873d528db069e0294d498b3440eaab"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.15.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "d9e7409319de11beda88f7a5ba0ce4ec"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 32494592,
"records": [
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7593984
},
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 7602176
},
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 25690112
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 27951104
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32486400
}
],
"md5sum": "1d863bdb32db3ab7dd4cc87f10d6c9c6"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "c4e7024a0cc590271d29643873aa6083"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 30482432,
"records": [
{
"name": "model.layers.15.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 2531328
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 9281536
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 10125312
},
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 10133504
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 28221440
}
],
"md5sum": "252ed76ef5350bd5cb16f6317ae169ba"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 27325440,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 0
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 4535296
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 4543488
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 24794112
}
],
"md5sum": "2b155f1cfb2032ec568b0d5ac16f0e21"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "4fe4fe5436cf8d476abfc3e793978404"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.17.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "40d103985b3403eca30c43cd48c7f61f"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 32494592,
"records": [
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7593984
},
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 7602176
},
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 25690112
},
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 27951104
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32486400
}
],
"md5sum": "ba91937dfe279833780de73d00c938fc"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "3ecfa5960f8b1eb1b7f2c7a96556c8ab"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 30482432,
"records": [
{
"name": "model.layers.17.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 2531328
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 9281536
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 10125312
},
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 10133504
},
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 28221440
}
],
"md5sum": "e052d401e6f6ce26d66cb8cfbcb68dab"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 27325440,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 0
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 4535296
},
{
"name": "model.layers.18.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 4543488
},
{
"name": "model.layers.18.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 24794112
}
],
"md5sum": "2b6d165e2d94ab4c087643d24dbabeb0"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "b36d3a45d81daada3a69ce03e0fd5fee"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.19.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "cffe3d3a5003bc3d0f6e82954b206a0e"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 32494592,
"records": [
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7593984
},
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 7602176
},
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 25690112
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 27951104
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32486400
}
],
"md5sum": "a7304e582f577245630da10a9cc700eb"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "07910504c2de5db257470f3204bf041c"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 30482432,
"records": [
{
"name": "model.layers.19.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 2531328
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 9281536
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 10125312
},
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 10133504
},
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 28221440
}
],
"md5sum": "aba6d51a21cd643d47d11e7d563b4eb4"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 27325440,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 0
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 4535296
},
{
"name": "model.layers.20.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 4543488
},
{
"name": "model.layers.20.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 24794112
}
],
"md5sum": "27a00b2f6eb28c29e66fdadd72e73482"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "bd2fc33def6bbed6a4f7897385442a92"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.21.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "ff37f556795e1b0b827de40bd933bbd7"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 32494592,
"records": [
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7593984
},
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 7602176
},
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 25690112
},
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 27951104
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32486400
}
],
"md5sum": "1e57b9294a634d5e407c799d3e97ffac"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "c454f4ab785a50bf983b2eb6c3c3e988"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 30482432,
"records": [
{
"name": "model.layers.21.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 2531328
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 9281536
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 10125312
},
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 10133504
},
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 28221440
}
],
"md5sum": "e021046f69e02bd28538124aa1c780dc"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 27325440,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 0
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 4535296
},
{
"name": "model.layers.22.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 4543488
},
{
"name": "model.layers.22.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 24794112
}
],
"md5sum": "381cf00ee63e55b1799f520ae810de38"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "07442b06cf2153625e8529aa83481219"
},
{
"dataPath": "params_shard_99.bin",
"format": "raw-shard",
"nbytes": 32379904,
"records": [
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 7593984
},
{
"name": "model.layers.23.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 12129280
}
],
"md5sum": "fb009dee4ed1ba7950f57bf7005a5a28"
},
{
"dataPath": "params_shard_100.bin",
"format": "raw-shard",
"nbytes": 10125312,
"records": [
{
"name": "model.layers.23.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 2531328
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 9281536
}
],
"md5sum": "6f5a496325f5343cb90e78ae7dbe0101"
}
]
}