junrushao's picture
Initial commit
3333e7a
{
"metadata": {
"ParamSize": 485,
"ParamBytes": 18982125568.0,
"BitsPerParam": 4.500270809152499
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 131072000,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
32000,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 131072000,
"byteOffset": 0
}
],
"md5sum": "7289fa97cd43f2502fa51a8c1bfe570f"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.41.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "690c605108ea8f34557d58cb846551ef"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.42.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "bc3da80d0e4f6cce1af27bbc6aa20195"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 27705344,
"records": [
{
"name": "lm_head.q_scale",
"shape": [
32000,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384000,
"byteOffset": 0
},
{
"name": "model.layers.41.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 16384000
},
{
"name": "model.layers.41.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 16400384
},
{
"name": "model.layers.41.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 27672576
},
{
"name": "model.layers.42.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 27688960
}
],
"md5sum": "50a28b46fbb4eb4ae2cab67630a169e4"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.42.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "f6ef26046a4ed196a3895be4378887d3"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.42.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "2ad8e8d1c461c1b823cb70bf9db33462"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.42.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "68eea3e4872bfa0520721142dd5d9688"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.42.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "c21b0b25622f89df84302453a7e362aa"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.43.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "f0342ac7be70bc1a58b936d4d4790834"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.43.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "5d85aefd7dd083f39282fc0d2167d0f3"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.43.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "34d8b8c23cbd575821ca7e570f6a34d7"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.43.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "921b69810dc25b132221d4e39b45f14f"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 32030720,
"records": [
{
"name": "model.layers.42.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.42.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 11272192
},
{
"name": "model.layers.42.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 11288576
},
{
"name": "model.layers.42.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 16531456
},
{
"name": "model.layers.43.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.43.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 20742144
},
{
"name": "model.layers.43.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 32014336
}
],
"md5sum": "4e4401717473facc8e484664370e20c7"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.43.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "64a1444226713dfc58bd01d08c7badf8"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.44.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "538b95e3adecb212a7f6a556eb99f40d"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.44.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "acb3850fe060dbbd1efb4198c23cc4e9"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.44.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "6da183f89ec664fc91e26a130298bcf1"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.44.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "fde56351b5e2a09829efe0065aa16e1d"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.44.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "a7765cf2a5310caed9f3119ecf018cfa"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.45.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "2fc0aaabe0565b3e295bb477d0b5acfb"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 30195712,
"records": [
{
"name": "model.layers.43.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "model.layers.43.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 5242880
},
{
"name": "model.layers.44.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 9437184
},
{
"name": "model.layers.44.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 9453568
},
{
"name": "model.layers.44.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.44.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 20742144
},
{
"name": "model.layers.44.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 25985024
},
{
"name": "model.layers.45.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30179328
}
],
"md5sum": "e714710ebcbdd17a55820418ae388c8c"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.45.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "e931ba52d25fc891c907a7ae8db2bfda"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.45.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "fdf90ed37b3807cbb6837e5f86816f18"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.45.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "13231d28b12d9dce254a28a32b1e09b0"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.45.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "0778044e54a88aeaf726d4fd2f830558"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.46.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "71738117cc0e4f397033262874d78673"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.46.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "e7c85fdab433f50e8d257bc6eec7f008"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.46.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "43b150a3529e2748e185ab1b79caf774"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.46.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "816b22f73db67468cc80efe5dc9992d8"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 32030720,
"records": [
{
"name": "model.layers.45.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.45.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 11272192
},
{
"name": "model.layers.45.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 11288576
},
{
"name": "model.layers.45.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 16531456
},
{
"name": "model.layers.46.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.46.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 20742144
},
{
"name": "model.layers.46.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 32014336
}
],
"md5sum": "a1a78bd824f880ac72f8fa4f9f8feab2"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.46.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "8e716211261c83c81f6ebacb5d8c8369"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.47.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "8115a25623ff3ad8e3f032e4d1d90da7"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.47.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "299d1c467084b368078ffba6075138c0"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.47.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "fe4243b6f23807a38ea0afc5ae23365e"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.47.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "40222b9023340a5efe7c4f37f3d3c76d"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.47.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "cd82ac9d50f4e9c86db06ef0d081227d"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 131072000,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
32000,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 131072000,
"byteOffset": 0
}
],
"md5sum": "8bc2c15665a0c6d8163ec278362a7846"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 30195712,
"records": [
{
"name": "model.layers.46.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "model.layers.46.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 5242880
},
{
"name": "model.layers.47.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 9437184
},
{
"name": "model.layers.47.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 9453568
},
{
"name": "model.layers.47.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.47.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 20742144
},
{
"name": "model.layers.47.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 25985024
},
{
"name": "model.norm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30179328
}
],
"md5sum": "db5712187deab30ba40ae743654df56c"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "48f568e02e0bfea8107a3f989d34dc8e"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "b704f410e497cc65c6b3a9f2b68af4f6"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "aaebd5421c68c68c26be10a8ae99bccc"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.0.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "c8a20ab681f70c3e31563208b8d1fab2"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "0b857f87900299d0669ff9a9869f6fec"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 32931840,
"records": [
{
"name": "model.embed_tokens.q_scale",
"shape": [
32000,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384000,
"byteOffset": 0
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 16384000
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 16400384
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 27672576
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 27688960
}
],
"md5sum": "cacc0b304a4de69db88a3b74e704dd7d"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "ba47e2a9b42dc6a309c1815d6f647a8b"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "bc6210b9309403c3b3aab163fac54d08"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "d1e6d418669416ecc1dfde2839390dad"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.1.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "622275923a6a94432e7e64d90a6ab7c1"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "fa12a3c4ec5a3ca4532840c7d5cacc6d"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "02383761325fd8d73d78fc6c3860353c"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 24952832,
"records": [
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 0
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 4194304
},
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 4210688
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 15482880
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 15499264
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 20742144
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 24936448
}
],
"md5sum": "30ec5459978f11d692d8edc8928fe93a"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "94f06398b862257587684996b4b1d497"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "d2a85dc204070711bdedcde92af12bdb"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.2.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "24f353c10e65689e2dd9037225f41f95"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "cd39dfa98ddfae867af776cd0ea75245"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "4bf63af4d3112711b70b2e4098d738fe"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "1a52d131c3377b1539b9fe7c7193be69"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "383ca1be8f92cabd7c4d6d331cabb26f"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.3.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "145bba7a7eddff69589aec8ed5bdd868"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 32030720,
"records": [
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 11272192
},
{
"name": "model.layers.2.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 11288576
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 16531456
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 20742144
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 32014336
}
],
"md5sum": "778d75ebc0eabfdd2078207fa4ab60f1"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "fb2a61577db54d5925f534ba1c85705d"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "48bf5e83a05e8e4ec0d85d74e4fcde00"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "1117510c2a62d1d7efe1ffd63bb3e20b"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "b8f4f302ed8482d8326db502600ad281"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.4.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "96feb9931d39de82bcf96ce2ce17d933"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "c6740ae77abfb0c72c58e294b96fb777"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "b45f2fcfbd878356c8ed5b94ed4d06ab"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 30195712,
"records": [
{
"name": "model.layers.3.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 5242880
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 9437184
},
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 9453568
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 20742144
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 25985024
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30179328
}
],
"md5sum": "286caf2ed01c8fb594d20e0cb6d3185b"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "c7b4eb17f046eb08edaf03c8da32ef72"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "6537e581077da2d2faac4efa373b7b81"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.5.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "85197a83004428b4510d8ec9fb9ca1a9"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "82cde4335c275209a1dec96528fe5aed"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "d20dbad1c4397351b715ca3a05a8268b"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "e819dbaf63584819ac88451cc757d597"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.6.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "9e18d7ba93591b4e2c6365dd4c1575d1"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "603b397f86f5b70da0d9ab590c28344b"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "aaf2c75fa4e1bb67a4930aac604c86f2"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 30179328,
"records": [
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 11272192
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 11288576
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 16531456
},
{
"name": "model.layers.6.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 20725760
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 25968640
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30162944
}
],
"md5sum": "a9574e1298b22739cc3549b53dde1e8a"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "349de5574b91fe570bdbe8be02f3b445"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "472378f5a02ecace50802276cf1f1086"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.10.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "4dc6723f25ea1081a63b71d19afcf337"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "b4e57443bb99be16b680d23e86ea5c7a"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "af28c9e9590bfc58b1d7c217db1cfb72"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "d0ddde3d44684c265b8940f2999327ab"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "4dd7ae2a72b5af7ecdc39e1bbeb61ef6"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.11.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "f51c9ad5c021b3f02c83ccff87a2011e"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 32030720,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 11272192
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 11288576
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 16531456
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 20742144
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 32014336
}
],
"md5sum": "cac0c157888340b2b6a0c9221d7b3b5c"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "f496b5ad77a851987c746046b188def1"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "1a93bbc5422467002d2c91c0db0ce287"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "82edd70e5cd37497ab1a93ddd199e2c7"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "4bff922dbddfd0eeb846842b3fbde602"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.12.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "0b3b854961e5ada4aeebbdf18df06acc"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "c31d830b221d26d835f394e8efc1eb92"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "bf761f5286d5f88c8a15f8c14fdcb826"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "56c4fb750e829e27b9bc42ee3bfe9fdf"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.13.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "96c6902bd9f8c1d51d453b2c466826f2"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 30179328,
"records": [
{
"name": "model.layers.11.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 5242880
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 9437184
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 9453568
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 20742144
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 25985024
}
],
"md5sum": "12e5127075fa0b358fb967558678417a"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "8c67d0fd61f20bb0233922c7a7acb70b"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "4d96c1fcfce4072cbab44f873dbed7d5"
},
{
"dataPath": "params_shard_99.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "c16e572deca7f168ac5b1948b7bf6a25"
},
{
"dataPath": "params_shard_100.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "9d1cdcfc41c63d66daabdb368fad8bdb"
},
{
"dataPath": "params_shard_101.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "b71aa8717f80ef4f400e07f657bb97d5"
},
{
"dataPath": "params_shard_102.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.7.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "9198d27f800b60f1c5f25b416ec9cd2e"
},
{
"dataPath": "params_shard_103.bin",
"format": "raw-shard",
"nbytes": 32047104,
"records": [
{
"name": "model.layers.13.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 5242880
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 9437184
},
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 9453568
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20742144
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 20758528
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 32030720
}
],
"md5sum": "043ab3737d873f1180f6e2df8f713925"
},
{
"dataPath": "params_shard_104.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "854b64b6c0942a1a4f81436e548b68ea"
},
{
"dataPath": "params_shard_105.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "914123630566f15bba3559c25427bd32"
},
{
"dataPath": "params_shard_106.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "3b927d01f3361e5b7e629d12be2d5922"
},
{
"dataPath": "params_shard_107.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "2ed9b3db11e3f265a9110e740ebc2b70"
},
{
"dataPath": "params_shard_108.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.8.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "6faa024b20ca3d124e842fb6a34aad43"
},
{
"dataPath": "params_shard_109.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "021892341fca9ca0e3779dba2ed003df"
},
{
"dataPath": "params_shard_110.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "1c09d4dc691f45c445fa309b32124421"
},
{
"dataPath": "params_shard_111.bin",
"format": "raw-shard",
"nbytes": 30195712,
"records": [
{
"name": "model.layers.7.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 5242880
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 9437184
},
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 9453568
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 20742144
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 25985024
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30179328
}
],
"md5sum": "b1f16df4a3cdb8e0c2e18ff3ffdbed17"
},
{
"dataPath": "params_shard_112.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "b384b221634a0bd8185c560e5cef0b9d"
},
{
"dataPath": "params_shard_113.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "c1f6aa960c4e8678d41b674492a0398e"
},
{
"dataPath": "params_shard_114.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.9.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "5a0455632a5224e2bbc5e7fa28a5bc99"
},
{
"dataPath": "params_shard_115.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "f676284083424393f0464b7d98908686"
},
{
"dataPath": "params_shard_116.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "130f06d6790a0f7b4be516b2db73d2b0"
},
{
"dataPath": "params_shard_117.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "c4d3096c6af7b96ce681b7c916007e8a"
},
{
"dataPath": "params_shard_118.bin",
"format": "raw-shard",
"nbytes": 32047104,
"records": [
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 11272192
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 11288576
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 16531456
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 20742144
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 32014336
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 32030720
}
],
"md5sum": "9ae02b356a50dbe05c498ca89f5604f6"
},
{
"dataPath": "params_shard_119.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "0cf385d106684368d6160c0417694789"
},
{
"dataPath": "params_shard_120.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "453f0702b38f510b4364bffa87958631"
},
{
"dataPath": "params_shard_121.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.14.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "f2c9eec0215df295629f792183b9fe86"
},
{
"dataPath": "params_shard_122.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "15a5e888fd7b11c7768e31ecb1f28bfb"
},
{
"dataPath": "params_shard_123.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "fcca991b065e8b4cc1fad784f554a4f9"
},
{
"dataPath": "params_shard_124.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "0b5ce01006450462a32b133217ff4783"
},
{
"dataPath": "params_shard_125.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "7dcfe5433011dedd1a35884cbb8f3e43"
},
{
"dataPath": "params_shard_126.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.15.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "99b363a177f772db4804c99fb71f99a9"
},
{
"dataPath": "params_shard_127.bin",
"format": "raw-shard",
"nbytes": 32030720,
"records": [
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 11272192
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 11288576
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 16531456
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 20742144
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 32014336
}
],
"md5sum": "0458196202b35dc54119b07c97847e3d"
},
{
"dataPath": "params_shard_128.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "e3345f4961dd086036dade6165fb731b"
},
{
"dataPath": "params_shard_129.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "90a97e76b3f42fc71ce199e5587d7bc4"
},
{
"dataPath": "params_shard_130.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "f449be834273671d3e8356a8d6ecf1ed"
},
{
"dataPath": "params_shard_131.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "a0df38ba8e30250adb669923f1aa006e"
},
{
"dataPath": "params_shard_132.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.16.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "78b7b98468e796666d4d52af22a28066"
},
{
"dataPath": "params_shard_133.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "6cc1191c30dfba3ff8a1a009a91c709d"
},
{
"dataPath": "params_shard_134.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "a440b41f4b0b9e466f1c7f794b50450a"
},
{
"dataPath": "params_shard_135.bin",
"format": "raw-shard",
"nbytes": 30195712,
"records": [
{
"name": "model.layers.15.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 5242880
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 9437184
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 9453568
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 20742144
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 25985024
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30179328
}
],
"md5sum": "ec820e4ea598557cf7a48f2015c0fc45"
},
{
"dataPath": "params_shard_136.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "334d0419c05794a8269fe83d5043f754"
},
{
"dataPath": "params_shard_137.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "f9cd5bd3731b003ea1c04d12f6cc0eb3"
},
{
"dataPath": "params_shard_138.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.17.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "4361dd1d6ad597d539475bee709521af"
},
{
"dataPath": "params_shard_139.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "14a34a8a277359c898f47f679b9846c4"
},
{
"dataPath": "params_shard_140.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "e313aad765d84308891d9e467c4b8571"
},
{
"dataPath": "params_shard_141.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "1619ef1c597de62c7a36c86c8b268970"
},
{
"dataPath": "params_shard_142.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "369037d83ebdb8f552340cc06f37d7d9"
},
{
"dataPath": "params_shard_143.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.18.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "ecce66444bea5504322e6b1cc00fbd1e"
},
{
"dataPath": "params_shard_144.bin",
"format": "raw-shard",
"nbytes": 32030720,
"records": [
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 11272192
},
{
"name": "model.layers.17.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 11288576
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 16531456
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 20742144
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 32014336
}
],
"md5sum": "d5c84415242e11a33041a99bb27ca177"
},
{
"dataPath": "params_shard_145.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "63e87f59be7b344329ba316d66b9958c"
},
{
"dataPath": "params_shard_146.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "edee6be6ffb2ef96d6a7e7912b6a7901"
},
{
"dataPath": "params_shard_147.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "59caac1b3591287090b636bd3718aead"
},
{
"dataPath": "params_shard_148.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "24316ead71c49df051cfde0a24b3befc"
},
{
"dataPath": "params_shard_149.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.19.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "96ad4394d33e283b42d0e2bd863d9aec"
},
{
"dataPath": "params_shard_150.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "6febc0030cbde8f0af121ce547732b66"
},
{
"dataPath": "params_shard_151.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "37d0d38c19ca44b7dbfb604c53b82df5"
},
{
"dataPath": "params_shard_152.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "56786f7ffc3b6161ef56625a1ad425a4"
},
{
"dataPath": "params_shard_153.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.20.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "4c71586032e0f84c15736b2bef6aced9"
},
{
"dataPath": "params_shard_154.bin",
"format": "raw-shard",
"nbytes": 30179328,
"records": [
{
"name": "model.layers.18.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 5242880
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 9437184
},
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 9453568
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.19.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 20742144
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 25985024
}
],
"md5sum": "e685b41c5d97ba0d421fe28f1fce7265"
},
{
"dataPath": "params_shard_155.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "4e753b0d9c9a78be4ea5ea7c02f4ffad"
},
{
"dataPath": "params_shard_156.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "a7ed480d6cce79a8ae007aa35978fc4a"
},
{
"dataPath": "params_shard_157.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "0fe0ad82b0081a5282138f0c70d3c5ef"
},
{
"dataPath": "params_shard_158.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "47c79313d8c92a6567745b393f47ceb7"
},
{
"dataPath": "params_shard_159.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "6ba908553afa9b94a0510f1d21384b64"
},
{
"dataPath": "params_shard_160.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.21.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "b7fabcbe1096acc1c3516af3dc35d497"
},
{
"dataPath": "params_shard_161.bin",
"format": "raw-shard",
"nbytes": 32047104,
"records": [
{
"name": "model.layers.20.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 5242880
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 9437184
},
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 9453568
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20742144
},
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 20758528
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 32030720
}
],
"md5sum": "7e63348a3dcf3d4a59d05fc8a3c4fb84"
},
{
"dataPath": "params_shard_162.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "d7315d5664f6f829d9fbdf8adb722230"
},
{
"dataPath": "params_shard_163.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "247511f30296eb7f00483ae554e7bdad"
},
{
"dataPath": "params_shard_164.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "e98b51e2f6392e771daf117202796813"
},
{
"dataPath": "params_shard_165.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "1ddd55657072081d480458d4e45d6ff5"
},
{
"dataPath": "params_shard_166.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.22.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "9c2a89ec8bda20154a4f4268a7411266"
},
{
"dataPath": "params_shard_167.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "df55db7445f73888a7b73c7f412150c9"
},
{
"dataPath": "params_shard_168.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "4a1e570baebffd7bd54337ba6dd80f9f"
},
{
"dataPath": "params_shard_169.bin",
"format": "raw-shard",
"nbytes": 30195712,
"records": [
{
"name": "model.layers.21.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 5242880
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 9437184
},
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 9453568
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.22.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 20742144
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 25985024
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30179328
}
],
"md5sum": "1e766f418d94098aa7842e7d55c10ab7"
},
{
"dataPath": "params_shard_170.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "464f450a36a62efe2199a770084a7bd0"
},
{
"dataPath": "params_shard_171.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "a6ca59c19d4e0807000e7268ea37f101"
},
{
"dataPath": "params_shard_172.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.23.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "8888b677ae78133f6ca29221902210f8"
},
{
"dataPath": "params_shard_173.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "a2de4d74e6e3e053c35f7eacdd921d7e"
},
{
"dataPath": "params_shard_174.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.24.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "ac3fae478bca212db51093f79a8b9e01"
},
{
"dataPath": "params_shard_175.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "130bc7e10886a0084cd604ab3ef7d9ee"
},
{
"dataPath": "params_shard_176.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "252065824b28896455adcd97b5823d68"
},
{
"dataPath": "params_shard_177.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.24.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "2500498d9066098fa38b591930d8e0f7"
},
{
"dataPath": "params_shard_178.bin",
"format": "raw-shard",
"nbytes": 32030720,
"records": [
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 11272192
},
{
"name": "model.layers.23.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 11288576
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 16531456
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.24.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 20742144
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 32014336
}
],
"md5sum": "28cfa0779cbd7ac7f91fded88b5d737e"
},
{
"dataPath": "params_shard_179.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "2b52c1827ce2879d726d0a8a846982ee"
},
{
"dataPath": "params_shard_180.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.25.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "b4e9634d0a6d2804270e3269b04d0229"
},
{
"dataPath": "params_shard_181.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "97dbe5f0e3ce8dac0733ea5f92bb1cb9"
},
{
"dataPath": "params_shard_182.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "7f0b1f52a33d22dd55b4efa68ad6cd19"
},
{
"dataPath": "params_shard_183.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.25.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "7d80e2567e2de3dcb50aec4730e65fa1"
},
{
"dataPath": "params_shard_184.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "09c1e5e38595ea88863631e4c71c26dd"
},
{
"dataPath": "params_shard_185.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.26.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "1ba62e1ed7074b112599ba17eeed1350"
},
{
"dataPath": "params_shard_186.bin",
"format": "raw-shard",
"nbytes": 30195712,
"records": [
{
"name": "model.layers.24.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 5242880
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 9437184
},
{
"name": "model.layers.25.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 9453568
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.25.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 20742144
},
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 25985024
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30179328
}
],
"md5sum": "2451813a82d745fd70e8e6c62fda0ef5"
},
{
"dataPath": "params_shard_187.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "5d6d46e22f5cd4b2e8309dbfa9dc1f0d"
},
{
"dataPath": "params_shard_188.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "58e3fb24910f38bb315fdd2c3cb4e3dd"
},
{
"dataPath": "params_shard_189.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.26.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "d75e5af03c9e51e73b920a3f2a0b4bfc"
},
{
"dataPath": "params_shard_190.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "0b241b7aee8ac3a06410264e28ecf72e"
},
{
"dataPath": "params_shard_191.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "1808a360eb3f06ac93dd595c8b9309a3"
},
{
"dataPath": "params_shard_192.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "eb00f2b7813434bd71fac68675eb7be9"
},
{
"dataPath": "params_shard_193.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.27.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "8d1d63601f2c392700b07fbf1d7d3542"
},
{
"dataPath": "params_shard_194.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.27.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "3fb292b2880e837b54554bef965bbda8"
},
{
"dataPath": "params_shard_195.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.27.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "5cff6eb5d0ceb2a43ba08f157c67cd45"
},
{
"dataPath": "params_shard_196.bin",
"format": "raw-shard",
"nbytes": 30179328,
"records": [
{
"name": "model.layers.26.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 11272192
},
{
"name": "model.layers.26.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 11288576
},
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 16531456
},
{
"name": "model.layers.27.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 20725760
},
{
"name": "model.layers.27.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 25968640
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30162944
}
],
"md5sum": "075025b52cbd6017ad0f67bf56e2ff38"
},
{
"dataPath": "params_shard_197.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.28.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "1f25bc33082239e13dbdfc1ec1c415b5"
},
{
"dataPath": "params_shard_198.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "04b8e5c934ff18466ccc181e2e389a56"
},
{
"dataPath": "params_shard_199.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "c6bde864dca08dacc8fed57d900806a3"
},
{
"dataPath": "params_shard_200.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.28.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "2c737a08b6f70b436a9c04498d4b7358"
},
{
"dataPath": "params_shard_201.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.28.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "33f0bbc5b4370634390b1fa711f7be55"
},
{
"dataPath": "params_shard_202.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.29.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "060d364a27d7bd0495bf4522c296a157"
},
{
"dataPath": "params_shard_203.bin",
"format": "raw-shard",
"nbytes": 32047104,
"records": [
{
"name": "model.layers.27.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 11272192
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 11288576
},
{
"name": "model.layers.28.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 11304960
},
{
"name": "model.layers.28.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 22577152
},
{
"name": "model.layers.28.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 22593536
},
{
"name": "model.layers.28.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 27836416
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 32030720
}
],
"md5sum": "b1274f99b9015e05629432be297b7145"
},
{
"dataPath": "params_shard_204.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "d5d2c7d6d4e40ce58cb0fe6613f44dd0"
},
{
"dataPath": "params_shard_205.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "a4541a11dba0994698efceef84d23a27"
},
{
"dataPath": "params_shard_206.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.29.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "6807fcc435c7eac9a544c017df219a9c"
},
{
"dataPath": "params_shard_207.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.29.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "46a394dd3d585dab1723fd24e24766bd"
},
{
"dataPath": "params_shard_208.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.30.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "851a969057b7c45167f213901ba95869"
},
{
"dataPath": "params_shard_209.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "d415791800d6bb79cad406f5f2940d0f"
},
{
"dataPath": "params_shard_210.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "4e1e3bf0b1052c506bfe2958bca78765"
},
{
"dataPath": "params_shard_211.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.30.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "c9e46a56949185f0ad1dcad443685831"
},
{
"dataPath": "params_shard_212.bin",
"format": "raw-shard",
"nbytes": 32030720,
"records": [
{
"name": "model.layers.29.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.29.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 11272192
},
{
"name": "model.layers.29.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 11288576
},
{
"name": "model.layers.29.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 16531456
},
{
"name": "model.layers.30.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.30.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 20742144
},
{
"name": "model.layers.30.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 32014336
}
],
"md5sum": "bd34cab085a639e73fe2645ec5173714"
},
{
"dataPath": "params_shard_213.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.30.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "63cc32a102bf6a75182147bece6fbb37"
},
{
"dataPath": "params_shard_214.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.31.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "1a07242e294d98969f5313e31a816cd1"
},
{
"dataPath": "params_shard_215.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "b19e7979d5bff76ee438a935893a243f"
},
{
"dataPath": "params_shard_216.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "15e523da987debed681f969072628564"
},
{
"dataPath": "params_shard_217.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.31.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "f7817faf348570c32da60f0bb7f81836"
},
{
"dataPath": "params_shard_218.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.31.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "cb537df72ae8281eac89c79b0688d2b4"
},
{
"dataPath": "params_shard_219.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.32.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "8ce183eb92a5e55d5fbd8e091d2e1403"
},
{
"dataPath": "params_shard_220.bin",
"format": "raw-shard",
"nbytes": 30195712,
"records": [
{
"name": "model.layers.30.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "model.layers.30.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 5242880
},
{
"name": "model.layers.31.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 9437184
},
{
"name": "model.layers.31.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 9453568
},
{
"name": "model.layers.31.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.31.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 20742144
},
{
"name": "model.layers.31.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 25985024
},
{
"name": "model.layers.32.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30179328
}
],
"md5sum": "835e1d251b5e8933f15f6cfd75710172"
},
{
"dataPath": "params_shard_221.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.32.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "9016d61974c2e16ded6ace9aa116dc39"
},
{
"dataPath": "params_shard_222.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.32.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "5c05a994a6fbe52800fd317078388b2d"
},
{
"dataPath": "params_shard_223.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.32.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "0a4f68fb05211be98a7da493c17caa7c"
},
{
"dataPath": "params_shard_224.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.32.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "ce12f299cf817c87cf9f35fe2a721229"
},
{
"dataPath": "params_shard_225.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.33.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "e5bd73d9b2a1238271cbc804531aee6e"
},
{
"dataPath": "params_shard_226.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.33.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "5684ad4dfe99a185cb8bb64e2447c2cd"
},
{
"dataPath": "params_shard_227.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.33.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "4ad03a5f05b7bb16f526323ff9a5884c"
},
{
"dataPath": "params_shard_228.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.33.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "5822ec931c227f701bf7db1bd4aa04ad"
},
{
"dataPath": "params_shard_229.bin",
"format": "raw-shard",
"nbytes": 32030720,
"records": [
{
"name": "model.layers.32.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.32.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 11272192
},
{
"name": "model.layers.32.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 11288576
},
{
"name": "model.layers.32.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 16531456
},
{
"name": "model.layers.33.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.33.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 20742144
},
{
"name": "model.layers.33.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 32014336
}
],
"md5sum": "9cddb6db59159539545d6cdcdb6844b3"
},
{
"dataPath": "params_shard_230.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.33.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "eeff6fd22c02f841ae725d6ded427cbf"
},
{
"dataPath": "params_shard_231.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.34.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "686bbfa47b0fbe2fa5f164e49a6a1e5f"
},
{
"dataPath": "params_shard_232.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.34.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "37144d8b1b51b3d2e4dd6202e83f11d3"
},
{
"dataPath": "params_shard_233.bin",
"format": "raw-shard",
"nbytes": 31981568,
"records": [
{
"name": "model.layers.33.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "model.layers.33.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 5242880
},
{
"name": "model.layers.34.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 9437184
}
],
"md5sum": "4ca2d40f1cbb8b4b0991e5f0f51d5a47"
},
{
"dataPath": "params_shard_234.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.34.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "7cc199ef6471124e6b89f5801af73c2f"
},
{
"dataPath": "params_shard_235.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.34.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "adf8d155654cf6ba540491f8384819dd"
},
{
"dataPath": "params_shard_236.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.35.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "ccccbf60635d4a55fe95855ddde8e961"
},
{
"dataPath": "params_shard_237.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.35.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "472f81265b94dc8d1a92b811a6ae38e6"
},
{
"dataPath": "params_shard_238.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.35.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "0209a2adc1b4280bb22c3adce50d069e"
},
{
"dataPath": "params_shard_239.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.35.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "68a919666e247956e8384aa858392d43"
},
{
"dataPath": "params_shard_240.bin",
"format": "raw-shard",
"nbytes": 32047104,
"records": [
{
"name": "model.layers.34.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "model.layers.34.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 5242880
},
{
"name": "model.layers.34.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 9437184
},
{
"name": "model.layers.34.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 9453568
},
{
"name": "model.layers.34.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.35.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20742144
},
{
"name": "model.layers.35.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 20758528
},
{
"name": "model.layers.35.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 32030720
}
],
"md5sum": "1da05eeee0d9e41ef3d60eae8a2b0959"
},
{
"dataPath": "params_shard_241.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.35.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "e0e1bb6009923c8ea817d8741e418ef4"
},
{
"dataPath": "params_shard_242.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.36.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "07067e449fdeb36f43463be2de01d14d"
},
{
"dataPath": "params_shard_243.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.36.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "5cbaf75ce71574708cca35b9b131893b"
},
{
"dataPath": "params_shard_244.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.36.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "a2660d70bbe50279e8e9cfea1797c4c2"
},
{
"dataPath": "params_shard_245.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.36.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "bf32cc4c0111399d02212c676dffbf46"
},
{
"dataPath": "params_shard_246.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.36.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "e011127e54e8b3b162aaf2bd0cfa72d4"
},
{
"dataPath": "params_shard_247.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.37.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "1c94c23c454a2f428a981d51f227fa0d"
},
{
"dataPath": "params_shard_248.bin",
"format": "raw-shard",
"nbytes": 30195712,
"records": [
{
"name": "model.layers.35.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "model.layers.35.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 5242880
},
{
"name": "model.layers.36.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 9437184
},
{
"name": "model.layers.36.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 9453568
},
{
"name": "model.layers.36.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.36.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 20742144
},
{
"name": "model.layers.36.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 25985024
},
{
"name": "model.layers.37.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30179328
}
],
"md5sum": "545cc59fb21258c72b343ccb3c073ed9"
},
{
"dataPath": "params_shard_249.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.37.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "32ec24098029839b2f4e47f1ecfd4055"
},
{
"dataPath": "params_shard_250.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.37.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "f7793ea955e1961cbfc3ba7a7a9a5df6"
},
{
"dataPath": "params_shard_251.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.37.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "d466fd80cab65d2fc236aa844056f29a"
},
{
"dataPath": "params_shard_252.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.37.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "e346fe4656160b27644a0a17a134b78d"
},
{
"dataPath": "params_shard_253.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.38.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "d7725cdb6b4c98e246d2eb265f164d6a"
},
{
"dataPath": "params_shard_254.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.38.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "8f39cb9b5d0f0c0d03300569513dd3f3"
},
{
"dataPath": "params_shard_255.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.38.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "691a8bd2895631838766cb489c90f194"
},
{
"dataPath": "params_shard_256.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.38.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "c2cd1f7156666c4754c5a0a526909bf4"
},
{
"dataPath": "params_shard_257.bin",
"format": "raw-shard",
"nbytes": 32030720,
"records": [
{
"name": "model.layers.37.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.37.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 11272192
},
{
"name": "model.layers.37.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 11288576
},
{
"name": "model.layers.37.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 16531456
},
{
"name": "model.layers.38.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.38.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 20742144
},
{
"name": "model.layers.38.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 32014336
}
],
"md5sum": "97f7a682b7348ad2197385cc1b7d6150"
},
{
"dataPath": "params_shard_258.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.38.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "25223d5e33d028dce8ba4a1f7337bdc7"
},
{
"dataPath": "params_shard_259.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.39.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "ce8beae48fb45d51ab9c90196d9529f5"
},
{
"dataPath": "params_shard_260.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.39.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "dc6518177a6cdb857238fc802b4dc7af"
},
{
"dataPath": "params_shard_261.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.39.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "a6a739179cc92ef42a73d34c3e96af9d"
},
{
"dataPath": "params_shard_262.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.39.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "ead6688c27b2111acbdea0ec58ca5d51"
},
{
"dataPath": "params_shard_263.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.39.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "e6ea42b8542654f9c8d8600de9b936a8"
},
{
"dataPath": "params_shard_264.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.40.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "dc3354e02a7227a9647cb45455a7777b"
},
{
"dataPath": "params_shard_265.bin",
"format": "raw-shard",
"nbytes": 30195712,
"records": [
{
"name": "model.layers.38.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "model.layers.38.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 5242880
},
{
"name": "model.layers.39.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 9437184
},
{
"name": "model.layers.39.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 9453568
},
{
"name": "model.layers.39.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.39.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 20742144
},
{
"name": "model.layers.39.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 25985024
},
{
"name": "model.layers.40.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30179328
}
],
"md5sum": "e58b0fb482d7b158aaec5ac3417f5157"
},
{
"dataPath": "params_shard_266.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.40.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "a1d5619e879b000764a89957b9345154"
},
{
"dataPath": "params_shard_267.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.40.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "5e7a3fade972ee897b79d7f0c1315570"
},
{
"dataPath": "params_shard_268.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.40.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "4d03f17cb80f2ad821f3b5dca1ed05ea"
},
{
"dataPath": "params_shard_269.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.40.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "ef900be73173ab17f9c0f29ba3b70bc7"
},
{
"dataPath": "params_shard_270.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.41.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "e4d0720f37e20ad197280cb526a5b4d0"
},
{
"dataPath": "params_shard_271.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.41.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "df08277e8450479088bf918d288d5c86"
},
{
"dataPath": "params_shard_272.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.41.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "ab37a5b7109ac6d4995529a65d652dfe"
},
{
"dataPath": "params_shard_273.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.41.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "e94406000f1ac53f70d12553e422e8ca"
},
{
"dataPath": "params_shard_274.bin",
"format": "raw-shard",
"nbytes": 30162944,
"records": [
{
"name": "model.layers.40.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.40.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 11272192
},
{
"name": "model.layers.40.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 11288576
},
{
"name": "model.layers.40.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 16531456
},
{
"name": "model.layers.41.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 20725760
},
{
"name": "model.layers.41.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 25968640
}
],
"md5sum": "e911520af24a8d92158fd4bebee0357d"
}
]
}