imi2's picture
Upload 153 files
9bdacf7 verified
{
"metadata": {
"ParamSize": 405,
"ParamBytes": 6889973760.0,
"BitsPerParam": 3.749252192749517
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 335544320,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
131072,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 335544320,
"byteOffset": 0
}
],
"md5sum": "0d9643d03376592419461bdef2b6c785"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "lm_head.q_scale",
"shape": [
131072,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "8bfc072df27f6c75636704b717388382"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.33.mlp.down_proj.q_weight",
"shape": [
5120,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "892cb1814798985288301bea72e959db"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.33.mlp.gate_up_proj.q_weight",
"shape": [
28672,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "44d5c806684468de000cc36ffeaa64ba"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.34.mlp.down_proj.q_weight",
"shape": [
5120,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "ad6d5108d5955bffe499c6268ed352e9"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.34.mlp.gate_up_proj.q_weight",
"shape": [
28672,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "cc3bc797c7bf9c744f1c074ac23f84a4"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 27566080,
"records": [
{
"name": "model.layers.33.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 0
},
{
"name": "model.layers.33.mlp.down_proj.q_scale",
"shape": [
5120,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 10240
},
{
"name": "model.layers.33.mlp.gate_up_proj.q_scale",
"shape": [
28672,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 4597760
},
{
"name": "model.layers.33.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13772800
},
{
"name": "model.layers.34.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13783040
},
{
"name": "model.layers.34.mlp.down_proj.q_scale",
"shape": [
5120,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 13793280
},
{
"name": "model.layers.34.mlp.gate_up_proj.q_scale",
"shape": [
28672,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 18380800
},
{
"name": "model.layers.34.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 27555840
}
],
"md5sum": "d030560b997cc8ea1bcf8f3ea7cbf9ce"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.35.mlp.down_proj.q_weight",
"shape": [
5120,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "8a99cd1708eac58bb0183dffcc8b4662"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 29501440,
"records": [
{
"name": "model.layers.34.self_attn.qkv_proj.q_weight",
"shape": [
6144,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 0
},
{
"name": "model.layers.34.self_attn.qkv_proj.q_scale",
"shape": [
6144,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 15728640
},
{
"name": "model.layers.34.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 17694720
},
{
"name": "model.layers.34.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 28180480
},
{
"name": "model.layers.35.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 29491200
}
],
"md5sum": "757d2cb6823f875e8cb9aaf77869906c"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.35.mlp.gate_up_proj.q_weight",
"shape": [
28672,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "e16990cd7b17d3207b09a4f14c3336be"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 31467520,
"records": [
{
"name": "model.layers.35.mlp.down_proj.q_scale",
"shape": [
5120,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 0
},
{
"name": "model.layers.35.mlp.gate_up_proj.q_scale",
"shape": [
28672,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 4587520
},
{
"name": "model.layers.35.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13762560
},
{
"name": "model.layers.35.self_attn.qkv_proj.q_weight",
"shape": [
6144,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 13772800
},
{
"name": "model.layers.35.self_attn.qkv_proj.q_scale",
"shape": [
6144,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 29501440
}
],
"md5sum": "a516ad4df914e15c41e5137c97293976"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.36.mlp.down_proj.q_weight",
"shape": [
5120,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "a50f971f3127ca103545e77a72747ef6"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.36.mlp.gate_up_proj.q_weight",
"shape": [
28672,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "b757abc0e5b061363f332e7296963500"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 25579520,
"records": [
{
"name": "model.layers.35.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.35.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.36.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11796480
},
{
"name": "model.layers.36.mlp.down_proj.q_scale",
"shape": [
5120,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 11806720
},
{
"name": "model.layers.36.mlp.gate_up_proj.q_scale",
"shape": [
28672,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 16394240
},
{
"name": "model.layers.36.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 25569280
}
],
"md5sum": "2c04539a874c344b534c13da3d37041d"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.37.mlp.down_proj.q_weight",
"shape": [
5120,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "a6367e35062996212bb524b51dc1a84e"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 29501440,
"records": [
{
"name": "model.layers.36.self_attn.qkv_proj.q_weight",
"shape": [
6144,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 0
},
{
"name": "model.layers.36.self_attn.qkv_proj.q_scale",
"shape": [
6144,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 15728640
},
{
"name": "model.layers.36.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 17694720
},
{
"name": "model.layers.36.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 28180480
},
{
"name": "model.layers.37.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 29491200
}
],
"md5sum": "ef643486d3c07e81e1f1fc2ffe1a28f1"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.37.mlp.gate_up_proj.q_weight",
"shape": [
28672,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "28218bcaede1acd6112e00f3421a4b98"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 31467520,
"records": [
{
"name": "model.layers.37.mlp.down_proj.q_scale",
"shape": [
5120,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 0
},
{
"name": "model.layers.37.mlp.gate_up_proj.q_scale",
"shape": [
28672,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 4587520
},
{
"name": "model.layers.37.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13762560
},
{
"name": "model.layers.37.self_attn.qkv_proj.q_weight",
"shape": [
6144,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 13772800
},
{
"name": "model.layers.37.self_attn.qkv_proj.q_scale",
"shape": [
6144,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 29501440
}
],
"md5sum": "c04509c17d8ec45fc02875a30832489a"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.38.mlp.down_proj.q_weight",
"shape": [
5120,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "6d96134e6d2a11fd2e799ae6aa5f1c29"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.38.mlp.gate_up_proj.q_weight",
"shape": [
28672,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "174b5fde1d5672e59767d440f77bd9a8"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 25579520,
"records": [
{
"name": "model.layers.37.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.37.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.38.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11796480
},
{
"name": "model.layers.38.mlp.down_proj.q_scale",
"shape": [
5120,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 11806720
},
{
"name": "model.layers.38.mlp.gate_up_proj.q_scale",
"shape": [
28672,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 16394240
},
{
"name": "model.layers.38.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 25569280
}
],
"md5sum": "294b806b9f8bc8d47cd3e19324b2a6bf"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.39.mlp.down_proj.q_weight",
"shape": [
5120,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "908e26c0956a03d2141bed5af5fbaab8"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 29501440,
"records": [
{
"name": "model.layers.38.self_attn.qkv_proj.q_weight",
"shape": [
6144,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 0
},
{
"name": "model.layers.38.self_attn.qkv_proj.q_scale",
"shape": [
6144,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 15728640
},
{
"name": "model.layers.38.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 17694720
},
{
"name": "model.layers.38.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 28180480
},
{
"name": "model.layers.39.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 29491200
}
],
"md5sum": "3505498efddd6444e1ed37c0fceb8d37"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.39.mlp.gate_up_proj.q_weight",
"shape": [
28672,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "a7e97a5a1830c38aa3ff74e01160dc34"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 31467520,
"records": [
{
"name": "model.layers.39.mlp.down_proj.q_scale",
"shape": [
5120,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 0
},
{
"name": "model.layers.39.mlp.gate_up_proj.q_scale",
"shape": [
28672,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 4587520
},
{
"name": "model.layers.39.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13762560
},
{
"name": "model.layers.39.self_attn.qkv_proj.q_weight",
"shape": [
6144,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 13772800
},
{
"name": "model.layers.39.self_attn.qkv_proj.q_scale",
"shape": [
6144,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 29501440
}
],
"md5sum": "0affc9e14468cffb7cff769fb8b4eaae"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 335544320,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
131072,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 335544320,
"byteOffset": 0
}
],
"md5sum": "3c15656f0ded974b83ebcad68c1f4619"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.embed_tokens.q_scale",
"shape": [
131072,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "6c32d4392a8865376dd946da79a7224e"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
5120,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "8baa5bf4e4ccad298605b1db121324ba"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
28672,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "32ae242adaa01515f0ea9e313b40ffee"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 25589760,
"records": [
{
"name": "model.layers.39.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.39.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.norm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11796480
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11806720
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
5120,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 11816960
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
28672,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 16404480
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 25579520
}
],
"md5sum": "8b5e39a641c983fccb5e7d4bf9d09a03"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
5120,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "f3c837cb58076b1c02066f4a6037637f"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 29501440,
"records": [
{
"name": "model.layers.0.self_attn.qkv_proj.q_weight",
"shape": [
6144,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 0
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_scale",
"shape": [
6144,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 15728640
},
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 17694720
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 28180480
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 29491200
}
],
"md5sum": "d947da27228793b58c6bd4c2eb0b2e82"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
28672,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "005e5d2c5bcac5915457e6dedcd538bb"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 31467520,
"records": [
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
5120,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 0
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
28672,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 4587520
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13762560
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_weight",
"shape": [
6144,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 13772800
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_scale",
"shape": [
6144,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 29501440
}
],
"md5sum": "38d70e35673446bab3a44b54c2aa40ac"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
5120,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "ac544895c4bb078e98b18cbbf6784cd8"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
28672,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "d24dd7dd9405916eefdfa84cee93b1cc"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 25579520,
"records": [
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11796480
},
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
5120,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 11806720
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
28672,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 16394240
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 25569280
}
],
"md5sum": "8ddfd607078c8d857af5501daae0e994"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
5120,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "6f8274e5c04357cf822ba7279dbf03f9"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 29501440,
"records": [
{
"name": "model.layers.2.self_attn.qkv_proj.q_weight",
"shape": [
6144,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 0
},
{
"name": "model.layers.2.self_attn.qkv_proj.q_scale",
"shape": [
6144,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 15728640
},
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 17694720
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 28180480
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 29491200
}
],
"md5sum": "5d9b5a264804ba0164514a31f760e65b"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
28672,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "24f10be03f42bc6963e8531cabfd646d"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 31467520,
"records": [
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
5120,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 0
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
28672,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 4587520
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13762560
},
{
"name": "model.layers.3.self_attn.qkv_proj.q_weight",
"shape": [
6144,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 13772800
},
{
"name": "model.layers.3.self_attn.qkv_proj.q_scale",
"shape": [
6144,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 29501440
}
],
"md5sum": "9c81ce10af0d25d0b6cbed31b1475884"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
5120,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "7a5a5292398cb46f307573e32bb7be87"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
28672,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "1f04efd13fac1bbb9569f2f7555d28bb"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 25579520,
"records": [
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11796480
},
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
5120,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 11806720
},
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
28672,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 16394240
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 25569280
}
],
"md5sum": "72bbfdf5979e203952960a030125df8e"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
5120,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "b43e9899f03bcce80dd7815dec0a8460"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 29501440,
"records": [
{
"name": "model.layers.4.self_attn.qkv_proj.q_weight",
"shape": [
6144,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 0
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_scale",
"shape": [
6144,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 15728640
},
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 17694720
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 28180480
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 29491200
}
],
"md5sum": "79cb0e43aaf82a422501f15881e347b9"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
28672,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "7a9a1d16f703d84c54d4ac06ee394a27"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 31467520,
"records": [
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
5120,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 0
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
28672,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 4587520
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13762560
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_weight",
"shape": [
6144,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 13772800
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_scale",
"shape": [
6144,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 29501440
}
],
"md5sum": "1555c2f8fd5229a1e0653aed872fd217"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
28672,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "6d854a94e4dc4d535c494e4a21192797"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 20971520,
"records": [
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
28672,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 11796480
}
],
"md5sum": "aa75be0920e7f3ed49f40c13c0f079ac"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
5120,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "2f2aa2cace33adc8d44b86358e07ddc9"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 29501440,
"records": [
{
"name": "model.layers.6.self_attn.qkv_proj.q_weight",
"shape": [
6144,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 0
},
{
"name": "model.layers.6.self_attn.qkv_proj.q_scale",
"shape": [
6144,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 15728640
},
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 17694720
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 28180480
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 29491200
}
],
"md5sum": "0ef5619d62c48aa31ff34a33edbfe7f8"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
28672,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "d069ecdd03c1ccea9f14f981af4c1894"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 31467520,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
5120,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 0
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
28672,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 4587520
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13762560
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_weight",
"shape": [
6144,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 13772800
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_scale",
"shape": [
6144,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 29501440
}
],
"md5sum": "4844746a888cf8654c28b8cb79973356"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
5120,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "6993e7874b9ea4a1db89a8977ac4f8ca"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
28672,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "f0383ec44fe7c99b5be901f873920f7e"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 25579520,
"records": [
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11796480
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
5120,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 11806720
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
28672,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 16394240
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 25569280
}
],
"md5sum": "2fc9bed0ad21eae2bd70eb7646e939cd"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
5120,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "17ca0ef46466155730e4470a27cbc2e5"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 29501440,
"records": [
{
"name": "model.layers.11.self_attn.qkv_proj.q_weight",
"shape": [
6144,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 0
},
{
"name": "model.layers.11.self_attn.qkv_proj.q_scale",
"shape": [
6144,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 15728640
},
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 17694720
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 28180480
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 29491200
}
],
"md5sum": "482c88b59f548772292c6c4bd73ff7f9"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
28672,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "5b3707ea21e9319c9b1c867b25b7d9a4"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 31467520,
"records": [
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
5120,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 0
},
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
28672,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 4587520
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13762560
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_weight",
"shape": [
6144,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 13772800
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_scale",
"shape": [
6144,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 29501440
}
],
"md5sum": "eb0bbbf064aee02e16da80a41aed9640"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
5120,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "f47b079315c809f5c6b29a37703b6938"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
28672,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "5f6c917d162bc6f292d56b48ea608ac4"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 25579520,
"records": [
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11796480
},
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
5120,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 11806720
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
28672,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 16394240
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 25569280
}
],
"md5sum": "f8fed5e207d95b1e4bffbe907160cd45"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
5120,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "6f1d3904c2b64ef0d338a71bd074c3a1"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 29501440,
"records": [
{
"name": "model.layers.13.self_attn.qkv_proj.q_weight",
"shape": [
6144,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 0
},
{
"name": "model.layers.13.self_attn.qkv_proj.q_scale",
"shape": [
6144,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 15728640
},
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 17694720
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 28180480
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 29491200
}
],
"md5sum": "89ea0e68d2f72e0964af0b80d9aa1d40"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
28672,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "16416a31bdcc0f7c4c601900e50959e6"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 31467520,
"records": [
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
5120,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 0
},
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
28672,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 4587520
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13762560
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_weight",
"shape": [
6144,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 13772800
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_scale",
"shape": [
6144,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 29501440
}
],
"md5sum": "ee0fdf1d515783fcab547b61826d470d"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
28672,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "0121934181c7321ed3d77315e4f1266b"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 20971520,
"records": [
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
28672,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 11796480
}
],
"md5sum": "63f282c85f5c695c40b9b96d085d930a"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
5120,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "aad0e7e6a89be4b19ad116dd65ea41cf"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 29501440,
"records": [
{
"name": "model.layers.15.self_attn.qkv_proj.q_weight",
"shape": [
6144,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 0
},
{
"name": "model.layers.15.self_attn.qkv_proj.q_scale",
"shape": [
6144,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 15728640
},
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 17694720
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 28180480
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 29491200
}
],
"md5sum": "ae6133fff0a7769f1eaca7db05046aac"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
5120,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "2f83a86c5e2b3cab440b289ad0a9d04e"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
28672,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "602e6197728c0d778226670760fdae4e"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 18380800,
"records": [
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
5120,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 0
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 4587520
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 4597760
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
5120,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 4608000
},
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
28672,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 9195520
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 18370560
}
],
"md5sum": "f6e03cc613e77c0d93deebad4a7133f9"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
5120,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "736959dd244aae58d8f2b042c12777f9"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 29501440,
"records": [
{
"name": "model.layers.7.self_attn.qkv_proj.q_weight",
"shape": [
6144,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 0
},
{
"name": "model.layers.7.self_attn.qkv_proj.q_scale",
"shape": [
6144,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 15728640
},
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 17694720
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 28180480
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 29491200
}
],
"md5sum": "8c86436371069e76c0ad6cc2098717df"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
28672,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "e85a51800ace423b76f4f9add4edd31a"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 31467520,
"records": [
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
5120,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 0
},
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
28672,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 4587520
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13762560
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_weight",
"shape": [
6144,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 13772800
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_scale",
"shape": [
6144,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 29501440
}
],
"md5sum": "97db87702f8e6d01eb368c67beac7fa2"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
5120,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "e9ab3f9778556554b726069db0f7ad62"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
28672,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "942ed7be8643f51d6729fc4c44ba57da"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 25579520,
"records": [
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11796480
},
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
5120,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 11806720
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
28672,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 16394240
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 25569280
}
],
"md5sum": "26d963cc7bf45d1ab5bf112a4b57dcbe"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
5120,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "9ff403d8e24023e6841c47307761d031"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 29501440,
"records": [
{
"name": "model.layers.9.self_attn.qkv_proj.q_weight",
"shape": [
6144,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 0
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_scale",
"shape": [
6144,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 15728640
},
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 17694720
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 28180480
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 29491200
}
],
"md5sum": "94fe0a656ea279a3b18fbc856a168322"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
5120,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "56dae122c54a24e8863ecfcee1389e46"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
28672,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "fb2bd6168248b05ba19e09159a768c66"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 18380800,
"records": [
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
5120,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 0
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 4587520
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 4597760
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
5120,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 4608000
},
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
28672,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 9195520
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 18370560
}
],
"md5sum": "f887cc9b3c67346744a576707df50de8"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
5120,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "666ec8783b600d5d2d2673a9879b9d81"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 29501440,
"records": [
{
"name": "model.layers.16.self_attn.qkv_proj.q_weight",
"shape": [
6144,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 0
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_scale",
"shape": [
6144,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 15728640
},
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 17694720
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 28180480
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 29491200
}
],
"md5sum": "30fc24d9a59bcb912f8dd457128aded0"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
28672,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "6af3471d49cc0753b38a779aef67e213"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 31467520,
"records": [
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
5120,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 0
},
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
28672,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 4587520
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13762560
},
{
"name": "model.layers.17.self_attn.qkv_proj.q_weight",
"shape": [
6144,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 13772800
},
{
"name": "model.layers.17.self_attn.qkv_proj.q_scale",
"shape": [
6144,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 29501440
}
],
"md5sum": "42f252c06fecd1901f9e197af09f9c51"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
5120,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "4239eaf4cb8232d2118a326877e0ce73"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
28672,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "6f46af9d221a20cca00e6af786a8c38a"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 25579520,
"records": [
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11796480
},
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
5120,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 11806720
},
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
28672,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 16394240
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 25569280
}
],
"md5sum": "3bd85cc04035d63829b959b7e4fe9e3f"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
5120,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "73fbb81c7eebed4a417fd02fe0b01429"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 29501440,
"records": [
{
"name": "model.layers.18.self_attn.qkv_proj.q_weight",
"shape": [
6144,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 0
},
{
"name": "model.layers.18.self_attn.qkv_proj.q_scale",
"shape": [
6144,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 15728640
},
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 17694720
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 28180480
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 29491200
}
],
"md5sum": "8625a209f47fa228a4f1b447034ea620"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
28672,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "748eedda44a70cd10ea00ac996c6227d"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 31467520,
"records": [
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
5120,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 0
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
28672,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 4587520
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13762560
},
{
"name": "model.layers.19.self_attn.qkv_proj.q_weight",
"shape": [
6144,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 13772800
},
{
"name": "model.layers.19.self_attn.qkv_proj.q_scale",
"shape": [
6144,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 29501440
}
],
"md5sum": "8e71fa779a68269f5cec54b73bb291b8"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
5120,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "d1d1ad06369c032ce940eec3ce570f98"
},
{
"dataPath": "params_shard_99.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
28672,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "ada050206a3abd73f1a5950f65be57b6"
},
{
"dataPath": "params_shard_100.bin",
"format": "raw-shard",
"nbytes": 25579520,
"records": [
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11796480
},
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
5120,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 11806720
},
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
28672,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 16394240
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 25569280
}
],
"md5sum": "2ec8149c48738872fec1e475ec4e04dc"
},
{
"dataPath": "params_shard_101.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
5120,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "bdfeec76d7fee84236c5275e9a8b45ff"
},
{
"dataPath": "params_shard_102.bin",
"format": "raw-shard",
"nbytes": 29501440,
"records": [
{
"name": "model.layers.20.self_attn.qkv_proj.q_weight",
"shape": [
6144,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 0
},
{
"name": "model.layers.20.self_attn.qkv_proj.q_scale",
"shape": [
6144,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 15728640
},
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 17694720
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 28180480
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 29491200
}
],
"md5sum": "d505051fea4e3d0cd67a8b782778042e"
},
{
"dataPath": "params_shard_103.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
28672,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "dd50c3d8c8a73aeafb294d2ffe05eaa5"
},
{
"dataPath": "params_shard_104.bin",
"format": "raw-shard",
"nbytes": 31467520,
"records": [
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
5120,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 0
},
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
28672,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 4587520
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13762560
},
{
"name": "model.layers.21.self_attn.qkv_proj.q_weight",
"shape": [
6144,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 13772800
},
{
"name": "model.layers.21.self_attn.qkv_proj.q_scale",
"shape": [
6144,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 29501440
}
],
"md5sum": "9cd679aca2d4141b50f9fc49e4a54bb6"
},
{
"dataPath": "params_shard_105.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
5120,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "0f1783c94ad3ae319f0054a12fe7830a"
},
{
"dataPath": "params_shard_106.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
28672,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "608c578278577729084e8cb3cac249b6"
},
{
"dataPath": "params_shard_107.bin",
"format": "raw-shard",
"nbytes": 25579520,
"records": [
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11796480
},
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
5120,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 11806720
},
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
28672,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 16394240
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 25569280
}
],
"md5sum": "a83f1c87a799e23d4868a8313c897e4b"
},
{
"dataPath": "params_shard_108.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
5120,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "6b24808eb59993b6c5082334eec48f46"
},
{
"dataPath": "params_shard_109.bin",
"format": "raw-shard",
"nbytes": 29501440,
"records": [
{
"name": "model.layers.22.self_attn.qkv_proj.q_weight",
"shape": [
6144,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 0
},
{
"name": "model.layers.22.self_attn.qkv_proj.q_scale",
"shape": [
6144,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 15728640
},
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 17694720
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 28180480
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 29491200
}
],
"md5sum": "b66a8a98fe639739128be7733aef9ea0"
},
{
"dataPath": "params_shard_110.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
28672,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "6eecd0ecf94cee23583e7bcde6a9f2e3"
},
{
"dataPath": "params_shard_111.bin",
"format": "raw-shard",
"nbytes": 31467520,
"records": [
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
5120,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 0
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
28672,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 4587520
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13762560
},
{
"name": "model.layers.23.self_attn.qkv_proj.q_weight",
"shape": [
6144,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 13772800
},
{
"name": "model.layers.23.self_attn.qkv_proj.q_scale",
"shape": [
6144,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 29501440
}
],
"md5sum": "143beee6f5c9ea625d9d0f24aa93b58e"
},
{
"dataPath": "params_shard_112.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
"shape": [
28672,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "a176f7dd8680c786e1dbc34f2bbbb586"
},
{
"dataPath": "params_shard_113.bin",
"format": "raw-shard",
"nbytes": 20971520,
"records": [
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
"shape": [
28672,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 11796480
}
],
"md5sum": "31ac9d6aea404dc80df014e1a3766db4"
},
{
"dataPath": "params_shard_114.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.24.mlp.down_proj.q_weight",
"shape": [
5120,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "f4116a0d0fe2b7e04dd3aa720187b959"
},
{
"dataPath": "params_shard_115.bin",
"format": "raw-shard",
"nbytes": 29501440,
"records": [
{
"name": "model.layers.24.self_attn.qkv_proj.q_weight",
"shape": [
6144,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 0
},
{
"name": "model.layers.24.self_attn.qkv_proj.q_scale",
"shape": [
6144,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 15728640
},
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 17694720
},
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 28180480
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 29491200
}
],
"md5sum": "d3bfc7e007b84829d2165f6dd124d0df"
},
{
"dataPath": "params_shard_116.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.25.mlp.down_proj.q_weight",
"shape": [
5120,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "fae1a1e1062166aab9c15ed1683342fc"
},
{
"dataPath": "params_shard_117.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
"shape": [
28672,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "824f0c18eb8469240e2c535787ff352e"
},
{
"dataPath": "params_shard_118.bin",
"format": "raw-shard",
"nbytes": 18380800,
"records": [
{
"name": "model.layers.24.mlp.down_proj.q_scale",
"shape": [
5120,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 0
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 4587520
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 4597760
},
{
"name": "model.layers.25.mlp.down_proj.q_scale",
"shape": [
5120,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 4608000
},
{
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
"shape": [
28672,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 9195520
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 18370560
}
],
"md5sum": "a334f4fd09e6116278b6a244239d70ed"
},
{
"dataPath": "params_shard_119.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.26.mlp.down_proj.q_weight",
"shape": [
5120,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "3c041072ac3dfd6f43f2c0878f7b0cd9"
},
{
"dataPath": "params_shard_120.bin",
"format": "raw-shard",
"nbytes": 29501440,
"records": [
{
"name": "model.layers.25.self_attn.qkv_proj.q_weight",
"shape": [
6144,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 0
},
{
"name": "model.layers.25.self_attn.qkv_proj.q_scale",
"shape": [
6144,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 15728640
},
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 17694720
},
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 28180480
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 29491200
}
],
"md5sum": "6a964850da4bcab8032e179f06295b05"
},
{
"dataPath": "params_shard_121.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
"shape": [
28672,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "c6841a25798fc2c6cff7304d6f7c484e"
},
{
"dataPath": "params_shard_122.bin",
"format": "raw-shard",
"nbytes": 31467520,
"records": [
{
"name": "model.layers.26.mlp.down_proj.q_scale",
"shape": [
5120,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 0
},
{
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
"shape": [
28672,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 4587520
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13762560
},
{
"name": "model.layers.26.self_attn.qkv_proj.q_weight",
"shape": [
6144,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 13772800
},
{
"name": "model.layers.26.self_attn.qkv_proj.q_scale",
"shape": [
6144,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 29501440
}
],
"md5sum": "d352251c6998bb2ae65912f0922ebf52"
},
{
"dataPath": "params_shard_123.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.27.mlp.down_proj.q_weight",
"shape": [
5120,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "3b169c1b3a7aaaba4d558fce0e2dcc1c"
},
{
"dataPath": "params_shard_124.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
"shape": [
28672,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "dcd1cd2695d90f02ef248d3d02b7f530"
},
{
"dataPath": "params_shard_125.bin",
"format": "raw-shard",
"nbytes": 25579520,
"records": [
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11796480
},
{
"name": "model.layers.27.mlp.down_proj.q_scale",
"shape": [
5120,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 11806720
},
{
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
"shape": [
28672,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 16394240
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 25569280
}
],
"md5sum": "faec1a609f3c2b26c2920edd2dc3d8f0"
},
{
"dataPath": "params_shard_126.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.28.mlp.down_proj.q_weight",
"shape": [
5120,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "d6c6c6b6693234fac42f045fb58f0b04"
},
{
"dataPath": "params_shard_127.bin",
"format": "raw-shard",
"nbytes": 29501440,
"records": [
{
"name": "model.layers.27.self_attn.qkv_proj.q_weight",
"shape": [
6144,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 0
},
{
"name": "model.layers.27.self_attn.qkv_proj.q_scale",
"shape": [
6144,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 15728640
},
{
"name": "model.layers.27.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 17694720
},
{
"name": "model.layers.27.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 28180480
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 29491200
}
],
"md5sum": "5991423e2b40b05591d7167766676650"
},
{
"dataPath": "params_shard_128.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_weight",
"shape": [
28672,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "a20fae8c6c3e2579b264da59c2f7eb9f"
},
{
"dataPath": "params_shard_129.bin",
"format": "raw-shard",
"nbytes": 31467520,
"records": [
{
"name": "model.layers.28.mlp.down_proj.q_scale",
"shape": [
5120,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 0
},
{
"name": "model.layers.28.mlp.gate_up_proj.q_scale",
"shape": [
28672,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 4587520
},
{
"name": "model.layers.28.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13762560
},
{
"name": "model.layers.28.self_attn.qkv_proj.q_weight",
"shape": [
6144,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 13772800
},
{
"name": "model.layers.28.self_attn.qkv_proj.q_scale",
"shape": [
6144,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 29501440
}
],
"md5sum": "29c33fd388d06322f2ae682f04bd2118"
},
{
"dataPath": "params_shard_130.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.29.mlp.down_proj.q_weight",
"shape": [
5120,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "74f3a7b0c534ce976e1ee4b4f12d0f3e"
},
{
"dataPath": "params_shard_131.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.q_weight",
"shape": [
28672,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "ae40fbb22dd6a63f65a8200d258c0b4d"
},
{
"dataPath": "params_shard_132.bin",
"format": "raw-shard",
"nbytes": 25579520,
"records": [
{
"name": "model.layers.28.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.28.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11796480
},
{
"name": "model.layers.29.mlp.down_proj.q_scale",
"shape": [
5120,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 11806720
},
{
"name": "model.layers.29.mlp.gate_up_proj.q_scale",
"shape": [
28672,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 16394240
},
{
"name": "model.layers.29.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 25569280
}
],
"md5sum": "3c14ad88595ba90977454528d741ebd7"
},
{
"dataPath": "params_shard_133.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.30.mlp.down_proj.q_weight",
"shape": [
5120,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "5ffa7f3d7b040df7737d30d11970b07f"
},
{
"dataPath": "params_shard_134.bin",
"format": "raw-shard",
"nbytes": 29501440,
"records": [
{
"name": "model.layers.29.self_attn.qkv_proj.q_weight",
"shape": [
6144,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 0
},
{
"name": "model.layers.29.self_attn.qkv_proj.q_scale",
"shape": [
6144,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 15728640
},
{
"name": "model.layers.29.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 17694720
},
{
"name": "model.layers.29.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 28180480
},
{
"name": "model.layers.30.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 29491200
}
],
"md5sum": "b3ddbaeeb64cde662fe0405d1f9c6408"
},
{
"dataPath": "params_shard_135.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.q_weight",
"shape": [
28672,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "0dfe6549baf6fa68eef4b4f44a4dc4d1"
},
{
"dataPath": "params_shard_136.bin",
"format": "raw-shard",
"nbytes": 31467520,
"records": [
{
"name": "model.layers.30.mlp.down_proj.q_scale",
"shape": [
5120,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 0
},
{
"name": "model.layers.30.mlp.gate_up_proj.q_scale",
"shape": [
28672,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 4587520
},
{
"name": "model.layers.30.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13762560
},
{
"name": "model.layers.30.self_attn.qkv_proj.q_weight",
"shape": [
6144,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 13772800
},
{
"name": "model.layers.30.self_attn.qkv_proj.q_scale",
"shape": [
6144,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 29501440
}
],
"md5sum": "73402b2ac7776502761e3decab577f36"
},
{
"dataPath": "params_shard_137.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.31.mlp.down_proj.q_weight",
"shape": [
5120,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "0541fdbd842ec984704f2d5dd90d5b6e"
},
{
"dataPath": "params_shard_138.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.q_weight",
"shape": [
28672,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "d6d6d55c7fb4984707102cd6f2110357"
},
{
"dataPath": "params_shard_139.bin",
"format": "raw-shard",
"nbytes": 25579520,
"records": [
{
"name": "model.layers.30.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.30.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.31.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11796480
},
{
"name": "model.layers.31.mlp.down_proj.q_scale",
"shape": [
5120,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 11806720
},
{
"name": "model.layers.31.mlp.gate_up_proj.q_scale",
"shape": [
28672,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 16394240
},
{
"name": "model.layers.31.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 25569280
}
],
"md5sum": "8feed7b6f03a588c38bb6f4d38a35d7e"
},
{
"dataPath": "params_shard_140.bin",
"format": "raw-shard",
"nbytes": 36700160,
"records": [
{
"name": "model.layers.32.mlp.down_proj.q_weight",
"shape": [
5120,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36700160,
"byteOffset": 0
}
],
"md5sum": "a75a6a3dbfbf44378ae0a31e157480a9"
},
{
"dataPath": "params_shard_141.bin",
"format": "raw-shard",
"nbytes": 29501440,
"records": [
{
"name": "model.layers.31.self_attn.qkv_proj.q_weight",
"shape": [
6144,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 0
},
{
"name": "model.layers.31.self_attn.qkv_proj.q_scale",
"shape": [
6144,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 15728640
},
{
"name": "model.layers.31.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 17694720
},
{
"name": "model.layers.31.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 28180480
},
{
"name": "model.layers.32.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 29491200
}
],
"md5sum": "6af5c762b573e9f39375a97c56a5fd25"
},
{
"dataPath": "params_shard_142.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.32.mlp.gate_up_proj.q_weight",
"shape": [
28672,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "d2977fa30cebdd9f12b76ddad6daad8c"
},
{
"dataPath": "params_shard_143.bin",
"format": "raw-shard",
"nbytes": 31467520,
"records": [
{
"name": "model.layers.32.mlp.down_proj.q_scale",
"shape": [
5120,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4587520,
"byteOffset": 0
},
{
"name": "model.layers.32.mlp.gate_up_proj.q_scale",
"shape": [
28672,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9175040,
"byteOffset": 4587520
},
{
"name": "model.layers.32.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 13762560
},
{
"name": "model.layers.32.self_attn.qkv_proj.q_weight",
"shape": [
6144,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 13772800
},
{
"name": "model.layers.32.self_attn.qkv_proj.q_scale",
"shape": [
6144,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 29501440
}
],
"md5sum": "0ec8bbf5541a77485846871f976e3db6"
},
{
"dataPath": "params_shard_144.bin",
"format": "raw-shard",
"nbytes": 29491200,
"records": [
{
"name": "model.layers.32.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.32.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
},
{
"name": "model.layers.33.self_attn.qkv_proj.q_weight",
"shape": [
6144,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 15728640,
"byteOffset": 11796480
},
{
"name": "model.layers.33.self_attn.qkv_proj.q_scale",
"shape": [
6144,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1966080,
"byteOffset": 27525120
}
],
"md5sum": "74e73c2bd92671eb4e4f20bb9fafbb94"
},
{
"dataPath": "params_shard_145.bin",
"format": "raw-shard",
"nbytes": 11796480,
"records": [
{
"name": "model.layers.33.self_attn.o_proj.q_weight",
"shape": [
5120,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.33.self_attn.o_proj.q_scale",
"shape": [
5120,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1310720,
"byteOffset": 10485760
}
],
"md5sum": "9dd49dab2766b83e3b17fa735feb8d53"
}
]
}