SebRincon's picture
Upload 87 files
6e321ea verified
{
"metadata": {
"ParamSize": 313,
"ParamBytes": 3439746048.0,
"BitsPerParam": 3.6133605651807272
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 218972160,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
152064,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 218972160,
"byteOffset": 0
}
],
"md5sum": "20d6e7fe62912f2bc757d82da7887891"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 27380736,
"records": [
{
"name": "model.embed_tokens.q_scale",
"shape": [
152064,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 27371520,
"byteOffset": 0
},
{
"name": "model.layers.0.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 27371520
}
],
"md5sum": "e6cba5ff5b1343a6fe07c4cfb751cc2f"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 54558720,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
37888,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 54558720,
"byteOffset": 0
}
],
"md5sum": "f2926ccf8c5e7338600dec9371ba2aa9"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 27181056,
"records": [
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
3584,
1896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 27181056,
"byteOffset": 0
}
],
"md5sum": "0d1d3def2c4e744cd5f0652b2a4bdbd5"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 30977024,
"records": [
{
"name": "model.layers.0.self_attn.c_attn.q_weight",
"shape": [
4608,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6635520,
"byteOffset": 0
},
{
"name": "model.layers.0.self_attn.c_attn.q_scale",
"shape": [
4608,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 829440,
"byteOffset": 6635520
},
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
3584,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5160960,
"byteOffset": 7464960
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
3584,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 645120,
"byteOffset": 12625920
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
37888,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6819840,
"byteOffset": 13271040
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
3584,
474
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3397632,
"byteOffset": 20090880
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 23488512
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 23495680
},
{
"name": "model.layers.1.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 23502848
},
{
"name": "model.layers.1.self_attn.c_attn.q_weight",
"shape": [
4608,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6635520,
"byteOffset": 23512064
},
{
"name": "model.layers.1.self_attn.c_attn.q_scale",
"shape": [
4608,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 829440,
"byteOffset": 30147584
}
],
"md5sum": "2622196c8679a9c889ef39e7551aa42d"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 54558720,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
37888,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 54558720,
"byteOffset": 0
}
],
"md5sum": "ebe2a67574ab4abc8618f0cc2ad1afc0"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 27181056,
"records": [
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
3584,
1896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 27181056,
"byteOffset": 0
}
],
"md5sum": "bb6b746127c9b2772fa31339df320ecd"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 54558720,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
37888,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 54558720,
"byteOffset": 0
}
],
"md5sum": "4a15acd5f3e571b1e8ee0d1d6f12846e"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 29318144,
"records": [
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
3584,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5160960,
"byteOffset": 0
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
3584,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 645120,
"byteOffset": 5160960
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
37888,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6819840,
"byteOffset": 5806080
},
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
3584,
474
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3397632,
"byteOffset": 12625920
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 16023552
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 16030720
},
{
"name": "model.layers.2.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 16037888
},
{
"name": "model.layers.2.self_attn.c_attn.q_weight",
"shape": [
4608,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6635520,
"byteOffset": 16047104
},
{
"name": "model.layers.2.self_attn.c_attn.q_scale",
"shape": [
4608,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 829440,
"byteOffset": 22682624
},
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
3584,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5160960,
"byteOffset": 23512064
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
3584,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 645120,
"byteOffset": 28673024
}
],
"md5sum": "4e419f7f37788fa85b3702d7d07b0445"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 27181056,
"records": [
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
3584,
1896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 27181056,
"byteOffset": 0
}
],
"md5sum": "f148a0dbc163e64e6cba8bef9ab72f9c"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 54558720,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
37888,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 54558720,
"byteOffset": 0
}
],
"md5sum": "b74d8e5320e8c3bc67ae21a7856eb7d6"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 27181056,
"records": [
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
3584,
1896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 27181056,
"byteOffset": 0
}
],
"md5sum": "80a57644a987228b4490f12f86a02b92"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 30331904,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
37888,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6819840,
"byteOffset": 0
},
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
3584,
474
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3397632,
"byteOffset": 6819840
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 10217472
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 10224640
},
{
"name": "model.layers.3.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 10231808
},
{
"name": "model.layers.3.self_attn.c_attn.q_weight",
"shape": [
4608,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6635520,
"byteOffset": 10241024
},
{
"name": "model.layers.3.self_attn.c_attn.q_scale",
"shape": [
4608,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 829440,
"byteOffset": 16876544
},
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
3584,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5160960,
"byteOffset": 17705984
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
3584,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 645120,
"byteOffset": 22866944
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
37888,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6819840,
"byteOffset": 23512064
}
],
"md5sum": "3f0a8be2a2eb1548724df7fc3868c084"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 54558720,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
37888,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 54558720,
"byteOffset": 0
}
],
"md5sum": "d99f052a6d1a9d1ed4f0d380a0eae45b"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 27181056,
"records": [
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
3584,
1896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 27181056,
"byteOffset": 0
}
],
"md5sum": "9e945905e1ebf7dac6b6ba251cf86405"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 26933248,
"records": [
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
3584,
474
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3397632,
"byteOffset": 0
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 3397632
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 3404800
},
{
"name": "model.layers.4.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 3411968
},
{
"name": "model.layers.4.self_attn.c_attn.q_weight",
"shape": [
4608,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6635520,
"byteOffset": 3421184
},
{
"name": "model.layers.4.self_attn.c_attn.q_scale",
"shape": [
4608,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 829440,
"byteOffset": 10056704
},
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
3584,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5160960,
"byteOffset": 10886144
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
3584,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 645120,
"byteOffset": 16047104
},
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
37888,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6819840,
"byteOffset": 16692224
},
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
3584,
474
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3397632,
"byteOffset": 23512064
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 26909696
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 26916864
},
{
"name": "model.layers.5.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 26924032
}
],
"md5sum": "0986ff19b355b7dbb816ad9a45d1c191"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 54558720,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
37888,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 54558720,
"byteOffset": 0
}
],
"md5sum": "6322af817165065a9e711ae60620b13d"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 27181056,
"records": [
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
3584,
1896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 27181056,
"byteOffset": 0
}
],
"md5sum": "639941d5f0dffaa22babb2fb928056c1"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 30977024,
"records": [
{
"name": "model.layers.5.self_attn.c_attn.q_weight",
"shape": [
4608,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6635520,
"byteOffset": 0
},
{
"name": "model.layers.5.self_attn.c_attn.q_scale",
"shape": [
4608,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 829440,
"byteOffset": 6635520
},
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
3584,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5160960,
"byteOffset": 7464960
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
3584,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 645120,
"byteOffset": 12625920
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
37888,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6819840,
"byteOffset": 13271040
},
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
3584,
474
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3397632,
"byteOffset": 20090880
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 23488512
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 23495680
},
{
"name": "model.layers.6.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 23502848
},
{
"name": "model.layers.6.self_attn.c_attn.q_weight",
"shape": [
4608,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6635520,
"byteOffset": 23512064
},
{
"name": "model.layers.6.self_attn.c_attn.q_scale",
"shape": [
4608,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 829440,
"byteOffset": 30147584
}
],
"md5sum": "f658c701a1d57b5470d8a8e37dbd371a"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 54558720,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
37888,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 54558720,
"byteOffset": 0
}
],
"md5sum": "bd323da8420292d570e0c35db1f7a129"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 27181056,
"records": [
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
3584,
1896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 27181056,
"byteOffset": 0
}
],
"md5sum": "0e879e6956104fe87bd064fccf628f33"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 54558720,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
37888,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 54558720,
"byteOffset": 0
}
],
"md5sum": "eaa36148e4dbdce32a7cfad461d93a04"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 29318144,
"records": [
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
3584,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5160960,
"byteOffset": 0
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
3584,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 645120,
"byteOffset": 5160960
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
37888,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6819840,
"byteOffset": 5806080
},
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
3584,
474
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3397632,
"byteOffset": 12625920
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 16023552
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 16030720
},
{
"name": "model.layers.7.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 16037888
},
{
"name": "model.layers.7.self_attn.c_attn.q_weight",
"shape": [
4608,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6635520,
"byteOffset": 16047104
},
{
"name": "model.layers.7.self_attn.c_attn.q_scale",
"shape": [
4608,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 829440,
"byteOffset": 22682624
},
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
3584,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5160960,
"byteOffset": 23512064
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
3584,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 645120,
"byteOffset": 28673024
}
],
"md5sum": "7b4d61f77f7fb2b0888198fd575b7d47"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 27181056,
"records": [
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
3584,
1896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 27181056,
"byteOffset": 0
}
],
"md5sum": "6c86642a7d76054003e968b9ae6f1af4"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 54558720,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
37888,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 54558720,
"byteOffset": 0
}
],
"md5sum": "f5cff6dac4e2c910931a79021cfac7ae"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 27181056,
"records": [
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
3584,
1896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 27181056,
"byteOffset": 0
}
],
"md5sum": "83b2e3f1b96e90f6abe0491e80d911bf"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 30331904,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
37888,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6819840,
"byteOffset": 0
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
3584,
474
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3397632,
"byteOffset": 6819840
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 10217472
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 10224640
},
{
"name": "model.layers.8.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 10231808
},
{
"name": "model.layers.8.self_attn.c_attn.q_weight",
"shape": [
4608,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6635520,
"byteOffset": 10241024
},
{
"name": "model.layers.8.self_attn.c_attn.q_scale",
"shape": [
4608,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 829440,
"byteOffset": 16876544
},
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
3584,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5160960,
"byteOffset": 17705984
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
3584,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 645120,
"byteOffset": 22866944
},
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
37888,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6819840,
"byteOffset": 23512064
}
],
"md5sum": "7a8096e98aeaf22ebdd95334f1c7b17a"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 54558720,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
37888,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 54558720,
"byteOffset": 0
}
],
"md5sum": "ef99ec929ccef0f59fad840799408b74"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 27181056,
"records": [
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
3584,
1896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 27181056,
"byteOffset": 0
}
],
"md5sum": "48deb39c530029a8484d624048c4dbac"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 26933248,
"records": [
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
3584,
474
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3397632,
"byteOffset": 0
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 3397632
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 3404800
},
{
"name": "model.layers.9.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 3411968
},
{
"name": "model.layers.9.self_attn.c_attn.q_weight",
"shape": [
4608,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6635520,
"byteOffset": 3421184
},
{
"name": "model.layers.9.self_attn.c_attn.q_scale",
"shape": [
4608,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 829440,
"byteOffset": 10056704
},
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
3584,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5160960,
"byteOffset": 10886144
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
3584,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 645120,
"byteOffset": 16047104
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
37888,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6819840,
"byteOffset": 16692224
},
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
3584,
474
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3397632,
"byteOffset": 23512064
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 26909696
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 26916864
},
{
"name": "model.layers.10.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 26924032
}
],
"md5sum": "89155a9ccb6d5e34bad8fe64f2c5db4d"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 54558720,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
37888,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 54558720,
"byteOffset": 0
}
],
"md5sum": "a64b8e0dbc8412b07945c49ddeaba3c7"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 27181056,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
3584,
1896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 27181056,
"byteOffset": 0
}
],
"md5sum": "d82c304c5b7a2cab8cc1a2b69761f49c"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 30977024,
"records": [
{
"name": "model.layers.10.self_attn.c_attn.q_weight",
"shape": [
4608,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6635520,
"byteOffset": 0
},
{
"name": "model.layers.10.self_attn.c_attn.q_scale",
"shape": [
4608,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 829440,
"byteOffset": 6635520
},
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
3584,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5160960,
"byteOffset": 7464960
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
3584,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 645120,
"byteOffset": 12625920
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
37888,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6819840,
"byteOffset": 13271040
},
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
3584,
474
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3397632,
"byteOffset": 20090880
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 23488512
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 23495680
},
{
"name": "model.layers.11.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 23502848
},
{
"name": "model.layers.11.self_attn.c_attn.q_weight",
"shape": [
4608,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6635520,
"byteOffset": 23512064
},
{
"name": "model.layers.11.self_attn.c_attn.q_scale",
"shape": [
4608,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 829440,
"byteOffset": 30147584
}
],
"md5sum": "e324abc8943911bb5d3c5a05d01d74da"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 54558720,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
37888,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 54558720,
"byteOffset": 0
}
],
"md5sum": "94d4b972ee22c8c606cb67ccbba23623"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 27181056,
"records": [
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
3584,
1896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 27181056,
"byteOffset": 0
}
],
"md5sum": "343e8459c5cfcadd7bb353bf5d66e6e8"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 54558720,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
37888,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 54558720,
"byteOffset": 0
}
],
"md5sum": "45e935aa5151d5e8a91f0d9cc149b6ee"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 29318144,
"records": [
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
3584,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5160960,
"byteOffset": 0
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
3584,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 645120,
"byteOffset": 5160960
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
37888,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6819840,
"byteOffset": 5806080
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
3584,
474
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3397632,
"byteOffset": 12625920
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 16023552
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 16030720
},
{
"name": "model.layers.12.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 16037888
},
{
"name": "model.layers.12.self_attn.c_attn.q_weight",
"shape": [
4608,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6635520,
"byteOffset": 16047104
},
{
"name": "model.layers.12.self_attn.c_attn.q_scale",
"shape": [
4608,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 829440,
"byteOffset": 22682624
},
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
3584,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5160960,
"byteOffset": 23512064
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
3584,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 645120,
"byteOffset": 28673024
}
],
"md5sum": "4cdc65a0ec2fc5bf5e4efad34646fd82"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 27181056,
"records": [
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
3584,
1896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 27181056,
"byteOffset": 0
}
],
"md5sum": "8dc61cb07e676a2eece4d84577a5bfc7"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 54558720,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
37888,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 54558720,
"byteOffset": 0
}
],
"md5sum": "5910d3c9317909afb8f0ad9f0732231c"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 27181056,
"records": [
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
3584,
1896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 27181056,
"byteOffset": 0
}
],
"md5sum": "006efd95e341c3af634419bba61d3104"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 30331904,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
37888,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6819840,
"byteOffset": 0
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
3584,
474
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3397632,
"byteOffset": 6819840
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 10217472
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 10224640
},
{
"name": "model.layers.13.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 10231808
},
{
"name": "model.layers.13.self_attn.c_attn.q_weight",
"shape": [
4608,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6635520,
"byteOffset": 10241024
},
{
"name": "model.layers.13.self_attn.c_attn.q_scale",
"shape": [
4608,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 829440,
"byteOffset": 16876544
},
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
3584,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5160960,
"byteOffset": 17705984
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
3584,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 645120,
"byteOffset": 22866944
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
37888,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6819840,
"byteOffset": 23512064
}
],
"md5sum": "b03eb35ebdf3e82ef5f27a709234ee70"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 54558720,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
37888,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 54558720,
"byteOffset": 0
}
],
"md5sum": "b2fbf638c6f0eafb3e93ae9aa069b802"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 27181056,
"records": [
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
3584,
1896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 27181056,
"byteOffset": 0
}
],
"md5sum": "cd8dd1a3c24d2e88509e7898815337fb"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 26933248,
"records": [
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
3584,
474
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3397632,
"byteOffset": 0
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 3397632
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 3404800
},
{
"name": "model.layers.14.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 3411968
},
{
"name": "model.layers.14.self_attn.c_attn.q_weight",
"shape": [
4608,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6635520,
"byteOffset": 3421184
},
{
"name": "model.layers.14.self_attn.c_attn.q_scale",
"shape": [
4608,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 829440,
"byteOffset": 10056704
},
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
3584,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5160960,
"byteOffset": 10886144
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
3584,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 645120,
"byteOffset": 16047104
},
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
37888,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6819840,
"byteOffset": 16692224
},
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
3584,
474
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3397632,
"byteOffset": 23512064
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 26909696
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 26916864
},
{
"name": "model.layers.15.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 26924032
}
],
"md5sum": "3c5026f4348104082684e4d96697e0ed"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 54558720,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
37888,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 54558720,
"byteOffset": 0
}
],
"md5sum": "e2e9144edf576ef4a66fba1720ed827b"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 27181056,
"records": [
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
3584,
1896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 27181056,
"byteOffset": 0
}
],
"md5sum": "2c8f48ac276931fab0820c048ece56c0"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 30977024,
"records": [
{
"name": "model.layers.15.self_attn.c_attn.q_weight",
"shape": [
4608,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6635520,
"byteOffset": 0
},
{
"name": "model.layers.15.self_attn.c_attn.q_scale",
"shape": [
4608,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 829440,
"byteOffset": 6635520
},
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
3584,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5160960,
"byteOffset": 7464960
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
3584,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 645120,
"byteOffset": 12625920
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
37888,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6819840,
"byteOffset": 13271040
},
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
3584,
474
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3397632,
"byteOffset": 20090880
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 23488512
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 23495680
},
{
"name": "model.layers.16.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 23502848
},
{
"name": "model.layers.16.self_attn.c_attn.q_weight",
"shape": [
4608,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6635520,
"byteOffset": 23512064
},
{
"name": "model.layers.16.self_attn.c_attn.q_scale",
"shape": [
4608,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 829440,
"byteOffset": 30147584
}
],
"md5sum": "80e3946938eeaf237feaa20572a620f3"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 54558720,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
37888,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 54558720,
"byteOffset": 0
}
],
"md5sum": "73f979927f1e3f6d73a3e8f77290ae38"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 27181056,
"records": [
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
3584,
1896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 27181056,
"byteOffset": 0
}
],
"md5sum": "d90b362217f96e6573c2c8648f541254"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 54558720,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
37888,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 54558720,
"byteOffset": 0
}
],
"md5sum": "e7f23567198607ff3762dc6c4732d25e"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 29318144,
"records": [
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
3584,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5160960,
"byteOffset": 0
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
3584,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 645120,
"byteOffset": 5160960
},
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
37888,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6819840,
"byteOffset": 5806080
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
3584,
474
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3397632,
"byteOffset": 12625920
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 16023552
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 16030720
},
{
"name": "model.layers.17.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 16037888
},
{
"name": "model.layers.17.self_attn.c_attn.q_weight",
"shape": [
4608,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6635520,
"byteOffset": 16047104
},
{
"name": "model.layers.17.self_attn.c_attn.q_scale",
"shape": [
4608,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 829440,
"byteOffset": 22682624
},
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
3584,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5160960,
"byteOffset": 23512064
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
3584,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 645120,
"byteOffset": 28673024
}
],
"md5sum": "c08873dd448535efa6b624de595d7136"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 27181056,
"records": [
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
3584,
1896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 27181056,
"byteOffset": 0
}
],
"md5sum": "bc05a2e2f87c4693291564cc4b038b3e"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 54558720,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
37888,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 54558720,
"byteOffset": 0
}
],
"md5sum": "b4238925aa2621415222f57f99f013ee"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 27181056,
"records": [
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
3584,
1896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 27181056,
"byteOffset": 0
}
],
"md5sum": "50c11c2bfea03db8a15921c732a85b6f"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 30331904,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
37888,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6819840,
"byteOffset": 0
},
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
3584,
474
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3397632,
"byteOffset": 6819840
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 10217472
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 10224640
},
{
"name": "model.layers.18.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 10231808
},
{
"name": "model.layers.18.self_attn.c_attn.q_weight",
"shape": [
4608,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6635520,
"byteOffset": 10241024
},
{
"name": "model.layers.18.self_attn.c_attn.q_scale",
"shape": [
4608,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 829440,
"byteOffset": 16876544
},
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
3584,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5160960,
"byteOffset": 17705984
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
3584,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 645120,
"byteOffset": 22866944
},
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
37888,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6819840,
"byteOffset": 23512064
}
],
"md5sum": "9a5c460913d8ccb7f7cb3deb4faae368"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 54558720,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
37888,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 54558720,
"byteOffset": 0
}
],
"md5sum": "c287af0f4249ba4b57720a6e31efca6e"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 27181056,
"records": [
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
3584,
1896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 27181056,
"byteOffset": 0
}
],
"md5sum": "2bf1f5d9201d5874b31d5c2151a6ce49"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 26933248,
"records": [
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
3584,
474
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3397632,
"byteOffset": 0
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 3397632
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 3404800
},
{
"name": "model.layers.19.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 3411968
},
{
"name": "model.layers.19.self_attn.c_attn.q_weight",
"shape": [
4608,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6635520,
"byteOffset": 3421184
},
{
"name": "model.layers.19.self_attn.c_attn.q_scale",
"shape": [
4608,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 829440,
"byteOffset": 10056704
},
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
3584,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5160960,
"byteOffset": 10886144
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
3584,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 645120,
"byteOffset": 16047104
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
37888,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6819840,
"byteOffset": 16692224
},
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
3584,
474
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3397632,
"byteOffset": 23512064
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 26909696
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 26916864
},
{
"name": "model.layers.20.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 26924032
}
],
"md5sum": "8a6ce3a377c2e681d543219223eb3df4"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 54558720,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
37888,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 54558720,
"byteOffset": 0
}
],
"md5sum": "bf0117af395e2d0ecb5173e02e768ff2"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 27181056,
"records": [
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
3584,
1896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 27181056,
"byteOffset": 0
}
],
"md5sum": "2667a9ee9de3d3103785d876f4d01039"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 30977024,
"records": [
{
"name": "model.layers.20.self_attn.c_attn.q_weight",
"shape": [
4608,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6635520,
"byteOffset": 0
},
{
"name": "model.layers.20.self_attn.c_attn.q_scale",
"shape": [
4608,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 829440,
"byteOffset": 6635520
},
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
3584,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5160960,
"byteOffset": 7464960
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
3584,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 645120,
"byteOffset": 12625920
},
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
37888,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6819840,
"byteOffset": 13271040
},
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
3584,
474
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3397632,
"byteOffset": 20090880
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 23488512
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 23495680
},
{
"name": "model.layers.21.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 23502848
},
{
"name": "model.layers.21.self_attn.c_attn.q_weight",
"shape": [
4608,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6635520,
"byteOffset": 23512064
},
{
"name": "model.layers.21.self_attn.c_attn.q_scale",
"shape": [
4608,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 829440,
"byteOffset": 30147584
}
],
"md5sum": "86554654054d9b0ff9434c9d91b9ac6e"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 54558720,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
37888,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 54558720,
"byteOffset": 0
}
],
"md5sum": "2f8368d394f2def59f99491fc910eed7"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 27181056,
"records": [
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
3584,
1896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 27181056,
"byteOffset": 0
}
],
"md5sum": "5b215ababea2518d5bdac53c51cc8cee"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 54558720,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
37888,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 54558720,
"byteOffset": 0
}
],
"md5sum": "9c45db8cf61728bf61f728db0471d2c0"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 29318144,
"records": [
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
3584,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5160960,
"byteOffset": 0
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
3584,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 645120,
"byteOffset": 5160960
},
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
37888,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6819840,
"byteOffset": 5806080
},
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
3584,
474
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3397632,
"byteOffset": 12625920
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 16023552
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 16030720
},
{
"name": "model.layers.22.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 16037888
},
{
"name": "model.layers.22.self_attn.c_attn.q_weight",
"shape": [
4608,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6635520,
"byteOffset": 16047104
},
{
"name": "model.layers.22.self_attn.c_attn.q_scale",
"shape": [
4608,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 829440,
"byteOffset": 22682624
},
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
3584,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5160960,
"byteOffset": 23512064
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
3584,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 645120,
"byteOffset": 28673024
}
],
"md5sum": "692c517dbee27e059c0f2020cf65c49c"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 27181056,
"records": [
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
3584,
1896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 27181056,
"byteOffset": 0
}
],
"md5sum": "a4748c436bdd8372cdd48382a727b5d7"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 54558720,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
37888,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 54558720,
"byteOffset": 0
}
],
"md5sum": "ba51d509ebceb6d72005d7b75f5c12b2"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 27181056,
"records": [
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
3584,
1896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 27181056,
"byteOffset": 0
}
],
"md5sum": "084d4ca76277deb82033f1c8bd4f758e"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 30331904,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
37888,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6819840,
"byteOffset": 0
},
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
3584,
474
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3397632,
"byteOffset": 6819840
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 10217472
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 10224640
},
{
"name": "model.layers.23.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 10231808
},
{
"name": "model.layers.23.self_attn.c_attn.q_weight",
"shape": [
4608,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6635520,
"byteOffset": 10241024
},
{
"name": "model.layers.23.self_attn.c_attn.q_scale",
"shape": [
4608,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 829440,
"byteOffset": 16876544
},
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
3584,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5160960,
"byteOffset": 17705984
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
3584,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 645120,
"byteOffset": 22866944
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
37888,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6819840,
"byteOffset": 23512064
}
],
"md5sum": "52bbdf6350920cd517ab13c37adce1b3"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 54558720,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
"shape": [
37888,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 54558720,
"byteOffset": 0
}
],
"md5sum": "4626961ea87e6f1d37ac3d76c6e0bd54"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 27181056,
"records": [
{
"name": "model.layers.24.mlp.down_proj.q_weight",
"shape": [
3584,
1896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 27181056,
"byteOffset": 0
}
],
"md5sum": "34af059876eeb87324868adfeb53518d"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 26933248,
"records": [
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
3584,
474
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3397632,
"byteOffset": 0
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 3397632
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 3404800
},
{
"name": "model.layers.24.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 3411968
},
{
"name": "model.layers.24.self_attn.c_attn.q_weight",
"shape": [
4608,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6635520,
"byteOffset": 3421184
},
{
"name": "model.layers.24.self_attn.c_attn.q_scale",
"shape": [
4608,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 829440,
"byteOffset": 10056704
},
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
3584,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5160960,
"byteOffset": 10886144
},
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
3584,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 645120,
"byteOffset": 16047104
},
{
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
"shape": [
37888,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6819840,
"byteOffset": 16692224
},
{
"name": "model.layers.24.mlp.down_proj.q_scale",
"shape": [
3584,
474
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3397632,
"byteOffset": 23512064
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 26909696
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 26916864
},
{
"name": "model.layers.25.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 26924032
}
],
"md5sum": "ca0c409cd46eb83375daccff30ddc446"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 54558720,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
"shape": [
37888,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 54558720,
"byteOffset": 0
}
],
"md5sum": "260f4915c6bebe3f3d35ce71b38e7d5a"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 27181056,
"records": [
{
"name": "model.layers.25.mlp.down_proj.q_weight",
"shape": [
3584,
1896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 27181056,
"byteOffset": 0
}
],
"md5sum": "5f870a91a33e316d22438627f90576bf"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 30977024,
"records": [
{
"name": "model.layers.25.self_attn.c_attn.q_weight",
"shape": [
4608,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6635520,
"byteOffset": 0
},
{
"name": "model.layers.25.self_attn.c_attn.q_scale",
"shape": [
4608,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 829440,
"byteOffset": 6635520
},
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
3584,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5160960,
"byteOffset": 7464960
},
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
3584,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 645120,
"byteOffset": 12625920
},
{
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
"shape": [
37888,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6819840,
"byteOffset": 13271040
},
{
"name": "model.layers.25.mlp.down_proj.q_scale",
"shape": [
3584,
474
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3397632,
"byteOffset": 20090880
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 23488512
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 23495680
},
{
"name": "model.layers.26.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 23502848
},
{
"name": "model.layers.26.self_attn.c_attn.q_weight",
"shape": [
4608,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6635520,
"byteOffset": 23512064
},
{
"name": "model.layers.26.self_attn.c_attn.q_scale",
"shape": [
4608,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 829440,
"byteOffset": 30147584
}
],
"md5sum": "167ab58ffb87cff97c407de90818679f"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 54558720,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
"shape": [
37888,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 54558720,
"byteOffset": 0
}
],
"md5sum": "4f887e990bc71f6f8a4e0decebfc7594"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 27181056,
"records": [
{
"name": "model.layers.26.mlp.down_proj.q_weight",
"shape": [
3584,
1896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 27181056,
"byteOffset": 0
}
],
"md5sum": "e7b9bc135e68e5dc8a589ff764415fff"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 54558720,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
"shape": [
37888,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 54558720,
"byteOffset": 0
}
],
"md5sum": "a1a30758cc45630c0d93d51f3f818d4d"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 29318144,
"records": [
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
3584,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5160960,
"byteOffset": 0
},
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
3584,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 645120,
"byteOffset": 5160960
},
{
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
"shape": [
37888,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6819840,
"byteOffset": 5806080
},
{
"name": "model.layers.26.mlp.down_proj.q_scale",
"shape": [
3584,
474
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3397632,
"byteOffset": 12625920
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 16023552
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 16030720
},
{
"name": "model.layers.27.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 16037888
},
{
"name": "model.layers.27.self_attn.c_attn.q_weight",
"shape": [
4608,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6635520,
"byteOffset": 16047104
},
{
"name": "model.layers.27.self_attn.c_attn.q_scale",
"shape": [
4608,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 829440,
"byteOffset": 22682624
},
{
"name": "model.layers.27.self_attn.o_proj.q_weight",
"shape": [
3584,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5160960,
"byteOffset": 23512064
},
{
"name": "model.layers.27.self_attn.o_proj.q_scale",
"shape": [
3584,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 645120,
"byteOffset": 28673024
}
],
"md5sum": "736abd9b5c719582e80835ee09a8ccbe"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 27181056,
"records": [
{
"name": "model.layers.27.mlp.down_proj.q_weight",
"shape": [
3584,
1896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 27181056,
"byteOffset": 0
}
],
"md5sum": "8d130ae08af50136b86c6646b152ca0b"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 218972160,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
152064,
360
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 218972160,
"byteOffset": 0
}
],
"md5sum": "f29c6f6cb47b0e1bdcc147a97cc45c58"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 27371520,
"records": [
{
"name": "lm_head.q_scale",
"shape": [
152064,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 27371520,
"byteOffset": 0
}
],
"md5sum": "f0695bd8440e484d82e1c828c8e1e229"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 10238976,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
"shape": [
37888,
90
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6819840,
"byteOffset": 0
},
{
"name": "model.layers.27.mlp.down_proj.q_scale",
"shape": [
3584,
474
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3397632,
"byteOffset": 6819840
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 10217472
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 10224640
},
{
"name": "model.norm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 10231808
}
],
"md5sum": "c834978673c0fca7555d7eb588d620b7"
}
]
}