gatepoet's picture
Initial commit
4e17260 verified
{
"metadata": {
"ParamSize": 325,
"ParamBytes": 4526981120.0,
"BitsPerParam": 5.000992646497372
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 65536000,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
32000,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 65536000,
"byteOffset": 0
}
],
"md5sum": "2b57581148c833b363b3a46155d6c7a0"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "6ca6a9ebee27547da33fca7f9a5db8b8"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 31784960,
"records": [
{
"name": "model.embed_tokens.q_scale",
"shape": [
32000,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192000,
"byteOffset": 0
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 8192000
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 20774912
},
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 22347776
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 30736384
}
],
"md5sum": "b9ad634c3a1133ba2239edbebecbcaaf"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "44107b290919f876e6154c068bd661ba"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 25182208,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 0
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 7340032
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 11018240
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11026432
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 23609344
}
],
"md5sum": "717ec17e1227154411c1a820dc047261"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "38c6b4269ae350b9282fa431c846ed23"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "b7239cac09cb8273a156c003010be479"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 33046528,
"records": [
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 9437184
},
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 16777216
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20447232
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20455424
},
{
"name": "model.layers.2.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 20463616
}
],
"md5sum": "de1f018181fbbdcd4474bd967e6bde47"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "7d5f9af4aa13184e1d199522f522e369"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "6687708501605032ee921bee4a2253a8"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.2.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 0
},
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 1572864
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 9961472
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 11010048
},
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 18350080
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22020096
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22028288
}
],
"md5sum": "f484c7d54447e8fa3a60554b31576b39"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "4255e2d37ee1260ea398fdb377da60fc"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "dacfa3f4e70f731902865d8bd771e2ad"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 30932992,
"records": [
{
"name": "model.layers.3.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.3.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 23592960
}
],
"md5sum": "6bfed2448f571e67ffc4fb0ea22d7ec4"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "bb5427d504bac40c1d589078f9764a05"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 27279360,
"records": [
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 0
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 3670016
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 3678208
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 3686400
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 16269312
},
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 17842176
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 26230784
}
],
"md5sum": "6ea7d8604ec7fd9d5ed88ac8c72a8b85"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "c01d31ab5e41daccd65013bbb2142aa8"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 25182208,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 0
},
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 7340032
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 11018240
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11026432
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 23609344
}
],
"md5sum": "b56702e3404bdc2aafa82160c94f47ff"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "83520f6b4fa3b4348fc151aada5a9f50"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "8aa79d26e4b00dbec3c96452f6f30adf"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 33046528,
"records": [
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 9437184
},
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 16777216
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20447232
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20455424
},
{
"name": "model.layers.6.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 20463616
}
],
"md5sum": "0b42566d825abc55ea0ee8dd55060446"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "419b16bd7d0d37291266b7f48fa5dcb6"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "b070ae27e587e4a482d3411535572b70"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.6.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 0
},
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 1572864
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 9961472
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 11010048
},
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 18350080
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22020096
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22028288
}
],
"md5sum": "5b0d59357e5021db580dff05fe7814bb"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "86689d860fbb0e16097a7e1765f2c36e"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "e0b09ac9c8fa7c38bb0654c860b221bf"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 30932992,
"records": [
{
"name": "model.layers.7.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.7.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 23592960
}
],
"md5sum": "7373b71e9a1a0b89d25dcd6e1d0ebab3"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "ba4811273bd1f67f87acce4123c0a4d9"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 27279360,
"records": [
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 0
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 3670016
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 3678208
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 3686400
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 16269312
},
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 17842176
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 26230784
}
],
"md5sum": "0c3cd1a038246ac969e16fd27fbee63e"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "ac3e5c45718c63c58b1ff2b468511930"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 25182208,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 0
},
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 7340032
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 11018240
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11026432
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 23609344
}
],
"md5sum": "4a92b0010f7779fe8146c7e119ad1e38"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "aa25f9417b4b647b4dd585481684096d"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "644979ebe6bc7ddad8e041fd6643c64f"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 33046528,
"records": [
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 9437184
},
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 16777216
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20447232
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20455424
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 20463616
}
],
"md5sum": "3ddc1e7858e2835bae793e1b398af8b8"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "7eaaf6173a0875d16f06151daf42b60e"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "77fb7d5486f709c3d1cca18426db011b"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.10.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 0
},
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 1572864
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 9961472
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 11010048
},
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 18350080
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22020096
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22028288
}
],
"md5sum": "0d4346ce38f66e41d89d7ed4784833e6"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "088efa948932204d94d80b7a6875cfcc"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "85aa88ee63924ccc34f955bdecc749cc"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 30932992,
"records": [
{
"name": "model.layers.11.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.11.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 23592960
}
],
"md5sum": "31f7733650c0a63d200a9c940be44555"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "b32e72d14b38e6ceb04698c3ad891d01"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 27279360,
"records": [
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 0
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 3670016
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 3678208
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 3686400
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 16269312
},
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 17842176
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 26230784
}
],
"md5sum": "175c665ccd16fe72caf13df0f77391ed"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "076b3e469d8d6ea5e6a58ca061429ff0"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 25182208,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 0
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 7340032
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 11018240
},
{
"name": "model.layers.13.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11026432
},
{
"name": "model.layers.13.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 23609344
}
],
"md5sum": "d3ff4b7e1f19f50fc687bbeae49715b2"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "82060f5401635818d122cddc5b5c185d"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "33ca18a531504a44eb61575d013da59d"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 33046528,
"records": [
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 9437184
},
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 16777216
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20447232
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20455424
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 20463616
}
],
"md5sum": "12539be0707daf80af062fe537e0f4f6"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "20a90cec41929343164315d90554e302"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "030432ae84253006e40b4fb977074b31"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.14.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 0
},
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 1572864
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 9961472
},
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 11010048
},
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 18350080
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22020096
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22028288
}
],
"md5sum": "50bc5999a488a9ad297f82a1486a8f4e"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "6ea07ab5a38db124863e509b07acd32d"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "626c8b2a505460b91b5c4b58fc9b7875"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 30932992,
"records": [
{
"name": "model.layers.15.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.15.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 23592960
}
],
"md5sum": "88700075416df65b850ba48c131725b4"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "842a56a0f7e583ae89e23cb4d099a7c0"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 27279360,
"records": [
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 0
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 3670016
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 3678208
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 3686400
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 16269312
},
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 17842176
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 26230784
}
],
"md5sum": "ad2bd38575599ee2a36f279b6225f2fa"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "68cab40b14caeb9255c28b704803c5e6"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 25182208,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 0
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 7340032
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 11018240
},
{
"name": "model.layers.17.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11026432
},
{
"name": "model.layers.17.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 23609344
}
],
"md5sum": "d84be5520aee019e832d40fcf36822a2"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "5511bec942d3afdd65936d0da857722f"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "83f667fd608df6263e5ed7473a6faa23"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 33046528,
"records": [
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 9437184
},
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 16777216
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20447232
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20455424
},
{
"name": "model.layers.18.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 20463616
}
],
"md5sum": "589bb0a1ab6cc09bdd82783882a55694"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "27335bb9a3b5fa4324e4ff3ad39cf993"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "44add29b1c56f7f02b89e6e06b07195b"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.18.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 0
},
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 1572864
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 9961472
},
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 11010048
},
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 18350080
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22020096
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22028288
}
],
"md5sum": "dc4afe30101e1f2325138af45e2c5292"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "219aba7b043ca595d412f23a6bf5a8c5"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "cbecf041fe891ef327960cb09b1dc37b"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 30932992,
"records": [
{
"name": "model.layers.19.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.19.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 23592960
}
],
"md5sum": "8d9c6d6891df20fcff00f2ee710557a1"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "2c87245d5fc6cc7c5f49bcd9bfa02bc5"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 27279360,
"records": [
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 0
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 3670016
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 3678208
},
{
"name": "model.layers.20.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 3686400
},
{
"name": "model.layers.20.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 16269312
},
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 17842176
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 26230784
}
],
"md5sum": "e2dec404e99446470a2054abf6d2a1b6"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "3f66d2e950e68589fcc70bab8da254ff"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 25182208,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 0
},
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 7340032
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 11018240
},
{
"name": "model.layers.21.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11026432
},
{
"name": "model.layers.21.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 23609344
}
],
"md5sum": "60afeee7b1ffb6a5fa5e2cae76a1e6fb"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "bc241bf3bfcb43f07068f9c84e66c662"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "09d8eddef2e30603a5a53e3157b5a0ae"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 33046528,
"records": [
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 9437184
},
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 16777216
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20447232
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20455424
},
{
"name": "model.layers.22.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 20463616
}
],
"md5sum": "4e21374fb169864295a70e05bf77843e"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "798084215b2a4c8c3bc834c22c163af7"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "6d3227175ca9bf4b5f4c7bae892fcebd"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.22.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 0
},
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 1572864
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 9961472
},
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 11010048
},
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 18350080
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22020096
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22028288
}
],
"md5sum": "f3f2f2eb894ad1c498aa31c0308a2e60"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "06f878061a9ca1a9db307c125b2dec8a"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "3b83323aad9509de69b17d9cefcf9eac"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 30932992,
"records": [
{
"name": "model.layers.23.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.23.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 23592960
}
],
"md5sum": "fcd03ffc2f4913ad199d5e25258af100"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "47073c6581c2dfe54d9c45bbc04a8d48"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 27279360,
"records": [
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 0
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 3670016
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 3678208
},
{
"name": "model.layers.24.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 3686400
},
{
"name": "model.layers.24.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 16269312
},
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 17842176
},
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 26230784
}
],
"md5sum": "11390ad421d38afd76fec058952acd92"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.24.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "7954727fec023ef6f9dde7af71633b6a"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 25182208,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 0
},
{
"name": "model.layers.24.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 7340032
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 11018240
},
{
"name": "model.layers.25.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11026432
},
{
"name": "model.layers.25.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 23609344
}
],
"md5sum": "289eadf9123efca05ffd0055094bc8ed"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "295195734f58e386adfc264052f4ff35"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.25.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "c63f2862167a32705ea76d71dd290f0a"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 33046528,
"records": [
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 9437184
},
{
"name": "model.layers.25.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 16777216
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20447232
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20455424
},
{
"name": "model.layers.26.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 20463616
}
],
"md5sum": "f82105042c25e5b687c1ea11c7ec338b"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "407b6153d6a415f31ecf1869afdb7131"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.26.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "766d17b7a621aaebacd10f3773214514"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.26.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 0
},
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 1572864
},
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 9961472
},
{
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 11010048
},
{
"name": "model.layers.26.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 18350080
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22020096
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22028288
}
],
"md5sum": "987ca7537e2e73af6ad2bc5b3a7d9449"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "017b5837c3992a60457a79e72cacc626"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.27.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "f22a1a0fd6dfa905299a65eaecc03129"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 30932992,
"records": [
{
"name": "model.layers.27.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.27.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.27.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.27.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 23592960
}
],
"md5sum": "91019f28d13461e2926f5281d5bcb71b"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "7e5180ff86b725845b2c249c166ebe37"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 27279360,
"records": [
{
"name": "model.layers.27.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 0
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 3670016
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 3678208
},
{
"name": "model.layers.28.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 3686400
},
{
"name": "model.layers.28.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 16269312
},
{
"name": "model.layers.28.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 17842176
},
{
"name": "model.layers.28.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 26230784
}
],
"md5sum": "962c6b434bb9c2fa4f9188dd4de17f06"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.28.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "2371a17cce67065e7dcd215e5efa35c1"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 25182208,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 0
},
{
"name": "model.layers.28.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 7340032
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.28.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 11018240
},
{
"name": "model.layers.29.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11026432
},
{
"name": "model.layers.29.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 23609344
}
],
"md5sum": "049232b26fd661de4215be5883642d29"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "d1e12289d3542e2b5717b2f2208b0a59"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.29.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "283339ebdfe83ed00863c24ddcb7c0cf"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 33046528,
"records": [
{
"name": "model.layers.29.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.29.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.29.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 9437184
},
{
"name": "model.layers.29.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 16777216
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20447232
},
{
"name": "model.layers.29.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20455424
},
{
"name": "model.layers.30.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 20463616
}
],
"md5sum": "61edde125d206ee4068511cba24ed61f"
},
{
"dataPath": "params_shard_99.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "16c3f02e527dfe7039d5c21f7b73634b"
},
{
"dataPath": "params_shard_100.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.30.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "281560e4bb0abb04c74c6ef6e1edc8db"
},
{
"dataPath": "params_shard_101.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.30.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 0
},
{
"name": "model.layers.30.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 1572864
},
{
"name": "model.layers.30.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 9961472
},
{
"name": "model.layers.30.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 11010048
},
{
"name": "model.layers.30.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 18350080
},
{
"name": "model.layers.30.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22020096
},
{
"name": "model.layers.30.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22028288
}
],
"md5sum": "b4b58c5e010350402e3ad9022fca7fee"
},
{
"dataPath": "params_shard_102.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "c7dc9bb31b7d0aecc5615c46a96dc2a8"
},
{
"dataPath": "params_shard_103.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.31.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "c97e4fb122eb88ace1310a156b335459"
},
{
"dataPath": "params_shard_104.bin",
"format": "raw-shard",
"nbytes": 30932992,
"records": [
{
"name": "model.layers.31.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.31.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.31.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.31.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.31.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 23592960
}
],
"md5sum": "db9ade35db53aa953d2a5b9b40faf437"
},
{
"dataPath": "params_shard_105.bin",
"format": "raw-shard",
"nbytes": 65536000,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
32000,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 65536000,
"byteOffset": 0
}
],
"md5sum": "d0c2e9348270fd3413eecf40cc515729"
},
{
"dataPath": "params_shard_106.bin",
"format": "raw-shard",
"nbytes": 11886592,
"records": [
{
"name": "model.layers.31.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 0
},
{
"name": "model.layers.31.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 3670016
},
{
"name": "model.layers.31.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 3678208
},
{
"name": "model.norm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 3686400
},
{
"name": "lm_head.q_scale",
"shape": [
32000,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192000,
"byteOffset": 3694592
}
],
"md5sum": "6a2926f35133e3f72911941f64f8330b"
}
]
}