Sailor-4B-Chat-q4f16_1-Opilot / ndarray-cache.json
julientfai's picture
Upload folder using huggingface_hub
a03001f verified
{
"metadata": {
"ParamSize": 445,
"ParamBytes": 2222822400.0,
"BitsPerParam": 4.501497946035061
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 194478080,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
151936,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 194478080,
"byteOffset": 0
}
],
"md5sum": "c3e9d6c33f156c21a7d4aa1aabe20140"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 194478080,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
151936,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 194478080,
"byteOffset": 0
}
],
"md5sum": "f24bd841edada926b1bdf5a7628c6c68"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 24309760,
"records": [
{
"name": "model.embed_tokens.q_scale",
"shape": [
151936,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 24309760,
"byteOffset": 0
}
],
"md5sum": "4929a87aedb9dce94ca343bfea1befb7"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 33162240,
"records": [
{
"name": "lm_head.q_scale",
"shape": [
151936,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 24309760,
"byteOffset": 0
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 24309760
},
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 24314880
}
],
"md5sum": "5eeace80b3449b997cd503ca9056cda2"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 32092160,
"records": [
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
2560,
216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 0
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 1105920
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
13824,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 18800640
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 21012480
},
{
"name": "model.layers.0.self_attn.c_attn.bias",
"shape": [
7680
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 15360,
"byteOffset": 21017600
},
{
"name": "model.layers.0.self_attn.c_attn.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 21032960
},
{
"name": "model.layers.0.self_attn.c_attn.q_scale",
"shape": [
7680,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1228800,
"byteOffset": 30863360
}
],
"md5sum": "fcd57d38a484b7ef1fe8d65b5bcd52f1"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 33551360,
"records": [
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 0
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
2560,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 3276800
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 3686400
},
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 3691520
},
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
2560,
216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 12538880
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 13644800
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
13824,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 31339520
}
],
"md5sum": "e5005ef4912dd09e9ec1c598f56fb543"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "ae42282f108fb585809ad6c53936c787"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 26956800,
"records": [
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 0
},
{
"name": "model.layers.1.self_attn.c_attn.bias",
"shape": [
7680
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 15360,
"byteOffset": 5120
},
{
"name": "model.layers.1.self_attn.c_attn.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 20480
},
{
"name": "model.layers.1.self_attn.c_attn.q_scale",
"shape": [
7680,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1228800,
"byteOffset": 9850880
},
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 11079680
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
2560,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 14356480
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14766080
},
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14771200
},
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
2560,
216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 23618560
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
13824,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 24724480
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26936320
},
{
"name": "model.layers.10.self_attn.c_attn.bias",
"shape": [
7680
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 15360,
"byteOffset": 26941440
}
],
"md5sum": "62cba6f40711457fce1e1a7ef934473b"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "69c2b88960ffa81d56e1840e7d6313f3"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 26936320,
"records": [
{
"name": "model.layers.10.self_attn.c_attn.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.10.self_attn.c_attn.q_scale",
"shape": [
7680,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1228800,
"byteOffset": 9830400
},
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 11059200
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
2560,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 14336000
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14745600
},
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14750720
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
2560,
216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 23598080
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
13824,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 24704000
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26915840
},
{
"name": "model.layers.11.self_attn.c_attn.bias",
"shape": [
7680
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 15360,
"byteOffset": 26920960
}
],
"md5sum": "7d062ea11e26664d25dbd9c6ed956b1a"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "00bdad9477a2069a84733fa9a41cea74"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 26936320,
"records": [
{
"name": "model.layers.11.self_attn.c_attn.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.11.self_attn.c_attn.q_scale",
"shape": [
7680,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1228800,
"byteOffset": 9830400
},
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 11059200
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
2560,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 14336000
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14745600
},
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14750720
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
2560,
216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 23598080
},
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
13824,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 24704000
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26915840
},
{
"name": "model.layers.12.self_attn.c_attn.bias",
"shape": [
7680
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 15360,
"byteOffset": 26920960
}
],
"md5sum": "52054698012966698c9c800381fac35f"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "01ab6ea004e12b8f24dbc1781c9da92b"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 26936320,
"records": [
{
"name": "model.layers.12.self_attn.c_attn.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.12.self_attn.c_attn.q_scale",
"shape": [
7680,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1228800,
"byteOffset": 9830400
},
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 11059200
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
2560,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 14336000
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14745600
},
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14750720
},
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
2560,
216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 23598080
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
13824,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 24704000
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26915840
},
{
"name": "model.layers.13.self_attn.c_attn.bias",
"shape": [
7680
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 15360,
"byteOffset": 26920960
}
],
"md5sum": "afd636f7d4a778a77ba27ea0d32667dd"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "07054519cceff55193e4b3466a9de282"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 26936320,
"records": [
{
"name": "model.layers.13.self_attn.c_attn.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.13.self_attn.c_attn.q_scale",
"shape": [
7680,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1228800,
"byteOffset": 9830400
},
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 11059200
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
2560,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 14336000
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14745600
},
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14750720
},
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
2560,
216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 23598080
},
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
13824,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 24704000
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26915840
},
{
"name": "model.layers.14.self_attn.c_attn.bias",
"shape": [
7680
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 15360,
"byteOffset": 26920960
}
],
"md5sum": "9f48b4aaaa4067b1ed088629b83aacd1"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "d4613f5d96a59ba51021a772ce147e15"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 26936320,
"records": [
{
"name": "model.layers.14.self_attn.c_attn.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.14.self_attn.c_attn.q_scale",
"shape": [
7680,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1228800,
"byteOffset": 9830400
},
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 11059200
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
2560,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 14336000
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14745600
},
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14750720
},
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
2560,
216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 23598080
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
13824,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 24704000
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26915840
},
{
"name": "model.layers.15.self_attn.c_attn.bias",
"shape": [
7680
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 15360,
"byteOffset": 26920960
}
],
"md5sum": "14f549e41346b253782dcaa0dc8b5d52"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "54ec764f189559cd4c8fbe8f6e531df9"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 26936320,
"records": [
{
"name": "model.layers.15.self_attn.c_attn.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.15.self_attn.c_attn.q_scale",
"shape": [
7680,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1228800,
"byteOffset": 9830400
},
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 11059200
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
2560,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 14336000
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14745600
},
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14750720
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
2560,
216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 23598080
},
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
13824,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 24704000
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26915840
},
{
"name": "model.layers.16.self_attn.c_attn.bias",
"shape": [
7680
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 15360,
"byteOffset": 26920960
}
],
"md5sum": "5a148d8c913b4637519c5145c9dceb9d"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "c4edc332889d20125202949e236e7da3"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 26936320,
"records": [
{
"name": "model.layers.16.self_attn.c_attn.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.16.self_attn.c_attn.q_scale",
"shape": [
7680,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1228800,
"byteOffset": 9830400
},
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 11059200
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
2560,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 14336000
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14745600
},
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14750720
},
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
2560,
216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 23598080
},
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
13824,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 24704000
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26915840
},
{
"name": "model.layers.17.self_attn.c_attn.bias",
"shape": [
7680
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 15360,
"byteOffset": 26920960
}
],
"md5sum": "875e6a6e016aa93d481f9b9213678fff"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "ff058a135e3dde2f85108795d4155fb4"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 26936320,
"records": [
{
"name": "model.layers.17.self_attn.c_attn.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.17.self_attn.c_attn.q_scale",
"shape": [
7680,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1228800,
"byteOffset": 9830400
},
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 11059200
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
2560,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 14336000
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14745600
},
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14750720
},
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
2560,
216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 23598080
},
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
13824,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 24704000
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26915840
},
{
"name": "model.layers.18.self_attn.c_attn.bias",
"shape": [
7680
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 15360,
"byteOffset": 26920960
}
],
"md5sum": "d1843f31d174acdf34360a3239f80b75"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "e21763103fc1ac0a230c07e66988e695"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 26936320,
"records": [
{
"name": "model.layers.18.self_attn.c_attn.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.18.self_attn.c_attn.q_scale",
"shape": [
7680,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1228800,
"byteOffset": 9830400
},
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 11059200
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
2560,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 14336000
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14745600
},
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14750720
},
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
2560,
216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 23598080
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
13824,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 24704000
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26915840
},
{
"name": "model.layers.19.self_attn.c_attn.bias",
"shape": [
7680
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 15360,
"byteOffset": 26920960
}
],
"md5sum": "adc3c2220368d0df014a7495bbffcc20"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "1958b95c37d4019d1a3bcbfdabdfebf3"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 26936320,
"records": [
{
"name": "model.layers.19.self_attn.c_attn.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.19.self_attn.c_attn.q_scale",
"shape": [
7680,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1228800,
"byteOffset": 9830400
},
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 11059200
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
2560,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 14336000
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14745600
},
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14750720
},
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
2560,
216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 23598080
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
13824,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 24704000
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26915840
},
{
"name": "model.layers.2.self_attn.c_attn.bias",
"shape": [
7680
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 15360,
"byteOffset": 26920960
}
],
"md5sum": "0fd7f9598737ee79e02545d9904d7ad5"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "ff75698217adacbbd1f004449b1f2c37"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 26936320,
"records": [
{
"name": "model.layers.2.self_attn.c_attn.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.2.self_attn.c_attn.q_scale",
"shape": [
7680,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1228800,
"byteOffset": 9830400
},
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 11059200
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
2560,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 14336000
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14745600
},
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14750720
},
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
2560,
216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 23598080
},
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
13824,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 24704000
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26915840
},
{
"name": "model.layers.20.self_attn.c_attn.bias",
"shape": [
7680
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 15360,
"byteOffset": 26920960
}
],
"md5sum": "8b846bfba9b69fae849c2036449d6277"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "3fd3a93c99504e7a51da96cf6ef06d47"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 26936320,
"records": [
{
"name": "model.layers.20.self_attn.c_attn.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.20.self_attn.c_attn.q_scale",
"shape": [
7680,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1228800,
"byteOffset": 9830400
},
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 11059200
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
2560,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 14336000
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14745600
},
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14750720
},
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
2560,
216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 23598080
},
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
13824,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 24704000
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26915840
},
{
"name": "model.layers.21.self_attn.c_attn.bias",
"shape": [
7680
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 15360,
"byteOffset": 26920960
}
],
"md5sum": "55527b116c6cc470042780e7dfdf81b1"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "8cf1441c5e6dc697f78c1e104d773476"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 26936320,
"records": [
{
"name": "model.layers.21.self_attn.c_attn.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.21.self_attn.c_attn.q_scale",
"shape": [
7680,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1228800,
"byteOffset": 9830400
},
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 11059200
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
2560,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 14336000
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14745600
},
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14750720
},
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
2560,
216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 23598080
},
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
13824,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 24704000
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26915840
},
{
"name": "model.layers.22.self_attn.c_attn.bias",
"shape": [
7680
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 15360,
"byteOffset": 26920960
}
],
"md5sum": "28c5e9285bd3ef8c35c0a176b2db9b2c"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "0e23d1829142a240347097fb8842f2af"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 26936320,
"records": [
{
"name": "model.layers.22.self_attn.c_attn.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.22.self_attn.c_attn.q_scale",
"shape": [
7680,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1228800,
"byteOffset": 9830400
},
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 11059200
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
2560,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 14336000
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14745600
},
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14750720
},
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
2560,
216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 23598080
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
13824,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 24704000
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26915840
},
{
"name": "model.layers.23.self_attn.c_attn.bias",
"shape": [
7680
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 15360,
"byteOffset": 26920960
}
],
"md5sum": "0009e02fa270772a0c69f86e483c4cc2"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "6dc454761156189a8aa7b63ffe3b2174"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 26936320,
"records": [
{
"name": "model.layers.23.self_attn.c_attn.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.23.self_attn.c_attn.q_scale",
"shape": [
7680,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1228800,
"byteOffset": 9830400
},
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 11059200
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
2560,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 14336000
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14745600
},
{
"name": "model.layers.24.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14750720
},
{
"name": "model.layers.24.mlp.down_proj.q_scale",
"shape": [
2560,
216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 23598080
},
{
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
"shape": [
13824,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 24704000
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26915840
},
{
"name": "model.layers.24.self_attn.c_attn.bias",
"shape": [
7680
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 15360,
"byteOffset": 26920960
}
],
"md5sum": "15242419611f19a4b8b02b23dda09478"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "b4869bdd51d1db3fc60d901438451d9b"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 26936320,
"records": [
{
"name": "model.layers.24.self_attn.c_attn.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.24.self_attn.c_attn.q_scale",
"shape": [
7680,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1228800,
"byteOffset": 9830400
},
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 11059200
},
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
2560,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 14336000
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14745600
},
{
"name": "model.layers.25.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14750720
},
{
"name": "model.layers.25.mlp.down_proj.q_scale",
"shape": [
2560,
216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 23598080
},
{
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
"shape": [
13824,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 24704000
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26915840
},
{
"name": "model.layers.25.self_attn.c_attn.bias",
"shape": [
7680
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 15360,
"byteOffset": 26920960
}
],
"md5sum": "13a74b94fdd380f971b6c061d8ea7e7a"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "ea3c05a7e131a966e3cecfde6d4287c3"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 26936320,
"records": [
{
"name": "model.layers.25.self_attn.c_attn.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.25.self_attn.c_attn.q_scale",
"shape": [
7680,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1228800,
"byteOffset": 9830400
},
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 11059200
},
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
2560,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 14336000
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14745600
},
{
"name": "model.layers.26.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14750720
},
{
"name": "model.layers.26.mlp.down_proj.q_scale",
"shape": [
2560,
216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 23598080
},
{
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
"shape": [
13824,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 24704000
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26915840
},
{
"name": "model.layers.26.self_attn.c_attn.bias",
"shape": [
7680
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 15360,
"byteOffset": 26920960
}
],
"md5sum": "114a0b47e7ac4312caf647b38d4f9a6f"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "e0b159604e469642c61b962083024d89"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 26936320,
"records": [
{
"name": "model.layers.26.self_attn.c_attn.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.26.self_attn.c_attn.q_scale",
"shape": [
7680,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1228800,
"byteOffset": 9830400
},
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 11059200
},
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
2560,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 14336000
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14745600
},
{
"name": "model.layers.27.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14750720
},
{
"name": "model.layers.27.mlp.down_proj.q_scale",
"shape": [
2560,
216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 23598080
},
{
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
"shape": [
13824,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 24704000
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26915840
},
{
"name": "model.layers.27.self_attn.c_attn.bias",
"shape": [
7680
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 15360,
"byteOffset": 26920960
}
],
"md5sum": "b8bf6785f24ade67f8b2fd7359da956e"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "3d461879ca83df8c8c4833d3806bcc60"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 26936320,
"records": [
{
"name": "model.layers.27.self_attn.c_attn.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.27.self_attn.c_attn.q_scale",
"shape": [
7680,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1228800,
"byteOffset": 9830400
},
{
"name": "model.layers.27.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 11059200
},
{
"name": "model.layers.27.self_attn.o_proj.q_scale",
"shape": [
2560,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 14336000
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14745600
},
{
"name": "model.layers.28.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14750720
},
{
"name": "model.layers.28.mlp.down_proj.q_scale",
"shape": [
2560,
216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 23598080
},
{
"name": "model.layers.28.mlp.gate_up_proj.q_scale",
"shape": [
13824,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 24704000
},
{
"name": "model.layers.28.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26915840
},
{
"name": "model.layers.28.self_attn.c_attn.bias",
"shape": [
7680
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 15360,
"byteOffset": 26920960
}
],
"md5sum": "e8f97a666a66b143c26a553f5548ec5b"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "f579c1d4ed1988f7cb894fd7dcb2f7d1"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 26936320,
"records": [
{
"name": "model.layers.28.self_attn.c_attn.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.28.self_attn.c_attn.q_scale",
"shape": [
7680,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1228800,
"byteOffset": 9830400
},
{
"name": "model.layers.28.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 11059200
},
{
"name": "model.layers.28.self_attn.o_proj.q_scale",
"shape": [
2560,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 14336000
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14745600
},
{
"name": "model.layers.29.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14750720
},
{
"name": "model.layers.29.mlp.down_proj.q_scale",
"shape": [
2560,
216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 23598080
},
{
"name": "model.layers.29.mlp.gate_up_proj.q_scale",
"shape": [
13824,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 24704000
},
{
"name": "model.layers.29.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26915840
},
{
"name": "model.layers.29.self_attn.c_attn.bias",
"shape": [
7680
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 15360,
"byteOffset": 26920960
}
],
"md5sum": "51ad70ec0ab01f458e1906106aa94b72"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "48dd8d60a0e4fcddcaf4f2c383fbd0b2"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 26936320,
"records": [
{
"name": "model.layers.29.self_attn.c_attn.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.29.self_attn.c_attn.q_scale",
"shape": [
7680,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1228800,
"byteOffset": 9830400
},
{
"name": "model.layers.29.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 11059200
},
{
"name": "model.layers.29.self_attn.o_proj.q_scale",
"shape": [
2560,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 14336000
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14745600
},
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14750720
},
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
2560,
216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 23598080
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
13824,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 24704000
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26915840
},
{
"name": "model.layers.3.self_attn.c_attn.bias",
"shape": [
7680
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 15360,
"byteOffset": 26920960
}
],
"md5sum": "db3b75264014ede0b092fbde337b7e80"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "884f40ef9d4291063da28fb4056f980a"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 26936320,
"records": [
{
"name": "model.layers.3.self_attn.c_attn.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.3.self_attn.c_attn.q_scale",
"shape": [
7680,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1228800,
"byteOffset": 9830400
},
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 11059200
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
2560,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 14336000
},
{
"name": "model.layers.30.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14745600
},
{
"name": "model.layers.30.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14750720
},
{
"name": "model.layers.30.mlp.down_proj.q_scale",
"shape": [
2560,
216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 23598080
},
{
"name": "model.layers.30.mlp.gate_up_proj.q_scale",
"shape": [
13824,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 24704000
},
{
"name": "model.layers.30.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26915840
},
{
"name": "model.layers.30.self_attn.c_attn.bias",
"shape": [
7680
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 15360,
"byteOffset": 26920960
}
],
"md5sum": "f7bb94bf44235da4500bacfdc6cd3ea4"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "fac20210e4a2197d17f52f17303ab1ff"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 26936320,
"records": [
{
"name": "model.layers.30.self_attn.c_attn.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.30.self_attn.c_attn.q_scale",
"shape": [
7680,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1228800,
"byteOffset": 9830400
},
{
"name": "model.layers.30.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 11059200
},
{
"name": "model.layers.30.self_attn.o_proj.q_scale",
"shape": [
2560,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 14336000
},
{
"name": "model.layers.31.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14745600
},
{
"name": "model.layers.31.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14750720
},
{
"name": "model.layers.31.mlp.down_proj.q_scale",
"shape": [
2560,
216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 23598080
},
{
"name": "model.layers.31.mlp.gate_up_proj.q_scale",
"shape": [
13824,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 24704000
},
{
"name": "model.layers.31.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26915840
},
{
"name": "model.layers.31.self_attn.c_attn.bias",
"shape": [
7680
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 15360,
"byteOffset": 26920960
}
],
"md5sum": "e93e5b4c83b0a61114347d87ef0623bc"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.32.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "62bc40a1075f946e1c08548151c3a7e0"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 26936320,
"records": [
{
"name": "model.layers.31.self_attn.c_attn.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.31.self_attn.c_attn.q_scale",
"shape": [
7680,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1228800,
"byteOffset": 9830400
},
{
"name": "model.layers.31.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 11059200
},
{
"name": "model.layers.31.self_attn.o_proj.q_scale",
"shape": [
2560,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 14336000
},
{
"name": "model.layers.32.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14745600
},
{
"name": "model.layers.32.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14750720
},
{
"name": "model.layers.32.mlp.down_proj.q_scale",
"shape": [
2560,
216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 23598080
},
{
"name": "model.layers.32.mlp.gate_up_proj.q_scale",
"shape": [
13824,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 24704000
},
{
"name": "model.layers.32.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26915840
},
{
"name": "model.layers.32.self_attn.c_attn.bias",
"shape": [
7680
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 15360,
"byteOffset": 26920960
}
],
"md5sum": "865bd3ea9bb7856dd47ff029765bd48b"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.33.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "7081ca9671acfceb7f5d65553d592893"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 26936320,
"records": [
{
"name": "model.layers.32.self_attn.c_attn.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.32.self_attn.c_attn.q_scale",
"shape": [
7680,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1228800,
"byteOffset": 9830400
},
{
"name": "model.layers.32.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 11059200
},
{
"name": "model.layers.32.self_attn.o_proj.q_scale",
"shape": [
2560,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 14336000
},
{
"name": "model.layers.33.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14745600
},
{
"name": "model.layers.33.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14750720
},
{
"name": "model.layers.33.mlp.down_proj.q_scale",
"shape": [
2560,
216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 23598080
},
{
"name": "model.layers.33.mlp.gate_up_proj.q_scale",
"shape": [
13824,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 24704000
},
{
"name": "model.layers.33.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26915840
},
{
"name": "model.layers.33.self_attn.c_attn.bias",
"shape": [
7680
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 15360,
"byteOffset": 26920960
}
],
"md5sum": "a948d14e82c2cb8f7eba3d8f4db9b61b"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.34.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "dc5bbd920427c74436ec2ad881e05f47"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 26936320,
"records": [
{
"name": "model.layers.33.self_attn.c_attn.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.33.self_attn.c_attn.q_scale",
"shape": [
7680,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1228800,
"byteOffset": 9830400
},
{
"name": "model.layers.33.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 11059200
},
{
"name": "model.layers.33.self_attn.o_proj.q_scale",
"shape": [
2560,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 14336000
},
{
"name": "model.layers.34.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14745600
},
{
"name": "model.layers.34.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14750720
},
{
"name": "model.layers.34.mlp.down_proj.q_scale",
"shape": [
2560,
216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 23598080
},
{
"name": "model.layers.34.mlp.gate_up_proj.q_scale",
"shape": [
13824,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 24704000
},
{
"name": "model.layers.34.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26915840
},
{
"name": "model.layers.34.self_attn.c_attn.bias",
"shape": [
7680
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 15360,
"byteOffset": 26920960
}
],
"md5sum": "673bfa3cd6796d21de363141ca59d934"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.35.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "86146f29f1edcf0556aedfb3488479ee"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 26936320,
"records": [
{
"name": "model.layers.34.self_attn.c_attn.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.34.self_attn.c_attn.q_scale",
"shape": [
7680,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1228800,
"byteOffset": 9830400
},
{
"name": "model.layers.34.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 11059200
},
{
"name": "model.layers.34.self_attn.o_proj.q_scale",
"shape": [
2560,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 14336000
},
{
"name": "model.layers.35.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14745600
},
{
"name": "model.layers.35.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14750720
},
{
"name": "model.layers.35.mlp.down_proj.q_scale",
"shape": [
2560,
216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 23598080
},
{
"name": "model.layers.35.mlp.gate_up_proj.q_scale",
"shape": [
13824,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 24704000
},
{
"name": "model.layers.35.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26915840
},
{
"name": "model.layers.35.self_attn.c_attn.bias",
"shape": [
7680
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 15360,
"byteOffset": 26920960
}
],
"md5sum": "53cb760e1a70c97e57998b447a6bd2c3"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.36.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "7d2ea3116aca9b1b7e035f197eb69046"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 26936320,
"records": [
{
"name": "model.layers.35.self_attn.c_attn.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.35.self_attn.c_attn.q_scale",
"shape": [
7680,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1228800,
"byteOffset": 9830400
},
{
"name": "model.layers.35.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 11059200
},
{
"name": "model.layers.35.self_attn.o_proj.q_scale",
"shape": [
2560,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 14336000
},
{
"name": "model.layers.36.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14745600
},
{
"name": "model.layers.36.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14750720
},
{
"name": "model.layers.36.mlp.down_proj.q_scale",
"shape": [
2560,
216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 23598080
},
{
"name": "model.layers.36.mlp.gate_up_proj.q_scale",
"shape": [
13824,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 24704000
},
{
"name": "model.layers.36.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26915840
},
{
"name": "model.layers.36.self_attn.c_attn.bias",
"shape": [
7680
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 15360,
"byteOffset": 26920960
}
],
"md5sum": "74d37dff56b8e1ae7e0c3dfdad0dbfb5"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.37.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "4dd36960c740f3081022ad3e094806e9"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 26936320,
"records": [
{
"name": "model.layers.36.self_attn.c_attn.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.36.self_attn.c_attn.q_scale",
"shape": [
7680,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1228800,
"byteOffset": 9830400
},
{
"name": "model.layers.36.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 11059200
},
{
"name": "model.layers.36.self_attn.o_proj.q_scale",
"shape": [
2560,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 14336000
},
{
"name": "model.layers.37.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14745600
},
{
"name": "model.layers.37.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14750720
},
{
"name": "model.layers.37.mlp.down_proj.q_scale",
"shape": [
2560,
216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 23598080
},
{
"name": "model.layers.37.mlp.gate_up_proj.q_scale",
"shape": [
13824,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 24704000
},
{
"name": "model.layers.37.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26915840
},
{
"name": "model.layers.37.self_attn.c_attn.bias",
"shape": [
7680
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 15360,
"byteOffset": 26920960
}
],
"md5sum": "dd7f6a16b300317606dec91e9b0010f7"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.38.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "0b186e7154dfb607d3641b64943b4e88"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 26936320,
"records": [
{
"name": "model.layers.37.self_attn.c_attn.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.37.self_attn.c_attn.q_scale",
"shape": [
7680,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1228800,
"byteOffset": 9830400
},
{
"name": "model.layers.37.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 11059200
},
{
"name": "model.layers.37.self_attn.o_proj.q_scale",
"shape": [
2560,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 14336000
},
{
"name": "model.layers.38.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14745600
},
{
"name": "model.layers.38.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14750720
},
{
"name": "model.layers.38.mlp.down_proj.q_scale",
"shape": [
2560,
216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 23598080
},
{
"name": "model.layers.38.mlp.gate_up_proj.q_scale",
"shape": [
13824,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 24704000
},
{
"name": "model.layers.38.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26915840
},
{
"name": "model.layers.38.self_attn.c_attn.bias",
"shape": [
7680
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 15360,
"byteOffset": 26920960
}
],
"md5sum": "9d41c8040c08778155500dd03725a44b"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.39.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "d28fa20bacb9a18dadd36cbc7d32518e"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 26936320,
"records": [
{
"name": "model.layers.38.self_attn.c_attn.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.38.self_attn.c_attn.q_scale",
"shape": [
7680,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1228800,
"byteOffset": 9830400
},
{
"name": "model.layers.38.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 11059200
},
{
"name": "model.layers.38.self_attn.o_proj.q_scale",
"shape": [
2560,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 14336000
},
{
"name": "model.layers.39.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14745600
},
{
"name": "model.layers.39.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14750720
},
{
"name": "model.layers.39.mlp.down_proj.q_scale",
"shape": [
2560,
216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 23598080
},
{
"name": "model.layers.39.mlp.gate_up_proj.q_scale",
"shape": [
13824,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 24704000
},
{
"name": "model.layers.39.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26915840
},
{
"name": "model.layers.39.self_attn.c_attn.bias",
"shape": [
7680
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 15360,
"byteOffset": 26920960
}
],
"md5sum": "1d7bfdd204c4ba7538d49579cf6875ed"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "631eaff454cdfe910020247dde81a3f5"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 26936320,
"records": [
{
"name": "model.layers.39.self_attn.c_attn.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.39.self_attn.c_attn.q_scale",
"shape": [
7680,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1228800,
"byteOffset": 9830400
},
{
"name": "model.layers.39.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 11059200
},
{
"name": "model.layers.39.self_attn.o_proj.q_scale",
"shape": [
2560,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 14336000
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14745600
},
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14750720
},
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
2560,
216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 23598080
},
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
13824,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 24704000
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26915840
},
{
"name": "model.layers.4.self_attn.c_attn.bias",
"shape": [
7680
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 15360,
"byteOffset": 26920960
}
],
"md5sum": "5464caf27c74761a7e7bd6170aa628a9"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "b77f59a22acb13e53dab87501e0e4fea"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 26936320,
"records": [
{
"name": "model.layers.4.self_attn.c_attn.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.4.self_attn.c_attn.q_scale",
"shape": [
7680,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1228800,
"byteOffset": 9830400
},
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 11059200
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
2560,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 14336000
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14745600
},
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14750720
},
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
2560,
216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 23598080
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
13824,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 24704000
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26915840
},
{
"name": "model.layers.5.self_attn.c_attn.bias",
"shape": [
7680
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 15360,
"byteOffset": 26920960
}
],
"md5sum": "63bfe1e304873d9f132d9c1a74d9c528"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "9eca403e3152dcad61f60f664e91b32c"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 26936320,
"records": [
{
"name": "model.layers.5.self_attn.c_attn.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.5.self_attn.c_attn.q_scale",
"shape": [
7680,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1228800,
"byteOffset": 9830400
},
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 11059200
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
2560,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 14336000
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14745600
},
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14750720
},
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
2560,
216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 23598080
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
13824,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 24704000
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26915840
},
{
"name": "model.layers.6.self_attn.c_attn.bias",
"shape": [
7680
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 15360,
"byteOffset": 26920960
}
],
"md5sum": "f1a38ab0222625abc9fd27f1c8fc593f"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "754ff8facf6f79fe93d660da4ae354a1"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 26936320,
"records": [
{
"name": "model.layers.6.self_attn.c_attn.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.6.self_attn.c_attn.q_scale",
"shape": [
7680,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1228800,
"byteOffset": 9830400
},
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 11059200
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
2560,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 14336000
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14745600
},
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14750720
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
2560,
216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 23598080
},
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
13824,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 24704000
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26915840
},
{
"name": "model.layers.7.self_attn.c_attn.bias",
"shape": [
7680
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 15360,
"byteOffset": 26920960
}
],
"md5sum": "82967b78a426c0ed945d3943c2369502"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "165c83c894c64dbae6f5e6baebdb7772"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 26936320,
"records": [
{
"name": "model.layers.7.self_attn.c_attn.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.7.self_attn.c_attn.q_scale",
"shape": [
7680,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1228800,
"byteOffset": 9830400
},
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 11059200
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
2560,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 14336000
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14745600
},
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14750720
},
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
2560,
216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 23598080
},
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
13824,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 24704000
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26915840
},
{
"name": "model.layers.8.self_attn.c_attn.bias",
"shape": [
7680
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 15360,
"byteOffset": 26920960
}
],
"md5sum": "34cfb6db23c3b9e4505fc516a8bc84fa"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 17694720,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
13824,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 17694720,
"byteOffset": 0
}
],
"md5sum": "ccc46ef1645b4a9e8d9d025bb0215993"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 26936320,
"records": [
{
"name": "model.layers.8.self_attn.c_attn.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.8.self_attn.c_attn.q_scale",
"shape": [
7680,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1228800,
"byteOffset": 9830400
},
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 11059200
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
2560,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 14336000
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14745600
},
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
2560,
864
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14750720
},
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
2560,
216
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 23598080
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
13824,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2211840,
"byteOffset": 24704000
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26915840
},
{
"name": "model.layers.9.self_attn.c_attn.bias",
"shape": [
7680
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 15360,
"byteOffset": 26920960
}
],
"md5sum": "b20564c77c4bab0433f0c14bf2e43033"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 14750720,
"records": [
{
"name": "model.layers.9.self_attn.c_attn.q_weight",
"shape": [
7680,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 9830400,
"byteOffset": 0
},
{
"name": "model.layers.9.self_attn.c_attn.q_scale",
"shape": [
7680,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1228800,
"byteOffset": 9830400
},
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
2560,
320
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3276800,
"byteOffset": 11059200
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
2560,
80
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 409600,
"byteOffset": 14336000
},
{
"name": "model.norm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 14745600
}
],
"md5sum": "c21a0d5b00ddef9b73a7ff9bcee560bf"
}
]
}