junrushao's picture
Initial commit
7575cb6
{
"metadata": {
"ParamSize": 485,
"ParamBytes": 15201289216.0,
"BitsPerParam": 3.6039124214610974
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 104960000,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
32000,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 104960000,
"byteOffset": 0
}
],
"md5sum": "10faa78d7756e848d896a56b1c98705a"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.41.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "e530efdd94679456777a075747dfd17a"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.42.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "a69628a16c8929cc7aa7cd7211343fe1"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.42.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "89b39dba6e464a38466aa4bfdcdb8bd9"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 18053120,
"records": [
{
"name": "model.layers.42.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 0
}
],
"md5sum": "e8cf13d2b90dbfa7fe5cf797402d6105"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.42.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "3872a5860f7d3465631b6cd8f717c6a4"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 31240704,
"records": [
{
"name": "lm_head.q_scale",
"shape": [
32000,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 13120000,
"byteOffset": 0
},
{
"name": "model.layers.41.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 13120000
},
{
"name": "model.layers.41.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 13136384
},
{
"name": "model.layers.41.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 22163968
},
{
"name": "model.layers.42.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 22180352
},
{
"name": "model.layers.42.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 22196736
},
{
"name": "model.layers.42.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 31224320
}
],
"md5sum": "ea9c491a1b303ebe0f63e53b0c84487c"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 31068160,
"records": [
{
"name": "model.layers.42.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 0
},
{
"name": "model.layers.42.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 4198400
}
],
"md5sum": "5e3cdbe153845d46e4232114de8262bf"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.43.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "e2aaf9332100001ff1565308a7476660"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.43.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "2e55c97f23b4bfd899b850a95cd9c85e"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.43.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "b8abd9022fb6e8739b4cc9a34b905997"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 30472192,
"records": [
{
"name": "model.layers.42.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 0
},
{
"name": "model.layers.43.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 3358720
},
{
"name": "model.layers.43.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 3375104
},
{
"name": "model.layers.43.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 12402688
},
{
"name": "model.layers.43.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30455808
}
],
"md5sum": "b9efd91d2a1d0c32202822593b833ab2"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 31068160,
"records": [
{
"name": "model.layers.43.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 0
},
{
"name": "model.layers.43.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 4198400
}
],
"md5sum": "f79e8725d3e57730d039350337328b26"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.44.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "6642ff0c55403ae67bb3675c82f27272"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.44.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "3cf631526bdc2e96440202dc80a088ad"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.44.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "afe8d1908340ae1285b8fb8bd6eac5ac"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 30472192,
"records": [
{
"name": "model.layers.43.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 0
},
{
"name": "model.layers.44.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 3358720
},
{
"name": "model.layers.44.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 3375104
},
{
"name": "model.layers.44.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 12402688
},
{
"name": "model.layers.44.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30455808
}
],
"md5sum": "af160e96b5dbf2977a9861a4f80ed498"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 31068160,
"records": [
{
"name": "model.layers.44.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 0
},
{
"name": "model.layers.44.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 4198400
}
],
"md5sum": "6017ece322d1eb87d2699f82c381c9fd"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.45.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "5638b01427bf2f5e035ad0ed06874b57"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.45.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "b4d47609e35c8520cac56d173c223332"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.45.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "a2d91a383bce6a7a6c3db1000d86f384"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 30472192,
"records": [
{
"name": "model.layers.44.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 0
},
{
"name": "model.layers.45.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 3358720
},
{
"name": "model.layers.45.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 3375104
},
{
"name": "model.layers.45.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 12402688
},
{
"name": "model.layers.45.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30455808
}
],
"md5sum": "9b35c7a1c513f42b1a610bb04ceaca23"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 31068160,
"records": [
{
"name": "model.layers.45.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 0
},
{
"name": "model.layers.45.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 4198400
}
],
"md5sum": "cc95df81e7a5296b0c0e0849682aa46c"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.46.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "f5721723ec549a6fe5a94a6a355a6476"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.46.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "2b3db2bf7b48447aa62eef77518f2d00"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.46.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "54ab76e92776d2b7b9c89c235a2c55e0"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 30472192,
"records": [
{
"name": "model.layers.45.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 0
},
{
"name": "model.layers.46.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 3358720
},
{
"name": "model.layers.46.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 3375104
},
{
"name": "model.layers.46.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 12402688
},
{
"name": "model.layers.46.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30455808
}
],
"md5sum": "2fab169cc48a014a4de3b55b4050369a"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 31068160,
"records": [
{
"name": "model.layers.46.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 0
},
{
"name": "model.layers.46.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 4198400
}
],
"md5sum": "28b62462e538c54cfc9a1549cf983f9c"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.47.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "73c5e7116d52590ed1977fc55c7e2298"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.47.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "d7f359b6f4aa83bbd504d3e048b07492"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.47.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "fd88e19f5b3e46448111af1172534aa1"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 30472192,
"records": [
{
"name": "model.layers.46.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 0
},
{
"name": "model.layers.47.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 3358720
},
{
"name": "model.layers.47.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 3375104
},
{
"name": "model.layers.47.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 12402688
},
{
"name": "model.layers.47.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30455808
}
],
"md5sum": "3ffac08c9f8fa4477eb4c1b12a9f74d2"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 31068160,
"records": [
{
"name": "model.layers.47.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 0
},
{
"name": "model.layers.47.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 4198400
}
],
"md5sum": "43f0d4a1c681ea10fa33e1dce44b20f2"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 104960000,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
32000,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 104960000,
"byteOffset": 0
}
],
"md5sum": "1712a1a3d527f18da9752a3d47d9ae5e"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "b188e0e53bd6483f31816e6daff55c3e"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "4ae3521c24c6c2f9f49187e8dab9967e"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 18053120,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 0
}
],
"md5sum": "db360401c56023298a5891a9887eb625"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.0.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "3ec6f421a2b8a2b6aa034e8dafb318d2"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 26869760,
"records": [
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 0
}
],
"md5sum": "3df19b984bb9d9c6a548eb83c515dabc"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "3f3dade7bc570037bb958590afa1df7e"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 33128960,
"records": [
{
"name": "model.layers.47.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 0
},
{
"name": "model.norm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 3358720
},
{
"name": "model.embed_tokens.q_scale",
"shape": [
32000,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 13120000,
"byteOffset": 3375104
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 16495104
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 16511488
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 25539072
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 25555456
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 29753856
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 33112576
}
],
"md5sum": "984c020511e0f5a270998ff34630e720"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "80e1a97d709b7584a1208497e62c41e7"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.1.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "7048e633b41f527cf96a95c47e23aef5"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 26869760,
"records": [
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 0
}
],
"md5sum": "0eb6986a21da42cdabd8b6d3f688d41b"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 31295488,
"records": [
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 0
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 9027584
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 27080704
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 27097088
}
],
"md5sum": "023d27fb88c2d6fd80f9a1eaf1816cc0"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "1353ab3cb6d3a4a31a07a1ca75b62659"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "cb93e92f8d23d43c0de04a8ba345e3a6"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.2.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "bedd11857298eb2edcfedee3272fc95f"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 30472192,
"records": [
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 0
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 3358720
},
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 3375104
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 12402688
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30455808
}
],
"md5sum": "e372512f2793bb89328af70096cec9d4"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 31068160,
"records": [
{
"name": "model.layers.2.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 0
},
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 4198400
}
],
"md5sum": "0749109230fadb6143a0cb7e4f876782"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "568950f4917df0831d4945d3c22d8fc6"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "70ea2905744d67f08b2d859e98064503"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.3.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "62bb93780a1d00ac8c238685fd0f354c"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 30472192,
"records": [
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 0
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 3358720
},
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 3375104
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 12402688
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30455808
}
],
"md5sum": "3346ee36772a18a2c9c9da59fa132912"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 31068160,
"records": [
{
"name": "model.layers.3.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 0
},
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 4198400
}
],
"md5sum": "8ad6ff183811a2bfba004616f902651b"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "4c2160b6d9c116189a09df7ddf6d8433"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "95ac980a6fd8026b117d16e5e5ce84ec"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.4.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "c9b40908da992f1c535fdbe09f6ba989"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 30472192,
"records": [
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 0
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 3358720
},
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 3375104
},
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 12402688
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30455808
}
],
"md5sum": "2f616b3dcf6246243ac4710c23e3a6dc"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 31068160,
"records": [
{
"name": "model.layers.4.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 0
},
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 4198400
}
],
"md5sum": "c70eb1ab4552a4fe7b335ddf0d7a1201"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "07e09df31af8624a0c43c3a30e151476"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "75d60f7a56e17e3c4097ca0089855223"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.5.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "9999f8c3ef21bf43464ac54dfcc25b75"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 30472192,
"records": [
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 0
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 3358720
},
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 3375104
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 12402688
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30455808
}
],
"md5sum": "4f056ced48deacca1b1c3c98af5bab44"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 31068160,
"records": [
{
"name": "model.layers.5.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 0
},
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 4198400
}
],
"md5sum": "d0ae3ea266bb04606d03d5f451edb4da"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "f001d928e303e36cc0772bc992bd76d4"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.6.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "a67a23002f4032504878974e73ff4cdd"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 26869760,
"records": [
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 0
}
],
"md5sum": "4391b1996a8a02c832b14e6a75cb422d"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "6a437e5beab168eebc1dd6b3382b3c6e"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 28985344,
"records": [
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 0
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 3358720
},
{
"name": "model.layers.6.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 21411840
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 25610240
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 28968960
}
],
"md5sum": "0b9158701246b7f5972edeba0a54cef2"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "646d224c5ed9091e14af6f77af24c194"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.10.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "4789a689efc3f0f84dae2dad280b4990"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 26869760,
"records": [
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 0
}
],
"md5sum": "909cc57bf4c0efa304caef9c1dbf1e4c"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 31295488,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 0
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 9027584
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 27080704
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 27097088
}
],
"md5sum": "48e845a3a1d9717032e5302cd839d0d3"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "92aa8796bd2dccc0c9de4c5565feee4b"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "56cffebd8ac7fc418936e41d9dc2e4ff"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.11.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "3d285d47e55102776094bdcd8d09be42"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 30472192,
"records": [
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 0
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 3358720
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 3375104
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 12402688
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30455808
}
],
"md5sum": "ee36783d25323d9b16b1d6d073bb875d"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 31068160,
"records": [
{
"name": "model.layers.11.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 0
},
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 4198400
}
],
"md5sum": "452b17c563e43200f1ba22f3b4ce7ee7"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "1a47dace39e336e24327a520eebf7876"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "c81c3f4b0dbe5b20aa7d8bd26fe67c00"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.12.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "1d6f01346839d4ea362ce1f22d14edd9"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 30472192,
"records": [
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 0
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 3358720
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 3375104
},
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 12402688
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30455808
}
],
"md5sum": "869ce509b3ed87566a6e94cb2a2e062d"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 31068160,
"records": [
{
"name": "model.layers.12.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 0
},
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 4198400
}
],
"md5sum": "3d1635a14591979cb61cd90b751321de"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "c9513a2c1a45d3cdfabe9ada359d11a4"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.13.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "9dfdd814e9520209d85450da6bf0a5c5"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 26869760,
"records": [
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 0
}
],
"md5sum": "a1a5a349598039cefea30310c243b42d"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "1482867b0a4ed3e60c7b54e43fe8d43c"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 28985344,
"records": [
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 0
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 3358720
},
{
"name": "model.layers.13.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 21411840
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 25610240
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 28968960
}
],
"md5sum": "c672d2f158bfdfd50f841a502ffa4784"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "62398de8b64c52545edeaa745e5eb7ef"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "71c7eaecb745871008814b16d23b29b0"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 18053120,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 0
}
],
"md5sum": "549bc3ed531cdbe500c8e6f4cdc84aea"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.7.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "3234389c165d78b147bc99f4bfaac69b"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 26869760,
"records": [
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 0
}
],
"md5sum": "d17dad8e71661cc412e007db8f2b9b04"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "9363a0ef375c7adf001b507c11a9fdb4"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 25677824,
"records": [
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 0
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 9027584
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 9043968
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 9060352
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 18087936
},
{
"name": "model.layers.7.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 18104320
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 22302720
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 25661440
}
],
"md5sum": "770afd5452c6a3d53944d94b0c60efbe"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "149bbf31effe9cff64719dc88846a55a"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.8.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "5d3758f04ac7439e09f9ef934595f4ee"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 26869760,
"records": [
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 0
}
],
"md5sum": "b3fa065039c81b0c62f082fc894cc55e"
},
{
"dataPath": "params_shard_99.bin",
"format": "raw-shard",
"nbytes": 31295488,
"records": [
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 0
},
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 9027584
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 27080704
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 27097088
}
],
"md5sum": "2a294f35877f62e7b7a32a82e3dfe8d4"
},
{
"dataPath": "params_shard_100.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "f32df20a9c47ec5f6f1aa6b970816d04"
},
{
"dataPath": "params_shard_101.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "912e31be61c9d20f46d2528413e9d65c"
},
{
"dataPath": "params_shard_102.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.9.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "c7a1afb5bd261dcd99c310327fa43a16"
},
{
"dataPath": "params_shard_103.bin",
"format": "raw-shard",
"nbytes": 30472192,
"records": [
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 0
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 3358720
},
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 3375104
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 12402688
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30455808
}
],
"md5sum": "919a6370adcede15bf362be1ece4e1b3"
},
{
"dataPath": "params_shard_104.bin",
"format": "raw-shard",
"nbytes": 31068160,
"records": [
{
"name": "model.layers.9.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 0
},
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 4198400
}
],
"md5sum": "17eae7a05bc5ae7640ad0bc3d1df5d95"
},
{
"dataPath": "params_shard_105.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "cb56135ea54a60cbdad20fd103dc9d9d"
},
{
"dataPath": "params_shard_106.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "0e9ae00ea48b35ae5d602c728494379e"
},
{
"dataPath": "params_shard_107.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "3e220187fc7129e7775e5848379e705e"
},
{
"dataPath": "params_shard_108.bin",
"format": "raw-shard",
"nbytes": 18053120,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 0
}
],
"md5sum": "30700064cedc05436056335e5d0d173e"
},
{
"dataPath": "params_shard_109.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.14.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "fa93b2a3307ff8869e7886c83c61bca7"
},
{
"dataPath": "params_shard_110.bin",
"format": "raw-shard",
"nbytes": 26869760,
"records": [
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 0
}
],
"md5sum": "777a964e476b31f216de2915ed3a49ae"
},
{
"dataPath": "params_shard_111.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "3714f6bff641e153c859bce7697a8c97"
},
{
"dataPath": "params_shard_112.bin",
"format": "raw-shard",
"nbytes": 29052928,
"records": [
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 0
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 3358720
},
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 3375104
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 12402688
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 12419072
},
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 12435456
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 21463040
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 21479424
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 25677824
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 29036544
}
],
"md5sum": "a53834b2ef9dd54051e60faa9b011bb3"
},
{
"dataPath": "params_shard_113.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "05dbe8e0fd31a095af6cf77725c6211f"
},
{
"dataPath": "params_shard_114.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.15.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "b434461fb5a536892c12c876966a880d"
},
{
"dataPath": "params_shard_115.bin",
"format": "raw-shard",
"nbytes": 26869760,
"records": [
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 0
}
],
"md5sum": "d062f301d49ceb4197b0d91abda9f0d4"
},
{
"dataPath": "params_shard_116.bin",
"format": "raw-shard",
"nbytes": 31295488,
"records": [
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 0
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 9027584
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 27080704
},
{
"name": "model.layers.15.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 27097088
}
],
"md5sum": "2175f44b1f71d1c2f521128405d4955d"
},
{
"dataPath": "params_shard_117.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "e833c0d40b46c3a8b8f88cd7748488c6"
},
{
"dataPath": "params_shard_118.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "dff5db21da22c8cd46be72a7dba3a0b0"
},
{
"dataPath": "params_shard_119.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.16.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "45eedc95fc961184ae3c62283301d73e"
},
{
"dataPath": "params_shard_120.bin",
"format": "raw-shard",
"nbytes": 30472192,
"records": [
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 0
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 3358720
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 3375104
},
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 12402688
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30455808
}
],
"md5sum": "ce20fa7f4b3ea39decb962d0dc34174d"
},
{
"dataPath": "params_shard_121.bin",
"format": "raw-shard",
"nbytes": 31068160,
"records": [
{
"name": "model.layers.16.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 0
},
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 4198400
}
],
"md5sum": "01a2f99d5ead7f16ac37307716219bfd"
},
{
"dataPath": "params_shard_122.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "23870cf912886d82c0845f000dc682c4"
},
{
"dataPath": "params_shard_123.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "61c54346043a438a5ca1b1690b834024"
},
{
"dataPath": "params_shard_124.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.17.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "62844e4906d274d4cf32792cb6e9a374"
},
{
"dataPath": "params_shard_125.bin",
"format": "raw-shard",
"nbytes": 30472192,
"records": [
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 0
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 3358720
},
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 3375104
},
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 12402688
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30455808
}
],
"md5sum": "7bbc48418119a202f25e94c350b5a226"
},
{
"dataPath": "params_shard_126.bin",
"format": "raw-shard",
"nbytes": 31068160,
"records": [
{
"name": "model.layers.17.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 0
},
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 4198400
}
],
"md5sum": "879bbec454efaf0c7980e117dae1904c"
},
{
"dataPath": "params_shard_127.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "ecfc00d8f94a6ee4610444851b738e06"
},
{
"dataPath": "params_shard_128.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "68e743a082d6db538a31df0fce31c491"
},
{
"dataPath": "params_shard_129.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.18.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "47c47046aa040185ff51f05c1cbe22dd"
},
{
"dataPath": "params_shard_130.bin",
"format": "raw-shard",
"nbytes": 30472192,
"records": [
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 0
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 3358720
},
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 3375104
},
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 12402688
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30455808
}
],
"md5sum": "7d02d8331a2a60f242a01226bc1f6012"
},
{
"dataPath": "params_shard_131.bin",
"format": "raw-shard",
"nbytes": 31068160,
"records": [
{
"name": "model.layers.18.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 0
},
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 4198400
}
],
"md5sum": "028421c2911989c8bcf0babccbabcc02"
},
{
"dataPath": "params_shard_132.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "68f68d725fbe5029c6490e6d6255ef48"
},
{
"dataPath": "params_shard_133.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "d4ca9cbce2416e6ae3414884680355c5"
},
{
"dataPath": "params_shard_134.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.19.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "6d4e81e13488194897f7e63ee9ad3fa3"
},
{
"dataPath": "params_shard_135.bin",
"format": "raw-shard",
"nbytes": 30472192,
"records": [
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 0
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 3358720
},
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 3375104
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 12402688
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30455808
}
],
"md5sum": "84dfef56b62bac7015e59edb483c6d3f"
},
{
"dataPath": "params_shard_136.bin",
"format": "raw-shard",
"nbytes": 31068160,
"records": [
{
"name": "model.layers.19.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 0
},
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 4198400
}
],
"md5sum": "eb75bedd3f417dec98f2a3768a0f1ddb"
},
{
"dataPath": "params_shard_137.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "4a3c3b3b98b38b22da201d2bc67c82f6"
},
{
"dataPath": "params_shard_138.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.20.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "76defa48c02d9b15222250f2dc1c1118"
},
{
"dataPath": "params_shard_139.bin",
"format": "raw-shard",
"nbytes": 26869760,
"records": [
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 0
}
],
"md5sum": "26b006992ed91fb6bb2a966ea7110fd7"
},
{
"dataPath": "params_shard_140.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "f7fe13063b6f9f6d73e0c538cae0b941"
},
{
"dataPath": "params_shard_141.bin",
"format": "raw-shard",
"nbytes": 28985344,
"records": [
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 0
},
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 3358720
},
{
"name": "model.layers.20.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 21411840
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 25610240
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 28968960
}
],
"md5sum": "b4825a02ea48dde54461166f3da075a5"
},
{
"dataPath": "params_shard_142.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "e013cddae05204057d9b1725c718325c"
},
{
"dataPath": "params_shard_143.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "edc4a4a6c947fe6de1c218c819cfcc0a"
},
{
"dataPath": "params_shard_144.bin",
"format": "raw-shard",
"nbytes": 18053120,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 0
}
],
"md5sum": "f9bc97ee4ba3254c6ba70e9b72425580"
},
{
"dataPath": "params_shard_145.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.21.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "15f0032721a84a7f583952cf5405f30f"
},
{
"dataPath": "params_shard_146.bin",
"format": "raw-shard",
"nbytes": 26869760,
"records": [
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 0
}
],
"md5sum": "440a0716fa096f111d53297994272dd7"
},
{
"dataPath": "params_shard_147.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "36b42a588ab18194e9c6b659c8eb5a49"
},
{
"dataPath": "params_shard_148.bin",
"format": "raw-shard",
"nbytes": 25677824,
"records": [
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 0
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 9027584
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 9043968
},
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 9060352
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 18087936
},
{
"name": "model.layers.21.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 18104320
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 22302720
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 25661440
}
],
"md5sum": "23bdcd715ff5b224a93ee45a44a3bd2e"
},
{
"dataPath": "params_shard_149.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "eb8b6ca86b17c6c819283b3a5358bff6"
},
{
"dataPath": "params_shard_150.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.22.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "d349a755b422a67323dd02ef5165501d"
},
{
"dataPath": "params_shard_151.bin",
"format": "raw-shard",
"nbytes": 26869760,
"records": [
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 0
}
],
"md5sum": "2ae96cb72d5e197f91ff10070591d90f"
},
{
"dataPath": "params_shard_152.bin",
"format": "raw-shard",
"nbytes": 31295488,
"records": [
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 0
},
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 9027584
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 27080704
},
{
"name": "model.layers.22.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 27097088
}
],
"md5sum": "39842a828b971892a5f32cdd94fe1a9c"
},
{
"dataPath": "params_shard_153.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "b5338450344878dc44665e2249f75324"
},
{
"dataPath": "params_shard_154.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "9574ed594799a1de3646f9ed72ebd59c"
},
{
"dataPath": "params_shard_155.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.23.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "261edd13bbeed51315b52f9e18530d24"
},
{
"dataPath": "params_shard_156.bin",
"format": "raw-shard",
"nbytes": 30472192,
"records": [
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 0
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 3358720
},
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 3375104
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 12402688
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30455808
}
],
"md5sum": "049ff09c19c708d8f82d627d538887d5"
},
{
"dataPath": "params_shard_157.bin",
"format": "raw-shard",
"nbytes": 31068160,
"records": [
{
"name": "model.layers.23.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 0
},
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 4198400
}
],
"md5sum": "dde0979fd9196bc3f7b77a7ae6f17930"
},
{
"dataPath": "params_shard_158.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.24.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "6bbe79f1518c24cee5200c54f75522b4"
},
{
"dataPath": "params_shard_159.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "68d9ae9dfa0eb943f8dd5f7d03281694"
},
{
"dataPath": "params_shard_160.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.24.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "6019afff0aa10835f76f4a5a4172acaa"
},
{
"dataPath": "params_shard_161.bin",
"format": "raw-shard",
"nbytes": 30472192,
"records": [
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 0
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 3358720
},
{
"name": "model.layers.24.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 3375104
},
{
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 12402688
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30455808
}
],
"md5sum": "59ac88d412035f1fbd62e88f9c5e7b93"
},
{
"dataPath": "params_shard_162.bin",
"format": "raw-shard",
"nbytes": 31068160,
"records": [
{
"name": "model.layers.24.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 0
},
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 4198400
}
],
"md5sum": "b1afe87958a3d82853ee7b6e0fe75064"
},
{
"dataPath": "params_shard_163.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.25.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "af8a77850bed18a2ea9fba458bac3dc1"
},
{
"dataPath": "params_shard_164.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "a83090b8e79c536ec1d4c8ab443c3873"
},
{
"dataPath": "params_shard_165.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.25.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "6ada7e80ebdf3f84d2d0a6d97cd18d1a"
},
{
"dataPath": "params_shard_166.bin",
"format": "raw-shard",
"nbytes": 30472192,
"records": [
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 0
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 3358720
},
{
"name": "model.layers.25.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 3375104
},
{
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 12402688
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30455808
}
],
"md5sum": "6553f1975e90cd4b3d990cede97f1893"
},
{
"dataPath": "params_shard_167.bin",
"format": "raw-shard",
"nbytes": 31068160,
"records": [
{
"name": "model.layers.25.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 0
},
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 4198400
}
],
"md5sum": "a88e77d86ec197b0b35c067b3befb9a3"
},
{
"dataPath": "params_shard_168.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.26.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "7c3e43ac11317922de97766d005f3a9c"
},
{
"dataPath": "params_shard_169.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "e72ef0375da604d812fcb6fb0a8e6032"
},
{
"dataPath": "params_shard_170.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.26.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "4f582b6e40179ec867cb569e2904d168"
},
{
"dataPath": "params_shard_171.bin",
"format": "raw-shard",
"nbytes": 30472192,
"records": [
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 0
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 3358720
},
{
"name": "model.layers.26.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 3375104
},
{
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 12402688
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30455808
}
],
"md5sum": "ce3165bebe60afbe168180b5b70ef8b6"
},
{
"dataPath": "params_shard_172.bin",
"format": "raw-shard",
"nbytes": 31068160,
"records": [
{
"name": "model.layers.26.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 0
},
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 4198400
}
],
"md5sum": "a3855f08c3ed391839c84d1392cb251c"
},
{
"dataPath": "params_shard_173.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "a5097ef02cf60fb2896fb7fbc76ceceb"
},
{
"dataPath": "params_shard_174.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.27.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "ba63c5da743428a68db79857582ba921"
},
{
"dataPath": "params_shard_175.bin",
"format": "raw-shard",
"nbytes": 26869760,
"records": [
{
"name": "model.layers.27.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 0
}
],
"md5sum": "f290e64c06e38b15c616074e709a3d6b"
},
{
"dataPath": "params_shard_176.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.27.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "1280d25bc385d134b8b4ab101584b62f"
},
{
"dataPath": "params_shard_177.bin",
"format": "raw-shard",
"nbytes": 28985344,
"records": [
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 0
},
{
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 3358720
},
{
"name": "model.layers.27.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 21411840
},
{
"name": "model.layers.27.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 25610240
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 28968960
}
],
"md5sum": "3031c9832ba9c0a9d90d588eb03fc249"
},
{
"dataPath": "params_shard_178.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.28.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "f6378dd0f40dceb8cf6a8a73f0d8e8f3"
},
{
"dataPath": "params_shard_179.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "08c6bd7594831f9146eb2be057adc59d"
},
{
"dataPath": "params_shard_180.bin",
"format": "raw-shard",
"nbytes": 18053120,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 0
}
],
"md5sum": "3d3b4247700d79962d5653789e00e145"
},
{
"dataPath": "params_shard_181.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.28.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "c872885a37f3fd11c165026af428866a"
},
{
"dataPath": "params_shard_182.bin",
"format": "raw-shard",
"nbytes": 26869760,
"records": [
{
"name": "model.layers.28.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 0
}
],
"md5sum": "1d1d2f645989f65d30e478ad291fcd6d"
},
{
"dataPath": "params_shard_183.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.29.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "dd18424966968c28619b83ac0abe3e6d"
},
{
"dataPath": "params_shard_184.bin",
"format": "raw-shard",
"nbytes": 25677824,
"records": [
{
"name": "model.layers.27.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 0
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 9027584
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 9043968
},
{
"name": "model.layers.28.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 9060352
},
{
"name": "model.layers.28.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 18087936
},
{
"name": "model.layers.28.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 18104320
},
{
"name": "model.layers.28.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 22302720
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 25661440
}
],
"md5sum": "dd280befaf3a5684d3abd761612a7c51"
},
{
"dataPath": "params_shard_185.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "ca27ba6515d02eb7bf038a8e2096440a"
},
{
"dataPath": "params_shard_186.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.29.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "14306488e16a967e3f1597a395b95ef9"
},
{
"dataPath": "params_shard_187.bin",
"format": "raw-shard",
"nbytes": 26869760,
"records": [
{
"name": "model.layers.29.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 0
}
],
"md5sum": "7523db6414744d3cdf751ac4cad8a300"
},
{
"dataPath": "params_shard_188.bin",
"format": "raw-shard",
"nbytes": 31295488,
"records": [
{
"name": "model.layers.29.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 0
},
{
"name": "model.layers.29.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 9027584
},
{
"name": "model.layers.29.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 27080704
},
{
"name": "model.layers.29.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 27097088
}
],
"md5sum": "c7ccb75866898422fa7dceb1359a1c10"
},
{
"dataPath": "params_shard_189.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.30.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "620f94220add60cecbd30d117ac98863"
},
{
"dataPath": "params_shard_190.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "ec8cb0aed1a81fa455a43b2499fea3c3"
},
{
"dataPath": "params_shard_191.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.30.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "e89bb5ebd8ab65d4980d33d2e1edbef2"
},
{
"dataPath": "params_shard_192.bin",
"format": "raw-shard",
"nbytes": 30472192,
"records": [
{
"name": "model.layers.29.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 0
},
{
"name": "model.layers.30.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 3358720
},
{
"name": "model.layers.30.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 3375104
},
{
"name": "model.layers.30.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 12402688
},
{
"name": "model.layers.30.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30455808
}
],
"md5sum": "0aaa2112822e7570859696fea75a6cf4"
},
{
"dataPath": "params_shard_193.bin",
"format": "raw-shard",
"nbytes": 31068160,
"records": [
{
"name": "model.layers.30.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 0
},
{
"name": "model.layers.30.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 4198400
}
],
"md5sum": "a3debe2f1a06138931e09e232c02e90b"
},
{
"dataPath": "params_shard_194.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.31.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "74fb034034e248480bca56647e282e16"
},
{
"dataPath": "params_shard_195.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "06b39ce0fc34565649e398fa8439b93b"
},
{
"dataPath": "params_shard_196.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.31.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "96a176541f211522f567e9b8316de470"
},
{
"dataPath": "params_shard_197.bin",
"format": "raw-shard",
"nbytes": 30472192,
"records": [
{
"name": "model.layers.30.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 0
},
{
"name": "model.layers.31.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 3358720
},
{
"name": "model.layers.31.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 3375104
},
{
"name": "model.layers.31.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 12402688
},
{
"name": "model.layers.31.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30455808
}
],
"md5sum": "01bbaf476ea4368900a8f57d5b140214"
},
{
"dataPath": "params_shard_198.bin",
"format": "raw-shard",
"nbytes": 31068160,
"records": [
{
"name": "model.layers.31.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 0
},
{
"name": "model.layers.31.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 4198400
}
],
"md5sum": "6fa9860f590d5b39170efec35d1233cb"
},
{
"dataPath": "params_shard_199.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.32.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "85ffc86403233ab4c0c68a195f9ee9c5"
},
{
"dataPath": "params_shard_200.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.32.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "8c588a4e8b943738591292eb866d7696"
},
{
"dataPath": "params_shard_201.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.32.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "438b4cebfd20dfd6d6ee756f4f18fde4"
},
{
"dataPath": "params_shard_202.bin",
"format": "raw-shard",
"nbytes": 30472192,
"records": [
{
"name": "model.layers.31.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 0
},
{
"name": "model.layers.32.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 3358720
},
{
"name": "model.layers.32.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 3375104
},
{
"name": "model.layers.32.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 12402688
},
{
"name": "model.layers.32.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30455808
}
],
"md5sum": "04078751c58b092a7e64eb68005e4e63"
},
{
"dataPath": "params_shard_203.bin",
"format": "raw-shard",
"nbytes": 31068160,
"records": [
{
"name": "model.layers.32.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 0
},
{
"name": "model.layers.32.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 4198400
}
],
"md5sum": "424491ad8730d3df18e554760541434f"
},
{
"dataPath": "params_shard_204.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.33.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "fc95e5f815b5a51503c8be32ce5291f5"
},
{
"dataPath": "params_shard_205.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.33.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "99c8ebabc910161bdab126289a1e99d5"
},
{
"dataPath": "params_shard_206.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.33.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "486b1f891c0a8420d6c9e15fcab249a1"
},
{
"dataPath": "params_shard_207.bin",
"format": "raw-shard",
"nbytes": 30472192,
"records": [
{
"name": "model.layers.32.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 0
},
{
"name": "model.layers.33.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 3358720
},
{
"name": "model.layers.33.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 3375104
},
{
"name": "model.layers.33.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 12402688
},
{
"name": "model.layers.33.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30455808
}
],
"md5sum": "cc0f86424e498977cc8b27feae26a842"
},
{
"dataPath": "params_shard_208.bin",
"format": "raw-shard",
"nbytes": 31068160,
"records": [
{
"name": "model.layers.33.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 0
},
{
"name": "model.layers.33.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 4198400
}
],
"md5sum": "44adeae3664e5449b939a0345f9413d7"
},
{
"dataPath": "params_shard_209.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.34.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "5f268de22fc4c67c2db8c5ae44c4f9b3"
},
{
"dataPath": "params_shard_210.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.34.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "c000330f7b109f3f5760032feec3a937"
},
{
"dataPath": "params_shard_211.bin",
"format": "raw-shard",
"nbytes": 26869760,
"records": [
{
"name": "model.layers.34.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 0
}
],
"md5sum": "d11f118a721a49f12a6fbffd73ae024e"
},
{
"dataPath": "params_shard_212.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.34.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "a13872a78ac3ba4ed55fc306b4e70cfe"
},
{
"dataPath": "params_shard_213.bin",
"format": "raw-shard",
"nbytes": 28985344,
"records": [
{
"name": "model.layers.33.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 0
},
{
"name": "model.layers.34.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 3358720
},
{
"name": "model.layers.34.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 21411840
},
{
"name": "model.layers.34.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 25610240
},
{
"name": "model.layers.34.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 28968960
}
],
"md5sum": "a74f851f2ca8cfed4c071c890e972525"
},
{
"dataPath": "params_shard_214.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.35.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "6ea85f58cc17e46b790ce5b3f000b0e6"
},
{
"dataPath": "params_shard_215.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.35.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "9f945c156e0b0c283fc7908e30e70f91"
},
{
"dataPath": "params_shard_216.bin",
"format": "raw-shard",
"nbytes": 18053120,
"records": [
{
"name": "model.layers.35.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 0
}
],
"md5sum": "9682c40005dda0f3eca8334e36ccdd70"
},
{
"dataPath": "params_shard_217.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.35.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "cf08a784973fe09be8b4f89157c8b6bb"
},
{
"dataPath": "params_shard_218.bin",
"format": "raw-shard",
"nbytes": 26869760,
"records": [
{
"name": "model.layers.35.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 0
}
],
"md5sum": "a75f7eaf058517fb441dfa5af425e4fd"
},
{
"dataPath": "params_shard_219.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.36.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "47ac235634b2f5400fb97c6732f417fe"
},
{
"dataPath": "params_shard_220.bin",
"format": "raw-shard",
"nbytes": 25677824,
"records": [
{
"name": "model.layers.34.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 0
},
{
"name": "model.layers.34.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 9027584
},
{
"name": "model.layers.35.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 9043968
},
{
"name": "model.layers.35.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 9060352
},
{
"name": "model.layers.35.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 18087936
},
{
"name": "model.layers.35.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 18104320
},
{
"name": "model.layers.35.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 22302720
},
{
"name": "model.layers.36.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 25661440
}
],
"md5sum": "ec39c462970ebe4ee10f5ee79576a79e"
},
{
"dataPath": "params_shard_221.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.36.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "5868698d5c96fa28c20d7d199088bf83"
},
{
"dataPath": "params_shard_222.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.36.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "c7f5d8c69cd3f55d6ac30517c51f3ae2"
},
{
"dataPath": "params_shard_223.bin",
"format": "raw-shard",
"nbytes": 26869760,
"records": [
{
"name": "model.layers.36.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 0
}
],
"md5sum": "29e2bbbfedd140d4948e6910bb014354"
},
{
"dataPath": "params_shard_224.bin",
"format": "raw-shard",
"nbytes": 31295488,
"records": [
{
"name": "model.layers.36.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 0
},
{
"name": "model.layers.36.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 9027584
},
{
"name": "model.layers.36.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 27080704
},
{
"name": "model.layers.36.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 27097088
}
],
"md5sum": "f472c32a1adb655faefc09e220241442"
},
{
"dataPath": "params_shard_225.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.37.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "f3cff3bc3bc2ae4ba5afee4202b481d8"
},
{
"dataPath": "params_shard_226.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.37.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "7d79d7c8a9e50fedb64aa84e4b2cf588"
},
{
"dataPath": "params_shard_227.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.37.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "c4242f169324f511a0373d752457f01b"
},
{
"dataPath": "params_shard_228.bin",
"format": "raw-shard",
"nbytes": 30472192,
"records": [
{
"name": "model.layers.36.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 0
},
{
"name": "model.layers.37.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 3358720
},
{
"name": "model.layers.37.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 3375104
},
{
"name": "model.layers.37.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 12402688
},
{
"name": "model.layers.37.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30455808
}
],
"md5sum": "7a78eb6ac0e86c2e5a77a72f30d6d7f0"
},
{
"dataPath": "params_shard_229.bin",
"format": "raw-shard",
"nbytes": 31068160,
"records": [
{
"name": "model.layers.37.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 0
},
{
"name": "model.layers.37.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 4198400
}
],
"md5sum": "5229dca6884f6654bc9e3abd7feded3f"
},
{
"dataPath": "params_shard_230.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.38.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "3131da72b52111f394cdf2d1562aa625"
},
{
"dataPath": "params_shard_231.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.38.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "92e5530162399b4ad126dfce6c1c2bb6"
},
{
"dataPath": "params_shard_232.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.38.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "3ac36c3308c67f324c2fc9487c62b728"
},
{
"dataPath": "params_shard_233.bin",
"format": "raw-shard",
"nbytes": 30472192,
"records": [
{
"name": "model.layers.37.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 0
},
{
"name": "model.layers.38.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 3358720
},
{
"name": "model.layers.38.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 3375104
},
{
"name": "model.layers.38.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 12402688
},
{
"name": "model.layers.38.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30455808
}
],
"md5sum": "d789488f4bb9eb6926f92ce6ae95c4c5"
},
{
"dataPath": "params_shard_234.bin",
"format": "raw-shard",
"nbytes": 31068160,
"records": [
{
"name": "model.layers.38.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 0
},
{
"name": "model.layers.38.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 4198400
}
],
"md5sum": "4853f8071d659590ef357dd6782c7c09"
},
{
"dataPath": "params_shard_235.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.39.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "7e288cb4dbdd57736df289cbd57cb42d"
},
{
"dataPath": "params_shard_236.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.39.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "efb8406b8410c9d74f06349e635b8258"
},
{
"dataPath": "params_shard_237.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.39.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "7f730617c8eae5741b25b4f6d97941f8"
},
{
"dataPath": "params_shard_238.bin",
"format": "raw-shard",
"nbytes": 30472192,
"records": [
{
"name": "model.layers.38.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 0
},
{
"name": "model.layers.39.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 3358720
},
{
"name": "model.layers.39.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 3375104
},
{
"name": "model.layers.39.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 12402688
},
{
"name": "model.layers.39.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30455808
}
],
"md5sum": "6c2ac9903312580c493c566eefc07df3"
},
{
"dataPath": "params_shard_239.bin",
"format": "raw-shard",
"nbytes": 31068160,
"records": [
{
"name": "model.layers.39.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 0
},
{
"name": "model.layers.39.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 4198400
}
],
"md5sum": "49e9fc7c8cf3b161b660d28dafc1884a"
},
{
"dataPath": "params_shard_240.bin",
"format": "raw-shard",
"nbytes": 72220672,
"records": [
{
"name": "model.layers.40.mlp.down_proj.q_weight",
"shape": [
8192,
2204
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 72220672,
"byteOffset": 0
}
],
"md5sum": "4702d085f2757cc773efd691652d3852"
},
{
"dataPath": "params_shard_241.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.40.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "f6126c71aee4ccfc72eaae805e1d037c"
},
{
"dataPath": "params_shard_242.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.40.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "cfe65db297c32a7a174d8f9897cc8c0b"
},
{
"dataPath": "params_shard_243.bin",
"format": "raw-shard",
"nbytes": 30472192,
"records": [
{
"name": "model.layers.39.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 0
},
{
"name": "model.layers.40.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 3358720
},
{
"name": "model.layers.40.mlp.down_proj.q_scale",
"shape": [
8192,
551
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9027584,
"byteOffset": 3375104
},
{
"name": "model.layers.40.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 12402688
},
{
"name": "model.layers.40.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30455808
}
],
"md5sum": "3170d9dc69b4fc226ee26f6c0d027ac8"
},
{
"dataPath": "params_shard_244.bin",
"format": "raw-shard",
"nbytes": 31068160,
"records": [
{
"name": "model.layers.40.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 0
},
{
"name": "model.layers.40.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 4198400
}
],
"md5sum": "2cbfb23026c0d87cce43b03319fb33a9"
},
{
"dataPath": "params_shard_245.bin",
"format": "raw-shard",
"nbytes": 144424960,
"records": [
{
"name": "model.layers.41.mlp.gate_up_proj.q_weight",
"shape": [
44032,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 144424960,
"byteOffset": 0
}
],
"md5sum": "090d056cf4438b4a87654a585d39f184"
},
{
"dataPath": "params_shard_246.bin",
"format": "raw-shard",
"nbytes": 33587200,
"records": [
{
"name": "model.layers.41.self_attn.qkv_proj.q_weight",
"shape": [
10240,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33587200,
"byteOffset": 0
}
],
"md5sum": "4c73af4912a8783d675d8ca2e51241e5"
},
{
"dataPath": "params_shard_247.bin",
"format": "raw-shard",
"nbytes": 26869760,
"records": [
{
"name": "model.layers.41.self_attn.o_proj.q_weight",
"shape": [
8192,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 26869760,
"byteOffset": 0
}
],
"md5sum": "ac8a7bb01516a3c5040722f18baec4aa"
},
{
"dataPath": "params_shard_248.bin",
"format": "raw-shard",
"nbytes": 28968960,
"records": [
{
"name": "model.layers.40.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 0
},
{
"name": "model.layers.41.mlp.gate_up_proj.q_scale",
"shape": [
44032,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18053120,
"byteOffset": 3358720
},
{
"name": "model.layers.41.self_attn.qkv_proj.q_scale",
"shape": [
10240,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4198400,
"byteOffset": 21411840
},
{
"name": "model.layers.41.self_attn.o_proj.q_scale",
"shape": [
8192,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3358720,
"byteOffset": 25610240
}
],
"md5sum": "f236c60dd4577aa5e3b3b7c9c082822a"
}
]
}