mistral_finetune-MLC / ndarray-cache.json
HyunWook An
Upload 62 files
90ebdbe verified
{
"metadata": {
"ParamSize": 325,
"ParamBytes": 4073857024.0,
"BitsPerParam": 4.50042279387851
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 65536000,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
32000,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 65536000,
"byteOffset": 0
}
],
"md5sum": "00f4a306fe7996e4b12a167974949681"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "0525a8dac39473541d9e762efeb5f63f"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 31784960,
"records": [
{
"name": "model.embed_tokens.q_scale",
"shape": [
32000,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192000,
"byteOffset": 0
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 8192000
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 20774912
},
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 22347776
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 30736384
}
],
"md5sum": "eae2d0472d03dfa53592eeae4337e238"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "299a9caeb48663ffe739caf7b4b7ba91"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 25182208,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 0
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 7340032
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 11018240
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11026432
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 23609344
}
],
"md5sum": "f2a810a683d95b0c5f463678ac96dc45"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "10c20ca482a4c94a2b34096ea6770ab2"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "a279a38603c5d7bdee281608f70a4348"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 33046528,
"records": [
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 9437184
},
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 16777216
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20447232
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20455424
},
{
"name": "model.layers.2.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 20463616
}
],
"md5sum": "7cd356f12d5338d7d2e7be895ba459d6"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "525d17c0245d1fab3051d86148439214"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "3f0d2318a464274ff3ecff2f72094f47"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.2.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 0
},
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 1572864
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 9961472
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 11010048
},
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 18350080
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22020096
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22028288
}
],
"md5sum": "3cbc98348b08d568b99eabc51262c751"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "f59dbea892105e31b53942cd78b80948"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "1a1099653ecbdd3b64d137cb4da0118b"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 30932992,
"records": [
{
"name": "model.layers.3.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.3.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 23592960
}
],
"md5sum": "88d355874044cd610f6dc75a951c86e4"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "4515ff27a1eed41f065cf4c1d55baa4f"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 27279360,
"records": [
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 0
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 3670016
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 3678208
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 3686400
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 16269312
},
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 17842176
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 26230784
}
],
"md5sum": "460f7a230bbaf6c7dc073c177023fdbb"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "d37ad63df9aaa4d39869fc840cd48d51"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 25182208,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 0
},
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 7340032
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 11018240
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11026432
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 23609344
}
],
"md5sum": "7ff302024e7ccaa0887fdcddbdd0ceb4"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "df47195900f10118eafc9dcc95892a19"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "dd0b67596bda55b305e51b2bafdb0e35"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 33046528,
"records": [
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 9437184
},
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 16777216
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20447232
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20455424
},
{
"name": "model.layers.6.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 20463616
}
],
"md5sum": "3fc4e2c32ac09692a7b6d520b032d848"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "bea41368bf435088cea3a93df6c9d1bb"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "35f39c83fac0e2d1e92f9fd74ae17c4c"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.6.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 0
},
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 1572864
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 9961472
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 11010048
},
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 18350080
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22020096
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22028288
}
],
"md5sum": "50cc69084292a4fe49fe7f84eca7fd5e"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "583a754121d25fc673681ba13fa0f9ff"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "f89a68ff1f3780fb119ebd95357bae90"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 30932992,
"records": [
{
"name": "model.layers.7.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.7.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 23592960
}
],
"md5sum": "5f11b36919ff231570fceada16bf2e9c"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "b6bde9bf76e9dfc30fdeef6548eed41b"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 27279360,
"records": [
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 0
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 3670016
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 3678208
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 3686400
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 16269312
},
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 17842176
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 26230784
}
],
"md5sum": "74ddd8692325cd08afa9a2afbd1ff9c7"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "d23dab60a3c29e403e9b2c53c9e053c3"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 25182208,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 0
},
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 7340032
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 11018240
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11026432
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 23609344
}
],
"md5sum": "02d8367307bbdb155543a27abdd8376d"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "af024660f5dc4b4ff74338f36ac19fa3"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "08e04a2937e885c9be7aad69e3d36959"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 33046528,
"records": [
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 9437184
},
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 16777216
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20447232
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20455424
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 20463616
}
],
"md5sum": "413e8710ab365432a8eac416100c80a9"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "18fccf989c6bf9fb12d74c4bdb9e776c"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "e21a781e8e30130568a01ad3117f987a"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.10.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 0
},
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 1572864
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 9961472
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 11010048
},
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 18350080
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22020096
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22028288
}
],
"md5sum": "846eb5d47e250bce5787928c0287d042"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "43373a91685f5185ed1879b2ea4276e3"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "be9029495dba136993e14cc9d3ba3339"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 30932992,
"records": [
{
"name": "model.layers.11.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.11.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 23592960
}
],
"md5sum": "1c5d9b378c2dcf44bf9f6903036993f3"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "c332a03974e9640452e075bea35b24f7"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 27279360,
"records": [
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 0
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 3670016
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 3678208
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 3686400
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 16269312
},
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 17842176
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 26230784
}
],
"md5sum": "e4808f7c84dd9d4eda305dcd4365106f"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "b21669d3c82e532275a1415e7a6351ad"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 25182208,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 0
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 7340032
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 11018240
},
{
"name": "model.layers.13.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11026432
},
{
"name": "model.layers.13.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 23609344
}
],
"md5sum": "85b4d7212cd70cc747f774b29e24604b"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "9bc68664fd23171440af816b12640586"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "6d6786f77a498196dace5db5eea5ea68"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 33046528,
"records": [
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 9437184
},
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 16777216
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20447232
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20455424
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 20463616
}
],
"md5sum": "21eaf302996a5ff211bedd71a169a720"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "63846f6d5a608015d9526d3642dca419"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "36b6a3c842ff73b8d347aebaba19b4ab"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.14.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 0
},
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 1572864
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 9961472
},
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 11010048
},
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 18350080
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22020096
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22028288
}
],
"md5sum": "65c6790bd66f3a9e3b9ad02ca8fb44e3"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "fab1b99c649fd8b3aa65436babdd0c8b"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "fa44045a614033b1870d10c4e4943ee9"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 30932992,
"records": [
{
"name": "model.layers.15.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.15.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 23592960
}
],
"md5sum": "1aaddaa0eb9e3b8d5d5a5098d633eb8e"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "9a93daf248ccd90658e80ee0037d2742"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 27279360,
"records": [
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 0
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 3670016
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 3678208
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 3686400
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 16269312
},
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 17842176
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 26230784
}
],
"md5sum": "724edd909e60ed15d74511c011c2ddff"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "2a842494afdea53016514b550838ea38"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 25182208,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 0
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 7340032
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 11018240
},
{
"name": "model.layers.17.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11026432
},
{
"name": "model.layers.17.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 23609344
}
],
"md5sum": "41d9f6c127fbb51dfd4aef0784d8bf4a"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "02475c5de61aa27696cf043bb39f6589"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "225d923cd3e5801e1db3b2f78d429fc7"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 33046528,
"records": [
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 9437184
},
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 16777216
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20447232
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20455424
},
{
"name": "model.layers.18.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 20463616
}
],
"md5sum": "1bda9eb79b417fb8a71747d7480a83ae"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "43e93693355fa9b291c94e6016767e64"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "1a5a8ad34377fb6db504a9bb2a186375"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.18.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 0
},
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 1572864
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 9961472
},
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 11010048
},
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 18350080
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22020096
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22028288
}
],
"md5sum": "0c3aa518b7b76ad1ee7a31337787030e"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "223ae3406c18e7fcc00d645cbc3a7a1f"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "4d9b40ff7fb1d6ff74a47c62bf2e5a3a"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 30932992,
"records": [
{
"name": "model.layers.19.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.19.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 23592960
}
],
"md5sum": "e0af20ad93163de8c5dd421992c9e176"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "133f06a8b27b50a9df7a9b30da4622be"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 27279360,
"records": [
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 0
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 3670016
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 3678208
},
{
"name": "model.layers.20.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 3686400
},
{
"name": "model.layers.20.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 16269312
},
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 17842176
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 26230784
}
],
"md5sum": "491657fcf02b1a4f5be098d4bead3b7e"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "aad24afb39ad2a5cf958d6d71b54ab63"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 25182208,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 0
},
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 7340032
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 11018240
},
{
"name": "model.layers.21.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11026432
},
{
"name": "model.layers.21.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 23609344
}
],
"md5sum": "fb29c4ac94ecc8820861c9c92fa6ecb6"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "9594dc08843f69ebc1f0b9c8856b34c4"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "4b7bf93128a639f24caa8369aee406be"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 33046528,
"records": [
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 9437184
},
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 16777216
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20447232
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20455424
},
{
"name": "model.layers.22.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 20463616
}
],
"md5sum": "e6c76418440d746e7c0a21c4304a933c"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "d7bf7718ddb239c283cb864a4c6cce61"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "450a9e2a7a172f23c6edde993263131b"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.22.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 0
},
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 1572864
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 9961472
},
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 11010048
},
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 18350080
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22020096
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22028288
}
],
"md5sum": "4526604adb7e3ddffbf711f0e6bcb843"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "01f5231f38efbb46acd2f3521391abcd"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "d33e0281f9908c09fd688347da9f36c2"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 30932992,
"records": [
{
"name": "model.layers.23.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.23.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 23592960
}
],
"md5sum": "25ee572b7d8039fcb7e3bc0bb415b205"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "1e2eeb17902ecb9b8a245647d7bfe388"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 27279360,
"records": [
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 0
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 3670016
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 3678208
},
{
"name": "model.layers.24.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 3686400
},
{
"name": "model.layers.24.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 16269312
},
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 17842176
},
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 26230784
}
],
"md5sum": "9f97a019512ba01578129153d27d090d"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.24.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "e2390fd0fa81dcba5621414865d22ac9"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 25182208,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 0
},
{
"name": "model.layers.24.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 7340032
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 11018240
},
{
"name": "model.layers.25.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11026432
},
{
"name": "model.layers.25.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 23609344
}
],
"md5sum": "77db8f39eecd72c680b94d32c07346a5"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "7a9513445997776af2248b63afc2458e"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.25.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "cf646801e7518a1ff8a6b1db2b2ed904"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 33046528,
"records": [
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 9437184
},
{
"name": "model.layers.25.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 16777216
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20447232
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20455424
},
{
"name": "model.layers.26.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 20463616
}
],
"md5sum": "9ebeba5e8305f02ade442185fe334402"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "a36afe2195638f459ea9adf70846422d"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.26.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "89866e283af575211ba9e13400619bf2"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.26.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 0
},
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 1572864
},
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 9961472
},
{
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 11010048
},
{
"name": "model.layers.26.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 18350080
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22020096
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22028288
}
],
"md5sum": "0ae32daaf46c5e50d16a96740fc87d56"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "2aa39be07393fafb8654a2d0d159451c"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.27.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "416544ed42e36d5e10c165cdf9c26a09"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 30932992,
"records": [
{
"name": "model.layers.27.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.27.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.27.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.27.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 23592960
}
],
"md5sum": "aeffbda6ce531e2086184f6d343351c3"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "cda4c93f180ca7c1319cbb673f54decd"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 27279360,
"records": [
{
"name": "model.layers.27.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 0
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 3670016
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 3678208
},
{
"name": "model.layers.28.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 3686400
},
{
"name": "model.layers.28.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 16269312
},
{
"name": "model.layers.28.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 17842176
},
{
"name": "model.layers.28.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 26230784
}
],
"md5sum": "7a88dffe1ebcbd2397c01b3cfaeacb63"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.28.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "31496f31ee8df21db32f090d82672172"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 25182208,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 0
},
{
"name": "model.layers.28.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 7340032
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.28.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 11018240
},
{
"name": "model.layers.29.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11026432
},
{
"name": "model.layers.29.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 23609344
}
],
"md5sum": "a65f973e59e05b19220d770cbce77426"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "7cd8570546c6b99c978e32293e99f149"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.29.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "c19bafbf86f10d5f4d5003db8a5618bc"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 33046528,
"records": [
{
"name": "model.layers.29.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.29.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.29.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 9437184
},
{
"name": "model.layers.29.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 16777216
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20447232
},
{
"name": "model.layers.29.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20455424
},
{
"name": "model.layers.30.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 20463616
}
],
"md5sum": "7a29dee29f241c91912e89ee69f4e1df"
},
{
"dataPath": "params_shard_99.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "b2c6645c1d3dbfdb0c596ed3223abd25"
},
{
"dataPath": "params_shard_100.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.30.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "01a574703ef90d5e93d9343adde1fabf"
},
{
"dataPath": "params_shard_101.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.30.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 0
},
{
"name": "model.layers.30.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 1572864
},
{
"name": "model.layers.30.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 9961472
},
{
"name": "model.layers.30.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 11010048
},
{
"name": "model.layers.30.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 18350080
},
{
"name": "model.layers.30.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22020096
},
{
"name": "model.layers.30.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22028288
}
],
"md5sum": "74ae41af991e040797142ba3f5bb369f"
},
{
"dataPath": "params_shard_102.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "dd4b8aa7a121032b28c645bbc6546df1"
},
{
"dataPath": "params_shard_103.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.31.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "ae83f7ac4d4419b60025fd14b4df1383"
},
{
"dataPath": "params_shard_104.bin",
"format": "raw-shard",
"nbytes": 30932992,
"records": [
{
"name": "model.layers.31.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.31.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.31.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.31.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.31.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 23592960
}
],
"md5sum": "528443cf0cc7df7b35da3a7880c0b6e9"
},
{
"dataPath": "params_shard_105.bin",
"format": "raw-shard",
"nbytes": 65536000,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
32000,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 65536000,
"byteOffset": 0
}
],
"md5sum": "12c0ded503f67e8b140ef2175916860e"
},
{
"dataPath": "params_shard_106.bin",
"format": "raw-shard",
"nbytes": 11886592,
"records": [
{
"name": "model.layers.31.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 0
},
{
"name": "model.layers.31.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 3670016
},
{
"name": "model.layers.31.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 3678208
},
{
"name": "model.norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 3686400
},
{
"name": "lm_head.q_scale",
"shape": [
32000,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192000,
"byteOffset": 3694592
}
],
"md5sum": "4baa6b61e87b8406d123c2a97a7ce467"
}
]
}