{ "metadata": { "ParamSize": 677, "ParamBytes": 17174622208.0, "BitsPerParam": 3.23076730356745 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 155582464, "records": [ { "name": "lm_head.q_weight", "shape": [ 151936, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 155582464, "byteOffset": 0 } ], "md5sum": "6d2ea4f7c95de3a40e76414173a77466" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.47.mlp.moe_down_proj.q_weight", "shape": [ 128, 2048, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "6e13c79fc3f420e761646ea5310b5b90" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.47.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 1536, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "3de673099add932260e987e19e77c33f" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.47.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 1536, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "ec8e9bc0c9e785bae163fd7a7bacf2f3" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 155582464, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 151936, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 155582464, "byteOffset": 0 } ], "md5sum": "21da87578e5e16adb6060f1fbd9bc0c5" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 19447808, "records": [ { "name": "model.embed_tokens.q_scale", "shape": [ 151936, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19447808, "byteOffset": 0 } ], "md5sum": "b8e7d3aff0d235cddc290614e1dc0a7e" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.0.mlp.moe_down_proj.q_weight", "shape": [ 128, 2048, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "9db0bda105770b186cef129b66763064" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 32047104, "records": [ { "name": "lm_head.q_scale", "shape": [ 151936, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19447808, "byteOffset": 0 }, { "name": "model.layers.47.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 19447808 }, { "name": "model.layers.47.mlp.moe_down_proj.q_scale", "shape": [ 128, 2048, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 19451904 }, { "name": "model.layers.47.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 32034816 }, { "name": "model.norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 32038912 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 32043008 } ], "md5sum": "472da9830b9ea398709adb33735262af" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.0.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 1536, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "b64ed50af2fb9aeba0c27e614807fb28" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.0.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 1536, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "32d1d9ddd1254a4e7db7e1681e7ff29d" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.1.mlp.moe_down_proj.q_weight", "shape": [ 128, 2048, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "24b27b0a80ccb8665bfe0bb4436ef390" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 23355904, "records": [ { "name": "model.layers.0.mlp.moe_down_proj.q_scale", "shape": [ 128, 2048, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.0.mlp.gate.q_weight", "shape": [ 128, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 131072, "byteOffset": 12582912 }, { "name": "model.layers.0.mlp.gate.q_scale", "shape": [ 128, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 12713984 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 12730368 }, { "name": "model.layers.0.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 12734464 }, { "name": "model.layers.0.self_attn.c_attn.q_weight", "shape": [ 5120, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 12734720 }, { "name": "model.layers.0.self_attn.c_attn.q_scale", "shape": [ 5120, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 655360, "byteOffset": 17977600 }, { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 2048, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 18632960 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 2048, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 524288, "byteOffset": 22827264 }, { "name": "model.layers.0.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23351552 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 23351808 } ], "md5sum": "a4dc3af1d0c17252a477d5df443b34b1" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.1.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 1536, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "b954fb3c6af820a18c7cf9f3cdfaef6f" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.1.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 1536, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "3b6b71a92454847a829c4a05af222712" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.2.mlp.moe_down_proj.q_weight", "shape": [ 128, 2048, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "597434ce96e7d0d6a39b73d21a407ae2" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 23351808, "records": [ { "name": "model.layers.1.mlp.moe_down_proj.q_scale", "shape": [ 128, 2048, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.1.mlp.gate.q_weight", "shape": [ 128, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 131072, "byteOffset": 12582912 }, { "name": "model.layers.1.mlp.gate.q_scale", "shape": [ 128, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 12713984 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 12730368 }, { "name": "model.layers.1.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 12734464 }, { "name": "model.layers.1.self_attn.c_attn.q_weight", "shape": [ 5120, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 12734720 }, { "name": "model.layers.1.self_attn.c_attn.q_scale", "shape": [ 5120, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 655360, "byteOffset": 17977600 }, { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 2048, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 18632960 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 2048, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 524288, "byteOffset": 22827264 }, { "name": "model.layers.1.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23351552 } ], "md5sum": "1f9719dd41a634a01015d4ded3f498b5" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.2.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 1536, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "b0429de8b476ddb9b218d033e744e5a0" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.2.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 1536, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "92cdb0f51edb16626d1de8a5b6b3c176" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.10.mlp.moe_down_proj.q_weight", "shape": [ 128, 2048, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "28f670e6797cd06d2145ac40d38a850b" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 23351808, "records": [ { "name": "model.layers.2.mlp.moe_down_proj.q_scale", "shape": [ 128, 2048, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.2.mlp.gate.q_weight", "shape": [ 128, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 131072, "byteOffset": 12582912 }, { "name": "model.layers.2.mlp.gate.q_scale", "shape": [ 128, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 12713984 }, { "name": "model.layers.2.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 12730368 }, { "name": "model.layers.2.self_attn.c_attn.q_weight", "shape": [ 5120, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 12730624 }, { "name": "model.layers.2.self_attn.c_attn.q_scale", "shape": [ 5120, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 655360, "byteOffset": 17973504 }, { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 2048, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 18628864 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 2048, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 524288, "byteOffset": 22823168 }, { "name": "model.layers.2.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23347456 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 23347712 } ], "md5sum": "b0351f44d4dfca0c40f2e3449b835484" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.10.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 1536, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "68e74e81ad3bdb35bc5dc76fbd9ce289" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.10.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 1536, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "9e59114be51dfbd09f0ea220f5616838" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.11.mlp.moe_down_proj.q_weight", "shape": [ 128, 2048, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "0f45703d7a8002752cadba6e695c0530" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 23355904, "records": [ { "name": "model.layers.10.mlp.moe_down_proj.q_scale", "shape": [ 128, 2048, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.10.mlp.gate.q_weight", "shape": [ 128, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 131072, "byteOffset": 12582912 }, { "name": "model.layers.10.mlp.gate.q_scale", "shape": [ 128, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 12713984 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 12730368 }, { "name": "model.layers.10.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 12734464 }, { "name": "model.layers.10.self_attn.c_attn.q_weight", "shape": [ 5120, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 12734720 }, { "name": "model.layers.10.self_attn.c_attn.q_scale", "shape": [ 5120, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 655360, "byteOffset": 17977600 }, { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 2048, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 18632960 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 2048, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 524288, "byteOffset": 22827264 }, { "name": "model.layers.10.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23351552 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 23351808 } ], "md5sum": "c3982ead81ce19b845eb370889418039" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.11.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 1536, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "91e6039a1d57bcff71f0d5a877b0838b" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.11.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 1536, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "bb2014245fee6373dbbaac5491a31f8b" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.12.mlp.moe_down_proj.q_weight", "shape": [ 128, 2048, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "e22bae5e586ae213e367cc44ab7d0042" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 23351808, "records": [ { "name": "model.layers.11.mlp.moe_down_proj.q_scale", "shape": [ 128, 2048, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.11.mlp.gate.q_weight", "shape": [ 128, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 131072, "byteOffset": 12582912 }, { "name": "model.layers.11.mlp.gate.q_scale", "shape": [ 128, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 12713984 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 12730368 }, { "name": "model.layers.11.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 12734464 }, { "name": "model.layers.11.self_attn.c_attn.q_weight", "shape": [ 5120, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 12734720 }, { "name": "model.layers.11.self_attn.c_attn.q_scale", "shape": [ 5120, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 655360, "byteOffset": 17977600 }, { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 2048, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 18632960 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 2048, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 524288, "byteOffset": 22827264 }, { "name": "model.layers.11.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23351552 } ], "md5sum": "60fc2f4a82a4f4cbbf3cfc8772dd57ad" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.12.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 1536, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "06d52ffb5b7ec4de887fcdae240732f4" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.12.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 1536, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "e55639a475c903b1dbd9b47a48b94861" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.9.mlp.moe_down_proj.q_weight", "shape": [ 128, 2048, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "7c1dcf9acd1a4e618dcfac1b5a5beea4" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 23351808, "records": [ { "name": "model.layers.12.mlp.moe_down_proj.q_scale", "shape": [ 128, 2048, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.12.mlp.gate.q_weight", "shape": [ 128, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 131072, "byteOffset": 12582912 }, { "name": "model.layers.12.mlp.gate.q_scale", "shape": [ 128, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 12713984 }, { "name": "model.layers.12.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 12730368 }, { "name": "model.layers.12.self_attn.c_attn.q_weight", "shape": [ 5120, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 12730624 }, { "name": "model.layers.12.self_attn.c_attn.q_scale", "shape": [ 5120, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 655360, "byteOffset": 17973504 }, { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 2048, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 18628864 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 2048, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 524288, "byteOffset": 22823168 }, { "name": "model.layers.12.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23347456 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 23347712 } ], "md5sum": "8b6c7fd78aa5e5e02646a6aebd1347d5" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.9.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 1536, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "9fc70e5138cefe53808ae6134fffc1d9" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.9.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 1536, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "5d633e8f6acb54a57f9d849b58e9b850" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.13.mlp.moe_down_proj.q_weight", "shape": [ 128, 2048, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "e46a90dd6d847a176e79a993313ee00e" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.13.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 1536, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "922daf1b11536831beec4057f1c32aff" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.13.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 1536, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "2f25b1d32fa309177335a188406ef0e7" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 31232256, "records": [ { "name": "model.layers.9.mlp.moe_down_proj.q_scale", "shape": [ 128, 2048, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 12582912 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 12587008 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 12591104 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 12595200 }, { "name": "model.layers.13.mlp.moe_down_proj.q_scale", "shape": [ 128, 2048, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 12599296 }, { "name": "model.layers.13.mlp.gate.q_weight", "shape": [ 128, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 131072, "byteOffset": 25182208 }, { "name": "model.layers.13.mlp.gate.q_scale", "shape": [ 128, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 25313280 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25329664 }, { "name": "model.layers.13.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 25333760 }, { "name": "model.layers.13.self_attn.c_attn.q_weight", "shape": [ 5120, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 25334016 }, { "name": "model.layers.13.self_attn.c_attn.q_scale", "shape": [ 5120, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 655360, "byteOffset": 30576896 } ], "md5sum": "cafe0ce393d6a0bcf324ef255759a002" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.14.mlp.moe_down_proj.q_weight", "shape": [ 128, 2048, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "c2fee1fd74c3d5acad9f67ebcbce9cce" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.14.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 1536, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "40ceaa96b1049c4f91f14d065b3d7d15" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.14.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 1536, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "fe13f3702aafa0a021464c3face61f62" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.15.mlp.moe_down_proj.q_weight", "shape": [ 128, 2048, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "f3a9bc597b1b45c832c60c4cdd68e4ad" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 28074752, "records": [ { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 2048, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 2048, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 524288, "byteOffset": 4194304 }, { "name": "model.layers.13.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 4718592 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 4718848 }, { "name": "model.layers.14.mlp.moe_down_proj.q_scale", "shape": [ 128, 2048, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 4722944 }, { "name": "model.layers.14.mlp.gate.q_weight", "shape": [ 128, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 131072, "byteOffset": 17305856 }, { "name": "model.layers.14.mlp.gate.q_scale", "shape": [ 128, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 17436928 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 17453312 }, { "name": "model.layers.14.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 17457408 }, { "name": "model.layers.14.self_attn.c_attn.q_weight", "shape": [ 5120, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 17457664 }, { "name": "model.layers.14.self_attn.c_attn.q_scale", "shape": [ 5120, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 655360, "byteOffset": 22700544 }, { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 2048, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 23355904 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 2048, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 524288, "byteOffset": 27550208 }, { "name": "model.layers.14.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 28074496 } ], "md5sum": "3bf13f7227e91df2a2accb2b200598d2" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.15.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 1536, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "e7915144926fc5d3ee6839303cc1921f" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.15.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 1536, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "de1d8a43592215a1cc1a57517be71cd9" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.16.mlp.moe_down_proj.q_weight", "shape": [ 128, 2048, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "40122e47e67bdba27187ca24f4082d30" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 23360000, "records": [ { "name": "model.layers.15.mlp.moe_down_proj.q_scale", "shape": [ 128, 2048, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.15.mlp.gate.q_weight", "shape": [ 128, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 131072, "byteOffset": 12582912 }, { "name": "model.layers.15.mlp.gate.q_scale", "shape": [ 128, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 12713984 }, { "name": "model.layers.15.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 12730368 }, { "name": "model.layers.15.self_attn.c_attn.q_weight", "shape": [ 5120, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 12730624 }, { "name": "model.layers.15.self_attn.c_attn.q_scale", "shape": [ 5120, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 655360, "byteOffset": 17973504 }, { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 2048, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 18628864 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 2048, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 524288, "byteOffset": 22823168 }, { "name": "model.layers.15.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23347456 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 23347712 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 23351808 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 23355904 } ], "md5sum": "f8b95a5d0df3e7881baf940358c2547e" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.16.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 1536, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "61f1fc8feb2f19f7b9f4bcad48214fc0" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.16.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 1536, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "78df7af5f0646a04633742d56c5979a9" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.17.mlp.moe_down_proj.q_weight", "shape": [ 128, 2048, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "998cb28ec805ebf006bcc3cf86f3dd3f" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 23355904, "records": [ { "name": "model.layers.16.mlp.moe_down_proj.q_scale", "shape": [ 128, 2048, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.16.mlp.gate.q_weight", "shape": [ 128, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 131072, "byteOffset": 12582912 }, { "name": "model.layers.16.mlp.gate.q_scale", "shape": [ 128, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 12713984 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 12730368 }, { "name": "model.layers.16.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 12734464 }, { "name": "model.layers.16.self_attn.c_attn.q_weight", "shape": [ 5120, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 12734720 }, { "name": "model.layers.16.self_attn.c_attn.q_scale", "shape": [ 5120, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 655360, "byteOffset": 17977600 }, { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 2048, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 18632960 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 2048, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 524288, "byteOffset": 22827264 }, { "name": "model.layers.16.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23351552 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 23351808 } ], "md5sum": "cef7e4d2c0118799a125df8b7ceeda15" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.17.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 1536, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "b558fa805d47280a259abda5095236de" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.17.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 1536, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "575dae89a81c2c174d63c481b5b8feef" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.18.mlp.moe_down_proj.q_weight", "shape": [ 128, 2048, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "4326a78619599b8c3bd20236b3f52f2e" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 23351808, "records": [ { "name": "model.layers.17.mlp.moe_down_proj.q_scale", "shape": [ 128, 2048, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.17.mlp.gate.q_weight", "shape": [ 128, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 131072, "byteOffset": 12582912 }, { "name": "model.layers.17.mlp.gate.q_scale", "shape": [ 128, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 12713984 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 12730368 }, { "name": "model.layers.17.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 12734464 }, { "name": "model.layers.17.self_attn.c_attn.q_weight", "shape": [ 5120, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 12734720 }, { "name": "model.layers.17.self_attn.c_attn.q_scale", "shape": [ 5120, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 655360, "byteOffset": 17977600 }, { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 2048, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 18632960 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 2048, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 524288, "byteOffset": 22827264 }, { "name": "model.layers.17.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23351552 } ], "md5sum": "d9a04d1df21b3c41f7a1cc7890a0deea" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.18.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 1536, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "6a71a1b826496b6f3c93fdd97b08cadd" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.18.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 1536, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "c1fd8134468695f53a98cb8886dd0b87" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.19.mlp.moe_down_proj.q_weight", "shape": [ 128, 2048, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "c7724aaf4295f57ec73a31e12de85520" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 23360000, "records": [ { "name": "model.layers.18.mlp.moe_down_proj.q_scale", "shape": [ 128, 2048, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.18.mlp.gate.q_weight", "shape": [ 128, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 131072, "byteOffset": 12582912 }, { "name": "model.layers.18.mlp.gate.q_scale", "shape": [ 128, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 12713984 }, { "name": "model.layers.18.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 12730368 }, { "name": "model.layers.18.self_attn.c_attn.q_weight", "shape": [ 5120, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 12730624 }, { "name": "model.layers.18.self_attn.c_attn.q_scale", "shape": [ 5120, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 655360, "byteOffset": 17973504 }, { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 2048, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 18628864 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 2048, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 524288, "byteOffset": 22823168 }, { "name": "model.layers.18.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23347456 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 23347712 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 23351808 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 23355904 } ], "md5sum": "50b62560e07cf74687e475dedaf384ba" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.19.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 1536, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "548bac077c1c86342c984a3297b61dcd" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.19.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 1536, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "08ee066d2231fdec0c366f07cbcea9b5" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.20.mlp.moe_down_proj.q_weight", "shape": [ 128, 2048, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "2c6c95d15e3f7644b071fab41fb4a11c" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 23355904, "records": [ { "name": "model.layers.19.mlp.moe_down_proj.q_scale", "shape": [ 128, 2048, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.19.mlp.gate.q_weight", "shape": [ 128, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 131072, "byteOffset": 12582912 }, { "name": "model.layers.19.mlp.gate.q_scale", "shape": [ 128, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 12713984 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 12730368 }, { "name": "model.layers.19.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 12734464 }, { "name": "model.layers.19.self_attn.c_attn.q_weight", "shape": [ 5120, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 12734720 }, { "name": "model.layers.19.self_attn.c_attn.q_scale", "shape": [ 5120, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 655360, "byteOffset": 17977600 }, { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 2048, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 18632960 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 2048, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 524288, "byteOffset": 22827264 }, { "name": "model.layers.19.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23351552 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 23351808 } ], "md5sum": "da4aafbc1598c03a6b97d0586bc8bbbb" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.20.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 1536, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "3c2da17e5d37799cd04359e9a8afd4f8" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.20.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 1536, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "8ed9581a388555f22f5db506f33ebd8f" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.21.mlp.moe_down_proj.q_weight", "shape": [ 128, 2048, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "047f0124f450cced2e83b892f2d61bb9" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 23351808, "records": [ { "name": "model.layers.20.mlp.moe_down_proj.q_scale", "shape": [ 128, 2048, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.20.mlp.gate.q_weight", "shape": [ 128, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 131072, "byteOffset": 12582912 }, { "name": "model.layers.20.mlp.gate.q_scale", "shape": [ 128, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 12713984 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 12730368 }, { "name": "model.layers.20.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 12734464 }, { "name": "model.layers.20.self_attn.c_attn.q_weight", "shape": [ 5120, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 12734720 }, { "name": "model.layers.20.self_attn.c_attn.q_scale", "shape": [ 5120, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 655360, "byteOffset": 17977600 }, { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 2048, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 18632960 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 2048, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 524288, "byteOffset": 22827264 }, { "name": "model.layers.20.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23351552 } ], "md5sum": "285ba6db99feeeda674209127e5c3b02" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.21.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 1536, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "301adae24499978cbfdc7ad4bf4852c1" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.21.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 1536, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "04533ee6b3b7abc6e32cdd2d50f1f1b0" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.3.mlp.moe_down_proj.q_weight", "shape": [ 128, 2048, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "ab2675b76bcd28c3ce7f89a218dac94a" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 23360000, "records": [ { "name": "model.layers.21.mlp.moe_down_proj.q_scale", "shape": [ 128, 2048, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.21.mlp.gate.q_weight", "shape": [ 128, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 131072, "byteOffset": 12582912 }, { "name": "model.layers.21.mlp.gate.q_scale", "shape": [ 128, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 12713984 }, { "name": "model.layers.21.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 12730368 }, { "name": "model.layers.21.self_attn.c_attn.q_weight", "shape": [ 5120, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 12730624 }, { "name": "model.layers.21.self_attn.c_attn.q_scale", "shape": [ 5120, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 655360, "byteOffset": 17973504 }, { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 2048, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 18628864 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 2048, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 524288, "byteOffset": 22823168 }, { "name": "model.layers.21.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23347456 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 23347712 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 23351808 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 23355904 } ], "md5sum": "d03574dab928a41c70a757899c3ff5f5" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.3.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 1536, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "5b99ab367c0d36e7f1cfe5fe2294267d" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.3.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 1536, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "c2d7aa2bfaded7e6aff3052d6bc5b0bc" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.4.mlp.moe_down_proj.q_weight", "shape": [ 128, 2048, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "dc38b614f941ca28e48a5e0c5d961bb6" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 23355904, "records": [ { "name": "model.layers.3.mlp.moe_down_proj.q_scale", "shape": [ 128, 2048, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.3.mlp.gate.q_weight", "shape": [ 128, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 131072, "byteOffset": 12582912 }, { "name": "model.layers.3.mlp.gate.q_scale", "shape": [ 128, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 12713984 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 12730368 }, { "name": "model.layers.3.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 12734464 }, { "name": "model.layers.3.self_attn.c_attn.q_weight", "shape": [ 5120, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 12734720 }, { "name": "model.layers.3.self_attn.c_attn.q_scale", "shape": [ 5120, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 655360, "byteOffset": 17977600 }, { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 2048, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 18632960 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 2048, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 524288, "byteOffset": 22827264 }, { "name": "model.layers.3.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23351552 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 23351808 } ], "md5sum": "366f9ad4b89c6105f7a0e4e8b08aea70" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.4.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 1536, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "27b01aefc75badc8b087ea040a99478a" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.4.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 1536, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "be6fb966eb0b54f0446ce0facc54d95e" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.5.mlp.moe_down_proj.q_weight", "shape": [ 128, 2048, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "5bea60c82eba19f5b1387ffbf3697553" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 23351808, "records": [ { "name": "model.layers.4.mlp.moe_down_proj.q_scale", "shape": [ 128, 2048, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.4.mlp.gate.q_weight", "shape": [ 128, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 131072, "byteOffset": 12582912 }, { "name": "model.layers.4.mlp.gate.q_scale", "shape": [ 128, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 12713984 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 12730368 }, { "name": "model.layers.4.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 12734464 }, { "name": "model.layers.4.self_attn.c_attn.q_weight", "shape": [ 5120, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 12734720 }, { "name": "model.layers.4.self_attn.c_attn.q_scale", "shape": [ 5120, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 655360, "byteOffset": 17977600 }, { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 2048, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 18632960 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 2048, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 524288, "byteOffset": 22827264 }, { "name": "model.layers.4.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23351552 } ], "md5sum": "049ba9438ff864dc4184c9bc6454c959" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.5.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 1536, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "8f2847e075b0e3f2cda1951726bdc789" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.5.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 1536, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "272d51f1a51da5b6dc366a11f0721023" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.22.mlp.moe_down_proj.q_weight", "shape": [ 128, 2048, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "c8a2ca05de25faf24b0a443964c0df17" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 23360000, "records": [ { "name": "model.layers.5.mlp.moe_down_proj.q_scale", "shape": [ 128, 2048, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.5.mlp.gate.q_weight", "shape": [ 128, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 131072, "byteOffset": 12582912 }, { "name": "model.layers.5.mlp.gate.q_scale", "shape": [ 128, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 12713984 }, { "name": "model.layers.5.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 12730368 }, { "name": "model.layers.5.self_attn.c_attn.q_weight", "shape": [ 5120, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 12730624 }, { "name": "model.layers.5.self_attn.c_attn.q_scale", "shape": [ 5120, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 655360, "byteOffset": 17973504 }, { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 2048, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 18628864 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 2048, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 524288, "byteOffset": 22823168 }, { "name": "model.layers.5.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23347456 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 23347712 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 23351808 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 23355904 } ], "md5sum": "4a485464030866fe192fc0b3b59eb4c9" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.22.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 1536, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "b3bc888d3ecd16ad71826b7cf8ffdd06" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.22.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 1536, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "f55c9aaeb4b677e4a2c977c7ba2ba2b5" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.23.mlp.moe_down_proj.q_weight", "shape": [ 128, 2048, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "4584cda4f544d7eb1ea307e4e8901f3e" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 23355904, "records": [ { "name": "model.layers.22.mlp.moe_down_proj.q_scale", "shape": [ 128, 2048, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.22.mlp.gate.q_weight", "shape": [ 128, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 131072, "byteOffset": 12582912 }, { "name": "model.layers.22.mlp.gate.q_scale", "shape": [ 128, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 12713984 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 12730368 }, { "name": "model.layers.22.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 12734464 }, { "name": "model.layers.22.self_attn.c_attn.q_weight", "shape": [ 5120, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 12734720 }, { "name": "model.layers.22.self_attn.c_attn.q_scale", "shape": [ 5120, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 655360, "byteOffset": 17977600 }, { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 2048, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 18632960 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 2048, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 524288, "byteOffset": 22827264 }, { "name": "model.layers.22.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23351552 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 23351808 } ], "md5sum": "b20c8030c874d993d8c08a984aa16628" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.23.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 1536, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "46bcf914c5389a5f2c2a8f4ea9fb0c92" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.23.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 1536, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "69680da3ec5ebaae20ff11777ba2bea1" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.24.mlp.moe_down_proj.q_weight", "shape": [ 128, 2048, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "95de708b20ccb983a8cd70cce732e813" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 23355904, "records": [ { "name": "model.layers.23.mlp.moe_down_proj.q_scale", "shape": [ 128, 2048, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.23.mlp.gate.q_weight", "shape": [ 128, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 131072, "byteOffset": 12582912 }, { "name": "model.layers.23.mlp.gate.q_scale", "shape": [ 128, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 12713984 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 12730368 }, { "name": "model.layers.23.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 12734464 }, { "name": "model.layers.23.self_attn.c_attn.q_weight", "shape": [ 5120, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 12734720 }, { "name": "model.layers.23.self_attn.c_attn.q_scale", "shape": [ 5120, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 655360, "byteOffset": 17977600 }, { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 2048, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 18632960 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 2048, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 524288, "byteOffset": 22827264 }, { "name": "model.layers.23.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23351552 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 23351808 } ], "md5sum": "e5b5ec13583b66c20f0adb5757fd0ae4" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.24.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 1536, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "f44a681dd7d03857acc86a923df9348e" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.24.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 1536, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "1d0a8ae31f755c8f62c4328cbc80aa47" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.25.mlp.moe_down_proj.q_weight", "shape": [ 128, 2048, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "237b2dd73f1e4036186f707fee09ef5f" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 23351808, "records": [ { "name": "model.layers.24.mlp.moe_down_proj.q_scale", "shape": [ 128, 2048, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.24.mlp.gate.q_weight", "shape": [ 128, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 131072, "byteOffset": 12582912 }, { "name": "model.layers.24.mlp.gate.q_scale", "shape": [ 128, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 12713984 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 12730368 }, { "name": "model.layers.24.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 12734464 }, { "name": "model.layers.24.self_attn.c_attn.q_weight", "shape": [ 5120, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 12734720 }, { "name": "model.layers.24.self_attn.c_attn.q_scale", "shape": [ 5120, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 655360, "byteOffset": 17977600 }, { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 2048, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 18632960 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 2048, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 524288, "byteOffset": 22827264 }, { "name": "model.layers.24.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23351552 } ], "md5sum": "8c4eef3b5387a5f21d84d5e3a8cb7abc" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.25.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 1536, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "18420be4e138fc1e95e36fec72bd7116" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.25.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 1536, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "189ce52f8c15e00db93ce956b60db0f7" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.26.mlp.moe_down_proj.q_weight", "shape": [ 128, 2048, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "b42728bce1844ea6b82d27dbbe26be73" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 23360000, "records": [ { "name": "model.layers.25.mlp.moe_down_proj.q_scale", "shape": [ 128, 2048, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.25.mlp.gate.q_weight", "shape": [ 128, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 131072, "byteOffset": 12582912 }, { "name": "model.layers.25.mlp.gate.q_scale", "shape": [ 128, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 12713984 }, { "name": "model.layers.25.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 12730368 }, { "name": "model.layers.25.self_attn.c_attn.q_weight", "shape": [ 5120, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 12730624 }, { "name": "model.layers.25.self_attn.c_attn.q_scale", "shape": [ 5120, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 655360, "byteOffset": 17973504 }, { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 2048, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 18628864 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 2048, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 524288, "byteOffset": 22823168 }, { "name": "model.layers.25.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23347456 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 23347712 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 23351808 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 23355904 } ], "md5sum": "7db2c3212522865d9e3e0217b11d64d1" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.26.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 1536, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "44c9de32d67c613d2bebf29f1f80e846" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.26.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 1536, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "105d59e8c3aeee7c7a9905953d735c7c" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.27.mlp.moe_down_proj.q_weight", "shape": [ 128, 2048, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "3d3e6cd2fe5524e80ea4c5c684b66c73" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 23355904, "records": [ { "name": "model.layers.26.mlp.moe_down_proj.q_scale", "shape": [ 128, 2048, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.26.mlp.gate.q_weight", "shape": [ 128, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 131072, "byteOffset": 12582912 }, { "name": "model.layers.26.mlp.gate.q_scale", "shape": [ 128, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 12713984 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 12730368 }, { "name": "model.layers.26.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 12734464 }, { "name": "model.layers.26.self_attn.c_attn.q_weight", "shape": [ 5120, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 12734720 }, { "name": "model.layers.26.self_attn.c_attn.q_scale", "shape": [ 5120, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 655360, "byteOffset": 17977600 }, { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 2048, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 18632960 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 2048, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 524288, "byteOffset": 22827264 }, { "name": "model.layers.26.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23351552 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 23351808 } ], "md5sum": "9f9d8071e915322b5b6dde5019fdced2" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.27.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 1536, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "87e8ff9e842c2f7567cfc4b60f69e9b7" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.27.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 1536, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "94b82601395e07e167c8585382d1390a" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.28.mlp.moe_down_proj.q_weight", "shape": [ 128, 2048, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "184dea4f7138594134fac7848d7fdfc0" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 23351808, "records": [ { "name": "model.layers.27.mlp.moe_down_proj.q_scale", "shape": [ 128, 2048, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.27.mlp.gate.q_weight", "shape": [ 128, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 131072, "byteOffset": 12582912 }, { "name": "model.layers.27.mlp.gate.q_scale", "shape": [ 128, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 12713984 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 12730368 }, { "name": "model.layers.27.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 12734464 }, { "name": "model.layers.27.self_attn.c_attn.q_weight", "shape": [ 5120, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 12734720 }, { "name": "model.layers.27.self_attn.c_attn.q_scale", "shape": [ 5120, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 655360, "byteOffset": 17977600 }, { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 2048, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 18632960 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 2048, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 524288, "byteOffset": 22827264 }, { "name": "model.layers.27.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23351552 } ], "md5sum": "606524aa869663e6ab05f9207ef260d4" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.28.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 1536, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "1a3f8630625d8e98544a04f55b939c8a" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.28.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 1536, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "168bc6ac088fcd357617512ebe3fba95" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.29.mlp.moe_down_proj.q_weight", "shape": [ 128, 2048, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "e2cdd0e9c201fe9c870d484eb7f34391" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 23360000, "records": [ { "name": "model.layers.28.mlp.moe_down_proj.q_scale", "shape": [ 128, 2048, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.28.mlp.gate.q_weight", "shape": [ 128, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 131072, "byteOffset": 12582912 }, { "name": "model.layers.28.mlp.gate.q_scale", "shape": [ 128, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 12713984 }, { "name": "model.layers.28.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 12730368 }, { "name": "model.layers.28.self_attn.c_attn.q_weight", "shape": [ 5120, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 12730624 }, { "name": "model.layers.28.self_attn.c_attn.q_scale", "shape": [ 5120, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 655360, "byteOffset": 17973504 }, { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 2048, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 18628864 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 2048, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 524288, "byteOffset": 22823168 }, { "name": "model.layers.28.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23347456 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 23347712 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 23351808 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 23355904 } ], "md5sum": "a7a5d2d670ae5c39def9a85ae98db6da" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.29.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 1536, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "e3aac8f04817abcc8c693c0c34f85493" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.29.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 1536, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "691ed7708ec4e788185fc1734e6e38c5" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.30.mlp.moe_down_proj.q_weight", "shape": [ 128, 2048, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "50f6d65f179d2b549fb9635d7308ce80" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 23355904, "records": [ { "name": "model.layers.29.mlp.moe_down_proj.q_scale", "shape": [ 128, 2048, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.29.mlp.gate.q_weight", "shape": [ 128, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 131072, "byteOffset": 12582912 }, { "name": "model.layers.29.mlp.gate.q_scale", "shape": [ 128, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 12713984 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 12730368 }, { "name": "model.layers.29.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 12734464 }, { "name": "model.layers.29.self_attn.c_attn.q_weight", "shape": [ 5120, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 12734720 }, { "name": "model.layers.29.self_attn.c_attn.q_scale", "shape": [ 5120, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 655360, "byteOffset": 17977600 }, { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 2048, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 18632960 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 2048, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 524288, "byteOffset": 22827264 }, { "name": "model.layers.29.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23351552 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 23351808 } ], "md5sum": "7df4f4ca90b422153a1f491d80b2832a" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.30.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 1536, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "fbfee96bb57e4999ed6456394f8849bf" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.30.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 1536, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "18e5f4d1eba9ca58464608d3262d2f2f" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.31.mlp.moe_down_proj.q_weight", "shape": [ 128, 2048, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "14617ab652b37ee78400b6b0a25f8c42" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 23351808, "records": [ { "name": "model.layers.30.mlp.moe_down_proj.q_scale", "shape": [ 128, 2048, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.30.mlp.gate.q_weight", "shape": [ 128, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 131072, "byteOffset": 12582912 }, { "name": "model.layers.30.mlp.gate.q_scale", "shape": [ 128, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 12713984 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 12730368 }, { "name": "model.layers.30.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 12734464 }, { "name": "model.layers.30.self_attn.c_attn.q_weight", "shape": [ 5120, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 12734720 }, { "name": "model.layers.30.self_attn.c_attn.q_scale", "shape": [ 5120, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 655360, "byteOffset": 17977600 }, { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 2048, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 18632960 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 2048, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 524288, "byteOffset": 22827264 }, { "name": "model.layers.30.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23351552 } ], "md5sum": "5cfb421f49f7afb0840ea62cfbfc85a5" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.31.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 1536, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "45c17230a9e91631b9bb83dbc2c22530" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.31.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 1536, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "1cd8cd19beb930986dd2347302edebef" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.32.mlp.moe_down_proj.q_weight", "shape": [ 128, 2048, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "4439c4b12fdf64548f75965c12e078c2" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 23360000, "records": [ { "name": "model.layers.31.mlp.moe_down_proj.q_scale", "shape": [ 128, 2048, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.31.mlp.gate.q_weight", "shape": [ 128, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 131072, "byteOffset": 12582912 }, { "name": "model.layers.31.mlp.gate.q_scale", "shape": [ 128, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 12713984 }, { "name": "model.layers.31.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 12730368 }, { "name": "model.layers.31.self_attn.c_attn.q_weight", "shape": [ 5120, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 12730624 }, { "name": "model.layers.31.self_attn.c_attn.q_scale", "shape": [ 5120, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 655360, "byteOffset": 17973504 }, { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 2048, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 18628864 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 2048, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 524288, "byteOffset": 22823168 }, { "name": "model.layers.31.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23347456 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 23347712 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 23351808 }, { "name": "model.layers.32.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 23355904 } ], "md5sum": "23670cf80a15ab674430ff322312b1df" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.32.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 1536, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "e2562e8944e4a2b602697852e7c1c683" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.32.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 1536, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "69df169ac7ecb3f545a19758e6c59e4c" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.33.mlp.moe_down_proj.q_weight", "shape": [ 128, 2048, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "9396488283ef89fc3db8f7168f5a691b" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 23355904, "records": [ { "name": "model.layers.32.mlp.moe_down_proj.q_scale", "shape": [ 128, 2048, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.32.mlp.gate.q_weight", "shape": [ 128, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 131072, "byteOffset": 12582912 }, { "name": "model.layers.32.mlp.gate.q_scale", "shape": [ 128, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 12713984 }, { "name": "model.layers.32.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 12730368 }, { "name": "model.layers.32.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 12734464 }, { "name": "model.layers.32.self_attn.c_attn.q_weight", "shape": [ 5120, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 12734720 }, { "name": "model.layers.32.self_attn.c_attn.q_scale", "shape": [ 5120, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 655360, "byteOffset": 17977600 }, { "name": "model.layers.32.self_attn.o_proj.q_weight", "shape": [ 2048, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 18632960 }, { "name": "model.layers.32.self_attn.o_proj.q_scale", "shape": [ 2048, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 524288, "byteOffset": 22827264 }, { "name": "model.layers.32.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23351552 }, { "name": "model.layers.33.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 23351808 } ], "md5sum": "b9a3ad96fc58d66e7f75771aff446fd2" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.33.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 1536, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "c915bc095a1957aec2c8093fca2fe41f" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.33.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 1536, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "6bcfa953470264c63b30653034ed6655" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.34.mlp.moe_down_proj.q_weight", "shape": [ 128, 2048, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "deb51d16b8d81864f2c265973224930a" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 23351808, "records": [ { "name": "model.layers.33.mlp.moe_down_proj.q_scale", "shape": [ 128, 2048, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.33.mlp.gate.q_weight", "shape": [ 128, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 131072, "byteOffset": 12582912 }, { "name": "model.layers.33.mlp.gate.q_scale", "shape": [ 128, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 12713984 }, { "name": "model.layers.33.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 12730368 }, { "name": "model.layers.33.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 12734464 }, { "name": "model.layers.33.self_attn.c_attn.q_weight", "shape": [ 5120, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 12734720 }, { "name": "model.layers.33.self_attn.c_attn.q_scale", "shape": [ 5120, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 655360, "byteOffset": 17977600 }, { "name": "model.layers.33.self_attn.o_proj.q_weight", "shape": [ 2048, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 18632960 }, { "name": "model.layers.33.self_attn.o_proj.q_scale", "shape": [ 2048, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 524288, "byteOffset": 22827264 }, { "name": "model.layers.33.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23351552 } ], "md5sum": "833be77557aaaaec18ba3746234167de" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.34.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 1536, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "9616436e339c351820ec54a07aa47b7a" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.34.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 1536, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "695b807fc68968d9ccfd83175ef05a2c" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.35.mlp.moe_down_proj.q_weight", "shape": [ 128, 2048, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "3d73661c4f6414523264a9d648993c80" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 23360000, "records": [ { "name": "model.layers.34.mlp.moe_down_proj.q_scale", "shape": [ 128, 2048, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.34.mlp.gate.q_weight", "shape": [ 128, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 131072, "byteOffset": 12582912 }, { "name": "model.layers.34.mlp.gate.q_scale", "shape": [ 128, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 12713984 }, { "name": "model.layers.34.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 12730368 }, { "name": "model.layers.34.self_attn.c_attn.q_weight", "shape": [ 5120, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 12730624 }, { "name": "model.layers.34.self_attn.c_attn.q_scale", "shape": [ 5120, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 655360, "byteOffset": 17973504 }, { "name": "model.layers.34.self_attn.o_proj.q_weight", "shape": [ 2048, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 18628864 }, { "name": "model.layers.34.self_attn.o_proj.q_scale", "shape": [ 2048, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 524288, "byteOffset": 22823168 }, { "name": "model.layers.34.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23347456 }, { "name": "model.layers.34.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 23347712 }, { "name": "model.layers.34.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 23351808 }, { "name": "model.layers.35.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 23355904 } ], "md5sum": "749b1cd0d1a65f233408f1bc07911440" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.35.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 1536, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "0376e2e1c15d7a3763ff0ebf686a25aa" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.35.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 1536, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "eab917e19579773ea1b5162a512a40c5" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.36.mlp.moe_down_proj.q_weight", "shape": [ 128, 2048, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "585ae9e3862a9f7d44efec292f1b435c" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 23355904, "records": [ { "name": "model.layers.35.mlp.moe_down_proj.q_scale", "shape": [ 128, 2048, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.35.mlp.gate.q_weight", "shape": [ 128, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 131072, "byteOffset": 12582912 }, { "name": "model.layers.35.mlp.gate.q_scale", "shape": [ 128, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 12713984 }, { "name": "model.layers.35.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 12730368 }, { "name": "model.layers.35.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 12734464 }, { "name": "model.layers.35.self_attn.c_attn.q_weight", "shape": [ 5120, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 12734720 }, { "name": "model.layers.35.self_attn.c_attn.q_scale", "shape": [ 5120, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 655360, "byteOffset": 17977600 }, { "name": "model.layers.35.self_attn.o_proj.q_weight", "shape": [ 2048, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 18632960 }, { "name": "model.layers.35.self_attn.o_proj.q_scale", "shape": [ 2048, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 524288, "byteOffset": 22827264 }, { "name": "model.layers.35.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23351552 }, { "name": "model.layers.36.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 23351808 } ], "md5sum": "bbf3ad5bda1f427c2dac415700ee7afc" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.36.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 1536, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "e1b77dbec9b33bfe0295409d878a75fd" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.36.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 1536, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "47a17b4b5bedbf37cd6cd02bc552be51" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.37.mlp.moe_down_proj.q_weight", "shape": [ 128, 2048, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "e9316c9d4d38faff633415835fd906e9" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 23355904, "records": [ { "name": "model.layers.36.mlp.moe_down_proj.q_scale", "shape": [ 128, 2048, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.36.mlp.gate.q_weight", "shape": [ 128, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 131072, "byteOffset": 12582912 }, { "name": "model.layers.36.mlp.gate.q_scale", "shape": [ 128, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 12713984 }, { "name": "model.layers.36.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 12730368 }, { "name": "model.layers.36.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 12734464 }, { "name": "model.layers.36.self_attn.c_attn.q_weight", "shape": [ 5120, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 12734720 }, { "name": "model.layers.36.self_attn.c_attn.q_scale", "shape": [ 5120, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 655360, "byteOffset": 17977600 }, { "name": "model.layers.36.self_attn.o_proj.q_weight", "shape": [ 2048, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 18632960 }, { "name": "model.layers.36.self_attn.o_proj.q_scale", "shape": [ 2048, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 524288, "byteOffset": 22827264 }, { "name": "model.layers.36.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23351552 }, { "name": "model.layers.37.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 23351808 } ], "md5sum": "99b7980f2a156a9fbef5e22ea65801eb" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.37.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 1536, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "76d75cbfafcaebeee3748c51e7f80ef9" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.37.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 1536, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "8b619dbd2b6aea099b22f338010eeb51" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.38.mlp.moe_down_proj.q_weight", "shape": [ 128, 2048, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "f75086273c71bd32cd37269886de317a" }, { "dataPath": "params_shard_146.bin", "format": "raw-shard", "nbytes": 23355904, "records": [ { "name": "model.layers.37.mlp.moe_down_proj.q_scale", "shape": [ 128, 2048, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.37.mlp.gate.q_weight", "shape": [ 128, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 131072, "byteOffset": 12582912 }, { "name": "model.layers.37.mlp.gate.q_scale", "shape": [ 128, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 12713984 }, { "name": "model.layers.37.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 12730368 }, { "name": "model.layers.37.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 12734464 }, { "name": "model.layers.37.self_attn.c_attn.q_weight", "shape": [ 5120, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 12734720 }, { "name": "model.layers.37.self_attn.c_attn.q_scale", "shape": [ 5120, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 655360, "byteOffset": 17977600 }, { "name": "model.layers.37.self_attn.o_proj.q_weight", "shape": [ 2048, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 18632960 }, { "name": "model.layers.37.self_attn.o_proj.q_scale", "shape": [ 2048, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 524288, "byteOffset": 22827264 }, { "name": "model.layers.37.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23351552 }, { "name": "model.layers.38.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 23351808 } ], "md5sum": "1c92501eabf522814f049c0b79fecf26" }, { "dataPath": "params_shard_147.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.38.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 1536, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "a0be541b328561b88a05982d05cf1a2c" }, { "dataPath": "params_shard_148.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.38.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 1536, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "e9afa042c33bd3d8d9cfcf71acd4e613" }, { "dataPath": "params_shard_149.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.39.mlp.moe_down_proj.q_weight", "shape": [ 128, 2048, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "a0bc64c191ec881c9103f6485c844216" }, { "dataPath": "params_shard_150.bin", "format": "raw-shard", "nbytes": 23355904, "records": [ { "name": "model.layers.38.mlp.moe_down_proj.q_scale", "shape": [ 128, 2048, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.38.mlp.gate.q_weight", "shape": [ 128, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 131072, "byteOffset": 12582912 }, { "name": "model.layers.38.mlp.gate.q_scale", "shape": [ 128, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 12713984 }, { "name": "model.layers.38.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 12730368 }, { "name": "model.layers.38.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 12734464 }, { "name": "model.layers.38.self_attn.c_attn.q_weight", "shape": [ 5120, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 12734720 }, { "name": "model.layers.38.self_attn.c_attn.q_scale", "shape": [ 5120, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 655360, "byteOffset": 17977600 }, { "name": "model.layers.38.self_attn.o_proj.q_weight", "shape": [ 2048, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 18632960 }, { "name": "model.layers.38.self_attn.o_proj.q_scale", "shape": [ 2048, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 524288, "byteOffset": 22827264 }, { "name": "model.layers.38.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23351552 }, { "name": "model.layers.39.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 23351808 } ], "md5sum": "979bb1edc40691a4415f51464311465c" }, { "dataPath": "params_shard_151.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.39.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 1536, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "4ed6fb8469f622fc8c8224b5c2b89a0d" }, { "dataPath": "params_shard_152.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.39.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 1536, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "09a55e33393c66c71fc1a4a7892d6d59" }, { "dataPath": "params_shard_153.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.40.mlp.moe_down_proj.q_weight", "shape": [ 128, 2048, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "f3f416486c667fe90c0144904bd8c4ec" }, { "dataPath": "params_shard_154.bin", "format": "raw-shard", "nbytes": 23355904, "records": [ { "name": "model.layers.39.mlp.moe_down_proj.q_scale", "shape": [ 128, 2048, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.39.mlp.gate.q_weight", "shape": [ 128, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 131072, "byteOffset": 12582912 }, { "name": "model.layers.39.mlp.gate.q_scale", "shape": [ 128, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 12713984 }, { "name": "model.layers.39.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 12730368 }, { "name": "model.layers.39.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 12734464 }, { "name": "model.layers.39.self_attn.c_attn.q_weight", "shape": [ 5120, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 12734720 }, { "name": "model.layers.39.self_attn.c_attn.q_scale", "shape": [ 5120, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 655360, "byteOffset": 17977600 }, { "name": "model.layers.39.self_attn.o_proj.q_weight", "shape": [ 2048, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 18632960 }, { "name": "model.layers.39.self_attn.o_proj.q_scale", "shape": [ 2048, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 524288, "byteOffset": 22827264 }, { "name": "model.layers.39.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23351552 }, { "name": "model.layers.40.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 23351808 } ], "md5sum": "e2ee46baa3b797e2801bb806695de20e" }, { "dataPath": "params_shard_155.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.40.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 1536, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "066bef88adbac0def8a27239a6585e28" }, { "dataPath": "params_shard_156.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.40.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 1536, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "3697e3dca7549e2675459c257fdd6492" }, { "dataPath": "params_shard_157.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.41.mlp.moe_down_proj.q_weight", "shape": [ 128, 2048, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "e65de28b65a93b5b861fe276a0bc9076" }, { "dataPath": "params_shard_158.bin", "format": "raw-shard", "nbytes": 23351808, "records": [ { "name": "model.layers.40.mlp.moe_down_proj.q_scale", "shape": [ 128, 2048, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.40.mlp.gate.q_weight", "shape": [ 128, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 131072, "byteOffset": 12582912 }, { "name": "model.layers.40.mlp.gate.q_scale", "shape": [ 128, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 12713984 }, { "name": "model.layers.40.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 12730368 }, { "name": "model.layers.40.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 12734464 }, { "name": "model.layers.40.self_attn.c_attn.q_weight", "shape": [ 5120, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 12734720 }, { "name": "model.layers.40.self_attn.c_attn.q_scale", "shape": [ 5120, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 655360, "byteOffset": 17977600 }, { "name": "model.layers.40.self_attn.o_proj.q_weight", "shape": [ 2048, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 18632960 }, { "name": "model.layers.40.self_attn.o_proj.q_scale", "shape": [ 2048, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 524288, "byteOffset": 22827264 }, { "name": "model.layers.40.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23351552 } ], "md5sum": "387db339878e3531a910c9a26be097c8" }, { "dataPath": "params_shard_159.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.41.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 1536, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "3178f3d44188f3dfeb8157c2edb5fbd3" }, { "dataPath": "params_shard_160.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.41.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 1536, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "8a67489d32c50f740dc3f0753920c9b6" }, { "dataPath": "params_shard_161.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.42.mlp.moe_down_proj.q_weight", "shape": [ 128, 2048, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "300ffb618dff7314bf9aa66e3304d2ea" }, { "dataPath": "params_shard_162.bin", "format": "raw-shard", "nbytes": 23360000, "records": [ { "name": "model.layers.41.mlp.moe_down_proj.q_scale", "shape": [ 128, 2048, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.41.mlp.gate.q_weight", "shape": [ 128, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 131072, "byteOffset": 12582912 }, { "name": "model.layers.41.mlp.gate.q_scale", "shape": [ 128, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 12713984 }, { "name": "model.layers.41.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 12730368 }, { "name": "model.layers.41.self_attn.c_attn.q_weight", "shape": [ 5120, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 12730624 }, { "name": "model.layers.41.self_attn.c_attn.q_scale", "shape": [ 5120, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 655360, "byteOffset": 17973504 }, { "name": "model.layers.41.self_attn.o_proj.q_weight", "shape": [ 2048, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 18628864 }, { "name": "model.layers.41.self_attn.o_proj.q_scale", "shape": [ 2048, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 524288, "byteOffset": 22823168 }, { "name": "model.layers.41.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23347456 }, { "name": "model.layers.41.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 23347712 }, { "name": "model.layers.41.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 23351808 }, { "name": "model.layers.42.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 23355904 } ], "md5sum": "551cb6ca31eaa612dbbf63d788c02aa0" }, { "dataPath": "params_shard_163.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.42.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 1536, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "14af0aa9b2c1d07f777bc7e97644055c" }, { "dataPath": "params_shard_164.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.42.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 1536, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "1e300c1bc7ed8878a204936ed80a24ab" }, { "dataPath": "params_shard_165.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.43.mlp.moe_down_proj.q_weight", "shape": [ 128, 2048, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "9307e9aeb2087097a09dec2e9aa6646c" }, { "dataPath": "params_shard_166.bin", "format": "raw-shard", "nbytes": 23355904, "records": [ { "name": "model.layers.42.mlp.moe_down_proj.q_scale", "shape": [ 128, 2048, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.42.mlp.gate.q_weight", "shape": [ 128, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 131072, "byteOffset": 12582912 }, { "name": "model.layers.42.mlp.gate.q_scale", "shape": [ 128, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 12713984 }, { "name": "model.layers.42.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 12730368 }, { "name": "model.layers.42.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 12734464 }, { "name": "model.layers.42.self_attn.c_attn.q_weight", "shape": [ 5120, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 12734720 }, { "name": "model.layers.42.self_attn.c_attn.q_scale", "shape": [ 5120, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 655360, "byteOffset": 17977600 }, { "name": "model.layers.42.self_attn.o_proj.q_weight", "shape": [ 2048, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 18632960 }, { "name": "model.layers.42.self_attn.o_proj.q_scale", "shape": [ 2048, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 524288, "byteOffset": 22827264 }, { "name": "model.layers.42.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23351552 }, { "name": "model.layers.43.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 23351808 } ], "md5sum": "bba85c9231a3e4df1570905b73cadd74" }, { "dataPath": "params_shard_167.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.43.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 1536, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "302e8ac94ff446d0750a34319c770447" }, { "dataPath": "params_shard_168.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.43.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 1536, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "55059e3d12f9694bcd0319312b675a66" }, { "dataPath": "params_shard_169.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.44.mlp.moe_down_proj.q_weight", "shape": [ 128, 2048, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "3f8495cddc9bbdcb2bae19c8623f2be0" }, { "dataPath": "params_shard_170.bin", "format": "raw-shard", "nbytes": 23351808, "records": [ { "name": "model.layers.43.mlp.moe_down_proj.q_scale", "shape": [ 128, 2048, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.43.mlp.gate.q_weight", "shape": [ 128, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 131072, "byteOffset": 12582912 }, { "name": "model.layers.43.mlp.gate.q_scale", "shape": [ 128, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 12713984 }, { "name": "model.layers.43.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 12730368 }, { "name": "model.layers.43.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 12734464 }, { "name": "model.layers.43.self_attn.c_attn.q_weight", "shape": [ 5120, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 12734720 }, { "name": "model.layers.43.self_attn.c_attn.q_scale", "shape": [ 5120, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 655360, "byteOffset": 17977600 }, { "name": "model.layers.43.self_attn.o_proj.q_weight", "shape": [ 2048, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 18632960 }, { "name": "model.layers.43.self_attn.o_proj.q_scale", "shape": [ 2048, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 524288, "byteOffset": 22827264 }, { "name": "model.layers.43.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23351552 } ], "md5sum": "b6eee69d96c299c1bfd77afa0dbbedee" }, { "dataPath": "params_shard_171.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.44.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 1536, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "84cfc6de999685a6fbc8802289083737" }, { "dataPath": "params_shard_172.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.44.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 1536, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "d504a72b1f06140880c78be40179e1ee" }, { "dataPath": "params_shard_173.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.45.mlp.moe_down_proj.q_weight", "shape": [ 128, 2048, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "ee9a2cf7f30498c304f748e4ea200d7f" }, { "dataPath": "params_shard_174.bin", "format": "raw-shard", "nbytes": 23360000, "records": [ { "name": "model.layers.44.mlp.moe_down_proj.q_scale", "shape": [ 128, 2048, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.44.mlp.gate.q_weight", "shape": [ 128, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 131072, "byteOffset": 12582912 }, { "name": "model.layers.44.mlp.gate.q_scale", "shape": [ 128, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 12713984 }, { "name": "model.layers.44.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 12730368 }, { "name": "model.layers.44.self_attn.c_attn.q_weight", "shape": [ 5120, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 12730624 }, { "name": "model.layers.44.self_attn.c_attn.q_scale", "shape": [ 5120, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 655360, "byteOffset": 17973504 }, { "name": "model.layers.44.self_attn.o_proj.q_weight", "shape": [ 2048, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 18628864 }, { "name": "model.layers.44.self_attn.o_proj.q_scale", "shape": [ 2048, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 524288, "byteOffset": 22823168 }, { "name": "model.layers.44.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23347456 }, { "name": "model.layers.44.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 23347712 }, { "name": "model.layers.44.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 23351808 }, { "name": "model.layers.45.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 23355904 } ], "md5sum": "06a51d21135b1c1f78adef743e43e7c7" }, { "dataPath": "params_shard_175.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.45.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 1536, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "5264d4175a3507843c6f73ef3e15f601" }, { "dataPath": "params_shard_176.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.45.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 1536, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "4a461f2d1547810c93fb4ae3d5109a88" }, { "dataPath": "params_shard_177.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.46.mlp.moe_down_proj.q_weight", "shape": [ 128, 2048, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "f0351f82b3f8dbd63a2c785e689dee21" }, { "dataPath": "params_shard_178.bin", "format": "raw-shard", "nbytes": 23355904, "records": [ { "name": "model.layers.45.mlp.moe_down_proj.q_scale", "shape": [ 128, 2048, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.45.mlp.gate.q_weight", "shape": [ 128, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 131072, "byteOffset": 12582912 }, { "name": "model.layers.45.mlp.gate.q_scale", "shape": [ 128, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 12713984 }, { "name": "model.layers.45.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 12730368 }, { "name": "model.layers.45.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 12734464 }, { "name": "model.layers.45.self_attn.c_attn.q_weight", "shape": [ 5120, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 12734720 }, { "name": "model.layers.45.self_attn.c_attn.q_scale", "shape": [ 5120, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 655360, "byteOffset": 17977600 }, { "name": "model.layers.45.self_attn.o_proj.q_weight", "shape": [ 2048, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 18632960 }, { "name": "model.layers.45.self_attn.o_proj.q_scale", "shape": [ 2048, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 524288, "byteOffset": 22827264 }, { "name": "model.layers.45.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23351552 }, { "name": "model.layers.46.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 23351808 } ], "md5sum": "0a049fb298631c5fa2e93404528fd0f1" }, { "dataPath": "params_shard_179.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.46.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 1536, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "0817bf9c803546c12c8f69d1401e7538" }, { "dataPath": "params_shard_180.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.46.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 1536, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "6196222dbfd96688c5b0a14980e178c1" }, { "dataPath": "params_shard_181.bin", "format": "raw-shard", "nbytes": 29397760, "records": [ { "name": "model.layers.46.mlp.moe_down_proj.q_scale", "shape": [ 128, 2048, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.46.mlp.gate.q_weight", "shape": [ 128, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 131072, "byteOffset": 12582912 }, { "name": "model.layers.46.mlp.gate.q_scale", "shape": [ 128, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 12713984 }, { "name": "model.layers.46.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 12730368 }, { "name": "model.layers.46.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 12734464 }, { "name": "model.layers.46.self_attn.c_attn.q_weight", "shape": [ 5120, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 12734720 }, { "name": "model.layers.46.self_attn.c_attn.q_scale", "shape": [ 5120, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 655360, "byteOffset": 17977600 }, { "name": "model.layers.46.self_attn.o_proj.q_weight", "shape": [ 2048, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 18632960 }, { "name": "model.layers.46.self_attn.o_proj.q_scale", "shape": [ 2048, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 524288, "byteOffset": 22827264 }, { "name": "model.layers.46.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23351552 }, { "name": "model.layers.47.mlp.gate.q_weight", "shape": [ 128, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 131072, "byteOffset": 23351808 }, { "name": "model.layers.47.mlp.gate.q_scale", "shape": [ 128, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23482880 }, { "name": "model.layers.47.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23499264 }, { "name": "model.layers.47.self_attn.c_attn.q_weight", "shape": [ 5120, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 23499520 }, { "name": "model.layers.47.self_attn.c_attn.q_scale", "shape": [ 5120, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 655360, "byteOffset": 28742400 } ], "md5sum": "d126739e90ede7332936da5fa437b7b3" }, { "dataPath": "params_shard_182.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.6.mlp.moe_down_proj.q_weight", "shape": [ 128, 2048, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "b91f318a1f4d17fc93f54cd994c1d7c7" }, { "dataPath": "params_shard_183.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.6.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 1536, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "b3841785f8b1a8131558a5b74d730a99" }, { "dataPath": "params_shard_184.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.6.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 1536, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "93d59da49424815f6aab8da375af31d0" }, { "dataPath": "params_shard_185.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.7.mlp.moe_down_proj.q_weight", "shape": [ 128, 2048, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "fbacb0f0d13ea29ab68c955e85a2bf30" }, { "dataPath": "params_shard_186.bin", "format": "raw-shard", "nbytes": 28087040, "records": [ { "name": "model.layers.47.self_attn.o_proj.q_weight", "shape": [ 2048, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.47.self_attn.o_proj.q_scale", "shape": [ 2048, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 524288, "byteOffset": 4194304 }, { "name": "model.layers.47.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 4718592 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 4718848 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 4722944 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 4727040 }, { "name": "model.layers.6.mlp.moe_down_proj.q_scale", "shape": [ 128, 2048, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 4731136 }, { "name": "model.layers.6.mlp.gate.q_weight", "shape": [ 128, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 131072, "byteOffset": 17314048 }, { "name": "model.layers.6.mlp.gate.q_scale", "shape": [ 128, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 17445120 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 17461504 }, { "name": "model.layers.6.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 17465600 }, { "name": "model.layers.6.self_attn.c_attn.q_weight", "shape": [ 5120, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 17465856 }, { "name": "model.layers.6.self_attn.c_attn.q_scale", "shape": [ 5120, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 655360, "byteOffset": 22708736 }, { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 2048, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 23364096 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 2048, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 524288, "byteOffset": 27558400 }, { "name": "model.layers.6.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 28082688 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 28082944 } ], "md5sum": "dfe539c75e8523f3daf047b94ef56614" }, { "dataPath": "params_shard_187.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.7.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 1536, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "8999ac18490caf240133d584c836f7a8" }, { "dataPath": "params_shard_188.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.7.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 1536, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "723134c7bc765a5604f0f96fd647017f" }, { "dataPath": "params_shard_189.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.8.mlp.moe_down_proj.q_weight", "shape": [ 128, 2048, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "fe0a379a1d5691b1d6cb8f1cb24d6134" }, { "dataPath": "params_shard_190.bin", "format": "raw-shard", "nbytes": 23355904, "records": [ { "name": "model.layers.7.mlp.moe_down_proj.q_scale", "shape": [ 128, 2048, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.7.mlp.gate.q_weight", "shape": [ 128, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 131072, "byteOffset": 12582912 }, { "name": "model.layers.7.mlp.gate.q_scale", "shape": [ 128, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 12713984 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 12730368 }, { "name": "model.layers.7.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 12734464 }, { "name": "model.layers.7.self_attn.c_attn.q_weight", "shape": [ 5120, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 12734720 }, { "name": "model.layers.7.self_attn.c_attn.q_scale", "shape": [ 5120, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 655360, "byteOffset": 17977600 }, { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 2048, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 18632960 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 2048, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 524288, "byteOffset": 22827264 }, { "name": "model.layers.7.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23351552 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 23351808 } ], "md5sum": "81c8ba0b992def42729fa7449e5bac5c" }, { "dataPath": "params_shard_191.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.8.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 1536, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "36e325a4254194f809c4fb06288657e0" }, { "dataPath": "params_shard_192.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.8.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 1536, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "ba219a1d1012605a06faf7391f061b69" }, { "dataPath": "params_shard_193.bin", "format": "raw-shard", "nbytes": 29397760, "records": [ { "name": "model.layers.8.mlp.moe_down_proj.q_scale", "shape": [ 128, 2048, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.8.mlp.gate.q_weight", "shape": [ 128, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 131072, "byteOffset": 12582912 }, { "name": "model.layers.8.mlp.gate.q_scale", "shape": [ 128, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 12713984 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 12730368 }, { "name": "model.layers.8.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 12734464 }, { "name": "model.layers.8.self_attn.c_attn.q_weight", "shape": [ 5120, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 12734720 }, { "name": "model.layers.8.self_attn.c_attn.q_scale", "shape": [ 5120, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 655360, "byteOffset": 17977600 }, { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 2048, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 18632960 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 2048, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 524288, "byteOffset": 22827264 }, { "name": "model.layers.8.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23351552 }, { "name": "model.layers.9.mlp.gate.q_weight", "shape": [ 128, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 131072, "byteOffset": 23351808 }, { "name": "model.layers.9.mlp.gate.q_scale", "shape": [ 128, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23482880 }, { "name": "model.layers.9.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23499264 }, { "name": "model.layers.9.self_attn.c_attn.q_weight", "shape": [ 5120, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 23499520 }, { "name": "model.layers.9.self_attn.c_attn.q_scale", "shape": [ 5120, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 655360, "byteOffset": 28742400 } ], "md5sum": "34c8a3f797e028785ec393395aaea35d" }, { "dataPath": "params_shard_194.bin", "format": "raw-shard", "nbytes": 4718848, "records": [ { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 2048, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 2048, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 524288, "byteOffset": 4194304 }, { "name": "model.layers.9.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 4718592 } ], "md5sum": "4bb07ff3fb60d8ebc0220a4a3976ea14" } ] }