JackBinary's picture
Add files using upload-large-folder tool
8510756 verified
{
"metadata": {
"ParamSize": 451,
"ParamBytes": 65527752704.0,
"BitsPerParam": 13.92728514370428
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 1557135360,
"records": [
{
"name": "lm_head.weight",
"shape": [
152064,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1557135360,
"byteOffset": 0
}
],
"md5sum": "c11cf3478e75b671ce4c41390a4b0b7c"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.58.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "08382f845f3c178125be02e28cce5815"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.58.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "1991da3ddf42fb8cb8a7e25eea939308"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.59.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "eaf699c19614ed5b28bdfc3bcdfc59a0"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.59.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "078d5a2669019619cf7ac7024b7929bd"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.59.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "86353f7ce0710142ccd2bef37c49e01e"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.59.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "75b54e342b2eec6ca88023483a946945"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.60.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "7a7e7a72f451b9f5d5ce2793b240c629"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.60.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "f482bf2553c5f67f3cba12e6b7cab478"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.60.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "2ee68ef95fe7655ed0b52651a607d504"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.60.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "8b077e411b1dfaa8f79bc16ef3a17fb2"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.61.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "ed77c541c31ab8829d13f560ec5f4355"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.61.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "85ad0336a80031f1419dfd410c94f652"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.61.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "d907d7e411e468bdf2dd9bb34c0f49df"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.61.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "1856554a4c5e40ce53673f05e94ce1f0"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.62.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "192e46b225038c15aa11137a0233e4f6"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.62.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "55c5da594f982688dfa871ea04c08967"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.62.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "a2e6142514ab2389b026ed5d2b173a5f"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.62.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "933e361673feda3e13b79ab391f97852"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.63.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "452af359f8f2e7c57cb733e0b6c0c4e3"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.63.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "721d102f022c882e2bf4bb9bf3033600"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.63.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "459aa217d75dafb2144b141f562b61eb"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.63.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "440a42b547dc002eb3a8bcd72d575a74"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 1557135360,
"records": [
{
"name": "model.embed_tokens.weight",
"shape": [
152064,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1557135360,
"byteOffset": 0
}
],
"md5sum": "7e52ab5dfe8e6a239b4720eace189e98"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.0.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "ace91badb3d3e1e4266ed959d1fca6d7"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "0f938aca4f9bfed6ff4058bb2d0c9335"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.0.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "12929ba2a9ccc0cd538dad56e0a3f596"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.0.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "dc2aeb200d9470e87d7f6f97fe144b3e"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.1.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "3e3203142858a5f212ba3c6f7dad182d"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "434d00a426c23b9eb524f147a9bf350e"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.1.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "ceb34c47eefcce073c44854f5267f733"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.1.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "6e9801985d11c36d2b6f87a25b4c4c61"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.2.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "a44584f15f7a2210d580631f08a4b0b7"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "3588fd1b9597394c853821e5284a4ae4"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.2.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "8cafe41db7db28f3004a517bde1c4ef6"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.2.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "1cdcc1dfe6301092599532f9a4454c46"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.3.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "9a6ebc4223b9196dc92f1b6d84681d95"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "af6a7733a525b4658718c5316f512e2f"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.3.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "1b0e5ef048c353eedc80754cc00b669c"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.3.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "dd5497f813a11bd7446659ee343c5c58"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.4.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "0be3d921d8cd7c9d379adf939280b37c"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "24840caccc52f2aab6077a1bed9d84c6"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.4.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "f963228f4897d218ae9b4a501bba63d2"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.4.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "ad9fe2ee932f296fa83642b6af3940c0"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.5.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "90708c54f8d552917eabdafe41ae64cd"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "1ef39a88e3683c6c21a1fccc3e3c0efe"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.5.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "cdfec41c8f527a5d6f8e320be8653511"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.5.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "6e70cdd636d8e3cb68aed158e6a4e5dc"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.6.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "67114310577007465a066cd8f8524b88"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "4529270bfaa535a30b7f41d99f58f10c"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.6.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "f274853986038be46388516964f6a29c"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.6.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "4774986aa3f2485bf14c00881b1a07c0"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.7.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "9525f9ad897375b271276fe3b8f27fc2"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "0810a15124713d0a8198c1857006354b"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.7.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "3b0015f538d7f2e069614abae2cb3587"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.7.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "6800a07f49887e3b0473ab877d0e8393"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "2c6d616a3ef37e0023605225b2dcc5a6"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.8.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "4ec4e1a92a8b73b669475ef8525384dd"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.8.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "17472ccb7af0a4ab9cc0b87d78a56b6a"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.10.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "a9d153badd4c4fc756ec25ff03c80463"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "77315db4c38410ec75241ef0106b83bc"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.10.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "3f40fca1c532bb4ef4ce29617388d3b4"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.10.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "1ca5be6a5f55c5a87ccfa4d435899d06"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.11.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "a96970618a9ea4733bee834fd27a81c2"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "3c0dc95dea6f6f7760fd7256a95c234d"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.11.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "e8a8c59c19d2ed2ecb91611740690436"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.11.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "344be3d1fe17bc816f4e0dc9b349e8f1"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.12.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "0212b22ca7c4b1d36f6270b0bb7be03b"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "8de04c1797aabaed325abccc6cfa9176"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.12.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "01d76ca02a55946d522171e853cccc78"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.12.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "12ba9c40c0d76ad4bf0f337822c2be57"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.13.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "c9e6f2659ceae052b9a34dfb7d48515b"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "4cebdcff986138ec67b9df88fa332dd9"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.13.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "a53ecb99cebed09207d71fd040508d03"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.13.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "c4603556f2b5caa335c76d2dd3928588"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.14.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "91695f78b2f9db83b2cfd5260ee72ebd"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "60bf6c3219205a0b58439dbb11efd3d9"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.14.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "0d48ef3eba0b702cb03f9caa9c2a4df2"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.14.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "1f8e90978f80efef4d7bc9206183cacb"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.15.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "7cae550b3e33cfed275fa9a83c367c70"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "7bc3046c68a83dc4b43cf1d361ad1f3e"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.15.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "b77f5e900843b449278461a0da57a51a"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.15.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "ae8d93aaa16d6567303335fe5444b589"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.16.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "ae0d84fc84c78d6235e5f51ba033eea8"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "e126e95b7354ee248424ab334a4595a9"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.16.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "d4fda2fce1a8e0120ce613d4d6c5a45a"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.16.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "14e202f0f72d5c67f6fe338a1ddbc356"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.17.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "92f32b0b5f17e56f074f11cfe7e1ae3b"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "b0625c80eeb0ca915b80106d0604e1c5"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.17.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "a1bc3275fa3ad4a199e998cf457d8029"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.17.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "475330ea0f3a0d42f462d2b2ff2c0696"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "056ed00ec6850da568925efde53ffadb"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.18.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "906ae0628d713939a32222aa48976ab2"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.18.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "5efd524b3d4a277dd124be9678f94e2f"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.8.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "407352f0c790ffe7158bf962f2c861de"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.9.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "20a3ad27e050c04d36c8a4cce4f77aba"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "4b724cc8354dbc4a357ba8528725c77f"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.9.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "53e7fe85adc3cf2a9653659e3595dfb1"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.9.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "af445d043881e3dfc8e11449f843198a"
},
{
"dataPath": "params_shard_99.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.18.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "95667c4db0eb3e7fe613e2f31817cfbf"
},
{
"dataPath": "params_shard_100.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.19.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "590db25395696f506482b182c2ab0f60"
},
{
"dataPath": "params_shard_101.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "b7dd4948e858d6fad56d9c981ed22adb"
},
{
"dataPath": "params_shard_102.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.19.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "86b918ecf2fafbf5a9e0db700f00af6d"
},
{
"dataPath": "params_shard_103.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.19.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "b66f6698ebee824a30983f7d892c0c65"
},
{
"dataPath": "params_shard_104.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.20.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "08daa0954c34852a19aaa097462d74be"
},
{
"dataPath": "params_shard_105.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "0f8c9da832c7386f59b9b6e82fddde8d"
},
{
"dataPath": "params_shard_106.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.20.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "437198888a20303cc75635c6893f5473"
},
{
"dataPath": "params_shard_107.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.20.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "b5be96ee728cabf8c1e65adee4b468b0"
},
{
"dataPath": "params_shard_108.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.21.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "d19f831287d919e1f4214f6ddbca6bc8"
},
{
"dataPath": "params_shard_109.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "3d931cead84ac8b60ad435d75cab4e4c"
},
{
"dataPath": "params_shard_110.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.21.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "e93e96f51c5f8eaad4e4eecd5303d7ea"
},
{
"dataPath": "params_shard_111.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.21.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "d83dc9e1e1c7a129e41739990cb38389"
},
{
"dataPath": "params_shard_112.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.22.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "424fe40438e009e5a20dc4b1ad96f772"
},
{
"dataPath": "params_shard_113.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "08d97d5daabb4eacab96e51441ce945e"
},
{
"dataPath": "params_shard_114.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.22.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "91ce9c9d7a3a221c81d2ef82595298d7"
},
{
"dataPath": "params_shard_115.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.22.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "942e77205affd0e30b9429a88c1518a2"
},
{
"dataPath": "params_shard_116.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.23.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "a09db7c3b5e6fb95255b045ef067fe61"
},
{
"dataPath": "params_shard_117.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "6e7c42f01ff92660a13318a6a3785b07"
},
{
"dataPath": "params_shard_118.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.23.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "ed08e7ef2d7bf43bf13f39f083b3527d"
},
{
"dataPath": "params_shard_119.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.23.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "0318886b973422467f8ca511bcc9a668"
},
{
"dataPath": "params_shard_120.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.24.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "d813e3c67148d223d17b81960416b88b"
},
{
"dataPath": "params_shard_121.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "10d0a346373d758d217f64cdc2adaece"
},
{
"dataPath": "params_shard_122.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.24.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "4e245cd2e1968ffd2ca9bc229a6ca055"
},
{
"dataPath": "params_shard_123.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.24.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "1033a56d50496d8adf14ecac2db7758d"
},
{
"dataPath": "params_shard_124.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.25.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "eb56b0aa69756e3882665717cda66219"
},
{
"dataPath": "params_shard_125.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "93490233219d2f5e7a7cad76926127e7"
},
{
"dataPath": "params_shard_126.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.25.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "f9b00317a840bcf6828d29a58732d17f"
},
{
"dataPath": "params_shard_127.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.25.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "5d225df46ede9fe84e5caae1d8875837"
},
{
"dataPath": "params_shard_128.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.26.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "3078c5de9d190052c630633cc383f4e6"
},
{
"dataPath": "params_shard_129.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "2c4a91bb75d35d793eb4199939ca98f1"
},
{
"dataPath": "params_shard_130.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.26.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "8fc29644343cc88f59ac2572e63ac594"
},
{
"dataPath": "params_shard_131.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.26.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "c2d67f7fbd78c58828a3e07d0d57aa86"
},
{
"dataPath": "params_shard_132.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.27.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "15e43141672697e610aea6fe7ce7f3b5"
},
{
"dataPath": "params_shard_133.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "a9c76bb50b75080ba00b4aa98cdcff8e"
},
{
"dataPath": "params_shard_134.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.27.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "11b8fa0417841b860933a4ef96c81fe0"
},
{
"dataPath": "params_shard_135.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.27.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "2be726b77935a45f3d5ce3e8baf55fb8"
},
{
"dataPath": "params_shard_136.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "5b1da532afc1509e409c6053378a20a3"
},
{
"dataPath": "params_shard_137.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.28.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "fa1c26c0fc514aa22c398e923fd28ba6"
},
{
"dataPath": "params_shard_138.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.28.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "636cb207241080246182a42935e20ba6"
},
{
"dataPath": "params_shard_139.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.28.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "6eaab88663ab140bfcb5a402a166cc12"
},
{
"dataPath": "params_shard_140.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.29.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "5d4a6188ba9ffe4d935fb8fe29e1a3cf"
},
{
"dataPath": "params_shard_141.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "07f80050d09a6fa2ce2f85cda9b440b8"
},
{
"dataPath": "params_shard_142.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.29.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "78a07eddbee3394e414b44072d6261a4"
},
{
"dataPath": "params_shard_143.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.29.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "1faeb6b644f5b209b4d705d73465fbd9"
},
{
"dataPath": "params_shard_144.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.30.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "912c91c22ebad17f9e771c9bfd81890d"
},
{
"dataPath": "params_shard_145.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "ee21593ae1e2738340bf07a3e2a5f086"
},
{
"dataPath": "params_shard_146.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.30.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "65b7f348577e3a3da25fef4067386b9f"
},
{
"dataPath": "params_shard_147.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.30.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "c7f551114323af7b5c2266869b4c85ef"
},
{
"dataPath": "params_shard_148.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.31.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "fd390325119ffbf3e64b08493d1f7a29"
},
{
"dataPath": "params_shard_149.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "a7fe8ea88483103f1cb154adad35e476"
},
{
"dataPath": "params_shard_150.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.31.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "88db0e0ffbfcc30ee1881414beddc147"
},
{
"dataPath": "params_shard_151.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.31.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "bc37accf4f8424f1980706d4d72cc06c"
},
{
"dataPath": "params_shard_152.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.32.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "018953b0932e4fa32ce227f4606b2bcf"
},
{
"dataPath": "params_shard_153.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.32.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "9752ff9c4d2ddf845918cfb0bfd5faff"
},
{
"dataPath": "params_shard_154.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.32.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "25a7879a7b9296000c5b18dafcf19b9c"
},
{
"dataPath": "params_shard_155.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.32.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "dc7cf91ebb8b87f8d22f1f93ab174c0b"
},
{
"dataPath": "params_shard_156.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.33.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "dd4376a46e978b28dfde6dbe6e713565"
},
{
"dataPath": "params_shard_157.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.33.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "0ff9c331aaf562091652f04a0eac2cd9"
},
{
"dataPath": "params_shard_158.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.33.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "52e807896b3888a33d3882de5c2028d3"
},
{
"dataPath": "params_shard_159.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.33.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "2fd4d329fd586344138dd0350bb52b78"
},
{
"dataPath": "params_shard_160.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.34.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "246c3b98c46a21c358ef8b787c23c505"
},
{
"dataPath": "params_shard_161.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.34.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "87d5da0bd6cee2da99f17e4762c4be3f"
},
{
"dataPath": "params_shard_162.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.34.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "20c395885fd80b96a01d8681e680fcb4"
},
{
"dataPath": "params_shard_163.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.34.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "bb24995e2a5ffe1383882d5af4cdfd91"
},
{
"dataPath": "params_shard_164.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.35.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "3e324dcfd38292bdec61fe1584944496"
},
{
"dataPath": "params_shard_165.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.35.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "7fbab8a1b213ff6aa179aeedb20b3678"
},
{
"dataPath": "params_shard_166.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.35.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "0aae830b676bf2d925df1ed63749c928"
},
{
"dataPath": "params_shard_167.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.35.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "646243ee0e76b1ddd557fb8f27ab99e3"
},
{
"dataPath": "params_shard_168.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.36.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "bceb3e735a28c847730c21ea8b891cb5"
},
{
"dataPath": "params_shard_169.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.36.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "f3fd210b815272c2bb92958cb63011bc"
},
{
"dataPath": "params_shard_170.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.36.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "aa5e36af555c0c60a1a368fef9117b76"
},
{
"dataPath": "params_shard_171.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.36.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "b014fa09adbb76e85f25ffb1b46e04db"
},
{
"dataPath": "params_shard_172.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.37.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "0582d0760b655e35d934a2cffde094fa"
},
{
"dataPath": "params_shard_173.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.37.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "820b899385a420f0e39509941774d454"
},
{
"dataPath": "params_shard_174.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.37.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "10d2dc9c6478458b59f60054a363a599"
},
{
"dataPath": "params_shard_175.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.37.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "3c1a68bc405ee2201a5732aebcbca44f"
},
{
"dataPath": "params_shard_176.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.38.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "acee256e1953072816d1c741d64e1dd7"
},
{
"dataPath": "params_shard_177.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.38.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "35df09cb7ddbdbedff32ae5997b901fe"
},
{
"dataPath": "params_shard_178.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.38.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "e35efdb5045467d18b2b05944fa6778e"
},
{
"dataPath": "params_shard_179.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.38.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "41001c6dc4ac3f5effef66fa774c340b"
},
{
"dataPath": "params_shard_180.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.39.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "5c44d929b3c0c6dfc7ea09611575ebcc"
},
{
"dataPath": "params_shard_181.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.39.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "e260930affc69708f2d1b6b4f9d2eeeb"
},
{
"dataPath": "params_shard_182.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.39.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "9918641ab660b66232faf7bab4707392"
},
{
"dataPath": "params_shard_183.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.39.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "d218902a7fd121f323d67185d6716511"
},
{
"dataPath": "params_shard_184.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.40.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "d887e1f37e2f3b277673e78990a0215c"
},
{
"dataPath": "params_shard_185.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.40.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "2791b05140c4c37e1f085575b7b641bf"
},
{
"dataPath": "params_shard_186.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.40.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "6ed0ca567f823dd29d310740090f5ebd"
},
{
"dataPath": "params_shard_187.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.40.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "bee7995ded99632d37113d0a067a793b"
},
{
"dataPath": "params_shard_188.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.41.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "6259840681453819fb8695e7b9934955"
},
{
"dataPath": "params_shard_189.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.41.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "afa5daed816174d6571319862ffb172c"
},
{
"dataPath": "params_shard_190.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.41.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "e71e0abea9452662189d7615a8a2bb0e"
},
{
"dataPath": "params_shard_191.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.41.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "169366e4241d2d330692db8533c4a05a"
},
{
"dataPath": "params_shard_192.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.42.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "96a0c58c4de9a97283cce30539469608"
},
{
"dataPath": "params_shard_193.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.42.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "56d078c57d3f9ece1e84053440a04d45"
},
{
"dataPath": "params_shard_194.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.42.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "7c4e35a1757f11eb5d4bbc6d65885c10"
},
{
"dataPath": "params_shard_195.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.42.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "588c0008660b3f44568884582e70e298"
},
{
"dataPath": "params_shard_196.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.43.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "c96af2e433b270ae05266bab8a369dd1"
},
{
"dataPath": "params_shard_197.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.43.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "dfaf243bdeadb028b210e0b2f7d352a3"
},
{
"dataPath": "params_shard_198.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.43.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "ce6d2408518a60c430a02a8cac541bd0"
},
{
"dataPath": "params_shard_199.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.43.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "b42d89de343997a9be6dde0ad48886ff"
},
{
"dataPath": "params_shard_200.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.44.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "d9bb502c230222137830935a00a86672"
},
{
"dataPath": "params_shard_201.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.44.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "943539d605b665440e55bb7f518f08f5"
},
{
"dataPath": "params_shard_202.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.44.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "5635cdacba20d39009aea9744521322c"
},
{
"dataPath": "params_shard_203.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.44.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "97589702de25c929b402cf9021a61731"
},
{
"dataPath": "params_shard_204.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.45.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "188289d27fdec079f36c24bbeaa6287e"
},
{
"dataPath": "params_shard_205.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.45.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "f2356d135eab1b97a1999784c3bf7c01"
},
{
"dataPath": "params_shard_206.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.45.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "95340b636baba41303919dfbc1a6eabc"
},
{
"dataPath": "params_shard_207.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.45.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "78f612aa174b25dae2cb96022fa0e84c"
},
{
"dataPath": "params_shard_208.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.46.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "c1ed996ab0612650b5a0069862f0ba63"
},
{
"dataPath": "params_shard_209.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.46.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "d78392be6743b9556d16d1ae5d12f634"
},
{
"dataPath": "params_shard_210.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.46.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "d74d4948968229e940ce0779ad1e7672"
},
{
"dataPath": "params_shard_211.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.46.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "c9361e05a3b1789f756a0f1528bd3b79"
},
{
"dataPath": "params_shard_212.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.47.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "a0e0f39350c818ecff360aeb09434d42"
},
{
"dataPath": "params_shard_213.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.47.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "8663575f64fdb21bf3e9b68902e8d81f"
},
{
"dataPath": "params_shard_214.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.47.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "50319ec57802d3b1ec00f2d4d82388d9"
},
{
"dataPath": "params_shard_215.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.47.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "3d90c4cee0803ad2da7d6a4eaf88eaba"
},
{
"dataPath": "params_shard_216.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.48.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "d0d43378648c346524f8c0f95d08b8b3"
},
{
"dataPath": "params_shard_217.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.48.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "c285c1b13094d27131d72c1a9026e038"
},
{
"dataPath": "params_shard_218.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.48.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "79928a476f6ed0bae224d364120f373a"
},
{
"dataPath": "params_shard_219.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.48.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "fb478e8f2f7ca40c8a3ad7c43a53a57f"
},
{
"dataPath": "params_shard_220.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.49.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "aedbe67769b43fa5ace0cf9caebf15de"
},
{
"dataPath": "params_shard_221.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.49.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "e9e3028f3e2e1a639592962791d1e3b1"
},
{
"dataPath": "params_shard_222.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.49.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "4e0f9b0a2b31ce3e6ee6894845c35632"
},
{
"dataPath": "params_shard_223.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.49.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "71e96a6c044c59bdc29422cd5d23cc0d"
},
{
"dataPath": "params_shard_224.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.50.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "499207de95883514255f98836097126b"
},
{
"dataPath": "params_shard_225.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.50.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "177528b456ee872403dcb2f503e66ecf"
},
{
"dataPath": "params_shard_226.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.50.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "eca0b236caaf7c4cb5405b3e95a6f367"
},
{
"dataPath": "params_shard_227.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.50.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "e5fc1ff31f105cb71a414f9915025a94"
},
{
"dataPath": "params_shard_228.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.51.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "5fb3ca77b8ac7f1fd693e32b657b12f4"
},
{
"dataPath": "params_shard_229.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.51.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "a80169cae28095e91192cf6a49aca432"
},
{
"dataPath": "params_shard_230.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.51.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "89a39dc5f85080698d5fe7e282ff0e00"
},
{
"dataPath": "params_shard_231.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.51.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "177dfccb6750916307a7865ada092592"
},
{
"dataPath": "params_shard_232.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.52.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "64e735684c27e71c1e2d7bb2e8ee347b"
},
{
"dataPath": "params_shard_233.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.52.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "ebf5d7a880b1e0a1e53e1dd2213a12d2"
},
{
"dataPath": "params_shard_234.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.52.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "a23ed7b33e9a387b84bf814f07cfd428"
},
{
"dataPath": "params_shard_235.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.52.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "ab7207ca6dbf5e90f3288527b06d0996"
},
{
"dataPath": "params_shard_236.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.53.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "da1333293942255f0f94beaccbc8d37a"
},
{
"dataPath": "params_shard_237.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.53.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "d437b0cdec63d1ce2ab17da72ed80cec"
},
{
"dataPath": "params_shard_238.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.53.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "a0ac87111f16d3b4253c94e1171106d8"
},
{
"dataPath": "params_shard_239.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.53.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "76a78553716928a1164e0d7eb4c10f0d"
},
{
"dataPath": "params_shard_240.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.54.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "14a54ea4a344deb75c450f74e541d000"
},
{
"dataPath": "params_shard_241.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.54.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "4274f9994fd3232fa9301bf484ba7b0f"
},
{
"dataPath": "params_shard_242.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.54.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "a57e67d1a1daaa20b1e84f067e4244dd"
},
{
"dataPath": "params_shard_243.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.54.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "ade5c32b7048b9198149501cfa3c5fdd"
},
{
"dataPath": "params_shard_244.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.55.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "4bdebc607ebb67c6f232f99eb8f5b4d9"
},
{
"dataPath": "params_shard_245.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.55.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "bebf519ea1b5b184df5efdb8cf98c2b2"
},
{
"dataPath": "params_shard_246.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.55.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "f56fe2a6de3e829a7edac9f4411293ee"
},
{
"dataPath": "params_shard_247.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.55.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "da44a7a0bffc082a32a45d03874d4c7c"
},
{
"dataPath": "params_shard_248.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.56.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "35b1c3538dfb8d2917352aadc90644b6"
},
{
"dataPath": "params_shard_249.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.56.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "8cd0228326d0dde79848aa47db57c16d"
},
{
"dataPath": "params_shard_250.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.56.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "988fc04a8395798c80733617b7b36fed"
},
{
"dataPath": "params_shard_251.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.56.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "4af75a89b793ad871ace19e846ac9123"
},
{
"dataPath": "params_shard_252.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.57.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "ac6757e2e8d25f6b8bd8b1ca5deaa1a9"
},
{
"dataPath": "params_shard_253.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.57.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "8b78d30a8ee0c738b3101768ed711171"
},
{
"dataPath": "params_shard_254.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.57.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "e0c01a97ae9c374d2aa1769c16f865c2"
},
{
"dataPath": "params_shard_255.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.57.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "d8f85ae54c84f49179ebbc861ae9ce20"
},
{
"dataPath": "params_shard_256.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.58.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "a5ce8a5ad7d7e1a16a6bd09ee080d47a"
},
{
"dataPath": "params_shard_257.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.58.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "7ea4e0acb194aabed6caef8f889ea673"
},
{
"dataPath": "params_shard_258.bin",
"format": "raw-shard",
"nbytes": 2238464,
"records": [
{
"name": "model.layers.58.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 0
},
{
"name": "model.layers.58.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 10240
},
{
"name": "model.layers.59.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 20480
},
{
"name": "model.layers.59.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 30720
},
{
"name": "model.layers.59.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 40960
},
{
"name": "model.layers.60.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 55296
},
{
"name": "model.layers.60.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 65536
},
{
"name": "model.layers.60.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 75776
},
{
"name": "model.layers.61.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 90112
},
{
"name": "model.layers.61.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 100352
},
{
"name": "model.layers.61.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 110592
},
{
"name": "model.layers.62.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 124928
},
{
"name": "model.layers.62.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 135168
},
{
"name": "model.layers.62.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 145408
},
{
"name": "model.layers.63.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 159744
},
{
"name": "model.layers.63.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 169984
},
{
"name": "model.layers.63.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 180224
},
{
"name": "model.norm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 194560
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 204800
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 215040
},
{
"name": "model.layers.0.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 225280
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 239616
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 249856
},
{
"name": "model.layers.1.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 260096
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 274432
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 284672
},
{
"name": "model.layers.2.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 294912
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 309248
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 319488
},
{
"name": "model.layers.3.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 329728
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 344064
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 354304
},
{
"name": "model.layers.4.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 364544
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 378880
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 389120
},
{
"name": "model.layers.5.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 399360
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 413696
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 423936
},
{
"name": "model.layers.6.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 434176
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 448512
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 458752
},
{
"name": "model.layers.7.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 468992
},
{
"name": "model.layers.8.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 483328
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 497664
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 507904
},
{
"name": "model.layers.10.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 518144
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 532480
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 542720
},
{
"name": "model.layers.11.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 552960
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 567296
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 577536
},
{
"name": "model.layers.12.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 587776
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 602112
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 612352
},
{
"name": "model.layers.13.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 622592
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 636928
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 647168
},
{
"name": "model.layers.14.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 657408
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 671744
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 681984
},
{
"name": "model.layers.15.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 692224
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 706560
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 716800
},
{
"name": "model.layers.16.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 727040
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 741376
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 751616
},
{
"name": "model.layers.17.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 761856
},
{
"name": "model.layers.18.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 776192
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 790528
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 800768
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 811008
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 821248
},
{
"name": "model.layers.9.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 831488
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 845824
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 856064
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 866304
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 876544
},
{
"name": "model.layers.19.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 886784
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 901120
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 911360
},
{
"name": "model.layers.20.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 921600
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 935936
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 946176
},
{
"name": "model.layers.21.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 956416
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 970752
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 980992
},
{
"name": "model.layers.22.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 991232
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1005568
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1015808
},
{
"name": "model.layers.23.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1026048
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1040384
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1050624
},
{
"name": "model.layers.24.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1060864
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1075200
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1085440
},
{
"name": "model.layers.25.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1095680
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1110016
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1120256
},
{
"name": "model.layers.26.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1130496
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1144832
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1155072
},
{
"name": "model.layers.27.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1165312
},
{
"name": "model.layers.28.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1179648
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1193984
},
{
"name": "model.layers.28.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1204224
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1214464
},
{
"name": "model.layers.29.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1224704
},
{
"name": "model.layers.29.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1234944
},
{
"name": "model.layers.30.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1249280
},
{
"name": "model.layers.30.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1259520
},
{
"name": "model.layers.30.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1269760
},
{
"name": "model.layers.31.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1284096
},
{
"name": "model.layers.31.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1294336
},
{
"name": "model.layers.31.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1304576
},
{
"name": "model.layers.32.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1318912
},
{
"name": "model.layers.32.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1329152
},
{
"name": "model.layers.32.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1339392
},
{
"name": "model.layers.33.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1353728
},
{
"name": "model.layers.33.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1363968
},
{
"name": "model.layers.33.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1374208
},
{
"name": "model.layers.34.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1388544
},
{
"name": "model.layers.34.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1398784
},
{
"name": "model.layers.34.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1409024
},
{
"name": "model.layers.35.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1423360
},
{
"name": "model.layers.35.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1433600
},
{
"name": "model.layers.35.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1443840
},
{
"name": "model.layers.36.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1458176
},
{
"name": "model.layers.36.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1468416
},
{
"name": "model.layers.36.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1478656
},
{
"name": "model.layers.37.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1492992
},
{
"name": "model.layers.37.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1503232
},
{
"name": "model.layers.37.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1513472
},
{
"name": "model.layers.38.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1527808
},
{
"name": "model.layers.38.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1542144
},
{
"name": "model.layers.38.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1552384
},
{
"name": "model.layers.39.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1562624
},
{
"name": "model.layers.39.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1572864
},
{
"name": "model.layers.39.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1583104
},
{
"name": "model.layers.40.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1597440
},
{
"name": "model.layers.40.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1607680
},
{
"name": "model.layers.40.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1617920
},
{
"name": "model.layers.41.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1632256
},
{
"name": "model.layers.41.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1642496
},
{
"name": "model.layers.41.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1652736
},
{
"name": "model.layers.42.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1667072
},
{
"name": "model.layers.42.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1677312
},
{
"name": "model.layers.42.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1687552
},
{
"name": "model.layers.43.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1701888
},
{
"name": "model.layers.43.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1712128
},
{
"name": "model.layers.43.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1722368
},
{
"name": "model.layers.44.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1736704
},
{
"name": "model.layers.44.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1746944
},
{
"name": "model.layers.44.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1757184
},
{
"name": "model.layers.45.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1771520
},
{
"name": "model.layers.45.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1781760
},
{
"name": "model.layers.45.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1792000
},
{
"name": "model.layers.46.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1806336
},
{
"name": "model.layers.46.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1816576
},
{
"name": "model.layers.46.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1826816
},
{
"name": "model.layers.47.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1841152
},
{
"name": "model.layers.47.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1851392
},
{
"name": "model.layers.47.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1861632
},
{
"name": "model.layers.48.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1875968
},
{
"name": "model.layers.48.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1890304
},
{
"name": "model.layers.48.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1900544
},
{
"name": "model.layers.49.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1910784
},
{
"name": "model.layers.49.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1921024
},
{
"name": "model.layers.49.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1931264
},
{
"name": "model.layers.50.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1945600
},
{
"name": "model.layers.50.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1955840
},
{
"name": "model.layers.50.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1966080
},
{
"name": "model.layers.51.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1980416
},
{
"name": "model.layers.51.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1990656
},
{
"name": "model.layers.51.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 2000896
},
{
"name": "model.layers.52.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 2015232
},
{
"name": "model.layers.52.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 2025472
},
{
"name": "model.layers.52.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 2035712
},
{
"name": "model.layers.53.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 2050048
},
{
"name": "model.layers.53.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 2060288
},
{
"name": "model.layers.53.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 2070528
},
{
"name": "model.layers.54.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 2084864
},
{
"name": "model.layers.54.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 2095104
},
{
"name": "model.layers.54.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 2105344
},
{
"name": "model.layers.55.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 2119680
},
{
"name": "model.layers.55.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 2129920
},
{
"name": "model.layers.55.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 2140160
},
{
"name": "model.layers.56.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 2154496
},
{
"name": "model.layers.56.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 2164736
},
{
"name": "model.layers.56.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 2174976
},
{
"name": "model.layers.57.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 2189312
},
{
"name": "model.layers.57.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 2199552
},
{
"name": "model.layers.57.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 2209792
},
{
"name": "model.layers.58.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 2224128
}
],
"md5sum": "3b8e829c0d6be42b592125838fc02486"
}
]
}