{ "metadata": { "ParamSize": 451, "ParamBytes": 65527752704.0, "BitsPerParam": 13.92728514370428 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 1557135360, "records": [ { "name": "lm_head.weight", "shape": [ 152064, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1557135360, "byteOffset": 0 } ], "md5sum": "c11cf3478e75b671ce4c41390a4b0b7c" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.58.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "08382f845f3c178125be02e28cce5815" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.58.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "1991da3ddf42fb8cb8a7e25eea939308" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.59.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "eaf699c19614ed5b28bdfc3bcdfc59a0" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.59.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "078d5a2669019619cf7ac7024b7929bd" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.59.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "86353f7ce0710142ccd2bef37c49e01e" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.59.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "75b54e342b2eec6ca88023483a946945" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.60.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "7a7e7a72f451b9f5d5ce2793b240c629" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.60.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "f482bf2553c5f67f3cba12e6b7cab478" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.60.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "2ee68ef95fe7655ed0b52651a607d504" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.60.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "8b077e411b1dfaa8f79bc16ef3a17fb2" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.61.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "ed77c541c31ab8829d13f560ec5f4355" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.61.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "85ad0336a80031f1419dfd410c94f652" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.61.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "d907d7e411e468bdf2dd9bb34c0f49df" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.61.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "1856554a4c5e40ce53673f05e94ce1f0" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.62.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "192e46b225038c15aa11137a0233e4f6" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.62.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "55c5da594f982688dfa871ea04c08967" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.62.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "a2e6142514ab2389b026ed5d2b173a5f" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.62.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "933e361673feda3e13b79ab391f97852" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.63.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "452af359f8f2e7c57cb733e0b6c0c4e3" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.63.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "721d102f022c882e2bf4bb9bf3033600" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.63.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "459aa217d75dafb2144b141f562b61eb" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.63.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "440a42b547dc002eb3a8bcd72d575a74" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 1557135360, "records": [ { "name": "model.embed_tokens.weight", "shape": [ 152064, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1557135360, "byteOffset": 0 } ], "md5sum": "7e52ab5dfe8e6a239b4720eace189e98" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.0.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "ace91badb3d3e1e4266ed959d1fca6d7" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "0f938aca4f9bfed6ff4058bb2d0c9335" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.0.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "12929ba2a9ccc0cd538dad56e0a3f596" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.0.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "dc2aeb200d9470e87d7f6f97fe144b3e" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.1.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "3e3203142858a5f212ba3c6f7dad182d" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "434d00a426c23b9eb524f147a9bf350e" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.1.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "ceb34c47eefcce073c44854f5267f733" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.1.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "6e9801985d11c36d2b6f87a25b4c4c61" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.2.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "a44584f15f7a2210d580631f08a4b0b7" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "3588fd1b9597394c853821e5284a4ae4" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.2.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "8cafe41db7db28f3004a517bde1c4ef6" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.2.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "1cdcc1dfe6301092599532f9a4454c46" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.3.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "9a6ebc4223b9196dc92f1b6d84681d95" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "af6a7733a525b4658718c5316f512e2f" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.3.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "1b0e5ef048c353eedc80754cc00b669c" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.3.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "dd5497f813a11bd7446659ee343c5c58" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.4.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "0be3d921d8cd7c9d379adf939280b37c" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "24840caccc52f2aab6077a1bed9d84c6" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.4.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "f963228f4897d218ae9b4a501bba63d2" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.4.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "ad9fe2ee932f296fa83642b6af3940c0" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.5.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "90708c54f8d552917eabdafe41ae64cd" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "1ef39a88e3683c6c21a1fccc3e3c0efe" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.5.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "cdfec41c8f527a5d6f8e320be8653511" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.5.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "6e70cdd636d8e3cb68aed158e6a4e5dc" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.6.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "67114310577007465a066cd8f8524b88" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "4529270bfaa535a30b7f41d99f58f10c" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.6.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "f274853986038be46388516964f6a29c" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.6.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "4774986aa3f2485bf14c00881b1a07c0" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.7.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "9525f9ad897375b271276fe3b8f27fc2" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "0810a15124713d0a8198c1857006354b" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.7.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "3b0015f538d7f2e069614abae2cb3587" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.7.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "6800a07f49887e3b0473ab877d0e8393" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "2c6d616a3ef37e0023605225b2dcc5a6" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.8.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "4ec4e1a92a8b73b669475ef8525384dd" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.8.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "17472ccb7af0a4ab9cc0b87d78a56b6a" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.10.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "a9d153badd4c4fc756ec25ff03c80463" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "77315db4c38410ec75241ef0106b83bc" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.10.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "3f40fca1c532bb4ef4ce29617388d3b4" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.10.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "1ca5be6a5f55c5a87ccfa4d435899d06" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.11.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "a96970618a9ea4733bee834fd27a81c2" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "3c0dc95dea6f6f7760fd7256a95c234d" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.11.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "e8a8c59c19d2ed2ecb91611740690436" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.11.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "344be3d1fe17bc816f4e0dc9b349e8f1" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.12.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "0212b22ca7c4b1d36f6270b0bb7be03b" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "8de04c1797aabaed325abccc6cfa9176" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.12.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "01d76ca02a55946d522171e853cccc78" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.12.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "12ba9c40c0d76ad4bf0f337822c2be57" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.13.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "c9e6f2659ceae052b9a34dfb7d48515b" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "4cebdcff986138ec67b9df88fa332dd9" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.13.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "a53ecb99cebed09207d71fd040508d03" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.13.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "c4603556f2b5caa335c76d2dd3928588" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.14.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "91695f78b2f9db83b2cfd5260ee72ebd" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "60bf6c3219205a0b58439dbb11efd3d9" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.14.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "0d48ef3eba0b702cb03f9caa9c2a4df2" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.14.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "1f8e90978f80efef4d7bc9206183cacb" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.15.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "7cae550b3e33cfed275fa9a83c367c70" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "7bc3046c68a83dc4b43cf1d361ad1f3e" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.15.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "b77f5e900843b449278461a0da57a51a" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.15.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "ae8d93aaa16d6567303335fe5444b589" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.16.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "ae0d84fc84c78d6235e5f51ba033eea8" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "e126e95b7354ee248424ab334a4595a9" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.16.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "d4fda2fce1a8e0120ce613d4d6c5a45a" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.16.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "14e202f0f72d5c67f6fe338a1ddbc356" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.17.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "92f32b0b5f17e56f074f11cfe7e1ae3b" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "b0625c80eeb0ca915b80106d0604e1c5" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.17.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "a1bc3275fa3ad4a199e998cf457d8029" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.17.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "475330ea0f3a0d42f462d2b2ff2c0696" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "056ed00ec6850da568925efde53ffadb" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.18.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "906ae0628d713939a32222aa48976ab2" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.18.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "5efd524b3d4a277dd124be9678f94e2f" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.8.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "407352f0c790ffe7158bf962f2c861de" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.9.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "20a3ad27e050c04d36c8a4cce4f77aba" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "4b724cc8354dbc4a357ba8528725c77f" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.9.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "53e7fe85adc3cf2a9653659e3595dfb1" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.9.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "af445d043881e3dfc8e11449f843198a" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.18.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "95667c4db0eb3e7fe613e2f31817cfbf" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.19.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "590db25395696f506482b182c2ab0f60" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "b7dd4948e858d6fad56d9c981ed22adb" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.19.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "86b918ecf2fafbf5a9e0db700f00af6d" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.19.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "b66f6698ebee824a30983f7d892c0c65" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.20.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "08daa0954c34852a19aaa097462d74be" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "0f8c9da832c7386f59b9b6e82fddde8d" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.20.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "437198888a20303cc75635c6893f5473" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.20.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "b5be96ee728cabf8c1e65adee4b468b0" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.21.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "d19f831287d919e1f4214f6ddbca6bc8" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "3d931cead84ac8b60ad435d75cab4e4c" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.21.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "e93e96f51c5f8eaad4e4eecd5303d7ea" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.21.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "d83dc9e1e1c7a129e41739990cb38389" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.22.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "424fe40438e009e5a20dc4b1ad96f772" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "08d97d5daabb4eacab96e51441ce945e" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.22.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "91ce9c9d7a3a221c81d2ef82595298d7" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.22.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "942e77205affd0e30b9429a88c1518a2" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.23.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "a09db7c3b5e6fb95255b045ef067fe61" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "6e7c42f01ff92660a13318a6a3785b07" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.23.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "ed08e7ef2d7bf43bf13f39f083b3527d" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.23.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "0318886b973422467f8ca511bcc9a668" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.24.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "d813e3c67148d223d17b81960416b88b" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "10d0a346373d758d217f64cdc2adaece" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.24.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "4e245cd2e1968ffd2ca9bc229a6ca055" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.24.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "1033a56d50496d8adf14ecac2db7758d" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.25.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "eb56b0aa69756e3882665717cda66219" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "93490233219d2f5e7a7cad76926127e7" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.25.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "f9b00317a840bcf6828d29a58732d17f" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.25.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "5d225df46ede9fe84e5caae1d8875837" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.26.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "3078c5de9d190052c630633cc383f4e6" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "2c4a91bb75d35d793eb4199939ca98f1" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.26.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "8fc29644343cc88f59ac2572e63ac594" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.26.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "c2d67f7fbd78c58828a3e07d0d57aa86" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.27.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "15e43141672697e610aea6fe7ce7f3b5" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "a9c76bb50b75080ba00b4aa98cdcff8e" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.27.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "11b8fa0417841b860933a4ef96c81fe0" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.27.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "2be726b77935a45f3d5ce3e8baf55fb8" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "5b1da532afc1509e409c6053378a20a3" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.28.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "fa1c26c0fc514aa22c398e923fd28ba6" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.28.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "636cb207241080246182a42935e20ba6" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.28.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "6eaab88663ab140bfcb5a402a166cc12" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.29.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "5d4a6188ba9ffe4d935fb8fe29e1a3cf" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "07f80050d09a6fa2ce2f85cda9b440b8" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.29.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "78a07eddbee3394e414b44072d6261a4" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.29.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "1faeb6b644f5b209b4d705d73465fbd9" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.30.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "912c91c22ebad17f9e771c9bfd81890d" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "ee21593ae1e2738340bf07a3e2a5f086" }, { "dataPath": "params_shard_146.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.30.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "65b7f348577e3a3da25fef4067386b9f" }, { "dataPath": "params_shard_147.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.30.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "c7f551114323af7b5c2266869b4c85ef" }, { "dataPath": "params_shard_148.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.31.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "fd390325119ffbf3e64b08493d1f7a29" }, { "dataPath": "params_shard_149.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "a7fe8ea88483103f1cb154adad35e476" }, { "dataPath": "params_shard_150.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.31.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "88db0e0ffbfcc30ee1881414beddc147" }, { "dataPath": "params_shard_151.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.31.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "bc37accf4f8424f1980706d4d72cc06c" }, { "dataPath": "params_shard_152.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.32.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "018953b0932e4fa32ce227f4606b2bcf" }, { "dataPath": "params_shard_153.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "9752ff9c4d2ddf845918cfb0bfd5faff" }, { "dataPath": "params_shard_154.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.32.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "25a7879a7b9296000c5b18dafcf19b9c" }, { "dataPath": "params_shard_155.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.32.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "dc7cf91ebb8b87f8d22f1f93ab174c0b" }, { "dataPath": "params_shard_156.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.33.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "dd4376a46e978b28dfde6dbe6e713565" }, { "dataPath": "params_shard_157.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "0ff9c331aaf562091652f04a0eac2cd9" }, { "dataPath": "params_shard_158.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.33.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "52e807896b3888a33d3882de5c2028d3" }, { "dataPath": "params_shard_159.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.33.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "2fd4d329fd586344138dd0350bb52b78" }, { "dataPath": "params_shard_160.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.34.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "246c3b98c46a21c358ef8b787c23c505" }, { "dataPath": "params_shard_161.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "87d5da0bd6cee2da99f17e4762c4be3f" }, { "dataPath": "params_shard_162.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.34.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "20c395885fd80b96a01d8681e680fcb4" }, { "dataPath": "params_shard_163.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.34.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "bb24995e2a5ffe1383882d5af4cdfd91" }, { "dataPath": "params_shard_164.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.35.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "3e324dcfd38292bdec61fe1584944496" }, { "dataPath": "params_shard_165.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "7fbab8a1b213ff6aa179aeedb20b3678" }, { "dataPath": "params_shard_166.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.35.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "0aae830b676bf2d925df1ed63749c928" }, { "dataPath": "params_shard_167.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.35.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "646243ee0e76b1ddd557fb8f27ab99e3" }, { "dataPath": "params_shard_168.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.36.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "bceb3e735a28c847730c21ea8b891cb5" }, { "dataPath": "params_shard_169.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "f3fd210b815272c2bb92958cb63011bc" }, { "dataPath": "params_shard_170.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.36.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "aa5e36af555c0c60a1a368fef9117b76" }, { "dataPath": "params_shard_171.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.36.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "b014fa09adbb76e85f25ffb1b46e04db" }, { "dataPath": "params_shard_172.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.37.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "0582d0760b655e35d934a2cffde094fa" }, { "dataPath": "params_shard_173.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "820b899385a420f0e39509941774d454" }, { "dataPath": "params_shard_174.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.37.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "10d2dc9c6478458b59f60054a363a599" }, { "dataPath": "params_shard_175.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.37.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "3c1a68bc405ee2201a5732aebcbca44f" }, { "dataPath": "params_shard_176.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "acee256e1953072816d1c741d64e1dd7" }, { "dataPath": "params_shard_177.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.38.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "35df09cb7ddbdbedff32ae5997b901fe" }, { "dataPath": "params_shard_178.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.38.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "e35efdb5045467d18b2b05944fa6778e" }, { "dataPath": "params_shard_179.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.38.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "41001c6dc4ac3f5effef66fa774c340b" }, { "dataPath": "params_shard_180.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.39.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "5c44d929b3c0c6dfc7ea09611575ebcc" }, { "dataPath": "params_shard_181.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "e260930affc69708f2d1b6b4f9d2eeeb" }, { "dataPath": "params_shard_182.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.39.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "9918641ab660b66232faf7bab4707392" }, { "dataPath": "params_shard_183.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.39.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "d218902a7fd121f323d67185d6716511" }, { "dataPath": "params_shard_184.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.40.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "d887e1f37e2f3b277673e78990a0215c" }, { "dataPath": "params_shard_185.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "2791b05140c4c37e1f085575b7b641bf" }, { "dataPath": "params_shard_186.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.40.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "6ed0ca567f823dd29d310740090f5ebd" }, { "dataPath": "params_shard_187.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.40.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "bee7995ded99632d37113d0a067a793b" }, { "dataPath": "params_shard_188.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.41.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "6259840681453819fb8695e7b9934955" }, { "dataPath": "params_shard_189.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "afa5daed816174d6571319862ffb172c" }, { "dataPath": "params_shard_190.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.41.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "e71e0abea9452662189d7615a8a2bb0e" }, { "dataPath": "params_shard_191.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.41.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "169366e4241d2d330692db8533c4a05a" }, { "dataPath": "params_shard_192.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.42.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "96a0c58c4de9a97283cce30539469608" }, { "dataPath": "params_shard_193.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "56d078c57d3f9ece1e84053440a04d45" }, { "dataPath": "params_shard_194.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.42.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "7c4e35a1757f11eb5d4bbc6d65885c10" }, { "dataPath": "params_shard_195.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.42.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "588c0008660b3f44568884582e70e298" }, { "dataPath": "params_shard_196.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.43.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "c96af2e433b270ae05266bab8a369dd1" }, { "dataPath": "params_shard_197.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.43.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "dfaf243bdeadb028b210e0b2f7d352a3" }, { "dataPath": "params_shard_198.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.43.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "ce6d2408518a60c430a02a8cac541bd0" }, { "dataPath": "params_shard_199.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.43.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "b42d89de343997a9be6dde0ad48886ff" }, { "dataPath": "params_shard_200.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.44.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "d9bb502c230222137830935a00a86672" }, { "dataPath": "params_shard_201.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "943539d605b665440e55bb7f518f08f5" }, { "dataPath": "params_shard_202.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.44.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "5635cdacba20d39009aea9744521322c" }, { "dataPath": "params_shard_203.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.44.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "97589702de25c929b402cf9021a61731" }, { "dataPath": "params_shard_204.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.45.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "188289d27fdec079f36c24bbeaa6287e" }, { "dataPath": "params_shard_205.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.45.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "f2356d135eab1b97a1999784c3bf7c01" }, { "dataPath": "params_shard_206.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.45.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "95340b636baba41303919dfbc1a6eabc" }, { "dataPath": "params_shard_207.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.45.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "78f612aa174b25dae2cb96022fa0e84c" }, { "dataPath": "params_shard_208.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.46.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "c1ed996ab0612650b5a0069862f0ba63" }, { "dataPath": "params_shard_209.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "d78392be6743b9556d16d1ae5d12f634" }, { "dataPath": "params_shard_210.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.46.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "d74d4948968229e940ce0779ad1e7672" }, { "dataPath": "params_shard_211.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.46.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "c9361e05a3b1789f756a0f1528bd3b79" }, { "dataPath": "params_shard_212.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.47.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "a0e0f39350c818ecff360aeb09434d42" }, { "dataPath": "params_shard_213.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.47.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "8663575f64fdb21bf3e9b68902e8d81f" }, { "dataPath": "params_shard_214.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.47.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "50319ec57802d3b1ec00f2d4d82388d9" }, { "dataPath": "params_shard_215.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.47.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "3d90c4cee0803ad2da7d6a4eaf88eaba" }, { "dataPath": "params_shard_216.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.48.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "d0d43378648c346524f8c0f95d08b8b3" }, { "dataPath": "params_shard_217.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.48.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "c285c1b13094d27131d72c1a9026e038" }, { "dataPath": "params_shard_218.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.48.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "79928a476f6ed0bae224d364120f373a" }, { "dataPath": "params_shard_219.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.48.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "fb478e8f2f7ca40c8a3ad7c43a53a57f" }, { "dataPath": "params_shard_220.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.49.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "aedbe67769b43fa5ace0cf9caebf15de" }, { "dataPath": "params_shard_221.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.49.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "e9e3028f3e2e1a639592962791d1e3b1" }, { "dataPath": "params_shard_222.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.49.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "4e0f9b0a2b31ce3e6ee6894845c35632" }, { "dataPath": "params_shard_223.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.49.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "71e96a6c044c59bdc29422cd5d23cc0d" }, { "dataPath": "params_shard_224.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.50.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "499207de95883514255f98836097126b" }, { "dataPath": "params_shard_225.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.50.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "177528b456ee872403dcb2f503e66ecf" }, { "dataPath": "params_shard_226.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.50.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "eca0b236caaf7c4cb5405b3e95a6f367" }, { "dataPath": "params_shard_227.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.50.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "e5fc1ff31f105cb71a414f9915025a94" }, { "dataPath": "params_shard_228.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.51.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "5fb3ca77b8ac7f1fd693e32b657b12f4" }, { "dataPath": "params_shard_229.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.51.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "a80169cae28095e91192cf6a49aca432" }, { "dataPath": "params_shard_230.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.51.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "89a39dc5f85080698d5fe7e282ff0e00" }, { "dataPath": "params_shard_231.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.51.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "177dfccb6750916307a7865ada092592" }, { "dataPath": "params_shard_232.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.52.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "64e735684c27e71c1e2d7bb2e8ee347b" }, { "dataPath": "params_shard_233.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.52.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "ebf5d7a880b1e0a1e53e1dd2213a12d2" }, { "dataPath": "params_shard_234.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.52.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "a23ed7b33e9a387b84bf814f07cfd428" }, { "dataPath": "params_shard_235.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.52.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "ab7207ca6dbf5e90f3288527b06d0996" }, { "dataPath": "params_shard_236.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.53.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "da1333293942255f0f94beaccbc8d37a" }, { "dataPath": "params_shard_237.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.53.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "d437b0cdec63d1ce2ab17da72ed80cec" }, { "dataPath": "params_shard_238.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.53.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "a0ac87111f16d3b4253c94e1171106d8" }, { "dataPath": "params_shard_239.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.53.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "76a78553716928a1164e0d7eb4c10f0d" }, { "dataPath": "params_shard_240.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.54.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "14a54ea4a344deb75c450f74e541d000" }, { "dataPath": "params_shard_241.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.54.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "4274f9994fd3232fa9301bf484ba7b0f" }, { "dataPath": "params_shard_242.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.54.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "a57e67d1a1daaa20b1e84f067e4244dd" }, { "dataPath": "params_shard_243.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.54.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "ade5c32b7048b9198149501cfa3c5fdd" }, { "dataPath": "params_shard_244.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.55.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "4bdebc607ebb67c6f232f99eb8f5b4d9" }, { "dataPath": "params_shard_245.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.55.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "bebf519ea1b5b184df5efdb8cf98c2b2" }, { "dataPath": "params_shard_246.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.55.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "f56fe2a6de3e829a7edac9f4411293ee" }, { "dataPath": "params_shard_247.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.55.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "da44a7a0bffc082a32a45d03874d4c7c" }, { "dataPath": "params_shard_248.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.56.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "35b1c3538dfb8d2917352aadc90644b6" }, { "dataPath": "params_shard_249.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.56.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "8cd0228326d0dde79848aa47db57c16d" }, { "dataPath": "params_shard_250.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.56.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "988fc04a8395798c80733617b7b36fed" }, { "dataPath": "params_shard_251.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.56.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "4af75a89b793ad871ace19e846ac9123" }, { "dataPath": "params_shard_252.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.57.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "ac6757e2e8d25f6b8bd8b1ca5deaa1a9" }, { "dataPath": "params_shard_253.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.57.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "8b78d30a8ee0c738b3101768ed711171" }, { "dataPath": "params_shard_254.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.57.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "e0c01a97ae9c374d2aa1769c16f865c2" }, { "dataPath": "params_shard_255.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.57.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "d8f85ae54c84f49179ebbc861ae9ce20" }, { "dataPath": "params_shard_256.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.58.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "a5ce8a5ad7d7e1a16a6bd09ee080d47a" }, { "dataPath": "params_shard_257.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.58.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "7ea4e0acb194aabed6caef8f889ea673" }, { "dataPath": "params_shard_258.bin", "format": "raw-shard", "nbytes": 2238464, "records": [ { "name": "model.layers.58.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 0 }, { "name": "model.layers.58.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 10240 }, { "name": "model.layers.59.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20480 }, { "name": "model.layers.59.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 30720 }, { "name": "model.layers.59.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 40960 }, { "name": "model.layers.60.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 55296 }, { "name": "model.layers.60.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 65536 }, { "name": "model.layers.60.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 75776 }, { "name": "model.layers.61.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 90112 }, { "name": "model.layers.61.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 100352 }, { "name": "model.layers.61.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 110592 }, { "name": "model.layers.62.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 124928 }, { "name": "model.layers.62.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 135168 }, { "name": "model.layers.62.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 145408 }, { "name": "model.layers.63.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 159744 }, { "name": "model.layers.63.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 169984 }, { "name": "model.layers.63.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 180224 }, { "name": "model.norm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 194560 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 204800 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 215040 }, { "name": "model.layers.0.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 225280 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 239616 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 249856 }, { "name": "model.layers.1.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 260096 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 274432 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 284672 }, { "name": "model.layers.2.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 294912 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 309248 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 319488 }, { "name": "model.layers.3.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 329728 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 344064 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 354304 }, { "name": "model.layers.4.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 364544 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 378880 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 389120 }, { "name": "model.layers.5.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 399360 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 413696 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 423936 }, { "name": "model.layers.6.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 434176 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 448512 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 458752 }, { "name": "model.layers.7.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 468992 }, { "name": "model.layers.8.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 483328 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 497664 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 507904 }, { "name": "model.layers.10.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 518144 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 532480 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 542720 }, { "name": "model.layers.11.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 552960 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 567296 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 577536 }, { "name": "model.layers.12.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 587776 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 602112 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 612352 }, { "name": "model.layers.13.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 622592 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 636928 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 647168 }, { "name": "model.layers.14.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 657408 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 671744 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 681984 }, { "name": "model.layers.15.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 692224 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 706560 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 716800 }, { "name": "model.layers.16.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 727040 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 741376 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 751616 }, { "name": "model.layers.17.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 761856 }, { "name": "model.layers.18.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 776192 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 790528 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 800768 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 811008 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 821248 }, { "name": "model.layers.9.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 831488 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 845824 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 856064 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 866304 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 876544 }, { "name": "model.layers.19.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 886784 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 901120 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 911360 }, { "name": "model.layers.20.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 921600 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 935936 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 946176 }, { "name": "model.layers.21.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 956416 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 970752 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 980992 }, { "name": "model.layers.22.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 991232 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1005568 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1015808 }, { "name": "model.layers.23.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 1026048 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1040384 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1050624 }, { "name": "model.layers.24.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 1060864 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1075200 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1085440 }, { "name": "model.layers.25.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 1095680 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1110016 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1120256 }, { "name": "model.layers.26.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 1130496 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1144832 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1155072 }, { "name": "model.layers.27.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 1165312 }, { "name": "model.layers.28.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 1179648 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1193984 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1204224 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1214464 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1224704 }, { "name": "model.layers.29.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 1234944 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1249280 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1259520 }, { "name": "model.layers.30.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 1269760 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1284096 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1294336 }, { "name": "model.layers.31.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 1304576 }, { "name": "model.layers.32.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1318912 }, { "name": "model.layers.32.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1329152 }, { "name": "model.layers.32.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 1339392 }, { "name": "model.layers.33.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1353728 }, { "name": "model.layers.33.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1363968 }, { "name": "model.layers.33.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 1374208 }, { "name": "model.layers.34.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1388544 }, { "name": "model.layers.34.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1398784 }, { "name": "model.layers.34.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 1409024 }, { "name": "model.layers.35.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1423360 }, { "name": "model.layers.35.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1433600 }, { "name": "model.layers.35.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 1443840 }, { "name": "model.layers.36.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1458176 }, { "name": "model.layers.36.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1468416 }, { "name": "model.layers.36.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 1478656 }, { "name": "model.layers.37.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1492992 }, { "name": "model.layers.37.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1503232 }, { "name": "model.layers.37.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 1513472 }, { "name": "model.layers.38.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 1527808 }, { "name": "model.layers.38.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1542144 }, { "name": "model.layers.38.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1552384 }, { "name": "model.layers.39.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1562624 }, { "name": "model.layers.39.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1572864 }, { "name": "model.layers.39.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 1583104 }, { "name": "model.layers.40.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1597440 }, { "name": "model.layers.40.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1607680 }, { "name": "model.layers.40.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 1617920 }, { "name": "model.layers.41.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1632256 }, { "name": "model.layers.41.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1642496 }, { "name": "model.layers.41.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 1652736 }, { "name": "model.layers.42.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1667072 }, { "name": "model.layers.42.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1677312 }, { "name": "model.layers.42.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 1687552 }, { "name": "model.layers.43.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1701888 }, { "name": "model.layers.43.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1712128 }, { "name": "model.layers.43.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 1722368 }, { "name": "model.layers.44.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1736704 }, { "name": "model.layers.44.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1746944 }, { "name": "model.layers.44.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 1757184 }, { "name": "model.layers.45.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1771520 }, { "name": "model.layers.45.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1781760 }, { "name": "model.layers.45.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 1792000 }, { "name": "model.layers.46.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1806336 }, { "name": "model.layers.46.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1816576 }, { "name": "model.layers.46.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 1826816 }, { "name": "model.layers.47.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1841152 }, { "name": "model.layers.47.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1851392 }, { "name": "model.layers.47.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 1861632 }, { "name": "model.layers.48.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 1875968 }, { "name": "model.layers.48.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1890304 }, { "name": "model.layers.48.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1900544 }, { "name": "model.layers.49.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1910784 }, { "name": "model.layers.49.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1921024 }, { "name": "model.layers.49.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 1931264 }, { "name": "model.layers.50.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1945600 }, { "name": "model.layers.50.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1955840 }, { "name": "model.layers.50.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 1966080 }, { "name": "model.layers.51.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1980416 }, { "name": "model.layers.51.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1990656 }, { "name": "model.layers.51.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 2000896 }, { "name": "model.layers.52.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2015232 }, { "name": "model.layers.52.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2025472 }, { "name": "model.layers.52.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 2035712 }, { "name": "model.layers.53.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2050048 }, { "name": "model.layers.53.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2060288 }, { "name": "model.layers.53.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 2070528 }, { "name": "model.layers.54.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2084864 }, { "name": "model.layers.54.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2095104 }, { "name": "model.layers.54.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 2105344 }, { "name": "model.layers.55.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2119680 }, { "name": "model.layers.55.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2129920 }, { "name": "model.layers.55.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 2140160 }, { "name": "model.layers.56.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2154496 }, { "name": "model.layers.56.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2164736 }, { "name": "model.layers.56.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 2174976 }, { "name": "model.layers.57.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2189312 }, { "name": "model.layers.57.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2199552 }, { "name": "model.layers.57.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 2209792 }, { "name": "model.layers.58.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 2224128 } ], "md5sum": "3b8e829c0d6be42b592125838fc02486" } ] }