{ "metadata": { "ParamSize": 199, "ParamBytes": 15231233024.0, "BitsPerParam": 16.0 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 1089994752, "records": [ { "name": "lm_head.weight", "shape": [ 152064, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1089994752, "byteOffset": 0 } ], "md5sum": "dcd913747b6d87ab207ef37ef9f7fee1" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.19.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "ab39bbb3ea6166fe564609e3df0cc707" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "8a493af534d34f9e54ebe63a29af883b" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.20.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "aca3377a7a59426d634692a81d69e966" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "1918cb231e4c0a78adea7f425f85975b" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.20.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "aae0866ebfd59372f3cdf0aa89181f0e" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.20.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "9bdb83c432addf592c071ad7523f958e" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.21.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "64ccfbeee7c0d533415db5ef0531a468" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "caa34b0c7b47e18474e236679195e046" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.21.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "c1378cc6240dac0d24314cc055797284" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.21.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "d60b929a803ff89b75d41dc21b2ef7fb" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.22.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "52e14bf60e71a9f5a0448db583c3a073" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "581eab76d13f24fd822e8538cb8a5fbf" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.22.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "117815d56d880f6c889eaa2a5f406121" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.22.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "c9b3eac039a4e7062d69f34ad241d192" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.23.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "31455ca170428d1dbc606abab13e1618" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "9f46bf025c4734d737b62c6a96aa5c58" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.23.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "44047e7698cec8657a31d0a450303604" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.23.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "46236d06fefb37c94e18a390b6c24b89" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.24.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "52f52ec7815f508e7cc1a7d4b057eea1" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "0b2302cf822dedfa73a9c58c552405e5" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.24.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "ab772922eb83bee95399519b0d70a1e2" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.24.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "234c8a053fe3279d53265a652a078c03" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.25.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "e0b0c5fe266e65cf6678146830761272" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "89048878c9db1eabc209b3ac7dfc0c74" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.25.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "63587e7a673a9cf1a609e4c50d41be97" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.25.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "5dacfbc0c17e33631d5cdf8d47c111b0" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.26.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "16d2072d203e11100afc32f2c7afcda5" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "9ae165695f55c0b78118639eb599f63a" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.26.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "bef013f07fe6a907cc6997f3e452459b" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.26.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "cbd2d1cdff66082b28d7836cf0dcf51e" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.27.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "e221025395d53fc7db3edc5108a5e37f" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "4ab4b6adf6728ac10fbfb46da56ce044" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.27.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "3e8a74316a7be5530ccccc42b69fbb37" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.27.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "6e8ad5b778b70a83ef596042aee327b7" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 1089994752, "records": [ { "name": "model.embed_tokens.weight", "shape": [ 152064, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1089994752, "byteOffset": 0 } ], "md5sum": "254abd287d334208de44c1f355454f1d" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.0.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "cd37904ee5237e6d21e43b22a7dbf8e2" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "94ef3721abe33da63c96d4628de93c88" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.0.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "167c28bd65048906e7f6250ce657380a" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.0.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "6dd1667e195073638ad5327a5782cba6" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.1.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "4b63ee2064b0b1f3b76acc5db57c1272" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "b40704de954f59ec29564840b6a11169" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.1.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "2ec8663eeb85fac87e675da743a322cc" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.1.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "9dc76d400acdbd440d79fe38833cfd9f" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.10.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "de27ae42c1a253033d9b7c30bbfdbcd8" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "1a106a75074f2b4593541af8ef716664" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.10.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "c471b3cb9d5da399c9a965f3d26c53af" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.10.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "836998ceba602dafa9ee9c80c5cbc530" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.11.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "f6e55ddf5bdd0f81833b64909799b8ab" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "393a2559bd5fa656d3093163cfd9fb8d" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.11.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "310cb2ad7ee85417d5c1ba4d43311038" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.11.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "ed81584bc3d46928218e42c64ddfa334" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.12.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "b8afb6fc392e59a230c8f449c2f9a4be" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "85d2dc8c147818f18c2c03ef51b691bb" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.12.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "fadc998cf7ecad1f3c0a269586e3d90a" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.12.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "aff73ae14a84b6313ef1b53f2288d2a6" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.13.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "1cf5e4a0f5c7a0cb37795dc3d487c122" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "813de318208be4cb0721092a3d3e25d4" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.13.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "b76c8602800998e411d9fe83a1c1461a" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.13.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "6ed74d0730e0db45f97a1754bdcc2c7d" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.14.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "3ddf8961a04f5ebf7bc425494d116568" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "113438a6c39db1d434e11695190c1b6d" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.14.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "cdcc31107ca6829f203e92dd5addf5cc" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.14.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "20d1f08d485fcf28d253081e2a0f6c7f" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.15.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "f66296d6524b1c81ff00ad70ebd8152c" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "0baf85ae6423b5d5bb13ffee108dfe8f" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.15.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "e8156f7aed4a1dc1148b01cc4eb0ed97" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.15.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "f1bcd77a02971fa623a94a9f67b40b4a" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.16.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "423409e9c2c994c97141bd21eabce4a5" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "fa5c8ebc03b43a3176e9e89f76f19df6" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.16.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "9654f98275ea5f798b9d690008f1a461" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.16.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "c9a1d46fb82ed043fe8a905f5c28e1b3" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.17.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "daa9c2b305e8833bfe5dda77883d792d" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "20c1948a3adb04ed45d6eea4a3c507b3" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.17.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "7b6c5a70cf5006b13b1fcb93e2d6f1d9" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.17.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "08190962b20ea73a205f94d7a9fa966b" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.18.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "9ffef47a4f3e178207b7c742fe5fabac" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "900ec597f771b29362bc5ec140f9c65a" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.18.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "902bdb9cff38b6442129c92579ca0df4" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.18.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "74b62c8b11a5eab772136594e40ede27" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.19.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "8042e0afd1d2c038bad50168bf1f2cb0" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.2.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "0df2f4f0c17c4f20ca498b7e23840ec6" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "63581172bf34b03b5d0defe786d36999" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.2.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "e87541271179409bbdd8316e40e1dcc1" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.2.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "35d2c2265a1d68c86352d6d6115be4c1" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.3.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "4c7457fbdd7eb260fb5d35670398f48a" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "6e5d8e445aacc132be01fdab6ab3e7fa" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.3.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "7246a707d36e428ad0974211d84e9e44" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.3.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "2ed42e29dadf859990839ee0d5914623" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.4.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "97200233d4c8cbe6dc430f6316a5b5a7" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "c2e8a379ce5dd2de73f965b57fa525c5" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.4.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "106d503a7d359b607411e77ba4b76b2e" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.4.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "165f35bace1ecaa339de5d359b2bc1b7" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.5.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "519de89df300c7edeb1190b928d3c640" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "c075173b850bbfb5ca61503bcfe8d6f0" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.5.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "b1b4e23ff0aa129456e5d87e2c081e84" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.5.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "087915547db9c86442eaeb53315c1553" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.6.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "eb4a810f39aecaf9d82533be60d6d382" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "aa012cae74ab0436925c5ba9eb87e296" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.6.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "1d326a4d5ce400ab3b65d9495bc93ba4" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.6.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "c8a2220d1d0099ea446720fbac8e3f6c" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.7.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "00920b341df121cfcc54eaf7dedabf40" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "32113240d8aca15ec8ae39430c3f3f22" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.7.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "207e4e2b0ced9ceb4a35e6a75654a773" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.7.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "895cd9e1fa8ebdc43aef89745a422263" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.8.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "f885957c113595fd7abf88d5202f1cde" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "fe4d7f48ad2e6a701f5e9085e8b0271f" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.8.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "d6af4ea9517130ccc5307ba6c33756c4" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.8.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "254637c53e5887f23ba66af6da8743c1" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.9.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "6dfc05d9494307bfed654b5e948025d8" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "147c574708867fe9e8dec55ca9466280" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.9.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "470b4a47f24467ec4471b43435063d60" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.9.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "bb771974e63d490906f710d3683fed64" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 26356736, "records": [ { "name": "model.layers.19.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 0 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 7168 }, { "name": "model.layers.19.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 14336 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 25704448 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 25711616 }, { "name": "model.layers.20.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 25718784 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 25728000 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 25735168 }, { "name": "model.layers.21.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 25742336 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 25751552 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 25758720 }, { "name": "model.layers.22.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 25765888 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 25775104 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 25782272 }, { "name": "model.layers.23.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 25789440 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 25798656 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 25805824 }, { "name": "model.layers.24.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 25812992 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 25822208 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 25829376 }, { "name": "model.layers.25.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 25836544 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 25845760 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 25852928 }, { "name": "model.layers.26.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 25860096 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 25869312 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 25876480 }, { "name": "model.layers.27.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 25883648 }, { "name": "model.norm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 25892864 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 25900032 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 25907200 }, { "name": "model.layers.0.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 25914368 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 25923584 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 25930752 }, { "name": "model.layers.1.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 25937920 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 25947136 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 25954304 }, { "name": "model.layers.10.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 25961472 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 25970688 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 25977856 }, { "name": "model.layers.11.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 25985024 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 25994240 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 26001408 }, { "name": "model.layers.12.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 26008576 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 26017792 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 26024960 }, { "name": "model.layers.13.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 26032128 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 26041344 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 26048512 }, { "name": "model.layers.14.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 26055680 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 26064896 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 26072064 }, { "name": "model.layers.15.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 26079232 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 26088448 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 26095616 }, { "name": "model.layers.16.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 26102784 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 26112000 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 26119168 }, { "name": "model.layers.17.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 26126336 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 26135552 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 26142720 }, { "name": "model.layers.18.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 26149888 }, { "name": "model.layers.19.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 26159104 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 26168320 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 26175488 }, { "name": "model.layers.2.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 26182656 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 26191872 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 26199040 }, { "name": "model.layers.3.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 26206208 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 26215424 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 26222592 }, { "name": "model.layers.4.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 26229760 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 26238976 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 26246144 }, { "name": "model.layers.5.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 26253312 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 26262528 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 26269696 }, { "name": "model.layers.6.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 26276864 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 26286080 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 26293248 }, { "name": "model.layers.7.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 26300416 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 26309632 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 26316800 }, { "name": "model.layers.8.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 26323968 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 26333184 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 26340352 }, { "name": "model.layers.9.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 26347520 } ], "md5sum": "41f8301782ccec72fcfbc67780084399" } ] }