| { | |
| "metadata": { | |
| "ParamSize": 199, | |
| "ParamBytes": 15231233024.0, | |
| "BitsPerParam": 16.0 | |
| }, | |
| "records": [ | |
| { | |
| "dataPath": "params_shard_0.bin", | |
| "format": "raw-shard", | |
| "nbytes": 1089994752, | |
| "records": [ | |
| { | |
| "name": "lm_head.weight", | |
| "shape": [ | |
| 152064, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1089994752, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "dcd913747b6d87ab207ef37ef9f7fee1" | |
| }, | |
| { | |
| "dataPath": "params_shard_1.bin", | |
| "format": "raw-shard", | |
| "nbytes": 135790592, | |
| "records": [ | |
| { | |
| "name": "model.layers.19.mlp.down_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 18944 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 135790592, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ab39bbb3ea6166fe564609e3df0cc707" | |
| }, | |
| { | |
| "dataPath": "params_shard_2.bin", | |
| "format": "raw-shard", | |
| "nbytes": 271581184, | |
| "records": [ | |
| { | |
| "name": "model.layers.19.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 37888, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 271581184, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "8a493af534d34f9e54ebe63a29af883b" | |
| }, | |
| { | |
| "dataPath": "params_shard_3.bin", | |
| "format": "raw-shard", | |
| "nbytes": 135790592, | |
| "records": [ | |
| { | |
| "name": "model.layers.20.mlp.down_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 18944 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 135790592, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "aca3377a7a59426d634692a81d69e966" | |
| }, | |
| { | |
| "dataPath": "params_shard_4.bin", | |
| "format": "raw-shard", | |
| "nbytes": 271581184, | |
| "records": [ | |
| { | |
| "name": "model.layers.20.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 37888, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 271581184, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "1918cb231e4c0a78adea7f425f85975b" | |
| }, | |
| { | |
| "dataPath": "params_shard_5.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33030144, | |
| "records": [ | |
| { | |
| "name": "model.layers.20.self_attn.c_attn.weight", | |
| "shape": [ | |
| 4608, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33030144, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "aae0866ebfd59372f3cdf0aa89181f0e" | |
| }, | |
| { | |
| "dataPath": "params_shard_6.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25690112, | |
| "records": [ | |
| { | |
| "name": "model.layers.20.self_attn.o_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25690112, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "9bdb83c432addf592c071ad7523f958e" | |
| }, | |
| { | |
| "dataPath": "params_shard_7.bin", | |
| "format": "raw-shard", | |
| "nbytes": 135790592, | |
| "records": [ | |
| { | |
| "name": "model.layers.21.mlp.down_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 18944 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 135790592, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "64ccfbeee7c0d533415db5ef0531a468" | |
| }, | |
| { | |
| "dataPath": "params_shard_8.bin", | |
| "format": "raw-shard", | |
| "nbytes": 271581184, | |
| "records": [ | |
| { | |
| "name": "model.layers.21.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 37888, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 271581184, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "caa34b0c7b47e18474e236679195e046" | |
| }, | |
| { | |
| "dataPath": "params_shard_9.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33030144, | |
| "records": [ | |
| { | |
| "name": "model.layers.21.self_attn.c_attn.weight", | |
| "shape": [ | |
| 4608, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33030144, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c1378cc6240dac0d24314cc055797284" | |
| }, | |
| { | |
| "dataPath": "params_shard_10.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25690112, | |
| "records": [ | |
| { | |
| "name": "model.layers.21.self_attn.o_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25690112, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d60b929a803ff89b75d41dc21b2ef7fb" | |
| }, | |
| { | |
| "dataPath": "params_shard_11.bin", | |
| "format": "raw-shard", | |
| "nbytes": 135790592, | |
| "records": [ | |
| { | |
| "name": "model.layers.22.mlp.down_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 18944 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 135790592, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "52e14bf60e71a9f5a0448db583c3a073" | |
| }, | |
| { | |
| "dataPath": "params_shard_12.bin", | |
| "format": "raw-shard", | |
| "nbytes": 271581184, | |
| "records": [ | |
| { | |
| "name": "model.layers.22.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 37888, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 271581184, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "581eab76d13f24fd822e8538cb8a5fbf" | |
| }, | |
| { | |
| "dataPath": "params_shard_13.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33030144, | |
| "records": [ | |
| { | |
| "name": "model.layers.22.self_attn.c_attn.weight", | |
| "shape": [ | |
| 4608, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33030144, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "117815d56d880f6c889eaa2a5f406121" | |
| }, | |
| { | |
| "dataPath": "params_shard_14.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25690112, | |
| "records": [ | |
| { | |
| "name": "model.layers.22.self_attn.o_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25690112, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c9b3eac039a4e7062d69f34ad241d192" | |
| }, | |
| { | |
| "dataPath": "params_shard_15.bin", | |
| "format": "raw-shard", | |
| "nbytes": 135790592, | |
| "records": [ | |
| { | |
| "name": "model.layers.23.mlp.down_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 18944 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 135790592, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "31455ca170428d1dbc606abab13e1618" | |
| }, | |
| { | |
| "dataPath": "params_shard_16.bin", | |
| "format": "raw-shard", | |
| "nbytes": 271581184, | |
| "records": [ | |
| { | |
| "name": "model.layers.23.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 37888, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 271581184, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "9f46bf025c4734d737b62c6a96aa5c58" | |
| }, | |
| { | |
| "dataPath": "params_shard_17.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33030144, | |
| "records": [ | |
| { | |
| "name": "model.layers.23.self_attn.c_attn.weight", | |
| "shape": [ | |
| 4608, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33030144, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "44047e7698cec8657a31d0a450303604" | |
| }, | |
| { | |
| "dataPath": "params_shard_18.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25690112, | |
| "records": [ | |
| { | |
| "name": "model.layers.23.self_attn.o_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25690112, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "46236d06fefb37c94e18a390b6c24b89" | |
| }, | |
| { | |
| "dataPath": "params_shard_19.bin", | |
| "format": "raw-shard", | |
| "nbytes": 135790592, | |
| "records": [ | |
| { | |
| "name": "model.layers.24.mlp.down_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 18944 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 135790592, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "52f52ec7815f508e7cc1a7d4b057eea1" | |
| }, | |
| { | |
| "dataPath": "params_shard_20.bin", | |
| "format": "raw-shard", | |
| "nbytes": 271581184, | |
| "records": [ | |
| { | |
| "name": "model.layers.24.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 37888, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 271581184, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "0b2302cf822dedfa73a9c58c552405e5" | |
| }, | |
| { | |
| "dataPath": "params_shard_21.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33030144, | |
| "records": [ | |
| { | |
| "name": "model.layers.24.self_attn.c_attn.weight", | |
| "shape": [ | |
| 4608, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33030144, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ab772922eb83bee95399519b0d70a1e2" | |
| }, | |
| { | |
| "dataPath": "params_shard_22.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25690112, | |
| "records": [ | |
| { | |
| "name": "model.layers.24.self_attn.o_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25690112, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "234c8a053fe3279d53265a652a078c03" | |
| }, | |
| { | |
| "dataPath": "params_shard_23.bin", | |
| "format": "raw-shard", | |
| "nbytes": 135790592, | |
| "records": [ | |
| { | |
| "name": "model.layers.25.mlp.down_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 18944 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 135790592, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e0b0c5fe266e65cf6678146830761272" | |
| }, | |
| { | |
| "dataPath": "params_shard_24.bin", | |
| "format": "raw-shard", | |
| "nbytes": 271581184, | |
| "records": [ | |
| { | |
| "name": "model.layers.25.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 37888, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 271581184, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "89048878c9db1eabc209b3ac7dfc0c74" | |
| }, | |
| { | |
| "dataPath": "params_shard_25.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33030144, | |
| "records": [ | |
| { | |
| "name": "model.layers.25.self_attn.c_attn.weight", | |
| "shape": [ | |
| 4608, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33030144, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "63587e7a673a9cf1a609e4c50d41be97" | |
| }, | |
| { | |
| "dataPath": "params_shard_26.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25690112, | |
| "records": [ | |
| { | |
| "name": "model.layers.25.self_attn.o_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25690112, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "5dacfbc0c17e33631d5cdf8d47c111b0" | |
| }, | |
| { | |
| "dataPath": "params_shard_27.bin", | |
| "format": "raw-shard", | |
| "nbytes": 135790592, | |
| "records": [ | |
| { | |
| "name": "model.layers.26.mlp.down_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 18944 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 135790592, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "16d2072d203e11100afc32f2c7afcda5" | |
| }, | |
| { | |
| "dataPath": "params_shard_28.bin", | |
| "format": "raw-shard", | |
| "nbytes": 271581184, | |
| "records": [ | |
| { | |
| "name": "model.layers.26.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 37888, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 271581184, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "9ae165695f55c0b78118639eb599f63a" | |
| }, | |
| { | |
| "dataPath": "params_shard_29.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33030144, | |
| "records": [ | |
| { | |
| "name": "model.layers.26.self_attn.c_attn.weight", | |
| "shape": [ | |
| 4608, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33030144, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "bef013f07fe6a907cc6997f3e452459b" | |
| }, | |
| { | |
| "dataPath": "params_shard_30.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25690112, | |
| "records": [ | |
| { | |
| "name": "model.layers.26.self_attn.o_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25690112, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "cbd2d1cdff66082b28d7836cf0dcf51e" | |
| }, | |
| { | |
| "dataPath": "params_shard_31.bin", | |
| "format": "raw-shard", | |
| "nbytes": 135790592, | |
| "records": [ | |
| { | |
| "name": "model.layers.27.mlp.down_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 18944 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 135790592, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e221025395d53fc7db3edc5108a5e37f" | |
| }, | |
| { | |
| "dataPath": "params_shard_32.bin", | |
| "format": "raw-shard", | |
| "nbytes": 271581184, | |
| "records": [ | |
| { | |
| "name": "model.layers.27.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 37888, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 271581184, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "4ab4b6adf6728ac10fbfb46da56ce044" | |
| }, | |
| { | |
| "dataPath": "params_shard_33.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33030144, | |
| "records": [ | |
| { | |
| "name": "model.layers.27.self_attn.c_attn.weight", | |
| "shape": [ | |
| 4608, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33030144, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "3e8a74316a7be5530ccccc42b69fbb37" | |
| }, | |
| { | |
| "dataPath": "params_shard_34.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25690112, | |
| "records": [ | |
| { | |
| "name": "model.layers.27.self_attn.o_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25690112, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "6e8ad5b778b70a83ef596042aee327b7" | |
| }, | |
| { | |
| "dataPath": "params_shard_35.bin", | |
| "format": "raw-shard", | |
| "nbytes": 1089994752, | |
| "records": [ | |
| { | |
| "name": "model.embed_tokens.weight", | |
| "shape": [ | |
| 152064, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1089994752, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "254abd287d334208de44c1f355454f1d" | |
| }, | |
| { | |
| "dataPath": "params_shard_36.bin", | |
| "format": "raw-shard", | |
| "nbytes": 135790592, | |
| "records": [ | |
| { | |
| "name": "model.layers.0.mlp.down_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 18944 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 135790592, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "cd37904ee5237e6d21e43b22a7dbf8e2" | |
| }, | |
| { | |
| "dataPath": "params_shard_37.bin", | |
| "format": "raw-shard", | |
| "nbytes": 271581184, | |
| "records": [ | |
| { | |
| "name": "model.layers.0.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 37888, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 271581184, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "94ef3721abe33da63c96d4628de93c88" | |
| }, | |
| { | |
| "dataPath": "params_shard_38.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33030144, | |
| "records": [ | |
| { | |
| "name": "model.layers.0.self_attn.c_attn.weight", | |
| "shape": [ | |
| 4608, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33030144, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "167c28bd65048906e7f6250ce657380a" | |
| }, | |
| { | |
| "dataPath": "params_shard_39.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25690112, | |
| "records": [ | |
| { | |
| "name": "model.layers.0.self_attn.o_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25690112, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "6dd1667e195073638ad5327a5782cba6" | |
| }, | |
| { | |
| "dataPath": "params_shard_40.bin", | |
| "format": "raw-shard", | |
| "nbytes": 135790592, | |
| "records": [ | |
| { | |
| "name": "model.layers.1.mlp.down_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 18944 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 135790592, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "4b63ee2064b0b1f3b76acc5db57c1272" | |
| }, | |
| { | |
| "dataPath": "params_shard_41.bin", | |
| "format": "raw-shard", | |
| "nbytes": 271581184, | |
| "records": [ | |
| { | |
| "name": "model.layers.1.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 37888, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 271581184, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b40704de954f59ec29564840b6a11169" | |
| }, | |
| { | |
| "dataPath": "params_shard_42.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33030144, | |
| "records": [ | |
| { | |
| "name": "model.layers.1.self_attn.c_attn.weight", | |
| "shape": [ | |
| 4608, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33030144, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "2ec8663eeb85fac87e675da743a322cc" | |
| }, | |
| { | |
| "dataPath": "params_shard_43.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25690112, | |
| "records": [ | |
| { | |
| "name": "model.layers.1.self_attn.o_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25690112, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "9dc76d400acdbd440d79fe38833cfd9f" | |
| }, | |
| { | |
| "dataPath": "params_shard_44.bin", | |
| "format": "raw-shard", | |
| "nbytes": 135790592, | |
| "records": [ | |
| { | |
| "name": "model.layers.10.mlp.down_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 18944 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 135790592, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "de27ae42c1a253033d9b7c30bbfdbcd8" | |
| }, | |
| { | |
| "dataPath": "params_shard_45.bin", | |
| "format": "raw-shard", | |
| "nbytes": 271581184, | |
| "records": [ | |
| { | |
| "name": "model.layers.10.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 37888, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 271581184, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "1a106a75074f2b4593541af8ef716664" | |
| }, | |
| { | |
| "dataPath": "params_shard_46.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33030144, | |
| "records": [ | |
| { | |
| "name": "model.layers.10.self_attn.c_attn.weight", | |
| "shape": [ | |
| 4608, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33030144, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c471b3cb9d5da399c9a965f3d26c53af" | |
| }, | |
| { | |
| "dataPath": "params_shard_47.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25690112, | |
| "records": [ | |
| { | |
| "name": "model.layers.10.self_attn.o_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25690112, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "836998ceba602dafa9ee9c80c5cbc530" | |
| }, | |
| { | |
| "dataPath": "params_shard_48.bin", | |
| "format": "raw-shard", | |
| "nbytes": 135790592, | |
| "records": [ | |
| { | |
| "name": "model.layers.11.mlp.down_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 18944 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 135790592, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f6e55ddf5bdd0f81833b64909799b8ab" | |
| }, | |
| { | |
| "dataPath": "params_shard_49.bin", | |
| "format": "raw-shard", | |
| "nbytes": 271581184, | |
| "records": [ | |
| { | |
| "name": "model.layers.11.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 37888, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 271581184, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "393a2559bd5fa656d3093163cfd9fb8d" | |
| }, | |
| { | |
| "dataPath": "params_shard_50.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33030144, | |
| "records": [ | |
| { | |
| "name": "model.layers.11.self_attn.c_attn.weight", | |
| "shape": [ | |
| 4608, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33030144, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "310cb2ad7ee85417d5c1ba4d43311038" | |
| }, | |
| { | |
| "dataPath": "params_shard_51.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25690112, | |
| "records": [ | |
| { | |
| "name": "model.layers.11.self_attn.o_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25690112, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ed81584bc3d46928218e42c64ddfa334" | |
| }, | |
| { | |
| "dataPath": "params_shard_52.bin", | |
| "format": "raw-shard", | |
| "nbytes": 135790592, | |
| "records": [ | |
| { | |
| "name": "model.layers.12.mlp.down_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 18944 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 135790592, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b8afb6fc392e59a230c8f449c2f9a4be" | |
| }, | |
| { | |
| "dataPath": "params_shard_53.bin", | |
| "format": "raw-shard", | |
| "nbytes": 271581184, | |
| "records": [ | |
| { | |
| "name": "model.layers.12.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 37888, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 271581184, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "85d2dc8c147818f18c2c03ef51b691bb" | |
| }, | |
| { | |
| "dataPath": "params_shard_54.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33030144, | |
| "records": [ | |
| { | |
| "name": "model.layers.12.self_attn.c_attn.weight", | |
| "shape": [ | |
| 4608, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33030144, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "fadc998cf7ecad1f3c0a269586e3d90a" | |
| }, | |
| { | |
| "dataPath": "params_shard_55.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25690112, | |
| "records": [ | |
| { | |
| "name": "model.layers.12.self_attn.o_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25690112, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "aff73ae14a84b6313ef1b53f2288d2a6" | |
| }, | |
| { | |
| "dataPath": "params_shard_56.bin", | |
| "format": "raw-shard", | |
| "nbytes": 135790592, | |
| "records": [ | |
| { | |
| "name": "model.layers.13.mlp.down_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 18944 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 135790592, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "1cf5e4a0f5c7a0cb37795dc3d487c122" | |
| }, | |
| { | |
| "dataPath": "params_shard_57.bin", | |
| "format": "raw-shard", | |
| "nbytes": 271581184, | |
| "records": [ | |
| { | |
| "name": "model.layers.13.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 37888, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 271581184, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "813de318208be4cb0721092a3d3e25d4" | |
| }, | |
| { | |
| "dataPath": "params_shard_58.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33030144, | |
| "records": [ | |
| { | |
| "name": "model.layers.13.self_attn.c_attn.weight", | |
| "shape": [ | |
| 4608, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33030144, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b76c8602800998e411d9fe83a1c1461a" | |
| }, | |
| { | |
| "dataPath": "params_shard_59.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25690112, | |
| "records": [ | |
| { | |
| "name": "model.layers.13.self_attn.o_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25690112, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "6ed74d0730e0db45f97a1754bdcc2c7d" | |
| }, | |
| { | |
| "dataPath": "params_shard_60.bin", | |
| "format": "raw-shard", | |
| "nbytes": 135790592, | |
| "records": [ | |
| { | |
| "name": "model.layers.14.mlp.down_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 18944 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 135790592, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "3ddf8961a04f5ebf7bc425494d116568" | |
| }, | |
| { | |
| "dataPath": "params_shard_61.bin", | |
| "format": "raw-shard", | |
| "nbytes": 271581184, | |
| "records": [ | |
| { | |
| "name": "model.layers.14.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 37888, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 271581184, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "113438a6c39db1d434e11695190c1b6d" | |
| }, | |
| { | |
| "dataPath": "params_shard_62.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33030144, | |
| "records": [ | |
| { | |
| "name": "model.layers.14.self_attn.c_attn.weight", | |
| "shape": [ | |
| 4608, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33030144, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "cdcc31107ca6829f203e92dd5addf5cc" | |
| }, | |
| { | |
| "dataPath": "params_shard_63.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25690112, | |
| "records": [ | |
| { | |
| "name": "model.layers.14.self_attn.o_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25690112, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "20d1f08d485fcf28d253081e2a0f6c7f" | |
| }, | |
| { | |
| "dataPath": "params_shard_64.bin", | |
| "format": "raw-shard", | |
| "nbytes": 135790592, | |
| "records": [ | |
| { | |
| "name": "model.layers.15.mlp.down_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 18944 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 135790592, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f66296d6524b1c81ff00ad70ebd8152c" | |
| }, | |
| { | |
| "dataPath": "params_shard_65.bin", | |
| "format": "raw-shard", | |
| "nbytes": 271581184, | |
| "records": [ | |
| { | |
| "name": "model.layers.15.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 37888, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 271581184, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "0baf85ae6423b5d5bb13ffee108dfe8f" | |
| }, | |
| { | |
| "dataPath": "params_shard_66.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33030144, | |
| "records": [ | |
| { | |
| "name": "model.layers.15.self_attn.c_attn.weight", | |
| "shape": [ | |
| 4608, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33030144, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e8156f7aed4a1dc1148b01cc4eb0ed97" | |
| }, | |
| { | |
| "dataPath": "params_shard_67.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25690112, | |
| "records": [ | |
| { | |
| "name": "model.layers.15.self_attn.o_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25690112, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f1bcd77a02971fa623a94a9f67b40b4a" | |
| }, | |
| { | |
| "dataPath": "params_shard_68.bin", | |
| "format": "raw-shard", | |
| "nbytes": 135790592, | |
| "records": [ | |
| { | |
| "name": "model.layers.16.mlp.down_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 18944 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 135790592, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "423409e9c2c994c97141bd21eabce4a5" | |
| }, | |
| { | |
| "dataPath": "params_shard_69.bin", | |
| "format": "raw-shard", | |
| "nbytes": 271581184, | |
| "records": [ | |
| { | |
| "name": "model.layers.16.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 37888, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 271581184, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "fa5c8ebc03b43a3176e9e89f76f19df6" | |
| }, | |
| { | |
| "dataPath": "params_shard_70.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33030144, | |
| "records": [ | |
| { | |
| "name": "model.layers.16.self_attn.c_attn.weight", | |
| "shape": [ | |
| 4608, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33030144, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "9654f98275ea5f798b9d690008f1a461" | |
| }, | |
| { | |
| "dataPath": "params_shard_71.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25690112, | |
| "records": [ | |
| { | |
| "name": "model.layers.16.self_attn.o_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25690112, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c9a1d46fb82ed043fe8a905f5c28e1b3" | |
| }, | |
| { | |
| "dataPath": "params_shard_72.bin", | |
| "format": "raw-shard", | |
| "nbytes": 135790592, | |
| "records": [ | |
| { | |
| "name": "model.layers.17.mlp.down_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 18944 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 135790592, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "daa9c2b305e8833bfe5dda77883d792d" | |
| }, | |
| { | |
| "dataPath": "params_shard_73.bin", | |
| "format": "raw-shard", | |
| "nbytes": 271581184, | |
| "records": [ | |
| { | |
| "name": "model.layers.17.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 37888, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 271581184, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "20c1948a3adb04ed45d6eea4a3c507b3" | |
| }, | |
| { | |
| "dataPath": "params_shard_74.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33030144, | |
| "records": [ | |
| { | |
| "name": "model.layers.17.self_attn.c_attn.weight", | |
| "shape": [ | |
| 4608, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33030144, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "7b6c5a70cf5006b13b1fcb93e2d6f1d9" | |
| }, | |
| { | |
| "dataPath": "params_shard_75.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25690112, | |
| "records": [ | |
| { | |
| "name": "model.layers.17.self_attn.o_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25690112, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "08190962b20ea73a205f94d7a9fa966b" | |
| }, | |
| { | |
| "dataPath": "params_shard_76.bin", | |
| "format": "raw-shard", | |
| "nbytes": 135790592, | |
| "records": [ | |
| { | |
| "name": "model.layers.18.mlp.down_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 18944 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 135790592, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "9ffef47a4f3e178207b7c742fe5fabac" | |
| }, | |
| { | |
| "dataPath": "params_shard_77.bin", | |
| "format": "raw-shard", | |
| "nbytes": 271581184, | |
| "records": [ | |
| { | |
| "name": "model.layers.18.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 37888, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 271581184, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "900ec597f771b29362bc5ec140f9c65a" | |
| }, | |
| { | |
| "dataPath": "params_shard_78.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33030144, | |
| "records": [ | |
| { | |
| "name": "model.layers.18.self_attn.c_attn.weight", | |
| "shape": [ | |
| 4608, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33030144, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "902bdb9cff38b6442129c92579ca0df4" | |
| }, | |
| { | |
| "dataPath": "params_shard_79.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25690112, | |
| "records": [ | |
| { | |
| "name": "model.layers.18.self_attn.o_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25690112, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "74b62c8b11a5eab772136594e40ede27" | |
| }, | |
| { | |
| "dataPath": "params_shard_80.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33030144, | |
| "records": [ | |
| { | |
| "name": "model.layers.19.self_attn.c_attn.weight", | |
| "shape": [ | |
| 4608, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33030144, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "8042e0afd1d2c038bad50168bf1f2cb0" | |
| }, | |
| { | |
| "dataPath": "params_shard_81.bin", | |
| "format": "raw-shard", | |
| "nbytes": 135790592, | |
| "records": [ | |
| { | |
| "name": "model.layers.2.mlp.down_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 18944 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 135790592, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "0df2f4f0c17c4f20ca498b7e23840ec6" | |
| }, | |
| { | |
| "dataPath": "params_shard_82.bin", | |
| "format": "raw-shard", | |
| "nbytes": 271581184, | |
| "records": [ | |
| { | |
| "name": "model.layers.2.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 37888, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 271581184, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "63581172bf34b03b5d0defe786d36999" | |
| }, | |
| { | |
| "dataPath": "params_shard_83.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33030144, | |
| "records": [ | |
| { | |
| "name": "model.layers.2.self_attn.c_attn.weight", | |
| "shape": [ | |
| 4608, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33030144, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e87541271179409bbdd8316e40e1dcc1" | |
| }, | |
| { | |
| "dataPath": "params_shard_84.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25690112, | |
| "records": [ | |
| { | |
| "name": "model.layers.2.self_attn.o_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25690112, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "35d2c2265a1d68c86352d6d6115be4c1" | |
| }, | |
| { | |
| "dataPath": "params_shard_85.bin", | |
| "format": "raw-shard", | |
| "nbytes": 135790592, | |
| "records": [ | |
| { | |
| "name": "model.layers.3.mlp.down_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 18944 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 135790592, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "4c7457fbdd7eb260fb5d35670398f48a" | |
| }, | |
| { | |
| "dataPath": "params_shard_86.bin", | |
| "format": "raw-shard", | |
| "nbytes": 271581184, | |
| "records": [ | |
| { | |
| "name": "model.layers.3.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 37888, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 271581184, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "6e5d8e445aacc132be01fdab6ab3e7fa" | |
| }, | |
| { | |
| "dataPath": "params_shard_87.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33030144, | |
| "records": [ | |
| { | |
| "name": "model.layers.3.self_attn.c_attn.weight", | |
| "shape": [ | |
| 4608, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33030144, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "7246a707d36e428ad0974211d84e9e44" | |
| }, | |
| { | |
| "dataPath": "params_shard_88.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25690112, | |
| "records": [ | |
| { | |
| "name": "model.layers.3.self_attn.o_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25690112, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "2ed42e29dadf859990839ee0d5914623" | |
| }, | |
| { | |
| "dataPath": "params_shard_89.bin", | |
| "format": "raw-shard", | |
| "nbytes": 135790592, | |
| "records": [ | |
| { | |
| "name": "model.layers.4.mlp.down_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 18944 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 135790592, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "97200233d4c8cbe6dc430f6316a5b5a7" | |
| }, | |
| { | |
| "dataPath": "params_shard_90.bin", | |
| "format": "raw-shard", | |
| "nbytes": 271581184, | |
| "records": [ | |
| { | |
| "name": "model.layers.4.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 37888, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 271581184, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c2e8a379ce5dd2de73f965b57fa525c5" | |
| }, | |
| { | |
| "dataPath": "params_shard_91.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33030144, | |
| "records": [ | |
| { | |
| "name": "model.layers.4.self_attn.c_attn.weight", | |
| "shape": [ | |
| 4608, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33030144, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "106d503a7d359b607411e77ba4b76b2e" | |
| }, | |
| { | |
| "dataPath": "params_shard_92.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25690112, | |
| "records": [ | |
| { | |
| "name": "model.layers.4.self_attn.o_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25690112, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "165f35bace1ecaa339de5d359b2bc1b7" | |
| }, | |
| { | |
| "dataPath": "params_shard_93.bin", | |
| "format": "raw-shard", | |
| "nbytes": 135790592, | |
| "records": [ | |
| { | |
| "name": "model.layers.5.mlp.down_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 18944 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 135790592, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "519de89df300c7edeb1190b928d3c640" | |
| }, | |
| { | |
| "dataPath": "params_shard_94.bin", | |
| "format": "raw-shard", | |
| "nbytes": 271581184, | |
| "records": [ | |
| { | |
| "name": "model.layers.5.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 37888, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 271581184, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c075173b850bbfb5ca61503bcfe8d6f0" | |
| }, | |
| { | |
| "dataPath": "params_shard_95.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33030144, | |
| "records": [ | |
| { | |
| "name": "model.layers.5.self_attn.c_attn.weight", | |
| "shape": [ | |
| 4608, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33030144, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b1b4e23ff0aa129456e5d87e2c081e84" | |
| }, | |
| { | |
| "dataPath": "params_shard_96.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25690112, | |
| "records": [ | |
| { | |
| "name": "model.layers.5.self_attn.o_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25690112, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "087915547db9c86442eaeb53315c1553" | |
| }, | |
| { | |
| "dataPath": "params_shard_97.bin", | |
| "format": "raw-shard", | |
| "nbytes": 135790592, | |
| "records": [ | |
| { | |
| "name": "model.layers.6.mlp.down_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 18944 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 135790592, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "eb4a810f39aecaf9d82533be60d6d382" | |
| }, | |
| { | |
| "dataPath": "params_shard_98.bin", | |
| "format": "raw-shard", | |
| "nbytes": 271581184, | |
| "records": [ | |
| { | |
| "name": "model.layers.6.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 37888, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 271581184, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "aa012cae74ab0436925c5ba9eb87e296" | |
| }, | |
| { | |
| "dataPath": "params_shard_99.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33030144, | |
| "records": [ | |
| { | |
| "name": "model.layers.6.self_attn.c_attn.weight", | |
| "shape": [ | |
| 4608, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33030144, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "1d326a4d5ce400ab3b65d9495bc93ba4" | |
| }, | |
| { | |
| "dataPath": "params_shard_100.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25690112, | |
| "records": [ | |
| { | |
| "name": "model.layers.6.self_attn.o_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25690112, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c8a2220d1d0099ea446720fbac8e3f6c" | |
| }, | |
| { | |
| "dataPath": "params_shard_101.bin", | |
| "format": "raw-shard", | |
| "nbytes": 135790592, | |
| "records": [ | |
| { | |
| "name": "model.layers.7.mlp.down_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 18944 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 135790592, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "00920b341df121cfcc54eaf7dedabf40" | |
| }, | |
| { | |
| "dataPath": "params_shard_102.bin", | |
| "format": "raw-shard", | |
| "nbytes": 271581184, | |
| "records": [ | |
| { | |
| "name": "model.layers.7.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 37888, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 271581184, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "32113240d8aca15ec8ae39430c3f3f22" | |
| }, | |
| { | |
| "dataPath": "params_shard_103.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33030144, | |
| "records": [ | |
| { | |
| "name": "model.layers.7.self_attn.c_attn.weight", | |
| "shape": [ | |
| 4608, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33030144, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "207e4e2b0ced9ceb4a35e6a75654a773" | |
| }, | |
| { | |
| "dataPath": "params_shard_104.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25690112, | |
| "records": [ | |
| { | |
| "name": "model.layers.7.self_attn.o_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25690112, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "895cd9e1fa8ebdc43aef89745a422263" | |
| }, | |
| { | |
| "dataPath": "params_shard_105.bin", | |
| "format": "raw-shard", | |
| "nbytes": 135790592, | |
| "records": [ | |
| { | |
| "name": "model.layers.8.mlp.down_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 18944 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 135790592, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f885957c113595fd7abf88d5202f1cde" | |
| }, | |
| { | |
| "dataPath": "params_shard_106.bin", | |
| "format": "raw-shard", | |
| "nbytes": 271581184, | |
| "records": [ | |
| { | |
| "name": "model.layers.8.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 37888, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 271581184, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "fe4d7f48ad2e6a701f5e9085e8b0271f" | |
| }, | |
| { | |
| "dataPath": "params_shard_107.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33030144, | |
| "records": [ | |
| { | |
| "name": "model.layers.8.self_attn.c_attn.weight", | |
| "shape": [ | |
| 4608, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33030144, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d6af4ea9517130ccc5307ba6c33756c4" | |
| }, | |
| { | |
| "dataPath": "params_shard_108.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25690112, | |
| "records": [ | |
| { | |
| "name": "model.layers.8.self_attn.o_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25690112, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "254637c53e5887f23ba66af6da8743c1" | |
| }, | |
| { | |
| "dataPath": "params_shard_109.bin", | |
| "format": "raw-shard", | |
| "nbytes": 135790592, | |
| "records": [ | |
| { | |
| "name": "model.layers.9.mlp.down_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 18944 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 135790592, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "6dfc05d9494307bfed654b5e948025d8" | |
| }, | |
| { | |
| "dataPath": "params_shard_110.bin", | |
| "format": "raw-shard", | |
| "nbytes": 271581184, | |
| "records": [ | |
| { | |
| "name": "model.layers.9.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 37888, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 271581184, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "147c574708867fe9e8dec55ca9466280" | |
| }, | |
| { | |
| "dataPath": "params_shard_111.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33030144, | |
| "records": [ | |
| { | |
| "name": "model.layers.9.self_attn.c_attn.weight", | |
| "shape": [ | |
| 4608, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33030144, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "470b4a47f24467ec4471b43435063d60" | |
| }, | |
| { | |
| "dataPath": "params_shard_112.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25690112, | |
| "records": [ | |
| { | |
| "name": "model.layers.9.self_attn.o_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25690112, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "bb771974e63d490906f710d3683fed64" | |
| }, | |
| { | |
| "dataPath": "params_shard_113.bin", | |
| "format": "raw-shard", | |
| "nbytes": 26356736, | |
| "records": [ | |
| { | |
| "name": "model.layers.19.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.19.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 7168 | |
| }, | |
| { | |
| "name": "model.layers.19.self_attn.o_proj.weight", | |
| "shape": [ | |
| 3584, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25690112, | |
| "byteOffset": 14336 | |
| }, | |
| { | |
| "name": "model.layers.20.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 25704448 | |
| }, | |
| { | |
| "name": "model.layers.20.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 25711616 | |
| }, | |
| { | |
| "name": "model.layers.20.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 25718784 | |
| }, | |
| { | |
| "name": "model.layers.21.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 25728000 | |
| }, | |
| { | |
| "name": "model.layers.21.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 25735168 | |
| }, | |
| { | |
| "name": "model.layers.21.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 25742336 | |
| }, | |
| { | |
| "name": "model.layers.22.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 25751552 | |
| }, | |
| { | |
| "name": "model.layers.22.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 25758720 | |
| }, | |
| { | |
| "name": "model.layers.22.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 25765888 | |
| }, | |
| { | |
| "name": "model.layers.23.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 25775104 | |
| }, | |
| { | |
| "name": "model.layers.23.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 25782272 | |
| }, | |
| { | |
| "name": "model.layers.23.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 25789440 | |
| }, | |
| { | |
| "name": "model.layers.24.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 25798656 | |
| }, | |
| { | |
| "name": "model.layers.24.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 25805824 | |
| }, | |
| { | |
| "name": "model.layers.24.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 25812992 | |
| }, | |
| { | |
| "name": "model.layers.25.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 25822208 | |
| }, | |
| { | |
| "name": "model.layers.25.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 25829376 | |
| }, | |
| { | |
| "name": "model.layers.25.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 25836544 | |
| }, | |
| { | |
| "name": "model.layers.26.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 25845760 | |
| }, | |
| { | |
| "name": "model.layers.26.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 25852928 | |
| }, | |
| { | |
| "name": "model.layers.26.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 25860096 | |
| }, | |
| { | |
| "name": "model.layers.27.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 25869312 | |
| }, | |
| { | |
| "name": "model.layers.27.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 25876480 | |
| }, | |
| { | |
| "name": "model.layers.27.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 25883648 | |
| }, | |
| { | |
| "name": "model.norm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 25892864 | |
| }, | |
| { | |
| "name": "model.layers.0.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 25900032 | |
| }, | |
| { | |
| "name": "model.layers.0.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 25907200 | |
| }, | |
| { | |
| "name": "model.layers.0.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 25914368 | |
| }, | |
| { | |
| "name": "model.layers.1.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 25923584 | |
| }, | |
| { | |
| "name": "model.layers.1.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 25930752 | |
| }, | |
| { | |
| "name": "model.layers.1.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 25937920 | |
| }, | |
| { | |
| "name": "model.layers.10.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 25947136 | |
| }, | |
| { | |
| "name": "model.layers.10.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 25954304 | |
| }, | |
| { | |
| "name": "model.layers.10.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 25961472 | |
| }, | |
| { | |
| "name": "model.layers.11.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 25970688 | |
| }, | |
| { | |
| "name": "model.layers.11.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 25977856 | |
| }, | |
| { | |
| "name": "model.layers.11.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 25985024 | |
| }, | |
| { | |
| "name": "model.layers.12.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 25994240 | |
| }, | |
| { | |
| "name": "model.layers.12.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 26001408 | |
| }, | |
| { | |
| "name": "model.layers.12.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 26008576 | |
| }, | |
| { | |
| "name": "model.layers.13.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 26017792 | |
| }, | |
| { | |
| "name": "model.layers.13.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 26024960 | |
| }, | |
| { | |
| "name": "model.layers.13.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 26032128 | |
| }, | |
| { | |
| "name": "model.layers.14.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 26041344 | |
| }, | |
| { | |
| "name": "model.layers.14.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 26048512 | |
| }, | |
| { | |
| "name": "model.layers.14.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 26055680 | |
| }, | |
| { | |
| "name": "model.layers.15.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 26064896 | |
| }, | |
| { | |
| "name": "model.layers.15.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 26072064 | |
| }, | |
| { | |
| "name": "model.layers.15.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 26079232 | |
| }, | |
| { | |
| "name": "model.layers.16.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 26088448 | |
| }, | |
| { | |
| "name": "model.layers.16.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 26095616 | |
| }, | |
| { | |
| "name": "model.layers.16.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 26102784 | |
| }, | |
| { | |
| "name": "model.layers.17.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 26112000 | |
| }, | |
| { | |
| "name": "model.layers.17.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 26119168 | |
| }, | |
| { | |
| "name": "model.layers.17.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 26126336 | |
| }, | |
| { | |
| "name": "model.layers.18.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 26135552 | |
| }, | |
| { | |
| "name": "model.layers.18.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 26142720 | |
| }, | |
| { | |
| "name": "model.layers.18.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 26149888 | |
| }, | |
| { | |
| "name": "model.layers.19.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 26159104 | |
| }, | |
| { | |
| "name": "model.layers.2.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 26168320 | |
| }, | |
| { | |
| "name": "model.layers.2.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 26175488 | |
| }, | |
| { | |
| "name": "model.layers.2.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 26182656 | |
| }, | |
| { | |
| "name": "model.layers.3.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 26191872 | |
| }, | |
| { | |
| "name": "model.layers.3.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 26199040 | |
| }, | |
| { | |
| "name": "model.layers.3.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 26206208 | |
| }, | |
| { | |
| "name": "model.layers.4.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 26215424 | |
| }, | |
| { | |
| "name": "model.layers.4.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 26222592 | |
| }, | |
| { | |
| "name": "model.layers.4.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 26229760 | |
| }, | |
| { | |
| "name": "model.layers.5.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 26238976 | |
| }, | |
| { | |
| "name": "model.layers.5.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 26246144 | |
| }, | |
| { | |
| "name": "model.layers.5.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 26253312 | |
| }, | |
| { | |
| "name": "model.layers.6.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 26262528 | |
| }, | |
| { | |
| "name": "model.layers.6.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 26269696 | |
| }, | |
| { | |
| "name": "model.layers.6.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 26276864 | |
| }, | |
| { | |
| "name": "model.layers.7.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 26286080 | |
| }, | |
| { | |
| "name": "model.layers.7.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 26293248 | |
| }, | |
| { | |
| "name": "model.layers.7.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 26300416 | |
| }, | |
| { | |
| "name": "model.layers.8.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 26309632 | |
| }, | |
| { | |
| "name": "model.layers.8.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 26316800 | |
| }, | |
| { | |
| "name": "model.layers.8.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 26323968 | |
| }, | |
| { | |
| "name": "model.layers.9.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 26333184 | |
| }, | |
| { | |
| "name": "model.layers.9.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 26340352 | |
| }, | |
| { | |
| "name": "model.layers.9.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 26347520 | |
| } | |
| ], | |
| "md5sum": "41f8301782ccec72fcfbc67780084399" | |
| } | |
| ] | |
| } |