| { | |
| "metadata": { | |
| "ParamSize": 313, | |
| "ParamBytes": 3439746048.0, | |
| "BitsPerParam": 3.6133605651807272 | |
| }, | |
| "records": [ | |
| { | |
| "dataPath": "params_shard_0.bin", | |
| "format": "raw-shard", | |
| "nbytes": 218972160, | |
| "records": [ | |
| { | |
| "name": "model.embed_tokens.q_weight", | |
| "shape": [ | |
| 152064, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 218972160, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "20d6e7fe62912f2bc757d82da7887891" | |
| }, | |
| { | |
| "dataPath": "params_shard_1.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27380736, | |
| "records": [ | |
| { | |
| "name": "model.embed_tokens.q_scale", | |
| "shape": [ | |
| 152064, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 27371520, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.0.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 27371520 | |
| } | |
| ], | |
| "md5sum": "e6cba5ff5b1343a6fe07c4cfb751cc2f" | |
| }, | |
| { | |
| "dataPath": "params_shard_2.bin", | |
| "format": "raw-shard", | |
| "nbytes": 54558720, | |
| "records": [ | |
| { | |
| "name": "model.layers.0.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 37888, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 54558720, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f2926ccf8c5e7338600dec9371ba2aa9" | |
| }, | |
| { | |
| "dataPath": "params_shard_3.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27181056, | |
| "records": [ | |
| { | |
| "name": "model.layers.0.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 3584, | |
| 1896 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 27181056, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "0d1d3def2c4e744cd5f0652b2a4bdbd5" | |
| }, | |
| { | |
| "dataPath": "params_shard_4.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30977024, | |
| "records": [ | |
| { | |
| "name": "model.layers.0.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 4608, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6635520, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.0.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 4608, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 829440, | |
| "byteOffset": 6635520 | |
| }, | |
| { | |
| "name": "model.layers.0.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 3584, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5160960, | |
| "byteOffset": 7464960 | |
| }, | |
| { | |
| "name": "model.layers.0.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 3584, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 645120, | |
| "byteOffset": 12625920 | |
| }, | |
| { | |
| "name": "model.layers.0.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 37888, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6819840, | |
| "byteOffset": 13271040 | |
| }, | |
| { | |
| "name": "model.layers.0.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 3584, | |
| 474 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3397632, | |
| "byteOffset": 20090880 | |
| }, | |
| { | |
| "name": "model.layers.0.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 23488512 | |
| }, | |
| { | |
| "name": "model.layers.0.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 23495680 | |
| }, | |
| { | |
| "name": "model.layers.1.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 23502848 | |
| }, | |
| { | |
| "name": "model.layers.1.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 4608, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6635520, | |
| "byteOffset": 23512064 | |
| }, | |
| { | |
| "name": "model.layers.1.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 4608, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 829440, | |
| "byteOffset": 30147584 | |
| } | |
| ], | |
| "md5sum": "2622196c8679a9c889ef39e7551aa42d" | |
| }, | |
| { | |
| "dataPath": "params_shard_5.bin", | |
| "format": "raw-shard", | |
| "nbytes": 54558720, | |
| "records": [ | |
| { | |
| "name": "model.layers.1.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 37888, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 54558720, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ebe2a67574ab4abc8618f0cc2ad1afc0" | |
| }, | |
| { | |
| "dataPath": "params_shard_6.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27181056, | |
| "records": [ | |
| { | |
| "name": "model.layers.1.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 3584, | |
| 1896 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 27181056, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "bb6b746127c9b2772fa31339df320ecd" | |
| }, | |
| { | |
| "dataPath": "params_shard_7.bin", | |
| "format": "raw-shard", | |
| "nbytes": 54558720, | |
| "records": [ | |
| { | |
| "name": "model.layers.2.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 37888, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 54558720, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "4a15acd5f3e571b1e8ee0d1d6f12846e" | |
| }, | |
| { | |
| "dataPath": "params_shard_8.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29318144, | |
| "records": [ | |
| { | |
| "name": "model.layers.1.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 3584, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5160960, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.1.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 3584, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 645120, | |
| "byteOffset": 5160960 | |
| }, | |
| { | |
| "name": "model.layers.1.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 37888, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6819840, | |
| "byteOffset": 5806080 | |
| }, | |
| { | |
| "name": "model.layers.1.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 3584, | |
| 474 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3397632, | |
| "byteOffset": 12625920 | |
| }, | |
| { | |
| "name": "model.layers.1.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 16023552 | |
| }, | |
| { | |
| "name": "model.layers.1.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 16030720 | |
| }, | |
| { | |
| "name": "model.layers.2.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 16037888 | |
| }, | |
| { | |
| "name": "model.layers.2.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 4608, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6635520, | |
| "byteOffset": 16047104 | |
| }, | |
| { | |
| "name": "model.layers.2.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 4608, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 829440, | |
| "byteOffset": 22682624 | |
| }, | |
| { | |
| "name": "model.layers.2.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 3584, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5160960, | |
| "byteOffset": 23512064 | |
| }, | |
| { | |
| "name": "model.layers.2.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 3584, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 645120, | |
| "byteOffset": 28673024 | |
| } | |
| ], | |
| "md5sum": "4e419f7f37788fa85b3702d7d07b0445" | |
| }, | |
| { | |
| "dataPath": "params_shard_9.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27181056, | |
| "records": [ | |
| { | |
| "name": "model.layers.2.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 3584, | |
| 1896 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 27181056, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f148a0dbc163e64e6cba8bef9ab72f9c" | |
| }, | |
| { | |
| "dataPath": "params_shard_10.bin", | |
| "format": "raw-shard", | |
| "nbytes": 54558720, | |
| "records": [ | |
| { | |
| "name": "model.layers.3.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 37888, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 54558720, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b74d8e5320e8c3bc67ae21a7856eb7d6" | |
| }, | |
| { | |
| "dataPath": "params_shard_11.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27181056, | |
| "records": [ | |
| { | |
| "name": "model.layers.3.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 3584, | |
| 1896 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 27181056, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "80a57644a987228b4490f12f86a02b92" | |
| }, | |
| { | |
| "dataPath": "params_shard_12.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30331904, | |
| "records": [ | |
| { | |
| "name": "model.layers.2.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 37888, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6819840, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.2.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 3584, | |
| 474 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3397632, | |
| "byteOffset": 6819840 | |
| }, | |
| { | |
| "name": "model.layers.2.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 10217472 | |
| }, | |
| { | |
| "name": "model.layers.2.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 10224640 | |
| }, | |
| { | |
| "name": "model.layers.3.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 10231808 | |
| }, | |
| { | |
| "name": "model.layers.3.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 4608, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6635520, | |
| "byteOffset": 10241024 | |
| }, | |
| { | |
| "name": "model.layers.3.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 4608, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 829440, | |
| "byteOffset": 16876544 | |
| }, | |
| { | |
| "name": "model.layers.3.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 3584, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5160960, | |
| "byteOffset": 17705984 | |
| }, | |
| { | |
| "name": "model.layers.3.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 3584, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 645120, | |
| "byteOffset": 22866944 | |
| }, | |
| { | |
| "name": "model.layers.3.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 37888, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6819840, | |
| "byteOffset": 23512064 | |
| } | |
| ], | |
| "md5sum": "3f0a8be2a2eb1548724df7fc3868c084" | |
| }, | |
| { | |
| "dataPath": "params_shard_13.bin", | |
| "format": "raw-shard", | |
| "nbytes": 54558720, | |
| "records": [ | |
| { | |
| "name": "model.layers.4.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 37888, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 54558720, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d99f052a6d1a9d1ed4f0d380a0eae45b" | |
| }, | |
| { | |
| "dataPath": "params_shard_14.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27181056, | |
| "records": [ | |
| { | |
| "name": "model.layers.4.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 3584, | |
| 1896 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 27181056, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "9e945905e1ebf7dac6b6ba251cf86405" | |
| }, | |
| { | |
| "dataPath": "params_shard_15.bin", | |
| "format": "raw-shard", | |
| "nbytes": 26933248, | |
| "records": [ | |
| { | |
| "name": "model.layers.3.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 3584, | |
| 474 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3397632, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.3.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 3397632 | |
| }, | |
| { | |
| "name": "model.layers.3.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 3404800 | |
| }, | |
| { | |
| "name": "model.layers.4.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 3411968 | |
| }, | |
| { | |
| "name": "model.layers.4.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 4608, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6635520, | |
| "byteOffset": 3421184 | |
| }, | |
| { | |
| "name": "model.layers.4.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 4608, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 829440, | |
| "byteOffset": 10056704 | |
| }, | |
| { | |
| "name": "model.layers.4.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 3584, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5160960, | |
| "byteOffset": 10886144 | |
| }, | |
| { | |
| "name": "model.layers.4.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 3584, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 645120, | |
| "byteOffset": 16047104 | |
| }, | |
| { | |
| "name": "model.layers.4.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 37888, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6819840, | |
| "byteOffset": 16692224 | |
| }, | |
| { | |
| "name": "model.layers.4.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 3584, | |
| 474 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3397632, | |
| "byteOffset": 23512064 | |
| }, | |
| { | |
| "name": "model.layers.4.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 26909696 | |
| }, | |
| { | |
| "name": "model.layers.4.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 26916864 | |
| }, | |
| { | |
| "name": "model.layers.5.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 26924032 | |
| } | |
| ], | |
| "md5sum": "0986ff19b355b7dbb816ad9a45d1c191" | |
| }, | |
| { | |
| "dataPath": "params_shard_16.bin", | |
| "format": "raw-shard", | |
| "nbytes": 54558720, | |
| "records": [ | |
| { | |
| "name": "model.layers.5.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 37888, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 54558720, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "6322af817165065a9e711ae60620b13d" | |
| }, | |
| { | |
| "dataPath": "params_shard_17.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27181056, | |
| "records": [ | |
| { | |
| "name": "model.layers.5.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 3584, | |
| 1896 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 27181056, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "639941d5f0dffaa22babb2fb928056c1" | |
| }, | |
| { | |
| "dataPath": "params_shard_18.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30977024, | |
| "records": [ | |
| { | |
| "name": "model.layers.5.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 4608, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6635520, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.5.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 4608, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 829440, | |
| "byteOffset": 6635520 | |
| }, | |
| { | |
| "name": "model.layers.5.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 3584, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5160960, | |
| "byteOffset": 7464960 | |
| }, | |
| { | |
| "name": "model.layers.5.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 3584, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 645120, | |
| "byteOffset": 12625920 | |
| }, | |
| { | |
| "name": "model.layers.5.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 37888, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6819840, | |
| "byteOffset": 13271040 | |
| }, | |
| { | |
| "name": "model.layers.5.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 3584, | |
| 474 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3397632, | |
| "byteOffset": 20090880 | |
| }, | |
| { | |
| "name": "model.layers.5.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 23488512 | |
| }, | |
| { | |
| "name": "model.layers.5.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 23495680 | |
| }, | |
| { | |
| "name": "model.layers.6.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 23502848 | |
| }, | |
| { | |
| "name": "model.layers.6.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 4608, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6635520, | |
| "byteOffset": 23512064 | |
| }, | |
| { | |
| "name": "model.layers.6.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 4608, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 829440, | |
| "byteOffset": 30147584 | |
| } | |
| ], | |
| "md5sum": "f658c701a1d57b5470d8a8e37dbd371a" | |
| }, | |
| { | |
| "dataPath": "params_shard_19.bin", | |
| "format": "raw-shard", | |
| "nbytes": 54558720, | |
| "records": [ | |
| { | |
| "name": "model.layers.6.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 37888, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 54558720, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "bd323da8420292d570e0c35db1f7a129" | |
| }, | |
| { | |
| "dataPath": "params_shard_20.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27181056, | |
| "records": [ | |
| { | |
| "name": "model.layers.6.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 3584, | |
| 1896 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 27181056, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "0e879e6956104fe87bd064fccf628f33" | |
| }, | |
| { | |
| "dataPath": "params_shard_21.bin", | |
| "format": "raw-shard", | |
| "nbytes": 54558720, | |
| "records": [ | |
| { | |
| "name": "model.layers.7.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 37888, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 54558720, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "eaa36148e4dbdce32a7cfad461d93a04" | |
| }, | |
| { | |
| "dataPath": "params_shard_22.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29318144, | |
| "records": [ | |
| { | |
| "name": "model.layers.6.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 3584, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5160960, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.6.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 3584, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 645120, | |
| "byteOffset": 5160960 | |
| }, | |
| { | |
| "name": "model.layers.6.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 37888, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6819840, | |
| "byteOffset": 5806080 | |
| }, | |
| { | |
| "name": "model.layers.6.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 3584, | |
| 474 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3397632, | |
| "byteOffset": 12625920 | |
| }, | |
| { | |
| "name": "model.layers.6.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 16023552 | |
| }, | |
| { | |
| "name": "model.layers.6.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 16030720 | |
| }, | |
| { | |
| "name": "model.layers.7.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 16037888 | |
| }, | |
| { | |
| "name": "model.layers.7.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 4608, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6635520, | |
| "byteOffset": 16047104 | |
| }, | |
| { | |
| "name": "model.layers.7.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 4608, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 829440, | |
| "byteOffset": 22682624 | |
| }, | |
| { | |
| "name": "model.layers.7.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 3584, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5160960, | |
| "byteOffset": 23512064 | |
| }, | |
| { | |
| "name": "model.layers.7.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 3584, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 645120, | |
| "byteOffset": 28673024 | |
| } | |
| ], | |
| "md5sum": "7b4d61f77f7fb2b0888198fd575b7d47" | |
| }, | |
| { | |
| "dataPath": "params_shard_23.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27181056, | |
| "records": [ | |
| { | |
| "name": "model.layers.7.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 3584, | |
| 1896 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 27181056, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "6c86642a7d76054003e968b9ae6f1af4" | |
| }, | |
| { | |
| "dataPath": "params_shard_24.bin", | |
| "format": "raw-shard", | |
| "nbytes": 54558720, | |
| "records": [ | |
| { | |
| "name": "model.layers.8.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 37888, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 54558720, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f5cff6dac4e2c910931a79021cfac7ae" | |
| }, | |
| { | |
| "dataPath": "params_shard_25.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27181056, | |
| "records": [ | |
| { | |
| "name": "model.layers.8.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 3584, | |
| 1896 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 27181056, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "83b2e3f1b96e90f6abe0491e80d911bf" | |
| }, | |
| { | |
| "dataPath": "params_shard_26.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30331904, | |
| "records": [ | |
| { | |
| "name": "model.layers.7.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 37888, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6819840, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.7.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 3584, | |
| 474 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3397632, | |
| "byteOffset": 6819840 | |
| }, | |
| { | |
| "name": "model.layers.7.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 10217472 | |
| }, | |
| { | |
| "name": "model.layers.7.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 10224640 | |
| }, | |
| { | |
| "name": "model.layers.8.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 10231808 | |
| }, | |
| { | |
| "name": "model.layers.8.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 4608, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6635520, | |
| "byteOffset": 10241024 | |
| }, | |
| { | |
| "name": "model.layers.8.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 4608, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 829440, | |
| "byteOffset": 16876544 | |
| }, | |
| { | |
| "name": "model.layers.8.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 3584, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5160960, | |
| "byteOffset": 17705984 | |
| }, | |
| { | |
| "name": "model.layers.8.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 3584, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 645120, | |
| "byteOffset": 22866944 | |
| }, | |
| { | |
| "name": "model.layers.8.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 37888, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6819840, | |
| "byteOffset": 23512064 | |
| } | |
| ], | |
| "md5sum": "7a8096e98aeaf22ebdd95334f1c7b17a" | |
| }, | |
| { | |
| "dataPath": "params_shard_27.bin", | |
| "format": "raw-shard", | |
| "nbytes": 54558720, | |
| "records": [ | |
| { | |
| "name": "model.layers.9.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 37888, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 54558720, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ef99ec929ccef0f59fad840799408b74" | |
| }, | |
| { | |
| "dataPath": "params_shard_28.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27181056, | |
| "records": [ | |
| { | |
| "name": "model.layers.9.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 3584, | |
| 1896 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 27181056, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "48deb39c530029a8484d624048c4dbac" | |
| }, | |
| { | |
| "dataPath": "params_shard_29.bin", | |
| "format": "raw-shard", | |
| "nbytes": 26933248, | |
| "records": [ | |
| { | |
| "name": "model.layers.8.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 3584, | |
| 474 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3397632, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.8.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 3397632 | |
| }, | |
| { | |
| "name": "model.layers.8.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 3404800 | |
| }, | |
| { | |
| "name": "model.layers.9.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 3411968 | |
| }, | |
| { | |
| "name": "model.layers.9.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 4608, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6635520, | |
| "byteOffset": 3421184 | |
| }, | |
| { | |
| "name": "model.layers.9.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 4608, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 829440, | |
| "byteOffset": 10056704 | |
| }, | |
| { | |
| "name": "model.layers.9.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 3584, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5160960, | |
| "byteOffset": 10886144 | |
| }, | |
| { | |
| "name": "model.layers.9.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 3584, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 645120, | |
| "byteOffset": 16047104 | |
| }, | |
| { | |
| "name": "model.layers.9.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 37888, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6819840, | |
| "byteOffset": 16692224 | |
| }, | |
| { | |
| "name": "model.layers.9.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 3584, | |
| 474 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3397632, | |
| "byteOffset": 23512064 | |
| }, | |
| { | |
| "name": "model.layers.9.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 26909696 | |
| }, | |
| { | |
| "name": "model.layers.9.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 26916864 | |
| }, | |
| { | |
| "name": "model.layers.10.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 26924032 | |
| } | |
| ], | |
| "md5sum": "89155a9ccb6d5e34bad8fe64f2c5db4d" | |
| }, | |
| { | |
| "dataPath": "params_shard_30.bin", | |
| "format": "raw-shard", | |
| "nbytes": 54558720, | |
| "records": [ | |
| { | |
| "name": "model.layers.10.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 37888, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 54558720, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "a64b8e0dbc8412b07945c49ddeaba3c7" | |
| }, | |
| { | |
| "dataPath": "params_shard_31.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27181056, | |
| "records": [ | |
| { | |
| "name": "model.layers.10.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 3584, | |
| 1896 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 27181056, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d82c304c5b7a2cab8cc1a2b69761f49c" | |
| }, | |
| { | |
| "dataPath": "params_shard_32.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30977024, | |
| "records": [ | |
| { | |
| "name": "model.layers.10.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 4608, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6635520, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.10.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 4608, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 829440, | |
| "byteOffset": 6635520 | |
| }, | |
| { | |
| "name": "model.layers.10.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 3584, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5160960, | |
| "byteOffset": 7464960 | |
| }, | |
| { | |
| "name": "model.layers.10.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 3584, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 645120, | |
| "byteOffset": 12625920 | |
| }, | |
| { | |
| "name": "model.layers.10.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 37888, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6819840, | |
| "byteOffset": 13271040 | |
| }, | |
| { | |
| "name": "model.layers.10.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 3584, | |
| 474 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3397632, | |
| "byteOffset": 20090880 | |
| }, | |
| { | |
| "name": "model.layers.10.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 23488512 | |
| }, | |
| { | |
| "name": "model.layers.10.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 23495680 | |
| }, | |
| { | |
| "name": "model.layers.11.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 23502848 | |
| }, | |
| { | |
| "name": "model.layers.11.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 4608, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6635520, | |
| "byteOffset": 23512064 | |
| }, | |
| { | |
| "name": "model.layers.11.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 4608, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 829440, | |
| "byteOffset": 30147584 | |
| } | |
| ], | |
| "md5sum": "e324abc8943911bb5d3c5a05d01d74da" | |
| }, | |
| { | |
| "dataPath": "params_shard_33.bin", | |
| "format": "raw-shard", | |
| "nbytes": 54558720, | |
| "records": [ | |
| { | |
| "name": "model.layers.11.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 37888, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 54558720, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "94d4b972ee22c8c606cb67ccbba23623" | |
| }, | |
| { | |
| "dataPath": "params_shard_34.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27181056, | |
| "records": [ | |
| { | |
| "name": "model.layers.11.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 3584, | |
| 1896 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 27181056, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "343e8459c5cfcadd7bb353bf5d66e6e8" | |
| }, | |
| { | |
| "dataPath": "params_shard_35.bin", | |
| "format": "raw-shard", | |
| "nbytes": 54558720, | |
| "records": [ | |
| { | |
| "name": "model.layers.12.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 37888, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 54558720, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "45e935aa5151d5e8a91f0d9cc149b6ee" | |
| }, | |
| { | |
| "dataPath": "params_shard_36.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29318144, | |
| "records": [ | |
| { | |
| "name": "model.layers.11.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 3584, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5160960, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.11.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 3584, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 645120, | |
| "byteOffset": 5160960 | |
| }, | |
| { | |
| "name": "model.layers.11.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 37888, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6819840, | |
| "byteOffset": 5806080 | |
| }, | |
| { | |
| "name": "model.layers.11.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 3584, | |
| 474 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3397632, | |
| "byteOffset": 12625920 | |
| }, | |
| { | |
| "name": "model.layers.11.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 16023552 | |
| }, | |
| { | |
| "name": "model.layers.11.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 16030720 | |
| }, | |
| { | |
| "name": "model.layers.12.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 16037888 | |
| }, | |
| { | |
| "name": "model.layers.12.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 4608, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6635520, | |
| "byteOffset": 16047104 | |
| }, | |
| { | |
| "name": "model.layers.12.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 4608, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 829440, | |
| "byteOffset": 22682624 | |
| }, | |
| { | |
| "name": "model.layers.12.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 3584, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5160960, | |
| "byteOffset": 23512064 | |
| }, | |
| { | |
| "name": "model.layers.12.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 3584, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 645120, | |
| "byteOffset": 28673024 | |
| } | |
| ], | |
| "md5sum": "4cdc65a0ec2fc5bf5e4efad34646fd82" | |
| }, | |
| { | |
| "dataPath": "params_shard_37.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27181056, | |
| "records": [ | |
| { | |
| "name": "model.layers.12.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 3584, | |
| 1896 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 27181056, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "8dc61cb07e676a2eece4d84577a5bfc7" | |
| }, | |
| { | |
| "dataPath": "params_shard_38.bin", | |
| "format": "raw-shard", | |
| "nbytes": 54558720, | |
| "records": [ | |
| { | |
| "name": "model.layers.13.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 37888, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 54558720, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "5910d3c9317909afb8f0ad9f0732231c" | |
| }, | |
| { | |
| "dataPath": "params_shard_39.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27181056, | |
| "records": [ | |
| { | |
| "name": "model.layers.13.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 3584, | |
| 1896 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 27181056, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "006efd95e341c3af634419bba61d3104" | |
| }, | |
| { | |
| "dataPath": "params_shard_40.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30331904, | |
| "records": [ | |
| { | |
| "name": "model.layers.12.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 37888, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6819840, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.12.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 3584, | |
| 474 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3397632, | |
| "byteOffset": 6819840 | |
| }, | |
| { | |
| "name": "model.layers.12.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 10217472 | |
| }, | |
| { | |
| "name": "model.layers.12.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 10224640 | |
| }, | |
| { | |
| "name": "model.layers.13.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 10231808 | |
| }, | |
| { | |
| "name": "model.layers.13.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 4608, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6635520, | |
| "byteOffset": 10241024 | |
| }, | |
| { | |
| "name": "model.layers.13.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 4608, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 829440, | |
| "byteOffset": 16876544 | |
| }, | |
| { | |
| "name": "model.layers.13.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 3584, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5160960, | |
| "byteOffset": 17705984 | |
| }, | |
| { | |
| "name": "model.layers.13.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 3584, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 645120, | |
| "byteOffset": 22866944 | |
| }, | |
| { | |
| "name": "model.layers.13.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 37888, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6819840, | |
| "byteOffset": 23512064 | |
| } | |
| ], | |
| "md5sum": "b03eb35ebdf3e82ef5f27a709234ee70" | |
| }, | |
| { | |
| "dataPath": "params_shard_41.bin", | |
| "format": "raw-shard", | |
| "nbytes": 54558720, | |
| "records": [ | |
| { | |
| "name": "model.layers.14.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 37888, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 54558720, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b2fbf638c6f0eafb3e93ae9aa069b802" | |
| }, | |
| { | |
| "dataPath": "params_shard_42.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27181056, | |
| "records": [ | |
| { | |
| "name": "model.layers.14.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 3584, | |
| 1896 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 27181056, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "cd8dd1a3c24d2e88509e7898815337fb" | |
| }, | |
| { | |
| "dataPath": "params_shard_43.bin", | |
| "format": "raw-shard", | |
| "nbytes": 26933248, | |
| "records": [ | |
| { | |
| "name": "model.layers.13.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 3584, | |
| 474 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3397632, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.13.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 3397632 | |
| }, | |
| { | |
| "name": "model.layers.13.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 3404800 | |
| }, | |
| { | |
| "name": "model.layers.14.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 3411968 | |
| }, | |
| { | |
| "name": "model.layers.14.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 4608, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6635520, | |
| "byteOffset": 3421184 | |
| }, | |
| { | |
| "name": "model.layers.14.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 4608, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 829440, | |
| "byteOffset": 10056704 | |
| }, | |
| { | |
| "name": "model.layers.14.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 3584, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5160960, | |
| "byteOffset": 10886144 | |
| }, | |
| { | |
| "name": "model.layers.14.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 3584, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 645120, | |
| "byteOffset": 16047104 | |
| }, | |
| { | |
| "name": "model.layers.14.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 37888, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6819840, | |
| "byteOffset": 16692224 | |
| }, | |
| { | |
| "name": "model.layers.14.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 3584, | |
| 474 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3397632, | |
| "byteOffset": 23512064 | |
| }, | |
| { | |
| "name": "model.layers.14.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 26909696 | |
| }, | |
| { | |
| "name": "model.layers.14.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 26916864 | |
| }, | |
| { | |
| "name": "model.layers.15.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 26924032 | |
| } | |
| ], | |
| "md5sum": "3c5026f4348104082684e4d96697e0ed" | |
| }, | |
| { | |
| "dataPath": "params_shard_44.bin", | |
| "format": "raw-shard", | |
| "nbytes": 54558720, | |
| "records": [ | |
| { | |
| "name": "model.layers.15.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 37888, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 54558720, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e2e9144edf576ef4a66fba1720ed827b" | |
| }, | |
| { | |
| "dataPath": "params_shard_45.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27181056, | |
| "records": [ | |
| { | |
| "name": "model.layers.15.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 3584, | |
| 1896 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 27181056, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "2c8f48ac276931fab0820c048ece56c0" | |
| }, | |
| { | |
| "dataPath": "params_shard_46.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30977024, | |
| "records": [ | |
| { | |
| "name": "model.layers.15.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 4608, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6635520, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.15.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 4608, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 829440, | |
| "byteOffset": 6635520 | |
| }, | |
| { | |
| "name": "model.layers.15.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 3584, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5160960, | |
| "byteOffset": 7464960 | |
| }, | |
| { | |
| "name": "model.layers.15.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 3584, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 645120, | |
| "byteOffset": 12625920 | |
| }, | |
| { | |
| "name": "model.layers.15.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 37888, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6819840, | |
| "byteOffset": 13271040 | |
| }, | |
| { | |
| "name": "model.layers.15.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 3584, | |
| 474 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3397632, | |
| "byteOffset": 20090880 | |
| }, | |
| { | |
| "name": "model.layers.15.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 23488512 | |
| }, | |
| { | |
| "name": "model.layers.15.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 23495680 | |
| }, | |
| { | |
| "name": "model.layers.16.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 23502848 | |
| }, | |
| { | |
| "name": "model.layers.16.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 4608, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6635520, | |
| "byteOffset": 23512064 | |
| }, | |
| { | |
| "name": "model.layers.16.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 4608, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 829440, | |
| "byteOffset": 30147584 | |
| } | |
| ], | |
| "md5sum": "80e3946938eeaf237feaa20572a620f3" | |
| }, | |
| { | |
| "dataPath": "params_shard_47.bin", | |
| "format": "raw-shard", | |
| "nbytes": 54558720, | |
| "records": [ | |
| { | |
| "name": "model.layers.16.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 37888, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 54558720, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "73f979927f1e3f6d73a3e8f77290ae38" | |
| }, | |
| { | |
| "dataPath": "params_shard_48.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27181056, | |
| "records": [ | |
| { | |
| "name": "model.layers.16.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 3584, | |
| 1896 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 27181056, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d90b362217f96e6573c2c8648f541254" | |
| }, | |
| { | |
| "dataPath": "params_shard_49.bin", | |
| "format": "raw-shard", | |
| "nbytes": 54558720, | |
| "records": [ | |
| { | |
| "name": "model.layers.17.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 37888, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 54558720, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e7f23567198607ff3762dc6c4732d25e" | |
| }, | |
| { | |
| "dataPath": "params_shard_50.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29318144, | |
| "records": [ | |
| { | |
| "name": "model.layers.16.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 3584, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5160960, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.16.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 3584, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 645120, | |
| "byteOffset": 5160960 | |
| }, | |
| { | |
| "name": "model.layers.16.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 37888, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6819840, | |
| "byteOffset": 5806080 | |
| }, | |
| { | |
| "name": "model.layers.16.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 3584, | |
| 474 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3397632, | |
| "byteOffset": 12625920 | |
| }, | |
| { | |
| "name": "model.layers.16.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 16023552 | |
| }, | |
| { | |
| "name": "model.layers.16.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 16030720 | |
| }, | |
| { | |
| "name": "model.layers.17.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 16037888 | |
| }, | |
| { | |
| "name": "model.layers.17.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 4608, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6635520, | |
| "byteOffset": 16047104 | |
| }, | |
| { | |
| "name": "model.layers.17.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 4608, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 829440, | |
| "byteOffset": 22682624 | |
| }, | |
| { | |
| "name": "model.layers.17.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 3584, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5160960, | |
| "byteOffset": 23512064 | |
| }, | |
| { | |
| "name": "model.layers.17.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 3584, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 645120, | |
| "byteOffset": 28673024 | |
| } | |
| ], | |
| "md5sum": "c08873dd448535efa6b624de595d7136" | |
| }, | |
| { | |
| "dataPath": "params_shard_51.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27181056, | |
| "records": [ | |
| { | |
| "name": "model.layers.17.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 3584, | |
| 1896 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 27181056, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "bc05a2e2f87c4693291564cc4b038b3e" | |
| }, | |
| { | |
| "dataPath": "params_shard_52.bin", | |
| "format": "raw-shard", | |
| "nbytes": 54558720, | |
| "records": [ | |
| { | |
| "name": "model.layers.18.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 37888, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 54558720, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b4238925aa2621415222f57f99f013ee" | |
| }, | |
| { | |
| "dataPath": "params_shard_53.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27181056, | |
| "records": [ | |
| { | |
| "name": "model.layers.18.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 3584, | |
| 1896 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 27181056, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "50c11c2bfea03db8a15921c732a85b6f" | |
| }, | |
| { | |
| "dataPath": "params_shard_54.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30331904, | |
| "records": [ | |
| { | |
| "name": "model.layers.17.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 37888, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6819840, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.17.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 3584, | |
| 474 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3397632, | |
| "byteOffset": 6819840 | |
| }, | |
| { | |
| "name": "model.layers.17.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 10217472 | |
| }, | |
| { | |
| "name": "model.layers.17.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 10224640 | |
| }, | |
| { | |
| "name": "model.layers.18.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 10231808 | |
| }, | |
| { | |
| "name": "model.layers.18.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 4608, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6635520, | |
| "byteOffset": 10241024 | |
| }, | |
| { | |
| "name": "model.layers.18.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 4608, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 829440, | |
| "byteOffset": 16876544 | |
| }, | |
| { | |
| "name": "model.layers.18.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 3584, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5160960, | |
| "byteOffset": 17705984 | |
| }, | |
| { | |
| "name": "model.layers.18.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 3584, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 645120, | |
| "byteOffset": 22866944 | |
| }, | |
| { | |
| "name": "model.layers.18.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 37888, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6819840, | |
| "byteOffset": 23512064 | |
| } | |
| ], | |
| "md5sum": "9a5c460913d8ccb7f7cb3deb4faae368" | |
| }, | |
| { | |
| "dataPath": "params_shard_55.bin", | |
| "format": "raw-shard", | |
| "nbytes": 54558720, | |
| "records": [ | |
| { | |
| "name": "model.layers.19.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 37888, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 54558720, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c287af0f4249ba4b57720a6e31efca6e" | |
| }, | |
| { | |
| "dataPath": "params_shard_56.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27181056, | |
| "records": [ | |
| { | |
| "name": "model.layers.19.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 3584, | |
| 1896 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 27181056, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "2bf1f5d9201d5874b31d5c2151a6ce49" | |
| }, | |
| { | |
| "dataPath": "params_shard_57.bin", | |
| "format": "raw-shard", | |
| "nbytes": 26933248, | |
| "records": [ | |
| { | |
| "name": "model.layers.18.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 3584, | |
| 474 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3397632, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.18.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 3397632 | |
| }, | |
| { | |
| "name": "model.layers.18.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 3404800 | |
| }, | |
| { | |
| "name": "model.layers.19.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 3411968 | |
| }, | |
| { | |
| "name": "model.layers.19.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 4608, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6635520, | |
| "byteOffset": 3421184 | |
| }, | |
| { | |
| "name": "model.layers.19.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 4608, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 829440, | |
| "byteOffset": 10056704 | |
| }, | |
| { | |
| "name": "model.layers.19.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 3584, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5160960, | |
| "byteOffset": 10886144 | |
| }, | |
| { | |
| "name": "model.layers.19.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 3584, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 645120, | |
| "byteOffset": 16047104 | |
| }, | |
| { | |
| "name": "model.layers.19.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 37888, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6819840, | |
| "byteOffset": 16692224 | |
| }, | |
| { | |
| "name": "model.layers.19.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 3584, | |
| 474 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3397632, | |
| "byteOffset": 23512064 | |
| }, | |
| { | |
| "name": "model.layers.19.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 26909696 | |
| }, | |
| { | |
| "name": "model.layers.19.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 26916864 | |
| }, | |
| { | |
| "name": "model.layers.20.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 26924032 | |
| } | |
| ], | |
| "md5sum": "8a6ce3a377c2e681d543219223eb3df4" | |
| }, | |
| { | |
| "dataPath": "params_shard_58.bin", | |
| "format": "raw-shard", | |
| "nbytes": 54558720, | |
| "records": [ | |
| { | |
| "name": "model.layers.20.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 37888, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 54558720, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "bf0117af395e2d0ecb5173e02e768ff2" | |
| }, | |
| { | |
| "dataPath": "params_shard_59.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27181056, | |
| "records": [ | |
| { | |
| "name": "model.layers.20.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 3584, | |
| 1896 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 27181056, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "2667a9ee9de3d3103785d876f4d01039" | |
| }, | |
| { | |
| "dataPath": "params_shard_60.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30977024, | |
| "records": [ | |
| { | |
| "name": "model.layers.20.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 4608, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6635520, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.20.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 4608, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 829440, | |
| "byteOffset": 6635520 | |
| }, | |
| { | |
| "name": "model.layers.20.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 3584, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5160960, | |
| "byteOffset": 7464960 | |
| }, | |
| { | |
| "name": "model.layers.20.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 3584, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 645120, | |
| "byteOffset": 12625920 | |
| }, | |
| { | |
| "name": "model.layers.20.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 37888, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6819840, | |
| "byteOffset": 13271040 | |
| }, | |
| { | |
| "name": "model.layers.20.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 3584, | |
| 474 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3397632, | |
| "byteOffset": 20090880 | |
| }, | |
| { | |
| "name": "model.layers.20.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 23488512 | |
| }, | |
| { | |
| "name": "model.layers.20.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 23495680 | |
| }, | |
| { | |
| "name": "model.layers.21.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 23502848 | |
| }, | |
| { | |
| "name": "model.layers.21.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 4608, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6635520, | |
| "byteOffset": 23512064 | |
| }, | |
| { | |
| "name": "model.layers.21.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 4608, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 829440, | |
| "byteOffset": 30147584 | |
| } | |
| ], | |
| "md5sum": "86554654054d9b0ff9434c9d91b9ac6e" | |
| }, | |
| { | |
| "dataPath": "params_shard_61.bin", | |
| "format": "raw-shard", | |
| "nbytes": 54558720, | |
| "records": [ | |
| { | |
| "name": "model.layers.21.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 37888, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 54558720, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "2f8368d394f2def59f99491fc910eed7" | |
| }, | |
| { | |
| "dataPath": "params_shard_62.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27181056, | |
| "records": [ | |
| { | |
| "name": "model.layers.21.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 3584, | |
| 1896 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 27181056, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "5b215ababea2518d5bdac53c51cc8cee" | |
| }, | |
| { | |
| "dataPath": "params_shard_63.bin", | |
| "format": "raw-shard", | |
| "nbytes": 54558720, | |
| "records": [ | |
| { | |
| "name": "model.layers.22.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 37888, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 54558720, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "9c45db8cf61728bf61f728db0471d2c0" | |
| }, | |
| { | |
| "dataPath": "params_shard_64.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29318144, | |
| "records": [ | |
| { | |
| "name": "model.layers.21.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 3584, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5160960, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.21.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 3584, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 645120, | |
| "byteOffset": 5160960 | |
| }, | |
| { | |
| "name": "model.layers.21.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 37888, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6819840, | |
| "byteOffset": 5806080 | |
| }, | |
| { | |
| "name": "model.layers.21.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 3584, | |
| 474 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3397632, | |
| "byteOffset": 12625920 | |
| }, | |
| { | |
| "name": "model.layers.21.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 16023552 | |
| }, | |
| { | |
| "name": "model.layers.21.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 16030720 | |
| }, | |
| { | |
| "name": "model.layers.22.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 16037888 | |
| }, | |
| { | |
| "name": "model.layers.22.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 4608, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6635520, | |
| "byteOffset": 16047104 | |
| }, | |
| { | |
| "name": "model.layers.22.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 4608, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 829440, | |
| "byteOffset": 22682624 | |
| }, | |
| { | |
| "name": "model.layers.22.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 3584, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5160960, | |
| "byteOffset": 23512064 | |
| }, | |
| { | |
| "name": "model.layers.22.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 3584, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 645120, | |
| "byteOffset": 28673024 | |
| } | |
| ], | |
| "md5sum": "692c517dbee27e059c0f2020cf65c49c" | |
| }, | |
| { | |
| "dataPath": "params_shard_65.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27181056, | |
| "records": [ | |
| { | |
| "name": "model.layers.22.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 3584, | |
| 1896 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 27181056, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "a4748c436bdd8372cdd48382a727b5d7" | |
| }, | |
| { | |
| "dataPath": "params_shard_66.bin", | |
| "format": "raw-shard", | |
| "nbytes": 54558720, | |
| "records": [ | |
| { | |
| "name": "model.layers.23.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 37888, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 54558720, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ba51d509ebceb6d72005d7b75f5c12b2" | |
| }, | |
| { | |
| "dataPath": "params_shard_67.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27181056, | |
| "records": [ | |
| { | |
| "name": "model.layers.23.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 3584, | |
| 1896 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 27181056, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "084d4ca76277deb82033f1c8bd4f758e" | |
| }, | |
| { | |
| "dataPath": "params_shard_68.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30331904, | |
| "records": [ | |
| { | |
| "name": "model.layers.22.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 37888, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6819840, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.22.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 3584, | |
| 474 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3397632, | |
| "byteOffset": 6819840 | |
| }, | |
| { | |
| "name": "model.layers.22.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 10217472 | |
| }, | |
| { | |
| "name": "model.layers.22.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 10224640 | |
| }, | |
| { | |
| "name": "model.layers.23.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 10231808 | |
| }, | |
| { | |
| "name": "model.layers.23.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 4608, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6635520, | |
| "byteOffset": 10241024 | |
| }, | |
| { | |
| "name": "model.layers.23.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 4608, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 829440, | |
| "byteOffset": 16876544 | |
| }, | |
| { | |
| "name": "model.layers.23.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 3584, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5160960, | |
| "byteOffset": 17705984 | |
| }, | |
| { | |
| "name": "model.layers.23.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 3584, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 645120, | |
| "byteOffset": 22866944 | |
| }, | |
| { | |
| "name": "model.layers.23.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 37888, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6819840, | |
| "byteOffset": 23512064 | |
| } | |
| ], | |
| "md5sum": "52bbdf6350920cd517ab13c37adce1b3" | |
| }, | |
| { | |
| "dataPath": "params_shard_69.bin", | |
| "format": "raw-shard", | |
| "nbytes": 54558720, | |
| "records": [ | |
| { | |
| "name": "model.layers.24.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 37888, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 54558720, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "4626961ea87e6f1d37ac3d76c6e0bd54" | |
| }, | |
| { | |
| "dataPath": "params_shard_70.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27181056, | |
| "records": [ | |
| { | |
| "name": "model.layers.24.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 3584, | |
| 1896 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 27181056, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "34af059876eeb87324868adfeb53518d" | |
| }, | |
| { | |
| "dataPath": "params_shard_71.bin", | |
| "format": "raw-shard", | |
| "nbytes": 26933248, | |
| "records": [ | |
| { | |
| "name": "model.layers.23.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 3584, | |
| 474 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3397632, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.23.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 3397632 | |
| }, | |
| { | |
| "name": "model.layers.23.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 3404800 | |
| }, | |
| { | |
| "name": "model.layers.24.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 3411968 | |
| }, | |
| { | |
| "name": "model.layers.24.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 4608, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6635520, | |
| "byteOffset": 3421184 | |
| }, | |
| { | |
| "name": "model.layers.24.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 4608, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 829440, | |
| "byteOffset": 10056704 | |
| }, | |
| { | |
| "name": "model.layers.24.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 3584, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5160960, | |
| "byteOffset": 10886144 | |
| }, | |
| { | |
| "name": "model.layers.24.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 3584, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 645120, | |
| "byteOffset": 16047104 | |
| }, | |
| { | |
| "name": "model.layers.24.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 37888, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6819840, | |
| "byteOffset": 16692224 | |
| }, | |
| { | |
| "name": "model.layers.24.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 3584, | |
| 474 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3397632, | |
| "byteOffset": 23512064 | |
| }, | |
| { | |
| "name": "model.layers.24.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 26909696 | |
| }, | |
| { | |
| "name": "model.layers.24.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 26916864 | |
| }, | |
| { | |
| "name": "model.layers.25.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 26924032 | |
| } | |
| ], | |
| "md5sum": "ca0c409cd46eb83375daccff30ddc446" | |
| }, | |
| { | |
| "dataPath": "params_shard_72.bin", | |
| "format": "raw-shard", | |
| "nbytes": 54558720, | |
| "records": [ | |
| { | |
| "name": "model.layers.25.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 37888, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 54558720, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "260f4915c6bebe3f3d35ce71b38e7d5a" | |
| }, | |
| { | |
| "dataPath": "params_shard_73.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27181056, | |
| "records": [ | |
| { | |
| "name": "model.layers.25.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 3584, | |
| 1896 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 27181056, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "5f870a91a33e316d22438627f90576bf" | |
| }, | |
| { | |
| "dataPath": "params_shard_74.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30977024, | |
| "records": [ | |
| { | |
| "name": "model.layers.25.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 4608, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6635520, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.25.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 4608, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 829440, | |
| "byteOffset": 6635520 | |
| }, | |
| { | |
| "name": "model.layers.25.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 3584, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5160960, | |
| "byteOffset": 7464960 | |
| }, | |
| { | |
| "name": "model.layers.25.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 3584, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 645120, | |
| "byteOffset": 12625920 | |
| }, | |
| { | |
| "name": "model.layers.25.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 37888, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6819840, | |
| "byteOffset": 13271040 | |
| }, | |
| { | |
| "name": "model.layers.25.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 3584, | |
| 474 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3397632, | |
| "byteOffset": 20090880 | |
| }, | |
| { | |
| "name": "model.layers.25.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 23488512 | |
| }, | |
| { | |
| "name": "model.layers.25.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 23495680 | |
| }, | |
| { | |
| "name": "model.layers.26.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 23502848 | |
| }, | |
| { | |
| "name": "model.layers.26.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 4608, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6635520, | |
| "byteOffset": 23512064 | |
| }, | |
| { | |
| "name": "model.layers.26.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 4608, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 829440, | |
| "byteOffset": 30147584 | |
| } | |
| ], | |
| "md5sum": "167ab58ffb87cff97c407de90818679f" | |
| }, | |
| { | |
| "dataPath": "params_shard_75.bin", | |
| "format": "raw-shard", | |
| "nbytes": 54558720, | |
| "records": [ | |
| { | |
| "name": "model.layers.26.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 37888, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 54558720, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "4f887e990bc71f6f8a4e0decebfc7594" | |
| }, | |
| { | |
| "dataPath": "params_shard_76.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27181056, | |
| "records": [ | |
| { | |
| "name": "model.layers.26.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 3584, | |
| 1896 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 27181056, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e7b9bc135e68e5dc8a589ff764415fff" | |
| }, | |
| { | |
| "dataPath": "params_shard_77.bin", | |
| "format": "raw-shard", | |
| "nbytes": 54558720, | |
| "records": [ | |
| { | |
| "name": "model.layers.27.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 37888, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 54558720, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "a1a30758cc45630c0d93d51f3f818d4d" | |
| }, | |
| { | |
| "dataPath": "params_shard_78.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29318144, | |
| "records": [ | |
| { | |
| "name": "model.layers.26.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 3584, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5160960, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.26.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 3584, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 645120, | |
| "byteOffset": 5160960 | |
| }, | |
| { | |
| "name": "model.layers.26.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 37888, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6819840, | |
| "byteOffset": 5806080 | |
| }, | |
| { | |
| "name": "model.layers.26.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 3584, | |
| 474 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3397632, | |
| "byteOffset": 12625920 | |
| }, | |
| { | |
| "name": "model.layers.26.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 16023552 | |
| }, | |
| { | |
| "name": "model.layers.26.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 16030720 | |
| }, | |
| { | |
| "name": "model.layers.27.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 16037888 | |
| }, | |
| { | |
| "name": "model.layers.27.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 4608, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6635520, | |
| "byteOffset": 16047104 | |
| }, | |
| { | |
| "name": "model.layers.27.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 4608, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 829440, | |
| "byteOffset": 22682624 | |
| }, | |
| { | |
| "name": "model.layers.27.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 3584, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5160960, | |
| "byteOffset": 23512064 | |
| }, | |
| { | |
| "name": "model.layers.27.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 3584, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 645120, | |
| "byteOffset": 28673024 | |
| } | |
| ], | |
| "md5sum": "736abd9b5c719582e80835ee09a8ccbe" | |
| }, | |
| { | |
| "dataPath": "params_shard_79.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27181056, | |
| "records": [ | |
| { | |
| "name": "model.layers.27.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 3584, | |
| 1896 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 27181056, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "8d130ae08af50136b86c6646b152ca0b" | |
| }, | |
| { | |
| "dataPath": "params_shard_80.bin", | |
| "format": "raw-shard", | |
| "nbytes": 218972160, | |
| "records": [ | |
| { | |
| "name": "lm_head.q_weight", | |
| "shape": [ | |
| 152064, | |
| 360 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 218972160, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f29c6f6cb47b0e1bdcc147a97cc45c58" | |
| }, | |
| { | |
| "dataPath": "params_shard_81.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27371520, | |
| "records": [ | |
| { | |
| "name": "lm_head.q_scale", | |
| "shape": [ | |
| 152064, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 27371520, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f0695bd8440e484d82e1c828c8e1e229" | |
| }, | |
| { | |
| "dataPath": "params_shard_82.bin", | |
| "format": "raw-shard", | |
| "nbytes": 10238976, | |
| "records": [ | |
| { | |
| "name": "model.layers.27.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 37888, | |
| 90 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6819840, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.27.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 3584, | |
| 474 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3397632, | |
| "byteOffset": 6819840 | |
| }, | |
| { | |
| "name": "model.layers.27.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 10217472 | |
| }, | |
| { | |
| "name": "model.layers.27.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 10224640 | |
| }, | |
| { | |
| "name": "model.norm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 10231808 | |
| } | |
| ], | |
| "md5sum": "c834978673c0fca7555d7eb588d620b7" | |
| } | |
| ] | |
| } |