| { | |
| "metadata": { | |
| "ParamSize": 313, | |
| "ParamBytes": 3927297024.0, | |
| "BitsPerParam": 4.12551973205239 | |
| }, | |
| "records": [ | |
| { | |
| "dataPath": "params_shard_0.bin", | |
| "format": "raw-shard", | |
| "nbytes": 272498688, | |
| "records": [ | |
| { | |
| "name": "lm_head.q_weight", | |
| "shape": [ | |
| 448, | |
| 152064 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 272498688, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e937497aae5865d20c5f1467c151988d" | |
| }, | |
| { | |
| "dataPath": "params_shard_1.bin", | |
| "format": "raw-shard", | |
| "nbytes": 272498688, | |
| "records": [ | |
| { | |
| "name": "model.embed_tokens.q_weight", | |
| "shape": [ | |
| 152064, | |
| 448 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 272498688, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f7c32a9c15551c9f594e3ab9a79e59d7" | |
| }, | |
| { | |
| "dataPath": "params_shard_2.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33947648, | |
| "records": [ | |
| { | |
| "name": "model.layers.0.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 2368, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33947648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "35000756a943bfe656c8bb2570c91fa4" | |
| }, | |
| { | |
| "dataPath": "params_shard_3.bin", | |
| "format": "raw-shard", | |
| "nbytes": 67895296, | |
| "records": [ | |
| { | |
| "name": "model.layers.0.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 37888 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 67895296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "09902969dd2a63b1b763c1d2885c3854" | |
| }, | |
| { | |
| "dataPath": "params_shard_4.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28752896, | |
| "records": [ | |
| { | |
| "name": "lm_head.q_scale", | |
| "shape": [ | |
| 28, | |
| 152064 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8515584, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.embed_tokens.q_scale", | |
| "shape": [ | |
| 152064, | |
| 28 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8515584, | |
| "byteOffset": 8515584 | |
| }, | |
| { | |
| "name": "model.layers.0.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 17031168 | |
| }, | |
| { | |
| "name": "model.layers.0.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 148, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1060864, | |
| "byteOffset": 17038336 | |
| }, | |
| { | |
| "name": "model.layers.0.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 37888 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2121728, | |
| "byteOffset": 18099200 | |
| }, | |
| { | |
| "name": "model.layers.0.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 20220928 | |
| }, | |
| { | |
| "name": "model.layers.0.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 20228096 | |
| }, | |
| { | |
| "name": "model.layers.0.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 448, | |
| 4608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8257536, | |
| "byteOffset": 20237312 | |
| }, | |
| { | |
| "name": "model.layers.0.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 28, | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 258048, | |
| "byteOffset": 28494848 | |
| } | |
| ], | |
| "md5sum": "b5a69167fbb271617cc06331cb077a84" | |
| }, | |
| { | |
| "dataPath": "params_shard_5.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33947648, | |
| "records": [ | |
| { | |
| "name": "model.layers.1.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 2368, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33947648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "aea6866db00119ed8105d8671930ce87" | |
| }, | |
| { | |
| "dataPath": "params_shard_6.bin", | |
| "format": "raw-shard", | |
| "nbytes": 67895296, | |
| "records": [ | |
| { | |
| "name": "model.layers.1.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 37888 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 67895296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b37027a661de9b355862041f4f370a6a" | |
| }, | |
| { | |
| "dataPath": "params_shard_7.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33947648, | |
| "records": [ | |
| { | |
| "name": "model.layers.2.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 2368, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33947648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "6c670fde6e6ea26d417c7edd4be83080" | |
| }, | |
| { | |
| "dataPath": "params_shard_8.bin", | |
| "format": "raw-shard", | |
| "nbytes": 67895296, | |
| "records": [ | |
| { | |
| "name": "model.layers.2.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 37888 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 67895296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f9a7e983d09396ca324795d2075ec32c" | |
| }, | |
| { | |
| "dataPath": "params_shard_9.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28174336, | |
| "records": [ | |
| { | |
| "name": "model.layers.0.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6422528, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.0.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 200704, | |
| "byteOffset": 6422528 | |
| }, | |
| { | |
| "name": "model.layers.1.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 6623232 | |
| }, | |
| { | |
| "name": "model.layers.1.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 148, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1060864, | |
| "byteOffset": 6630400 | |
| }, | |
| { | |
| "name": "model.layers.1.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 37888 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2121728, | |
| "byteOffset": 7691264 | |
| }, | |
| { | |
| "name": "model.layers.1.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 9812992 | |
| }, | |
| { | |
| "name": "model.layers.1.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 9820160 | |
| }, | |
| { | |
| "name": "model.layers.1.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 448, | |
| 4608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8257536, | |
| "byteOffset": 9829376 | |
| }, | |
| { | |
| "name": "model.layers.1.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 28, | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 258048, | |
| "byteOffset": 18086912 | |
| }, | |
| { | |
| "name": "model.layers.1.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6422528, | |
| "byteOffset": 18344960 | |
| }, | |
| { | |
| "name": "model.layers.1.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 200704, | |
| "byteOffset": 24767488 | |
| }, | |
| { | |
| "name": "model.layers.2.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 24968192 | |
| }, | |
| { | |
| "name": "model.layers.2.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 148, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1060864, | |
| "byteOffset": 24975360 | |
| }, | |
| { | |
| "name": "model.layers.2.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 37888 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2121728, | |
| "byteOffset": 26036224 | |
| }, | |
| { | |
| "name": "model.layers.2.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 28157952 | |
| }, | |
| { | |
| "name": "model.layers.2.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 28165120 | |
| } | |
| ], | |
| "md5sum": "8e7a736c33ebdd92cab0c66c964da43c" | |
| }, | |
| { | |
| "dataPath": "params_shard_10.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33947648, | |
| "records": [ | |
| { | |
| "name": "model.layers.3.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 2368, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33947648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ae2d453bf409b64995cc5d15576813ed" | |
| }, | |
| { | |
| "dataPath": "params_shard_11.bin", | |
| "format": "raw-shard", | |
| "nbytes": 67895296, | |
| "records": [ | |
| { | |
| "name": "model.layers.3.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 37888 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 67895296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "80657f3cf97e01deea0b3293e83aad95" | |
| }, | |
| { | |
| "dataPath": "params_shard_12.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33947648, | |
| "records": [ | |
| { | |
| "name": "model.layers.4.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 2368, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33947648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "dd4ec0a59589c67296747a4f0d27423f" | |
| }, | |
| { | |
| "dataPath": "params_shard_13.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33490944, | |
| "records": [ | |
| { | |
| "name": "model.layers.2.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 448, | |
| 4608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8257536, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.2.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 28, | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 258048, | |
| "byteOffset": 8257536 | |
| }, | |
| { | |
| "name": "model.layers.2.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6422528, | |
| "byteOffset": 8515584 | |
| }, | |
| { | |
| "name": "model.layers.2.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 200704, | |
| "byteOffset": 14938112 | |
| }, | |
| { | |
| "name": "model.layers.3.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 15138816 | |
| }, | |
| { | |
| "name": "model.layers.3.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 148, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1060864, | |
| "byteOffset": 15145984 | |
| }, | |
| { | |
| "name": "model.layers.3.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 37888 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2121728, | |
| "byteOffset": 16206848 | |
| }, | |
| { | |
| "name": "model.layers.3.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 18328576 | |
| }, | |
| { | |
| "name": "model.layers.3.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 18335744 | |
| }, | |
| { | |
| "name": "model.layers.3.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 448, | |
| 4608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8257536, | |
| "byteOffset": 18344960 | |
| }, | |
| { | |
| "name": "model.layers.3.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 28, | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 258048, | |
| "byteOffset": 26602496 | |
| }, | |
| { | |
| "name": "model.layers.3.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6422528, | |
| "byteOffset": 26860544 | |
| }, | |
| { | |
| "name": "model.layers.3.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 200704, | |
| "byteOffset": 33283072 | |
| }, | |
| { | |
| "name": "model.layers.4.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 33483776 | |
| } | |
| ], | |
| "md5sum": "da651f30ac0c8c67699a824eca94d89a" | |
| }, | |
| { | |
| "dataPath": "params_shard_14.bin", | |
| "format": "raw-shard", | |
| "nbytes": 67895296, | |
| "records": [ | |
| { | |
| "name": "model.layers.4.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 37888 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 67895296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "52d16c27ed17de1c95570cac1f676ef5" | |
| }, | |
| { | |
| "dataPath": "params_shard_15.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33947648, | |
| "records": [ | |
| { | |
| "name": "model.layers.5.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 2368, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33947648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "86a7c6bd43c46ddc6dcd7ff19f1ae1a5" | |
| }, | |
| { | |
| "dataPath": "params_shard_16.bin", | |
| "format": "raw-shard", | |
| "nbytes": 67895296, | |
| "records": [ | |
| { | |
| "name": "model.layers.5.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 37888 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 67895296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "0b432d83ca69f4f58d02e4e20b7959ab" | |
| }, | |
| { | |
| "dataPath": "params_shard_17.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30059520, | |
| "records": [ | |
| { | |
| "name": "model.layers.4.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 148, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1060864, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.4.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 37888 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2121728, | |
| "byteOffset": 1060864 | |
| }, | |
| { | |
| "name": "model.layers.4.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 3182592 | |
| }, | |
| { | |
| "name": "model.layers.4.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 3189760 | |
| }, | |
| { | |
| "name": "model.layers.4.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 448, | |
| 4608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8257536, | |
| "byteOffset": 3198976 | |
| }, | |
| { | |
| "name": "model.layers.4.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 28, | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 258048, | |
| "byteOffset": 11456512 | |
| }, | |
| { | |
| "name": "model.layers.4.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6422528, | |
| "byteOffset": 11714560 | |
| }, | |
| { | |
| "name": "model.layers.4.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 200704, | |
| "byteOffset": 18137088 | |
| }, | |
| { | |
| "name": "model.layers.5.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 18337792 | |
| }, | |
| { | |
| "name": "model.layers.5.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 148, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1060864, | |
| "byteOffset": 18344960 | |
| }, | |
| { | |
| "name": "model.layers.5.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 37888 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2121728, | |
| "byteOffset": 19405824 | |
| }, | |
| { | |
| "name": "model.layers.5.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 21527552 | |
| }, | |
| { | |
| "name": "model.layers.5.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 21534720 | |
| }, | |
| { | |
| "name": "model.layers.5.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 448, | |
| 4608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8257536, | |
| "byteOffset": 21543936 | |
| }, | |
| { | |
| "name": "model.layers.5.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 28, | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 258048, | |
| "byteOffset": 29801472 | |
| } | |
| ], | |
| "md5sum": "88ac561c656d51350ea8be8f6d699ac2" | |
| }, | |
| { | |
| "dataPath": "params_shard_18.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33947648, | |
| "records": [ | |
| { | |
| "name": "model.layers.6.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 2368, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33947648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e6012055ebcc108d09ae8b503e784212" | |
| }, | |
| { | |
| "dataPath": "params_shard_19.bin", | |
| "format": "raw-shard", | |
| "nbytes": 67895296, | |
| "records": [ | |
| { | |
| "name": "model.layers.6.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 37888 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 67895296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "cb06d726dba70ce49ace7a76cf418fd6" | |
| }, | |
| { | |
| "dataPath": "params_shard_20.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33947648, | |
| "records": [ | |
| { | |
| "name": "model.layers.7.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 2368, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33947648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d824bac4ad3d48e204dbde25c608c91d" | |
| }, | |
| { | |
| "dataPath": "params_shard_21.bin", | |
| "format": "raw-shard", | |
| "nbytes": 67895296, | |
| "records": [ | |
| { | |
| "name": "model.layers.7.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 37888 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 67895296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f955cbdcac4acf8e556e0e46a91f940f" | |
| }, | |
| { | |
| "dataPath": "params_shard_22.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28174336, | |
| "records": [ | |
| { | |
| "name": "model.layers.5.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6422528, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.5.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 200704, | |
| "byteOffset": 6422528 | |
| }, | |
| { | |
| "name": "model.layers.6.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 6623232 | |
| }, | |
| { | |
| "name": "model.layers.6.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 148, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1060864, | |
| "byteOffset": 6630400 | |
| }, | |
| { | |
| "name": "model.layers.6.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 37888 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2121728, | |
| "byteOffset": 7691264 | |
| }, | |
| { | |
| "name": "model.layers.6.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 9812992 | |
| }, | |
| { | |
| "name": "model.layers.6.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 9820160 | |
| }, | |
| { | |
| "name": "model.layers.6.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 448, | |
| 4608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8257536, | |
| "byteOffset": 9829376 | |
| }, | |
| { | |
| "name": "model.layers.6.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 28, | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 258048, | |
| "byteOffset": 18086912 | |
| }, | |
| { | |
| "name": "model.layers.6.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6422528, | |
| "byteOffset": 18344960 | |
| }, | |
| { | |
| "name": "model.layers.6.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 200704, | |
| "byteOffset": 24767488 | |
| }, | |
| { | |
| "name": "model.layers.7.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 24968192 | |
| }, | |
| { | |
| "name": "model.layers.7.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 148, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1060864, | |
| "byteOffset": 24975360 | |
| }, | |
| { | |
| "name": "model.layers.7.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 37888 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2121728, | |
| "byteOffset": 26036224 | |
| }, | |
| { | |
| "name": "model.layers.7.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 28157952 | |
| }, | |
| { | |
| "name": "model.layers.7.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 28165120 | |
| } | |
| ], | |
| "md5sum": "78ebb328883e299b06868a5cc70eb08e" | |
| }, | |
| { | |
| "dataPath": "params_shard_23.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33947648, | |
| "records": [ | |
| { | |
| "name": "model.layers.10.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 2368, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33947648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "df437f04f5c9bdbf100c9eeb966eacc2" | |
| }, | |
| { | |
| "dataPath": "params_shard_24.bin", | |
| "format": "raw-shard", | |
| "nbytes": 67895296, | |
| "records": [ | |
| { | |
| "name": "model.layers.10.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 37888 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 67895296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "edeb5d06e5085867235d5272af3f53e3" | |
| }, | |
| { | |
| "dataPath": "params_shard_25.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33492992, | |
| "records": [ | |
| { | |
| "name": "model.layers.7.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 448, | |
| 4608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8257536, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.7.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 28, | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 258048, | |
| "byteOffset": 8257536 | |
| }, | |
| { | |
| "name": "model.layers.7.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6422528, | |
| "byteOffset": 8515584 | |
| }, | |
| { | |
| "name": "model.layers.7.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 200704, | |
| "byteOffset": 14938112 | |
| }, | |
| { | |
| "name": "model.layers.8.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 15138816 | |
| }, | |
| { | |
| "name": "model.layers.8.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 448, | |
| 4608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8257536, | |
| "byteOffset": 15148032 | |
| }, | |
| { | |
| "name": "model.layers.8.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 28, | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 258048, | |
| "byteOffset": 23405568 | |
| }, | |
| { | |
| "name": "model.layers.8.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6422528, | |
| "byteOffset": 23663616 | |
| }, | |
| { | |
| "name": "model.layers.8.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 200704, | |
| "byteOffset": 30086144 | |
| }, | |
| { | |
| "name": "model.layers.10.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 30286848 | |
| }, | |
| { | |
| "name": "model.layers.10.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 148, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1060864, | |
| "byteOffset": 30294016 | |
| }, | |
| { | |
| "name": "model.layers.10.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 37888 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2121728, | |
| "byteOffset": 31354880 | |
| }, | |
| { | |
| "name": "model.layers.10.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 33476608 | |
| }, | |
| { | |
| "name": "model.layers.10.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 33483776 | |
| } | |
| ], | |
| "md5sum": "be8c707520572c78a191e998cb238c1b" | |
| }, | |
| { | |
| "dataPath": "params_shard_26.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33947648, | |
| "records": [ | |
| { | |
| "name": "model.layers.11.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 2368, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33947648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "35a3c5b49a23fa905e1649f3759e72a7" | |
| }, | |
| { | |
| "dataPath": "params_shard_27.bin", | |
| "format": "raw-shard", | |
| "nbytes": 67895296, | |
| "records": [ | |
| { | |
| "name": "model.layers.11.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 37888 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 67895296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "bff5a6334c34cef272eeb66b1c621abb" | |
| }, | |
| { | |
| "dataPath": "params_shard_28.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33947648, | |
| "records": [ | |
| { | |
| "name": "model.layers.12.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 2368, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33947648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "59e7bc4f55acbcc50598490bb1ea3113" | |
| }, | |
| { | |
| "dataPath": "params_shard_29.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33490944, | |
| "records": [ | |
| { | |
| "name": "model.layers.10.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 448, | |
| 4608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8257536, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.10.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 28, | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 258048, | |
| "byteOffset": 8257536 | |
| }, | |
| { | |
| "name": "model.layers.10.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6422528, | |
| "byteOffset": 8515584 | |
| }, | |
| { | |
| "name": "model.layers.10.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 200704, | |
| "byteOffset": 14938112 | |
| }, | |
| { | |
| "name": "model.layers.11.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 15138816 | |
| }, | |
| { | |
| "name": "model.layers.11.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 148, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1060864, | |
| "byteOffset": 15145984 | |
| }, | |
| { | |
| "name": "model.layers.11.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 37888 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2121728, | |
| "byteOffset": 16206848 | |
| }, | |
| { | |
| "name": "model.layers.11.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 18328576 | |
| }, | |
| { | |
| "name": "model.layers.11.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 18335744 | |
| }, | |
| { | |
| "name": "model.layers.11.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 448, | |
| 4608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8257536, | |
| "byteOffset": 18344960 | |
| }, | |
| { | |
| "name": "model.layers.11.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 28, | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 258048, | |
| "byteOffset": 26602496 | |
| }, | |
| { | |
| "name": "model.layers.11.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6422528, | |
| "byteOffset": 26860544 | |
| }, | |
| { | |
| "name": "model.layers.11.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 200704, | |
| "byteOffset": 33283072 | |
| }, | |
| { | |
| "name": "model.layers.12.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 33483776 | |
| } | |
| ], | |
| "md5sum": "0d589e655f81ea29dc62a6955bc85134" | |
| }, | |
| { | |
| "dataPath": "params_shard_30.bin", | |
| "format": "raw-shard", | |
| "nbytes": 67895296, | |
| "records": [ | |
| { | |
| "name": "model.layers.12.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 37888 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 67895296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "207d539469036c90ad449b0cc0eb6a13" | |
| }, | |
| { | |
| "dataPath": "params_shard_31.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33947648, | |
| "records": [ | |
| { | |
| "name": "model.layers.13.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 2368, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33947648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e455481d4bf4f5d7ae05de996f2b08ea" | |
| }, | |
| { | |
| "dataPath": "params_shard_32.bin", | |
| "format": "raw-shard", | |
| "nbytes": 67895296, | |
| "records": [ | |
| { | |
| "name": "model.layers.13.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 37888 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 67895296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e5970039d2dfb369a5f1246049eeac05" | |
| }, | |
| { | |
| "dataPath": "params_shard_33.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30059520, | |
| "records": [ | |
| { | |
| "name": "model.layers.12.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 148, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1060864, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.12.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 37888 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2121728, | |
| "byteOffset": 1060864 | |
| }, | |
| { | |
| "name": "model.layers.12.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 3182592 | |
| }, | |
| { | |
| "name": "model.layers.12.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 3189760 | |
| }, | |
| { | |
| "name": "model.layers.12.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 448, | |
| 4608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8257536, | |
| "byteOffset": 3198976 | |
| }, | |
| { | |
| "name": "model.layers.12.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 28, | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 258048, | |
| "byteOffset": 11456512 | |
| }, | |
| { | |
| "name": "model.layers.12.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6422528, | |
| "byteOffset": 11714560 | |
| }, | |
| { | |
| "name": "model.layers.12.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 200704, | |
| "byteOffset": 18137088 | |
| }, | |
| { | |
| "name": "model.layers.13.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 18337792 | |
| }, | |
| { | |
| "name": "model.layers.13.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 148, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1060864, | |
| "byteOffset": 18344960 | |
| }, | |
| { | |
| "name": "model.layers.13.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 37888 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2121728, | |
| "byteOffset": 19405824 | |
| }, | |
| { | |
| "name": "model.layers.13.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 21527552 | |
| }, | |
| { | |
| "name": "model.layers.13.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 21534720 | |
| }, | |
| { | |
| "name": "model.layers.13.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 448, | |
| 4608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8257536, | |
| "byteOffset": 21543936 | |
| }, | |
| { | |
| "name": "model.layers.13.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 28, | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 258048, | |
| "byteOffset": 29801472 | |
| } | |
| ], | |
| "md5sum": "cde1c43ebbd68a802f3b60ffd6228515" | |
| }, | |
| { | |
| "dataPath": "params_shard_34.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33947648, | |
| "records": [ | |
| { | |
| "name": "model.layers.14.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 2368, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33947648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "6086419071ec2f4c8a079ba779c77bd4" | |
| }, | |
| { | |
| "dataPath": "params_shard_35.bin", | |
| "format": "raw-shard", | |
| "nbytes": 67895296, | |
| "records": [ | |
| { | |
| "name": "model.layers.14.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 37888 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 67895296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "cf1bbc8e4b5e3de8ce6686b6867dedd1" | |
| }, | |
| { | |
| "dataPath": "params_shard_36.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33947648, | |
| "records": [ | |
| { | |
| "name": "model.layers.15.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 2368, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33947648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "09d939f800d1a2739803ddcf60bf90e9" | |
| }, | |
| { | |
| "dataPath": "params_shard_37.bin", | |
| "format": "raw-shard", | |
| "nbytes": 67895296, | |
| "records": [ | |
| { | |
| "name": "model.layers.15.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 37888 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 67895296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ddc25338b617513d3f67ab1e4ed514bb" | |
| }, | |
| { | |
| "dataPath": "params_shard_38.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28174336, | |
| "records": [ | |
| { | |
| "name": "model.layers.13.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6422528, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.13.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 200704, | |
| "byteOffset": 6422528 | |
| }, | |
| { | |
| "name": "model.layers.14.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 6623232 | |
| }, | |
| { | |
| "name": "model.layers.14.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 148, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1060864, | |
| "byteOffset": 6630400 | |
| }, | |
| { | |
| "name": "model.layers.14.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 37888 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2121728, | |
| "byteOffset": 7691264 | |
| }, | |
| { | |
| "name": "model.layers.14.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 9812992 | |
| }, | |
| { | |
| "name": "model.layers.14.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 9820160 | |
| }, | |
| { | |
| "name": "model.layers.14.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 448, | |
| 4608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8257536, | |
| "byteOffset": 9829376 | |
| }, | |
| { | |
| "name": "model.layers.14.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 28, | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 258048, | |
| "byteOffset": 18086912 | |
| }, | |
| { | |
| "name": "model.layers.14.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6422528, | |
| "byteOffset": 18344960 | |
| }, | |
| { | |
| "name": "model.layers.14.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 200704, | |
| "byteOffset": 24767488 | |
| }, | |
| { | |
| "name": "model.layers.15.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 24968192 | |
| }, | |
| { | |
| "name": "model.layers.15.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 148, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1060864, | |
| "byteOffset": 24975360 | |
| }, | |
| { | |
| "name": "model.layers.15.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 37888 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2121728, | |
| "byteOffset": 26036224 | |
| }, | |
| { | |
| "name": "model.layers.15.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 28157952 | |
| }, | |
| { | |
| "name": "model.layers.15.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 28165120 | |
| } | |
| ], | |
| "md5sum": "4cb3b9791d7354bed70b0c33e0a44576" | |
| }, | |
| { | |
| "dataPath": "params_shard_39.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33947648, | |
| "records": [ | |
| { | |
| "name": "model.layers.16.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 2368, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33947648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "1a69f041922a3bafe502afea0f7e1e99" | |
| }, | |
| { | |
| "dataPath": "params_shard_40.bin", | |
| "format": "raw-shard", | |
| "nbytes": 67895296, | |
| "records": [ | |
| { | |
| "name": "model.layers.16.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 37888 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 67895296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "8ff80f67e306a54f3f8448e80bbc2856" | |
| }, | |
| { | |
| "dataPath": "params_shard_41.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33947648, | |
| "records": [ | |
| { | |
| "name": "model.layers.17.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 2368, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33947648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "392616aa9e74631c1e1e62cdfb2c634b" | |
| }, | |
| { | |
| "dataPath": "params_shard_42.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33490944, | |
| "records": [ | |
| { | |
| "name": "model.layers.15.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 448, | |
| 4608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8257536, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.15.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 28, | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 258048, | |
| "byteOffset": 8257536 | |
| }, | |
| { | |
| "name": "model.layers.15.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6422528, | |
| "byteOffset": 8515584 | |
| }, | |
| { | |
| "name": "model.layers.15.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 200704, | |
| "byteOffset": 14938112 | |
| }, | |
| { | |
| "name": "model.layers.16.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 15138816 | |
| }, | |
| { | |
| "name": "model.layers.16.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 148, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1060864, | |
| "byteOffset": 15145984 | |
| }, | |
| { | |
| "name": "model.layers.16.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 37888 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2121728, | |
| "byteOffset": 16206848 | |
| }, | |
| { | |
| "name": "model.layers.16.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 18328576 | |
| }, | |
| { | |
| "name": "model.layers.16.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 18335744 | |
| }, | |
| { | |
| "name": "model.layers.16.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 448, | |
| 4608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8257536, | |
| "byteOffset": 18344960 | |
| }, | |
| { | |
| "name": "model.layers.16.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 28, | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 258048, | |
| "byteOffset": 26602496 | |
| }, | |
| { | |
| "name": "model.layers.16.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6422528, | |
| "byteOffset": 26860544 | |
| }, | |
| { | |
| "name": "model.layers.16.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 200704, | |
| "byteOffset": 33283072 | |
| }, | |
| { | |
| "name": "model.layers.17.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 33483776 | |
| } | |
| ], | |
| "md5sum": "69d91a2243e63c04e32ffd2fb5de38d7" | |
| }, | |
| { | |
| "dataPath": "params_shard_43.bin", | |
| "format": "raw-shard", | |
| "nbytes": 67895296, | |
| "records": [ | |
| { | |
| "name": "model.layers.17.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 37888 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 67895296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "110a9377706f5e4b2dd470d1632f6499" | |
| }, | |
| { | |
| "dataPath": "params_shard_44.bin", | |
| "format": "raw-shard", | |
| "nbytes": 67895296, | |
| "records": [ | |
| { | |
| "name": "model.layers.18.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 37888 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 67895296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "cf55eb96b012649d8202788d591a851d" | |
| }, | |
| { | |
| "dataPath": "params_shard_45.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28984320, | |
| "records": [ | |
| { | |
| "name": "model.layers.17.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 148, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1060864, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.17.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 37888 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2121728, | |
| "byteOffset": 1060864 | |
| }, | |
| { | |
| "name": "model.layers.17.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 3182592 | |
| }, | |
| { | |
| "name": "model.layers.17.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 3189760 | |
| }, | |
| { | |
| "name": "model.layers.17.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 448, | |
| 4608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8257536, | |
| "byteOffset": 3198976 | |
| }, | |
| { | |
| "name": "model.layers.17.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 28, | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 258048, | |
| "byteOffset": 11456512 | |
| }, | |
| { | |
| "name": "model.layers.17.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6422528, | |
| "byteOffset": 11714560 | |
| }, | |
| { | |
| "name": "model.layers.17.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 200704, | |
| "byteOffset": 18137088 | |
| }, | |
| { | |
| "name": "model.layers.18.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 37888 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2121728, | |
| "byteOffset": 18337792 | |
| }, | |
| { | |
| "name": "model.layers.18.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 20459520 | |
| }, | |
| { | |
| "name": "model.layers.18.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 448, | |
| 4608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8257536, | |
| "byteOffset": 20468736 | |
| }, | |
| { | |
| "name": "model.layers.18.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 28, | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 258048, | |
| "byteOffset": 28726272 | |
| } | |
| ], | |
| "md5sum": "af46d3e5557cd6c15320071a03b3bedc" | |
| }, | |
| { | |
| "dataPath": "params_shard_46.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33947648, | |
| "records": [ | |
| { | |
| "name": "model.layers.8.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 2368, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33947648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b3c302a2c2d1e27c415d404ec59b4e89" | |
| }, | |
| { | |
| "dataPath": "params_shard_47.bin", | |
| "format": "raw-shard", | |
| "nbytes": 67895296, | |
| "records": [ | |
| { | |
| "name": "model.layers.8.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 37888 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 67895296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "8fc583bfecaf44fa3160da1e869a7b85" | |
| }, | |
| { | |
| "dataPath": "params_shard_48.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33947648, | |
| "records": [ | |
| { | |
| "name": "model.layers.9.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 2368, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33947648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "bb1bd914c98c4b5c33973c9e07b7fcb9" | |
| }, | |
| { | |
| "dataPath": "params_shard_49.bin", | |
| "format": "raw-shard", | |
| "nbytes": 67895296, | |
| "records": [ | |
| { | |
| "name": "model.layers.9.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 37888 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 67895296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "19187980772ef2f85283cb828ed5b85c" | |
| }, | |
| { | |
| "dataPath": "params_shard_50.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33947648, | |
| "records": [ | |
| { | |
| "name": "model.layers.18.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 2368, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33947648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "dfc3a256a013f654d5e4f4eb3140aee7" | |
| }, | |
| { | |
| "dataPath": "params_shard_51.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33947648, | |
| "records": [ | |
| { | |
| "name": "model.layers.19.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 2368, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33947648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "aa32c80d63526b6cf5172935f7c9c752" | |
| }, | |
| { | |
| "dataPath": "params_shard_52.bin", | |
| "format": "raw-shard", | |
| "nbytes": 67895296, | |
| "records": [ | |
| { | |
| "name": "model.layers.19.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 37888 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 67895296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "3025dc1c87438c157d30d0993de5a9ae" | |
| }, | |
| { | |
| "dataPath": "params_shard_53.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32446464, | |
| "records": [ | |
| { | |
| "name": "model.layers.18.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6422528, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.18.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 200704, | |
| "byteOffset": 6422528 | |
| }, | |
| { | |
| "name": "model.layers.8.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 6623232 | |
| }, | |
| { | |
| "name": "model.layers.8.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 148, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1060864, | |
| "byteOffset": 6630400 | |
| }, | |
| { | |
| "name": "model.layers.8.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 37888 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2121728, | |
| "byteOffset": 7691264 | |
| }, | |
| { | |
| "name": "model.layers.8.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 9812992 | |
| }, | |
| { | |
| "name": "model.layers.9.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 9820160 | |
| }, | |
| { | |
| "name": "model.layers.9.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 148, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1060864, | |
| "byteOffset": 9827328 | |
| }, | |
| { | |
| "name": "model.layers.9.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 37888 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2121728, | |
| "byteOffset": 10888192 | |
| }, | |
| { | |
| "name": "model.layers.9.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 13009920 | |
| }, | |
| { | |
| "name": "model.layers.9.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 13017088 | |
| }, | |
| { | |
| "name": "model.layers.9.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 448, | |
| 4608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8257536, | |
| "byteOffset": 13026304 | |
| }, | |
| { | |
| "name": "model.layers.9.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 28, | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 258048, | |
| "byteOffset": 21283840 | |
| }, | |
| { | |
| "name": "model.layers.9.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6422528, | |
| "byteOffset": 21541888 | |
| }, | |
| { | |
| "name": "model.layers.9.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 200704, | |
| "byteOffset": 27964416 | |
| }, | |
| { | |
| "name": "model.layers.18.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 28165120 | |
| }, | |
| { | |
| "name": "model.layers.18.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 148, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1060864, | |
| "byteOffset": 28172288 | |
| }, | |
| { | |
| "name": "model.layers.18.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 29233152 | |
| }, | |
| { | |
| "name": "model.layers.19.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 29240320 | |
| }, | |
| { | |
| "name": "model.layers.19.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 148, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1060864, | |
| "byteOffset": 29247488 | |
| }, | |
| { | |
| "name": "model.layers.19.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 37888 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2121728, | |
| "byteOffset": 30308352 | |
| }, | |
| { | |
| "name": "model.layers.19.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 32430080 | |
| }, | |
| { | |
| "name": "model.layers.19.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 32437248 | |
| } | |
| ], | |
| "md5sum": "5ff7866f207667b587c571bebe8816e7" | |
| }, | |
| { | |
| "dataPath": "params_shard_54.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33947648, | |
| "records": [ | |
| { | |
| "name": "model.layers.20.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 2368, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33947648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "6d7126e210997257eaac8439d159ed88" | |
| }, | |
| { | |
| "dataPath": "params_shard_55.bin", | |
| "format": "raw-shard", | |
| "nbytes": 67895296, | |
| "records": [ | |
| { | |
| "name": "model.layers.20.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 37888 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 67895296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "0127585524a193ca1724573bdf30de89" | |
| }, | |
| { | |
| "dataPath": "params_shard_56.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33947648, | |
| "records": [ | |
| { | |
| "name": "model.layers.21.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 2368, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33947648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "74c73485fd55bccc7df5319db756d132" | |
| }, | |
| { | |
| "dataPath": "params_shard_57.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33490944, | |
| "records": [ | |
| { | |
| "name": "model.layers.19.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 448, | |
| 4608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8257536, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.19.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 28, | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 258048, | |
| "byteOffset": 8257536 | |
| }, | |
| { | |
| "name": "model.layers.19.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6422528, | |
| "byteOffset": 8515584 | |
| }, | |
| { | |
| "name": "model.layers.19.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 200704, | |
| "byteOffset": 14938112 | |
| }, | |
| { | |
| "name": "model.layers.20.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 15138816 | |
| }, | |
| { | |
| "name": "model.layers.20.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 148, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1060864, | |
| "byteOffset": 15145984 | |
| }, | |
| { | |
| "name": "model.layers.20.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 37888 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2121728, | |
| "byteOffset": 16206848 | |
| }, | |
| { | |
| "name": "model.layers.20.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 18328576 | |
| }, | |
| { | |
| "name": "model.layers.20.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 18335744 | |
| }, | |
| { | |
| "name": "model.layers.20.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 448, | |
| 4608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8257536, | |
| "byteOffset": 18344960 | |
| }, | |
| { | |
| "name": "model.layers.20.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 28, | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 258048, | |
| "byteOffset": 26602496 | |
| }, | |
| { | |
| "name": "model.layers.20.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6422528, | |
| "byteOffset": 26860544 | |
| }, | |
| { | |
| "name": "model.layers.20.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 200704, | |
| "byteOffset": 33283072 | |
| }, | |
| { | |
| "name": "model.layers.21.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 33483776 | |
| } | |
| ], | |
| "md5sum": "6fcd12aea079d7490a62d7f8d2e8e1e8" | |
| }, | |
| { | |
| "dataPath": "params_shard_58.bin", | |
| "format": "raw-shard", | |
| "nbytes": 67895296, | |
| "records": [ | |
| { | |
| "name": "model.layers.21.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 37888 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 67895296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c8d2e153e1c369e97b1f03b4765722c2" | |
| }, | |
| { | |
| "dataPath": "params_shard_59.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33947648, | |
| "records": [ | |
| { | |
| "name": "model.layers.22.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 2368, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33947648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "16dab989f030bc459ac45ad44a34b4ce" | |
| }, | |
| { | |
| "dataPath": "params_shard_60.bin", | |
| "format": "raw-shard", | |
| "nbytes": 67895296, | |
| "records": [ | |
| { | |
| "name": "model.layers.22.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 37888 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 67895296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e90f58b8247e9031735bdc6b1f484c0f" | |
| }, | |
| { | |
| "dataPath": "params_shard_61.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30059520, | |
| "records": [ | |
| { | |
| "name": "model.layers.21.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 148, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1060864, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.21.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 37888 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2121728, | |
| "byteOffset": 1060864 | |
| }, | |
| { | |
| "name": "model.layers.21.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 3182592 | |
| }, | |
| { | |
| "name": "model.layers.21.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 3189760 | |
| }, | |
| { | |
| "name": "model.layers.21.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 448, | |
| 4608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8257536, | |
| "byteOffset": 3198976 | |
| }, | |
| { | |
| "name": "model.layers.21.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 28, | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 258048, | |
| "byteOffset": 11456512 | |
| }, | |
| { | |
| "name": "model.layers.21.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6422528, | |
| "byteOffset": 11714560 | |
| }, | |
| { | |
| "name": "model.layers.21.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 200704, | |
| "byteOffset": 18137088 | |
| }, | |
| { | |
| "name": "model.layers.22.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 18337792 | |
| }, | |
| { | |
| "name": "model.layers.22.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 148, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1060864, | |
| "byteOffset": 18344960 | |
| }, | |
| { | |
| "name": "model.layers.22.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 37888 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2121728, | |
| "byteOffset": 19405824 | |
| }, | |
| { | |
| "name": "model.layers.22.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 21527552 | |
| }, | |
| { | |
| "name": "model.layers.22.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 21534720 | |
| }, | |
| { | |
| "name": "model.layers.22.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 448, | |
| 4608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8257536, | |
| "byteOffset": 21543936 | |
| }, | |
| { | |
| "name": "model.layers.22.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 28, | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 258048, | |
| "byteOffset": 29801472 | |
| } | |
| ], | |
| "md5sum": "5f1b33d0f34a321807618c69e6b8cf83" | |
| }, | |
| { | |
| "dataPath": "params_shard_62.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33947648, | |
| "records": [ | |
| { | |
| "name": "model.layers.23.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 2368, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33947648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "a011784596c9d8ca3d983509024e8319" | |
| }, | |
| { | |
| "dataPath": "params_shard_63.bin", | |
| "format": "raw-shard", | |
| "nbytes": 67895296, | |
| "records": [ | |
| { | |
| "name": "model.layers.23.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 37888 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 67895296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b8405dbf80c94a20f9bb7515b6bdf8ef" | |
| }, | |
| { | |
| "dataPath": "params_shard_64.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33947648, | |
| "records": [ | |
| { | |
| "name": "model.layers.24.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 2368, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33947648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "dedf1d569d0e02ca877316b889f7a82a" | |
| }, | |
| { | |
| "dataPath": "params_shard_65.bin", | |
| "format": "raw-shard", | |
| "nbytes": 67895296, | |
| "records": [ | |
| { | |
| "name": "model.layers.24.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 37888 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 67895296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "dc786a3cb829aa038f791fd29b50924c" | |
| }, | |
| { | |
| "dataPath": "params_shard_66.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28174336, | |
| "records": [ | |
| { | |
| "name": "model.layers.22.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6422528, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.22.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 200704, | |
| "byteOffset": 6422528 | |
| }, | |
| { | |
| "name": "model.layers.23.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 6623232 | |
| }, | |
| { | |
| "name": "model.layers.23.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 148, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1060864, | |
| "byteOffset": 6630400 | |
| }, | |
| { | |
| "name": "model.layers.23.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 37888 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2121728, | |
| "byteOffset": 7691264 | |
| }, | |
| { | |
| "name": "model.layers.23.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 9812992 | |
| }, | |
| { | |
| "name": "model.layers.23.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 9820160 | |
| }, | |
| { | |
| "name": "model.layers.23.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 448, | |
| 4608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8257536, | |
| "byteOffset": 9829376 | |
| }, | |
| { | |
| "name": "model.layers.23.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 28, | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 258048, | |
| "byteOffset": 18086912 | |
| }, | |
| { | |
| "name": "model.layers.23.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6422528, | |
| "byteOffset": 18344960 | |
| }, | |
| { | |
| "name": "model.layers.23.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 200704, | |
| "byteOffset": 24767488 | |
| }, | |
| { | |
| "name": "model.layers.24.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 24968192 | |
| }, | |
| { | |
| "name": "model.layers.24.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 148, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1060864, | |
| "byteOffset": 24975360 | |
| }, | |
| { | |
| "name": "model.layers.24.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 37888 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2121728, | |
| "byteOffset": 26036224 | |
| }, | |
| { | |
| "name": "model.layers.24.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 28157952 | |
| }, | |
| { | |
| "name": "model.layers.24.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 28165120 | |
| } | |
| ], | |
| "md5sum": "ace50441279153e8451152f02674adb4" | |
| }, | |
| { | |
| "dataPath": "params_shard_67.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33947648, | |
| "records": [ | |
| { | |
| "name": "model.layers.25.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 2368, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33947648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "6e7cd4422e075f24588022ace802f118" | |
| }, | |
| { | |
| "dataPath": "params_shard_68.bin", | |
| "format": "raw-shard", | |
| "nbytes": 67895296, | |
| "records": [ | |
| { | |
| "name": "model.layers.25.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 37888 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 67895296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "106872bb5e9f8d11642d027274a8a444" | |
| }, | |
| { | |
| "dataPath": "params_shard_69.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33947648, | |
| "records": [ | |
| { | |
| "name": "model.layers.26.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 2368, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33947648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "7278e2d449ee7bab60455c5194e2a477" | |
| }, | |
| { | |
| "dataPath": "params_shard_70.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33490944, | |
| "records": [ | |
| { | |
| "name": "model.layers.24.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 448, | |
| 4608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8257536, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.24.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 28, | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 258048, | |
| "byteOffset": 8257536 | |
| }, | |
| { | |
| "name": "model.layers.24.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6422528, | |
| "byteOffset": 8515584 | |
| }, | |
| { | |
| "name": "model.layers.24.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 200704, | |
| "byteOffset": 14938112 | |
| }, | |
| { | |
| "name": "model.layers.25.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 15138816 | |
| }, | |
| { | |
| "name": "model.layers.25.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 148, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1060864, | |
| "byteOffset": 15145984 | |
| }, | |
| { | |
| "name": "model.layers.25.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 37888 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2121728, | |
| "byteOffset": 16206848 | |
| }, | |
| { | |
| "name": "model.layers.25.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 18328576 | |
| }, | |
| { | |
| "name": "model.layers.25.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 18335744 | |
| }, | |
| { | |
| "name": "model.layers.25.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 448, | |
| 4608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8257536, | |
| "byteOffset": 18344960 | |
| }, | |
| { | |
| "name": "model.layers.25.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 28, | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 258048, | |
| "byteOffset": 26602496 | |
| }, | |
| { | |
| "name": "model.layers.25.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6422528, | |
| "byteOffset": 26860544 | |
| }, | |
| { | |
| "name": "model.layers.25.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 200704, | |
| "byteOffset": 33283072 | |
| }, | |
| { | |
| "name": "model.layers.26.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 33483776 | |
| } | |
| ], | |
| "md5sum": "c16a25b552925452ca11433387b2c192" | |
| }, | |
| { | |
| "dataPath": "params_shard_71.bin", | |
| "format": "raw-shard", | |
| "nbytes": 67895296, | |
| "records": [ | |
| { | |
| "name": "model.layers.26.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 37888 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 67895296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "013a30e7fc5099b3353d95d4938d113d" | |
| }, | |
| { | |
| "dataPath": "params_shard_72.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33947648, | |
| "records": [ | |
| { | |
| "name": "model.layers.27.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 2368, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33947648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "675fe121161f0b8a339faf94719c9348" | |
| }, | |
| { | |
| "dataPath": "params_shard_73.bin", | |
| "format": "raw-shard", | |
| "nbytes": 67895296, | |
| "records": [ | |
| { | |
| "name": "model.layers.27.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 37888 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 67895296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "8923ad9dc41dbaf11457255f887c2da1" | |
| }, | |
| { | |
| "dataPath": "params_shard_74.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30059520, | |
| "records": [ | |
| { | |
| "name": "model.layers.26.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 148, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1060864, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.26.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 37888 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2121728, | |
| "byteOffset": 1060864 | |
| }, | |
| { | |
| "name": "model.layers.26.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 3182592 | |
| }, | |
| { | |
| "name": "model.layers.26.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 3189760 | |
| }, | |
| { | |
| "name": "model.layers.26.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 448, | |
| 4608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8257536, | |
| "byteOffset": 3198976 | |
| }, | |
| { | |
| "name": "model.layers.26.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 28, | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 258048, | |
| "byteOffset": 11456512 | |
| }, | |
| { | |
| "name": "model.layers.26.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6422528, | |
| "byteOffset": 11714560 | |
| }, | |
| { | |
| "name": "model.layers.26.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 200704, | |
| "byteOffset": 18137088 | |
| }, | |
| { | |
| "name": "model.layers.27.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 18337792 | |
| }, | |
| { | |
| "name": "model.layers.27.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 148, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1060864, | |
| "byteOffset": 18344960 | |
| }, | |
| { | |
| "name": "model.layers.27.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 37888 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2121728, | |
| "byteOffset": 19405824 | |
| }, | |
| { | |
| "name": "model.layers.27.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 21527552 | |
| }, | |
| { | |
| "name": "model.layers.27.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 21534720 | |
| }, | |
| { | |
| "name": "model.layers.27.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 448, | |
| 4608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8257536, | |
| "byteOffset": 21543936 | |
| }, | |
| { | |
| "name": "model.layers.27.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 28, | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 258048, | |
| "byteOffset": 29801472 | |
| } | |
| ], | |
| "md5sum": "2f874b1c8f0189fdef6229d652b36a28" | |
| }, | |
| { | |
| "dataPath": "params_shard_75.bin", | |
| "format": "raw-shard", | |
| "nbytes": 6630400, | |
| "records": [ | |
| { | |
| "name": "model.layers.27.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6422528, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.27.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 200704, | |
| "byteOffset": 6422528 | |
| }, | |
| { | |
| "name": "model.norm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 6623232 | |
| } | |
| ], | |
| "md5sum": "f94c26854f73d3e8be9cf0df25f7049c" | |
| } | |
| ] | |
| } |