diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,4327 @@ +{ + "metadata": { + "ParamSize": 325, + "ParamBytes": 3048549376.0, + "BitsPerParam": 3.619307029695688 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 52736000, + "records": [ + { + "name": "lm_head.q_weight", + "shape": [ + 32000, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 52736000, + "byteOffset": 0 + } + ], + "md5sum": "9837c89d545850f3cd0d2637286c72b5" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 18087936, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 0 + } + ], + "md5sum": "0b3892fd87005159cae3135feb15481f" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "4eac62fcd0f886e724cdc40cf81cd941" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 29226496, + "records": [ + { + "name": "lm_head.q_scale", + "shape": [ + 32000, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6592000, + "byteOffset": 0 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 6592000 + }, + { + "name": "model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 6600192 + }, + { + "name": "model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 4096, + 276 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 24688128 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26949120 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26957312 + }, + { + "name": "model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 4096, + 276 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 26965504 + } + ], + "md5sum": "7b53644d7b0c813717f65ebf11235608" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 27325440, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 0 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 4535296 + }, + { + "name": "model.layers.24.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 4543488 + }, + { + "name": "model.layers.24.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 24794112 + } + ], + "md5sum": "ba44c2e9965a65f35d9db6bd925860dc" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "2be4f03707263b27c49924d2eda86299" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 20250624, + "records": [ + { + "name": "model.layers.25.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 0 + } + ], + "md5sum": "d9e03bfd02709e8f05d30249ed39536c" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 32494592, + "records": [ + { + "name": "model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 0 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 6750208 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7593984 + }, + { + "name": "model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 7602176 + }, + { + "name": "model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 4096, + 276 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 25690112 + }, + { + "name": "model.layers.25.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 27951104 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32486400 + } + ], + "md5sum": "cedf294ee3f2dd7ead342991db8d05b7" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "a03e69c96e83aea5bd7f3252517d7cfa" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 30482432, + "records": [ + { + "name": "model.layers.25.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 0 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 2531328 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 9281536 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 10125312 + }, + { + "name": "model.layers.26.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 10133504 + }, + { + "name": "model.layers.26.mlp.down_proj.q_scale", + "shape": [ + 4096, + 276 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 28221440 + } + ], + "md5sum": "796d81a11fbf19e0728e8e58e9f04a97" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 27325440, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 0 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 4535296 + }, + { + "name": "model.layers.26.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 4543488 + }, + { + "name": "model.layers.26.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 24794112 + } + ], + "md5sum": "9ba26206a1c6b829849edc19f477eb7a" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "aa5ae7d711608d59f910483e621a643c" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 20250624, + "records": [ + { + "name": "model.layers.27.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 0 + } + ], + "md5sum": "08b5e4ba67b0b1464c599c672315ff9a" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 32494592, + "records": [ + { + "name": "model.layers.26.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 0 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 6750208 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7593984 + }, + { + "name": "model.layers.27.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 7602176 + }, + { + "name": "model.layers.27.mlp.down_proj.q_scale", + "shape": [ + 4096, + 276 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 25690112 + }, + { + "name": "model.layers.27.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 27951104 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32486400 + } + ], + "md5sum": "6c5dfa04a79db8d5aa0c97ea4eb75892" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "736a6ded920d351a0ba6ee593fd33bf7" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 30482432, + "records": [ + { + "name": "model.layers.27.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 0 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 2531328 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 9281536 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 10125312 + }, + { + "name": "model.layers.28.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 10133504 + }, + { + "name": "model.layers.28.mlp.down_proj.q_scale", + "shape": [ + 4096, + 276 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 28221440 + } + ], + "md5sum": "df2a12a1071b900820cafdea9474a11e" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 27325440, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 0 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 4535296 + }, + { + "name": "model.layers.28.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 4543488 + }, + { + "name": "model.layers.28.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 24794112 + } + ], + "md5sum": "8454540ba9b5e2f98b22f417e712eebc" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "6bdf3b9ce32e2a28bdbd0ff5396334b8" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 20250624, + "records": [ + { + "name": "model.layers.29.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 0 + } + ], + "md5sum": "ae58701b107dd184fcb0877329418995" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 32494592, + "records": [ + { + "name": "model.layers.28.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 0 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 6750208 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7593984 + }, + { + "name": "model.layers.29.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 7602176 + }, + { + "name": "model.layers.29.mlp.down_proj.q_scale", + "shape": [ + 4096, + 276 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 25690112 + }, + { + "name": "model.layers.29.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 27951104 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32486400 + } + ], + "md5sum": "1bd984b6714ebe828f8ac45e45e7b226" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "b4fdd4d7aba0c33b7b24cd296ce52a39" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 30482432, + "records": [ + { + "name": "model.layers.29.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 0 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 2531328 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 9281536 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 10125312 + }, + { + "name": "model.layers.30.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 10133504 + }, + { + "name": "model.layers.30.mlp.down_proj.q_scale", + "shape": [ + 4096, + 276 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 28221440 + } + ], + "md5sum": "f6377a6192aad921c9a257f85383493c" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 27325440, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 0 + }, + { + "name": "model.layers.30.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 4535296 + }, + { + "name": "model.layers.30.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 4543488 + }, + { + "name": "model.layers.30.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 24794112 + } + ], + "md5sum": "537aa38eef35c6850fbf9835344fc320" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "04ee6768eed61211f9fff2a959efdb50" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 20250624, + "records": [ + { + "name": "model.layers.31.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 0 + } + ], + "md5sum": "e359ec1f1f82082341d5aeee964483d8" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 32494592, + "records": [ + { + "name": "model.layers.30.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 0 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 6750208 + }, + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7593984 + }, + { + "name": "model.layers.31.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 7602176 + }, + { + "name": "model.layers.31.mlp.down_proj.q_scale", + "shape": [ + 4096, + 276 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 25690112 + }, + { + "name": "model.layers.31.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 27951104 + }, + { + "name": "model.layers.31.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32486400 + } + ], + "md5sum": "83eeb59c0ba757e7081b390ee21b69b6" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 52736000, + "records": [ + { + "name": "model.embed_tokens.q_weight", + "shape": [ + 32000, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 52736000, + "byteOffset": 0 + } + ], + "md5sum": "d01b089f0beed4795ce259f291ef38a2" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 18087936, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 0 + } + ], + "md5sum": "276facd44f8601842dc48a1da8c4494d" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "073d93ebf6889233637ee1a33bb3bf93" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 20250624, + "records": [ + { + "name": "model.layers.0.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 0 + } + ], + "md5sum": "85a1d9b29ef60762dcb84a28c2e65f08" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 32819712, + "records": [ + { + "name": "model.layers.31.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 0 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 2531328 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 9281536 + }, + { + "name": "model.norm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 10125312 + }, + { + "name": "model.embed_tokens.q_scale", + "shape": [ + 32000, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6592000, + "byteOffset": 10133504 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16725504 + }, + { + "name": "model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 4096, + 276 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 16733696 + }, + { + "name": "model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 18994688 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 23529984 + }, + { + "name": "model.layers.0.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 23538176 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 26069504 + } + ], + "md5sum": "2238fe06b147a60d6774f94c6b63f53b" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "9105ac0fed7cb450442a775ee98f9055" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 20250624, + "records": [ + { + "name": "model.layers.1.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 0 + } + ], + "md5sum": "57e1956d6a09f71cb01daf62e7ec6cb7" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 28275712, + "records": [ + { + "name": "model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 0 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 843776 + }, + { + "name": "model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 851968 + }, + { + "name": "model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 4096, + 276 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 18939904 + }, + { + "name": "model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 21200896 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25736192 + }, + { + "name": "model.layers.1.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 25744384 + } + ], + "md5sum": "2b8799702b69ba201c496c6d52769bed" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "0cfaac8ca06224e04cde47c14f5637c1" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 20250624, + "records": [ + { + "name": "model.layers.10.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 0 + } + ], + "md5sum": "ce5307f1afe10f5a589c2b9f723ec05f" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 32494592, + "records": [ + { + "name": "model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 0 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 6750208 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7593984 + }, + { + "name": "model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 7602176 + }, + { + "name": "model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 4096, + 276 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 25690112 + }, + { + "name": "model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 27951104 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32486400 + } + ], + "md5sum": "630eda6fee482469ab88c94663af1208" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "73122d378591d4857e7e0ebae5a72556" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 20250624, + "records": [ + { + "name": "model.layers.11.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 0 + } + ], + "md5sum": "86814866596ab1fc5088cd19bbb24ec9" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 18087936, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 0 + } + ], + "md5sum": "83bde830156064662a03172c3b6e13ef" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "9a52675436254a8cdd25d9c02fd5cf62" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 20250624, + "records": [ + { + "name": "model.layers.2.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 0 + } + ], + "md5sum": "2bc382a626c4e8f723a09c0d58b56e83" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 31598592, + "records": [ + { + "name": "model.layers.10.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 0 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 2531328 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 9281536 + }, + { + "name": "model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 10125312 + }, + { + "name": "model.layers.11.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 14660608 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 17191936 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 23942144 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24785920 + }, + { + "name": "model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 4096, + 276 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 24794112 + }, + { + "name": "model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 27055104 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 31590400 + } + ], + "md5sum": "8d51f7d6c12235b2abf387d529c01b7b" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "cbd9f23ad633e37551a3b64a51b4b729" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 30482432, + "records": [ + { + "name": "model.layers.2.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 0 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 2531328 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 9281536 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 10125312 + }, + { + "name": "model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 10133504 + }, + { + "name": "model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 4096, + 276 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 28221440 + } + ], + "md5sum": "8bc804e781ae6864b6d5046ddbcb0e5d" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 27325440, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 0 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 4535296 + }, + { + "name": "model.layers.3.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 4543488 + }, + { + "name": "model.layers.3.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 24794112 + } + ], + "md5sum": "c3aedd052c46ad3d90ed4d02d4d0c383" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "9ffb2b8f14577858f7be056df4bc09e5" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 20250624, + "records": [ + { + "name": "model.layers.4.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 0 + } + ], + "md5sum": "8295c00c13128a087586c4984671cac8" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 32494592, + "records": [ + { + "name": "model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 0 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 6750208 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7593984 + }, + { + "name": "model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 7602176 + }, + { + "name": "model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 4096, + 276 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 25690112 + }, + { + "name": "model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 27951104 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32486400 + } + ], + "md5sum": "869157e380854ebd5b9b00c214b05838" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "f0fb9c735bb82546d4becbd8cafe7705" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 30482432, + "records": [ + { + "name": "model.layers.4.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 0 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 2531328 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 9281536 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 10125312 + }, + { + "name": "model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 10133504 + }, + { + "name": "model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 4096, + 276 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 28221440 + } + ], + "md5sum": "5aba9e6137383abb728ec6dc01ac010a" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 27325440, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 0 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 4535296 + }, + { + "name": "model.layers.5.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 4543488 + }, + { + "name": "model.layers.5.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 24794112 + } + ], + "md5sum": "304149984b3ccb211fe88e828a395b87" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "64c7c0272ed3d1402fae3adc11d3fd13" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 20250624, + "records": [ + { + "name": "model.layers.6.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 0 + } + ], + "md5sum": "22896762997b92f94930c8a2c8d90ff8" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 32494592, + "records": [ + { + "name": "model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 0 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 6750208 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7593984 + }, + { + "name": "model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 7602176 + }, + { + "name": "model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 4096, + 276 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 25690112 + }, + { + "name": "model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 27951104 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32486400 + } + ], + "md5sum": "2c1fd7c0a6e995c3f823b4072c76fa49" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "f8259b65afd8eafcbcd9a679e5d6dbbc" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 30482432, + "records": [ + { + "name": "model.layers.6.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 0 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 2531328 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 9281536 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 10125312 + }, + { + "name": "model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 10133504 + }, + { + "name": "model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 4096, + 276 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 28221440 + } + ], + "md5sum": "26f3e6325f953b0b13ba8283e817ab37" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 27325440, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 0 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 4535296 + }, + { + "name": "model.layers.7.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 4543488 + }, + { + "name": "model.layers.7.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 24794112 + } + ], + "md5sum": "33e5e31bdd0b2421ff9b257622f553c6" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "1d165c4fc19f09987ff94a635e169112" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 20250624, + "records": [ + { + "name": "model.layers.8.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 0 + } + ], + "md5sum": "de5e55d28850b54d5d26d4d0cd961298" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 32494592, + "records": [ + { + "name": "model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 0 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 6750208 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7593984 + }, + { + "name": "model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 7602176 + }, + { + "name": "model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 4096, + 276 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 25690112 + }, + { + "name": "model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 27951104 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32486400 + } + ], + "md5sum": "a8a50295579addb7e4c010894124f77a" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "5041c43fd7f3ba1f0460fb76caef7ae9" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 30482432, + "records": [ + { + "name": "model.layers.8.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 0 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 2531328 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 9281536 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 10125312 + }, + { + "name": "model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 10133504 + }, + { + "name": "model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 4096, + 276 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 28221440 + } + ], + "md5sum": "84e46244ade810ec313c46a7abe40128" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 27325440, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 0 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 4535296 + }, + { + "name": "model.layers.9.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 4543488 + }, + { + "name": "model.layers.9.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 24794112 + } + ], + "md5sum": "b035209c306d2b46f1fa89cea05a0c1a" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 18087936, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 0 + } + ], + "md5sum": "efa29a69e6573eeebf11611d37a6fca6" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "089e14f6b0840ff4fd9e222c61245afd" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 30228480, + "records": [ + { + "name": "model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 0 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 6750208 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7593984 + }, + { + "name": "model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 7602176 + }, + { + "name": "model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 4096, + 276 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 25690112 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 27951104 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 27959296 + }, + { + "name": "model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 4096, + 276 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 27967488 + } + ], + "md5sum": "3a66005cf61cc5b0424bb804a8cb3eb9" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 27325440, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 0 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 4535296 + }, + { + "name": "model.layers.12.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 4543488 + }, + { + "name": "model.layers.12.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 24794112 + } + ], + "md5sum": "6304b7818496c0aae10bb4f3379ac64c" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "e804e07c9252309a571ac7c30f700849" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 20250624, + "records": [ + { + "name": "model.layers.13.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 0 + } + ], + "md5sum": "117034f73e7aa1fcf86820c617f8373f" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 32494592, + "records": [ + { + "name": "model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 0 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 6750208 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7593984 + }, + { + "name": "model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 7602176 + }, + { + "name": "model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 4096, + 276 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 25690112 + }, + { + "name": "model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 27951104 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32486400 + } + ], + "md5sum": "836d7e0622283dc44f62a659d9ed2215" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "f81bfdfcd16051cc6432a25756fbc53c" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 30482432, + "records": [ + { + "name": "model.layers.13.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 0 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 2531328 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 9281536 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 10125312 + }, + { + "name": "model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 10133504 + }, + { + "name": "model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 4096, + 276 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 28221440 + } + ], + "md5sum": "4bedbc11f235391afdbf084fb29f80aa" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 27325440, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 0 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 4535296 + }, + { + "name": "model.layers.14.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 4543488 + }, + { + "name": "model.layers.14.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 24794112 + } + ], + "md5sum": "13322be5f2729c3b453b61f70ded2d1b" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "ecc6647459376b03b8c5bce35c6f9150" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 20250624, + "records": [ + { + "name": "model.layers.15.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 0 + } + ], + "md5sum": "f5743ebce68a711782ca3d1ecd458743" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 32494592, + "records": [ + { + "name": "model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 0 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 6750208 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7593984 + }, + { + "name": "model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 7602176 + }, + { + "name": "model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 4096, + 276 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 25690112 + }, + { + "name": "model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 27951104 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32486400 + } + ], + "md5sum": "e8d61cdf7e88eddd42d2dc867581c2e4" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "683a947bb5ff5c2468695a39cae10e74" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 30482432, + "records": [ + { + "name": "model.layers.15.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 0 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 2531328 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 9281536 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 10125312 + }, + { + "name": "model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 10133504 + }, + { + "name": "model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 4096, + 276 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 28221440 + } + ], + "md5sum": "1b0fabd5e4a27575d11c69939db0140b" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 27325440, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 0 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 4535296 + }, + { + "name": "model.layers.16.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 4543488 + }, + { + "name": "model.layers.16.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 24794112 + } + ], + "md5sum": "a96c228e1921b194e729aeefe21417df" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "31304f9b3905d972e49fa3746ea6d1d2" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 20250624, + "records": [ + { + "name": "model.layers.17.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 0 + } + ], + "md5sum": "b5ef1597ecd9c6f5706d9eaf8357aa3f" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 32494592, + "records": [ + { + "name": "model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 0 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 6750208 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7593984 + }, + { + "name": "model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 7602176 + }, + { + "name": "model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 4096, + 276 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 25690112 + }, + { + "name": "model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 27951104 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32486400 + } + ], + "md5sum": "911f1e93c43f21357910b755765a5553" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "c089f4e50a8212105865e7927ead03f3" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 30482432, + "records": [ + { + "name": "model.layers.17.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 0 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 2531328 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 9281536 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 10125312 + }, + { + "name": "model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 10133504 + }, + { + "name": "model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 4096, + 276 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 28221440 + } + ], + "md5sum": "baa160fd29ab4bdc146fb3f24667ceca" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 27325440, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 0 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 4535296 + }, + { + "name": "model.layers.18.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 4543488 + }, + { + "name": "model.layers.18.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 24794112 + } + ], + "md5sum": "8a1792f09eecf2efd6332906524cd2db" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "457099fbb5fca4e1630034c48c4c4cd4" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 20250624, + "records": [ + { + "name": "model.layers.19.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 0 + } + ], + "md5sum": "d37ed0690f7e631e6de4f891552cbe97" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 32494592, + "records": [ + { + "name": "model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 0 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 6750208 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7593984 + }, + { + "name": "model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 7602176 + }, + { + "name": "model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 4096, + 276 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 25690112 + }, + { + "name": "model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 27951104 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32486400 + } + ], + "md5sum": "d075798d5dcd6d2040e2291032b29c74" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "88104d111cb155de4bdd9f3cb6172667" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 30482432, + "records": [ + { + "name": "model.layers.19.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 0 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 2531328 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 9281536 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 10125312 + }, + { + "name": "model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 10133504 + }, + { + "name": "model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 4096, + 276 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 28221440 + } + ], + "md5sum": "bc58f72778b1683dd1655f4f60574831" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 27325440, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 0 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 4535296 + }, + { + "name": "model.layers.20.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 4543488 + }, + { + "name": "model.layers.20.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 24794112 + } + ], + "md5sum": "0589f9cccea49fe3610734b60361a353" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "6bd46ac523c0e4b2a2263f76c0ef69bb" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 20250624, + "records": [ + { + "name": "model.layers.21.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 0 + } + ], + "md5sum": "9d7b6c0530910d8a6627af7a9dc7340d" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 32494592, + "records": [ + { + "name": "model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 0 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 6750208 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7593984 + }, + { + "name": "model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 7602176 + }, + { + "name": "model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 4096, + 276 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 25690112 + }, + { + "name": "model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 27951104 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32486400 + } + ], + "md5sum": "f22827dfcea8f24fcb4addfd328b9cae" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "7070698d3525dcfbb4dd6d02002d934a" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 30482432, + "records": [ + { + "name": "model.layers.21.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 0 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 2531328 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 9281536 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 10125312 + }, + { + "name": "model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1104 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18087936, + "byteOffset": 10133504 + }, + { + "name": "model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 4096, + 276 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2260992, + "byteOffset": 28221440 + } + ], + "md5sum": "7a87f85958bda94b5343276c281ba26f" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 27325440, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 0 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 4535296 + }, + { + "name": "model.layers.22.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 4543488 + }, + { + "name": "model.layers.22.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 24794112 + } + ], + "md5sum": "11cd68ad01464500faf104ed291a76f2" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 36282368, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 36282368, + "byteOffset": 0 + } + ], + "md5sum": "68769a31a8abe08412608e49b4566d4a" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 32379904, + "records": [ + { + "name": "model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 0 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 6750208 + }, + { + "name": "model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4535296, + "byteOffset": 7593984 + }, + { + "name": "model.layers.23.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20250624, + "byteOffset": 12129280 + } + ], + "md5sum": "5c0f5b5d300406d0c219823e92311ad4" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 10125312, + "records": [ + { + "name": "model.layers.23.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2531328, + "byteOffset": 0 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 2531328 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 9281536 + } + ], + "md5sum": "e30a83fb4611c0d2556a52dbc086b3f7" + } + ] +} \ No newline at end of file