| { | |
| "metadata": { | |
| "ParamSize": 325, | |
| "ParamBytes": 3048549376.0, | |
| "BitsPerParam": 3.619307029695688 | |
| }, | |
| "records": [ | |
| { | |
| "dataPath": "params_shard_0.bin", | |
| "format": "raw-shard", | |
| "nbytes": 52736000, | |
| "records": [ | |
| { | |
| "name": "lm_head.q_weight", | |
| "shape": [ | |
| 32000, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 52736000, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "9837c89d545850f3cd0d2637286c72b5" | |
| }, | |
| { | |
| "dataPath": "params_shard_1.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18087936, | |
| "records": [ | |
| { | |
| "name": "model.layers.24.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1104 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18087936, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "0b3892fd87005159cae3135feb15481f" | |
| }, | |
| { | |
| "dataPath": "params_shard_2.bin", | |
| "format": "raw-shard", | |
| "nbytes": 36282368, | |
| "records": [ | |
| { | |
| "name": "model.layers.24.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 22016, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 36282368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "4eac62fcd0f886e724cdc40cf81cd941" | |
| }, | |
| { | |
| "dataPath": "params_shard_3.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29226496, | |
| "records": [ | |
| { | |
| "name": "lm_head.q_scale", | |
| "shape": [ | |
| 32000, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6592000, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.23.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 6592000 | |
| }, | |
| { | |
| "name": "model.layers.23.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1104 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18087936, | |
| "byteOffset": 6600192 | |
| }, | |
| { | |
| "name": "model.layers.23.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 276 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2260992, | |
| "byteOffset": 24688128 | |
| }, | |
| { | |
| "name": "model.layers.23.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 26949120 | |
| }, | |
| { | |
| "name": "model.layers.24.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 26957312 | |
| }, | |
| { | |
| "name": "model.layers.24.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 276 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2260992, | |
| "byteOffset": 26965504 | |
| } | |
| ], | |
| "md5sum": "7b53644d7b0c813717f65ebf11235608" | |
| }, | |
| { | |
| "dataPath": "params_shard_4.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27325440, | |
| "records": [ | |
| { | |
| "name": "model.layers.24.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 22016, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4535296, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.24.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 4535296 | |
| }, | |
| { | |
| "name": "model.layers.24.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 12288, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20250624, | |
| "byteOffset": 4543488 | |
| }, | |
| { | |
| "name": "model.layers.24.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 12288, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2531328, | |
| "byteOffset": 24794112 | |
| } | |
| ], | |
| "md5sum": "ba44c2e9965a65f35d9db6bd925860dc" | |
| }, | |
| { | |
| "dataPath": "params_shard_5.bin", | |
| "format": "raw-shard", | |
| "nbytes": 36282368, | |
| "records": [ | |
| { | |
| "name": "model.layers.25.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 22016, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 36282368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "2be4f03707263b27c49924d2eda86299" | |
| }, | |
| { | |
| "dataPath": "params_shard_6.bin", | |
| "format": "raw-shard", | |
| "nbytes": 20250624, | |
| "records": [ | |
| { | |
| "name": "model.layers.25.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 12288, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20250624, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d9e03bfd02709e8f05d30249ed39536c" | |
| }, | |
| { | |
| "dataPath": "params_shard_7.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32494592, | |
| "records": [ | |
| { | |
| "name": "model.layers.24.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6750208, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.24.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 843776, | |
| "byteOffset": 6750208 | |
| }, | |
| { | |
| "name": "model.layers.25.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 7593984 | |
| }, | |
| { | |
| "name": "model.layers.25.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1104 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18087936, | |
| "byteOffset": 7602176 | |
| }, | |
| { | |
| "name": "model.layers.25.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 276 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2260992, | |
| "byteOffset": 25690112 | |
| }, | |
| { | |
| "name": "model.layers.25.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 22016, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4535296, | |
| "byteOffset": 27951104 | |
| }, | |
| { | |
| "name": "model.layers.25.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 32486400 | |
| } | |
| ], | |
| "md5sum": "cedf294ee3f2dd7ead342991db8d05b7" | |
| }, | |
| { | |
| "dataPath": "params_shard_8.bin", | |
| "format": "raw-shard", | |
| "nbytes": 36282368, | |
| "records": [ | |
| { | |
| "name": "model.layers.26.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 22016, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 36282368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "a03e69c96e83aea5bd7f3252517d7cfa" | |
| }, | |
| { | |
| "dataPath": "params_shard_9.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30482432, | |
| "records": [ | |
| { | |
| "name": "model.layers.25.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 12288, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2531328, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.25.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6750208, | |
| "byteOffset": 2531328 | |
| }, | |
| { | |
| "name": "model.layers.25.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 843776, | |
| "byteOffset": 9281536 | |
| }, | |
| { | |
| "name": "model.layers.26.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 10125312 | |
| }, | |
| { | |
| "name": "model.layers.26.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1104 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18087936, | |
| "byteOffset": 10133504 | |
| }, | |
| { | |
| "name": "model.layers.26.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 276 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2260992, | |
| "byteOffset": 28221440 | |
| } | |
| ], | |
| "md5sum": "796d81a11fbf19e0728e8e58e9f04a97" | |
| }, | |
| { | |
| "dataPath": "params_shard_10.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27325440, | |
| "records": [ | |
| { | |
| "name": "model.layers.26.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 22016, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4535296, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.26.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 4535296 | |
| }, | |
| { | |
| "name": "model.layers.26.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 12288, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20250624, | |
| "byteOffset": 4543488 | |
| }, | |
| { | |
| "name": "model.layers.26.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 12288, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2531328, | |
| "byteOffset": 24794112 | |
| } | |
| ], | |
| "md5sum": "9ba26206a1c6b829849edc19f477eb7a" | |
| }, | |
| { | |
| "dataPath": "params_shard_11.bin", | |
| "format": "raw-shard", | |
| "nbytes": 36282368, | |
| "records": [ | |
| { | |
| "name": "model.layers.27.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 22016, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 36282368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "aa5ae7d711608d59f910483e621a643c" | |
| }, | |
| { | |
| "dataPath": "params_shard_12.bin", | |
| "format": "raw-shard", | |
| "nbytes": 20250624, | |
| "records": [ | |
| { | |
| "name": "model.layers.27.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 12288, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20250624, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "08b5e4ba67b0b1464c599c672315ff9a" | |
| }, | |
| { | |
| "dataPath": "params_shard_13.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32494592, | |
| "records": [ | |
| { | |
| "name": "model.layers.26.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6750208, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.26.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 843776, | |
| "byteOffset": 6750208 | |
| }, | |
| { | |
| "name": "model.layers.27.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 7593984 | |
| }, | |
| { | |
| "name": "model.layers.27.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1104 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18087936, | |
| "byteOffset": 7602176 | |
| }, | |
| { | |
| "name": "model.layers.27.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 276 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2260992, | |
| "byteOffset": 25690112 | |
| }, | |
| { | |
| "name": "model.layers.27.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 22016, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4535296, | |
| "byteOffset": 27951104 | |
| }, | |
| { | |
| "name": "model.layers.27.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 32486400 | |
| } | |
| ], | |
| "md5sum": "6c5dfa04a79db8d5aa0c97ea4eb75892" | |
| }, | |
| { | |
| "dataPath": "params_shard_14.bin", | |
| "format": "raw-shard", | |
| "nbytes": 36282368, | |
| "records": [ | |
| { | |
| "name": "model.layers.28.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 22016, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 36282368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "736a6ded920d351a0ba6ee593fd33bf7" | |
| }, | |
| { | |
| "dataPath": "params_shard_15.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30482432, | |
| "records": [ | |
| { | |
| "name": "model.layers.27.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 12288, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2531328, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.27.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6750208, | |
| "byteOffset": 2531328 | |
| }, | |
| { | |
| "name": "model.layers.27.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 843776, | |
| "byteOffset": 9281536 | |
| }, | |
| { | |
| "name": "model.layers.28.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 10125312 | |
| }, | |
| { | |
| "name": "model.layers.28.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1104 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18087936, | |
| "byteOffset": 10133504 | |
| }, | |
| { | |
| "name": "model.layers.28.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 276 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2260992, | |
| "byteOffset": 28221440 | |
| } | |
| ], | |
| "md5sum": "df2a12a1071b900820cafdea9474a11e" | |
| }, | |
| { | |
| "dataPath": "params_shard_16.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27325440, | |
| "records": [ | |
| { | |
| "name": "model.layers.28.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 22016, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4535296, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.28.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 4535296 | |
| }, | |
| { | |
| "name": "model.layers.28.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 12288, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20250624, | |
| "byteOffset": 4543488 | |
| }, | |
| { | |
| "name": "model.layers.28.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 12288, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2531328, | |
| "byteOffset": 24794112 | |
| } | |
| ], | |
| "md5sum": "8454540ba9b5e2f98b22f417e712eebc" | |
| }, | |
| { | |
| "dataPath": "params_shard_17.bin", | |
| "format": "raw-shard", | |
| "nbytes": 36282368, | |
| "records": [ | |
| { | |
| "name": "model.layers.29.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 22016, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 36282368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "6bdf3b9ce32e2a28bdbd0ff5396334b8" | |
| }, | |
| { | |
| "dataPath": "params_shard_18.bin", | |
| "format": "raw-shard", | |
| "nbytes": 20250624, | |
| "records": [ | |
| { | |
| "name": "model.layers.29.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 12288, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20250624, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ae58701b107dd184fcb0877329418995" | |
| }, | |
| { | |
| "dataPath": "params_shard_19.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32494592, | |
| "records": [ | |
| { | |
| "name": "model.layers.28.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6750208, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.28.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 843776, | |
| "byteOffset": 6750208 | |
| }, | |
| { | |
| "name": "model.layers.29.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 7593984 | |
| }, | |
| { | |
| "name": "model.layers.29.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1104 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18087936, | |
| "byteOffset": 7602176 | |
| }, | |
| { | |
| "name": "model.layers.29.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 276 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2260992, | |
| "byteOffset": 25690112 | |
| }, | |
| { | |
| "name": "model.layers.29.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 22016, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4535296, | |
| "byteOffset": 27951104 | |
| }, | |
| { | |
| "name": "model.layers.29.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 32486400 | |
| } | |
| ], | |
| "md5sum": "1bd984b6714ebe828f8ac45e45e7b226" | |
| }, | |
| { | |
| "dataPath": "params_shard_20.bin", | |
| "format": "raw-shard", | |
| "nbytes": 36282368, | |
| "records": [ | |
| { | |
| "name": "model.layers.30.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 22016, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 36282368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b4fdd4d7aba0c33b7b24cd296ce52a39" | |
| }, | |
| { | |
| "dataPath": "params_shard_21.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30482432, | |
| "records": [ | |
| { | |
| "name": "model.layers.29.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 12288, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2531328, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.29.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6750208, | |
| "byteOffset": 2531328 | |
| }, | |
| { | |
| "name": "model.layers.29.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 843776, | |
| "byteOffset": 9281536 | |
| }, | |
| { | |
| "name": "model.layers.30.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 10125312 | |
| }, | |
| { | |
| "name": "model.layers.30.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1104 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18087936, | |
| "byteOffset": 10133504 | |
| }, | |
| { | |
| "name": "model.layers.30.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 276 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2260992, | |
| "byteOffset": 28221440 | |
| } | |
| ], | |
| "md5sum": "f6377a6192aad921c9a257f85383493c" | |
| }, | |
| { | |
| "dataPath": "params_shard_22.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27325440, | |
| "records": [ | |
| { | |
| "name": "model.layers.30.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 22016, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4535296, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.30.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 4535296 | |
| }, | |
| { | |
| "name": "model.layers.30.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 12288, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20250624, | |
| "byteOffset": 4543488 | |
| }, | |
| { | |
| "name": "model.layers.30.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 12288, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2531328, | |
| "byteOffset": 24794112 | |
| } | |
| ], | |
| "md5sum": "537aa38eef35c6850fbf9835344fc320" | |
| }, | |
| { | |
| "dataPath": "params_shard_23.bin", | |
| "format": "raw-shard", | |
| "nbytes": 36282368, | |
| "records": [ | |
| { | |
| "name": "model.layers.31.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 22016, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 36282368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "04ee6768eed61211f9fff2a959efdb50" | |
| }, | |
| { | |
| "dataPath": "params_shard_24.bin", | |
| "format": "raw-shard", | |
| "nbytes": 20250624, | |
| "records": [ | |
| { | |
| "name": "model.layers.31.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 12288, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20250624, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e359ec1f1f82082341d5aeee964483d8" | |
| }, | |
| { | |
| "dataPath": "params_shard_25.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32494592, | |
| "records": [ | |
| { | |
| "name": "model.layers.30.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6750208, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.30.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 843776, | |
| "byteOffset": 6750208 | |
| }, | |
| { | |
| "name": "model.layers.31.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 7593984 | |
| }, | |
| { | |
| "name": "model.layers.31.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1104 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18087936, | |
| "byteOffset": 7602176 | |
| }, | |
| { | |
| "name": "model.layers.31.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 276 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2260992, | |
| "byteOffset": 25690112 | |
| }, | |
| { | |
| "name": "model.layers.31.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 22016, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4535296, | |
| "byteOffset": 27951104 | |
| }, | |
| { | |
| "name": "model.layers.31.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 32486400 | |
| } | |
| ], | |
| "md5sum": "83eeb59c0ba757e7081b390ee21b69b6" | |
| }, | |
| { | |
| "dataPath": "params_shard_26.bin", | |
| "format": "raw-shard", | |
| "nbytes": 52736000, | |
| "records": [ | |
| { | |
| "name": "model.embed_tokens.q_weight", | |
| "shape": [ | |
| 32000, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 52736000, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d01b089f0beed4795ce259f291ef38a2" | |
| }, | |
| { | |
| "dataPath": "params_shard_27.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18087936, | |
| "records": [ | |
| { | |
| "name": "model.layers.0.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1104 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18087936, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "276facd44f8601842dc48a1da8c4494d" | |
| }, | |
| { | |
| "dataPath": "params_shard_28.bin", | |
| "format": "raw-shard", | |
| "nbytes": 36282368, | |
| "records": [ | |
| { | |
| "name": "model.layers.0.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 22016, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 36282368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "073d93ebf6889233637ee1a33bb3bf93" | |
| }, | |
| { | |
| "dataPath": "params_shard_29.bin", | |
| "format": "raw-shard", | |
| "nbytes": 20250624, | |
| "records": [ | |
| { | |
| "name": "model.layers.0.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 12288, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20250624, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "85a1d9b29ef60762dcb84a28c2e65f08" | |
| }, | |
| { | |
| "dataPath": "params_shard_30.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32819712, | |
| "records": [ | |
| { | |
| "name": "model.layers.31.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 12288, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2531328, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.31.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6750208, | |
| "byteOffset": 2531328 | |
| }, | |
| { | |
| "name": "model.layers.31.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 843776, | |
| "byteOffset": 9281536 | |
| }, | |
| { | |
| "name": "model.norm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 10125312 | |
| }, | |
| { | |
| "name": "model.embed_tokens.q_scale", | |
| "shape": [ | |
| 32000, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6592000, | |
| "byteOffset": 10133504 | |
| }, | |
| { | |
| "name": "model.layers.0.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 16725504 | |
| }, | |
| { | |
| "name": "model.layers.0.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 276 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2260992, | |
| "byteOffset": 16733696 | |
| }, | |
| { | |
| "name": "model.layers.0.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 22016, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4535296, | |
| "byteOffset": 18994688 | |
| }, | |
| { | |
| "name": "model.layers.0.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 23529984 | |
| }, | |
| { | |
| "name": "model.layers.0.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 12288, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2531328, | |
| "byteOffset": 23538176 | |
| }, | |
| { | |
| "name": "model.layers.0.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6750208, | |
| "byteOffset": 26069504 | |
| } | |
| ], | |
| "md5sum": "2238fe06b147a60d6774f94c6b63f53b" | |
| }, | |
| { | |
| "dataPath": "params_shard_31.bin", | |
| "format": "raw-shard", | |
| "nbytes": 36282368, | |
| "records": [ | |
| { | |
| "name": "model.layers.1.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 22016, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 36282368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "9105ac0fed7cb450442a775ee98f9055" | |
| }, | |
| { | |
| "dataPath": "params_shard_32.bin", | |
| "format": "raw-shard", | |
| "nbytes": 20250624, | |
| "records": [ | |
| { | |
| "name": "model.layers.1.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 12288, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20250624, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "57e1956d6a09f71cb01daf62e7ec6cb7" | |
| }, | |
| { | |
| "dataPath": "params_shard_33.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28275712, | |
| "records": [ | |
| { | |
| "name": "model.layers.0.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 843776, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.1.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 843776 | |
| }, | |
| { | |
| "name": "model.layers.1.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1104 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18087936, | |
| "byteOffset": 851968 | |
| }, | |
| { | |
| "name": "model.layers.1.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 276 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2260992, | |
| "byteOffset": 18939904 | |
| }, | |
| { | |
| "name": "model.layers.1.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 22016, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4535296, | |
| "byteOffset": 21200896 | |
| }, | |
| { | |
| "name": "model.layers.1.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 25736192 | |
| }, | |
| { | |
| "name": "model.layers.1.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 12288, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2531328, | |
| "byteOffset": 25744384 | |
| } | |
| ], | |
| "md5sum": "2b8799702b69ba201c496c6d52769bed" | |
| }, | |
| { | |
| "dataPath": "params_shard_34.bin", | |
| "format": "raw-shard", | |
| "nbytes": 36282368, | |
| "records": [ | |
| { | |
| "name": "model.layers.10.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 22016, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 36282368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "0cfaac8ca06224e04cde47c14f5637c1" | |
| }, | |
| { | |
| "dataPath": "params_shard_35.bin", | |
| "format": "raw-shard", | |
| "nbytes": 20250624, | |
| "records": [ | |
| { | |
| "name": "model.layers.10.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 12288, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20250624, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ce5307f1afe10f5a589c2b9f723ec05f" | |
| }, | |
| { | |
| "dataPath": "params_shard_36.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32494592, | |
| "records": [ | |
| { | |
| "name": "model.layers.1.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6750208, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.1.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 843776, | |
| "byteOffset": 6750208 | |
| }, | |
| { | |
| "name": "model.layers.10.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 7593984 | |
| }, | |
| { | |
| "name": "model.layers.10.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1104 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18087936, | |
| "byteOffset": 7602176 | |
| }, | |
| { | |
| "name": "model.layers.10.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 276 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2260992, | |
| "byteOffset": 25690112 | |
| }, | |
| { | |
| "name": "model.layers.10.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 22016, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4535296, | |
| "byteOffset": 27951104 | |
| }, | |
| { | |
| "name": "model.layers.10.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 32486400 | |
| } | |
| ], | |
| "md5sum": "630eda6fee482469ab88c94663af1208" | |
| }, | |
| { | |
| "dataPath": "params_shard_37.bin", | |
| "format": "raw-shard", | |
| "nbytes": 36282368, | |
| "records": [ | |
| { | |
| "name": "model.layers.11.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 22016, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 36282368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "73122d378591d4857e7e0ebae5a72556" | |
| }, | |
| { | |
| "dataPath": "params_shard_38.bin", | |
| "format": "raw-shard", | |
| "nbytes": 20250624, | |
| "records": [ | |
| { | |
| "name": "model.layers.11.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 12288, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20250624, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "86814866596ab1fc5088cd19bbb24ec9" | |
| }, | |
| { | |
| "dataPath": "params_shard_39.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18087936, | |
| "records": [ | |
| { | |
| "name": "model.layers.2.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1104 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18087936, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "83bde830156064662a03172c3b6e13ef" | |
| }, | |
| { | |
| "dataPath": "params_shard_40.bin", | |
| "format": "raw-shard", | |
| "nbytes": 36282368, | |
| "records": [ | |
| { | |
| "name": "model.layers.2.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 22016, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 36282368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "9a52675436254a8cdd25d9c02fd5cf62" | |
| }, | |
| { | |
| "dataPath": "params_shard_41.bin", | |
| "format": "raw-shard", | |
| "nbytes": 20250624, | |
| "records": [ | |
| { | |
| "name": "model.layers.2.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 12288, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20250624, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "2bc382a626c4e8f723a09c0d58b56e83" | |
| }, | |
| { | |
| "dataPath": "params_shard_42.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31598592, | |
| "records": [ | |
| { | |
| "name": "model.layers.10.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 12288, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2531328, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.10.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6750208, | |
| "byteOffset": 2531328 | |
| }, | |
| { | |
| "name": "model.layers.10.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 843776, | |
| "byteOffset": 9281536 | |
| }, | |
| { | |
| "name": "model.layers.11.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 22016, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4535296, | |
| "byteOffset": 10125312 | |
| }, | |
| { | |
| "name": "model.layers.11.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 12288, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2531328, | |
| "byteOffset": 14660608 | |
| }, | |
| { | |
| "name": "model.layers.11.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6750208, | |
| "byteOffset": 17191936 | |
| }, | |
| { | |
| "name": "model.layers.11.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 843776, | |
| "byteOffset": 23942144 | |
| }, | |
| { | |
| "name": "model.layers.2.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 24785920 | |
| }, | |
| { | |
| "name": "model.layers.2.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 276 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2260992, | |
| "byteOffset": 24794112 | |
| }, | |
| { | |
| "name": "model.layers.2.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 22016, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4535296, | |
| "byteOffset": 27055104 | |
| }, | |
| { | |
| "name": "model.layers.2.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 31590400 | |
| } | |
| ], | |
| "md5sum": "8d51f7d6c12235b2abf387d529c01b7b" | |
| }, | |
| { | |
| "dataPath": "params_shard_43.bin", | |
| "format": "raw-shard", | |
| "nbytes": 36282368, | |
| "records": [ | |
| { | |
| "name": "model.layers.3.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 22016, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 36282368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "cbd9f23ad633e37551a3b64a51b4b729" | |
| }, | |
| { | |
| "dataPath": "params_shard_44.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30482432, | |
| "records": [ | |
| { | |
| "name": "model.layers.2.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 12288, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2531328, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.2.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6750208, | |
| "byteOffset": 2531328 | |
| }, | |
| { | |
| "name": "model.layers.2.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 843776, | |
| "byteOffset": 9281536 | |
| }, | |
| { | |
| "name": "model.layers.3.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 10125312 | |
| }, | |
| { | |
| "name": "model.layers.3.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1104 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18087936, | |
| "byteOffset": 10133504 | |
| }, | |
| { | |
| "name": "model.layers.3.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 276 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2260992, | |
| "byteOffset": 28221440 | |
| } | |
| ], | |
| "md5sum": "8bc804e781ae6864b6d5046ddbcb0e5d" | |
| }, | |
| { | |
| "dataPath": "params_shard_45.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27325440, | |
| "records": [ | |
| { | |
| "name": "model.layers.3.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 22016, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4535296, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.3.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 4535296 | |
| }, | |
| { | |
| "name": "model.layers.3.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 12288, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20250624, | |
| "byteOffset": 4543488 | |
| }, | |
| { | |
| "name": "model.layers.3.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 12288, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2531328, | |
| "byteOffset": 24794112 | |
| } | |
| ], | |
| "md5sum": "c3aedd052c46ad3d90ed4d02d4d0c383" | |
| }, | |
| { | |
| "dataPath": "params_shard_46.bin", | |
| "format": "raw-shard", | |
| "nbytes": 36282368, | |
| "records": [ | |
| { | |
| "name": "model.layers.4.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 22016, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 36282368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "9ffb2b8f14577858f7be056df4bc09e5" | |
| }, | |
| { | |
| "dataPath": "params_shard_47.bin", | |
| "format": "raw-shard", | |
| "nbytes": 20250624, | |
| "records": [ | |
| { | |
| "name": "model.layers.4.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 12288, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20250624, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "8295c00c13128a087586c4984671cac8" | |
| }, | |
| { | |
| "dataPath": "params_shard_48.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32494592, | |
| "records": [ | |
| { | |
| "name": "model.layers.3.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6750208, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.3.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 843776, | |
| "byteOffset": 6750208 | |
| }, | |
| { | |
| "name": "model.layers.4.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 7593984 | |
| }, | |
| { | |
| "name": "model.layers.4.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1104 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18087936, | |
| "byteOffset": 7602176 | |
| }, | |
| { | |
| "name": "model.layers.4.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 276 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2260992, | |
| "byteOffset": 25690112 | |
| }, | |
| { | |
| "name": "model.layers.4.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 22016, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4535296, | |
| "byteOffset": 27951104 | |
| }, | |
| { | |
| "name": "model.layers.4.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 32486400 | |
| } | |
| ], | |
| "md5sum": "869157e380854ebd5b9b00c214b05838" | |
| }, | |
| { | |
| "dataPath": "params_shard_49.bin", | |
| "format": "raw-shard", | |
| "nbytes": 36282368, | |
| "records": [ | |
| { | |
| "name": "model.layers.5.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 22016, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 36282368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f0fb9c735bb82546d4becbd8cafe7705" | |
| }, | |
| { | |
| "dataPath": "params_shard_50.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30482432, | |
| "records": [ | |
| { | |
| "name": "model.layers.4.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 12288, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2531328, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.4.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6750208, | |
| "byteOffset": 2531328 | |
| }, | |
| { | |
| "name": "model.layers.4.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 843776, | |
| "byteOffset": 9281536 | |
| }, | |
| { | |
| "name": "model.layers.5.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 10125312 | |
| }, | |
| { | |
| "name": "model.layers.5.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1104 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18087936, | |
| "byteOffset": 10133504 | |
| }, | |
| { | |
| "name": "model.layers.5.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 276 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2260992, | |
| "byteOffset": 28221440 | |
| } | |
| ], | |
| "md5sum": "5aba9e6137383abb728ec6dc01ac010a" | |
| }, | |
| { | |
| "dataPath": "params_shard_51.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27325440, | |
| "records": [ | |
| { | |
| "name": "model.layers.5.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 22016, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4535296, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.5.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 4535296 | |
| }, | |
| { | |
| "name": "model.layers.5.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 12288, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20250624, | |
| "byteOffset": 4543488 | |
| }, | |
| { | |
| "name": "model.layers.5.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 12288, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2531328, | |
| "byteOffset": 24794112 | |
| } | |
| ], | |
| "md5sum": "304149984b3ccb211fe88e828a395b87" | |
| }, | |
| { | |
| "dataPath": "params_shard_52.bin", | |
| "format": "raw-shard", | |
| "nbytes": 36282368, | |
| "records": [ | |
| { | |
| "name": "model.layers.6.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 22016, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 36282368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "64c7c0272ed3d1402fae3adc11d3fd13" | |
| }, | |
| { | |
| "dataPath": "params_shard_53.bin", | |
| "format": "raw-shard", | |
| "nbytes": 20250624, | |
| "records": [ | |
| { | |
| "name": "model.layers.6.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 12288, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20250624, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "22896762997b92f94930c8a2c8d90ff8" | |
| }, | |
| { | |
| "dataPath": "params_shard_54.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32494592, | |
| "records": [ | |
| { | |
| "name": "model.layers.5.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6750208, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.5.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 843776, | |
| "byteOffset": 6750208 | |
| }, | |
| { | |
| "name": "model.layers.6.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 7593984 | |
| }, | |
| { | |
| "name": "model.layers.6.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1104 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18087936, | |
| "byteOffset": 7602176 | |
| }, | |
| { | |
| "name": "model.layers.6.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 276 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2260992, | |
| "byteOffset": 25690112 | |
| }, | |
| { | |
| "name": "model.layers.6.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 22016, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4535296, | |
| "byteOffset": 27951104 | |
| }, | |
| { | |
| "name": "model.layers.6.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 32486400 | |
| } | |
| ], | |
| "md5sum": "2c1fd7c0a6e995c3f823b4072c76fa49" | |
| }, | |
| { | |
| "dataPath": "params_shard_55.bin", | |
| "format": "raw-shard", | |
| "nbytes": 36282368, | |
| "records": [ | |
| { | |
| "name": "model.layers.7.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 22016, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 36282368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f8259b65afd8eafcbcd9a679e5d6dbbc" | |
| }, | |
| { | |
| "dataPath": "params_shard_56.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30482432, | |
| "records": [ | |
| { | |
| "name": "model.layers.6.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 12288, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2531328, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.6.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6750208, | |
| "byteOffset": 2531328 | |
| }, | |
| { | |
| "name": "model.layers.6.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 843776, | |
| "byteOffset": 9281536 | |
| }, | |
| { | |
| "name": "model.layers.7.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 10125312 | |
| }, | |
| { | |
| "name": "model.layers.7.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1104 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18087936, | |
| "byteOffset": 10133504 | |
| }, | |
| { | |
| "name": "model.layers.7.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 276 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2260992, | |
| "byteOffset": 28221440 | |
| } | |
| ], | |
| "md5sum": "26f3e6325f953b0b13ba8283e817ab37" | |
| }, | |
| { | |
| "dataPath": "params_shard_57.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27325440, | |
| "records": [ | |
| { | |
| "name": "model.layers.7.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 22016, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4535296, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.7.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 4535296 | |
| }, | |
| { | |
| "name": "model.layers.7.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 12288, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20250624, | |
| "byteOffset": 4543488 | |
| }, | |
| { | |
| "name": "model.layers.7.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 12288, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2531328, | |
| "byteOffset": 24794112 | |
| } | |
| ], | |
| "md5sum": "33e5e31bdd0b2421ff9b257622f553c6" | |
| }, | |
| { | |
| "dataPath": "params_shard_58.bin", | |
| "format": "raw-shard", | |
| "nbytes": 36282368, | |
| "records": [ | |
| { | |
| "name": "model.layers.8.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 22016, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 36282368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "1d165c4fc19f09987ff94a635e169112" | |
| }, | |
| { | |
| "dataPath": "params_shard_59.bin", | |
| "format": "raw-shard", | |
| "nbytes": 20250624, | |
| "records": [ | |
| { | |
| "name": "model.layers.8.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 12288, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20250624, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "de5e55d28850b54d5d26d4d0cd961298" | |
| }, | |
| { | |
| "dataPath": "params_shard_60.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32494592, | |
| "records": [ | |
| { | |
| "name": "model.layers.7.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6750208, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.7.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 843776, | |
| "byteOffset": 6750208 | |
| }, | |
| { | |
| "name": "model.layers.8.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 7593984 | |
| }, | |
| { | |
| "name": "model.layers.8.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1104 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18087936, | |
| "byteOffset": 7602176 | |
| }, | |
| { | |
| "name": "model.layers.8.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 276 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2260992, | |
| "byteOffset": 25690112 | |
| }, | |
| { | |
| "name": "model.layers.8.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 22016, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4535296, | |
| "byteOffset": 27951104 | |
| }, | |
| { | |
| "name": "model.layers.8.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 32486400 | |
| } | |
| ], | |
| "md5sum": "a8a50295579addb7e4c010894124f77a" | |
| }, | |
| { | |
| "dataPath": "params_shard_61.bin", | |
| "format": "raw-shard", | |
| "nbytes": 36282368, | |
| "records": [ | |
| { | |
| "name": "model.layers.9.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 22016, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 36282368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "5041c43fd7f3ba1f0460fb76caef7ae9" | |
| }, | |
| { | |
| "dataPath": "params_shard_62.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30482432, | |
| "records": [ | |
| { | |
| "name": "model.layers.8.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 12288, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2531328, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.8.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6750208, | |
| "byteOffset": 2531328 | |
| }, | |
| { | |
| "name": "model.layers.8.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 843776, | |
| "byteOffset": 9281536 | |
| }, | |
| { | |
| "name": "model.layers.9.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 10125312 | |
| }, | |
| { | |
| "name": "model.layers.9.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1104 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18087936, | |
| "byteOffset": 10133504 | |
| }, | |
| { | |
| "name": "model.layers.9.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 276 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2260992, | |
| "byteOffset": 28221440 | |
| } | |
| ], | |
| "md5sum": "84e46244ade810ec313c46a7abe40128" | |
| }, | |
| { | |
| "dataPath": "params_shard_63.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27325440, | |
| "records": [ | |
| { | |
| "name": "model.layers.9.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 22016, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4535296, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.9.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 4535296 | |
| }, | |
| { | |
| "name": "model.layers.9.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 12288, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20250624, | |
| "byteOffset": 4543488 | |
| }, | |
| { | |
| "name": "model.layers.9.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 12288, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2531328, | |
| "byteOffset": 24794112 | |
| } | |
| ], | |
| "md5sum": "b035209c306d2b46f1fa89cea05a0c1a" | |
| }, | |
| { | |
| "dataPath": "params_shard_64.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18087936, | |
| "records": [ | |
| { | |
| "name": "model.layers.12.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1104 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18087936, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "efa29a69e6573eeebf11611d37a6fca6" | |
| }, | |
| { | |
| "dataPath": "params_shard_65.bin", | |
| "format": "raw-shard", | |
| "nbytes": 36282368, | |
| "records": [ | |
| { | |
| "name": "model.layers.12.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 22016, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 36282368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "089e14f6b0840ff4fd9e222c61245afd" | |
| }, | |
| { | |
| "dataPath": "params_shard_66.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30228480, | |
| "records": [ | |
| { | |
| "name": "model.layers.9.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6750208, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.9.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 843776, | |
| "byteOffset": 6750208 | |
| }, | |
| { | |
| "name": "model.layers.11.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 7593984 | |
| }, | |
| { | |
| "name": "model.layers.11.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1104 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18087936, | |
| "byteOffset": 7602176 | |
| }, | |
| { | |
| "name": "model.layers.11.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 276 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2260992, | |
| "byteOffset": 25690112 | |
| }, | |
| { | |
| "name": "model.layers.11.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 27951104 | |
| }, | |
| { | |
| "name": "model.layers.12.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 27959296 | |
| }, | |
| { | |
| "name": "model.layers.12.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 276 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2260992, | |
| "byteOffset": 27967488 | |
| } | |
| ], | |
| "md5sum": "3a66005cf61cc5b0424bb804a8cb3eb9" | |
| }, | |
| { | |
| "dataPath": "params_shard_67.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27325440, | |
| "records": [ | |
| { | |
| "name": "model.layers.12.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 22016, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4535296, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.12.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 4535296 | |
| }, | |
| { | |
| "name": "model.layers.12.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 12288, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20250624, | |
| "byteOffset": 4543488 | |
| }, | |
| { | |
| "name": "model.layers.12.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 12288, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2531328, | |
| "byteOffset": 24794112 | |
| } | |
| ], | |
| "md5sum": "6304b7818496c0aae10bb4f3379ac64c" | |
| }, | |
| { | |
| "dataPath": "params_shard_68.bin", | |
| "format": "raw-shard", | |
| "nbytes": 36282368, | |
| "records": [ | |
| { | |
| "name": "model.layers.13.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 22016, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 36282368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e804e07c9252309a571ac7c30f700849" | |
| }, | |
| { | |
| "dataPath": "params_shard_69.bin", | |
| "format": "raw-shard", | |
| "nbytes": 20250624, | |
| "records": [ | |
| { | |
| "name": "model.layers.13.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 12288, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20250624, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "117034f73e7aa1fcf86820c617f8373f" | |
| }, | |
| { | |
| "dataPath": "params_shard_70.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32494592, | |
| "records": [ | |
| { | |
| "name": "model.layers.12.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6750208, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.12.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 843776, | |
| "byteOffset": 6750208 | |
| }, | |
| { | |
| "name": "model.layers.13.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 7593984 | |
| }, | |
| { | |
| "name": "model.layers.13.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1104 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18087936, | |
| "byteOffset": 7602176 | |
| }, | |
| { | |
| "name": "model.layers.13.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 276 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2260992, | |
| "byteOffset": 25690112 | |
| }, | |
| { | |
| "name": "model.layers.13.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 22016, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4535296, | |
| "byteOffset": 27951104 | |
| }, | |
| { | |
| "name": "model.layers.13.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 32486400 | |
| } | |
| ], | |
| "md5sum": "836d7e0622283dc44f62a659d9ed2215" | |
| }, | |
| { | |
| "dataPath": "params_shard_71.bin", | |
| "format": "raw-shard", | |
| "nbytes": 36282368, | |
| "records": [ | |
| { | |
| "name": "model.layers.14.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 22016, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 36282368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f81bfdfcd16051cc6432a25756fbc53c" | |
| }, | |
| { | |
| "dataPath": "params_shard_72.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30482432, | |
| "records": [ | |
| { | |
| "name": "model.layers.13.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 12288, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2531328, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.13.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6750208, | |
| "byteOffset": 2531328 | |
| }, | |
| { | |
| "name": "model.layers.13.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 843776, | |
| "byteOffset": 9281536 | |
| }, | |
| { | |
| "name": "model.layers.14.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 10125312 | |
| }, | |
| { | |
| "name": "model.layers.14.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1104 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18087936, | |
| "byteOffset": 10133504 | |
| }, | |
| { | |
| "name": "model.layers.14.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 276 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2260992, | |
| "byteOffset": 28221440 | |
| } | |
| ], | |
| "md5sum": "4bedbc11f235391afdbf084fb29f80aa" | |
| }, | |
| { | |
| "dataPath": "params_shard_73.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27325440, | |
| "records": [ | |
| { | |
| "name": "model.layers.14.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 22016, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4535296, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.14.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 4535296 | |
| }, | |
| { | |
| "name": "model.layers.14.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 12288, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20250624, | |
| "byteOffset": 4543488 | |
| }, | |
| { | |
| "name": "model.layers.14.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 12288, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2531328, | |
| "byteOffset": 24794112 | |
| } | |
| ], | |
| "md5sum": "13322be5f2729c3b453b61f70ded2d1b" | |
| }, | |
| { | |
| "dataPath": "params_shard_74.bin", | |
| "format": "raw-shard", | |
| "nbytes": 36282368, | |
| "records": [ | |
| { | |
| "name": "model.layers.15.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 22016, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 36282368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ecc6647459376b03b8c5bce35c6f9150" | |
| }, | |
| { | |
| "dataPath": "params_shard_75.bin", | |
| "format": "raw-shard", | |
| "nbytes": 20250624, | |
| "records": [ | |
| { | |
| "name": "model.layers.15.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 12288, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20250624, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f5743ebce68a711782ca3d1ecd458743" | |
| }, | |
| { | |
| "dataPath": "params_shard_76.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32494592, | |
| "records": [ | |
| { | |
| "name": "model.layers.14.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6750208, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.14.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 843776, | |
| "byteOffset": 6750208 | |
| }, | |
| { | |
| "name": "model.layers.15.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 7593984 | |
| }, | |
| { | |
| "name": "model.layers.15.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1104 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18087936, | |
| "byteOffset": 7602176 | |
| }, | |
| { | |
| "name": "model.layers.15.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 276 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2260992, | |
| "byteOffset": 25690112 | |
| }, | |
| { | |
| "name": "model.layers.15.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 22016, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4535296, | |
| "byteOffset": 27951104 | |
| }, | |
| { | |
| "name": "model.layers.15.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 32486400 | |
| } | |
| ], | |
| "md5sum": "e8d61cdf7e88eddd42d2dc867581c2e4" | |
| }, | |
| { | |
| "dataPath": "params_shard_77.bin", | |
| "format": "raw-shard", | |
| "nbytes": 36282368, | |
| "records": [ | |
| { | |
| "name": "model.layers.16.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 22016, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 36282368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "683a947bb5ff5c2468695a39cae10e74" | |
| }, | |
| { | |
| "dataPath": "params_shard_78.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30482432, | |
| "records": [ | |
| { | |
| "name": "model.layers.15.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 12288, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2531328, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.15.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6750208, | |
| "byteOffset": 2531328 | |
| }, | |
| { | |
| "name": "model.layers.15.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 843776, | |
| "byteOffset": 9281536 | |
| }, | |
| { | |
| "name": "model.layers.16.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 10125312 | |
| }, | |
| { | |
| "name": "model.layers.16.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1104 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18087936, | |
| "byteOffset": 10133504 | |
| }, | |
| { | |
| "name": "model.layers.16.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 276 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2260992, | |
| "byteOffset": 28221440 | |
| } | |
| ], | |
| "md5sum": "1b0fabd5e4a27575d11c69939db0140b" | |
| }, | |
| { | |
| "dataPath": "params_shard_79.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27325440, | |
| "records": [ | |
| { | |
| "name": "model.layers.16.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 22016, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4535296, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.16.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 4535296 | |
| }, | |
| { | |
| "name": "model.layers.16.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 12288, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20250624, | |
| "byteOffset": 4543488 | |
| }, | |
| { | |
| "name": "model.layers.16.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 12288, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2531328, | |
| "byteOffset": 24794112 | |
| } | |
| ], | |
| "md5sum": "a96c228e1921b194e729aeefe21417df" | |
| }, | |
| { | |
| "dataPath": "params_shard_80.bin", | |
| "format": "raw-shard", | |
| "nbytes": 36282368, | |
| "records": [ | |
| { | |
| "name": "model.layers.17.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 22016, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 36282368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "31304f9b3905d972e49fa3746ea6d1d2" | |
| }, | |
| { | |
| "dataPath": "params_shard_81.bin", | |
| "format": "raw-shard", | |
| "nbytes": 20250624, | |
| "records": [ | |
| { | |
| "name": "model.layers.17.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 12288, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20250624, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b5ef1597ecd9c6f5706d9eaf8357aa3f" | |
| }, | |
| { | |
| "dataPath": "params_shard_82.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32494592, | |
| "records": [ | |
| { | |
| "name": "model.layers.16.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6750208, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.16.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 843776, | |
| "byteOffset": 6750208 | |
| }, | |
| { | |
| "name": "model.layers.17.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 7593984 | |
| }, | |
| { | |
| "name": "model.layers.17.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1104 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18087936, | |
| "byteOffset": 7602176 | |
| }, | |
| { | |
| "name": "model.layers.17.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 276 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2260992, | |
| "byteOffset": 25690112 | |
| }, | |
| { | |
| "name": "model.layers.17.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 22016, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4535296, | |
| "byteOffset": 27951104 | |
| }, | |
| { | |
| "name": "model.layers.17.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 32486400 | |
| } | |
| ], | |
| "md5sum": "911f1e93c43f21357910b755765a5553" | |
| }, | |
| { | |
| "dataPath": "params_shard_83.bin", | |
| "format": "raw-shard", | |
| "nbytes": 36282368, | |
| "records": [ | |
| { | |
| "name": "model.layers.18.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 22016, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 36282368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c089f4e50a8212105865e7927ead03f3" | |
| }, | |
| { | |
| "dataPath": "params_shard_84.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30482432, | |
| "records": [ | |
| { | |
| "name": "model.layers.17.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 12288, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2531328, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.17.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6750208, | |
| "byteOffset": 2531328 | |
| }, | |
| { | |
| "name": "model.layers.17.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 843776, | |
| "byteOffset": 9281536 | |
| }, | |
| { | |
| "name": "model.layers.18.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 10125312 | |
| }, | |
| { | |
| "name": "model.layers.18.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1104 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18087936, | |
| "byteOffset": 10133504 | |
| }, | |
| { | |
| "name": "model.layers.18.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 276 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2260992, | |
| "byteOffset": 28221440 | |
| } | |
| ], | |
| "md5sum": "baa160fd29ab4bdc146fb3f24667ceca" | |
| }, | |
| { | |
| "dataPath": "params_shard_85.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27325440, | |
| "records": [ | |
| { | |
| "name": "model.layers.18.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 22016, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4535296, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.18.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 4535296 | |
| }, | |
| { | |
| "name": "model.layers.18.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 12288, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20250624, | |
| "byteOffset": 4543488 | |
| }, | |
| { | |
| "name": "model.layers.18.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 12288, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2531328, | |
| "byteOffset": 24794112 | |
| } | |
| ], | |
| "md5sum": "8a1792f09eecf2efd6332906524cd2db" | |
| }, | |
| { | |
| "dataPath": "params_shard_86.bin", | |
| "format": "raw-shard", | |
| "nbytes": 36282368, | |
| "records": [ | |
| { | |
| "name": "model.layers.19.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 22016, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 36282368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "457099fbb5fca4e1630034c48c4c4cd4" | |
| }, | |
| { | |
| "dataPath": "params_shard_87.bin", | |
| "format": "raw-shard", | |
| "nbytes": 20250624, | |
| "records": [ | |
| { | |
| "name": "model.layers.19.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 12288, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20250624, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d37ed0690f7e631e6de4f891552cbe97" | |
| }, | |
| { | |
| "dataPath": "params_shard_88.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32494592, | |
| "records": [ | |
| { | |
| "name": "model.layers.18.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6750208, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.18.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 843776, | |
| "byteOffset": 6750208 | |
| }, | |
| { | |
| "name": "model.layers.19.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 7593984 | |
| }, | |
| { | |
| "name": "model.layers.19.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1104 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18087936, | |
| "byteOffset": 7602176 | |
| }, | |
| { | |
| "name": "model.layers.19.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 276 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2260992, | |
| "byteOffset": 25690112 | |
| }, | |
| { | |
| "name": "model.layers.19.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 22016, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4535296, | |
| "byteOffset": 27951104 | |
| }, | |
| { | |
| "name": "model.layers.19.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 32486400 | |
| } | |
| ], | |
| "md5sum": "d075798d5dcd6d2040e2291032b29c74" | |
| }, | |
| { | |
| "dataPath": "params_shard_89.bin", | |
| "format": "raw-shard", | |
| "nbytes": 36282368, | |
| "records": [ | |
| { | |
| "name": "model.layers.20.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 22016, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 36282368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "88104d111cb155de4bdd9f3cb6172667" | |
| }, | |
| { | |
| "dataPath": "params_shard_90.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30482432, | |
| "records": [ | |
| { | |
| "name": "model.layers.19.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 12288, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2531328, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.19.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6750208, | |
| "byteOffset": 2531328 | |
| }, | |
| { | |
| "name": "model.layers.19.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 843776, | |
| "byteOffset": 9281536 | |
| }, | |
| { | |
| "name": "model.layers.20.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 10125312 | |
| }, | |
| { | |
| "name": "model.layers.20.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1104 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18087936, | |
| "byteOffset": 10133504 | |
| }, | |
| { | |
| "name": "model.layers.20.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 276 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2260992, | |
| "byteOffset": 28221440 | |
| } | |
| ], | |
| "md5sum": "bc58f72778b1683dd1655f4f60574831" | |
| }, | |
| { | |
| "dataPath": "params_shard_91.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27325440, | |
| "records": [ | |
| { | |
| "name": "model.layers.20.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 22016, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4535296, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.20.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 4535296 | |
| }, | |
| { | |
| "name": "model.layers.20.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 12288, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20250624, | |
| "byteOffset": 4543488 | |
| }, | |
| { | |
| "name": "model.layers.20.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 12288, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2531328, | |
| "byteOffset": 24794112 | |
| } | |
| ], | |
| "md5sum": "0589f9cccea49fe3610734b60361a353" | |
| }, | |
| { | |
| "dataPath": "params_shard_92.bin", | |
| "format": "raw-shard", | |
| "nbytes": 36282368, | |
| "records": [ | |
| { | |
| "name": "model.layers.21.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 22016, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 36282368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "6bd46ac523c0e4b2a2263f76c0ef69bb" | |
| }, | |
| { | |
| "dataPath": "params_shard_93.bin", | |
| "format": "raw-shard", | |
| "nbytes": 20250624, | |
| "records": [ | |
| { | |
| "name": "model.layers.21.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 12288, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20250624, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "9d7b6c0530910d8a6627af7a9dc7340d" | |
| }, | |
| { | |
| "dataPath": "params_shard_94.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32494592, | |
| "records": [ | |
| { | |
| "name": "model.layers.20.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6750208, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.20.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 843776, | |
| "byteOffset": 6750208 | |
| }, | |
| { | |
| "name": "model.layers.21.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 7593984 | |
| }, | |
| { | |
| "name": "model.layers.21.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1104 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18087936, | |
| "byteOffset": 7602176 | |
| }, | |
| { | |
| "name": "model.layers.21.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 276 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2260992, | |
| "byteOffset": 25690112 | |
| }, | |
| { | |
| "name": "model.layers.21.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 22016, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4535296, | |
| "byteOffset": 27951104 | |
| }, | |
| { | |
| "name": "model.layers.21.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 32486400 | |
| } | |
| ], | |
| "md5sum": "f22827dfcea8f24fcb4addfd328b9cae" | |
| }, | |
| { | |
| "dataPath": "params_shard_95.bin", | |
| "format": "raw-shard", | |
| "nbytes": 36282368, | |
| "records": [ | |
| { | |
| "name": "model.layers.22.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 22016, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 36282368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "7070698d3525dcfbb4dd6d02002d934a" | |
| }, | |
| { | |
| "dataPath": "params_shard_96.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30482432, | |
| "records": [ | |
| { | |
| "name": "model.layers.21.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 12288, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2531328, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.21.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6750208, | |
| "byteOffset": 2531328 | |
| }, | |
| { | |
| "name": "model.layers.21.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 843776, | |
| "byteOffset": 9281536 | |
| }, | |
| { | |
| "name": "model.layers.22.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 10125312 | |
| }, | |
| { | |
| "name": "model.layers.22.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1104 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18087936, | |
| "byteOffset": 10133504 | |
| }, | |
| { | |
| "name": "model.layers.22.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 276 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2260992, | |
| "byteOffset": 28221440 | |
| } | |
| ], | |
| "md5sum": "7a87f85958bda94b5343276c281ba26f" | |
| }, | |
| { | |
| "dataPath": "params_shard_97.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27325440, | |
| "records": [ | |
| { | |
| "name": "model.layers.22.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 22016, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4535296, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.22.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 4535296 | |
| }, | |
| { | |
| "name": "model.layers.22.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 12288, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20250624, | |
| "byteOffset": 4543488 | |
| }, | |
| { | |
| "name": "model.layers.22.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 12288, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2531328, | |
| "byteOffset": 24794112 | |
| } | |
| ], | |
| "md5sum": "11cd68ad01464500faf104ed291a76f2" | |
| }, | |
| { | |
| "dataPath": "params_shard_98.bin", | |
| "format": "raw-shard", | |
| "nbytes": 36282368, | |
| "records": [ | |
| { | |
| "name": "model.layers.23.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 22016, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 36282368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "68769a31a8abe08412608e49b4566d4a" | |
| }, | |
| { | |
| "dataPath": "params_shard_99.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32379904, | |
| "records": [ | |
| { | |
| "name": "model.layers.22.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6750208, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.22.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 843776, | |
| "byteOffset": 6750208 | |
| }, | |
| { | |
| "name": "model.layers.23.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 22016, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4535296, | |
| "byteOffset": 7593984 | |
| }, | |
| { | |
| "name": "model.layers.23.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 12288, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20250624, | |
| "byteOffset": 12129280 | |
| } | |
| ], | |
| "md5sum": "5c0f5b5d300406d0c219823e92311ad4" | |
| }, | |
| { | |
| "dataPath": "params_shard_100.bin", | |
| "format": "raw-shard", | |
| "nbytes": 10125312, | |
| "records": [ | |
| { | |
| "name": "model.layers.23.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 12288, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2531328, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.23.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 412 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6750208, | |
| "byteOffset": 2531328 | |
| }, | |
| { | |
| "name": "model.layers.23.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 103 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 843776, | |
| "byteOffset": 9281536 | |
| } | |
| ], | |
| "md5sum": "e30a83fb4611c0d2556a52dbc086b3f7" | |
| } | |
| ] | |
| } |