| { | |
| "metadata": { | |
| "ParamSize": 325, | |
| "ParamBytes": 4517404672.0, | |
| "BitsPerParam": 4.500381277757404 | |
| }, | |
| "records": [ | |
| { | |
| "dataPath": "params_shard_0.bin", | |
| "format": "raw-shard", | |
| "nbytes": 262668288, | |
| "records": [ | |
| { | |
| "name": "lm_head.q_weight", | |
| "shape": [ | |
| 128256, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262668288, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ac87e150b7abff8f5e373138a275e757" | |
| }, | |
| { | |
| "dataPath": "params_shard_1.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29360128, | |
| "records": [ | |
| { | |
| "name": "model.layers.31.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1792 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 29360128, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "047e72c8050e810cdb07400dab9813cb" | |
| }, | |
| { | |
| "dataPath": "params_shard_2.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32841728, | |
| "records": [ | |
| { | |
| "name": "lm_head.q_scale", | |
| "shape": [ | |
| 128256, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32833536, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.31.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 32833536 | |
| } | |
| ], | |
| "md5sum": "be9925afc3bbb649702b98905ad06bbf" | |
| }, | |
| { | |
| "dataPath": "params_shard_3.bin", | |
| "format": "raw-shard", | |
| "nbytes": 262668288, | |
| "records": [ | |
| { | |
| "name": "model.embed_tokens.q_weight", | |
| "shape": [ | |
| 128256, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262668288, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ba53bcbc5a7ad9bdf66a93b23571dfe2" | |
| }, | |
| { | |
| "dataPath": "params_shard_4.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32833536, | |
| "records": [ | |
| { | |
| "name": "model.embed_tokens.q_scale", | |
| "shape": [ | |
| 128256, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32833536, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ec1f5c954a5962ff7a038ec0f8e110e2" | |
| }, | |
| { | |
| "dataPath": "params_shard_5.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33054720, | |
| "records": [ | |
| { | |
| "name": "model.layers.31.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 448 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.31.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 3670016 | |
| }, | |
| { | |
| "name": "model.norm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 3678208 | |
| }, | |
| { | |
| "name": "model.layers.0.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 3686400 | |
| }, | |
| { | |
| "name": "model.layers.0.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1792 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 29360128, | |
| "byteOffset": 3694592 | |
| } | |
| ], | |
| "md5sum": "ffd99a3ccf020e7354624070c563d400" | |
| }, | |
| { | |
| "dataPath": "params_shard_6.bin", | |
| "format": "raw-shard", | |
| "nbytes": 58720256, | |
| "records": [ | |
| { | |
| "name": "model.layers.0.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 28672, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 58720256, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "0e8291c355a29a1b12321eb856c4e698" | |
| }, | |
| { | |
| "dataPath": "params_shard_7.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25174016, | |
| "records": [ | |
| { | |
| "name": "model.layers.0.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 448 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.0.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28672, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7340032, | |
| "byteOffset": 3670016 | |
| }, | |
| { | |
| "name": "model.layers.0.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 11010048 | |
| }, | |
| { | |
| "name": "model.layers.0.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 6144, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 11018240 | |
| }, | |
| { | |
| "name": "model.layers.0.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 23601152 | |
| } | |
| ], | |
| "md5sum": "6465a42df00395ca1e2b6c310f0044e4" | |
| }, | |
| { | |
| "dataPath": "params_shard_8.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29360128, | |
| "records": [ | |
| { | |
| "name": "model.layers.1.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1792 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 29360128, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "eb13baed456ba5525bef68f5b7b335e0" | |
| }, | |
| { | |
| "dataPath": "params_shard_9.bin", | |
| "format": "raw-shard", | |
| "nbytes": 58720256, | |
| "records": [ | |
| { | |
| "name": "model.layers.1.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 28672, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 58720256, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "6fdec9b6589096955bc93f78c90e52fa" | |
| }, | |
| { | |
| "dataPath": "params_shard_10.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33046528, | |
| "records": [ | |
| { | |
| "name": "model.layers.0.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.0.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "model.layers.1.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 9437184 | |
| }, | |
| { | |
| "name": "model.layers.1.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 448 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 9445376 | |
| }, | |
| { | |
| "name": "model.layers.1.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28672, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7340032, | |
| "byteOffset": 13115392 | |
| }, | |
| { | |
| "name": "model.layers.1.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 20455424 | |
| }, | |
| { | |
| "name": "model.layers.1.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 6144, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 20463616 | |
| } | |
| ], | |
| "md5sum": "db40d941d2620d11497b376c1360c82e" | |
| }, | |
| { | |
| "dataPath": "params_shard_11.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29360128, | |
| "records": [ | |
| { | |
| "name": "model.layers.2.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1792 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 29360128, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "cefb0af7156b56a2ecf0ff89937329cd" | |
| }, | |
| { | |
| "dataPath": "params_shard_12.bin", | |
| "format": "raw-shard", | |
| "nbytes": 58720256, | |
| "records": [ | |
| { | |
| "name": "model.layers.2.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 28672, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 58720256, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "64f6d1ddd44b4dbaa235f7d9de4b40d9" | |
| }, | |
| { | |
| "dataPath": "params_shard_13.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22036480, | |
| "records": [ | |
| { | |
| "name": "model.layers.1.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.1.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 1572864 | |
| }, | |
| { | |
| "name": "model.layers.1.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 9961472 | |
| }, | |
| { | |
| "name": "model.layers.2.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 11010048 | |
| }, | |
| { | |
| "name": "model.layers.2.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 448 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 11018240 | |
| }, | |
| { | |
| "name": "model.layers.2.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28672, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7340032, | |
| "byteOffset": 14688256 | |
| }, | |
| { | |
| "name": "model.layers.2.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 22028288 | |
| } | |
| ], | |
| "md5sum": "a3b84aad00a14b9da6028e26537d6e07" | |
| }, | |
| { | |
| "dataPath": "params_shard_14.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29360128, | |
| "records": [ | |
| { | |
| "name": "model.layers.3.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1792 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 29360128, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "7ea4a686f03d242d42d43492b0467cb3" | |
| }, | |
| { | |
| "dataPath": "params_shard_15.bin", | |
| "format": "raw-shard", | |
| "nbytes": 58720256, | |
| "records": [ | |
| { | |
| "name": "model.layers.3.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 28672, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 58720256, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "bf0b7665e821015990b8594611360722" | |
| }, | |
| { | |
| "dataPath": "params_shard_16.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27271168, | |
| "records": [ | |
| { | |
| "name": "model.layers.2.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 6144, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.2.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.2.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "model.layers.2.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 22544384 | |
| }, | |
| { | |
| "name": "model.layers.3.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 23592960 | |
| }, | |
| { | |
| "name": "model.layers.3.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 448 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 23601152 | |
| } | |
| ], | |
| "md5sum": "cdb47a2db6aadef42612744053dd15b6" | |
| }, | |
| { | |
| "dataPath": "params_shard_17.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29360128, | |
| "records": [ | |
| { | |
| "name": "model.layers.4.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1792 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 29360128, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "19c53c0ab42ebd9ab0cec64975f21a16" | |
| }, | |
| { | |
| "dataPath": "params_shard_18.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30949376, | |
| "records": [ | |
| { | |
| "name": "model.layers.3.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28672, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7340032, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.3.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 7340032 | |
| }, | |
| { | |
| "name": "model.layers.3.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 6144, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 7348224 | |
| }, | |
| { | |
| "name": "model.layers.3.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 19931136 | |
| }, | |
| { | |
| "name": "model.layers.3.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 21504000 | |
| }, | |
| { | |
| "name": "model.layers.3.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 29892608 | |
| }, | |
| { | |
| "name": "model.layers.4.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 30941184 | |
| } | |
| ], | |
| "md5sum": "50f8d7af22d57cee64048105dbe62443" | |
| }, | |
| { | |
| "dataPath": "params_shard_19.bin", | |
| "format": "raw-shard", | |
| "nbytes": 58720256, | |
| "records": [ | |
| { | |
| "name": "model.layers.4.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 28672, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 58720256, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f228b2fdc7b6f510c11559632c00efff" | |
| }, | |
| { | |
| "dataPath": "params_shard_20.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25174016, | |
| "records": [ | |
| { | |
| "name": "model.layers.4.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 448 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.4.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28672, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7340032, | |
| "byteOffset": 3670016 | |
| }, | |
| { | |
| "name": "model.layers.4.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 11010048 | |
| }, | |
| { | |
| "name": "model.layers.4.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 6144, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 11018240 | |
| }, | |
| { | |
| "name": "model.layers.4.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 23601152 | |
| } | |
| ], | |
| "md5sum": "82787a462202839a793614f2b010d461" | |
| }, | |
| { | |
| "dataPath": "params_shard_21.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29360128, | |
| "records": [ | |
| { | |
| "name": "model.layers.5.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1792 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 29360128, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "4d89bc41f268c9209ea9f65e447a3e5f" | |
| }, | |
| { | |
| "dataPath": "params_shard_22.bin", | |
| "format": "raw-shard", | |
| "nbytes": 58720256, | |
| "records": [ | |
| { | |
| "name": "model.layers.5.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 28672, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 58720256, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b2b6e9c65721d7f1166b3a3598b13d54" | |
| }, | |
| { | |
| "dataPath": "params_shard_23.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33046528, | |
| "records": [ | |
| { | |
| "name": "model.layers.4.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.4.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "model.layers.5.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 9437184 | |
| }, | |
| { | |
| "name": "model.layers.5.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 448 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 9445376 | |
| }, | |
| { | |
| "name": "model.layers.5.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28672, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7340032, | |
| "byteOffset": 13115392 | |
| }, | |
| { | |
| "name": "model.layers.5.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 20455424 | |
| }, | |
| { | |
| "name": "model.layers.5.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 6144, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 20463616 | |
| } | |
| ], | |
| "md5sum": "0cae387318c75481d4e72636fb3387a0" | |
| }, | |
| { | |
| "dataPath": "params_shard_24.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29360128, | |
| "records": [ | |
| { | |
| "name": "model.layers.6.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1792 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 29360128, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "288e87e0d820710247186b131712e38c" | |
| }, | |
| { | |
| "dataPath": "params_shard_25.bin", | |
| "format": "raw-shard", | |
| "nbytes": 58720256, | |
| "records": [ | |
| { | |
| "name": "model.layers.6.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 28672, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 58720256, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "6d451f8fb458cc40d40c91ba077a7d8a" | |
| }, | |
| { | |
| "dataPath": "params_shard_26.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22036480, | |
| "records": [ | |
| { | |
| "name": "model.layers.5.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.5.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 1572864 | |
| }, | |
| { | |
| "name": "model.layers.5.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 9961472 | |
| }, | |
| { | |
| "name": "model.layers.6.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 11010048 | |
| }, | |
| { | |
| "name": "model.layers.6.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 448 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 11018240 | |
| }, | |
| { | |
| "name": "model.layers.6.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28672, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7340032, | |
| "byteOffset": 14688256 | |
| }, | |
| { | |
| "name": "model.layers.6.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 22028288 | |
| } | |
| ], | |
| "md5sum": "e96167246cd5c2937cbc21e28f4b628d" | |
| }, | |
| { | |
| "dataPath": "params_shard_27.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29360128, | |
| "records": [ | |
| { | |
| "name": "model.layers.7.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1792 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 29360128, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ed37b8f747e9b5978e63fdecd2ee8d58" | |
| }, | |
| { | |
| "dataPath": "params_shard_28.bin", | |
| "format": "raw-shard", | |
| "nbytes": 58720256, | |
| "records": [ | |
| { | |
| "name": "model.layers.7.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 28672, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 58720256, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b1c14ba57d3c2a3f8e54371b43a0acf6" | |
| }, | |
| { | |
| "dataPath": "params_shard_29.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27271168, | |
| "records": [ | |
| { | |
| "name": "model.layers.6.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 6144, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.6.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.6.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "model.layers.6.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 22544384 | |
| }, | |
| { | |
| "name": "model.layers.7.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 23592960 | |
| }, | |
| { | |
| "name": "model.layers.7.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 448 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 23601152 | |
| } | |
| ], | |
| "md5sum": "5bc993a66e47925c62b7322cd13cf10c" | |
| }, | |
| { | |
| "dataPath": "params_shard_30.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29360128, | |
| "records": [ | |
| { | |
| "name": "model.layers.8.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1792 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 29360128, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f3a3476b4cebabd153abed03b1a4913b" | |
| }, | |
| { | |
| "dataPath": "params_shard_31.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30949376, | |
| "records": [ | |
| { | |
| "name": "model.layers.7.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28672, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7340032, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.7.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 7340032 | |
| }, | |
| { | |
| "name": "model.layers.7.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 6144, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 7348224 | |
| }, | |
| { | |
| "name": "model.layers.7.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 19931136 | |
| }, | |
| { | |
| "name": "model.layers.7.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 21504000 | |
| }, | |
| { | |
| "name": "model.layers.7.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 29892608 | |
| }, | |
| { | |
| "name": "model.layers.8.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 30941184 | |
| } | |
| ], | |
| "md5sum": "354190bdf97967bdf2768f67b56dbea2" | |
| }, | |
| { | |
| "dataPath": "params_shard_32.bin", | |
| "format": "raw-shard", | |
| "nbytes": 58720256, | |
| "records": [ | |
| { | |
| "name": "model.layers.8.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 28672, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 58720256, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "28df08f1f95d34ed983dbf16b3296fd1" | |
| }, | |
| { | |
| "dataPath": "params_shard_33.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25174016, | |
| "records": [ | |
| { | |
| "name": "model.layers.8.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 448 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.8.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28672, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7340032, | |
| "byteOffset": 3670016 | |
| }, | |
| { | |
| "name": "model.layers.8.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 11010048 | |
| }, | |
| { | |
| "name": "model.layers.8.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 6144, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 11018240 | |
| }, | |
| { | |
| "name": "model.layers.8.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 23601152 | |
| } | |
| ], | |
| "md5sum": "e3e1b052bbc615ff5c7a95f53078573d" | |
| }, | |
| { | |
| "dataPath": "params_shard_34.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29360128, | |
| "records": [ | |
| { | |
| "name": "model.layers.10.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1792 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 29360128, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "3ca0e9f5d5c5883e17bc104031df7940" | |
| }, | |
| { | |
| "dataPath": "params_shard_35.bin", | |
| "format": "raw-shard", | |
| "nbytes": 58720256, | |
| "records": [ | |
| { | |
| "name": "model.layers.10.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 28672, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 58720256, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "23145869fb49546507855380110dfcb8" | |
| }, | |
| { | |
| "dataPath": "params_shard_36.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33046528, | |
| "records": [ | |
| { | |
| "name": "model.layers.8.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.8.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "model.layers.10.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 9437184 | |
| }, | |
| { | |
| "name": "model.layers.10.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 448 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 9445376 | |
| }, | |
| { | |
| "name": "model.layers.10.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28672, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7340032, | |
| "byteOffset": 13115392 | |
| }, | |
| { | |
| "name": "model.layers.10.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 20455424 | |
| }, | |
| { | |
| "name": "model.layers.10.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 6144, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 20463616 | |
| } | |
| ], | |
| "md5sum": "bc9fb0488af76073c92e53d38ef4bf4e" | |
| }, | |
| { | |
| "dataPath": "params_shard_37.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29360128, | |
| "records": [ | |
| { | |
| "name": "model.layers.11.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1792 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 29360128, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "54416218d3f6830936e197751b3d4f93" | |
| }, | |
| { | |
| "dataPath": "params_shard_38.bin", | |
| "format": "raw-shard", | |
| "nbytes": 58720256, | |
| "records": [ | |
| { | |
| "name": "model.layers.11.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 28672, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 58720256, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "cb3a6551f579029f5d70861c15041cf6" | |
| }, | |
| { | |
| "dataPath": "params_shard_39.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22036480, | |
| "records": [ | |
| { | |
| "name": "model.layers.10.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.10.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 1572864 | |
| }, | |
| { | |
| "name": "model.layers.10.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 9961472 | |
| }, | |
| { | |
| "name": "model.layers.11.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 11010048 | |
| }, | |
| { | |
| "name": "model.layers.11.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 448 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 11018240 | |
| }, | |
| { | |
| "name": "model.layers.11.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28672, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7340032, | |
| "byteOffset": 14688256 | |
| }, | |
| { | |
| "name": "model.layers.11.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 22028288 | |
| } | |
| ], | |
| "md5sum": "3a79ad251325f5aacf7322090564fd1e" | |
| }, | |
| { | |
| "dataPath": "params_shard_40.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29360128, | |
| "records": [ | |
| { | |
| "name": "model.layers.12.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1792 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 29360128, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e6d8978a453972af68648dee1d9d5e62" | |
| }, | |
| { | |
| "dataPath": "params_shard_41.bin", | |
| "format": "raw-shard", | |
| "nbytes": 58720256, | |
| "records": [ | |
| { | |
| "name": "model.layers.12.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 28672, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 58720256, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c6ba3d1d2b976f30b68b304d424c6625" | |
| }, | |
| { | |
| "dataPath": "params_shard_42.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27271168, | |
| "records": [ | |
| { | |
| "name": "model.layers.11.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 6144, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.11.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.11.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "model.layers.11.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 22544384 | |
| }, | |
| { | |
| "name": "model.layers.12.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 23592960 | |
| }, | |
| { | |
| "name": "model.layers.12.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 448 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 23601152 | |
| } | |
| ], | |
| "md5sum": "c76f972fe136526402fe4c4a834ebe75" | |
| }, | |
| { | |
| "dataPath": "params_shard_43.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29360128, | |
| "records": [ | |
| { | |
| "name": "model.layers.13.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1792 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 29360128, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b88f8f072ed4ef17e2040ffb583832d0" | |
| }, | |
| { | |
| "dataPath": "params_shard_44.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30949376, | |
| "records": [ | |
| { | |
| "name": "model.layers.12.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28672, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7340032, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.12.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 7340032 | |
| }, | |
| { | |
| "name": "model.layers.12.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 6144, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 7348224 | |
| }, | |
| { | |
| "name": "model.layers.12.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 19931136 | |
| }, | |
| { | |
| "name": "model.layers.12.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 21504000 | |
| }, | |
| { | |
| "name": "model.layers.12.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 29892608 | |
| }, | |
| { | |
| "name": "model.layers.13.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 30941184 | |
| } | |
| ], | |
| "md5sum": "bc2983e9c8cb8ba42901dffb47f16082" | |
| }, | |
| { | |
| "dataPath": "params_shard_45.bin", | |
| "format": "raw-shard", | |
| "nbytes": 58720256, | |
| "records": [ | |
| { | |
| "name": "model.layers.13.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 28672, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 58720256, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "63226c30e8100d834352c8becc927f6c" | |
| }, | |
| { | |
| "dataPath": "params_shard_46.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25174016, | |
| "records": [ | |
| { | |
| "name": "model.layers.13.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 448 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.13.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28672, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7340032, | |
| "byteOffset": 3670016 | |
| }, | |
| { | |
| "name": "model.layers.13.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 11010048 | |
| }, | |
| { | |
| "name": "model.layers.13.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 6144, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 11018240 | |
| }, | |
| { | |
| "name": "model.layers.13.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 23601152 | |
| } | |
| ], | |
| "md5sum": "614c94696150b3c5383b985d1e4bba3f" | |
| }, | |
| { | |
| "dataPath": "params_shard_47.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29360128, | |
| "records": [ | |
| { | |
| "name": "model.layers.14.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1792 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 29360128, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "5d4ff648b4e74f953f3be1c0da5c3e4a" | |
| }, | |
| { | |
| "dataPath": "params_shard_48.bin", | |
| "format": "raw-shard", | |
| "nbytes": 58720256, | |
| "records": [ | |
| { | |
| "name": "model.layers.14.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 28672, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 58720256, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "a2325bde3dd1886449d67841cc0be76e" | |
| }, | |
| { | |
| "dataPath": "params_shard_49.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33046528, | |
| "records": [ | |
| { | |
| "name": "model.layers.13.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.13.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "model.layers.14.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 9437184 | |
| }, | |
| { | |
| "name": "model.layers.14.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 448 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 9445376 | |
| }, | |
| { | |
| "name": "model.layers.14.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28672, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7340032, | |
| "byteOffset": 13115392 | |
| }, | |
| { | |
| "name": "model.layers.14.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 20455424 | |
| }, | |
| { | |
| "name": "model.layers.14.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 6144, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 20463616 | |
| } | |
| ], | |
| "md5sum": "61b39d83a4bd5889ae3f60e955cd626d" | |
| }, | |
| { | |
| "dataPath": "params_shard_50.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29360128, | |
| "records": [ | |
| { | |
| "name": "model.layers.15.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1792 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 29360128, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "4dedb753c39a40284fff37ebd734f02a" | |
| }, | |
| { | |
| "dataPath": "params_shard_51.bin", | |
| "format": "raw-shard", | |
| "nbytes": 58720256, | |
| "records": [ | |
| { | |
| "name": "model.layers.15.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 28672, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 58720256, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "51ee749bdfd00dd6777c9eb84f8dbe39" | |
| }, | |
| { | |
| "dataPath": "params_shard_52.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22036480, | |
| "records": [ | |
| { | |
| "name": "model.layers.14.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.14.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 1572864 | |
| }, | |
| { | |
| "name": "model.layers.14.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 9961472 | |
| }, | |
| { | |
| "name": "model.layers.15.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 11010048 | |
| }, | |
| { | |
| "name": "model.layers.15.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 448 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 11018240 | |
| }, | |
| { | |
| "name": "model.layers.15.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28672, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7340032, | |
| "byteOffset": 14688256 | |
| }, | |
| { | |
| "name": "model.layers.15.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 22028288 | |
| } | |
| ], | |
| "md5sum": "8c277e887a84219d0627f3ce5f2eac7a" | |
| }, | |
| { | |
| "dataPath": "params_shard_53.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29360128, | |
| "records": [ | |
| { | |
| "name": "model.layers.16.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1792 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 29360128, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "6b3ffe6ce6454267e43f59e54fe211fd" | |
| }, | |
| { | |
| "dataPath": "params_shard_54.bin", | |
| "format": "raw-shard", | |
| "nbytes": 58720256, | |
| "records": [ | |
| { | |
| "name": "model.layers.16.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 28672, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 58720256, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "398bfcdecb97d89aed8344b30b312db4" | |
| }, | |
| { | |
| "dataPath": "params_shard_55.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27271168, | |
| "records": [ | |
| { | |
| "name": "model.layers.15.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 6144, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.15.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.15.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "model.layers.15.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 22544384 | |
| }, | |
| { | |
| "name": "model.layers.16.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 23592960 | |
| }, | |
| { | |
| "name": "model.layers.16.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 448 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 23601152 | |
| } | |
| ], | |
| "md5sum": "e1addbcdf12c97d1ec1095d2ee8f87ad" | |
| }, | |
| { | |
| "dataPath": "params_shard_56.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29360128, | |
| "records": [ | |
| { | |
| "name": "model.layers.17.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1792 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 29360128, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "7001a51110910ae0b0fc08bdf47c79ba" | |
| }, | |
| { | |
| "dataPath": "params_shard_57.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30949376, | |
| "records": [ | |
| { | |
| "name": "model.layers.16.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28672, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7340032, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.16.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 7340032 | |
| }, | |
| { | |
| "name": "model.layers.16.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 6144, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 7348224 | |
| }, | |
| { | |
| "name": "model.layers.16.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 19931136 | |
| }, | |
| { | |
| "name": "model.layers.16.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 21504000 | |
| }, | |
| { | |
| "name": "model.layers.16.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 29892608 | |
| }, | |
| { | |
| "name": "model.layers.17.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 30941184 | |
| } | |
| ], | |
| "md5sum": "3f39955a1a92708b083f6a136dc69746" | |
| }, | |
| { | |
| "dataPath": "params_shard_58.bin", | |
| "format": "raw-shard", | |
| "nbytes": 58720256, | |
| "records": [ | |
| { | |
| "name": "model.layers.17.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 28672, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 58720256, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "204c17232319a225b8db0c546a3bde36" | |
| }, | |
| { | |
| "dataPath": "params_shard_59.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25174016, | |
| "records": [ | |
| { | |
| "name": "model.layers.17.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 448 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.17.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28672, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7340032, | |
| "byteOffset": 3670016 | |
| }, | |
| { | |
| "name": "model.layers.17.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 11010048 | |
| }, | |
| { | |
| "name": "model.layers.17.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 6144, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 11018240 | |
| }, | |
| { | |
| "name": "model.layers.17.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 23601152 | |
| } | |
| ], | |
| "md5sum": "196c8b422cb8c942b1c2c99f2f10efe3" | |
| }, | |
| { | |
| "dataPath": "params_shard_60.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29360128, | |
| "records": [ | |
| { | |
| "name": "model.layers.18.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1792 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 29360128, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "0daf1cb2075e25c61cca4bf4a67a005b" | |
| }, | |
| { | |
| "dataPath": "params_shard_61.bin", | |
| "format": "raw-shard", | |
| "nbytes": 58720256, | |
| "records": [ | |
| { | |
| "name": "model.layers.18.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 28672, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 58720256, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "84976ff8368c844cbe6792db4cd86569" | |
| }, | |
| { | |
| "dataPath": "params_shard_62.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33046528, | |
| "records": [ | |
| { | |
| "name": "model.layers.17.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.17.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "model.layers.18.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 9437184 | |
| }, | |
| { | |
| "name": "model.layers.18.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 448 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 9445376 | |
| }, | |
| { | |
| "name": "model.layers.18.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28672, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7340032, | |
| "byteOffset": 13115392 | |
| }, | |
| { | |
| "name": "model.layers.18.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 20455424 | |
| }, | |
| { | |
| "name": "model.layers.18.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 6144, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 20463616 | |
| } | |
| ], | |
| "md5sum": "4fb6f4db18c3825b0bc85d0d14652351" | |
| }, | |
| { | |
| "dataPath": "params_shard_63.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29360128, | |
| "records": [ | |
| { | |
| "name": "model.layers.19.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1792 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 29360128, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c51ff8d30fca6842e3b9c755b0c030e9" | |
| }, | |
| { | |
| "dataPath": "params_shard_64.bin", | |
| "format": "raw-shard", | |
| "nbytes": 58720256, | |
| "records": [ | |
| { | |
| "name": "model.layers.19.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 28672, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 58720256, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "0009c16f898703063575de6439906801" | |
| }, | |
| { | |
| "dataPath": "params_shard_65.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22036480, | |
| "records": [ | |
| { | |
| "name": "model.layers.18.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.18.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 1572864 | |
| }, | |
| { | |
| "name": "model.layers.18.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 9961472 | |
| }, | |
| { | |
| "name": "model.layers.19.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 11010048 | |
| }, | |
| { | |
| "name": "model.layers.19.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 448 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 11018240 | |
| }, | |
| { | |
| "name": "model.layers.19.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28672, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7340032, | |
| "byteOffset": 14688256 | |
| }, | |
| { | |
| "name": "model.layers.19.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 22028288 | |
| } | |
| ], | |
| "md5sum": "ccfdf7bab11aaac26160a045911583b4" | |
| }, | |
| { | |
| "dataPath": "params_shard_66.bin", | |
| "format": "raw-shard", | |
| "nbytes": 58720256, | |
| "records": [ | |
| { | |
| "name": "model.layers.20.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 28672, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 58720256, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "793654fabf99ae05a4de6a8786719d89" | |
| }, | |
| { | |
| "dataPath": "params_shard_67.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30932992, | |
| "records": [ | |
| { | |
| "name": "model.layers.19.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 6144, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.19.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.19.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "model.layers.19.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 22544384 | |
| }, | |
| { | |
| "name": "model.layers.20.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28672, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7340032, | |
| "byteOffset": 23592960 | |
| } | |
| ], | |
| "md5sum": "6bec3473026ec0ca6b0335269907cbd9" | |
| }, | |
| { | |
| "dataPath": "params_shard_68.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29360128, | |
| "records": [ | |
| { | |
| "name": "model.layers.9.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1792 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 29360128, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "07025e60d38faff80d12c976bb7f3a17" | |
| }, | |
| { | |
| "dataPath": "params_shard_69.bin", | |
| "format": "raw-shard", | |
| "nbytes": 58720256, | |
| "records": [ | |
| { | |
| "name": "model.layers.9.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 28672, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 58720256, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "0e933529d3ea9933c3c7ae1155a17e97" | |
| }, | |
| { | |
| "dataPath": "params_shard_70.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27271168, | |
| "records": [ | |
| { | |
| "name": "model.layers.20.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 6144, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.20.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.20.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "model.layers.20.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 22544384 | |
| }, | |
| { | |
| "name": "model.layers.9.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 23592960 | |
| }, | |
| { | |
| "name": "model.layers.9.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 448 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 23601152 | |
| } | |
| ], | |
| "md5sum": "e33f1e27ebcabb554f3d19946d0e9694" | |
| }, | |
| { | |
| "dataPath": "params_shard_71.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29360128, | |
| "records": [ | |
| { | |
| "name": "model.layers.20.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1792 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 29360128, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "8c93eec8ef933dd6a80080ed16a8bae7" | |
| }, | |
| { | |
| "dataPath": "params_shard_72.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30949376, | |
| "records": [ | |
| { | |
| "name": "model.layers.9.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28672, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7340032, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.9.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 7340032 | |
| }, | |
| { | |
| "name": "model.layers.9.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 6144, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 7348224 | |
| }, | |
| { | |
| "name": "model.layers.9.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 19931136 | |
| }, | |
| { | |
| "name": "model.layers.9.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 21504000 | |
| }, | |
| { | |
| "name": "model.layers.9.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 29892608 | |
| }, | |
| { | |
| "name": "model.layers.20.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 30941184 | |
| } | |
| ], | |
| "md5sum": "949fe332c9e3583696f6096efb85a7d9" | |
| }, | |
| { | |
| "dataPath": "params_shard_73.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33046528, | |
| "records": [ | |
| { | |
| "name": "model.layers.20.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 448 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.20.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 3670016 | |
| }, | |
| { | |
| "name": "model.layers.21.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 3678208 | |
| }, | |
| { | |
| "name": "model.layers.21.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1792 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 29360128, | |
| "byteOffset": 3686400 | |
| } | |
| ], | |
| "md5sum": "c41a48d1267daa7cedc41b7fa75b4639" | |
| }, | |
| { | |
| "dataPath": "params_shard_74.bin", | |
| "format": "raw-shard", | |
| "nbytes": 58720256, | |
| "records": [ | |
| { | |
| "name": "model.layers.21.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 28672, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 58720256, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ddb05f063aef716f5b37433c0c29c414" | |
| }, | |
| { | |
| "dataPath": "params_shard_75.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25174016, | |
| "records": [ | |
| { | |
| "name": "model.layers.21.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 448 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.21.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28672, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7340032, | |
| "byteOffset": 3670016 | |
| }, | |
| { | |
| "name": "model.layers.21.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 11010048 | |
| }, | |
| { | |
| "name": "model.layers.21.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 6144, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 11018240 | |
| }, | |
| { | |
| "name": "model.layers.21.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 23601152 | |
| } | |
| ], | |
| "md5sum": "8ab796b789957309529c87aa8c78f298" | |
| }, | |
| { | |
| "dataPath": "params_shard_76.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29360128, | |
| "records": [ | |
| { | |
| "name": "model.layers.22.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1792 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 29360128, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "a473b3d887615e7a02d9a2f3f2ce3057" | |
| }, | |
| { | |
| "dataPath": "params_shard_77.bin", | |
| "format": "raw-shard", | |
| "nbytes": 58720256, | |
| "records": [ | |
| { | |
| "name": "model.layers.22.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 28672, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 58720256, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b33b78be9123e3aaf955237b022feb23" | |
| }, | |
| { | |
| "dataPath": "params_shard_78.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33046528, | |
| "records": [ | |
| { | |
| "name": "model.layers.21.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.21.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "model.layers.22.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 9437184 | |
| }, | |
| { | |
| "name": "model.layers.22.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 448 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 9445376 | |
| }, | |
| { | |
| "name": "model.layers.22.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28672, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7340032, | |
| "byteOffset": 13115392 | |
| }, | |
| { | |
| "name": "model.layers.22.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 20455424 | |
| }, | |
| { | |
| "name": "model.layers.22.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 6144, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 20463616 | |
| } | |
| ], | |
| "md5sum": "d9dba0417b7300070e8cd419c56bd7da" | |
| }, | |
| { | |
| "dataPath": "params_shard_79.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29360128, | |
| "records": [ | |
| { | |
| "name": "model.layers.23.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1792 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 29360128, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "eecca75430d0fedad38293be502a9ba6" | |
| }, | |
| { | |
| "dataPath": "params_shard_80.bin", | |
| "format": "raw-shard", | |
| "nbytes": 58720256, | |
| "records": [ | |
| { | |
| "name": "model.layers.23.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 28672, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 58720256, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "4c2aa01a4c51ed56268254c347515600" | |
| }, | |
| { | |
| "dataPath": "params_shard_81.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22036480, | |
| "records": [ | |
| { | |
| "name": "model.layers.22.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.22.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 1572864 | |
| }, | |
| { | |
| "name": "model.layers.22.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 9961472 | |
| }, | |
| { | |
| "name": "model.layers.23.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 11010048 | |
| }, | |
| { | |
| "name": "model.layers.23.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 448 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 11018240 | |
| }, | |
| { | |
| "name": "model.layers.23.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28672, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7340032, | |
| "byteOffset": 14688256 | |
| }, | |
| { | |
| "name": "model.layers.23.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 22028288 | |
| } | |
| ], | |
| "md5sum": "9241d4b49e77443f348d69376cde0874" | |
| }, | |
| { | |
| "dataPath": "params_shard_82.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29360128, | |
| "records": [ | |
| { | |
| "name": "model.layers.24.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1792 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 29360128, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "51e24f069e5205eae6c165d965ed4d4b" | |
| }, | |
| { | |
| "dataPath": "params_shard_83.bin", | |
| "format": "raw-shard", | |
| "nbytes": 58720256, | |
| "records": [ | |
| { | |
| "name": "model.layers.24.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 28672, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 58720256, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "9059c21fb71e18b79831f7410f0df25d" | |
| }, | |
| { | |
| "dataPath": "params_shard_84.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27271168, | |
| "records": [ | |
| { | |
| "name": "model.layers.23.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 6144, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.23.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.23.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "model.layers.23.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 22544384 | |
| }, | |
| { | |
| "name": "model.layers.24.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 23592960 | |
| }, | |
| { | |
| "name": "model.layers.24.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 448 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 23601152 | |
| } | |
| ], | |
| "md5sum": "4bbe43899903669a23fdc0e1e02d8f95" | |
| }, | |
| { | |
| "dataPath": "params_shard_85.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29360128, | |
| "records": [ | |
| { | |
| "name": "model.layers.25.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1792 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 29360128, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "66ba736f7900066a6aa906d409035739" | |
| }, | |
| { | |
| "dataPath": "params_shard_86.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30949376, | |
| "records": [ | |
| { | |
| "name": "model.layers.24.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28672, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7340032, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.24.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 7340032 | |
| }, | |
| { | |
| "name": "model.layers.24.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 6144, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 7348224 | |
| }, | |
| { | |
| "name": "model.layers.24.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 19931136 | |
| }, | |
| { | |
| "name": "model.layers.24.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 21504000 | |
| }, | |
| { | |
| "name": "model.layers.24.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 29892608 | |
| }, | |
| { | |
| "name": "model.layers.25.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 30941184 | |
| } | |
| ], | |
| "md5sum": "ba6892118056acb20829e555f431ad92" | |
| }, | |
| { | |
| "dataPath": "params_shard_87.bin", | |
| "format": "raw-shard", | |
| "nbytes": 58720256, | |
| "records": [ | |
| { | |
| "name": "model.layers.25.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 28672, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 58720256, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "284f24538c284ef1c550024551076b7a" | |
| }, | |
| { | |
| "dataPath": "params_shard_88.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25174016, | |
| "records": [ | |
| { | |
| "name": "model.layers.25.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 448 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.25.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28672, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7340032, | |
| "byteOffset": 3670016 | |
| }, | |
| { | |
| "name": "model.layers.25.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 11010048 | |
| }, | |
| { | |
| "name": "model.layers.25.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 6144, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 11018240 | |
| }, | |
| { | |
| "name": "model.layers.25.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 23601152 | |
| } | |
| ], | |
| "md5sum": "69214974529417d5a0f4a8e7203a60af" | |
| }, | |
| { | |
| "dataPath": "params_shard_89.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29360128, | |
| "records": [ | |
| { | |
| "name": "model.layers.26.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1792 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 29360128, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "87f40c2fb3a277a681783e067d378967" | |
| }, | |
| { | |
| "dataPath": "params_shard_90.bin", | |
| "format": "raw-shard", | |
| "nbytes": 58720256, | |
| "records": [ | |
| { | |
| "name": "model.layers.26.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 28672, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 58720256, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d497a85a004274f12aaf8b4dd0fb289c" | |
| }, | |
| { | |
| "dataPath": "params_shard_91.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33046528, | |
| "records": [ | |
| { | |
| "name": "model.layers.25.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.25.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "model.layers.26.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 9437184 | |
| }, | |
| { | |
| "name": "model.layers.26.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 448 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 9445376 | |
| }, | |
| { | |
| "name": "model.layers.26.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28672, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7340032, | |
| "byteOffset": 13115392 | |
| }, | |
| { | |
| "name": "model.layers.26.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 20455424 | |
| }, | |
| { | |
| "name": "model.layers.26.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 6144, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 20463616 | |
| } | |
| ], | |
| "md5sum": "361723cb1523daee8081a0b0bf93c192" | |
| }, | |
| { | |
| "dataPath": "params_shard_92.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29360128, | |
| "records": [ | |
| { | |
| "name": "model.layers.27.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1792 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 29360128, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "0df3662b56d83b413d7d87feda320952" | |
| }, | |
| { | |
| "dataPath": "params_shard_93.bin", | |
| "format": "raw-shard", | |
| "nbytes": 58720256, | |
| "records": [ | |
| { | |
| "name": "model.layers.27.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 28672, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 58720256, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "fb6c4b10d898476013d190bd71addb3d" | |
| }, | |
| { | |
| "dataPath": "params_shard_94.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22036480, | |
| "records": [ | |
| { | |
| "name": "model.layers.26.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.26.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 1572864 | |
| }, | |
| { | |
| "name": "model.layers.26.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 9961472 | |
| }, | |
| { | |
| "name": "model.layers.27.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 11010048 | |
| }, | |
| { | |
| "name": "model.layers.27.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 448 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 11018240 | |
| }, | |
| { | |
| "name": "model.layers.27.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28672, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7340032, | |
| "byteOffset": 14688256 | |
| }, | |
| { | |
| "name": "model.layers.27.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 22028288 | |
| } | |
| ], | |
| "md5sum": "bd320a860bf0b6c460741d88fab39902" | |
| }, | |
| { | |
| "dataPath": "params_shard_95.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29360128, | |
| "records": [ | |
| { | |
| "name": "model.layers.28.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1792 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 29360128, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "93721186742df4d223fa5f9972c1b223" | |
| }, | |
| { | |
| "dataPath": "params_shard_96.bin", | |
| "format": "raw-shard", | |
| "nbytes": 58720256, | |
| "records": [ | |
| { | |
| "name": "model.layers.28.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 28672, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 58720256, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "2f0ac3b7ab8da85579dc135cb320b8ac" | |
| }, | |
| { | |
| "dataPath": "params_shard_97.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27271168, | |
| "records": [ | |
| { | |
| "name": "model.layers.27.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 6144, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.27.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.27.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "model.layers.27.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 22544384 | |
| }, | |
| { | |
| "name": "model.layers.28.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 23592960 | |
| }, | |
| { | |
| "name": "model.layers.28.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 448 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 23601152 | |
| } | |
| ], | |
| "md5sum": "b73f3cdbdbbaadd51792e4770ad64f09" | |
| }, | |
| { | |
| "dataPath": "params_shard_98.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29360128, | |
| "records": [ | |
| { | |
| "name": "model.layers.29.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1792 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 29360128, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "4c4d11a315216695309d623d9518b776" | |
| }, | |
| { | |
| "dataPath": "params_shard_99.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30949376, | |
| "records": [ | |
| { | |
| "name": "model.layers.28.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28672, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7340032, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.28.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 7340032 | |
| }, | |
| { | |
| "name": "model.layers.28.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 6144, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 7348224 | |
| }, | |
| { | |
| "name": "model.layers.28.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 19931136 | |
| }, | |
| { | |
| "name": "model.layers.28.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 21504000 | |
| }, | |
| { | |
| "name": "model.layers.28.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 29892608 | |
| }, | |
| { | |
| "name": "model.layers.29.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 30941184 | |
| } | |
| ], | |
| "md5sum": "6effdda9b8ae4e9abe03a1bfe242efe9" | |
| }, | |
| { | |
| "dataPath": "params_shard_100.bin", | |
| "format": "raw-shard", | |
| "nbytes": 58720256, | |
| "records": [ | |
| { | |
| "name": "model.layers.29.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 28672, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 58720256, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "fc757a65957c3d82c87f177256c3a79d" | |
| }, | |
| { | |
| "dataPath": "params_shard_101.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25174016, | |
| "records": [ | |
| { | |
| "name": "model.layers.29.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 448 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.29.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28672, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7340032, | |
| "byteOffset": 3670016 | |
| }, | |
| { | |
| "name": "model.layers.29.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 11010048 | |
| }, | |
| { | |
| "name": "model.layers.29.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 6144, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 11018240 | |
| }, | |
| { | |
| "name": "model.layers.29.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 23601152 | |
| } | |
| ], | |
| "md5sum": "adf979db3d14eb781a6bfafb6962b488" | |
| }, | |
| { | |
| "dataPath": "params_shard_102.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29360128, | |
| "records": [ | |
| { | |
| "name": "model.layers.30.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1792 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 29360128, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "42f4e8212326a6616e5f4429fbb70c14" | |
| }, | |
| { | |
| "dataPath": "params_shard_103.bin", | |
| "format": "raw-shard", | |
| "nbytes": 58720256, | |
| "records": [ | |
| { | |
| "name": "model.layers.30.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 28672, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 58720256, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "5904b77532db0e3ce0162133ad2ad9bc" | |
| }, | |
| { | |
| "dataPath": "params_shard_104.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33046528, | |
| "records": [ | |
| { | |
| "name": "model.layers.29.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.29.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "model.layers.30.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 9437184 | |
| }, | |
| { | |
| "name": "model.layers.30.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 448 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 9445376 | |
| }, | |
| { | |
| "name": "model.layers.30.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28672, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7340032, | |
| "byteOffset": 13115392 | |
| }, | |
| { | |
| "name": "model.layers.30.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 20455424 | |
| }, | |
| { | |
| "name": "model.layers.30.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 6144, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 20463616 | |
| } | |
| ], | |
| "md5sum": "3271355feebcf743d1fd9473676a31f2" | |
| }, | |
| { | |
| "dataPath": "params_shard_105.bin", | |
| "format": "raw-shard", | |
| "nbytes": 58720256, | |
| "records": [ | |
| { | |
| "name": "model.layers.31.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 28672, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 58720256, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "67d25a8779a586d6e99009b596f50043" | |
| }, | |
| { | |
| "dataPath": "params_shard_106.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32505856, | |
| "records": [ | |
| { | |
| "name": "model.layers.30.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.30.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 1572864 | |
| }, | |
| { | |
| "name": "model.layers.30.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 9961472 | |
| }, | |
| { | |
| "name": "model.layers.31.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28672, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7340032, | |
| "byteOffset": 11010048 | |
| }, | |
| { | |
| "name": "model.layers.31.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 6144, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 18350080 | |
| }, | |
| { | |
| "name": "model.layers.31.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 30932992 | |
| } | |
| ], | |
| "md5sum": "154ee764c444310f76c313c49bcd24c5" | |
| }, | |
| { | |
| "dataPath": "params_shard_107.bin", | |
| "format": "raw-shard", | |
| "nbytes": 9437184, | |
| "records": [ | |
| { | |
| "name": "model.layers.31.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.31.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 8388608 | |
| } | |
| ], | |
| "md5sum": "e107a6ecce6cc3f37f7a31e66103d59d" | |
| } | |
| ] | |
| } |