| { | |
| "metadata": { | |
| "ParamSize": 485, | |
| "ParamBytes": 18982125568.0, | |
| "BitsPerParam": 4.500270809152499 | |
| }, | |
| "records": [ | |
| { | |
| "dataPath": "params_shard_0.bin", | |
| "format": "raw-shard", | |
| "nbytes": 131072000, | |
| "records": [ | |
| { | |
| "name": "lm_head.q_weight", | |
| "shape": [ | |
| 32000, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 131072000, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "7289fa97cd43f2502fa51a8c1bfe570f" | |
| }, | |
| { | |
| "dataPath": "params_shard_1.bin", | |
| "format": "raw-shard", | |
| "nbytes": 90177536, | |
| "records": [ | |
| { | |
| "name": "model.layers.41.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 2752 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 90177536, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "690c605108ea8f34557d58cb846551ef" | |
| }, | |
| { | |
| "dataPath": "params_shard_2.bin", | |
| "format": "raw-shard", | |
| "nbytes": 90177536, | |
| "records": [ | |
| { | |
| "name": "model.layers.42.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 2752 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 90177536, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "bc3da80d0e4f6cce1af27bbc6aa20195" | |
| }, | |
| { | |
| "dataPath": "params_shard_3.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27705344, | |
| "records": [ | |
| { | |
| "name": "lm_head.q_scale", | |
| "shape": [ | |
| 32000, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384000, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.41.input_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 16384000 | |
| }, | |
| { | |
| "name": "model.layers.41.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 688 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11272192, | |
| "byteOffset": 16400384 | |
| }, | |
| { | |
| "name": "model.layers.41.post_attention_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 27672576 | |
| }, | |
| { | |
| "name": "model.layers.42.input_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 27688960 | |
| } | |
| ], | |
| "md5sum": "50a28b46fbb4eb4ae2cab67630a169e4" | |
| }, | |
| { | |
| "dataPath": "params_shard_4.bin", | |
| "format": "raw-shard", | |
| "nbytes": 180355072, | |
| "records": [ | |
| { | |
| "name": "model.layers.42.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 44032, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 180355072, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f6ef26046a4ed196a3895be4378887d3" | |
| }, | |
| { | |
| "dataPath": "params_shard_5.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22544384, | |
| "records": [ | |
| { | |
| "name": "model.layers.42.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 44032, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "2ad8e8d1c461c1b823cb70bf9db33462" | |
| }, | |
| { | |
| "dataPath": "params_shard_6.bin", | |
| "format": "raw-shard", | |
| "nbytes": 41943040, | |
| "records": [ | |
| { | |
| "name": "model.layers.42.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 10240, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 41943040, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "68eea3e4872bfa0520721142dd5d9688" | |
| }, | |
| { | |
| "dataPath": "params_shard_7.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "model.layers.42.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c21b0b25622f89df84302453a7e362aa" | |
| }, | |
| { | |
| "dataPath": "params_shard_8.bin", | |
| "format": "raw-shard", | |
| "nbytes": 90177536, | |
| "records": [ | |
| { | |
| "name": "model.layers.43.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 2752 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 90177536, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f0342ac7be70bc1a58b936d4d4790834" | |
| }, | |
| { | |
| "dataPath": "params_shard_9.bin", | |
| "format": "raw-shard", | |
| "nbytes": 180355072, | |
| "records": [ | |
| { | |
| "name": "model.layers.43.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 44032, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 180355072, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "5d85aefd7dd083f39282fc0d2167d0f3" | |
| }, | |
| { | |
| "dataPath": "params_shard_10.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22544384, | |
| "records": [ | |
| { | |
| "name": "model.layers.43.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 44032, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "34d8b8c23cbd575821ca7e570f6a34d7" | |
| }, | |
| { | |
| "dataPath": "params_shard_11.bin", | |
| "format": "raw-shard", | |
| "nbytes": 41943040, | |
| "records": [ | |
| { | |
| "name": "model.layers.43.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 10240, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 41943040, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "921b69810dc25b132221d4e39b45f14f" | |
| }, | |
| { | |
| "dataPath": "params_shard_12.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32030720, | |
| "records": [ | |
| { | |
| "name": "model.layers.42.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 688 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11272192, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.42.post_attention_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 11272192 | |
| }, | |
| { | |
| "name": "model.layers.42.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 10240, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5242880, | |
| "byteOffset": 11288576 | |
| }, | |
| { | |
| "name": "model.layers.42.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4194304, | |
| "byteOffset": 16531456 | |
| }, | |
| { | |
| "name": "model.layers.43.input_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 20725760 | |
| }, | |
| { | |
| "name": "model.layers.43.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 688 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11272192, | |
| "byteOffset": 20742144 | |
| }, | |
| { | |
| "name": "model.layers.43.post_attention_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 32014336 | |
| } | |
| ], | |
| "md5sum": "4e4401717473facc8e484664370e20c7" | |
| }, | |
| { | |
| "dataPath": "params_shard_13.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "model.layers.43.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "64a1444226713dfc58bd01d08c7badf8" | |
| }, | |
| { | |
| "dataPath": "params_shard_14.bin", | |
| "format": "raw-shard", | |
| "nbytes": 90177536, | |
| "records": [ | |
| { | |
| "name": "model.layers.44.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 2752 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 90177536, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "538b95e3adecb212a7f6a556eb99f40d" | |
| }, | |
| { | |
| "dataPath": "params_shard_15.bin", | |
| "format": "raw-shard", | |
| "nbytes": 180355072, | |
| "records": [ | |
| { | |
| "name": "model.layers.44.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 44032, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 180355072, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "acb3850fe060dbbd1efb4198c23cc4e9" | |
| }, | |
| { | |
| "dataPath": "params_shard_16.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22544384, | |
| "records": [ | |
| { | |
| "name": "model.layers.44.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 44032, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "6da183f89ec664fc91e26a130298bcf1" | |
| }, | |
| { | |
| "dataPath": "params_shard_17.bin", | |
| "format": "raw-shard", | |
| "nbytes": 41943040, | |
| "records": [ | |
| { | |
| "name": "model.layers.44.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 10240, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 41943040, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "fde56351b5e2a09829efe0065aa16e1d" | |
| }, | |
| { | |
| "dataPath": "params_shard_18.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "model.layers.44.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "a7765cf2a5310caed9f3119ecf018cfa" | |
| }, | |
| { | |
| "dataPath": "params_shard_19.bin", | |
| "format": "raw-shard", | |
| "nbytes": 90177536, | |
| "records": [ | |
| { | |
| "name": "model.layers.45.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 2752 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 90177536, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "2fc0aaabe0565b3e295bb477d0b5acfb" | |
| }, | |
| { | |
| "dataPath": "params_shard_20.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30195712, | |
| "records": [ | |
| { | |
| "name": "model.layers.43.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 10240, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5242880, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.43.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4194304, | |
| "byteOffset": 5242880 | |
| }, | |
| { | |
| "name": "model.layers.44.input_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 9437184 | |
| }, | |
| { | |
| "name": "model.layers.44.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 688 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11272192, | |
| "byteOffset": 9453568 | |
| }, | |
| { | |
| "name": "model.layers.44.post_attention_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 20725760 | |
| }, | |
| { | |
| "name": "model.layers.44.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 10240, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5242880, | |
| "byteOffset": 20742144 | |
| }, | |
| { | |
| "name": "model.layers.44.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4194304, | |
| "byteOffset": 25985024 | |
| }, | |
| { | |
| "name": "model.layers.45.input_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 30179328 | |
| } | |
| ], | |
| "md5sum": "e714710ebcbdd17a55820418ae388c8c" | |
| }, | |
| { | |
| "dataPath": "params_shard_21.bin", | |
| "format": "raw-shard", | |
| "nbytes": 180355072, | |
| "records": [ | |
| { | |
| "name": "model.layers.45.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 44032, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 180355072, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e931ba52d25fc891c907a7ae8db2bfda" | |
| }, | |
| { | |
| "dataPath": "params_shard_22.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22544384, | |
| "records": [ | |
| { | |
| "name": "model.layers.45.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 44032, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "fdf90ed37b3807cbb6837e5f86816f18" | |
| }, | |
| { | |
| "dataPath": "params_shard_23.bin", | |
| "format": "raw-shard", | |
| "nbytes": 41943040, | |
| "records": [ | |
| { | |
| "name": "model.layers.45.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 10240, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 41943040, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "13231d28b12d9dce254a28a32b1e09b0" | |
| }, | |
| { | |
| "dataPath": "params_shard_24.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "model.layers.45.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "0778044e54a88aeaf726d4fd2f830558" | |
| }, | |
| { | |
| "dataPath": "params_shard_25.bin", | |
| "format": "raw-shard", | |
| "nbytes": 90177536, | |
| "records": [ | |
| { | |
| "name": "model.layers.46.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 2752 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 90177536, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "71738117cc0e4f397033262874d78673" | |
| }, | |
| { | |
| "dataPath": "params_shard_26.bin", | |
| "format": "raw-shard", | |
| "nbytes": 180355072, | |
| "records": [ | |
| { | |
| "name": "model.layers.46.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 44032, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 180355072, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e7c85fdab433f50e8d257bc6eec7f008" | |
| }, | |
| { | |
| "dataPath": "params_shard_27.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22544384, | |
| "records": [ | |
| { | |
| "name": "model.layers.46.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 44032, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "43b150a3529e2748e185ab1b79caf774" | |
| }, | |
| { | |
| "dataPath": "params_shard_28.bin", | |
| "format": "raw-shard", | |
| "nbytes": 41943040, | |
| "records": [ | |
| { | |
| "name": "model.layers.46.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 10240, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 41943040, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "816b22f73db67468cc80efe5dc9992d8" | |
| }, | |
| { | |
| "dataPath": "params_shard_29.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32030720, | |
| "records": [ | |
| { | |
| "name": "model.layers.45.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 688 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11272192, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.45.post_attention_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 11272192 | |
| }, | |
| { | |
| "name": "model.layers.45.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 10240, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5242880, | |
| "byteOffset": 11288576 | |
| }, | |
| { | |
| "name": "model.layers.45.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4194304, | |
| "byteOffset": 16531456 | |
| }, | |
| { | |
| "name": "model.layers.46.input_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 20725760 | |
| }, | |
| { | |
| "name": "model.layers.46.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 688 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11272192, | |
| "byteOffset": 20742144 | |
| }, | |
| { | |
| "name": "model.layers.46.post_attention_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 32014336 | |
| } | |
| ], | |
| "md5sum": "a1a78bd824f880ac72f8fa4f9f8feab2" | |
| }, | |
| { | |
| "dataPath": "params_shard_30.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "model.layers.46.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "8e716211261c83c81f6ebacb5d8c8369" | |
| }, | |
| { | |
| "dataPath": "params_shard_31.bin", | |
| "format": "raw-shard", | |
| "nbytes": 90177536, | |
| "records": [ | |
| { | |
| "name": "model.layers.47.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 2752 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 90177536, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "8115a25623ff3ad8e3f032e4d1d90da7" | |
| }, | |
| { | |
| "dataPath": "params_shard_32.bin", | |
| "format": "raw-shard", | |
| "nbytes": 180355072, | |
| "records": [ | |
| { | |
| "name": "model.layers.47.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 44032, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 180355072, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "299d1c467084b368078ffba6075138c0" | |
| }, | |
| { | |
| "dataPath": "params_shard_33.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22544384, | |
| "records": [ | |
| { | |
| "name": "model.layers.47.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 44032, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "fe4243b6f23807a38ea0afc5ae23365e" | |
| }, | |
| { | |
| "dataPath": "params_shard_34.bin", | |
| "format": "raw-shard", | |
| "nbytes": 41943040, | |
| "records": [ | |
| { | |
| "name": "model.layers.47.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 10240, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 41943040, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "40222b9023340a5efe7c4f37f3d3c76d" | |
| }, | |
| { | |
| "dataPath": "params_shard_35.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "model.layers.47.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "cd82ac9d50f4e9c86db06ef0d081227d" | |
| }, | |
| { | |
| "dataPath": "params_shard_36.bin", | |
| "format": "raw-shard", | |
| "nbytes": 131072000, | |
| "records": [ | |
| { | |
| "name": "model.embed_tokens.q_weight", | |
| "shape": [ | |
| 32000, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 131072000, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "8bc2c15665a0c6d8163ec278362a7846" | |
| }, | |
| { | |
| "dataPath": "params_shard_37.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30195712, | |
| "records": [ | |
| { | |
| "name": "model.layers.46.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 10240, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5242880, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.46.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4194304, | |
| "byteOffset": 5242880 | |
| }, | |
| { | |
| "name": "model.layers.47.input_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 9437184 | |
| }, | |
| { | |
| "name": "model.layers.47.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 688 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11272192, | |
| "byteOffset": 9453568 | |
| }, | |
| { | |
| "name": "model.layers.47.post_attention_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 20725760 | |
| }, | |
| { | |
| "name": "model.layers.47.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 10240, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5242880, | |
| "byteOffset": 20742144 | |
| }, | |
| { | |
| "name": "model.layers.47.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4194304, | |
| "byteOffset": 25985024 | |
| }, | |
| { | |
| "name": "model.norm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 30179328 | |
| } | |
| ], | |
| "md5sum": "db5712187deab30ba40ae743654df56c" | |
| }, | |
| { | |
| "dataPath": "params_shard_38.bin", | |
| "format": "raw-shard", | |
| "nbytes": 90177536, | |
| "records": [ | |
| { | |
| "name": "model.layers.0.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 2752 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 90177536, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "48f568e02e0bfea8107a3f989d34dc8e" | |
| }, | |
| { | |
| "dataPath": "params_shard_39.bin", | |
| "format": "raw-shard", | |
| "nbytes": 180355072, | |
| "records": [ | |
| { | |
| "name": "model.layers.0.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 44032, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 180355072, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b704f410e497cc65c6b3a9f2b68af4f6" | |
| }, | |
| { | |
| "dataPath": "params_shard_40.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22544384, | |
| "records": [ | |
| { | |
| "name": "model.layers.0.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 44032, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "aaebd5421c68c68c26be10a8ae99bccc" | |
| }, | |
| { | |
| "dataPath": "params_shard_41.bin", | |
| "format": "raw-shard", | |
| "nbytes": 41943040, | |
| "records": [ | |
| { | |
| "name": "model.layers.0.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 10240, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 41943040, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c8a20ab681f70c3e31563208b8d1fab2" | |
| }, | |
| { | |
| "dataPath": "params_shard_42.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "model.layers.0.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "0b857f87900299d0669ff9a9869f6fec" | |
| }, | |
| { | |
| "dataPath": "params_shard_43.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32931840, | |
| "records": [ | |
| { | |
| "name": "model.embed_tokens.q_scale", | |
| "shape": [ | |
| 32000, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384000, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.0.input_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 16384000 | |
| }, | |
| { | |
| "name": "model.layers.0.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 688 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11272192, | |
| "byteOffset": 16400384 | |
| }, | |
| { | |
| "name": "model.layers.0.post_attention_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 27672576 | |
| }, | |
| { | |
| "name": "model.layers.0.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 10240, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5242880, | |
| "byteOffset": 27688960 | |
| } | |
| ], | |
| "md5sum": "cacc0b304a4de69db88a3b74e704dd7d" | |
| }, | |
| { | |
| "dataPath": "params_shard_44.bin", | |
| "format": "raw-shard", | |
| "nbytes": 90177536, | |
| "records": [ | |
| { | |
| "name": "model.layers.1.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 2752 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 90177536, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ba47e2a9b42dc6a309c1815d6f647a8b" | |
| }, | |
| { | |
| "dataPath": "params_shard_45.bin", | |
| "format": "raw-shard", | |
| "nbytes": 180355072, | |
| "records": [ | |
| { | |
| "name": "model.layers.1.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 44032, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 180355072, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "bc6210b9309403c3b3aab163fac54d08" | |
| }, | |
| { | |
| "dataPath": "params_shard_46.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22544384, | |
| "records": [ | |
| { | |
| "name": "model.layers.1.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 44032, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d1e6d418669416ecc1dfde2839390dad" | |
| }, | |
| { | |
| "dataPath": "params_shard_47.bin", | |
| "format": "raw-shard", | |
| "nbytes": 41943040, | |
| "records": [ | |
| { | |
| "name": "model.layers.1.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 10240, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 41943040, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "622275923a6a94432e7e64d90a6ab7c1" | |
| }, | |
| { | |
| "dataPath": "params_shard_48.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "model.layers.1.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "fa12a3c4ec5a3ca4532840c7d5cacc6d" | |
| }, | |
| { | |
| "dataPath": "params_shard_49.bin", | |
| "format": "raw-shard", | |
| "nbytes": 90177536, | |
| "records": [ | |
| { | |
| "name": "model.layers.2.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 2752 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 90177536, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "02383761325fd8d73d78fc6c3860353c" | |
| }, | |
| { | |
| "dataPath": "params_shard_50.bin", | |
| "format": "raw-shard", | |
| "nbytes": 24952832, | |
| "records": [ | |
| { | |
| "name": "model.layers.0.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4194304, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.1.input_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 4194304 | |
| }, | |
| { | |
| "name": "model.layers.1.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 688 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11272192, | |
| "byteOffset": 4210688 | |
| }, | |
| { | |
| "name": "model.layers.1.post_attention_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 15482880 | |
| }, | |
| { | |
| "name": "model.layers.1.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 10240, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5242880, | |
| "byteOffset": 15499264 | |
| }, | |
| { | |
| "name": "model.layers.1.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4194304, | |
| "byteOffset": 20742144 | |
| }, | |
| { | |
| "name": "model.layers.2.input_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 24936448 | |
| } | |
| ], | |
| "md5sum": "30ec5459978f11d692d8edc8928fe93a" | |
| }, | |
| { | |
| "dataPath": "params_shard_51.bin", | |
| "format": "raw-shard", | |
| "nbytes": 180355072, | |
| "records": [ | |
| { | |
| "name": "model.layers.2.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 44032, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 180355072, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "94f06398b862257587684996b4b1d497" | |
| }, | |
| { | |
| "dataPath": "params_shard_52.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22544384, | |
| "records": [ | |
| { | |
| "name": "model.layers.2.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 44032, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d2a85dc204070711bdedcde92af12bdb" | |
| }, | |
| { | |
| "dataPath": "params_shard_53.bin", | |
| "format": "raw-shard", | |
| "nbytes": 41943040, | |
| "records": [ | |
| { | |
| "name": "model.layers.2.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 10240, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 41943040, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "24f353c10e65689e2dd9037225f41f95" | |
| }, | |
| { | |
| "dataPath": "params_shard_54.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "model.layers.2.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "cd39dfa98ddfae867af776cd0ea75245" | |
| }, | |
| { | |
| "dataPath": "params_shard_55.bin", | |
| "format": "raw-shard", | |
| "nbytes": 90177536, | |
| "records": [ | |
| { | |
| "name": "model.layers.3.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 2752 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 90177536, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "4bf63af4d3112711b70b2e4098d738fe" | |
| }, | |
| { | |
| "dataPath": "params_shard_56.bin", | |
| "format": "raw-shard", | |
| "nbytes": 180355072, | |
| "records": [ | |
| { | |
| "name": "model.layers.3.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 44032, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 180355072, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "1a52d131c3377b1539b9fe7c7193be69" | |
| }, | |
| { | |
| "dataPath": "params_shard_57.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22544384, | |
| "records": [ | |
| { | |
| "name": "model.layers.3.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 44032, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "383ca1be8f92cabd7c4d6d331cabb26f" | |
| }, | |
| { | |
| "dataPath": "params_shard_58.bin", | |
| "format": "raw-shard", | |
| "nbytes": 41943040, | |
| "records": [ | |
| { | |
| "name": "model.layers.3.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 10240, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 41943040, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "145bba7a7eddff69589aec8ed5bdd868" | |
| }, | |
| { | |
| "dataPath": "params_shard_59.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32030720, | |
| "records": [ | |
| { | |
| "name": "model.layers.2.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 688 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11272192, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.2.post_attention_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 11272192 | |
| }, | |
| { | |
| "name": "model.layers.2.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 10240, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5242880, | |
| "byteOffset": 11288576 | |
| }, | |
| { | |
| "name": "model.layers.2.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4194304, | |
| "byteOffset": 16531456 | |
| }, | |
| { | |
| "name": "model.layers.3.input_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 20725760 | |
| }, | |
| { | |
| "name": "model.layers.3.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 688 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11272192, | |
| "byteOffset": 20742144 | |
| }, | |
| { | |
| "name": "model.layers.3.post_attention_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 32014336 | |
| } | |
| ], | |
| "md5sum": "778d75ebc0eabfdd2078207fa4ab60f1" | |
| }, | |
| { | |
| "dataPath": "params_shard_60.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "model.layers.3.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "fb2a61577db54d5925f534ba1c85705d" | |
| }, | |
| { | |
| "dataPath": "params_shard_61.bin", | |
| "format": "raw-shard", | |
| "nbytes": 90177536, | |
| "records": [ | |
| { | |
| "name": "model.layers.4.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 2752 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 90177536, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "48bf5e83a05e8e4ec0d85d74e4fcde00" | |
| }, | |
| { | |
| "dataPath": "params_shard_62.bin", | |
| "format": "raw-shard", | |
| "nbytes": 180355072, | |
| "records": [ | |
| { | |
| "name": "model.layers.4.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 44032, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 180355072, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "1117510c2a62d1d7efe1ffd63bb3e20b" | |
| }, | |
| { | |
| "dataPath": "params_shard_63.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22544384, | |
| "records": [ | |
| { | |
| "name": "model.layers.4.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 44032, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b8f4f302ed8482d8326db502600ad281" | |
| }, | |
| { | |
| "dataPath": "params_shard_64.bin", | |
| "format": "raw-shard", | |
| "nbytes": 41943040, | |
| "records": [ | |
| { | |
| "name": "model.layers.4.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 10240, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 41943040, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "96feb9931d39de82bcf96ce2ce17d933" | |
| }, | |
| { | |
| "dataPath": "params_shard_65.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "model.layers.4.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c6740ae77abfb0c72c58e294b96fb777" | |
| }, | |
| { | |
| "dataPath": "params_shard_66.bin", | |
| "format": "raw-shard", | |
| "nbytes": 90177536, | |
| "records": [ | |
| { | |
| "name": "model.layers.5.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 2752 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 90177536, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b45f2fcfbd878356c8ed5b94ed4d06ab" | |
| }, | |
| { | |
| "dataPath": "params_shard_67.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30195712, | |
| "records": [ | |
| { | |
| "name": "model.layers.3.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 10240, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5242880, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.3.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4194304, | |
| "byteOffset": 5242880 | |
| }, | |
| { | |
| "name": "model.layers.4.input_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 9437184 | |
| }, | |
| { | |
| "name": "model.layers.4.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 688 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11272192, | |
| "byteOffset": 9453568 | |
| }, | |
| { | |
| "name": "model.layers.4.post_attention_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 20725760 | |
| }, | |
| { | |
| "name": "model.layers.4.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 10240, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5242880, | |
| "byteOffset": 20742144 | |
| }, | |
| { | |
| "name": "model.layers.4.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4194304, | |
| "byteOffset": 25985024 | |
| }, | |
| { | |
| "name": "model.layers.5.input_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 30179328 | |
| } | |
| ], | |
| "md5sum": "286caf2ed01c8fb594d20e0cb6d3185b" | |
| }, | |
| { | |
| "dataPath": "params_shard_68.bin", | |
| "format": "raw-shard", | |
| "nbytes": 180355072, | |
| "records": [ | |
| { | |
| "name": "model.layers.5.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 44032, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 180355072, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c7b4eb17f046eb08edaf03c8da32ef72" | |
| }, | |
| { | |
| "dataPath": "params_shard_69.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22544384, | |
| "records": [ | |
| { | |
| "name": "model.layers.5.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 44032, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "6537e581077da2d2faac4efa373b7b81" | |
| }, | |
| { | |
| "dataPath": "params_shard_70.bin", | |
| "format": "raw-shard", | |
| "nbytes": 41943040, | |
| "records": [ | |
| { | |
| "name": "model.layers.5.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 10240, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 41943040, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "85197a83004428b4510d8ec9fb9ca1a9" | |
| }, | |
| { | |
| "dataPath": "params_shard_71.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "model.layers.5.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "82cde4335c275209a1dec96528fe5aed" | |
| }, | |
| { | |
| "dataPath": "params_shard_72.bin", | |
| "format": "raw-shard", | |
| "nbytes": 180355072, | |
| "records": [ | |
| { | |
| "name": "model.layers.6.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 44032, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 180355072, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d20dbad1c4397351b715ca3a05a8268b" | |
| }, | |
| { | |
| "dataPath": "params_shard_73.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22544384, | |
| "records": [ | |
| { | |
| "name": "model.layers.6.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 44032, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e819dbaf63584819ac88451cc757d597" | |
| }, | |
| { | |
| "dataPath": "params_shard_74.bin", | |
| "format": "raw-shard", | |
| "nbytes": 41943040, | |
| "records": [ | |
| { | |
| "name": "model.layers.6.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 10240, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 41943040, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "9e18d7ba93591b4e2c6365dd4c1575d1" | |
| }, | |
| { | |
| "dataPath": "params_shard_75.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "model.layers.6.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "603b397f86f5b70da0d9ab590c28344b" | |
| }, | |
| { | |
| "dataPath": "params_shard_76.bin", | |
| "format": "raw-shard", | |
| "nbytes": 90177536, | |
| "records": [ | |
| { | |
| "name": "model.layers.10.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 2752 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 90177536, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "aaf2c75fa4e1bb67a4930aac604c86f2" | |
| }, | |
| { | |
| "dataPath": "params_shard_77.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30179328, | |
| "records": [ | |
| { | |
| "name": "model.layers.5.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 688 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11272192, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.5.post_attention_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 11272192 | |
| }, | |
| { | |
| "name": "model.layers.5.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 10240, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5242880, | |
| "byteOffset": 11288576 | |
| }, | |
| { | |
| "name": "model.layers.5.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4194304, | |
| "byteOffset": 16531456 | |
| }, | |
| { | |
| "name": "model.layers.6.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 10240, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5242880, | |
| "byteOffset": 20725760 | |
| }, | |
| { | |
| "name": "model.layers.6.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4194304, | |
| "byteOffset": 25968640 | |
| }, | |
| { | |
| "name": "model.layers.10.input_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 30162944 | |
| } | |
| ], | |
| "md5sum": "a9574e1298b22739cc3549b53dde1e8a" | |
| }, | |
| { | |
| "dataPath": "params_shard_78.bin", | |
| "format": "raw-shard", | |
| "nbytes": 180355072, | |
| "records": [ | |
| { | |
| "name": "model.layers.10.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 44032, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 180355072, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "349de5574b91fe570bdbe8be02f3b445" | |
| }, | |
| { | |
| "dataPath": "params_shard_79.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22544384, | |
| "records": [ | |
| { | |
| "name": "model.layers.10.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 44032, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "472378f5a02ecace50802276cf1f1086" | |
| }, | |
| { | |
| "dataPath": "params_shard_80.bin", | |
| "format": "raw-shard", | |
| "nbytes": 41943040, | |
| "records": [ | |
| { | |
| "name": "model.layers.10.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 10240, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 41943040, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "4dc6723f25ea1081a63b71d19afcf337" | |
| }, | |
| { | |
| "dataPath": "params_shard_81.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "model.layers.10.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b4e57443bb99be16b680d23e86ea5c7a" | |
| }, | |
| { | |
| "dataPath": "params_shard_82.bin", | |
| "format": "raw-shard", | |
| "nbytes": 90177536, | |
| "records": [ | |
| { | |
| "name": "model.layers.11.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 2752 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 90177536, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "af28c9e9590bfc58b1d7c217db1cfb72" | |
| }, | |
| { | |
| "dataPath": "params_shard_83.bin", | |
| "format": "raw-shard", | |
| "nbytes": 180355072, | |
| "records": [ | |
| { | |
| "name": "model.layers.11.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 44032, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 180355072, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d0ddde3d44684c265b8940f2999327ab" | |
| }, | |
| { | |
| "dataPath": "params_shard_84.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22544384, | |
| "records": [ | |
| { | |
| "name": "model.layers.11.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 44032, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "4dd7ae2a72b5af7ecdc39e1bbeb61ef6" | |
| }, | |
| { | |
| "dataPath": "params_shard_85.bin", | |
| "format": "raw-shard", | |
| "nbytes": 41943040, | |
| "records": [ | |
| { | |
| "name": "model.layers.11.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 10240, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 41943040, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f51c9ad5c021b3f02c83ccff87a2011e" | |
| }, | |
| { | |
| "dataPath": "params_shard_86.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32030720, | |
| "records": [ | |
| { | |
| "name": "model.layers.10.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 688 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11272192, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.10.post_attention_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 11272192 | |
| }, | |
| { | |
| "name": "model.layers.10.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 10240, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5242880, | |
| "byteOffset": 11288576 | |
| }, | |
| { | |
| "name": "model.layers.10.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4194304, | |
| "byteOffset": 16531456 | |
| }, | |
| { | |
| "name": "model.layers.11.input_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 20725760 | |
| }, | |
| { | |
| "name": "model.layers.11.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 688 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11272192, | |
| "byteOffset": 20742144 | |
| }, | |
| { | |
| "name": "model.layers.11.post_attention_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 32014336 | |
| } | |
| ], | |
| "md5sum": "cac0c157888340b2b6a0c9221d7b3b5c" | |
| }, | |
| { | |
| "dataPath": "params_shard_87.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "model.layers.11.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f496b5ad77a851987c746046b188def1" | |
| }, | |
| { | |
| "dataPath": "params_shard_88.bin", | |
| "format": "raw-shard", | |
| "nbytes": 90177536, | |
| "records": [ | |
| { | |
| "name": "model.layers.12.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 2752 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 90177536, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "1a93bbc5422467002d2c91c0db0ce287" | |
| }, | |
| { | |
| "dataPath": "params_shard_89.bin", | |
| "format": "raw-shard", | |
| "nbytes": 180355072, | |
| "records": [ | |
| { | |
| "name": "model.layers.12.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 44032, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 180355072, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "82edd70e5cd37497ab1a93ddd199e2c7" | |
| }, | |
| { | |
| "dataPath": "params_shard_90.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22544384, | |
| "records": [ | |
| { | |
| "name": "model.layers.12.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 44032, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "4bff922dbddfd0eeb846842b3fbde602" | |
| }, | |
| { | |
| "dataPath": "params_shard_91.bin", | |
| "format": "raw-shard", | |
| "nbytes": 41943040, | |
| "records": [ | |
| { | |
| "name": "model.layers.12.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 10240, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 41943040, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "0b3b854961e5ada4aeebbdf18df06acc" | |
| }, | |
| { | |
| "dataPath": "params_shard_92.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "model.layers.12.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c31d830b221d26d835f394e8efc1eb92" | |
| }, | |
| { | |
| "dataPath": "params_shard_93.bin", | |
| "format": "raw-shard", | |
| "nbytes": 180355072, | |
| "records": [ | |
| { | |
| "name": "model.layers.13.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 44032, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 180355072, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "bf761f5286d5f88c8a15f8c14fdcb826" | |
| }, | |
| { | |
| "dataPath": "params_shard_94.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22544384, | |
| "records": [ | |
| { | |
| "name": "model.layers.13.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 44032, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "56c4fb750e829e27b9bc42ee3bfe9fdf" | |
| }, | |
| { | |
| "dataPath": "params_shard_95.bin", | |
| "format": "raw-shard", | |
| "nbytes": 41943040, | |
| "records": [ | |
| { | |
| "name": "model.layers.13.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 10240, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 41943040, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "96c6902bd9f8c1d51d453b2c466826f2" | |
| }, | |
| { | |
| "dataPath": "params_shard_96.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30179328, | |
| "records": [ | |
| { | |
| "name": "model.layers.11.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 10240, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5242880, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.11.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4194304, | |
| "byteOffset": 5242880 | |
| }, | |
| { | |
| "name": "model.layers.12.input_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 9437184 | |
| }, | |
| { | |
| "name": "model.layers.12.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 688 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11272192, | |
| "byteOffset": 9453568 | |
| }, | |
| { | |
| "name": "model.layers.12.post_attention_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 20725760 | |
| }, | |
| { | |
| "name": "model.layers.12.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 10240, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5242880, | |
| "byteOffset": 20742144 | |
| }, | |
| { | |
| "name": "model.layers.12.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4194304, | |
| "byteOffset": 25985024 | |
| } | |
| ], | |
| "md5sum": "12e5127075fa0b358fb967558678417a" | |
| }, | |
| { | |
| "dataPath": "params_shard_97.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "model.layers.13.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "8c67d0fd61f20bb0233922c7a7acb70b" | |
| }, | |
| { | |
| "dataPath": "params_shard_98.bin", | |
| "format": "raw-shard", | |
| "nbytes": 90177536, | |
| "records": [ | |
| { | |
| "name": "model.layers.6.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 2752 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 90177536, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "4d96c1fcfce4072cbab44f873dbed7d5" | |
| }, | |
| { | |
| "dataPath": "params_shard_99.bin", | |
| "format": "raw-shard", | |
| "nbytes": 90177536, | |
| "records": [ | |
| { | |
| "name": "model.layers.7.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 2752 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 90177536, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c16e572deca7f168ac5b1948b7bf6a25" | |
| }, | |
| { | |
| "dataPath": "params_shard_100.bin", | |
| "format": "raw-shard", | |
| "nbytes": 180355072, | |
| "records": [ | |
| { | |
| "name": "model.layers.7.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 44032, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 180355072, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "9d1cdcfc41c63d66daabdb368fad8bdb" | |
| }, | |
| { | |
| "dataPath": "params_shard_101.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22544384, | |
| "records": [ | |
| { | |
| "name": "model.layers.7.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 44032, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b71aa8717f80ef4f400e07f657bb97d5" | |
| }, | |
| { | |
| "dataPath": "params_shard_102.bin", | |
| "format": "raw-shard", | |
| "nbytes": 41943040, | |
| "records": [ | |
| { | |
| "name": "model.layers.7.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 10240, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 41943040, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "9198d27f800b60f1c5f25b416ec9cd2e" | |
| }, | |
| { | |
| "dataPath": "params_shard_103.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32047104, | |
| "records": [ | |
| { | |
| "name": "model.layers.13.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 10240, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5242880, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.13.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4194304, | |
| "byteOffset": 5242880 | |
| }, | |
| { | |
| "name": "model.layers.6.input_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 9437184 | |
| }, | |
| { | |
| "name": "model.layers.6.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 688 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11272192, | |
| "byteOffset": 9453568 | |
| }, | |
| { | |
| "name": "model.layers.6.post_attention_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 20725760 | |
| }, | |
| { | |
| "name": "model.layers.7.input_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 20742144 | |
| }, | |
| { | |
| "name": "model.layers.7.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 688 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11272192, | |
| "byteOffset": 20758528 | |
| }, | |
| { | |
| "name": "model.layers.7.post_attention_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 32030720 | |
| } | |
| ], | |
| "md5sum": "043ab3737d873f1180f6e2df8f713925" | |
| }, | |
| { | |
| "dataPath": "params_shard_104.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "model.layers.7.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "854b64b6c0942a1a4f81436e548b68ea" | |
| }, | |
| { | |
| "dataPath": "params_shard_105.bin", | |
| "format": "raw-shard", | |
| "nbytes": 90177536, | |
| "records": [ | |
| { | |
| "name": "model.layers.8.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 2752 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 90177536, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "914123630566f15bba3559c25427bd32" | |
| }, | |
| { | |
| "dataPath": "params_shard_106.bin", | |
| "format": "raw-shard", | |
| "nbytes": 180355072, | |
| "records": [ | |
| { | |
| "name": "model.layers.8.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 44032, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 180355072, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "3b927d01f3361e5b7e629d12be2d5922" | |
| }, | |
| { | |
| "dataPath": "params_shard_107.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22544384, | |
| "records": [ | |
| { | |
| "name": "model.layers.8.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 44032, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "2ed9b3db11e3f265a9110e740ebc2b70" | |
| }, | |
| { | |
| "dataPath": "params_shard_108.bin", | |
| "format": "raw-shard", | |
| "nbytes": 41943040, | |
| "records": [ | |
| { | |
| "name": "model.layers.8.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 10240, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 41943040, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "6faa024b20ca3d124e842fb6a34aad43" | |
| }, | |
| { | |
| "dataPath": "params_shard_109.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "model.layers.8.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "021892341fca9ca0e3779dba2ed003df" | |
| }, | |
| { | |
| "dataPath": "params_shard_110.bin", | |
| "format": "raw-shard", | |
| "nbytes": 90177536, | |
| "records": [ | |
| { | |
| "name": "model.layers.9.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 2752 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 90177536, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "1c09d4dc691f45c445fa309b32124421" | |
| }, | |
| { | |
| "dataPath": "params_shard_111.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30195712, | |
| "records": [ | |
| { | |
| "name": "model.layers.7.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 10240, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5242880, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.7.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4194304, | |
| "byteOffset": 5242880 | |
| }, | |
| { | |
| "name": "model.layers.8.input_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 9437184 | |
| }, | |
| { | |
| "name": "model.layers.8.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 688 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11272192, | |
| "byteOffset": 9453568 | |
| }, | |
| { | |
| "name": "model.layers.8.post_attention_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 20725760 | |
| }, | |
| { | |
| "name": "model.layers.8.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 10240, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5242880, | |
| "byteOffset": 20742144 | |
| }, | |
| { | |
| "name": "model.layers.8.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4194304, | |
| "byteOffset": 25985024 | |
| }, | |
| { | |
| "name": "model.layers.9.input_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 30179328 | |
| } | |
| ], | |
| "md5sum": "b1f16df4a3cdb8e0c2e18ff3ffdbed17" | |
| }, | |
| { | |
| "dataPath": "params_shard_112.bin", | |
| "format": "raw-shard", | |
| "nbytes": 180355072, | |
| "records": [ | |
| { | |
| "name": "model.layers.9.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 44032, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 180355072, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b384b221634a0bd8185c560e5cef0b9d" | |
| }, | |
| { | |
| "dataPath": "params_shard_113.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22544384, | |
| "records": [ | |
| { | |
| "name": "model.layers.9.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 44032, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c1f6aa960c4e8678d41b674492a0398e" | |
| }, | |
| { | |
| "dataPath": "params_shard_114.bin", | |
| "format": "raw-shard", | |
| "nbytes": 41943040, | |
| "records": [ | |
| { | |
| "name": "model.layers.9.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 10240, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 41943040, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "5a0455632a5224e2bbc5e7fa28a5bc99" | |
| }, | |
| { | |
| "dataPath": "params_shard_115.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "model.layers.9.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f676284083424393f0464b7d98908686" | |
| }, | |
| { | |
| "dataPath": "params_shard_116.bin", | |
| "format": "raw-shard", | |
| "nbytes": 90177536, | |
| "records": [ | |
| { | |
| "name": "model.layers.13.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 2752 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 90177536, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "130f06d6790a0f7b4be516b2db73d2b0" | |
| }, | |
| { | |
| "dataPath": "params_shard_117.bin", | |
| "format": "raw-shard", | |
| "nbytes": 90177536, | |
| "records": [ | |
| { | |
| "name": "model.layers.14.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 2752 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 90177536, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c4d3096c6af7b96ce681b7c916007e8a" | |
| }, | |
| { | |
| "dataPath": "params_shard_118.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32047104, | |
| "records": [ | |
| { | |
| "name": "model.layers.9.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 688 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11272192, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.9.post_attention_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 11272192 | |
| }, | |
| { | |
| "name": "model.layers.9.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 10240, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5242880, | |
| "byteOffset": 11288576 | |
| }, | |
| { | |
| "name": "model.layers.9.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4194304, | |
| "byteOffset": 16531456 | |
| }, | |
| { | |
| "name": "model.layers.13.input_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 20725760 | |
| }, | |
| { | |
| "name": "model.layers.13.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 688 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11272192, | |
| "byteOffset": 20742144 | |
| }, | |
| { | |
| "name": "model.layers.13.post_attention_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 32014336 | |
| }, | |
| { | |
| "name": "model.layers.14.input_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 32030720 | |
| } | |
| ], | |
| "md5sum": "9ae02b356a50dbe05c498ca89f5604f6" | |
| }, | |
| { | |
| "dataPath": "params_shard_119.bin", | |
| "format": "raw-shard", | |
| "nbytes": 180355072, | |
| "records": [ | |
| { | |
| "name": "model.layers.14.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 44032, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 180355072, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "0cf385d106684368d6160c0417694789" | |
| }, | |
| { | |
| "dataPath": "params_shard_120.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22544384, | |
| "records": [ | |
| { | |
| "name": "model.layers.14.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 44032, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "453f0702b38f510b4364bffa87958631" | |
| }, | |
| { | |
| "dataPath": "params_shard_121.bin", | |
| "format": "raw-shard", | |
| "nbytes": 41943040, | |
| "records": [ | |
| { | |
| "name": "model.layers.14.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 10240, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 41943040, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f2c9eec0215df295629f792183b9fe86" | |
| }, | |
| { | |
| "dataPath": "params_shard_122.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "model.layers.14.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "15a5e888fd7b11c7768e31ecb1f28bfb" | |
| }, | |
| { | |
| "dataPath": "params_shard_123.bin", | |
| "format": "raw-shard", | |
| "nbytes": 90177536, | |
| "records": [ | |
| { | |
| "name": "model.layers.15.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 2752 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 90177536, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "fcca991b065e8b4cc1fad784f554a4f9" | |
| }, | |
| { | |
| "dataPath": "params_shard_124.bin", | |
| "format": "raw-shard", | |
| "nbytes": 180355072, | |
| "records": [ | |
| { | |
| "name": "model.layers.15.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 44032, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 180355072, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "0b5ce01006450462a32b133217ff4783" | |
| }, | |
| { | |
| "dataPath": "params_shard_125.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22544384, | |
| "records": [ | |
| { | |
| "name": "model.layers.15.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 44032, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "7dcfe5433011dedd1a35884cbb8f3e43" | |
| }, | |
| { | |
| "dataPath": "params_shard_126.bin", | |
| "format": "raw-shard", | |
| "nbytes": 41943040, | |
| "records": [ | |
| { | |
| "name": "model.layers.15.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 10240, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 41943040, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "99b363a177f772db4804c99fb71f99a9" | |
| }, | |
| { | |
| "dataPath": "params_shard_127.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32030720, | |
| "records": [ | |
| { | |
| "name": "model.layers.14.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 688 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11272192, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.14.post_attention_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 11272192 | |
| }, | |
| { | |
| "name": "model.layers.14.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 10240, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5242880, | |
| "byteOffset": 11288576 | |
| }, | |
| { | |
| "name": "model.layers.14.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4194304, | |
| "byteOffset": 16531456 | |
| }, | |
| { | |
| "name": "model.layers.15.input_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 20725760 | |
| }, | |
| { | |
| "name": "model.layers.15.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 688 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11272192, | |
| "byteOffset": 20742144 | |
| }, | |
| { | |
| "name": "model.layers.15.post_attention_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 32014336 | |
| } | |
| ], | |
| "md5sum": "0458196202b35dc54119b07c97847e3d" | |
| }, | |
| { | |
| "dataPath": "params_shard_128.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "model.layers.15.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e3345f4961dd086036dade6165fb731b" | |
| }, | |
| { | |
| "dataPath": "params_shard_129.bin", | |
| "format": "raw-shard", | |
| "nbytes": 90177536, | |
| "records": [ | |
| { | |
| "name": "model.layers.16.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 2752 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 90177536, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "90a97e76b3f42fc71ce199e5587d7bc4" | |
| }, | |
| { | |
| "dataPath": "params_shard_130.bin", | |
| "format": "raw-shard", | |
| "nbytes": 180355072, | |
| "records": [ | |
| { | |
| "name": "model.layers.16.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 44032, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 180355072, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f449be834273671d3e8356a8d6ecf1ed" | |
| }, | |
| { | |
| "dataPath": "params_shard_131.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22544384, | |
| "records": [ | |
| { | |
| "name": "model.layers.16.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 44032, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "a0df38ba8e30250adb669923f1aa006e" | |
| }, | |
| { | |
| "dataPath": "params_shard_132.bin", | |
| "format": "raw-shard", | |
| "nbytes": 41943040, | |
| "records": [ | |
| { | |
| "name": "model.layers.16.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 10240, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 41943040, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "78b7b98468e796666d4d52af22a28066" | |
| }, | |
| { | |
| "dataPath": "params_shard_133.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "model.layers.16.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "6cc1191c30dfba3ff8a1a009a91c709d" | |
| }, | |
| { | |
| "dataPath": "params_shard_134.bin", | |
| "format": "raw-shard", | |
| "nbytes": 90177536, | |
| "records": [ | |
| { | |
| "name": "model.layers.17.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 2752 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 90177536, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "a440b41f4b0b9e466f1c7f794b50450a" | |
| }, | |
| { | |
| "dataPath": "params_shard_135.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30195712, | |
| "records": [ | |
| { | |
| "name": "model.layers.15.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 10240, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5242880, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.15.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4194304, | |
| "byteOffset": 5242880 | |
| }, | |
| { | |
| "name": "model.layers.16.input_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 9437184 | |
| }, | |
| { | |
| "name": "model.layers.16.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 688 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11272192, | |
| "byteOffset": 9453568 | |
| }, | |
| { | |
| "name": "model.layers.16.post_attention_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 20725760 | |
| }, | |
| { | |
| "name": "model.layers.16.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 10240, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5242880, | |
| "byteOffset": 20742144 | |
| }, | |
| { | |
| "name": "model.layers.16.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4194304, | |
| "byteOffset": 25985024 | |
| }, | |
| { | |
| "name": "model.layers.17.input_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 30179328 | |
| } | |
| ], | |
| "md5sum": "ec820e4ea598557cf7a48f2015c0fc45" | |
| }, | |
| { | |
| "dataPath": "params_shard_136.bin", | |
| "format": "raw-shard", | |
| "nbytes": 180355072, | |
| "records": [ | |
| { | |
| "name": "model.layers.17.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 44032, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 180355072, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "334d0419c05794a8269fe83d5043f754" | |
| }, | |
| { | |
| "dataPath": "params_shard_137.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22544384, | |
| "records": [ | |
| { | |
| "name": "model.layers.17.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 44032, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f9cd5bd3731b003ea1c04d12f6cc0eb3" | |
| }, | |
| { | |
| "dataPath": "params_shard_138.bin", | |
| "format": "raw-shard", | |
| "nbytes": 41943040, | |
| "records": [ | |
| { | |
| "name": "model.layers.17.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 10240, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 41943040, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "4361dd1d6ad597d539475bee709521af" | |
| }, | |
| { | |
| "dataPath": "params_shard_139.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "model.layers.17.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "14a34a8a277359c898f47f679b9846c4" | |
| }, | |
| { | |
| "dataPath": "params_shard_140.bin", | |
| "format": "raw-shard", | |
| "nbytes": 90177536, | |
| "records": [ | |
| { | |
| "name": "model.layers.18.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 2752 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 90177536, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e313aad765d84308891d9e467c4b8571" | |
| }, | |
| { | |
| "dataPath": "params_shard_141.bin", | |
| "format": "raw-shard", | |
| "nbytes": 180355072, | |
| "records": [ | |
| { | |
| "name": "model.layers.18.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 44032, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 180355072, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "1619ef1c597de62c7a36c86c8b268970" | |
| }, | |
| { | |
| "dataPath": "params_shard_142.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22544384, | |
| "records": [ | |
| { | |
| "name": "model.layers.18.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 44032, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "369037d83ebdb8f552340cc06f37d7d9" | |
| }, | |
| { | |
| "dataPath": "params_shard_143.bin", | |
| "format": "raw-shard", | |
| "nbytes": 41943040, | |
| "records": [ | |
| { | |
| "name": "model.layers.18.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 10240, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 41943040, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ecce66444bea5504322e6b1cc00fbd1e" | |
| }, | |
| { | |
| "dataPath": "params_shard_144.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32030720, | |
| "records": [ | |
| { | |
| "name": "model.layers.17.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 688 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11272192, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.17.post_attention_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 11272192 | |
| }, | |
| { | |
| "name": "model.layers.17.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 10240, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5242880, | |
| "byteOffset": 11288576 | |
| }, | |
| { | |
| "name": "model.layers.17.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4194304, | |
| "byteOffset": 16531456 | |
| }, | |
| { | |
| "name": "model.layers.18.input_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 20725760 | |
| }, | |
| { | |
| "name": "model.layers.18.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 688 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11272192, | |
| "byteOffset": 20742144 | |
| }, | |
| { | |
| "name": "model.layers.18.post_attention_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 32014336 | |
| } | |
| ], | |
| "md5sum": "d5c84415242e11a33041a99bb27ca177" | |
| }, | |
| { | |
| "dataPath": "params_shard_145.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "model.layers.18.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "63e87f59be7b344329ba316d66b9958c" | |
| }, | |
| { | |
| "dataPath": "params_shard_146.bin", | |
| "format": "raw-shard", | |
| "nbytes": 90177536, | |
| "records": [ | |
| { | |
| "name": "model.layers.19.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 2752 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 90177536, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "edee6be6ffb2ef96d6a7e7912b6a7901" | |
| }, | |
| { | |
| "dataPath": "params_shard_147.bin", | |
| "format": "raw-shard", | |
| "nbytes": 180355072, | |
| "records": [ | |
| { | |
| "name": "model.layers.19.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 44032, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 180355072, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "59caac1b3591287090b636bd3718aead" | |
| }, | |
| { | |
| "dataPath": "params_shard_148.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22544384, | |
| "records": [ | |
| { | |
| "name": "model.layers.19.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 44032, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "24316ead71c49df051cfde0a24b3befc" | |
| }, | |
| { | |
| "dataPath": "params_shard_149.bin", | |
| "format": "raw-shard", | |
| "nbytes": 41943040, | |
| "records": [ | |
| { | |
| "name": "model.layers.19.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 10240, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 41943040, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "96ad4394d33e283b42d0e2bd863d9aec" | |
| }, | |
| { | |
| "dataPath": "params_shard_150.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "model.layers.19.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "6febc0030cbde8f0af121ce547732b66" | |
| }, | |
| { | |
| "dataPath": "params_shard_151.bin", | |
| "format": "raw-shard", | |
| "nbytes": 180355072, | |
| "records": [ | |
| { | |
| "name": "model.layers.20.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 44032, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 180355072, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "37d0d38c19ca44b7dbfb604c53b82df5" | |
| }, | |
| { | |
| "dataPath": "params_shard_152.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22544384, | |
| "records": [ | |
| { | |
| "name": "model.layers.20.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 44032, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "56786f7ffc3b6161ef56625a1ad425a4" | |
| }, | |
| { | |
| "dataPath": "params_shard_153.bin", | |
| "format": "raw-shard", | |
| "nbytes": 41943040, | |
| "records": [ | |
| { | |
| "name": "model.layers.20.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 10240, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 41943040, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "4c71586032e0f84c15736b2bef6aced9" | |
| }, | |
| { | |
| "dataPath": "params_shard_154.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30179328, | |
| "records": [ | |
| { | |
| "name": "model.layers.18.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 10240, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5242880, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.18.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4194304, | |
| "byteOffset": 5242880 | |
| }, | |
| { | |
| "name": "model.layers.19.input_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 9437184 | |
| }, | |
| { | |
| "name": "model.layers.19.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 688 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11272192, | |
| "byteOffset": 9453568 | |
| }, | |
| { | |
| "name": "model.layers.19.post_attention_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 20725760 | |
| }, | |
| { | |
| "name": "model.layers.19.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 10240, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5242880, | |
| "byteOffset": 20742144 | |
| }, | |
| { | |
| "name": "model.layers.19.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4194304, | |
| "byteOffset": 25985024 | |
| } | |
| ], | |
| "md5sum": "e685b41c5d97ba0d421fe28f1fce7265" | |
| }, | |
| { | |
| "dataPath": "params_shard_155.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "model.layers.20.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "4e753b0d9c9a78be4ea5ea7c02f4ffad" | |
| }, | |
| { | |
| "dataPath": "params_shard_156.bin", | |
| "format": "raw-shard", | |
| "nbytes": 90177536, | |
| "records": [ | |
| { | |
| "name": "model.layers.20.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 2752 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 90177536, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "a7ed480d6cce79a8ae007aa35978fc4a" | |
| }, | |
| { | |
| "dataPath": "params_shard_157.bin", | |
| "format": "raw-shard", | |
| "nbytes": 90177536, | |
| "records": [ | |
| { | |
| "name": "model.layers.21.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 2752 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 90177536, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "0fe0ad82b0081a5282138f0c70d3c5ef" | |
| }, | |
| { | |
| "dataPath": "params_shard_158.bin", | |
| "format": "raw-shard", | |
| "nbytes": 180355072, | |
| "records": [ | |
| { | |
| "name": "model.layers.21.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 44032, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 180355072, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "47c79313d8c92a6567745b393f47ceb7" | |
| }, | |
| { | |
| "dataPath": "params_shard_159.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22544384, | |
| "records": [ | |
| { | |
| "name": "model.layers.21.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 44032, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "6ba908553afa9b94a0510f1d21384b64" | |
| }, | |
| { | |
| "dataPath": "params_shard_160.bin", | |
| "format": "raw-shard", | |
| "nbytes": 41943040, | |
| "records": [ | |
| { | |
| "name": "model.layers.21.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 10240, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 41943040, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b7fabcbe1096acc1c3516af3dc35d497" | |
| }, | |
| { | |
| "dataPath": "params_shard_161.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32047104, | |
| "records": [ | |
| { | |
| "name": "model.layers.20.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 10240, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5242880, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.20.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4194304, | |
| "byteOffset": 5242880 | |
| }, | |
| { | |
| "name": "model.layers.20.input_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 9437184 | |
| }, | |
| { | |
| "name": "model.layers.20.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 688 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11272192, | |
| "byteOffset": 9453568 | |
| }, | |
| { | |
| "name": "model.layers.20.post_attention_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 20725760 | |
| }, | |
| { | |
| "name": "model.layers.21.input_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 20742144 | |
| }, | |
| { | |
| "name": "model.layers.21.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 688 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11272192, | |
| "byteOffset": 20758528 | |
| }, | |
| { | |
| "name": "model.layers.21.post_attention_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 32030720 | |
| } | |
| ], | |
| "md5sum": "7e63348a3dcf3d4a59d05fc8a3c4fb84" | |
| }, | |
| { | |
| "dataPath": "params_shard_162.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "model.layers.21.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d7315d5664f6f829d9fbdf8adb722230" | |
| }, | |
| { | |
| "dataPath": "params_shard_163.bin", | |
| "format": "raw-shard", | |
| "nbytes": 90177536, | |
| "records": [ | |
| { | |
| "name": "model.layers.22.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 2752 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 90177536, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "247511f30296eb7f00483ae554e7bdad" | |
| }, | |
| { | |
| "dataPath": "params_shard_164.bin", | |
| "format": "raw-shard", | |
| "nbytes": 180355072, | |
| "records": [ | |
| { | |
| "name": "model.layers.22.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 44032, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 180355072, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e98b51e2f6392e771daf117202796813" | |
| }, | |
| { | |
| "dataPath": "params_shard_165.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22544384, | |
| "records": [ | |
| { | |
| "name": "model.layers.22.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 44032, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "1ddd55657072081d480458d4e45d6ff5" | |
| }, | |
| { | |
| "dataPath": "params_shard_166.bin", | |
| "format": "raw-shard", | |
| "nbytes": 41943040, | |
| "records": [ | |
| { | |
| "name": "model.layers.22.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 10240, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 41943040, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "9c2a89ec8bda20154a4f4268a7411266" | |
| }, | |
| { | |
| "dataPath": "params_shard_167.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "model.layers.22.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "df55db7445f73888a7b73c7f412150c9" | |
| }, | |
| { | |
| "dataPath": "params_shard_168.bin", | |
| "format": "raw-shard", | |
| "nbytes": 90177536, | |
| "records": [ | |
| { | |
| "name": "model.layers.23.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 2752 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 90177536, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "4a1e570baebffd7bd54337ba6dd80f9f" | |
| }, | |
| { | |
| "dataPath": "params_shard_169.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30195712, | |
| "records": [ | |
| { | |
| "name": "model.layers.21.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 10240, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5242880, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.21.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4194304, | |
| "byteOffset": 5242880 | |
| }, | |
| { | |
| "name": "model.layers.22.input_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 9437184 | |
| }, | |
| { | |
| "name": "model.layers.22.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 688 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11272192, | |
| "byteOffset": 9453568 | |
| }, | |
| { | |
| "name": "model.layers.22.post_attention_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 20725760 | |
| }, | |
| { | |
| "name": "model.layers.22.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 10240, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5242880, | |
| "byteOffset": 20742144 | |
| }, | |
| { | |
| "name": "model.layers.22.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4194304, | |
| "byteOffset": 25985024 | |
| }, | |
| { | |
| "name": "model.layers.23.input_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 30179328 | |
| } | |
| ], | |
| "md5sum": "1e766f418d94098aa7842e7d55c10ab7" | |
| }, | |
| { | |
| "dataPath": "params_shard_170.bin", | |
| "format": "raw-shard", | |
| "nbytes": 180355072, | |
| "records": [ | |
| { | |
| "name": "model.layers.23.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 44032, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 180355072, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "464f450a36a62efe2199a770084a7bd0" | |
| }, | |
| { | |
| "dataPath": "params_shard_171.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22544384, | |
| "records": [ | |
| { | |
| "name": "model.layers.23.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 44032, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "a6ca59c19d4e0807000e7268ea37f101" | |
| }, | |
| { | |
| "dataPath": "params_shard_172.bin", | |
| "format": "raw-shard", | |
| "nbytes": 41943040, | |
| "records": [ | |
| { | |
| "name": "model.layers.23.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 10240, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 41943040, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "8888b677ae78133f6ca29221902210f8" | |
| }, | |
| { | |
| "dataPath": "params_shard_173.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "model.layers.23.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "a2de4d74e6e3e053c35f7eacdd921d7e" | |
| }, | |
| { | |
| "dataPath": "params_shard_174.bin", | |
| "format": "raw-shard", | |
| "nbytes": 90177536, | |
| "records": [ | |
| { | |
| "name": "model.layers.24.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 2752 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 90177536, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ac3fae478bca212db51093f79a8b9e01" | |
| }, | |
| { | |
| "dataPath": "params_shard_175.bin", | |
| "format": "raw-shard", | |
| "nbytes": 180355072, | |
| "records": [ | |
| { | |
| "name": "model.layers.24.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 44032, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 180355072, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "130bc7e10886a0084cd604ab3ef7d9ee" | |
| }, | |
| { | |
| "dataPath": "params_shard_176.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22544384, | |
| "records": [ | |
| { | |
| "name": "model.layers.24.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 44032, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "252065824b28896455adcd97b5823d68" | |
| }, | |
| { | |
| "dataPath": "params_shard_177.bin", | |
| "format": "raw-shard", | |
| "nbytes": 41943040, | |
| "records": [ | |
| { | |
| "name": "model.layers.24.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 10240, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 41943040, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "2500498d9066098fa38b591930d8e0f7" | |
| }, | |
| { | |
| "dataPath": "params_shard_178.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32030720, | |
| "records": [ | |
| { | |
| "name": "model.layers.23.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 688 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11272192, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.23.post_attention_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 11272192 | |
| }, | |
| { | |
| "name": "model.layers.23.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 10240, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5242880, | |
| "byteOffset": 11288576 | |
| }, | |
| { | |
| "name": "model.layers.23.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4194304, | |
| "byteOffset": 16531456 | |
| }, | |
| { | |
| "name": "model.layers.24.input_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 20725760 | |
| }, | |
| { | |
| "name": "model.layers.24.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 688 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11272192, | |
| "byteOffset": 20742144 | |
| }, | |
| { | |
| "name": "model.layers.24.post_attention_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 32014336 | |
| } | |
| ], | |
| "md5sum": "28cfa0779cbd7ac7f91fded88b5d737e" | |
| }, | |
| { | |
| "dataPath": "params_shard_179.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "model.layers.24.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "2b52c1827ce2879d726d0a8a846982ee" | |
| }, | |
| { | |
| "dataPath": "params_shard_180.bin", | |
| "format": "raw-shard", | |
| "nbytes": 90177536, | |
| "records": [ | |
| { | |
| "name": "model.layers.25.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 2752 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 90177536, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b4e9634d0a6d2804270e3269b04d0229" | |
| }, | |
| { | |
| "dataPath": "params_shard_181.bin", | |
| "format": "raw-shard", | |
| "nbytes": 180355072, | |
| "records": [ | |
| { | |
| "name": "model.layers.25.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 44032, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 180355072, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "97dbe5f0e3ce8dac0733ea5f92bb1cb9" | |
| }, | |
| { | |
| "dataPath": "params_shard_182.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22544384, | |
| "records": [ | |
| { | |
| "name": "model.layers.25.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 44032, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "7f0b1f52a33d22dd55b4efa68ad6cd19" | |
| }, | |
| { | |
| "dataPath": "params_shard_183.bin", | |
| "format": "raw-shard", | |
| "nbytes": 41943040, | |
| "records": [ | |
| { | |
| "name": "model.layers.25.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 10240, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 41943040, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "7d80e2567e2de3dcb50aec4730e65fa1" | |
| }, | |
| { | |
| "dataPath": "params_shard_184.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "model.layers.25.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "09c1e5e38595ea88863631e4c71c26dd" | |
| }, | |
| { | |
| "dataPath": "params_shard_185.bin", | |
| "format": "raw-shard", | |
| "nbytes": 90177536, | |
| "records": [ | |
| { | |
| "name": "model.layers.26.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 2752 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 90177536, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "1ba62e1ed7074b112599ba17eeed1350" | |
| }, | |
| { | |
| "dataPath": "params_shard_186.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30195712, | |
| "records": [ | |
| { | |
| "name": "model.layers.24.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 10240, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5242880, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.24.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4194304, | |
| "byteOffset": 5242880 | |
| }, | |
| { | |
| "name": "model.layers.25.input_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 9437184 | |
| }, | |
| { | |
| "name": "model.layers.25.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 688 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11272192, | |
| "byteOffset": 9453568 | |
| }, | |
| { | |
| "name": "model.layers.25.post_attention_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 20725760 | |
| }, | |
| { | |
| "name": "model.layers.25.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 10240, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5242880, | |
| "byteOffset": 20742144 | |
| }, | |
| { | |
| "name": "model.layers.25.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4194304, | |
| "byteOffset": 25985024 | |
| }, | |
| { | |
| "name": "model.layers.26.input_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 30179328 | |
| } | |
| ], | |
| "md5sum": "2451813a82d745fd70e8e6c62fda0ef5" | |
| }, | |
| { | |
| "dataPath": "params_shard_187.bin", | |
| "format": "raw-shard", | |
| "nbytes": 180355072, | |
| "records": [ | |
| { | |
| "name": "model.layers.26.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 44032, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 180355072, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "5d6d46e22f5cd4b2e8309dbfa9dc1f0d" | |
| }, | |
| { | |
| "dataPath": "params_shard_188.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22544384, | |
| "records": [ | |
| { | |
| "name": "model.layers.26.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 44032, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "58e3fb24910f38bb315fdd2c3cb4e3dd" | |
| }, | |
| { | |
| "dataPath": "params_shard_189.bin", | |
| "format": "raw-shard", | |
| "nbytes": 41943040, | |
| "records": [ | |
| { | |
| "name": "model.layers.26.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 10240, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 41943040, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d75e5af03c9e51e73b920a3f2a0b4bfc" | |
| }, | |
| { | |
| "dataPath": "params_shard_190.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "model.layers.26.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "0b241b7aee8ac3a06410264e28ecf72e" | |
| }, | |
| { | |
| "dataPath": "params_shard_191.bin", | |
| "format": "raw-shard", | |
| "nbytes": 180355072, | |
| "records": [ | |
| { | |
| "name": "model.layers.27.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 44032, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 180355072, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "1808a360eb3f06ac93dd595c8b9309a3" | |
| }, | |
| { | |
| "dataPath": "params_shard_192.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22544384, | |
| "records": [ | |
| { | |
| "name": "model.layers.27.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 44032, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "eb00f2b7813434bd71fac68675eb7be9" | |
| }, | |
| { | |
| "dataPath": "params_shard_193.bin", | |
| "format": "raw-shard", | |
| "nbytes": 41943040, | |
| "records": [ | |
| { | |
| "name": "model.layers.27.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 10240, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 41943040, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "8d1d63601f2c392700b07fbf1d7d3542" | |
| }, | |
| { | |
| "dataPath": "params_shard_194.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "model.layers.27.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "3fb292b2880e837b54554bef965bbda8" | |
| }, | |
| { | |
| "dataPath": "params_shard_195.bin", | |
| "format": "raw-shard", | |
| "nbytes": 90177536, | |
| "records": [ | |
| { | |
| "name": "model.layers.27.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 2752 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 90177536, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "5cff6eb5d0ceb2a43ba08f157c67cd45" | |
| }, | |
| { | |
| "dataPath": "params_shard_196.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30179328, | |
| "records": [ | |
| { | |
| "name": "model.layers.26.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 688 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11272192, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.26.post_attention_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 11272192 | |
| }, | |
| { | |
| "name": "model.layers.26.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 10240, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5242880, | |
| "byteOffset": 11288576 | |
| }, | |
| { | |
| "name": "model.layers.26.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4194304, | |
| "byteOffset": 16531456 | |
| }, | |
| { | |
| "name": "model.layers.27.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 10240, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5242880, | |
| "byteOffset": 20725760 | |
| }, | |
| { | |
| "name": "model.layers.27.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4194304, | |
| "byteOffset": 25968640 | |
| }, | |
| { | |
| "name": "model.layers.27.input_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 30162944 | |
| } | |
| ], | |
| "md5sum": "075025b52cbd6017ad0f67bf56e2ff38" | |
| }, | |
| { | |
| "dataPath": "params_shard_197.bin", | |
| "format": "raw-shard", | |
| "nbytes": 90177536, | |
| "records": [ | |
| { | |
| "name": "model.layers.28.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 2752 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 90177536, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "1f25bc33082239e13dbdfc1ec1c415b5" | |
| }, | |
| { | |
| "dataPath": "params_shard_198.bin", | |
| "format": "raw-shard", | |
| "nbytes": 180355072, | |
| "records": [ | |
| { | |
| "name": "model.layers.28.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 44032, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 180355072, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "04b8e5c934ff18466ccc181e2e389a56" | |
| }, | |
| { | |
| "dataPath": "params_shard_199.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22544384, | |
| "records": [ | |
| { | |
| "name": "model.layers.28.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 44032, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c6bde864dca08dacc8fed57d900806a3" | |
| }, | |
| { | |
| "dataPath": "params_shard_200.bin", | |
| "format": "raw-shard", | |
| "nbytes": 41943040, | |
| "records": [ | |
| { | |
| "name": "model.layers.28.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 10240, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 41943040, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "2c737a08b6f70b436a9c04498d4b7358" | |
| }, | |
| { | |
| "dataPath": "params_shard_201.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "model.layers.28.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "33f0bbc5b4370634390b1fa711f7be55" | |
| }, | |
| { | |
| "dataPath": "params_shard_202.bin", | |
| "format": "raw-shard", | |
| "nbytes": 90177536, | |
| "records": [ | |
| { | |
| "name": "model.layers.29.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 2752 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 90177536, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "060d364a27d7bd0495bf4522c296a157" | |
| }, | |
| { | |
| "dataPath": "params_shard_203.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32047104, | |
| "records": [ | |
| { | |
| "name": "model.layers.27.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 688 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11272192, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.27.post_attention_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 11272192 | |
| }, | |
| { | |
| "name": "model.layers.28.input_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 11288576 | |
| }, | |
| { | |
| "name": "model.layers.28.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 688 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11272192, | |
| "byteOffset": 11304960 | |
| }, | |
| { | |
| "name": "model.layers.28.post_attention_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 22577152 | |
| }, | |
| { | |
| "name": "model.layers.28.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 10240, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5242880, | |
| "byteOffset": 22593536 | |
| }, | |
| { | |
| "name": "model.layers.28.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4194304, | |
| "byteOffset": 27836416 | |
| }, | |
| { | |
| "name": "model.layers.29.input_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 32030720 | |
| } | |
| ], | |
| "md5sum": "b1274f99b9015e05629432be297b7145" | |
| }, | |
| { | |
| "dataPath": "params_shard_204.bin", | |
| "format": "raw-shard", | |
| "nbytes": 180355072, | |
| "records": [ | |
| { | |
| "name": "model.layers.29.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 44032, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 180355072, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d5d2c7d6d4e40ce58cb0fe6613f44dd0" | |
| }, | |
| { | |
| "dataPath": "params_shard_205.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22544384, | |
| "records": [ | |
| { | |
| "name": "model.layers.29.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 44032, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "a4541a11dba0994698efceef84d23a27" | |
| }, | |
| { | |
| "dataPath": "params_shard_206.bin", | |
| "format": "raw-shard", | |
| "nbytes": 41943040, | |
| "records": [ | |
| { | |
| "name": "model.layers.29.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 10240, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 41943040, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "6807fcc435c7eac9a544c017df219a9c" | |
| }, | |
| { | |
| "dataPath": "params_shard_207.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "model.layers.29.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "46a394dd3d585dab1723fd24e24766bd" | |
| }, | |
| { | |
| "dataPath": "params_shard_208.bin", | |
| "format": "raw-shard", | |
| "nbytes": 90177536, | |
| "records": [ | |
| { | |
| "name": "model.layers.30.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 2752 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 90177536, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "851a969057b7c45167f213901ba95869" | |
| }, | |
| { | |
| "dataPath": "params_shard_209.bin", | |
| "format": "raw-shard", | |
| "nbytes": 180355072, | |
| "records": [ | |
| { | |
| "name": "model.layers.30.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 44032, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 180355072, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d415791800d6bb79cad406f5f2940d0f" | |
| }, | |
| { | |
| "dataPath": "params_shard_210.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22544384, | |
| "records": [ | |
| { | |
| "name": "model.layers.30.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 44032, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "4e1e3bf0b1052c506bfe2958bca78765" | |
| }, | |
| { | |
| "dataPath": "params_shard_211.bin", | |
| "format": "raw-shard", | |
| "nbytes": 41943040, | |
| "records": [ | |
| { | |
| "name": "model.layers.30.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 10240, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 41943040, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c9e46a56949185f0ad1dcad443685831" | |
| }, | |
| { | |
| "dataPath": "params_shard_212.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32030720, | |
| "records": [ | |
| { | |
| "name": "model.layers.29.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 688 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11272192, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.29.post_attention_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 11272192 | |
| }, | |
| { | |
| "name": "model.layers.29.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 10240, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5242880, | |
| "byteOffset": 11288576 | |
| }, | |
| { | |
| "name": "model.layers.29.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4194304, | |
| "byteOffset": 16531456 | |
| }, | |
| { | |
| "name": "model.layers.30.input_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 20725760 | |
| }, | |
| { | |
| "name": "model.layers.30.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 688 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11272192, | |
| "byteOffset": 20742144 | |
| }, | |
| { | |
| "name": "model.layers.30.post_attention_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 32014336 | |
| } | |
| ], | |
| "md5sum": "bd34cab085a639e73fe2645ec5173714" | |
| }, | |
| { | |
| "dataPath": "params_shard_213.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "model.layers.30.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "63cc32a102bf6a75182147bece6fbb37" | |
| }, | |
| { | |
| "dataPath": "params_shard_214.bin", | |
| "format": "raw-shard", | |
| "nbytes": 90177536, | |
| "records": [ | |
| { | |
| "name": "model.layers.31.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 2752 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 90177536, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "1a07242e294d98969f5313e31a816cd1" | |
| }, | |
| { | |
| "dataPath": "params_shard_215.bin", | |
| "format": "raw-shard", | |
| "nbytes": 180355072, | |
| "records": [ | |
| { | |
| "name": "model.layers.31.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 44032, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 180355072, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b19e7979d5bff76ee438a935893a243f" | |
| }, | |
| { | |
| "dataPath": "params_shard_216.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22544384, | |
| "records": [ | |
| { | |
| "name": "model.layers.31.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 44032, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "15e523da987debed681f969072628564" | |
| }, | |
| { | |
| "dataPath": "params_shard_217.bin", | |
| "format": "raw-shard", | |
| "nbytes": 41943040, | |
| "records": [ | |
| { | |
| "name": "model.layers.31.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 10240, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 41943040, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f7817faf348570c32da60f0bb7f81836" | |
| }, | |
| { | |
| "dataPath": "params_shard_218.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "model.layers.31.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "cb537df72ae8281eac89c79b0688d2b4" | |
| }, | |
| { | |
| "dataPath": "params_shard_219.bin", | |
| "format": "raw-shard", | |
| "nbytes": 90177536, | |
| "records": [ | |
| { | |
| "name": "model.layers.32.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 2752 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 90177536, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "8ce183eb92a5e55d5fbd8e091d2e1403" | |
| }, | |
| { | |
| "dataPath": "params_shard_220.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30195712, | |
| "records": [ | |
| { | |
| "name": "model.layers.30.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 10240, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5242880, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.30.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4194304, | |
| "byteOffset": 5242880 | |
| }, | |
| { | |
| "name": "model.layers.31.input_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 9437184 | |
| }, | |
| { | |
| "name": "model.layers.31.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 688 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11272192, | |
| "byteOffset": 9453568 | |
| }, | |
| { | |
| "name": "model.layers.31.post_attention_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 20725760 | |
| }, | |
| { | |
| "name": "model.layers.31.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 10240, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5242880, | |
| "byteOffset": 20742144 | |
| }, | |
| { | |
| "name": "model.layers.31.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4194304, | |
| "byteOffset": 25985024 | |
| }, | |
| { | |
| "name": "model.layers.32.input_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 30179328 | |
| } | |
| ], | |
| "md5sum": "835e1d251b5e8933f15f6cfd75710172" | |
| }, | |
| { | |
| "dataPath": "params_shard_221.bin", | |
| "format": "raw-shard", | |
| "nbytes": 180355072, | |
| "records": [ | |
| { | |
| "name": "model.layers.32.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 44032, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 180355072, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "9016d61974c2e16ded6ace9aa116dc39" | |
| }, | |
| { | |
| "dataPath": "params_shard_222.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22544384, | |
| "records": [ | |
| { | |
| "name": "model.layers.32.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 44032, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "5c05a994a6fbe52800fd317078388b2d" | |
| }, | |
| { | |
| "dataPath": "params_shard_223.bin", | |
| "format": "raw-shard", | |
| "nbytes": 41943040, | |
| "records": [ | |
| { | |
| "name": "model.layers.32.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 10240, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 41943040, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "0a4f68fb05211be98a7da493c17caa7c" | |
| }, | |
| { | |
| "dataPath": "params_shard_224.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "model.layers.32.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ce12f299cf817c87cf9f35fe2a721229" | |
| }, | |
| { | |
| "dataPath": "params_shard_225.bin", | |
| "format": "raw-shard", | |
| "nbytes": 90177536, | |
| "records": [ | |
| { | |
| "name": "model.layers.33.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 2752 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 90177536, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e5bd73d9b2a1238271cbc804531aee6e" | |
| }, | |
| { | |
| "dataPath": "params_shard_226.bin", | |
| "format": "raw-shard", | |
| "nbytes": 180355072, | |
| "records": [ | |
| { | |
| "name": "model.layers.33.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 44032, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 180355072, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "5684ad4dfe99a185cb8bb64e2447c2cd" | |
| }, | |
| { | |
| "dataPath": "params_shard_227.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22544384, | |
| "records": [ | |
| { | |
| "name": "model.layers.33.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 44032, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "4ad03a5f05b7bb16f526323ff9a5884c" | |
| }, | |
| { | |
| "dataPath": "params_shard_228.bin", | |
| "format": "raw-shard", | |
| "nbytes": 41943040, | |
| "records": [ | |
| { | |
| "name": "model.layers.33.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 10240, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 41943040, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "5822ec931c227f701bf7db1bd4aa04ad" | |
| }, | |
| { | |
| "dataPath": "params_shard_229.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32030720, | |
| "records": [ | |
| { | |
| "name": "model.layers.32.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 688 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11272192, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.32.post_attention_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 11272192 | |
| }, | |
| { | |
| "name": "model.layers.32.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 10240, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5242880, | |
| "byteOffset": 11288576 | |
| }, | |
| { | |
| "name": "model.layers.32.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4194304, | |
| "byteOffset": 16531456 | |
| }, | |
| { | |
| "name": "model.layers.33.input_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 20725760 | |
| }, | |
| { | |
| "name": "model.layers.33.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 688 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11272192, | |
| "byteOffset": 20742144 | |
| }, | |
| { | |
| "name": "model.layers.33.post_attention_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 32014336 | |
| } | |
| ], | |
| "md5sum": "9cddb6db59159539545d6cdcdb6844b3" | |
| }, | |
| { | |
| "dataPath": "params_shard_230.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "model.layers.33.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "eeff6fd22c02f841ae725d6ded427cbf" | |
| }, | |
| { | |
| "dataPath": "params_shard_231.bin", | |
| "format": "raw-shard", | |
| "nbytes": 180355072, | |
| "records": [ | |
| { | |
| "name": "model.layers.34.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 44032, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 180355072, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "686bbfa47b0fbe2fa5f164e49a6a1e5f" | |
| }, | |
| { | |
| "dataPath": "params_shard_232.bin", | |
| "format": "raw-shard", | |
| "nbytes": 41943040, | |
| "records": [ | |
| { | |
| "name": "model.layers.34.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 10240, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 41943040, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "37144d8b1b51b3d2e4dd6202e83f11d3" | |
| }, | |
| { | |
| "dataPath": "params_shard_233.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31981568, | |
| "records": [ | |
| { | |
| "name": "model.layers.33.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 10240, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5242880, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.33.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4194304, | |
| "byteOffset": 5242880 | |
| }, | |
| { | |
| "name": "model.layers.34.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 44032, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 9437184 | |
| } | |
| ], | |
| "md5sum": "4ca2d40f1cbb8b4b0991e5f0f51d5a47" | |
| }, | |
| { | |
| "dataPath": "params_shard_234.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "model.layers.34.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "7cc199ef6471124e6b89f5801af73c2f" | |
| }, | |
| { | |
| "dataPath": "params_shard_235.bin", | |
| "format": "raw-shard", | |
| "nbytes": 90177536, | |
| "records": [ | |
| { | |
| "name": "model.layers.34.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 2752 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 90177536, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "adf8d155654cf6ba540491f8384819dd" | |
| }, | |
| { | |
| "dataPath": "params_shard_236.bin", | |
| "format": "raw-shard", | |
| "nbytes": 90177536, | |
| "records": [ | |
| { | |
| "name": "model.layers.35.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 2752 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 90177536, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ccccbf60635d4a55fe95855ddde8e961" | |
| }, | |
| { | |
| "dataPath": "params_shard_237.bin", | |
| "format": "raw-shard", | |
| "nbytes": 180355072, | |
| "records": [ | |
| { | |
| "name": "model.layers.35.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 44032, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 180355072, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "472f81265b94dc8d1a92b811a6ae38e6" | |
| }, | |
| { | |
| "dataPath": "params_shard_238.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22544384, | |
| "records": [ | |
| { | |
| "name": "model.layers.35.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 44032, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "0209a2adc1b4280bb22c3adce50d069e" | |
| }, | |
| { | |
| "dataPath": "params_shard_239.bin", | |
| "format": "raw-shard", | |
| "nbytes": 41943040, | |
| "records": [ | |
| { | |
| "name": "model.layers.35.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 10240, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 41943040, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "68a919666e247956e8384aa858392d43" | |
| }, | |
| { | |
| "dataPath": "params_shard_240.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32047104, | |
| "records": [ | |
| { | |
| "name": "model.layers.34.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 10240, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5242880, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.34.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4194304, | |
| "byteOffset": 5242880 | |
| }, | |
| { | |
| "name": "model.layers.34.input_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 9437184 | |
| }, | |
| { | |
| "name": "model.layers.34.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 688 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11272192, | |
| "byteOffset": 9453568 | |
| }, | |
| { | |
| "name": "model.layers.34.post_attention_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 20725760 | |
| }, | |
| { | |
| "name": "model.layers.35.input_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 20742144 | |
| }, | |
| { | |
| "name": "model.layers.35.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 688 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11272192, | |
| "byteOffset": 20758528 | |
| }, | |
| { | |
| "name": "model.layers.35.post_attention_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 32030720 | |
| } | |
| ], | |
| "md5sum": "1da05eeee0d9e41ef3d60eae8a2b0959" | |
| }, | |
| { | |
| "dataPath": "params_shard_241.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "model.layers.35.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e0e1bb6009923c8ea817d8741e418ef4" | |
| }, | |
| { | |
| "dataPath": "params_shard_242.bin", | |
| "format": "raw-shard", | |
| "nbytes": 90177536, | |
| "records": [ | |
| { | |
| "name": "model.layers.36.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 2752 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 90177536, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "07067e449fdeb36f43463be2de01d14d" | |
| }, | |
| { | |
| "dataPath": "params_shard_243.bin", | |
| "format": "raw-shard", | |
| "nbytes": 180355072, | |
| "records": [ | |
| { | |
| "name": "model.layers.36.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 44032, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 180355072, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "5cbaf75ce71574708cca35b9b131893b" | |
| }, | |
| { | |
| "dataPath": "params_shard_244.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22544384, | |
| "records": [ | |
| { | |
| "name": "model.layers.36.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 44032, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "a2660d70bbe50279e8e9cfea1797c4c2" | |
| }, | |
| { | |
| "dataPath": "params_shard_245.bin", | |
| "format": "raw-shard", | |
| "nbytes": 41943040, | |
| "records": [ | |
| { | |
| "name": "model.layers.36.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 10240, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 41943040, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "bf32cc4c0111399d02212c676dffbf46" | |
| }, | |
| { | |
| "dataPath": "params_shard_246.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "model.layers.36.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e011127e54e8b3b162aaf2bd0cfa72d4" | |
| }, | |
| { | |
| "dataPath": "params_shard_247.bin", | |
| "format": "raw-shard", | |
| "nbytes": 90177536, | |
| "records": [ | |
| { | |
| "name": "model.layers.37.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 2752 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 90177536, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "1c94c23c454a2f428a981d51f227fa0d" | |
| }, | |
| { | |
| "dataPath": "params_shard_248.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30195712, | |
| "records": [ | |
| { | |
| "name": "model.layers.35.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 10240, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5242880, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.35.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4194304, | |
| "byteOffset": 5242880 | |
| }, | |
| { | |
| "name": "model.layers.36.input_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 9437184 | |
| }, | |
| { | |
| "name": "model.layers.36.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 688 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11272192, | |
| "byteOffset": 9453568 | |
| }, | |
| { | |
| "name": "model.layers.36.post_attention_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 20725760 | |
| }, | |
| { | |
| "name": "model.layers.36.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 10240, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5242880, | |
| "byteOffset": 20742144 | |
| }, | |
| { | |
| "name": "model.layers.36.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4194304, | |
| "byteOffset": 25985024 | |
| }, | |
| { | |
| "name": "model.layers.37.input_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 30179328 | |
| } | |
| ], | |
| "md5sum": "545cc59fb21258c72b343ccb3c073ed9" | |
| }, | |
| { | |
| "dataPath": "params_shard_249.bin", | |
| "format": "raw-shard", | |
| "nbytes": 180355072, | |
| "records": [ | |
| { | |
| "name": "model.layers.37.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 44032, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 180355072, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "32ec24098029839b2f4e47f1ecfd4055" | |
| }, | |
| { | |
| "dataPath": "params_shard_250.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22544384, | |
| "records": [ | |
| { | |
| "name": "model.layers.37.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 44032, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f7793ea955e1961cbfc3ba7a7a9a5df6" | |
| }, | |
| { | |
| "dataPath": "params_shard_251.bin", | |
| "format": "raw-shard", | |
| "nbytes": 41943040, | |
| "records": [ | |
| { | |
| "name": "model.layers.37.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 10240, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 41943040, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d466fd80cab65d2fc236aa844056f29a" | |
| }, | |
| { | |
| "dataPath": "params_shard_252.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "model.layers.37.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e346fe4656160b27644a0a17a134b78d" | |
| }, | |
| { | |
| "dataPath": "params_shard_253.bin", | |
| "format": "raw-shard", | |
| "nbytes": 90177536, | |
| "records": [ | |
| { | |
| "name": "model.layers.38.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 2752 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 90177536, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d7725cdb6b4c98e246d2eb265f164d6a" | |
| }, | |
| { | |
| "dataPath": "params_shard_254.bin", | |
| "format": "raw-shard", | |
| "nbytes": 180355072, | |
| "records": [ | |
| { | |
| "name": "model.layers.38.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 44032, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 180355072, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "8f39cb9b5d0f0c0d03300569513dd3f3" | |
| }, | |
| { | |
| "dataPath": "params_shard_255.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22544384, | |
| "records": [ | |
| { | |
| "name": "model.layers.38.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 44032, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "691a8bd2895631838766cb489c90f194" | |
| }, | |
| { | |
| "dataPath": "params_shard_256.bin", | |
| "format": "raw-shard", | |
| "nbytes": 41943040, | |
| "records": [ | |
| { | |
| "name": "model.layers.38.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 10240, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 41943040, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c2cd1f7156666c4754c5a0a526909bf4" | |
| }, | |
| { | |
| "dataPath": "params_shard_257.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32030720, | |
| "records": [ | |
| { | |
| "name": "model.layers.37.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 688 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11272192, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.37.post_attention_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 11272192 | |
| }, | |
| { | |
| "name": "model.layers.37.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 10240, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5242880, | |
| "byteOffset": 11288576 | |
| }, | |
| { | |
| "name": "model.layers.37.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4194304, | |
| "byteOffset": 16531456 | |
| }, | |
| { | |
| "name": "model.layers.38.input_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 20725760 | |
| }, | |
| { | |
| "name": "model.layers.38.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 688 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11272192, | |
| "byteOffset": 20742144 | |
| }, | |
| { | |
| "name": "model.layers.38.post_attention_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 32014336 | |
| } | |
| ], | |
| "md5sum": "97f7a682b7348ad2197385cc1b7d6150" | |
| }, | |
| { | |
| "dataPath": "params_shard_258.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "model.layers.38.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "25223d5e33d028dce8ba4a1f7337bdc7" | |
| }, | |
| { | |
| "dataPath": "params_shard_259.bin", | |
| "format": "raw-shard", | |
| "nbytes": 90177536, | |
| "records": [ | |
| { | |
| "name": "model.layers.39.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 2752 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 90177536, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ce8beae48fb45d51ab9c90196d9529f5" | |
| }, | |
| { | |
| "dataPath": "params_shard_260.bin", | |
| "format": "raw-shard", | |
| "nbytes": 180355072, | |
| "records": [ | |
| { | |
| "name": "model.layers.39.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 44032, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 180355072, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "dc6518177a6cdb857238fc802b4dc7af" | |
| }, | |
| { | |
| "dataPath": "params_shard_261.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22544384, | |
| "records": [ | |
| { | |
| "name": "model.layers.39.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 44032, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "a6a739179cc92ef42a73d34c3e96af9d" | |
| }, | |
| { | |
| "dataPath": "params_shard_262.bin", | |
| "format": "raw-shard", | |
| "nbytes": 41943040, | |
| "records": [ | |
| { | |
| "name": "model.layers.39.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 10240, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 41943040, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ead6688c27b2111acbdea0ec58ca5d51" | |
| }, | |
| { | |
| "dataPath": "params_shard_263.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "model.layers.39.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e6ea42b8542654f9c8d8600de9b936a8" | |
| }, | |
| { | |
| "dataPath": "params_shard_264.bin", | |
| "format": "raw-shard", | |
| "nbytes": 90177536, | |
| "records": [ | |
| { | |
| "name": "model.layers.40.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 2752 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 90177536, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "dc3354e02a7227a9647cb45455a7777b" | |
| }, | |
| { | |
| "dataPath": "params_shard_265.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30195712, | |
| "records": [ | |
| { | |
| "name": "model.layers.38.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 10240, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5242880, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.38.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4194304, | |
| "byteOffset": 5242880 | |
| }, | |
| { | |
| "name": "model.layers.39.input_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 9437184 | |
| }, | |
| { | |
| "name": "model.layers.39.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 688 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11272192, | |
| "byteOffset": 9453568 | |
| }, | |
| { | |
| "name": "model.layers.39.post_attention_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 20725760 | |
| }, | |
| { | |
| "name": "model.layers.39.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 10240, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5242880, | |
| "byteOffset": 20742144 | |
| }, | |
| { | |
| "name": "model.layers.39.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4194304, | |
| "byteOffset": 25985024 | |
| }, | |
| { | |
| "name": "model.layers.40.input_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 30179328 | |
| } | |
| ], | |
| "md5sum": "e58b0fb482d7b158aaec5ac3417f5157" | |
| }, | |
| { | |
| "dataPath": "params_shard_266.bin", | |
| "format": "raw-shard", | |
| "nbytes": 180355072, | |
| "records": [ | |
| { | |
| "name": "model.layers.40.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 44032, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 180355072, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "a1d5619e879b000764a89957b9345154" | |
| }, | |
| { | |
| "dataPath": "params_shard_267.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22544384, | |
| "records": [ | |
| { | |
| "name": "model.layers.40.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 44032, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "5e7a3fade972ee897b79d7f0c1315570" | |
| }, | |
| { | |
| "dataPath": "params_shard_268.bin", | |
| "format": "raw-shard", | |
| "nbytes": 41943040, | |
| "records": [ | |
| { | |
| "name": "model.layers.40.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 10240, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 41943040, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "4d03f17cb80f2ad821f3b5dca1ed05ea" | |
| }, | |
| { | |
| "dataPath": "params_shard_269.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "model.layers.40.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ef900be73173ab17f9c0f29ba3b70bc7" | |
| }, | |
| { | |
| "dataPath": "params_shard_270.bin", | |
| "format": "raw-shard", | |
| "nbytes": 180355072, | |
| "records": [ | |
| { | |
| "name": "model.layers.41.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 44032, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 180355072, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e4d0720f37e20ad197280cb526a5b4d0" | |
| }, | |
| { | |
| "dataPath": "params_shard_271.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22544384, | |
| "records": [ | |
| { | |
| "name": "model.layers.41.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 44032, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "df08277e8450479088bf918d288d5c86" | |
| }, | |
| { | |
| "dataPath": "params_shard_272.bin", | |
| "format": "raw-shard", | |
| "nbytes": 41943040, | |
| "records": [ | |
| { | |
| "name": "model.layers.41.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 10240, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 41943040, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ab37a5b7109ac6d4995529a65d652dfe" | |
| }, | |
| { | |
| "dataPath": "params_shard_273.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33554432, | |
| "records": [ | |
| { | |
| "name": "model.layers.41.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 8192, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33554432, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e94406000f1ac53f70d12553e422e8ca" | |
| }, | |
| { | |
| "dataPath": "params_shard_274.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30162944, | |
| "records": [ | |
| { | |
| "name": "model.layers.40.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 688 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11272192, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.40.post_attention_layernorm.weight", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 11272192 | |
| }, | |
| { | |
| "name": "model.layers.40.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 10240, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5242880, | |
| "byteOffset": 11288576 | |
| }, | |
| { | |
| "name": "model.layers.40.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4194304, | |
| "byteOffset": 16531456 | |
| }, | |
| { | |
| "name": "model.layers.41.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 10240, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5242880, | |
| "byteOffset": 20725760 | |
| }, | |
| { | |
| "name": "model.layers.41.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4194304, | |
| "byteOffset": 25968640 | |
| } | |
| ], | |
| "md5sum": "e911520af24a8d92158fd4bebee0357d" | |
| } | |
| ] | |
| } |