diff --git "a/tensor-cache.json" "b/tensor-cache.json" new file mode 100644--- /dev/null +++ "b/tensor-cache.json" @@ -0,0 +1,13367 @@ +{ + "metadata": { + "ParamSize": 885, + "ParamBytes": 45448593408.0, + "BitsPerParam": 4.619993414045882 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 622854144, + "records": [ + { + "name": "lm_head.q_weight", + "shape": [ + 152064, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 622854144, + "byteOffset": 0 + } + ], + "md5sum": "69623354f39f1b1fc3b73004ae2e4af2" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 77856768, + "records": [ + { + "name": "lm_head.q_scale", + "shape": [ + 152064, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 77856768, + "byteOffset": 0 + } + ], + "md5sum": "1eff3949187cebc031d5c3c9b15dca2b" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.79.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "ae3948436a625600d7f843611ec6fdf8" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.79.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "2604ad79a768b00715be1c628ceaf52c" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.79.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "a05d62a377c19d54ec4b70f432c48887" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 622854144, + "records": [ + { + "name": "model.embed_tokens.q_weight", + "shape": [ + 152064, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 622854144, + "byteOffset": 0 + } + ], + "md5sum": "86be08810928ea6eccec1b6ab8ecc41c" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 77856768, + "records": [ + { + "name": "model.embed_tokens.q_scale", + "shape": [ + 152064, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 77856768, + "byteOffset": 0 + } + ], + "md5sum": "7690469695095fd3e675f9b2c53cdfec" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "5177bd70f69922c1879e5c93f31af9e0" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "c3817d5d3ce53dca834956372bee1379" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.0.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "3240116e10fafb3e65d7b013a168a899" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "9a1d7d0af85d46eb37ee03b156dd8212" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "e9988bfe3d21311b23261ee5096a7b36" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 24645632, + "records": [ + { + "name": "model.layers.79.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.norm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15155200 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15171584 + }, + { + "name": "model.layers.0.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15187968 + }, + { + "name": "model.layers.0.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15208448 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20451328 + } + ], + "md5sum": "e85ff832b441ab74419273cbf814d665" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "840b485439c4eee2d4dd5fab22bd071c" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "cfc694518207cb7fb49f2455875341e4" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "66623e910f3af11788f4e7b9b4406f5f" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.1.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "f04405d4aaf11918edfbd02a3a6ca7e4" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 30330880, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 15155200 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 30294016 + }, + { + "name": "model.layers.1.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 30310400 + } + ], + "md5sum": "fb58e40a475915aa4a161d0227a1f0cd" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "19fa3cf9a7afe66839ee1a85e521cd9c" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "290007dcb3f737183980c37befbb78a5" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "236755d3b68ec7091706f689d6fb1076" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "789ebe8087f50b8be256f0258eb4ccfb" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.2.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "d7a72373790c220d0b186e9b353b5dbc" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "a7de1b99dd1633154051152cb9440758" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 29872128, + "records": [ + { + "name": "model.layers.1.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 0 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 5242880 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 9437184 + }, + { + "name": "model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 9453568 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24592384 + }, + { + "name": "model.layers.2.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24608768 + }, + { + "name": "model.layers.2.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24629248 + } + ], + "md5sum": "78cb3aba2d5e643799b4c0586bea95b1" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "b33dcfc10b3d4711211fa3d50b8da0ed" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "f03992006d585318402cebd3f9bc3d37" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "4b71084ea33ebfc3b3dc27bf275d4c99" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.10.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "7881ae23370ca2e2fbe5b795540bd71c" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "d9494307d1b62b9be2d33e8ae2536690" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "9232c04b3588dcc1e6c53d715f523538" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 28856320, + "records": [ + { + "name": "model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4210688 + }, + { + "name": "model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4227072 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19365888 + }, + { + "name": "model.layers.10.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 19382272 + }, + { + "name": "model.layers.10.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 19402752 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 24645632 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 28839936 + } + ], + "md5sum": "cc11769e540239985ecdcb4b6ae8e7ac" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "5e6a9af06b8b43389bf29e7193fb5714" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "dbcc19a48347d81ee477cc4db3930e06" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.11.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "8875787663642b47bd6b09ea38bf9ec2" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "9b0e0bb63a3b8d77fb5f53801a71fad9" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "bd33ccf69370d5ab365d2aa107d5b144" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.11.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.11.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "c2eb7139b7380425281c9fe2c8699ac8" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "7753c76696f9809252ae1c053c03a485" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "aa38744f1299072056426cbd2328a8be" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "b7f31d79854d915a30f8d31606c6ff21" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.12.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "8363e417af1b140e82140ea24d1b6c62" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 30314496, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 15138816 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 30277632 + }, + { + "name": "model.layers.12.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 30294016 + } + ], + "md5sum": "a6d5ddcd78185c874e536b8037086283" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "a6612af08c818a1a6cddfbe350309c65" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "957e191a018c636bc552d03f8aeb507f" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "bb0f546b701d3596d60756cf5ce8a62a" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "7028bf4a5c17d6543b37408f1055596f" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.13.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "683cfd1929bfaf339a38a06a0644da72" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "f46b94af4e84cf7d67f4a6a5fe85975c" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 29872128, + "records": [ + { + "name": "model.layers.12.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 0 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 5242880 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 9437184 + }, + { + "name": "model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 9453568 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24592384 + }, + { + "name": "model.layers.13.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24608768 + }, + { + "name": "model.layers.13.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24629248 + } + ], + "md5sum": "fae0c786423399124182673085584d26" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.14.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "c90ca3c6a33c96d59edea6c2591ac51f" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "65415c71e917cbdaab5add6a08e9fe5c" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "33af702c16a5877f94c4cd2d2df2ef2f" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "7858733cc3195d95a33c20a7da7079c4" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "7727e019a4b957da91ccf162bf859d72" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "c4d7100080760e8e146c14137b49befc" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 28839936, + "records": [ + { + "name": "model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4210688 + }, + { + "name": "model.layers.14.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 4227072 + }, + { + "name": "model.layers.14.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 4247552 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 9490432 + }, + { + "name": "model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 13684736 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 28823552 + } + ], + "md5sum": "c40ffbbb6a1c2e6cee830cfc06061648" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "20b7f3d4d4f91a26db257f5d9f847bf1" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "bce194c4ccf7b86b1c918bb1d0736b80" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.15.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "461d98129c181004bcfb9cd1befad5b0" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "550470917926b411752630f2917fe599" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "e44432b7bdf7d0e0d82a17ea09da2b4b" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "0d7b648659dda3b4b58e394462548c84" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.16.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "a3c390f428cd05596a934e501081f1c9" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "2e9717b40ce879fe134d7d9dbbd22c68" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 29908992, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.15.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.15.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24629248 + }, + { + "name": "model.layers.16.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24645632 + }, + { + "name": "model.layers.16.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24666112 + } + ], + "md5sum": "22ec41547de315c561db1794f8c6f2e4" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "aaace40f8389d53b99f7355dd92d92d8" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "d27aff2e3832178d4748c7d0754cdf41" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 19349504, + "records": [ + { + "name": "model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4194304 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19333120 + } + ], + "md5sum": "0428dba75e5f04f48a618d397ae3a203" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "9d4543d05f63900263d679586194ca00" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "1751a2391398d7cd311937020d1b9fbb" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.17.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "dc3effc6ef6abaa9b55b3f2ef507a20d" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "7ed2b4aa60056db33dd84de65fdec731" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "0af5521109eb84f1f5b13e9b0c9ead86" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "803dc010d22bcfb368b0561ab32989b6" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.18.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "42c46936b92629bd9e780cd2ec8ceac5" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "de7523646d1d36fd3ee6f6db898aa92f" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 29908992, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.17.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.17.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24629248 + }, + { + "name": "model.layers.18.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24645632 + }, + { + "name": "model.layers.18.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24666112 + } + ], + "md5sum": "ae528746662fa5c9770e106d09e4979a" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "816c63d01ffd3f3fbc8fd09d1cd5943d" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "82a391961adb167b54e1e2ab76f486e8" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 19349504, + "records": [ + { + "name": "model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4194304 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19333120 + } + ], + "md5sum": "6233888fd1a1e179fcfc30ff78ddf429" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "ffd8f348a340b8ebd94232cced61385d" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "896c4490b54db31564ce392166453173" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.19.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "e6280db84db208c73a7615e423e559eb" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "45faf801872b2b8d99253cfd423939c7" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "3acad7152ae34851097475aac4cc7e0e" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.19.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.19.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "c113e0db4d4e8ac6da6e87be1f983381" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "c97fd97953d6401de1bf72b230bb08d2" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "6b609f7ef21d72423329f53cd6af2037" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.20.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "bf4f5a4462d408df434d8a125804ac04" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "126023cb2628b4e7eb04f01fd41616c4" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "466ba00517d4b088818a3d94da8b13f1" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.20.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.20.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "f196eccb807e95899fc5ad27dca9b77f" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "3b23ca201c6f0262489b434115b0ffe4" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "59d2017d554470660470c4daecd694ec" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.21.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "9242ad2f32c100bde2bb5eadfdc26273" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "e1dc8090442c1cd324c4dc33764acba1" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "fb2c64e784fe1ee284d5aafa68378a2f" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.21.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.21.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "f42c8686b7412df541495a18cd4dd3ad" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "96cefe3c0a706281013198e42c2be972" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "26ffd395d44e7b7302fe27c740ae1681" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.22.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "209292a1e4bb55548ec30a209edc9f0c" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "7254bb8e0825f6c4bb3700f0698e9ddc" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.23.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "9b9453c8b483c2ba49f77fd2a7713e57" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "665c22aa5f01e91714938722763e7744" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 29908992, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.22.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.22.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24629248 + }, + { + "name": "model.layers.23.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24645632 + }, + { + "name": "model.layers.23.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24666112 + } + ], + "md5sum": "4c39a939b7705b78dc7e64989c6e0141" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "3d0c18952e4ba9b67e7d17cc01dcb80c" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "2374b250188f005bb20ea96dfcb945e7" + }, + { + "dataPath": "params_shard_108.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "c75489c1c3a6db02b8239e890ec4ca24" + }, + { + "dataPath": "params_shard_109.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "523fdd761e74730ec970f610323d9c8a" + }, + { + "dataPath": "params_shard_110.bin", + "format": "raw-shard", + "nbytes": 19349504, + "records": [ + { + "name": "model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4194304 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19333120 + } + ], + "md5sum": "6d8e8993b6560ca0dc808e5d545549ea" + }, + { + "dataPath": "params_shard_111.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "e9aa89238e0c2f31d4dce191ce5ca22f" + }, + { + "dataPath": "params_shard_112.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "cbfa18a61046809f233cbf0bae4c7f19" + }, + { + "dataPath": "params_shard_113.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.24.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "da9054f2d9281768135073520887b761" + }, + { + "dataPath": "params_shard_114.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "ffe745e31bdaa315ce5021bb1bdeae42" + }, + { + "dataPath": "params_shard_115.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "ad64969a39552f6fa0b5828699247614" + }, + { + "dataPath": "params_shard_116.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "64fd008ebbe4f9d905423a21566ee6c7" + }, + { + "dataPath": "params_shard_117.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.25.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "17c6954312dd21511df535a6844ca083" + }, + { + "dataPath": "params_shard_118.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "5ba0a9b6a1da5338a2e901869c6257cf" + }, + { + "dataPath": "params_shard_119.bin", + "format": "raw-shard", + "nbytes": 29908992, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.24.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.24.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24629248 + }, + { + "name": "model.layers.25.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24645632 + }, + { + "name": "model.layers.25.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24666112 + } + ], + "md5sum": "e50c62faaedb5bf88f8ecffb3c1e2c1f" + }, + { + "dataPath": "params_shard_120.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "0be5160989ec1c4a3b3d0546d66c320e" + }, + { + "dataPath": "params_shard_121.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "63e4efacfa7b062df89d46ab3243d7d6" + }, + { + "dataPath": "params_shard_122.bin", + "format": "raw-shard", + "nbytes": 19349504, + "records": [ + { + "name": "model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4194304 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19333120 + } + ], + "md5sum": "cea4996c10ae3cc38dbfd89ea256be1c" + }, + { + "dataPath": "params_shard_123.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "7dcd2d51061a5027dc3e1f0ddb93135e" + }, + { + "dataPath": "params_shard_124.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "2bbf16e66f296969fd6cedd5bc80185d" + }, + { + "dataPath": "params_shard_125.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.26.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "45bb784763b268e151e83da4bc0fd032" + }, + { + "dataPath": "params_shard_126.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.26.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "9b49452afbdabfde3d95f7711d3089c4" + }, + { + "dataPath": "params_shard_127.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "8ab752549504e349b985b8dac510913a" + }, + { + "dataPath": "params_shard_128.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "0a8776da18b30602d4acd6a71991b911" + }, + { + "dataPath": "params_shard_129.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.27.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "c96343d8b3caf0ae09768dadfb7ed199" + }, + { + "dataPath": "params_shard_130.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.27.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "f51bcdfcd75cfc463cb2d3b4669b8cf0" + }, + { + "dataPath": "params_shard_131.bin", + "format": "raw-shard", + "nbytes": 29908992, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.26.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.26.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24629248 + }, + { + "name": "model.layers.27.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24645632 + }, + { + "name": "model.layers.27.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24666112 + } + ], + "md5sum": "6b8785004d12cf7e8b395e0efcbe0d48" + }, + { + "dataPath": "params_shard_132.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "497716051a0260852fc0fdfb9f4e66c6" + }, + { + "dataPath": "params_shard_133.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "3e43895a44ba90edcec11d2d80da8170" + }, + { + "dataPath": "params_shard_134.bin", + "format": "raw-shard", + "nbytes": 19349504, + "records": [ + { + "name": "model.layers.27.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.27.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4194304 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19333120 + } + ], + "md5sum": "39554a10de8eac0627333acc92285e48" + }, + { + "dataPath": "params_shard_135.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "e0cf397ff3d2f94a8a57f0713ffed4e1" + }, + { + "dataPath": "params_shard_136.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "d69a7d9837b5b6d82de6c5c0710d1792" + }, + { + "dataPath": "params_shard_137.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.28.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "ea5b76311b3b61195ab00b0d0888cddb" + }, + { + "dataPath": "params_shard_138.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.28.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "3823ab9e064e163e86511f5bf1b03cec" + }, + { + "dataPath": "params_shard_139.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "c9bab509baef5a3d976620d69a0f734f" + }, + { + "dataPath": "params_shard_140.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.28.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.28.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "e63db724cb31c30c9411936078dfb2e1" + }, + { + "dataPath": "params_shard_141.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "598c68d81c5b1a64342bffb3fd21d81a" + }, + { + "dataPath": "params_shard_142.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "70d70c05919745292940892abd0587c3" + }, + { + "dataPath": "params_shard_143.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.29.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "313be29707f344cfae22e0ec4ebb1082" + }, + { + "dataPath": "params_shard_144.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.29.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "35bccee6acaef170066c6c25fe36bf4c" + }, + { + "dataPath": "params_shard_145.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "270f3814abe600fff93b45c70192afee" + }, + { + "dataPath": "params_shard_146.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.29.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.29.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "91e9e20d2ec9c1ab737af5f536ed2f20" + }, + { + "dataPath": "params_shard_147.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "f3d1181063fce8beb50a04f5abe6b223" + }, + { + "dataPath": "params_shard_148.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "6bca0117ae4f94c18e069a42a97f85f4" + }, + { + "dataPath": "params_shard_149.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.3.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "a48421d623a9e180fdbe6c9b06a38744" + }, + { + "dataPath": "params_shard_150.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "d1e6ee257f471cc2ecea28bc21d98e56" + }, + { + "dataPath": "params_shard_151.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "9f64186444828de5ab62aaac0a59df54" + }, + { + "dataPath": "params_shard_152.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.3.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.3.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "0e670b9003a9fa034a525f585fd09469" + }, + { + "dataPath": "params_shard_153.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "181f6901f5ce8974f3496e35370961e0" + }, + { + "dataPath": "params_shard_154.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "0966a504a65a0d987eb7285217afdd89" + }, + { + "dataPath": "params_shard_155.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.4.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "057790d454f285975ca7de59f333c2e4" + }, + { + "dataPath": "params_shard_156.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "16f959ce68eacc7ef67a0275ddcdfdb2" + }, + { + "dataPath": "params_shard_157.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.5.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "f92799be5cf8110884909a58f6b0b542" + }, + { + "dataPath": "params_shard_158.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "124631958c84b0cf5321010690b0b660" + }, + { + "dataPath": "params_shard_159.bin", + "format": "raw-shard", + "nbytes": 29908992, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.4.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.4.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24629248 + }, + { + "name": "model.layers.5.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24645632 + }, + { + "name": "model.layers.5.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24666112 + } + ], + "md5sum": "6f39a09fa2bc391348c11e285c516410" + }, + { + "dataPath": "params_shard_160.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.30.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "9069b7c9d344f8a908c3fddbd7931f2d" + }, + { + "dataPath": "params_shard_161.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "01ba42d58853406e16177511085ae4bc" + }, + { + "dataPath": "params_shard_162.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "3cf91f259846aba6d64f4c5b768a1130" + }, + { + "dataPath": "params_shard_163.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.30.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "40bc2053fd0c2161b0944ece22441e55" + }, + { + "dataPath": "params_shard_164.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.30.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "ff21070ed212ae20ef8b9e3172187e89" + }, + { + "dataPath": "params_shard_165.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "2c7be3c79976c795b34a6dfb82de18e0" + }, + { + "dataPath": "params_shard_166.bin", + "format": "raw-shard", + "nbytes": 28823552, + "records": [ + { + "name": "model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.30.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4194304 + }, + { + "name": "model.layers.30.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19333120 + }, + { + "name": "model.layers.30.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 19349504 + }, + { + "name": "model.layers.30.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 19369984 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 24612864 + }, + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 28807168 + } + ], + "md5sum": "004727cc386328b87a355d5f1e807612" + }, + { + "dataPath": "params_shard_167.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "be9f2fdd52f609c48dc8b45759663616" + }, + { + "dataPath": "params_shard_168.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "d1bb30f1073b1750b1f3f0c2b003d980" + }, + { + "dataPath": "params_shard_169.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.31.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "9c958d834508f5e843772734a04ad8b0" + }, + { + "dataPath": "params_shard_170.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.31.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "013eed42d22cb29513fdca0620a7a527" + }, + { + "dataPath": "params_shard_171.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.32.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "177f7cf72e6c8043f1f68ff9044ddad5" + }, + { + "dataPath": "params_shard_172.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.32.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "2e13888864d811418bd16737ac5bd4fc" + }, + { + "dataPath": "params_shard_173.bin", + "format": "raw-shard", + "nbytes": 29908992, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.31.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.31.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.31.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.32.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + }, + { + "name": "model.layers.32.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24629248 + }, + { + "name": "model.layers.32.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24645632 + }, + { + "name": "model.layers.32.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24666112 + } + ], + "md5sum": "f49f6fbdb742405817e0026eefe7f1c2" + }, + { + "dataPath": "params_shard_174.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.32.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "d5d07df5e24e67e273f5b60059472abf" + }, + { + "dataPath": "params_shard_175.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.32.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "647592a8a202213357b2266283fc445b" + }, + { + "dataPath": "params_shard_176.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.32.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "8297d7766f18826b24355468dbcf4241" + }, + { + "dataPath": "params_shard_177.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.33.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "e016631be962226b7470aa353c4e7b21" + }, + { + "dataPath": "params_shard_178.bin", + "format": "raw-shard", + "nbytes": 19349504, + "records": [ + { + "name": "model.layers.32.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.32.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4194304 + }, + { + "name": "model.layers.33.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19333120 + } + ], + "md5sum": "5064784cb5ed816d6a63c5db142757a8" + }, + { + "dataPath": "params_shard_179.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.33.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "1b13f08bafe344691f4a5af2608bb82a" + }, + { + "dataPath": "params_shard_180.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.33.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "d05f654e0745eb748b885ae02cf792e0" + }, + { + "dataPath": "params_shard_181.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.33.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "d176f85ebb8be6bd4a40b2b66a92cc52" + }, + { + "dataPath": "params_shard_182.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.33.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "966231433fc862ede85ac7511901dbf9" + }, + { + "dataPath": "params_shard_183.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.34.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "2429a3fd03b64c93f448629a06a3d14f" + }, + { + "dataPath": "params_shard_184.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.34.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "bc392b72c4d13f944a8cd3c64080ac97" + }, + { + "dataPath": "params_shard_185.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.34.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "2a47ca7d9f53ad4a30deddd43a5aee0e" + }, + { + "dataPath": "params_shard_186.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.34.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "58a81cacad7e870993bf9c283e7ee874" + }, + { + "dataPath": "params_shard_187.bin", + "format": "raw-shard", + "nbytes": 29908992, + "records": [ + { + "name": "model.layers.33.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.33.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.33.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.33.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.33.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.34.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + }, + { + "name": "model.layers.34.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24629248 + }, + { + "name": "model.layers.34.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24645632 + }, + { + "name": "model.layers.34.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24666112 + } + ], + "md5sum": "f70e771bb4bde3f1a400a2bee17f12fa" + }, + { + "dataPath": "params_shard_188.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.34.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "8a6c2e2ff93a37912ae0ea39696cc8b3" + }, + { + "dataPath": "params_shard_189.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.35.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "346bab921753a13e021b577b5a48782b" + }, + { + "dataPath": "params_shard_190.bin", + "format": "raw-shard", + "nbytes": 19349504, + "records": [ + { + "name": "model.layers.34.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.34.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4194304 + }, + { + "name": "model.layers.35.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19333120 + } + ], + "md5sum": "996180612cb612745e44d2c9c6847a44" + }, + { + "dataPath": "params_shard_191.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.35.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "d72ac5aa5da7d9f9050d7ef6ec065147" + }, + { + "dataPath": "params_shard_192.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.35.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "f6c4fd38573baa56cab7b562bef945f0" + }, + { + "dataPath": "params_shard_193.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.35.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "1a5ad946b79294c531c067fc72114505" + }, + { + "dataPath": "params_shard_194.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.35.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "19d6cd9caa86b3b0fb8190060606283c" + }, + { + "dataPath": "params_shard_195.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.36.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "c07fed218d53e20dea04c0d7cdf9e897" + }, + { + "dataPath": "params_shard_196.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.36.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "f4cda11ca0d0c949132da47e441a5649" + }, + { + "dataPath": "params_shard_197.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.36.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "9377b86686ef72d1f5db74a6beee5083" + }, + { + "dataPath": "params_shard_198.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.36.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "3747500e488a688976adbfc74f6c1666" + }, + { + "dataPath": "params_shard_199.bin", + "format": "raw-shard", + "nbytes": 29908992, + "records": [ + { + "name": "model.layers.35.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.35.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.35.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.35.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.35.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.36.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + }, + { + "name": "model.layers.36.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24629248 + }, + { + "name": "model.layers.36.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24645632 + }, + { + "name": "model.layers.36.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24666112 + } + ], + "md5sum": "491f5ebc8eaf6f934dae641c23aa10c4" + }, + { + "dataPath": "params_shard_200.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.36.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "965ac6d9477d19320a70b442dbb7d0c2" + }, + { + "dataPath": "params_shard_201.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.37.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "bdb4910b6fbd9663acd2d7f07650ad39" + }, + { + "dataPath": "params_shard_202.bin", + "format": "raw-shard", + "nbytes": 19349504, + "records": [ + { + "name": "model.layers.36.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.36.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4194304 + }, + { + "name": "model.layers.37.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19333120 + } + ], + "md5sum": "7e3412a93cac4def639dbb6082f6b8e5" + }, + { + "dataPath": "params_shard_203.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.37.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "814bdfca2a9f04639a1099f11dab885f" + }, + { + "dataPath": "params_shard_204.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.37.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "d0431c6094aea33c21c23f6f67abb06f" + }, + { + "dataPath": "params_shard_205.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.37.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "9726fe82ffe1fe5c8d19323e0a4a7b91" + }, + { + "dataPath": "params_shard_206.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.37.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "e338641efb324b9ce4694ea9993e266f" + }, + { + "dataPath": "params_shard_207.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.38.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "07f94cb416a7927f11c34697f63145fb" + }, + { + "dataPath": "params_shard_208.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.37.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.37.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.37.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.37.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.37.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.38.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "519ffc0d0fad9aac9be2ed10098f8a33" + }, + { + "dataPath": "params_shard_209.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.38.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "e6efb5a44f45242cef67ca18d465f9ea" + }, + { + "dataPath": "params_shard_210.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.38.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "e1a4d473a51f48dbda6c9245adffc788" + }, + { + "dataPath": "params_shard_211.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.38.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "94c96e5142ce335340797cb752fa5cf7" + }, + { + "dataPath": "params_shard_212.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.38.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "b1f69aea8f9028163236fef920e32df0" + }, + { + "dataPath": "params_shard_213.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.39.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "3c675a435d18fe85d71663c27a3315fd" + }, + { + "dataPath": "params_shard_214.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.38.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.38.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.38.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.38.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.38.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.39.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "8394c37272bc67f892f12f7a6247be37" + }, + { + "dataPath": "params_shard_215.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.39.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "d0a881e679bfaa6b1b1223dccca228e3" + }, + { + "dataPath": "params_shard_216.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.39.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "bfb67e1ac8257d2a1176075e756a9625" + }, + { + "dataPath": "params_shard_217.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.39.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "d60fa9e7841b2930fe65ebab1024ed8c" + }, + { + "dataPath": "params_shard_218.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.39.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "b5915a030bdbff8607c500569b007190" + }, + { + "dataPath": "params_shard_219.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.40.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "3890e1fe2d18523428f9cde24a88bd4c" + }, + { + "dataPath": "params_shard_220.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.39.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.39.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.39.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.39.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.39.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.40.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "90afa14248edb1a1ca6c7bf5d5a55b90" + }, + { + "dataPath": "params_shard_221.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.40.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "6ccd72910617128f0822c4df349eb750" + }, + { + "dataPath": "params_shard_222.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.40.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "6963e6d52aa89b859e0b38e12539bc65" + }, + { + "dataPath": "params_shard_223.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.40.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "7f646335bcc86d1c76e784076331f738" + }, + { + "dataPath": "params_shard_224.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.40.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "8da5a2bd8c94c7d10d1b82f88086509c" + }, + { + "dataPath": "params_shard_225.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.41.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "31e6387cb9eda7ebba9bfd87c663cbbc" + }, + { + "dataPath": "params_shard_226.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.41.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "64e3eaf51efe66bd6c93742bf896a22e" + }, + { + "dataPath": "params_shard_227.bin", + "format": "raw-shard", + "nbytes": 29908992, + "records": [ + { + "name": "model.layers.40.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.40.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.40.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.40.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.40.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.41.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + }, + { + "name": "model.layers.41.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24629248 + }, + { + "name": "model.layers.41.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24645632 + }, + { + "name": "model.layers.41.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24666112 + } + ], + "md5sum": "6c031982a2575b00e50400d91ec30f5f" + }, + { + "dataPath": "params_shard_228.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.41.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "95f297c928d9f0abb9042bbe5acc1672" + }, + { + "dataPath": "params_shard_229.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.41.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "f94f4957894c83a36a52aa7e9aed0a5a" + }, + { + "dataPath": "params_shard_230.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.41.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "609813ec9407a990470da5e4dd7b4191" + }, + { + "dataPath": "params_shard_231.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.42.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "1fa4192705c9ba12f277a88ff3cfea1d" + }, + { + "dataPath": "params_shard_232.bin", + "format": "raw-shard", + "nbytes": 19349504, + "records": [ + { + "name": "model.layers.41.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.41.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4194304 + }, + { + "name": "model.layers.42.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19333120 + } + ], + "md5sum": "56bc387718639a8b78338b4e58a95490" + }, + { + "dataPath": "params_shard_233.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.42.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "085faa5ea146e8707376a70d0497305d" + }, + { + "dataPath": "params_shard_234.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.42.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "ad2e2c85fedc76b5eba8a9502af6f00f" + }, + { + "dataPath": "params_shard_235.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.42.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "60b21a75e04afc6e60229f147b46923a" + }, + { + "dataPath": "params_shard_236.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.42.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "3745640a3eac0eda2bdbac60c564cc9b" + }, + { + "dataPath": "params_shard_237.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.43.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "86e9eb38a2e616c2dace9d1720342b45" + }, + { + "dataPath": "params_shard_238.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.43.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "98a79c806c4d31659f2c02468a4cdf18" + }, + { + "dataPath": "params_shard_239.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.43.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "f251c76c6fb18ef60db52a0b12be530a" + }, + { + "dataPath": "params_shard_240.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.43.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "10ba6b03d439ee093f2534e3db6ca147" + }, + { + "dataPath": "params_shard_241.bin", + "format": "raw-shard", + "nbytes": 29908992, + "records": [ + { + "name": "model.layers.42.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.42.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.42.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.42.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.42.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.43.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + }, + { + "name": "model.layers.43.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24629248 + }, + { + "name": "model.layers.43.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24645632 + }, + { + "name": "model.layers.43.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24666112 + } + ], + "md5sum": "85dbe359777deee887dc779e03e521f8" + }, + { + "dataPath": "params_shard_242.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.43.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "79da2dfc18a01ee599ff73e11ca48b1a" + }, + { + "dataPath": "params_shard_243.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.44.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "ebf58c6ed20cec2fbfa521ac0af7bded" + }, + { + "dataPath": "params_shard_244.bin", + "format": "raw-shard", + "nbytes": 19349504, + "records": [ + { + "name": "model.layers.43.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.43.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4194304 + }, + { + "name": "model.layers.44.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19333120 + } + ], + "md5sum": "e4396ad635f6379a40af7cecaabcd551" + }, + { + "dataPath": "params_shard_245.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.44.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "9fd8298dcd75720266ce767d93948ff5" + }, + { + "dataPath": "params_shard_246.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.44.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "556f12df1bbd5be7723bf7bd48456cef" + }, + { + "dataPath": "params_shard_247.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.44.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "2182f1924d08d82bf17ac3ec7b74ff14" + }, + { + "dataPath": "params_shard_248.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.44.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "575a661aae7857f24ca66b9ee1ae797d" + }, + { + "dataPath": "params_shard_249.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.45.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "042543ba3639c6d4c7c3204e59e01e22" + }, + { + "dataPath": "params_shard_250.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.45.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "78d3a837c8d4f6ba71f41112ee929c57" + }, + { + "dataPath": "params_shard_251.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.45.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "dbc8324a8623fdb7e9c1f48bb0994c73" + }, + { + "dataPath": "params_shard_252.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.45.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "50fc09e6cb13f22b487ff1f3b435832f" + }, + { + "dataPath": "params_shard_253.bin", + "format": "raw-shard", + "nbytes": 29908992, + "records": [ + { + "name": "model.layers.44.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.44.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.44.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.44.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.44.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.45.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + }, + { + "name": "model.layers.45.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24629248 + }, + { + "name": "model.layers.45.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24645632 + }, + { + "name": "model.layers.45.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24666112 + } + ], + "md5sum": "ae0833745cb560389fa06406965b18cf" + }, + { + "dataPath": "params_shard_254.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.45.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "4fd1ebb0f0a8842e1b201e2c371667da" + }, + { + "dataPath": "params_shard_255.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.46.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "f90ac6db698571089ef543dbcd1f00e9" + }, + { + "dataPath": "params_shard_256.bin", + "format": "raw-shard", + "nbytes": 19349504, + "records": [ + { + "name": "model.layers.45.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.45.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4194304 + }, + { + "name": "model.layers.46.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19333120 + } + ], + "md5sum": "223e1b46f76dd722f1d39ffefac864a1" + }, + { + "dataPath": "params_shard_257.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.46.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "9375e5ce322c5c42af42d6ae8ad34f99" + }, + { + "dataPath": "params_shard_258.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.46.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "96c45d0ddc125fb8d2f509b9dac7c85a" + }, + { + "dataPath": "params_shard_259.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.46.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "e456a698f739ad15e473ea81d87dc935" + }, + { + "dataPath": "params_shard_260.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.46.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "4ac603ba858c46870e76e8ac7e9b387b" + }, + { + "dataPath": "params_shard_261.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.47.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "645023d15fb16cb082cd8e7118059bc9" + }, + { + "dataPath": "params_shard_262.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.46.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.46.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.46.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.46.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.46.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.47.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "088a2ebce95f296df248c77228261446" + }, + { + "dataPath": "params_shard_263.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.47.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "20463ed1b5335509459032611d019dd5" + }, + { + "dataPath": "params_shard_264.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.47.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "e9e4991f58fe4bb4bccd209f3c9d8dc6" + }, + { + "dataPath": "params_shard_265.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.47.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "979cd80f68b9a7329299e1d91976d263" + }, + { + "dataPath": "params_shard_266.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.47.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "d5b90cd620ae731ae642af363576612e" + }, + { + "dataPath": "params_shard_267.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.48.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "0a6e7d32060299848eb2a9dbec77a4a7" + }, + { + "dataPath": "params_shard_268.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.47.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.47.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.47.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.47.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.47.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.48.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "994eeb4ea96959cc29883786ddc2a239" + }, + { + "dataPath": "params_shard_269.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.48.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "74820f6de2bc0f1b6512379ae681fefd" + }, + { + "dataPath": "params_shard_270.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.48.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "8da9670b46ba81130fb63ee7e9967be2" + }, + { + "dataPath": "params_shard_271.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.48.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "51282d359ec3550bbb9715a0cee70ae4" + }, + { + "dataPath": "params_shard_272.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.48.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "87475d15847afbe4d6554f8395c24df9" + }, + { + "dataPath": "params_shard_273.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.49.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "d398ef23cee00882e024679f6650d42f" + }, + { + "dataPath": "params_shard_274.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.48.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.48.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.48.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.48.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.48.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.49.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "20ecc773f3215ca6bfcd5dd6288bb6f3" + }, + { + "dataPath": "params_shard_275.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.49.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "6fdd3ae9120a5585d1058f6c57206d21" + }, + { + "dataPath": "params_shard_276.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.49.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "87e2d9202e6bc31b302c52ea357ffbf0" + }, + { + "dataPath": "params_shard_277.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.49.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "e85b26d52bf75b1bd8379c52c8fcfab8" + }, + { + "dataPath": "params_shard_278.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.49.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "c2b74caa5a71e2e438170ed96c944cb4" + }, + { + "dataPath": "params_shard_279.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.50.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "da6cefaed28937bdb0a55b9a982ddb2a" + }, + { + "dataPath": "params_shard_280.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.50.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "08832fe53cff593dbe2857c60896fa8c" + }, + { + "dataPath": "params_shard_281.bin", + "format": "raw-shard", + "nbytes": 29908992, + "records": [ + { + "name": "model.layers.49.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.49.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.49.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.49.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.49.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.50.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + }, + { + "name": "model.layers.50.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24629248 + }, + { + "name": "model.layers.50.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24645632 + }, + { + "name": "model.layers.50.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24666112 + } + ], + "md5sum": "28982eaa0ea4ec003418940800539a0d" + }, + { + "dataPath": "params_shard_282.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "0fe5b919e69e7b4a52897db16472c7a6" + }, + { + "dataPath": "params_shard_283.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "33dbf930a9c82072f878a29c86726a78" + }, + { + "dataPath": "params_shard_284.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "0339b65736ac2f924685feae893d1068" + }, + { + "dataPath": "params_shard_285.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "48da6044e09af7b234f64607216769ca" + }, + { + "dataPath": "params_shard_286.bin", + "format": "raw-shard", + "nbytes": 19349504, + "records": [ + { + "name": "model.layers.50.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4194304 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19333120 + } + ], + "md5sum": "7aba992aa3d3d66a4e5bdd955b8902be" + }, + { + "dataPath": "params_shard_287.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "b7dc2367c52557c31566b1d9d0541219" + }, + { + "dataPath": "params_shard_288.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "96ca3160305793859bfba28e4caa30d9" + }, + { + "dataPath": "params_shard_289.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.6.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "64b4c80746b6df3df2e86f2bf0ecf9dc" + }, + { + "dataPath": "params_shard_290.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "3185b81f3775445af0f99bba81379c1f" + }, + { + "dataPath": "params_shard_291.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "fdd3b9dcdea93b853f649301bed27686" + }, + { + "dataPath": "params_shard_292.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "f5a2563b2c49a0a77ce0532bc99c619b" + }, + { + "dataPath": "params_shard_293.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.7.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "6138779f1c94995bd60011803cf08c11" + }, + { + "dataPath": "params_shard_294.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "a6f90fddbd35e5e7c01a7adde0d22c2c" + }, + { + "dataPath": "params_shard_295.bin", + "format": "raw-shard", + "nbytes": 29908992, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.6.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.6.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24629248 + }, + { + "name": "model.layers.7.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24645632 + }, + { + "name": "model.layers.7.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24666112 + } + ], + "md5sum": "26fd7dcee4721b023e060948560bd1e7" + }, + { + "dataPath": "params_shard_296.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.50.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "5b203bfb08c68275896a577c0479d2b2" + }, + { + "dataPath": "params_shard_297.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.50.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "c8d5bfcec784c6b4a9b51de6400e9563" + }, + { + "dataPath": "params_shard_298.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.50.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "efafa57a8802794cf8d0f2920b248a57" + }, + { + "dataPath": "params_shard_299.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.51.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "7a2458f4366c3dde4cfe52f89efda353" + }, + { + "dataPath": "params_shard_300.bin", + "format": "raw-shard", + "nbytes": 19349504, + "records": [ + { + "name": "model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.50.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4194304 + }, + { + "name": "model.layers.51.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19333120 + } + ], + "md5sum": "12daf0f1ca2bec712842ff4ff64c8db7" + }, + { + "dataPath": "params_shard_301.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.51.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "6c0a7e047973f3a949509694570af582" + }, + { + "dataPath": "params_shard_302.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.51.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "5114b3cfb7b813b3767957c67c258e4b" + }, + { + "dataPath": "params_shard_303.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.51.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "b8bcc298839506c6c3807b771c6ef697" + }, + { + "dataPath": "params_shard_304.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.51.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "e67f678f28a956b7bc9af9c2e70b0f22" + }, + { + "dataPath": "params_shard_305.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.52.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "dc4f9bf0bbdf52d8fd5a7e4e108ed9af" + }, + { + "dataPath": "params_shard_306.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.52.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "13fba0ce5128c03172b3ffb7f5892718" + }, + { + "dataPath": "params_shard_307.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.52.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "01a1d2270f087220e732b81f7f9598c5" + }, + { + "dataPath": "params_shard_308.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.52.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "225343a6aeccce1db3bb0fc78333afb9" + }, + { + "dataPath": "params_shard_309.bin", + "format": "raw-shard", + "nbytes": 29908992, + "records": [ + { + "name": "model.layers.51.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.51.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.51.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.51.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.51.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.52.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + }, + { + "name": "model.layers.52.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24629248 + }, + { + "name": "model.layers.52.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24645632 + }, + { + "name": "model.layers.52.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24666112 + } + ], + "md5sum": "6c6dff8d9767e97c99579910d655ebf4" + }, + { + "dataPath": "params_shard_310.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.52.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "1778c322f6a95b3bf337b19f118dfd91" + }, + { + "dataPath": "params_shard_311.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.53.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "2860ac2798f005a0e241dcf8a8a4c49b" + }, + { + "dataPath": "params_shard_312.bin", + "format": "raw-shard", + "nbytes": 19349504, + "records": [ + { + "name": "model.layers.52.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.52.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4194304 + }, + { + "name": "model.layers.53.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19333120 + } + ], + "md5sum": "b2df0b5cad684baabcffe9a1a63d3590" + }, + { + "dataPath": "params_shard_313.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.53.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "eafa9ea8334a245f9347a1d9005363ae" + }, + { + "dataPath": "params_shard_314.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.53.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "0098aa67dc082b9cf1ccf20394aef6d9" + }, + { + "dataPath": "params_shard_315.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.53.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "db95e1f3c78b6dbbfd2d08c6fdf25eba" + }, + { + "dataPath": "params_shard_316.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.53.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "d71f03d9ccc44fe9980f5f1f2d6238be" + }, + { + "dataPath": "params_shard_317.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.54.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "9bafc1a3aacc46fe3c49f853091f9442" + }, + { + "dataPath": "params_shard_318.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.54.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "8584a8d94641ec4366f166281281451e" + }, + { + "dataPath": "params_shard_319.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.54.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "79b255a094aa44569a7b2755321eced3" + }, + { + "dataPath": "params_shard_320.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.54.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "124df672c056f8eda2b81dbe191c2142" + }, + { + "dataPath": "params_shard_321.bin", + "format": "raw-shard", + "nbytes": 29908992, + "records": [ + { + "name": "model.layers.53.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.53.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.53.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.53.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.53.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.54.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + }, + { + "name": "model.layers.54.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24629248 + }, + { + "name": "model.layers.54.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24645632 + }, + { + "name": "model.layers.54.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24666112 + } + ], + "md5sum": "c9f7fcd7f50fecac407524827ccbe7a6" + }, + { + "dataPath": "params_shard_322.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.54.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "d31b39f0a5036ef3a398b72f19c62b3f" + }, + { + "dataPath": "params_shard_323.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.55.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "bfa9154c4e86ae182b8bd9bb6555da25" + }, + { + "dataPath": "params_shard_324.bin", + "format": "raw-shard", + "nbytes": 19349504, + "records": [ + { + "name": "model.layers.54.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.54.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4194304 + }, + { + "name": "model.layers.55.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19333120 + } + ], + "md5sum": "e317064e16f2ded3ce62a9d1667149f0" + }, + { + "dataPath": "params_shard_325.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.55.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "f0d02968561ac461efd3d24d6e0162c5" + }, + { + "dataPath": "params_shard_326.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.55.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "5869068f582204230f4ff6b3e90bfdf9" + }, + { + "dataPath": "params_shard_327.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.55.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "934d63f92869e2384cd13f841c28dae9" + }, + { + "dataPath": "params_shard_328.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.55.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "04c3c7a697209799e62292b1fc627cd4" + }, + { + "dataPath": "params_shard_329.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.56.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "5bf58ebe2884457d3888db91d4a4f08f" + }, + { + "dataPath": "params_shard_330.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.55.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.55.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.55.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.55.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.55.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.56.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "1aa9aea243ffd23ff80e9d0e5abd59f0" + }, + { + "dataPath": "params_shard_331.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.56.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "bb418b0e14148ff73024cd5df73bda01" + }, + { + "dataPath": "params_shard_332.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.56.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "4c4a62f4e81fa7036d5d0e6bc0756d62" + }, + { + "dataPath": "params_shard_333.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.56.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "191504acf30dd5de1dcdd14a905b97a1" + }, + { + "dataPath": "params_shard_334.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.56.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "1510ace2975d58be052b978fa4290241" + }, + { + "dataPath": "params_shard_335.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.57.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "b390f3e339d2744ff0e41f197ddc7888" + }, + { + "dataPath": "params_shard_336.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.56.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.56.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.56.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.56.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.56.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.57.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "cfab7563846bb74527cfcabc015d1aa7" + }, + { + "dataPath": "params_shard_337.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.57.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "592b95e9a67b0d2a720fc95ebb189487" + }, + { + "dataPath": "params_shard_338.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.57.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "515771b478d096e071853f0b4b30c3d4" + }, + { + "dataPath": "params_shard_339.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.57.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "dfd44d5b2f344acab323435b09ccd58c" + }, + { + "dataPath": "params_shard_340.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.57.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "01b255d08fb549fda099f7c42400b373" + }, + { + "dataPath": "params_shard_341.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.58.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "a2f359056d6be32fef5c27b2a58703db" + }, + { + "dataPath": "params_shard_342.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.57.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.57.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.57.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.57.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.57.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.58.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "3bad04cc24fd5b109217ea0a62be8a85" + }, + { + "dataPath": "params_shard_343.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.58.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "a18b8d10f65fa77db9f8725e83f316af" + }, + { + "dataPath": "params_shard_344.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.58.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "04cda4c47b92b412fa3121443c82b91c" + }, + { + "dataPath": "params_shard_345.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.58.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "e27dd053975964aed41cb2d188ee53b4" + }, + { + "dataPath": "params_shard_346.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.58.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "568f518ec2811cbbec9531a73b02fd94" + }, + { + "dataPath": "params_shard_347.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.59.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "e181d19f4dfe727a09cb6aacb8f647d9" + }, + { + "dataPath": "params_shard_348.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.59.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "6acb838708bef07f758605f7a1e9303b" + }, + { + "dataPath": "params_shard_349.bin", + "format": "raw-shard", + "nbytes": 29908992, + "records": [ + { + "name": "model.layers.58.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.58.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.58.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.58.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.58.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.59.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + }, + { + "name": "model.layers.59.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24629248 + }, + { + "name": "model.layers.59.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24645632 + }, + { + "name": "model.layers.59.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24666112 + } + ], + "md5sum": "80a27a28d1928a677f1cfb72060a4dcd" + }, + { + "dataPath": "params_shard_350.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.59.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "42bb7e78ba3c29afc70fb0e4b1bca422" + }, + { + "dataPath": "params_shard_351.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.59.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "e81237b99a54eafa8a12c419ca84bb1e" + }, + { + "dataPath": "params_shard_352.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.59.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "8e5062103493b9cf350ad526b2fe2d7b" + }, + { + "dataPath": "params_shard_353.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.60.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "49bd281d000f7a2ac966047c2963407d" + }, + { + "dataPath": "params_shard_354.bin", + "format": "raw-shard", + "nbytes": 19349504, + "records": [ + { + "name": "model.layers.59.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.59.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4194304 + }, + { + "name": "model.layers.60.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19333120 + } + ], + "md5sum": "fd52535d6c864eb909d164e493e48488" + }, + { + "dataPath": "params_shard_355.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.60.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "8a46729643d3af3110b1b46c173de461" + }, + { + "dataPath": "params_shard_356.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.60.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "386899b2a15c289ec58d438a5f55423b" + }, + { + "dataPath": "params_shard_357.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.60.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "14da2021f502f125ad296a7697185492" + }, + { + "dataPath": "params_shard_358.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.60.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "afeecaa99fd9dbce5db29f3e55f989ee" + }, + { + "dataPath": "params_shard_359.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.61.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "60a927269c4a18bb3a78cdf57f444066" + }, + { + "dataPath": "params_shard_360.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.61.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "16eca2cec4e7d4f62fb66d17920b2e6b" + }, + { + "dataPath": "params_shard_361.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.61.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "c6a27961e4f0d6b20b1ca7458db2f73d" + }, + { + "dataPath": "params_shard_362.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.61.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "c844f286524e2e461be8094b8fb4cb25" + }, + { + "dataPath": "params_shard_363.bin", + "format": "raw-shard", + "nbytes": 29908992, + "records": [ + { + "name": "model.layers.60.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.60.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.60.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.60.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.60.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.61.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + }, + { + "name": "model.layers.61.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24629248 + }, + { + "name": "model.layers.61.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24645632 + }, + { + "name": "model.layers.61.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24666112 + } + ], + "md5sum": "8af477b35fdf1f58eee4af94d1369e8f" + }, + { + "dataPath": "params_shard_364.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.61.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "05da42d55b3522f112372f566abdc618" + }, + { + "dataPath": "params_shard_365.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.62.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "4b109a2995eaa1e8b43db48050834558" + }, + { + "dataPath": "params_shard_366.bin", + "format": "raw-shard", + "nbytes": 19349504, + "records": [ + { + "name": "model.layers.61.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.61.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4194304 + }, + { + "name": "model.layers.62.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19333120 + } + ], + "md5sum": "5f10677474fc89dccd0dd566c9a0729d" + }, + { + "dataPath": "params_shard_367.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.62.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "e8952ef0336c8ecf48e0a109c6bf5770" + }, + { + "dataPath": "params_shard_368.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.62.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "deae6ee0177e32d683f517defa0450b4" + }, + { + "dataPath": "params_shard_369.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.62.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "981fb861b87e1f242303d0cb8ff06508" + }, + { + "dataPath": "params_shard_370.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.62.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "7fc59a406ad777af8bd094f70f3d12f1" + }, + { + "dataPath": "params_shard_371.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.63.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "bf4e7ca7ab89dcad5f092bc2cd483f74" + }, + { + "dataPath": "params_shard_372.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.63.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "df0e81571ebf0f8191669e502d763624" + }, + { + "dataPath": "params_shard_373.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.63.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "e6019cbfef0ce5041472d16283c0f4cb" + }, + { + "dataPath": "params_shard_374.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.63.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "a56f3420b4f0861d8b793ebad6a42bcc" + }, + { + "dataPath": "params_shard_375.bin", + "format": "raw-shard", + "nbytes": 29908992, + "records": [ + { + "name": "model.layers.62.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.62.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.62.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.62.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.62.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.63.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + }, + { + "name": "model.layers.63.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24629248 + }, + { + "name": "model.layers.63.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24645632 + }, + { + "name": "model.layers.63.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24666112 + } + ], + "md5sum": "8856df7b9062b2122569d04d17fc1b05" + }, + { + "dataPath": "params_shard_376.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.63.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "703844cc3889ec995097ea2f930e1553" + }, + { + "dataPath": "params_shard_377.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.64.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "fdb81c674a1e5d2c13ae312001dcf9a9" + }, + { + "dataPath": "params_shard_378.bin", + "format": "raw-shard", + "nbytes": 19349504, + "records": [ + { + "name": "model.layers.63.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.63.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4194304 + }, + { + "name": "model.layers.64.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19333120 + } + ], + "md5sum": "f4ea5fcce63f50d7d204756ab5cc6ccd" + }, + { + "dataPath": "params_shard_379.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.64.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "002a3450981d71bd51de1d8e3fa49a98" + }, + { + "dataPath": "params_shard_380.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.64.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "2637c93b83b9fafb326a0a7bfed082e8" + }, + { + "dataPath": "params_shard_381.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.64.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "9e80bc67e433db1ffbc4695fa0f68228" + }, + { + "dataPath": "params_shard_382.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.64.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "ff0f2d60dd6cf65e27a2f39113e91255" + }, + { + "dataPath": "params_shard_383.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.65.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "8e2added901975d6b2408621ea8bae28" + }, + { + "dataPath": "params_shard_384.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.64.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.64.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.64.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.64.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.64.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.65.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "e662375d17432880f7b5257ae1c705bd" + }, + { + "dataPath": "params_shard_385.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.65.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "c855d6f8407308744ed7147e53338b30" + }, + { + "dataPath": "params_shard_386.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.65.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "05f0b184a46f64bd0820a78fa1ba4dbb" + }, + { + "dataPath": "params_shard_387.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.65.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "05895f449017391a82176faeb0661113" + }, + { + "dataPath": "params_shard_388.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.65.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "35202135b460c8f93b595602e11d1659" + }, + { + "dataPath": "params_shard_389.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.66.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "736eec54728115bff47097ace23e1920" + }, + { + "dataPath": "params_shard_390.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.65.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.65.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.65.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.65.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.65.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.66.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "de89ca3b61c0bb816ded6e9c61f0643c" + }, + { + "dataPath": "params_shard_391.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.66.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "26fc696eb7fbb9b559a0fea6275de9be" + }, + { + "dataPath": "params_shard_392.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.66.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "a37d144cd91864bc53421c1a109f7d35" + }, + { + "dataPath": "params_shard_393.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.66.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "63bfd3f434129ef21f2de74e843f5de2" + }, + { + "dataPath": "params_shard_394.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.66.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "a08ea95369284e91d4708833129f650d" + }, + { + "dataPath": "params_shard_395.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.67.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "7f795869c3eb8be208e2a46335291a89" + }, + { + "dataPath": "params_shard_396.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.66.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.66.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.66.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.66.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.66.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.67.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "10157d1b46e489a831fa66031ecfc3c1" + }, + { + "dataPath": "params_shard_397.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.67.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "ba6ae882a7b1ec6747b4b0ced7d8f907" + }, + { + "dataPath": "params_shard_398.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.67.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "13c5f85618cc4407da1b14fd6653b138" + }, + { + "dataPath": "params_shard_399.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.67.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "db7c62b5afc64a40efabc0e3409847e3" + }, + { + "dataPath": "params_shard_400.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.67.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "985a62061328d3e7de03692cc80d06ab" + }, + { + "dataPath": "params_shard_401.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.68.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "2050dc8c2673130097071d5ad141171e" + }, + { + "dataPath": "params_shard_402.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.68.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "64b0660863fcc99921ebcf6714311c0f" + }, + { + "dataPath": "params_shard_403.bin", + "format": "raw-shard", + "nbytes": 29908992, + "records": [ + { + "name": "model.layers.67.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.67.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.67.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.67.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.67.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.68.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + }, + { + "name": "model.layers.68.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24629248 + }, + { + "name": "model.layers.68.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24645632 + }, + { + "name": "model.layers.68.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24666112 + } + ], + "md5sum": "0e99b738beb0106cdd1fd075cafca5c0" + }, + { + "dataPath": "params_shard_404.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.68.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "66886225f09e0219d69ee1e0c1d3698d" + }, + { + "dataPath": "params_shard_405.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.68.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "b5ce842903a77284da60f55cecb927b5" + }, + { + "dataPath": "params_shard_406.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.68.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "867aa78ae7dc273abe1e187847485545" + }, + { + "dataPath": "params_shard_407.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.69.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "8df2408499975a96e85801f47d0962b8" + }, + { + "dataPath": "params_shard_408.bin", + "format": "raw-shard", + "nbytes": 19349504, + "records": [ + { + "name": "model.layers.68.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.68.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4194304 + }, + { + "name": "model.layers.69.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19333120 + } + ], + "md5sum": "6845692e76bed2a3f3dd9c51089678c3" + }, + { + "dataPath": "params_shard_409.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.69.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "a32c15f1e06e8a94a3df858a9c76b2aa" + }, + { + "dataPath": "params_shard_410.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.69.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "9eeb41e2bf72812b371a342da5a8767a" + }, + { + "dataPath": "params_shard_411.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.69.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "f812e92e6f128925432edf3466f5a52c" + }, + { + "dataPath": "params_shard_412.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.69.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "3416cd1e1e565f93059aa5858494cea1" + }, + { + "dataPath": "params_shard_413.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.70.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "50c0bd3ec961a5364f3b30af07553e21" + }, + { + "dataPath": "params_shard_414.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.70.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "777fedea7cdaef877347caf9ea0c5d17" + }, + { + "dataPath": "params_shard_415.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.70.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "8f449bd40b4909d5a0b45c208d31f94b" + }, + { + "dataPath": "params_shard_416.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.70.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "afb5d536d98e637ce09770644e951829" + }, + { + "dataPath": "params_shard_417.bin", + "format": "raw-shard", + "nbytes": 29908992, + "records": [ + { + "name": "model.layers.69.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.69.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.69.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.69.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.69.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.70.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + }, + { + "name": "model.layers.70.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24629248 + }, + { + "name": "model.layers.70.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24645632 + }, + { + "name": "model.layers.70.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24666112 + } + ], + "md5sum": "53d19d12e31fc90ea622ee1e5d7ef568" + }, + { + "dataPath": "params_shard_418.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "c0569dadd700d0d5c23a8952487f8c37" + }, + { + "dataPath": "params_shard_419.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "78929f9802b96894abe332462c101abc" + }, + { + "dataPath": "params_shard_420.bin", + "format": "raw-shard", + "nbytes": 19349504, + "records": [ + { + "name": "model.layers.70.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4194304 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19333120 + } + ], + "md5sum": "35bb8f08df58b36a86d4223dd2a09a9e" + }, + { + "dataPath": "params_shard_421.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "7e3926c1dd9fc9d1ae640d326c83f1bc" + }, + { + "dataPath": "params_shard_422.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "2586f79b78f8750f399e04deee7ecee1" + }, + { + "dataPath": "params_shard_423.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.8.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "8a4afcb8ee093e0f531e678ee5e4d0b7" + }, + { + "dataPath": "params_shard_424.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "fcc414164d008a20f9a843f8ca7914aa" + }, + { + "dataPath": "params_shard_425.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "53f8bd67bcf5ff30da2f160766af4ca4" + }, + { + "dataPath": "params_shard_426.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "0e8b2859c1d4695c49dc1fceca2c2a40" + }, + { + "dataPath": "params_shard_427.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.9.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "59177ff92c33f3928edfbbf7ab0a106a" + }, + { + "dataPath": "params_shard_428.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "38ca36a3d0f94be029ca6174abb5cd91" + }, + { + "dataPath": "params_shard_429.bin", + "format": "raw-shard", + "nbytes": 29908992, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.8.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.8.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24629248 + }, + { + "name": "model.layers.9.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24645632 + }, + { + "name": "model.layers.9.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24666112 + } + ], + "md5sum": "ba744fa37039f049605b153058134249" + }, + { + "dataPath": "params_shard_430.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.70.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "501972c5635364d074814db2dd0862a0" + }, + { + "dataPath": "params_shard_431.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.71.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "634d9538ce97e8a60e744bdff03eaf7a" + }, + { + "dataPath": "params_shard_432.bin", + "format": "raw-shard", + "nbytes": 19349504, + "records": [ + { + "name": "model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.70.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4194304 + }, + { + "name": "model.layers.71.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19333120 + } + ], + "md5sum": "a55bfcd929b8d2d59c7c4612f6a0f721" + }, + { + "dataPath": "params_shard_433.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.71.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "78568195355eb285572024b6529d32c3" + }, + { + "dataPath": "params_shard_434.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.71.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "1a5c7cd99f996fdb1d0cf0d8fc1f5315" + }, + { + "dataPath": "params_shard_435.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.71.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "23420f8a4711ff4f0abe0d7ea442b346" + }, + { + "dataPath": "params_shard_436.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.71.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "9078a57850bc4ce34e59295f6f94559d" + }, + { + "dataPath": "params_shard_437.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.72.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "6201423f81f666ae02dbe939346af362" + }, + { + "dataPath": "params_shard_438.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.72.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "dee17fd1059d10a2f493e018e4eb9093" + }, + { + "dataPath": "params_shard_439.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.72.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "2a3f998f0ad1806bc77b421a5d20706a" + }, + { + "dataPath": "params_shard_440.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.72.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "359c66b22f1d75c6d34864ab6c8d26d2" + }, + { + "dataPath": "params_shard_441.bin", + "format": "raw-shard", + "nbytes": 29908992, + "records": [ + { + "name": "model.layers.71.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.71.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.71.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.71.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.71.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.72.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + }, + { + "name": "model.layers.72.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24629248 + }, + { + "name": "model.layers.72.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24645632 + }, + { + "name": "model.layers.72.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24666112 + } + ], + "md5sum": "c5b1faddbb90c1527ed5a9d575bf141e" + }, + { + "dataPath": "params_shard_442.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.72.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "447efcdff4d958e5a6f91fce9cf4a575" + }, + { + "dataPath": "params_shard_443.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.73.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "6eccd428c79795cabb83b6b61be16a31" + }, + { + "dataPath": "params_shard_444.bin", + "format": "raw-shard", + "nbytes": 19349504, + "records": [ + { + "name": "model.layers.72.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.72.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4194304 + }, + { + "name": "model.layers.73.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19333120 + } + ], + "md5sum": "b721c13cbd27f686ba510360d4c51b9b" + }, + { + "dataPath": "params_shard_445.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.73.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "d2cfca21beec6e1210828393cea5c83e" + }, + { + "dataPath": "params_shard_446.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.73.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "fe71a005cf9b3bb137aeb496f46d338c" + }, + { + "dataPath": "params_shard_447.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.73.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "5c907574c1e608d2b737fee5dd1d3e60" + }, + { + "dataPath": "params_shard_448.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.73.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "3e815f435e59a19b2e1618a60192ff80" + }, + { + "dataPath": "params_shard_449.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.74.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "f1dbcb94ffedae91ceec5b9c3f1a6313" + }, + { + "dataPath": "params_shard_450.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.73.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.73.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.73.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.73.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.73.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.74.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "972f0d2daa88837d0c464b7cad7f8234" + }, + { + "dataPath": "params_shard_451.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.74.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "0e6b2f62d16d0cc2d2d05fae45ec3ea4" + }, + { + "dataPath": "params_shard_452.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.74.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "675f9fda9b4b3871a2c5c3ad14e84456" + }, + { + "dataPath": "params_shard_453.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.74.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "3b3c8e1686655633110eaa6a78c9c411" + }, + { + "dataPath": "params_shard_454.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.74.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "024289610c61bc23b15fbc0c2f6dd5e9" + }, + { + "dataPath": "params_shard_455.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.75.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "5605f345957c222dc4704ab5c6efc724" + }, + { + "dataPath": "params_shard_456.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.74.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.74.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.74.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.74.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.74.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.75.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "caf17a86fcc680146a57b2b3cb7e3909" + }, + { + "dataPath": "params_shard_457.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.75.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "4d08a510ef6839a5f987a2a78789ba86" + }, + { + "dataPath": "params_shard_458.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.75.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "b981cdb56ae169046a84508971c39805" + }, + { + "dataPath": "params_shard_459.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.75.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "cfd4bf30483f9fab82aae3c46edf4d2a" + }, + { + "dataPath": "params_shard_460.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.75.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "f24c1a1d5dfe3d899f714fab63c376dc" + }, + { + "dataPath": "params_shard_461.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.76.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "28771f655018fd09106f86595c013b4b" + }, + { + "dataPath": "params_shard_462.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.75.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.75.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.75.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.75.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.75.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.76.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "65dbbafbbce046750d3e0106860e8a4a" + }, + { + "dataPath": "params_shard_463.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.76.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "e9afbc408e704f3d5d561a6875451ea9" + }, + { + "dataPath": "params_shard_464.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.76.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "8b6717f00c8e50c06cc41096929569a2" + }, + { + "dataPath": "params_shard_465.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.76.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "da6501b3f00cf2d7c748051840fbda1b" + }, + { + "dataPath": "params_shard_466.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.76.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "aaa9b78f595d037dda86bd2030a30dd8" + }, + { + "dataPath": "params_shard_467.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.77.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "cf850c012ab0b89f0e0dc52226aaf7a8" + }, + { + "dataPath": "params_shard_468.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.77.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "cb7aebb0d036c9f73a160df5f71705b9" + }, + { + "dataPath": "params_shard_469.bin", + "format": "raw-shard", + "nbytes": 29908992, + "records": [ + { + "name": "model.layers.76.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.76.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.76.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.76.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.76.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.77.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + }, + { + "name": "model.layers.77.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24629248 + }, + { + "name": "model.layers.77.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24645632 + }, + { + "name": "model.layers.77.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24666112 + } + ], + "md5sum": "6f9e89134c8ab60f947edaf534c090a3" + }, + { + "dataPath": "params_shard_470.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.77.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "6b64b0f56f02307349ff9a0f4fe891e1" + }, + { + "dataPath": "params_shard_471.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.77.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "62d260faa6ae57e40f02a2a905b52351" + }, + { + "dataPath": "params_shard_472.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.77.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "95eccbf08b80145101a37407b5fcb3aa" + }, + { + "dataPath": "params_shard_473.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.78.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "44c6f9b3fa76867569ef679d5a5aab39" + }, + { + "dataPath": "params_shard_474.bin", + "format": "raw-shard", + "nbytes": 19349504, + "records": [ + { + "name": "model.layers.77.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.77.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4194304 + }, + { + "name": "model.layers.78.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19333120 + } + ], + "md5sum": "46af5499164af67d987b0a3499c1c7b2" + }, + { + "dataPath": "params_shard_475.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.78.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "ee3766cba64b9ba83e59250120dc9ca0" + }, + { + "dataPath": "params_shard_476.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.78.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "37f98d0d2c861c60bafa79f708c9fa68" + }, + { + "dataPath": "params_shard_477.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.78.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "ca2bfb62639b56966b5f41ee880279de" + }, + { + "dataPath": "params_shard_478.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.78.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "7fec0f7f938b368687a4604ebc03f946" + }, + { + "dataPath": "params_shard_479.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.79.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "f502aa052d14efbb23941dc9b910987e" + }, + { + "dataPath": "params_shard_480.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.79.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "f052293ddaff03aeb422db0a4accf56a" + }, + { + "dataPath": "params_shard_481.bin", + "format": "raw-shard", + "nbytes": 29908992, + "records": [ + { + "name": "model.layers.78.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.78.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.78.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.78.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.78.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.79.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + }, + { + "name": "model.layers.79.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24629248 + }, + { + "name": "model.layers.79.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24645632 + }, + { + "name": "model.layers.79.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24666112 + } + ], + "md5sum": "570146d7d4ecbbc089d6d19feee3a900" + }, + { + "dataPath": "params_shard_482.bin", + "format": "raw-shard", + "nbytes": 4194304, + "records": [ + { + "name": "model.layers.79.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + } + ], + "md5sum": "ffbde232efd82dae300c4b34ce097781" + } + ] +} \ No newline at end of file