| { | |
| "metadata": { | |
| "ParamSize": 325, | |
| "ParamBytes": 2149644288.0, | |
| "BitsPerParam": 4.500600961055312 | |
| }, | |
| "records": [ | |
| { | |
| "dataPath": "params_shard_0.bin", | |
| "format": "raw-shard", | |
| "nbytes": 49250304, | |
| "records": [ | |
| { | |
| "name": "lm_head.q_weight", | |
| "shape": [ | |
| 384, | |
| 32064 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 49250304, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "51778898914f5112659c6783e6c8c812" | |
| }, | |
| { | |
| "dataPath": "params_shard_1.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.21.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 16384 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "046882c84cd2cbd5f05bedab16db7cf7" | |
| }, | |
| { | |
| "dataPath": "params_shard_2.bin", | |
| "format": "raw-shard", | |
| "nbytes": 23470080, | |
| "records": [ | |
| { | |
| "name": "lm_head.q_scale", | |
| "shape": [ | |
| 96, | |
| 32064 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6156288, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.21.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 6156288 | |
| }, | |
| { | |
| "name": "transformer.h.21.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 6162432 | |
| }, | |
| { | |
| "name": "transformer.h.21.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 256, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 18745344 | |
| }, | |
| { | |
| "name": "transformer.h.21.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 20318208 | |
| }, | |
| { | |
| "name": "transformer.h.21.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 23463936 | |
| } | |
| ], | |
| "md5sum": "a318ee36fe27a3e3408f23f2a85d3f81" | |
| }, | |
| { | |
| "dataPath": "params_shard_3.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.22.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 16384 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ce4b1fe36fd1f02610cbd1e78cce7054" | |
| }, | |
| { | |
| "dataPath": "params_shard_4.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33239040, | |
| "records": [ | |
| { | |
| "name": "transformer.h.21.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 9216 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 14155776, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.21.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 9216 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1769472, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "transformer.h.22.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 15925248 | |
| }, | |
| { | |
| "name": "transformer.h.22.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 15931392 | |
| }, | |
| { | |
| "name": "transformer.h.22.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 256, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 28514304 | |
| }, | |
| { | |
| "name": "transformer.h.22.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 30087168 | |
| }, | |
| { | |
| "name": "transformer.h.22.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 33232896 | |
| } | |
| ], | |
| "md5sum": "8d57c5dd32477d9c4aa96e2a0cfcc05b" | |
| }, | |
| { | |
| "dataPath": "params_shard_5.bin", | |
| "format": "raw-shard", | |
| "nbytes": 21239808, | |
| "records": [ | |
| { | |
| "name": "transformer.h.22.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.22.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 589824, | |
| "byteOffset": 4718592 | |
| }, | |
| { | |
| "name": "transformer.h.22.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 9216 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 14155776, | |
| "byteOffset": 5308416 | |
| }, | |
| { | |
| "name": "transformer.h.22.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 9216 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1769472, | |
| "byteOffset": 19464192 | |
| }, | |
| { | |
| "name": "transformer.h.23.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 21233664 | |
| } | |
| ], | |
| "md5sum": "e258e9077c1dc3a83eda7c411602b0a5" | |
| }, | |
| { | |
| "dataPath": "params_shard_6.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.23.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 16384 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ffb49c03cdc14afd5a5cbc03c94837d7" | |
| }, | |
| { | |
| "dataPath": "params_shard_7.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22616064, | |
| "records": [ | |
| { | |
| "name": "transformer.h.23.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.23.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 256, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "transformer.h.23.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "transformer.h.23.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 17301504 | |
| }, | |
| { | |
| "name": "transformer.h.23.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 17307648 | |
| }, | |
| { | |
| "name": "transformer.h.23.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 589824, | |
| "byteOffset": 22026240 | |
| } | |
| ], | |
| "md5sum": "414b283d64920106e745cde49bfa4888" | |
| }, | |
| { | |
| "dataPath": "params_shard_8.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.24.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 16384 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "9758958ab1423d51b9afa05f224524f2" | |
| }, | |
| { | |
| "dataPath": "params_shard_9.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33239040, | |
| "records": [ | |
| { | |
| "name": "transformer.h.23.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 9216 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 14155776, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.23.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 9216 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1769472, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "transformer.h.24.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 15925248 | |
| }, | |
| { | |
| "name": "transformer.h.24.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 15931392 | |
| }, | |
| { | |
| "name": "transformer.h.24.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 256, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 28514304 | |
| }, | |
| { | |
| "name": "transformer.h.24.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 30087168 | |
| }, | |
| { | |
| "name": "transformer.h.24.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 33232896 | |
| } | |
| ], | |
| "md5sum": "c677e252462373f7f41b74f9bb01a525" | |
| }, | |
| { | |
| "dataPath": "params_shard_10.bin", | |
| "format": "raw-shard", | |
| "nbytes": 21239808, | |
| "records": [ | |
| { | |
| "name": "transformer.h.24.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.24.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 589824, | |
| "byteOffset": 4718592 | |
| }, | |
| { | |
| "name": "transformer.h.24.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 9216 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 14155776, | |
| "byteOffset": 5308416 | |
| }, | |
| { | |
| "name": "transformer.h.24.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 9216 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1769472, | |
| "byteOffset": 19464192 | |
| }, | |
| { | |
| "name": "transformer.h.25.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 21233664 | |
| } | |
| ], | |
| "md5sum": "3d81a487048dbd3c8e5240dd1fa833f8" | |
| }, | |
| { | |
| "dataPath": "params_shard_11.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.25.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 16384 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "7426646b13fe843d89b4675a8114603d" | |
| }, | |
| { | |
| "dataPath": "params_shard_12.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22616064, | |
| "records": [ | |
| { | |
| "name": "transformer.h.25.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.25.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 256, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "transformer.h.25.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "transformer.h.25.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 17301504 | |
| }, | |
| { | |
| "name": "transformer.h.25.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 17307648 | |
| }, | |
| { | |
| "name": "transformer.h.25.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 589824, | |
| "byteOffset": 22026240 | |
| } | |
| ], | |
| "md5sum": "906a933e661463134902d323ec902f9a" | |
| }, | |
| { | |
| "dataPath": "params_shard_13.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.26.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 16384 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "19fb8b42431b0942110119058570dcad" | |
| }, | |
| { | |
| "dataPath": "params_shard_14.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33239040, | |
| "records": [ | |
| { | |
| "name": "transformer.h.25.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 9216 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 14155776, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.25.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 9216 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1769472, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "transformer.h.26.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 15925248 | |
| }, | |
| { | |
| "name": "transformer.h.26.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 15931392 | |
| }, | |
| { | |
| "name": "transformer.h.26.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 256, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 28514304 | |
| }, | |
| { | |
| "name": "transformer.h.26.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 30087168 | |
| }, | |
| { | |
| "name": "transformer.h.26.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 33232896 | |
| } | |
| ], | |
| "md5sum": "7e2faa5a4d9369723b0ac846056ab4c3" | |
| }, | |
| { | |
| "dataPath": "params_shard_15.bin", | |
| "format": "raw-shard", | |
| "nbytes": 21239808, | |
| "records": [ | |
| { | |
| "name": "transformer.h.26.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.26.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 589824, | |
| "byteOffset": 4718592 | |
| }, | |
| { | |
| "name": "transformer.h.26.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 9216 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 14155776, | |
| "byteOffset": 5308416 | |
| }, | |
| { | |
| "name": "transformer.h.26.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 9216 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1769472, | |
| "byteOffset": 19464192 | |
| }, | |
| { | |
| "name": "transformer.h.27.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 21233664 | |
| } | |
| ], | |
| "md5sum": "9e3003b86c1c50bc148e4881d08ccf60" | |
| }, | |
| { | |
| "dataPath": "params_shard_16.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.27.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 16384 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "02a9f7036f086e6d988dc716cffae342" | |
| }, | |
| { | |
| "dataPath": "params_shard_17.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22616064, | |
| "records": [ | |
| { | |
| "name": "transformer.h.27.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.27.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 256, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "transformer.h.27.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "transformer.h.27.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 17301504 | |
| }, | |
| { | |
| "name": "transformer.h.27.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 17307648 | |
| }, | |
| { | |
| "name": "transformer.h.27.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 589824, | |
| "byteOffset": 22026240 | |
| } | |
| ], | |
| "md5sum": "aa2cece5bbea0472eb2d4859362b98fa" | |
| }, | |
| { | |
| "dataPath": "params_shard_18.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.28.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 16384 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "311f752ca200e3eaa6d7477243237a69" | |
| }, | |
| { | |
| "dataPath": "params_shard_19.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33239040, | |
| "records": [ | |
| { | |
| "name": "transformer.h.27.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 9216 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 14155776, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.27.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 9216 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1769472, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "transformer.h.28.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 15925248 | |
| }, | |
| { | |
| "name": "transformer.h.28.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 15931392 | |
| }, | |
| { | |
| "name": "transformer.h.28.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 256, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 28514304 | |
| }, | |
| { | |
| "name": "transformer.h.28.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 30087168 | |
| }, | |
| { | |
| "name": "transformer.h.28.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 33232896 | |
| } | |
| ], | |
| "md5sum": "49e93c4bdd457cf35069e8456c88b84b" | |
| }, | |
| { | |
| "dataPath": "params_shard_20.bin", | |
| "format": "raw-shard", | |
| "nbytes": 21239808, | |
| "records": [ | |
| { | |
| "name": "transformer.h.28.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.28.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 589824, | |
| "byteOffset": 4718592 | |
| }, | |
| { | |
| "name": "transformer.h.28.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 9216 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 14155776, | |
| "byteOffset": 5308416 | |
| }, | |
| { | |
| "name": "transformer.h.28.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 9216 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1769472, | |
| "byteOffset": 19464192 | |
| }, | |
| { | |
| "name": "transformer.h.29.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 21233664 | |
| } | |
| ], | |
| "md5sum": "235d00dd943131382d5b644b87f6b371" | |
| }, | |
| { | |
| "dataPath": "params_shard_21.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.29.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 16384 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "8989e095361e8b3da2e4f84589b1f941" | |
| }, | |
| { | |
| "dataPath": "params_shard_22.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22616064, | |
| "records": [ | |
| { | |
| "name": "transformer.h.29.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.29.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 256, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "transformer.h.29.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "transformer.h.29.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 17301504 | |
| }, | |
| { | |
| "name": "transformer.h.29.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 17307648 | |
| }, | |
| { | |
| "name": "transformer.h.29.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 589824, | |
| "byteOffset": 22026240 | |
| } | |
| ], | |
| "md5sum": "c8b987883f46e67d23b1899d4e23bd16" | |
| }, | |
| { | |
| "dataPath": "params_shard_23.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.30.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 16384 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "01c378ef6c03860ebac48f126144f8a8" | |
| }, | |
| { | |
| "dataPath": "params_shard_24.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33239040, | |
| "records": [ | |
| { | |
| "name": "transformer.h.29.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 9216 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 14155776, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.29.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 9216 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1769472, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "transformer.h.30.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 15925248 | |
| }, | |
| { | |
| "name": "transformer.h.30.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 15931392 | |
| }, | |
| { | |
| "name": "transformer.h.30.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 256, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 28514304 | |
| }, | |
| { | |
| "name": "transformer.h.30.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 30087168 | |
| }, | |
| { | |
| "name": "transformer.h.30.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 33232896 | |
| } | |
| ], | |
| "md5sum": "62b70fc1520afc5193043653ca419822" | |
| }, | |
| { | |
| "dataPath": "params_shard_25.bin", | |
| "format": "raw-shard", | |
| "nbytes": 21239808, | |
| "records": [ | |
| { | |
| "name": "transformer.h.30.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.30.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 589824, | |
| "byteOffset": 4718592 | |
| }, | |
| { | |
| "name": "transformer.h.30.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 9216 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 14155776, | |
| "byteOffset": 5308416 | |
| }, | |
| { | |
| "name": "transformer.h.30.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 9216 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1769472, | |
| "byteOffset": 19464192 | |
| }, | |
| { | |
| "name": "transformer.h.31.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 21233664 | |
| } | |
| ], | |
| "md5sum": "76222a5132ba8156eecb93bbf8ba40ff" | |
| }, | |
| { | |
| "dataPath": "params_shard_26.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.31.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 16384 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "8079166d7d7e7e2e93848235373bd1a2" | |
| }, | |
| { | |
| "dataPath": "params_shard_27.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22616064, | |
| "records": [ | |
| { | |
| "name": "transformer.h.31.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.31.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 256, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "transformer.h.31.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "transformer.h.31.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 17301504 | |
| }, | |
| { | |
| "name": "transformer.h.31.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 17307648 | |
| }, | |
| { | |
| "name": "transformer.h.31.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 589824, | |
| "byteOffset": 22026240 | |
| } | |
| ], | |
| "md5sum": "59639205b28fc6405fd0fb95d1e14867" | |
| }, | |
| { | |
| "dataPath": "params_shard_28.bin", | |
| "format": "raw-shard", | |
| "nbytes": 49250304, | |
| "records": [ | |
| { | |
| "name": "transformer.embd.q_weight", | |
| "shape": [ | |
| 32064, | |
| 384 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 49250304, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e61f1b06e77de108c4a56240a97fbb3b" | |
| }, | |
| { | |
| "dataPath": "params_shard_29.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22093824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.31.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 9216 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 14155776, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.31.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 9216 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1769472, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "transformer.norm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 15925248 | |
| }, | |
| { | |
| "name": "transformer.embd.q_scale", | |
| "shape": [ | |
| 32064, | |
| 96 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6156288, | |
| "byteOffset": 15931392 | |
| }, | |
| { | |
| "name": "transformer.h.0.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 22087680 | |
| } | |
| ], | |
| "md5sum": "cb39bbac8ff3fbd25f095b151fc7d353" | |
| }, | |
| { | |
| "dataPath": "params_shard_30.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.0.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 16384 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "8053ff98adff83cdb53acbbd266fb222" | |
| }, | |
| { | |
| "dataPath": "params_shard_31.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22616064, | |
| "records": [ | |
| { | |
| "name": "transformer.h.0.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.0.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 256, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "transformer.h.0.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "transformer.h.0.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 17301504 | |
| }, | |
| { | |
| "name": "transformer.h.0.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 17307648 | |
| }, | |
| { | |
| "name": "transformer.h.0.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 589824, | |
| "byteOffset": 22026240 | |
| } | |
| ], | |
| "md5sum": "e1c292712e5883d409ebf8d8ccfafc64" | |
| }, | |
| { | |
| "dataPath": "params_shard_32.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.1.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 16384 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b8c128ed7c6b1b42cc87788bc3fc7f6c" | |
| }, | |
| { | |
| "dataPath": "params_shard_33.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33239040, | |
| "records": [ | |
| { | |
| "name": "transformer.h.0.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 9216 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 14155776, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.0.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 9216 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1769472, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "transformer.h.1.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 15925248 | |
| }, | |
| { | |
| "name": "transformer.h.1.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 15931392 | |
| }, | |
| { | |
| "name": "transformer.h.1.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 256, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 28514304 | |
| }, | |
| { | |
| "name": "transformer.h.1.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 30087168 | |
| }, | |
| { | |
| "name": "transformer.h.1.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 33232896 | |
| } | |
| ], | |
| "md5sum": "20140e9d4ffa8665cecbf0ed1ed89171" | |
| }, | |
| { | |
| "dataPath": "params_shard_34.bin", | |
| "format": "raw-shard", | |
| "nbytes": 21239808, | |
| "records": [ | |
| { | |
| "name": "transformer.h.1.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.1.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 589824, | |
| "byteOffset": 4718592 | |
| }, | |
| { | |
| "name": "transformer.h.1.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 9216 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 14155776, | |
| "byteOffset": 5308416 | |
| }, | |
| { | |
| "name": "transformer.h.1.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 9216 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1769472, | |
| "byteOffset": 19464192 | |
| }, | |
| { | |
| "name": "transformer.h.10.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 21233664 | |
| } | |
| ], | |
| "md5sum": "c5778b22d0c307954d92db51a781348c" | |
| }, | |
| { | |
| "dataPath": "params_shard_35.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.10.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 16384 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "9ce87cf585d4c6fb8e2a7e3b49289714" | |
| }, | |
| { | |
| "dataPath": "params_shard_36.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22616064, | |
| "records": [ | |
| { | |
| "name": "transformer.h.10.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.10.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 256, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "transformer.h.10.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "transformer.h.10.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 17301504 | |
| }, | |
| { | |
| "name": "transformer.h.10.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 17307648 | |
| }, | |
| { | |
| "name": "transformer.h.10.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 589824, | |
| "byteOffset": 22026240 | |
| } | |
| ], | |
| "md5sum": "7ea31f0f3d66bca695addd7789e60c1b" | |
| }, | |
| { | |
| "dataPath": "params_shard_37.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.11.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 16384 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "555092402acc1e13d8f45466375f0747" | |
| }, | |
| { | |
| "dataPath": "params_shard_38.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33239040, | |
| "records": [ | |
| { | |
| "name": "transformer.h.10.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 9216 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 14155776, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.10.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 9216 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1769472, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "transformer.h.11.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 15925248 | |
| }, | |
| { | |
| "name": "transformer.h.11.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 15931392 | |
| }, | |
| { | |
| "name": "transformer.h.11.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 256, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 28514304 | |
| }, | |
| { | |
| "name": "transformer.h.11.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 30087168 | |
| }, | |
| { | |
| "name": "transformer.h.11.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 33232896 | |
| } | |
| ], | |
| "md5sum": "c1215c69edf03326f3dea4452f7fa761" | |
| }, | |
| { | |
| "dataPath": "params_shard_39.bin", | |
| "format": "raw-shard", | |
| "nbytes": 21239808, | |
| "records": [ | |
| { | |
| "name": "transformer.h.11.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.11.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 589824, | |
| "byteOffset": 4718592 | |
| }, | |
| { | |
| "name": "transformer.h.11.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 9216 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 14155776, | |
| "byteOffset": 5308416 | |
| }, | |
| { | |
| "name": "transformer.h.11.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 9216 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1769472, | |
| "byteOffset": 19464192 | |
| }, | |
| { | |
| "name": "transformer.h.12.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 21233664 | |
| } | |
| ], | |
| "md5sum": "9bc10add79a9f2a324b8e18671cc09bd" | |
| }, | |
| { | |
| "dataPath": "params_shard_40.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.12.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 16384 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "0a897ff9bd8bbe24b8c8a6c0a3ebd8b7" | |
| }, | |
| { | |
| "dataPath": "params_shard_41.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22616064, | |
| "records": [ | |
| { | |
| "name": "transformer.h.12.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.12.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 256, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "transformer.h.12.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "transformer.h.12.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 17301504 | |
| }, | |
| { | |
| "name": "transformer.h.12.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 17307648 | |
| }, | |
| { | |
| "name": "transformer.h.12.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 589824, | |
| "byteOffset": 22026240 | |
| } | |
| ], | |
| "md5sum": "7aaab1972cc0f3d393e96aaab12edd65" | |
| }, | |
| { | |
| "dataPath": "params_shard_42.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.13.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 16384 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "42604cc962f6377027b06eb726972840" | |
| }, | |
| { | |
| "dataPath": "params_shard_43.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33239040, | |
| "records": [ | |
| { | |
| "name": "transformer.h.12.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 9216 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 14155776, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.12.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 9216 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1769472, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "transformer.h.13.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 15925248 | |
| }, | |
| { | |
| "name": "transformer.h.13.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 15931392 | |
| }, | |
| { | |
| "name": "transformer.h.13.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 256, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 28514304 | |
| }, | |
| { | |
| "name": "transformer.h.13.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 30087168 | |
| }, | |
| { | |
| "name": "transformer.h.13.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 33232896 | |
| } | |
| ], | |
| "md5sum": "183e00f45855a504b51ab3611582ebe5" | |
| }, | |
| { | |
| "dataPath": "params_shard_44.bin", | |
| "format": "raw-shard", | |
| "nbytes": 21239808, | |
| "records": [ | |
| { | |
| "name": "transformer.h.13.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.13.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 589824, | |
| "byteOffset": 4718592 | |
| }, | |
| { | |
| "name": "transformer.h.13.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 9216 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 14155776, | |
| "byteOffset": 5308416 | |
| }, | |
| { | |
| "name": "transformer.h.13.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 9216 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1769472, | |
| "byteOffset": 19464192 | |
| }, | |
| { | |
| "name": "transformer.h.14.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 21233664 | |
| } | |
| ], | |
| "md5sum": "5393801fd565cd3f118c0a74b162b5d6" | |
| }, | |
| { | |
| "dataPath": "params_shard_45.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.14.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 16384 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "0888063bb3e5ed35275876502d983288" | |
| }, | |
| { | |
| "dataPath": "params_shard_46.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22616064, | |
| "records": [ | |
| { | |
| "name": "transformer.h.14.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.14.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 256, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "transformer.h.14.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "transformer.h.14.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 17301504 | |
| }, | |
| { | |
| "name": "transformer.h.14.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 17307648 | |
| }, | |
| { | |
| "name": "transformer.h.14.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 589824, | |
| "byteOffset": 22026240 | |
| } | |
| ], | |
| "md5sum": "f48e479383c2f129d4d831532828ec90" | |
| }, | |
| { | |
| "dataPath": "params_shard_47.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.15.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 16384 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c7751ffa8b059fe61f9ad0bc45177302" | |
| }, | |
| { | |
| "dataPath": "params_shard_48.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33239040, | |
| "records": [ | |
| { | |
| "name": "transformer.h.14.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 9216 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 14155776, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.14.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 9216 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1769472, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "transformer.h.15.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 15925248 | |
| }, | |
| { | |
| "name": "transformer.h.15.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 15931392 | |
| }, | |
| { | |
| "name": "transformer.h.15.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 256, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 28514304 | |
| }, | |
| { | |
| "name": "transformer.h.15.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 30087168 | |
| }, | |
| { | |
| "name": "transformer.h.15.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 33232896 | |
| } | |
| ], | |
| "md5sum": "a609dc5b6dbf8487bb3bee5b7731e7c6" | |
| }, | |
| { | |
| "dataPath": "params_shard_49.bin", | |
| "format": "raw-shard", | |
| "nbytes": 21239808, | |
| "records": [ | |
| { | |
| "name": "transformer.h.15.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.15.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 589824, | |
| "byteOffset": 4718592 | |
| }, | |
| { | |
| "name": "transformer.h.15.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 9216 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 14155776, | |
| "byteOffset": 5308416 | |
| }, | |
| { | |
| "name": "transformer.h.15.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 9216 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1769472, | |
| "byteOffset": 19464192 | |
| }, | |
| { | |
| "name": "transformer.h.16.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 21233664 | |
| } | |
| ], | |
| "md5sum": "deb03e306c04aec3d568a5eeaa5546ff" | |
| }, | |
| { | |
| "dataPath": "params_shard_50.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.16.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 16384 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e88c4e1771cfe22c6cc0711147bad793" | |
| }, | |
| { | |
| "dataPath": "params_shard_51.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22616064, | |
| "records": [ | |
| { | |
| "name": "transformer.h.16.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.16.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 256, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "transformer.h.16.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "transformer.h.16.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 17301504 | |
| }, | |
| { | |
| "name": "transformer.h.16.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 17307648 | |
| }, | |
| { | |
| "name": "transformer.h.16.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 589824, | |
| "byteOffset": 22026240 | |
| } | |
| ], | |
| "md5sum": "11f09ca4dbaaaade468299c0f840bcd3" | |
| }, | |
| { | |
| "dataPath": "params_shard_52.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.17.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 16384 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "9e64bf8d0b4bff6f81865a6cfefceb9e" | |
| }, | |
| { | |
| "dataPath": "params_shard_53.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33239040, | |
| "records": [ | |
| { | |
| "name": "transformer.h.16.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 9216 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 14155776, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.16.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 9216 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1769472, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "transformer.h.17.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 15925248 | |
| }, | |
| { | |
| "name": "transformer.h.17.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 15931392 | |
| }, | |
| { | |
| "name": "transformer.h.17.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 256, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 28514304 | |
| }, | |
| { | |
| "name": "transformer.h.17.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 30087168 | |
| }, | |
| { | |
| "name": "transformer.h.17.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 33232896 | |
| } | |
| ], | |
| "md5sum": "a1cff643fb5621e707b8ffbe44bd0bf3" | |
| }, | |
| { | |
| "dataPath": "params_shard_54.bin", | |
| "format": "raw-shard", | |
| "nbytes": 21239808, | |
| "records": [ | |
| { | |
| "name": "transformer.h.17.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.17.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 589824, | |
| "byteOffset": 4718592 | |
| }, | |
| { | |
| "name": "transformer.h.17.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 9216 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 14155776, | |
| "byteOffset": 5308416 | |
| }, | |
| { | |
| "name": "transformer.h.17.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 9216 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1769472, | |
| "byteOffset": 19464192 | |
| }, | |
| { | |
| "name": "transformer.h.18.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 21233664 | |
| } | |
| ], | |
| "md5sum": "09a90e1f53fcae525648794510cf1fbc" | |
| }, | |
| { | |
| "dataPath": "params_shard_55.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.18.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 16384 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "922b073785a159480d5fc79f94dc50c9" | |
| }, | |
| { | |
| "dataPath": "params_shard_56.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22616064, | |
| "records": [ | |
| { | |
| "name": "transformer.h.18.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.18.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 256, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "transformer.h.18.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "transformer.h.18.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 17301504 | |
| }, | |
| { | |
| "name": "transformer.h.18.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 17307648 | |
| }, | |
| { | |
| "name": "transformer.h.18.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 589824, | |
| "byteOffset": 22026240 | |
| } | |
| ], | |
| "md5sum": "7b0496a486806f50a0e59c3a1530f941" | |
| }, | |
| { | |
| "dataPath": "params_shard_57.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.19.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 16384 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "3217ce87180b1316ade0c0b0504a7526" | |
| }, | |
| { | |
| "dataPath": "params_shard_58.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33239040, | |
| "records": [ | |
| { | |
| "name": "transformer.h.18.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 9216 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 14155776, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.18.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 9216 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1769472, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "transformer.h.19.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 15925248 | |
| }, | |
| { | |
| "name": "transformer.h.19.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 15931392 | |
| }, | |
| { | |
| "name": "transformer.h.19.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 256, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 28514304 | |
| }, | |
| { | |
| "name": "transformer.h.19.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 30087168 | |
| }, | |
| { | |
| "name": "transformer.h.19.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 33232896 | |
| } | |
| ], | |
| "md5sum": "6cdcce24eb1c8f4f4acfef2bf9df4119" | |
| }, | |
| { | |
| "dataPath": "params_shard_59.bin", | |
| "format": "raw-shard", | |
| "nbytes": 21239808, | |
| "records": [ | |
| { | |
| "name": "transformer.h.19.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.19.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 589824, | |
| "byteOffset": 4718592 | |
| }, | |
| { | |
| "name": "transformer.h.19.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 9216 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 14155776, | |
| "byteOffset": 5308416 | |
| }, | |
| { | |
| "name": "transformer.h.19.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 9216 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1769472, | |
| "byteOffset": 19464192 | |
| }, | |
| { | |
| "name": "transformer.h.2.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 21233664 | |
| } | |
| ], | |
| "md5sum": "b5bdccd591c2210ea01f1a55a49fdd6a" | |
| }, | |
| { | |
| "dataPath": "params_shard_60.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.2.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 16384 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "4591d8d6b372ca9210cd7078cbd12d03" | |
| }, | |
| { | |
| "dataPath": "params_shard_61.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22616064, | |
| "records": [ | |
| { | |
| "name": "transformer.h.2.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.2.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 256, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "transformer.h.2.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "transformer.h.2.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 17301504 | |
| }, | |
| { | |
| "name": "transformer.h.2.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 17307648 | |
| }, | |
| { | |
| "name": "transformer.h.2.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 589824, | |
| "byteOffset": 22026240 | |
| } | |
| ], | |
| "md5sum": "744ed8be520511f8d118f51ce553a759" | |
| }, | |
| { | |
| "dataPath": "params_shard_62.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.20.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 16384 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "5e5990c1bb2ac54f8c36061fe3e88a1b" | |
| }, | |
| { | |
| "dataPath": "params_shard_63.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33239040, | |
| "records": [ | |
| { | |
| "name": "transformer.h.2.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 9216 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 14155776, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.2.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 9216 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1769472, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "transformer.h.20.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 15925248 | |
| }, | |
| { | |
| "name": "transformer.h.20.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 15931392 | |
| }, | |
| { | |
| "name": "transformer.h.20.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 256, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 28514304 | |
| }, | |
| { | |
| "name": "transformer.h.20.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 30087168 | |
| }, | |
| { | |
| "name": "transformer.h.20.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 33232896 | |
| } | |
| ], | |
| "md5sum": "372a721c75d4bc4270b132ca62a4f668" | |
| }, | |
| { | |
| "dataPath": "params_shard_64.bin", | |
| "format": "raw-shard", | |
| "nbytes": 26548224, | |
| "records": [ | |
| { | |
| "name": "transformer.h.20.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.20.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 589824, | |
| "byteOffset": 4718592 | |
| }, | |
| { | |
| "name": "transformer.h.20.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 9216 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 14155776, | |
| "byteOffset": 5308416 | |
| }, | |
| { | |
| "name": "transformer.h.20.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 9216 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1769472, | |
| "byteOffset": 19464192 | |
| }, | |
| { | |
| "name": "transformer.h.21.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 21233664 | |
| }, | |
| { | |
| "name": "transformer.h.21.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 589824, | |
| "byteOffset": 25952256 | |
| }, | |
| { | |
| "name": "transformer.h.3.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 26542080 | |
| } | |
| ], | |
| "md5sum": "8817389194f45a402af91bdbcb4ff75d" | |
| }, | |
| { | |
| "dataPath": "params_shard_65.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.3.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 16384 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "81f5e9b8a5414eca83aa828d9df8b77f" | |
| }, | |
| { | |
| "dataPath": "params_shard_66.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22616064, | |
| "records": [ | |
| { | |
| "name": "transformer.h.3.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.3.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 256, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "transformer.h.3.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "transformer.h.3.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 17301504 | |
| }, | |
| { | |
| "name": "transformer.h.3.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 17307648 | |
| }, | |
| { | |
| "name": "transformer.h.3.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 589824, | |
| "byteOffset": 22026240 | |
| } | |
| ], | |
| "md5sum": "779d1a6fae10f8f87a456e09345d85cb" | |
| }, | |
| { | |
| "dataPath": "params_shard_67.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.4.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 16384 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f665b53505572c15e4e6fc0fac7360a9" | |
| }, | |
| { | |
| "dataPath": "params_shard_68.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33239040, | |
| "records": [ | |
| { | |
| "name": "transformer.h.3.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 9216 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 14155776, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.3.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 9216 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1769472, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "transformer.h.4.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 15925248 | |
| }, | |
| { | |
| "name": "transformer.h.4.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 15931392 | |
| }, | |
| { | |
| "name": "transformer.h.4.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 256, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 28514304 | |
| }, | |
| { | |
| "name": "transformer.h.4.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 30087168 | |
| }, | |
| { | |
| "name": "transformer.h.4.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 33232896 | |
| } | |
| ], | |
| "md5sum": "6108f7953bc61acc85abad6628e2fb42" | |
| }, | |
| { | |
| "dataPath": "params_shard_69.bin", | |
| "format": "raw-shard", | |
| "nbytes": 21239808, | |
| "records": [ | |
| { | |
| "name": "transformer.h.4.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.4.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 589824, | |
| "byteOffset": 4718592 | |
| }, | |
| { | |
| "name": "transformer.h.4.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 9216 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 14155776, | |
| "byteOffset": 5308416 | |
| }, | |
| { | |
| "name": "transformer.h.4.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 9216 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1769472, | |
| "byteOffset": 19464192 | |
| }, | |
| { | |
| "name": "transformer.h.5.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 21233664 | |
| } | |
| ], | |
| "md5sum": "cb087ffffb00b1755b205047dd403838" | |
| }, | |
| { | |
| "dataPath": "params_shard_70.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.5.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 16384 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c406e3d3d04e6d0c3a6a2ebd4fe474fe" | |
| }, | |
| { | |
| "dataPath": "params_shard_71.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22616064, | |
| "records": [ | |
| { | |
| "name": "transformer.h.5.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.5.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 256, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "transformer.h.5.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "transformer.h.5.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 17301504 | |
| }, | |
| { | |
| "name": "transformer.h.5.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 17307648 | |
| }, | |
| { | |
| "name": "transformer.h.5.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 589824, | |
| "byteOffset": 22026240 | |
| } | |
| ], | |
| "md5sum": "67418d6b1fbf27b1befb89a688a2953b" | |
| }, | |
| { | |
| "dataPath": "params_shard_72.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.6.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 16384 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c8082b14303b93a635fbfe1641bcfe19" | |
| }, | |
| { | |
| "dataPath": "params_shard_73.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33239040, | |
| "records": [ | |
| { | |
| "name": "transformer.h.5.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 9216 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 14155776, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.5.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 9216 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1769472, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "transformer.h.6.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 15925248 | |
| }, | |
| { | |
| "name": "transformer.h.6.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 15931392 | |
| }, | |
| { | |
| "name": "transformer.h.6.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 256, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 28514304 | |
| }, | |
| { | |
| "name": "transformer.h.6.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 30087168 | |
| }, | |
| { | |
| "name": "transformer.h.6.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 33232896 | |
| } | |
| ], | |
| "md5sum": "44781bdf39e208a53b257ce73725da3a" | |
| }, | |
| { | |
| "dataPath": "params_shard_74.bin", | |
| "format": "raw-shard", | |
| "nbytes": 21239808, | |
| "records": [ | |
| { | |
| "name": "transformer.h.6.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.6.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 589824, | |
| "byteOffset": 4718592 | |
| }, | |
| { | |
| "name": "transformer.h.6.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 9216 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 14155776, | |
| "byteOffset": 5308416 | |
| }, | |
| { | |
| "name": "transformer.h.6.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 9216 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1769472, | |
| "byteOffset": 19464192 | |
| }, | |
| { | |
| "name": "transformer.h.7.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 21233664 | |
| } | |
| ], | |
| "md5sum": "886645852ef2590ae080d36982d8d8d7" | |
| }, | |
| { | |
| "dataPath": "params_shard_75.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.7.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 16384 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e00ed4d771be5bb0f790c08cf2a92e47" | |
| }, | |
| { | |
| "dataPath": "params_shard_76.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22616064, | |
| "records": [ | |
| { | |
| "name": "transformer.h.7.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.7.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 256, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "transformer.h.7.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "transformer.h.7.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 17301504 | |
| }, | |
| { | |
| "name": "transformer.h.7.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 17307648 | |
| }, | |
| { | |
| "name": "transformer.h.7.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 589824, | |
| "byteOffset": 22026240 | |
| } | |
| ], | |
| "md5sum": "3b7d62d668c9e2db47d49e36cfd97d5c" | |
| }, | |
| { | |
| "dataPath": "params_shard_77.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.8.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 16384 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "8270eebb6525355e6b89451d075097b1" | |
| }, | |
| { | |
| "dataPath": "params_shard_78.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33239040, | |
| "records": [ | |
| { | |
| "name": "transformer.h.7.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 9216 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 14155776, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.7.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 9216 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1769472, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "transformer.h.8.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 15925248 | |
| }, | |
| { | |
| "name": "transformer.h.8.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 15931392 | |
| }, | |
| { | |
| "name": "transformer.h.8.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 256, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 28514304 | |
| }, | |
| { | |
| "name": "transformer.h.8.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 30087168 | |
| }, | |
| { | |
| "name": "transformer.h.8.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 33232896 | |
| } | |
| ], | |
| "md5sum": "3d02821b738f5ecc72223d132ef37fc9" | |
| }, | |
| { | |
| "dataPath": "params_shard_79.bin", | |
| "format": "raw-shard", | |
| "nbytes": 21239808, | |
| "records": [ | |
| { | |
| "name": "transformer.h.8.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.8.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 589824, | |
| "byteOffset": 4718592 | |
| }, | |
| { | |
| "name": "transformer.h.8.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 9216 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 14155776, | |
| "byteOffset": 5308416 | |
| }, | |
| { | |
| "name": "transformer.h.8.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 9216 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1769472, | |
| "byteOffset": 19464192 | |
| }, | |
| { | |
| "name": "transformer.h.9.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 21233664 | |
| } | |
| ], | |
| "md5sum": "691e55b2c31bbb89efc566e608e11a51" | |
| }, | |
| { | |
| "dataPath": "params_shard_80.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.9.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 16384 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e9d16ab85829a080c4dbc99c69bd4505" | |
| }, | |
| { | |
| "dataPath": "params_shard_81.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22616064, | |
| "records": [ | |
| { | |
| "name": "transformer.h.9.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.9.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 256, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "transformer.h.9.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "transformer.h.9.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 17301504 | |
| }, | |
| { | |
| "name": "transformer.h.9.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 17307648 | |
| }, | |
| { | |
| "name": "transformer.h.9.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 589824, | |
| "byteOffset": 22026240 | |
| } | |
| ], | |
| "md5sum": "9dc47daa5be5c62b51b14abffefb9f2a" | |
| }, | |
| { | |
| "dataPath": "params_shard_82.bin", | |
| "format": "raw-shard", | |
| "nbytes": 15925248, | |
| "records": [ | |
| { | |
| "name": "transformer.h.9.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 9216 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 14155776, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.9.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 9216 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1769472, | |
| "byteOffset": 14155776 | |
| } | |
| ], | |
| "md5sum": "9ed7291a49ebc6209ab1e5ddd7b35ac0" | |
| } | |
| ] | |
| } |