| { | |
| "metadata": { | |
| "ParamSize": 325, | |
| "ParamBytes": 2149644288.0, | |
| "BitsPerParam": 4.500600961055312 | |
| }, | |
| "records": [ | |
| { | |
| "dataPath": "params_shard_0.bin", | |
| "format": "raw-shard", | |
| "nbytes": 49250304, | |
| "records": [ | |
| { | |
| "name": "lm_head.q_weight", | |
| "shape": [ | |
| 384, | |
| 32064 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 49250304, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "15573574494a1c2a922401367f6e7f3b" | |
| }, | |
| { | |
| "dataPath": "params_shard_1.bin", | |
| "format": "raw-shard", | |
| "nbytes": 49250304, | |
| "records": [ | |
| { | |
| "name": "transformer.embd.q_weight", | |
| "shape": [ | |
| 32064, | |
| 384 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 49250304, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e0811e2336da65b98ffc27b4f7a39c39" | |
| }, | |
| { | |
| "dataPath": "params_shard_2.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.0.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 16384 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "5bb70357e6b750882927e6da7c5a36ef" | |
| }, | |
| { | |
| "dataPath": "params_shard_3.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29626368, | |
| "records": [ | |
| { | |
| "name": "lm_head.q_scale", | |
| "shape": [ | |
| 96, | |
| 32064 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6156288, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.embd.q_scale", | |
| "shape": [ | |
| 32064, | |
| 96 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6156288, | |
| "byteOffset": 6156288 | |
| }, | |
| { | |
| "name": "transformer.h.0.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 12312576 | |
| }, | |
| { | |
| "name": "transformer.h.0.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 12318720 | |
| }, | |
| { | |
| "name": "transformer.h.0.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 256, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 24901632 | |
| }, | |
| { | |
| "name": "transformer.h.0.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 26474496 | |
| }, | |
| { | |
| "name": "transformer.h.0.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 29620224 | |
| } | |
| ], | |
| "md5sum": "daeaba161fee0d2587777bc0a0439353" | |
| }, | |
| { | |
| "dataPath": "params_shard_4.bin", | |
| "format": "raw-shard", | |
| "nbytes": 21239808, | |
| "records": [ | |
| { | |
| "name": "transformer.h.0.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.0.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 589824, | |
| "byteOffset": 4718592 | |
| }, | |
| { | |
| "name": "transformer.h.0.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 9216 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 14155776, | |
| "byteOffset": 5308416 | |
| }, | |
| { | |
| "name": "transformer.h.0.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 9216 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1769472, | |
| "byteOffset": 19464192 | |
| }, | |
| { | |
| "name": "transformer.h.1.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 21233664 | |
| } | |
| ], | |
| "md5sum": "ec4cb7aa5757e874fc5d4697b6af1cf4" | |
| }, | |
| { | |
| "dataPath": "params_shard_5.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.1.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 16384 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "0875cfe99fbf3462e115a60626bad3a5" | |
| }, | |
| { | |
| "dataPath": "params_shard_6.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22616064, | |
| "records": [ | |
| { | |
| "name": "transformer.h.1.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.1.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 256, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "transformer.h.1.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "transformer.h.1.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 17301504 | |
| }, | |
| { | |
| "name": "transformer.h.1.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 17307648 | |
| }, | |
| { | |
| "name": "transformer.h.1.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 589824, | |
| "byteOffset": 22026240 | |
| } | |
| ], | |
| "md5sum": "0d27b00632b1fd873029bb57a32b524e" | |
| }, | |
| { | |
| "dataPath": "params_shard_7.bin", | |
| "format": "raw-shard", | |
| "nbytes": 21239808, | |
| "records": [ | |
| { | |
| "name": "transformer.h.1.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 9216 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 14155776, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.1.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 9216 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1769472, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "transformer.h.10.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 15925248 | |
| }, | |
| { | |
| "name": "transformer.h.10.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 589824, | |
| "byteOffset": 20643840 | |
| }, | |
| { | |
| "name": "transformer.h.2.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 21233664 | |
| } | |
| ], | |
| "md5sum": "15ebd758e534ab423e8009f4e310d709" | |
| }, | |
| { | |
| "dataPath": "params_shard_8.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.2.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 16384 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c8c75ab57aa43d8ad8965169136f000f" | |
| }, | |
| { | |
| "dataPath": "params_shard_9.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22616064, | |
| "records": [ | |
| { | |
| "name": "transformer.h.2.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.2.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 256, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "transformer.h.2.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "transformer.h.2.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 17301504 | |
| }, | |
| { | |
| "name": "transformer.h.2.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 17307648 | |
| }, | |
| { | |
| "name": "transformer.h.2.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 589824, | |
| "byteOffset": 22026240 | |
| } | |
| ], | |
| "md5sum": "39c27177c60769b8d8932e9b63f1a99c" | |
| }, | |
| { | |
| "dataPath": "params_shard_10.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.3.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 16384 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "970536f0c4a4f648455085dd416095e5" | |
| }, | |
| { | |
| "dataPath": "params_shard_11.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33239040, | |
| "records": [ | |
| { | |
| "name": "transformer.h.2.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 9216 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 14155776, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.2.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 9216 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1769472, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "transformer.h.3.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 15925248 | |
| }, | |
| { | |
| "name": "transformer.h.3.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 15931392 | |
| }, | |
| { | |
| "name": "transformer.h.3.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 256, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 28514304 | |
| }, | |
| { | |
| "name": "transformer.h.3.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 30087168 | |
| }, | |
| { | |
| "name": "transformer.h.3.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 33232896 | |
| } | |
| ], | |
| "md5sum": "ea7c9f8d68686047f25e1065b0b773b1" | |
| }, | |
| { | |
| "dataPath": "params_shard_12.bin", | |
| "format": "raw-shard", | |
| "nbytes": 21239808, | |
| "records": [ | |
| { | |
| "name": "transformer.h.3.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.3.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 589824, | |
| "byteOffset": 4718592 | |
| }, | |
| { | |
| "name": "transformer.h.3.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 9216 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 14155776, | |
| "byteOffset": 5308416 | |
| }, | |
| { | |
| "name": "transformer.h.3.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 9216 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1769472, | |
| "byteOffset": 19464192 | |
| }, | |
| { | |
| "name": "transformer.h.4.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 21233664 | |
| } | |
| ], | |
| "md5sum": "2230b5716309774b6faf558626cf2be4" | |
| }, | |
| { | |
| "dataPath": "params_shard_13.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.4.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 16384 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "304bf71ac1bfd23c37aff008c376c5ea" | |
| }, | |
| { | |
| "dataPath": "params_shard_14.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22616064, | |
| "records": [ | |
| { | |
| "name": "transformer.h.4.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.4.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 256, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "transformer.h.4.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "transformer.h.4.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 17301504 | |
| }, | |
| { | |
| "name": "transformer.h.4.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 17307648 | |
| }, | |
| { | |
| "name": "transformer.h.4.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 589824, | |
| "byteOffset": 22026240 | |
| } | |
| ], | |
| "md5sum": "b1f6ac560113bc4f8710d033535ab163" | |
| }, | |
| { | |
| "dataPath": "params_shard_15.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.5.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 16384 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c87170941f3c7d2e78659e0e1cb7150c" | |
| }, | |
| { | |
| "dataPath": "params_shard_16.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33239040, | |
| "records": [ | |
| { | |
| "name": "transformer.h.4.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 9216 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 14155776, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.4.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 9216 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1769472, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "transformer.h.5.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 15925248 | |
| }, | |
| { | |
| "name": "transformer.h.5.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 15931392 | |
| }, | |
| { | |
| "name": "transformer.h.5.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 256, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 28514304 | |
| }, | |
| { | |
| "name": "transformer.h.5.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 30087168 | |
| }, | |
| { | |
| "name": "transformer.h.5.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 33232896 | |
| } | |
| ], | |
| "md5sum": "7c7ffddce1370de2362b78e21a6591d0" | |
| }, | |
| { | |
| "dataPath": "params_shard_17.bin", | |
| "format": "raw-shard", | |
| "nbytes": 21239808, | |
| "records": [ | |
| { | |
| "name": "transformer.h.5.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.5.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 589824, | |
| "byteOffset": 4718592 | |
| }, | |
| { | |
| "name": "transformer.h.5.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 9216 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 14155776, | |
| "byteOffset": 5308416 | |
| }, | |
| { | |
| "name": "transformer.h.5.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 9216 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1769472, | |
| "byteOffset": 19464192 | |
| }, | |
| { | |
| "name": "transformer.h.6.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 21233664 | |
| } | |
| ], | |
| "md5sum": "4f53a78d3d44704fc76cf9dabbb00818" | |
| }, | |
| { | |
| "dataPath": "params_shard_18.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.6.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 16384 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "03f34c97555e4068e4357453ff503354" | |
| }, | |
| { | |
| "dataPath": "params_shard_19.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22616064, | |
| "records": [ | |
| { | |
| "name": "transformer.h.6.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.6.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 256, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "transformer.h.6.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "transformer.h.6.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 17301504 | |
| }, | |
| { | |
| "name": "transformer.h.6.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 17307648 | |
| }, | |
| { | |
| "name": "transformer.h.6.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 589824, | |
| "byteOffset": 22026240 | |
| } | |
| ], | |
| "md5sum": "adf9b9bbf3beb390b6b4e99276fb5a5f" | |
| }, | |
| { | |
| "dataPath": "params_shard_20.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.7.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 16384 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "1f8ebeb1757f0bc120c3ea3ec8a2d460" | |
| }, | |
| { | |
| "dataPath": "params_shard_21.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33239040, | |
| "records": [ | |
| { | |
| "name": "transformer.h.6.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 9216 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 14155776, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.6.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 9216 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1769472, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "transformer.h.7.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 15925248 | |
| }, | |
| { | |
| "name": "transformer.h.7.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 15931392 | |
| }, | |
| { | |
| "name": "transformer.h.7.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 256, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 28514304 | |
| }, | |
| { | |
| "name": "transformer.h.7.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 30087168 | |
| }, | |
| { | |
| "name": "transformer.h.7.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 33232896 | |
| } | |
| ], | |
| "md5sum": "daba95cdd00ada8acbcc62d90e980524" | |
| }, | |
| { | |
| "dataPath": "params_shard_22.bin", | |
| "format": "raw-shard", | |
| "nbytes": 21239808, | |
| "records": [ | |
| { | |
| "name": "transformer.h.7.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.7.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 589824, | |
| "byteOffset": 4718592 | |
| }, | |
| { | |
| "name": "transformer.h.7.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 9216 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 14155776, | |
| "byteOffset": 5308416 | |
| }, | |
| { | |
| "name": "transformer.h.7.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 9216 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1769472, | |
| "byteOffset": 19464192 | |
| }, | |
| { | |
| "name": "transformer.h.8.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 21233664 | |
| } | |
| ], | |
| "md5sum": "be2252e5d16a20091919c7706ed5bb58" | |
| }, | |
| { | |
| "dataPath": "params_shard_23.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.8.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 16384 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "365fbbd1d1df787874647f25d4976b20" | |
| }, | |
| { | |
| "dataPath": "params_shard_24.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22616064, | |
| "records": [ | |
| { | |
| "name": "transformer.h.8.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.8.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 256, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "transformer.h.8.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "transformer.h.8.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 17301504 | |
| }, | |
| { | |
| "name": "transformer.h.8.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 17307648 | |
| }, | |
| { | |
| "name": "transformer.h.8.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 589824, | |
| "byteOffset": 22026240 | |
| } | |
| ], | |
| "md5sum": "eb430e51053e1179645f896d2c53d68b" | |
| }, | |
| { | |
| "dataPath": "params_shard_25.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.9.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 16384 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d9495eeb87346f6b6ab0cd302e034607" | |
| }, | |
| { | |
| "dataPath": "params_shard_26.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33239040, | |
| "records": [ | |
| { | |
| "name": "transformer.h.8.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 9216 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 14155776, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.8.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 9216 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1769472, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "transformer.h.9.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 15925248 | |
| }, | |
| { | |
| "name": "transformer.h.9.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 15931392 | |
| }, | |
| { | |
| "name": "transformer.h.9.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 256, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 28514304 | |
| }, | |
| { | |
| "name": "transformer.h.9.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 30087168 | |
| }, | |
| { | |
| "name": "transformer.h.9.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 33232896 | |
| } | |
| ], | |
| "md5sum": "dd9bc7a4591dd45c901cf5d3d31bde16" | |
| }, | |
| { | |
| "dataPath": "params_shard_27.bin", | |
| "format": "raw-shard", | |
| "nbytes": 21239808, | |
| "records": [ | |
| { | |
| "name": "transformer.h.9.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.9.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 589824, | |
| "byteOffset": 4718592 | |
| }, | |
| { | |
| "name": "transformer.h.9.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 9216 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 14155776, | |
| "byteOffset": 5308416 | |
| }, | |
| { | |
| "name": "transformer.h.9.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 9216 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1769472, | |
| "byteOffset": 19464192 | |
| }, | |
| { | |
| "name": "transformer.h.10.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 21233664 | |
| } | |
| ], | |
| "md5sum": "8746eefff81605500a690b8ef2877bea" | |
| }, | |
| { | |
| "dataPath": "params_shard_28.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.10.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 16384 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "1d8e3cf9581ff1784cfe1152510e44bf" | |
| }, | |
| { | |
| "dataPath": "params_shard_29.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33239040, | |
| "records": [ | |
| { | |
| "name": "transformer.h.10.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.10.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 256, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "transformer.h.10.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "transformer.h.10.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 17301504 | |
| }, | |
| { | |
| "name": "transformer.h.10.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 9216 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 14155776, | |
| "byteOffset": 17307648 | |
| }, | |
| { | |
| "name": "transformer.h.10.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 9216 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1769472, | |
| "byteOffset": 31463424 | |
| }, | |
| { | |
| "name": "transformer.h.11.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 33232896 | |
| } | |
| ], | |
| "md5sum": "9319ba01839c3fcedc29740659216c32" | |
| }, | |
| { | |
| "dataPath": "params_shard_30.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.11.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 16384 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c2550beb4502957aad93df81f22dacdd" | |
| }, | |
| { | |
| "dataPath": "params_shard_31.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22616064, | |
| "records": [ | |
| { | |
| "name": "transformer.h.11.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.11.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 256, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "transformer.h.11.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "transformer.h.11.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 17301504 | |
| }, | |
| { | |
| "name": "transformer.h.11.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 17307648 | |
| }, | |
| { | |
| "name": "transformer.h.11.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 589824, | |
| "byteOffset": 22026240 | |
| } | |
| ], | |
| "md5sum": "761c825320673a67712af5c57abc883f" | |
| }, | |
| { | |
| "dataPath": "params_shard_32.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.12.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 16384 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "5d95f3d52fd77e93926f6eb6b3772621" | |
| }, | |
| { | |
| "dataPath": "params_shard_33.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33239040, | |
| "records": [ | |
| { | |
| "name": "transformer.h.11.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 9216 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 14155776, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.11.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 9216 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1769472, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "transformer.h.12.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 15925248 | |
| }, | |
| { | |
| "name": "transformer.h.12.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 15931392 | |
| }, | |
| { | |
| "name": "transformer.h.12.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 256, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 28514304 | |
| }, | |
| { | |
| "name": "transformer.h.12.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 30087168 | |
| }, | |
| { | |
| "name": "transformer.h.12.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 33232896 | |
| } | |
| ], | |
| "md5sum": "f7c2de90187c932d792d32342f5193fa" | |
| }, | |
| { | |
| "dataPath": "params_shard_34.bin", | |
| "format": "raw-shard", | |
| "nbytes": 21239808, | |
| "records": [ | |
| { | |
| "name": "transformer.h.12.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.12.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 589824, | |
| "byteOffset": 4718592 | |
| }, | |
| { | |
| "name": "transformer.h.12.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 9216 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 14155776, | |
| "byteOffset": 5308416 | |
| }, | |
| { | |
| "name": "transformer.h.12.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 9216 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1769472, | |
| "byteOffset": 19464192 | |
| }, | |
| { | |
| "name": "transformer.h.13.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 21233664 | |
| } | |
| ], | |
| "md5sum": "630fa88d211032d0993608c98298219d" | |
| }, | |
| { | |
| "dataPath": "params_shard_35.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.13.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 16384 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e67cb0286bb64f55ba50b09d54df50d1" | |
| }, | |
| { | |
| "dataPath": "params_shard_36.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22616064, | |
| "records": [ | |
| { | |
| "name": "transformer.h.13.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.13.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 256, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "transformer.h.13.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "transformer.h.13.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 17301504 | |
| }, | |
| { | |
| "name": "transformer.h.13.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 17307648 | |
| }, | |
| { | |
| "name": "transformer.h.13.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 589824, | |
| "byteOffset": 22026240 | |
| } | |
| ], | |
| "md5sum": "ba58d88ba5ec66a170289ac3f182ad9b" | |
| }, | |
| { | |
| "dataPath": "params_shard_37.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.14.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 16384 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "289f88d75aef6cc18657b8c402a18234" | |
| }, | |
| { | |
| "dataPath": "params_shard_38.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33239040, | |
| "records": [ | |
| { | |
| "name": "transformer.h.13.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 9216 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 14155776, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.13.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 9216 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1769472, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "transformer.h.14.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 15925248 | |
| }, | |
| { | |
| "name": "transformer.h.14.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 15931392 | |
| }, | |
| { | |
| "name": "transformer.h.14.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 256, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 28514304 | |
| }, | |
| { | |
| "name": "transformer.h.14.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 30087168 | |
| }, | |
| { | |
| "name": "transformer.h.14.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 33232896 | |
| } | |
| ], | |
| "md5sum": "4c43924843a0e2247711d1bb651d304c" | |
| }, | |
| { | |
| "dataPath": "params_shard_39.bin", | |
| "format": "raw-shard", | |
| "nbytes": 21239808, | |
| "records": [ | |
| { | |
| "name": "transformer.h.14.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.14.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 589824, | |
| "byteOffset": 4718592 | |
| }, | |
| { | |
| "name": "transformer.h.14.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 9216 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 14155776, | |
| "byteOffset": 5308416 | |
| }, | |
| { | |
| "name": "transformer.h.14.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 9216 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1769472, | |
| "byteOffset": 19464192 | |
| }, | |
| { | |
| "name": "transformer.h.15.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 21233664 | |
| } | |
| ], | |
| "md5sum": "a58fd333bc8ef158f9b1290b7c857eba" | |
| }, | |
| { | |
| "dataPath": "params_shard_40.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.15.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 16384 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "418f614d2efb8ef2f3679befae13836c" | |
| }, | |
| { | |
| "dataPath": "params_shard_41.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22616064, | |
| "records": [ | |
| { | |
| "name": "transformer.h.15.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.15.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 256, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "transformer.h.15.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "transformer.h.15.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 17301504 | |
| }, | |
| { | |
| "name": "transformer.h.15.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 17307648 | |
| }, | |
| { | |
| "name": "transformer.h.15.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 589824, | |
| "byteOffset": 22026240 | |
| } | |
| ], | |
| "md5sum": "e09006870e844e8aa9d80003c5cc4956" | |
| }, | |
| { | |
| "dataPath": "params_shard_42.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.16.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 16384 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "5d6398415ad3ec220f3362ec28becdef" | |
| }, | |
| { | |
| "dataPath": "params_shard_43.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33239040, | |
| "records": [ | |
| { | |
| "name": "transformer.h.15.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 9216 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 14155776, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.15.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 9216 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1769472, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "transformer.h.16.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 15925248 | |
| }, | |
| { | |
| "name": "transformer.h.16.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 15931392 | |
| }, | |
| { | |
| "name": "transformer.h.16.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 256, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 28514304 | |
| }, | |
| { | |
| "name": "transformer.h.16.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 30087168 | |
| }, | |
| { | |
| "name": "transformer.h.16.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 33232896 | |
| } | |
| ], | |
| "md5sum": "f6690cf5b5bcdf56c3b0c8a4c8493931" | |
| }, | |
| { | |
| "dataPath": "params_shard_44.bin", | |
| "format": "raw-shard", | |
| "nbytes": 21239808, | |
| "records": [ | |
| { | |
| "name": "transformer.h.16.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.16.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 589824, | |
| "byteOffset": 4718592 | |
| }, | |
| { | |
| "name": "transformer.h.16.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 9216 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 14155776, | |
| "byteOffset": 5308416 | |
| }, | |
| { | |
| "name": "transformer.h.16.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 9216 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1769472, | |
| "byteOffset": 19464192 | |
| }, | |
| { | |
| "name": "transformer.h.17.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 21233664 | |
| } | |
| ], | |
| "md5sum": "d1464dd3d1ec5d243d67d16df08a8758" | |
| }, | |
| { | |
| "dataPath": "params_shard_45.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.17.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 16384 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "41fd3b9996a7b8e62ed889b4337a6b30" | |
| }, | |
| { | |
| "dataPath": "params_shard_46.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22616064, | |
| "records": [ | |
| { | |
| "name": "transformer.h.17.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.17.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 256, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "transformer.h.17.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "transformer.h.17.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 17301504 | |
| }, | |
| { | |
| "name": "transformer.h.17.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 17307648 | |
| }, | |
| { | |
| "name": "transformer.h.17.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 589824, | |
| "byteOffset": 22026240 | |
| } | |
| ], | |
| "md5sum": "b1809c6cbefbe78a378454fe90292c8f" | |
| }, | |
| { | |
| "dataPath": "params_shard_47.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.18.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 16384 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e788d1611072f50a21eb930083dea03b" | |
| }, | |
| { | |
| "dataPath": "params_shard_48.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33239040, | |
| "records": [ | |
| { | |
| "name": "transformer.h.17.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 9216 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 14155776, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.17.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 9216 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1769472, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "transformer.h.18.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 15925248 | |
| }, | |
| { | |
| "name": "transformer.h.18.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 15931392 | |
| }, | |
| { | |
| "name": "transformer.h.18.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 256, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 28514304 | |
| }, | |
| { | |
| "name": "transformer.h.18.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 30087168 | |
| }, | |
| { | |
| "name": "transformer.h.18.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 33232896 | |
| } | |
| ], | |
| "md5sum": "3449ca5272b2b01bedf4a467bd47da96" | |
| }, | |
| { | |
| "dataPath": "params_shard_49.bin", | |
| "format": "raw-shard", | |
| "nbytes": 21239808, | |
| "records": [ | |
| { | |
| "name": "transformer.h.18.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.18.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 589824, | |
| "byteOffset": 4718592 | |
| }, | |
| { | |
| "name": "transformer.h.18.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 9216 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 14155776, | |
| "byteOffset": 5308416 | |
| }, | |
| { | |
| "name": "transformer.h.18.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 9216 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1769472, | |
| "byteOffset": 19464192 | |
| }, | |
| { | |
| "name": "transformer.h.19.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 21233664 | |
| } | |
| ], | |
| "md5sum": "fd6ccccd3d9dafc004e8401cd048e812" | |
| }, | |
| { | |
| "dataPath": "params_shard_50.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.19.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 16384 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "7840919ad8415bfa5974c0160d256a49" | |
| }, | |
| { | |
| "dataPath": "params_shard_51.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22616064, | |
| "records": [ | |
| { | |
| "name": "transformer.h.19.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.19.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 256, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "transformer.h.19.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "transformer.h.19.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 17301504 | |
| }, | |
| { | |
| "name": "transformer.h.19.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 17307648 | |
| }, | |
| { | |
| "name": "transformer.h.19.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 589824, | |
| "byteOffset": 22026240 | |
| } | |
| ], | |
| "md5sum": "7390c4aeb09fc03807a7635882017919" | |
| }, | |
| { | |
| "dataPath": "params_shard_52.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.20.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 16384 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "deb7aee1e739628ba7c9d0e89e87c5c1" | |
| }, | |
| { | |
| "dataPath": "params_shard_53.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33239040, | |
| "records": [ | |
| { | |
| "name": "transformer.h.19.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 9216 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 14155776, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.19.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 9216 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1769472, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "transformer.h.20.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 15925248 | |
| }, | |
| { | |
| "name": "transformer.h.20.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 15931392 | |
| }, | |
| { | |
| "name": "transformer.h.20.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 256, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 28514304 | |
| }, | |
| { | |
| "name": "transformer.h.20.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 30087168 | |
| }, | |
| { | |
| "name": "transformer.h.20.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 33232896 | |
| } | |
| ], | |
| "md5sum": "48147ad5096b40fd52d906e77d1fb724" | |
| }, | |
| { | |
| "dataPath": "params_shard_54.bin", | |
| "format": "raw-shard", | |
| "nbytes": 26548224, | |
| "records": [ | |
| { | |
| "name": "transformer.h.20.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.20.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 589824, | |
| "byteOffset": 4718592 | |
| }, | |
| { | |
| "name": "transformer.h.20.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 9216 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 14155776, | |
| "byteOffset": 5308416 | |
| }, | |
| { | |
| "name": "transformer.h.20.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 9216 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1769472, | |
| "byteOffset": 19464192 | |
| }, | |
| { | |
| "name": "transformer.h.21.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 21233664 | |
| }, | |
| { | |
| "name": "transformer.h.21.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 589824, | |
| "byteOffset": 25952256 | |
| }, | |
| { | |
| "name": "transformer.h.21.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 26542080 | |
| } | |
| ], | |
| "md5sum": "8efb76077f593cb38fdbb99a5d791300" | |
| }, | |
| { | |
| "dataPath": "params_shard_55.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.21.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 16384 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c2fabae7f63fb4d0f14899fa4ff8b4d3" | |
| }, | |
| { | |
| "dataPath": "params_shard_56.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33239040, | |
| "records": [ | |
| { | |
| "name": "transformer.h.21.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.21.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 256, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "transformer.h.21.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "transformer.h.21.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 17301504 | |
| }, | |
| { | |
| "name": "transformer.h.21.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 9216 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 14155776, | |
| "byteOffset": 17307648 | |
| }, | |
| { | |
| "name": "transformer.h.21.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 9216 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1769472, | |
| "byteOffset": 31463424 | |
| }, | |
| { | |
| "name": "transformer.h.22.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 33232896 | |
| } | |
| ], | |
| "md5sum": "55c31627f27d656313eca4eb874a0b07" | |
| }, | |
| { | |
| "dataPath": "params_shard_57.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.22.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 16384 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b5a949deef4bfae09a6cbacda55f4a75" | |
| }, | |
| { | |
| "dataPath": "params_shard_58.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22616064, | |
| "records": [ | |
| { | |
| "name": "transformer.h.22.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.22.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 256, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "transformer.h.22.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "transformer.h.22.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 17301504 | |
| }, | |
| { | |
| "name": "transformer.h.22.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 17307648 | |
| }, | |
| { | |
| "name": "transformer.h.22.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 589824, | |
| "byteOffset": 22026240 | |
| } | |
| ], | |
| "md5sum": "26b55242cfab8e1434b1130d546113d7" | |
| }, | |
| { | |
| "dataPath": "params_shard_59.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.23.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 16384 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "5417e1f3806f3868059f8e2cacf732c8" | |
| }, | |
| { | |
| "dataPath": "params_shard_60.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33239040, | |
| "records": [ | |
| { | |
| "name": "transformer.h.22.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 9216 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 14155776, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.22.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 9216 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1769472, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "transformer.h.23.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 15925248 | |
| }, | |
| { | |
| "name": "transformer.h.23.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 15931392 | |
| }, | |
| { | |
| "name": "transformer.h.23.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 256, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 28514304 | |
| }, | |
| { | |
| "name": "transformer.h.23.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 30087168 | |
| }, | |
| { | |
| "name": "transformer.h.23.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 33232896 | |
| } | |
| ], | |
| "md5sum": "066e3b3303ae7d6b096af85d9262f987" | |
| }, | |
| { | |
| "dataPath": "params_shard_61.bin", | |
| "format": "raw-shard", | |
| "nbytes": 21239808, | |
| "records": [ | |
| { | |
| "name": "transformer.h.23.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.23.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 589824, | |
| "byteOffset": 4718592 | |
| }, | |
| { | |
| "name": "transformer.h.23.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 9216 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 14155776, | |
| "byteOffset": 5308416 | |
| }, | |
| { | |
| "name": "transformer.h.23.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 9216 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1769472, | |
| "byteOffset": 19464192 | |
| }, | |
| { | |
| "name": "transformer.h.24.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 21233664 | |
| } | |
| ], | |
| "md5sum": "5fa60e4b10e1a93ad4d7eb5dc4550648" | |
| }, | |
| { | |
| "dataPath": "params_shard_62.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.24.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 16384 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "eab49416f0a8ceda4075c045754b7c4d" | |
| }, | |
| { | |
| "dataPath": "params_shard_63.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22616064, | |
| "records": [ | |
| { | |
| "name": "transformer.h.24.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.24.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 256, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "transformer.h.24.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "transformer.h.24.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 17301504 | |
| }, | |
| { | |
| "name": "transformer.h.24.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 17307648 | |
| }, | |
| { | |
| "name": "transformer.h.24.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 589824, | |
| "byteOffset": 22026240 | |
| } | |
| ], | |
| "md5sum": "962f764d49142f6b448b294323664f66" | |
| }, | |
| { | |
| "dataPath": "params_shard_64.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.25.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 16384 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "8709e6e8ca6a8cd1a782e066dd79440c" | |
| }, | |
| { | |
| "dataPath": "params_shard_65.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33239040, | |
| "records": [ | |
| { | |
| "name": "transformer.h.24.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 9216 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 14155776, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.24.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 9216 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1769472, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "transformer.h.25.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 15925248 | |
| }, | |
| { | |
| "name": "transformer.h.25.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 15931392 | |
| }, | |
| { | |
| "name": "transformer.h.25.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 256, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 28514304 | |
| }, | |
| { | |
| "name": "transformer.h.25.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 30087168 | |
| }, | |
| { | |
| "name": "transformer.h.25.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 33232896 | |
| } | |
| ], | |
| "md5sum": "91fc548eed1f4d8f3806ebdf5d2b1d88" | |
| }, | |
| { | |
| "dataPath": "params_shard_66.bin", | |
| "format": "raw-shard", | |
| "nbytes": 21239808, | |
| "records": [ | |
| { | |
| "name": "transformer.h.25.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.25.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 589824, | |
| "byteOffset": 4718592 | |
| }, | |
| { | |
| "name": "transformer.h.25.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 9216 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 14155776, | |
| "byteOffset": 5308416 | |
| }, | |
| { | |
| "name": "transformer.h.25.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 9216 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1769472, | |
| "byteOffset": 19464192 | |
| }, | |
| { | |
| "name": "transformer.h.26.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 21233664 | |
| } | |
| ], | |
| "md5sum": "fd68e353c2d0679990e3392f8d06018e" | |
| }, | |
| { | |
| "dataPath": "params_shard_67.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.26.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 16384 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ce05bcf99093f3c51edfc42ad5140749" | |
| }, | |
| { | |
| "dataPath": "params_shard_68.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22616064, | |
| "records": [ | |
| { | |
| "name": "transformer.h.26.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.26.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 256, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "transformer.h.26.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "transformer.h.26.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 17301504 | |
| }, | |
| { | |
| "name": "transformer.h.26.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 17307648 | |
| }, | |
| { | |
| "name": "transformer.h.26.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 589824, | |
| "byteOffset": 22026240 | |
| } | |
| ], | |
| "md5sum": "196c81d6ee6cabbf2d505b3e3cfce54b" | |
| }, | |
| { | |
| "dataPath": "params_shard_69.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.27.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 16384 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b13dadb3b336e75f9edbc658b0251b8a" | |
| }, | |
| { | |
| "dataPath": "params_shard_70.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33239040, | |
| "records": [ | |
| { | |
| "name": "transformer.h.26.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 9216 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 14155776, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.26.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 9216 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1769472, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "transformer.h.27.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 15925248 | |
| }, | |
| { | |
| "name": "transformer.h.27.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 15931392 | |
| }, | |
| { | |
| "name": "transformer.h.27.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 256, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 28514304 | |
| }, | |
| { | |
| "name": "transformer.h.27.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 30087168 | |
| }, | |
| { | |
| "name": "transformer.h.27.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 33232896 | |
| } | |
| ], | |
| "md5sum": "5efd36043298d77fbb67aa42228c1612" | |
| }, | |
| { | |
| "dataPath": "params_shard_71.bin", | |
| "format": "raw-shard", | |
| "nbytes": 21239808, | |
| "records": [ | |
| { | |
| "name": "transformer.h.27.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.27.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 589824, | |
| "byteOffset": 4718592 | |
| }, | |
| { | |
| "name": "transformer.h.27.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 9216 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 14155776, | |
| "byteOffset": 5308416 | |
| }, | |
| { | |
| "name": "transformer.h.27.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 9216 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1769472, | |
| "byteOffset": 19464192 | |
| }, | |
| { | |
| "name": "transformer.h.28.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 21233664 | |
| } | |
| ], | |
| "md5sum": "f3d05381835875dad7fe7c83b7aa4fb4" | |
| }, | |
| { | |
| "dataPath": "params_shard_72.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.28.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 16384 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "4bf4d2cdc342fe9bfbfd86dd604ead64" | |
| }, | |
| { | |
| "dataPath": "params_shard_73.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22616064, | |
| "records": [ | |
| { | |
| "name": "transformer.h.28.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.28.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 256, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "transformer.h.28.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "transformer.h.28.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 17301504 | |
| }, | |
| { | |
| "name": "transformer.h.28.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 17307648 | |
| }, | |
| { | |
| "name": "transformer.h.28.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 589824, | |
| "byteOffset": 22026240 | |
| } | |
| ], | |
| "md5sum": "5bf3ee91205a0d4a5bf8d905c3c5b7ff" | |
| }, | |
| { | |
| "dataPath": "params_shard_74.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.29.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 16384 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d5aa4aaa907026b054b11f60acb7f257" | |
| }, | |
| { | |
| "dataPath": "params_shard_75.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33239040, | |
| "records": [ | |
| { | |
| "name": "transformer.h.28.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 9216 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 14155776, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.28.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 9216 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1769472, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "transformer.h.29.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 15925248 | |
| }, | |
| { | |
| "name": "transformer.h.29.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 15931392 | |
| }, | |
| { | |
| "name": "transformer.h.29.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 256, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 28514304 | |
| }, | |
| { | |
| "name": "transformer.h.29.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 30087168 | |
| }, | |
| { | |
| "name": "transformer.h.29.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 33232896 | |
| } | |
| ], | |
| "md5sum": "8f057188fe213432cfd8667c5a9857c8" | |
| }, | |
| { | |
| "dataPath": "params_shard_76.bin", | |
| "format": "raw-shard", | |
| "nbytes": 21239808, | |
| "records": [ | |
| { | |
| "name": "transformer.h.29.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.29.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 589824, | |
| "byteOffset": 4718592 | |
| }, | |
| { | |
| "name": "transformer.h.29.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 9216 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 14155776, | |
| "byteOffset": 5308416 | |
| }, | |
| { | |
| "name": "transformer.h.29.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 9216 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1769472, | |
| "byteOffset": 19464192 | |
| }, | |
| { | |
| "name": "transformer.h.30.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 21233664 | |
| } | |
| ], | |
| "md5sum": "b48817fbd545288d46d68eba2bb293c6" | |
| }, | |
| { | |
| "dataPath": "params_shard_77.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.30.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 16384 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "9d72872b683a5f18a21ba945acd3b303" | |
| }, | |
| { | |
| "dataPath": "params_shard_78.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22616064, | |
| "records": [ | |
| { | |
| "name": "transformer.h.30.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.30.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 256, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "transformer.h.30.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "transformer.h.30.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 17301504 | |
| }, | |
| { | |
| "name": "transformer.h.30.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 17307648 | |
| }, | |
| { | |
| "name": "transformer.h.30.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 589824, | |
| "byteOffset": 22026240 | |
| } | |
| ], | |
| "md5sum": "c342abf30d9b7ee00b1bf225aed25d8d" | |
| }, | |
| { | |
| "dataPath": "params_shard_79.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.31.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 16384 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d66b4c8a4d8f017f96957e1d545932e7" | |
| }, | |
| { | |
| "dataPath": "params_shard_80.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33239040, | |
| "records": [ | |
| { | |
| "name": "transformer.h.30.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 9216 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 14155776, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.30.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 9216 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1769472, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "transformer.h.31.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 15925248 | |
| }, | |
| { | |
| "name": "transformer.h.31.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 15931392 | |
| }, | |
| { | |
| "name": "transformer.h.31.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 256, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 28514304 | |
| }, | |
| { | |
| "name": "transformer.h.31.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 16384 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 30087168 | |
| }, | |
| { | |
| "name": "transformer.h.31.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 33232896 | |
| } | |
| ], | |
| "md5sum": "9392f9269cc80331df56625c788da0b1" | |
| }, | |
| { | |
| "dataPath": "params_shard_81.bin", | |
| "format": "raw-shard", | |
| "nbytes": 21239808, | |
| "records": [ | |
| { | |
| "name": "transformer.h.31.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.31.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 589824, | |
| "byteOffset": 4718592 | |
| }, | |
| { | |
| "name": "transformer.h.31.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 384, | |
| 9216 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 14155776, | |
| "byteOffset": 5308416 | |
| }, | |
| { | |
| "name": "transformer.h.31.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 96, | |
| 9216 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1769472, | |
| "byteOffset": 19464192 | |
| }, | |
| { | |
| "name": "transformer.norm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 21233664 | |
| } | |
| ], | |
| "md5sum": "dc8b9cc5fcccb2885b98b22586fffcf5" | |
| } | |
| ] | |
| } |