| { | |
| "metadata": { | |
| "ParamSize": 405, | |
| "ParamBytes": 7322019840.0, | |
| "BitsPerParam": 4.500366420537488 | |
| }, | |
| "records": [ | |
| { | |
| "dataPath": "params_shard_0.bin", | |
| "format": "raw-shard", | |
| "nbytes": 81920000, | |
| "records": [ | |
| { | |
| "name": "lm_head.q_weight", | |
| "shape": [ | |
| 32000, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 81920000, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "3a638588f65b2e0cb5ddcf7494b644e1" | |
| }, | |
| { | |
| "dataPath": "params_shard_1.bin", | |
| "format": "raw-shard", | |
| "nbytes": 35389440, | |
| "records": [ | |
| { | |
| "name": "model.layers.30.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 1728 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 35389440, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "aa5f7e9f6fc68b65fa58f6831ebd405c" | |
| }, | |
| { | |
| "dataPath": "params_shard_2.bin", | |
| "format": "raw-shard", | |
| "nbytes": 35389440, | |
| "records": [ | |
| { | |
| "name": "model.layers.31.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 1728 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 35389440, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "62f5a5743622985d6661a730d81ca626" | |
| }, | |
| { | |
| "dataPath": "params_shard_3.bin", | |
| "format": "raw-shard", | |
| "nbytes": 70778880, | |
| "records": [ | |
| { | |
| "name": "model.layers.31.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 27648, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 70778880, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "7d2f2983f7071cc3e04cea8e37ab80ac" | |
| }, | |
| { | |
| "dataPath": "params_shard_4.bin", | |
| "format": "raw-shard", | |
| "nbytes": 39321600, | |
| "records": [ | |
| { | |
| "name": "model.layers.31.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 15360, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 39321600, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f7bb7f1df92194ce72168d83854a8769" | |
| }, | |
| { | |
| "dataPath": "params_shard_5.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32890880, | |
| "records": [ | |
| { | |
| "name": "lm_head.q_scale", | |
| "shape": [ | |
| 32000, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240000, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.30.input_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 10240000 | |
| }, | |
| { | |
| "name": "model.layers.30.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 432 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4423680, | |
| "byteOffset": 10250240 | |
| }, | |
| { | |
| "name": "model.layers.30.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 14673920 | |
| }, | |
| { | |
| "name": "model.layers.31.input_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 14684160 | |
| }, | |
| { | |
| "name": "model.layers.31.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 432 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4423680, | |
| "byteOffset": 14694400 | |
| }, | |
| { | |
| "name": "model.layers.31.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 27648, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8847360, | |
| "byteOffset": 19118080 | |
| }, | |
| { | |
| "name": "model.layers.31.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 27965440 | |
| }, | |
| { | |
| "name": "model.layers.31.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 15360, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4915200, | |
| "byteOffset": 27975680 | |
| } | |
| ], | |
| "md5sum": "5cd5e574f2f8abf36631f78b3384d080" | |
| }, | |
| { | |
| "dataPath": "params_shard_6.bin", | |
| "format": "raw-shard", | |
| "nbytes": 35389440, | |
| "records": [ | |
| { | |
| "name": "model.layers.32.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 1728 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 35389440, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ee3beb21d39861029f7ad87ddcd3e3a5" | |
| }, | |
| { | |
| "dataPath": "params_shard_7.bin", | |
| "format": "raw-shard", | |
| "nbytes": 70778880, | |
| "records": [ | |
| { | |
| "name": "model.layers.32.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 27648, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 70778880, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "a17e274ff1139e4d962575fbf769e358" | |
| }, | |
| { | |
| "dataPath": "params_shard_8.bin", | |
| "format": "raw-shard", | |
| "nbytes": 39321600, | |
| "records": [ | |
| { | |
| "name": "model.layers.32.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 15360, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 39321600, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "955967a30c49ed5841c140c60f3206ef" | |
| }, | |
| { | |
| "dataPath": "params_shard_9.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32952320, | |
| "records": [ | |
| { | |
| "name": "model.layers.31.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.31.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 13107200 | |
| }, | |
| { | |
| "name": "model.layers.32.input_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 14745600 | |
| }, | |
| { | |
| "name": "model.layers.32.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 432 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4423680, | |
| "byteOffset": 14755840 | |
| }, | |
| { | |
| "name": "model.layers.32.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 27648, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8847360, | |
| "byteOffset": 19179520 | |
| }, | |
| { | |
| "name": "model.layers.32.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 28026880 | |
| }, | |
| { | |
| "name": "model.layers.32.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 15360, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4915200, | |
| "byteOffset": 28037120 | |
| } | |
| ], | |
| "md5sum": "e3477615d072aa128e64464f85665295" | |
| }, | |
| { | |
| "dataPath": "params_shard_10.bin", | |
| "format": "raw-shard", | |
| "nbytes": 35389440, | |
| "records": [ | |
| { | |
| "name": "model.layers.33.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 1728 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 35389440, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ba7103206378b90b26595eeb4fc4396a" | |
| }, | |
| { | |
| "dataPath": "params_shard_11.bin", | |
| "format": "raw-shard", | |
| "nbytes": 70778880, | |
| "records": [ | |
| { | |
| "name": "model.layers.33.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 27648, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 70778880, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "0c4c06f756c22e8f6afb4b509653bdcb" | |
| }, | |
| { | |
| "dataPath": "params_shard_12.bin", | |
| "format": "raw-shard", | |
| "nbytes": 39321600, | |
| "records": [ | |
| { | |
| "name": "model.layers.33.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 15360, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 39321600, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "0db22577292701b1e924a74c0e0e813c" | |
| }, | |
| { | |
| "dataPath": "params_shard_13.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32952320, | |
| "records": [ | |
| { | |
| "name": "model.layers.32.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.32.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 13107200 | |
| }, | |
| { | |
| "name": "model.layers.33.input_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 14745600 | |
| }, | |
| { | |
| "name": "model.layers.33.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 432 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4423680, | |
| "byteOffset": 14755840 | |
| }, | |
| { | |
| "name": "model.layers.33.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 27648, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8847360, | |
| "byteOffset": 19179520 | |
| }, | |
| { | |
| "name": "model.layers.33.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 28026880 | |
| }, | |
| { | |
| "name": "model.layers.33.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 15360, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4915200, | |
| "byteOffset": 28037120 | |
| } | |
| ], | |
| "md5sum": "ca5ef1cce14c560cd56f23fed81182c6" | |
| }, | |
| { | |
| "dataPath": "params_shard_14.bin", | |
| "format": "raw-shard", | |
| "nbytes": 35389440, | |
| "records": [ | |
| { | |
| "name": "model.layers.34.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 1728 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 35389440, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c845a183c25afb2dfbe8435bd42575ba" | |
| }, | |
| { | |
| "dataPath": "params_shard_15.bin", | |
| "format": "raw-shard", | |
| "nbytes": 70778880, | |
| "records": [ | |
| { | |
| "name": "model.layers.34.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 27648, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 70778880, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "87a6193dbfec2018462caee3b13fac17" | |
| }, | |
| { | |
| "dataPath": "params_shard_16.bin", | |
| "format": "raw-shard", | |
| "nbytes": 39321600, | |
| "records": [ | |
| { | |
| "name": "model.layers.34.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 15360, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 39321600, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "8c30b83165c3cea85b25823d2918432d" | |
| }, | |
| { | |
| "dataPath": "params_shard_17.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32952320, | |
| "records": [ | |
| { | |
| "name": "model.layers.33.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.33.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 13107200 | |
| }, | |
| { | |
| "name": "model.layers.34.input_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 14745600 | |
| }, | |
| { | |
| "name": "model.layers.34.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 432 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4423680, | |
| "byteOffset": 14755840 | |
| }, | |
| { | |
| "name": "model.layers.34.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 27648, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8847360, | |
| "byteOffset": 19179520 | |
| }, | |
| { | |
| "name": "model.layers.34.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 28026880 | |
| }, | |
| { | |
| "name": "model.layers.34.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 15360, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4915200, | |
| "byteOffset": 28037120 | |
| } | |
| ], | |
| "md5sum": "7a4ace7cc201e746580fe16e86f29bac" | |
| }, | |
| { | |
| "dataPath": "params_shard_18.bin", | |
| "format": "raw-shard", | |
| "nbytes": 35389440, | |
| "records": [ | |
| { | |
| "name": "model.layers.35.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 1728 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 35389440, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "424a950adcc06c05af52eeff0012e185" | |
| }, | |
| { | |
| "dataPath": "params_shard_19.bin", | |
| "format": "raw-shard", | |
| "nbytes": 70778880, | |
| "records": [ | |
| { | |
| "name": "model.layers.35.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 27648, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 70778880, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c62ef6006a35e72b3b7edc2e73a8991b" | |
| }, | |
| { | |
| "dataPath": "params_shard_20.bin", | |
| "format": "raw-shard", | |
| "nbytes": 39321600, | |
| "records": [ | |
| { | |
| "name": "model.layers.35.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 15360, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 39321600, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "1892e154464562fb0e7d52cf0d8ef154" | |
| }, | |
| { | |
| "dataPath": "params_shard_21.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32952320, | |
| "records": [ | |
| { | |
| "name": "model.layers.34.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.34.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 13107200 | |
| }, | |
| { | |
| "name": "model.layers.35.input_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 14745600 | |
| }, | |
| { | |
| "name": "model.layers.35.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 432 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4423680, | |
| "byteOffset": 14755840 | |
| }, | |
| { | |
| "name": "model.layers.35.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 27648, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8847360, | |
| "byteOffset": 19179520 | |
| }, | |
| { | |
| "name": "model.layers.35.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 28026880 | |
| }, | |
| { | |
| "name": "model.layers.35.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 15360, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4915200, | |
| "byteOffset": 28037120 | |
| } | |
| ], | |
| "md5sum": "7d4835958d093c9803c632cc3f43a911" | |
| }, | |
| { | |
| "dataPath": "params_shard_22.bin", | |
| "format": "raw-shard", | |
| "nbytes": 35389440, | |
| "records": [ | |
| { | |
| "name": "model.layers.36.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 1728 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 35389440, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "6cc1e164e034f3343458c0e20cad2a66" | |
| }, | |
| { | |
| "dataPath": "params_shard_23.bin", | |
| "format": "raw-shard", | |
| "nbytes": 70778880, | |
| "records": [ | |
| { | |
| "name": "model.layers.36.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 27648, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 70778880, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "185b3d6aa02159e397e30f606899f138" | |
| }, | |
| { | |
| "dataPath": "params_shard_24.bin", | |
| "format": "raw-shard", | |
| "nbytes": 39321600, | |
| "records": [ | |
| { | |
| "name": "model.layers.36.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 15360, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 39321600, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "3724f9099caf39d8ebf8eedf0e1179bb" | |
| }, | |
| { | |
| "dataPath": "params_shard_25.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32952320, | |
| "records": [ | |
| { | |
| "name": "model.layers.35.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.35.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 13107200 | |
| }, | |
| { | |
| "name": "model.layers.36.input_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 14745600 | |
| }, | |
| { | |
| "name": "model.layers.36.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 432 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4423680, | |
| "byteOffset": 14755840 | |
| }, | |
| { | |
| "name": "model.layers.36.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 27648, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8847360, | |
| "byteOffset": 19179520 | |
| }, | |
| { | |
| "name": "model.layers.36.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 28026880 | |
| }, | |
| { | |
| "name": "model.layers.36.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 15360, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4915200, | |
| "byteOffset": 28037120 | |
| } | |
| ], | |
| "md5sum": "b8ba1b647509fa3210f7e736fc41da9b" | |
| }, | |
| { | |
| "dataPath": "params_shard_26.bin", | |
| "format": "raw-shard", | |
| "nbytes": 35389440, | |
| "records": [ | |
| { | |
| "name": "model.layers.37.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 1728 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 35389440, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "35fad37e180fb42c3fb984de2c0ddf1a" | |
| }, | |
| { | |
| "dataPath": "params_shard_27.bin", | |
| "format": "raw-shard", | |
| "nbytes": 70778880, | |
| "records": [ | |
| { | |
| "name": "model.layers.37.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 27648, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 70778880, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "fe67b52989af159f60fce6ab32529165" | |
| }, | |
| { | |
| "dataPath": "params_shard_28.bin", | |
| "format": "raw-shard", | |
| "nbytes": 39321600, | |
| "records": [ | |
| { | |
| "name": "model.layers.37.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 15360, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 39321600, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "efecda65dbcd2ee1bec320d38f61005a" | |
| }, | |
| { | |
| "dataPath": "params_shard_29.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32952320, | |
| "records": [ | |
| { | |
| "name": "model.layers.36.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.36.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 13107200 | |
| }, | |
| { | |
| "name": "model.layers.37.input_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 14745600 | |
| }, | |
| { | |
| "name": "model.layers.37.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 432 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4423680, | |
| "byteOffset": 14755840 | |
| }, | |
| { | |
| "name": "model.layers.37.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 27648, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8847360, | |
| "byteOffset": 19179520 | |
| }, | |
| { | |
| "name": "model.layers.37.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 28026880 | |
| }, | |
| { | |
| "name": "model.layers.37.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 15360, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4915200, | |
| "byteOffset": 28037120 | |
| } | |
| ], | |
| "md5sum": "20557b2de9a101acb57b7a55902ac6c1" | |
| }, | |
| { | |
| "dataPath": "params_shard_30.bin", | |
| "format": "raw-shard", | |
| "nbytes": 35389440, | |
| "records": [ | |
| { | |
| "name": "model.layers.38.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 1728 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 35389440, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d2a70be96dea35f61d6fd488efc82405" | |
| }, | |
| { | |
| "dataPath": "params_shard_31.bin", | |
| "format": "raw-shard", | |
| "nbytes": 70778880, | |
| "records": [ | |
| { | |
| "name": "model.layers.38.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 27648, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 70778880, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "306b8b7bd13508dfdad6f1323f224c0c" | |
| }, | |
| { | |
| "dataPath": "params_shard_32.bin", | |
| "format": "raw-shard", | |
| "nbytes": 39321600, | |
| "records": [ | |
| { | |
| "name": "model.layers.38.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 15360, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 39321600, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "922fc2e9d9b5fc04f5819ba5b874a946" | |
| }, | |
| { | |
| "dataPath": "params_shard_33.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32952320, | |
| "records": [ | |
| { | |
| "name": "model.layers.37.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.37.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 13107200 | |
| }, | |
| { | |
| "name": "model.layers.38.input_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 14745600 | |
| }, | |
| { | |
| "name": "model.layers.38.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 432 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4423680, | |
| "byteOffset": 14755840 | |
| }, | |
| { | |
| "name": "model.layers.38.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 27648, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8847360, | |
| "byteOffset": 19179520 | |
| }, | |
| { | |
| "name": "model.layers.38.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 28026880 | |
| }, | |
| { | |
| "name": "model.layers.38.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 15360, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4915200, | |
| "byteOffset": 28037120 | |
| } | |
| ], | |
| "md5sum": "24b7fb27ef8f841ddcd533ab326eb22b" | |
| }, | |
| { | |
| "dataPath": "params_shard_34.bin", | |
| "format": "raw-shard", | |
| "nbytes": 35389440, | |
| "records": [ | |
| { | |
| "name": "model.layers.39.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 1728 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 35389440, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ef11cabea73361698006f65270cc11fd" | |
| }, | |
| { | |
| "dataPath": "params_shard_35.bin", | |
| "format": "raw-shard", | |
| "nbytes": 70778880, | |
| "records": [ | |
| { | |
| "name": "model.layers.39.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 27648, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 70778880, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e55a1f0402a5621bdb296dea0769633c" | |
| }, | |
| { | |
| "dataPath": "params_shard_36.bin", | |
| "format": "raw-shard", | |
| "nbytes": 39321600, | |
| "records": [ | |
| { | |
| "name": "model.layers.39.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 15360, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 39321600, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "eca464a663789f5122b5933b0b4015c6" | |
| }, | |
| { | |
| "dataPath": "params_shard_37.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32952320, | |
| "records": [ | |
| { | |
| "name": "model.layers.38.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.38.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 13107200 | |
| }, | |
| { | |
| "name": "model.layers.39.input_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 14745600 | |
| }, | |
| { | |
| "name": "model.layers.39.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 432 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4423680, | |
| "byteOffset": 14755840 | |
| }, | |
| { | |
| "name": "model.layers.39.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 27648, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8847360, | |
| "byteOffset": 19179520 | |
| }, | |
| { | |
| "name": "model.layers.39.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 28026880 | |
| }, | |
| { | |
| "name": "model.layers.39.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 15360, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4915200, | |
| "byteOffset": 28037120 | |
| } | |
| ], | |
| "md5sum": "c661a2bda76a8ba84ab4a94e1bd36374" | |
| }, | |
| { | |
| "dataPath": "params_shard_38.bin", | |
| "format": "raw-shard", | |
| "nbytes": 81920000, | |
| "records": [ | |
| { | |
| "name": "model.embed_tokens.q_weight", | |
| "shape": [ | |
| 32000, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 81920000, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e485cb09d088c9653606d4694bad4e50" | |
| }, | |
| { | |
| "dataPath": "params_shard_39.bin", | |
| "format": "raw-shard", | |
| "nbytes": 35389440, | |
| "records": [ | |
| { | |
| "name": "model.layers.0.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 1728 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 35389440, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ad5bd462ae4b85cfc3efd38ac4c7daaf" | |
| }, | |
| { | |
| "dataPath": "params_shard_40.bin", | |
| "format": "raw-shard", | |
| "nbytes": 70778880, | |
| "records": [ | |
| { | |
| "name": "model.layers.0.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 27648, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 70778880, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "3d4dd7b27b487f2a0d58f29f67ad0cde" | |
| }, | |
| { | |
| "dataPath": "params_shard_41.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29429760, | |
| "records": [ | |
| { | |
| "name": "model.layers.39.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.39.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 13107200 | |
| }, | |
| { | |
| "name": "model.norm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 14745600 | |
| }, | |
| { | |
| "name": "model.embed_tokens.q_scale", | |
| "shape": [ | |
| 32000, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240000, | |
| "byteOffset": 14755840 | |
| }, | |
| { | |
| "name": "model.layers.0.input_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 24995840 | |
| }, | |
| { | |
| "name": "model.layers.0.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 432 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4423680, | |
| "byteOffset": 25006080 | |
| } | |
| ], | |
| "md5sum": "e598b55ab7fc3e5e551d0cc6a1ca37ed" | |
| }, | |
| { | |
| "dataPath": "params_shard_42.bin", | |
| "format": "raw-shard", | |
| "nbytes": 39321600, | |
| "records": [ | |
| { | |
| "name": "model.layers.0.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 15360, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 39321600, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "6197790e144040b24e8dc18a0825526d" | |
| }, | |
| { | |
| "dataPath": "params_shard_43.bin", | |
| "format": "raw-shard", | |
| "nbytes": 35389440, | |
| "records": [ | |
| { | |
| "name": "model.layers.1.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 1728 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 35389440, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "1fa98afbb4dabf8a226bcd004b6006b7" | |
| }, | |
| { | |
| "dataPath": "params_shard_44.bin", | |
| "format": "raw-shard", | |
| "nbytes": 70778880, | |
| "records": [ | |
| { | |
| "name": "model.layers.1.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 27648, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 70778880, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "7f62a5b8d7adce9a3086e0525754ee80" | |
| }, | |
| { | |
| "dataPath": "params_shard_45.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32952320, | |
| "records": [ | |
| { | |
| "name": "model.layers.0.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 27648, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8847360, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.0.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 8847360 | |
| }, | |
| { | |
| "name": "model.layers.0.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 15360, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4915200, | |
| "byteOffset": 8857600 | |
| }, | |
| { | |
| "name": "model.layers.0.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 13772800 | |
| }, | |
| { | |
| "name": "model.layers.0.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 26880000 | |
| }, | |
| { | |
| "name": "model.layers.1.input_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 28518400 | |
| }, | |
| { | |
| "name": "model.layers.1.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 432 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4423680, | |
| "byteOffset": 28528640 | |
| } | |
| ], | |
| "md5sum": "894950023d3c0086cedbdf2b17493d9d" | |
| }, | |
| { | |
| "dataPath": "params_shard_46.bin", | |
| "format": "raw-shard", | |
| "nbytes": 39321600, | |
| "records": [ | |
| { | |
| "name": "model.layers.1.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 15360, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 39321600, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "3c6b7616befd047577b5e404d269104b" | |
| }, | |
| { | |
| "dataPath": "params_shard_47.bin", | |
| "format": "raw-shard", | |
| "nbytes": 35389440, | |
| "records": [ | |
| { | |
| "name": "model.layers.10.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 1728 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 35389440, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "db163bf7766718ec6b9d108fad9fac66" | |
| }, | |
| { | |
| "dataPath": "params_shard_48.bin", | |
| "format": "raw-shard", | |
| "nbytes": 70778880, | |
| "records": [ | |
| { | |
| "name": "model.layers.10.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 27648, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 70778880, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "7fb15fecd1f2e0a6dbe7ccb8fc3174a9" | |
| }, | |
| { | |
| "dataPath": "params_shard_49.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32952320, | |
| "records": [ | |
| { | |
| "name": "model.layers.1.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 27648, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8847360, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.1.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 8847360 | |
| }, | |
| { | |
| "name": "model.layers.1.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 15360, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4915200, | |
| "byteOffset": 8857600 | |
| }, | |
| { | |
| "name": "model.layers.1.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 13772800 | |
| }, | |
| { | |
| "name": "model.layers.1.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 26880000 | |
| }, | |
| { | |
| "name": "model.layers.10.input_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 28518400 | |
| }, | |
| { | |
| "name": "model.layers.10.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 432 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4423680, | |
| "byteOffset": 28528640 | |
| } | |
| ], | |
| "md5sum": "70cf8387bbf01cf2255f928994216b9f" | |
| }, | |
| { | |
| "dataPath": "params_shard_50.bin", | |
| "format": "raw-shard", | |
| "nbytes": 39321600, | |
| "records": [ | |
| { | |
| "name": "model.layers.10.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 15360, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 39321600, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d1c45f01b7ee09a9f5f2c4dfbcf1f07c" | |
| }, | |
| { | |
| "dataPath": "params_shard_51.bin", | |
| "format": "raw-shard", | |
| "nbytes": 35389440, | |
| "records": [ | |
| { | |
| "name": "model.layers.11.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 1728 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 35389440, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "fc962c6b585dd80e45ea8cf7b1b473b7" | |
| }, | |
| { | |
| "dataPath": "params_shard_52.bin", | |
| "format": "raw-shard", | |
| "nbytes": 70778880, | |
| "records": [ | |
| { | |
| "name": "model.layers.11.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 27648, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 70778880, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "3da02548926f86eb6fac0fec82924b93" | |
| }, | |
| { | |
| "dataPath": "params_shard_53.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32952320, | |
| "records": [ | |
| { | |
| "name": "model.layers.10.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 27648, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8847360, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.10.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 8847360 | |
| }, | |
| { | |
| "name": "model.layers.10.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 15360, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4915200, | |
| "byteOffset": 8857600 | |
| }, | |
| { | |
| "name": "model.layers.10.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 13772800 | |
| }, | |
| { | |
| "name": "model.layers.10.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 26880000 | |
| }, | |
| { | |
| "name": "model.layers.11.input_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 28518400 | |
| }, | |
| { | |
| "name": "model.layers.11.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 432 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4423680, | |
| "byteOffset": 28528640 | |
| } | |
| ], | |
| "md5sum": "18bb53c6219bd2c8cb714ffe8315af9a" | |
| }, | |
| { | |
| "dataPath": "params_shard_54.bin", | |
| "format": "raw-shard", | |
| "nbytes": 39321600, | |
| "records": [ | |
| { | |
| "name": "model.layers.11.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 15360, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 39321600, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "25ae2474e1ed5286d2722bdc71d9a193" | |
| }, | |
| { | |
| "dataPath": "params_shard_55.bin", | |
| "format": "raw-shard", | |
| "nbytes": 35389440, | |
| "records": [ | |
| { | |
| "name": "model.layers.12.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 1728 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 35389440, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "8fb652e3b04efb8a512158296bd40114" | |
| }, | |
| { | |
| "dataPath": "params_shard_56.bin", | |
| "format": "raw-shard", | |
| "nbytes": 70778880, | |
| "records": [ | |
| { | |
| "name": "model.layers.12.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 27648, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 70778880, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "04c42b8f05f4bd9bf4d45e3e7800a5ba" | |
| }, | |
| { | |
| "dataPath": "params_shard_57.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32952320, | |
| "records": [ | |
| { | |
| "name": "model.layers.11.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 27648, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8847360, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.11.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 8847360 | |
| }, | |
| { | |
| "name": "model.layers.11.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 15360, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4915200, | |
| "byteOffset": 8857600 | |
| }, | |
| { | |
| "name": "model.layers.11.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 13772800 | |
| }, | |
| { | |
| "name": "model.layers.11.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 26880000 | |
| }, | |
| { | |
| "name": "model.layers.12.input_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 28518400 | |
| }, | |
| { | |
| "name": "model.layers.12.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 432 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4423680, | |
| "byteOffset": 28528640 | |
| } | |
| ], | |
| "md5sum": "d9987ebd240316864831ed796bf91ae2" | |
| }, | |
| { | |
| "dataPath": "params_shard_58.bin", | |
| "format": "raw-shard", | |
| "nbytes": 39321600, | |
| "records": [ | |
| { | |
| "name": "model.layers.12.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 15360, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 39321600, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "5c1e6422909f7e778d028495b33851ac" | |
| }, | |
| { | |
| "dataPath": "params_shard_59.bin", | |
| "format": "raw-shard", | |
| "nbytes": 35389440, | |
| "records": [ | |
| { | |
| "name": "model.layers.13.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 1728 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 35389440, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "3aca3dcf01d7505247f2166ff47bed33" | |
| }, | |
| { | |
| "dataPath": "params_shard_60.bin", | |
| "format": "raw-shard", | |
| "nbytes": 70778880, | |
| "records": [ | |
| { | |
| "name": "model.layers.13.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 27648, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 70778880, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "88fec8dd93f92f3db91bfdd8adc37855" | |
| }, | |
| { | |
| "dataPath": "params_shard_61.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32952320, | |
| "records": [ | |
| { | |
| "name": "model.layers.12.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 27648, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8847360, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.12.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 8847360 | |
| }, | |
| { | |
| "name": "model.layers.12.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 15360, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4915200, | |
| "byteOffset": 8857600 | |
| }, | |
| { | |
| "name": "model.layers.12.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 13772800 | |
| }, | |
| { | |
| "name": "model.layers.12.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 26880000 | |
| }, | |
| { | |
| "name": "model.layers.13.input_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 28518400 | |
| }, | |
| { | |
| "name": "model.layers.13.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 432 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4423680, | |
| "byteOffset": 28528640 | |
| } | |
| ], | |
| "md5sum": "f5fe417e1921259ab19655e7b98f1fd3" | |
| }, | |
| { | |
| "dataPath": "params_shard_62.bin", | |
| "format": "raw-shard", | |
| "nbytes": 39321600, | |
| "records": [ | |
| { | |
| "name": "model.layers.13.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 15360, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 39321600, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d40777c87b3c9d5a277f79cb49fd50e2" | |
| }, | |
| { | |
| "dataPath": "params_shard_63.bin", | |
| "format": "raw-shard", | |
| "nbytes": 35389440, | |
| "records": [ | |
| { | |
| "name": "model.layers.14.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 1728 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 35389440, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "9d7fe59c76a6f2dc96848bc2fd270841" | |
| }, | |
| { | |
| "dataPath": "params_shard_64.bin", | |
| "format": "raw-shard", | |
| "nbytes": 70778880, | |
| "records": [ | |
| { | |
| "name": "model.layers.14.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 27648, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 70778880, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "a3c551bcc3cf96bf60b9adfcba27d13d" | |
| }, | |
| { | |
| "dataPath": "params_shard_65.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32952320, | |
| "records": [ | |
| { | |
| "name": "model.layers.13.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 27648, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8847360, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.13.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 8847360 | |
| }, | |
| { | |
| "name": "model.layers.13.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 15360, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4915200, | |
| "byteOffset": 8857600 | |
| }, | |
| { | |
| "name": "model.layers.13.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 13772800 | |
| }, | |
| { | |
| "name": "model.layers.13.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 26880000 | |
| }, | |
| { | |
| "name": "model.layers.14.input_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 28518400 | |
| }, | |
| { | |
| "name": "model.layers.14.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 432 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4423680, | |
| "byteOffset": 28528640 | |
| } | |
| ], | |
| "md5sum": "abc07988f221bb4326eab445fe6fb611" | |
| }, | |
| { | |
| "dataPath": "params_shard_66.bin", | |
| "format": "raw-shard", | |
| "nbytes": 39321600, | |
| "records": [ | |
| { | |
| "name": "model.layers.14.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 15360, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 39321600, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "1886e2ebef2c80db7a380195f8c69a3a" | |
| }, | |
| { | |
| "dataPath": "params_shard_67.bin", | |
| "format": "raw-shard", | |
| "nbytes": 39321600, | |
| "records": [ | |
| { | |
| "name": "model.layers.15.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 15360, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 39321600, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e12d9a78534f5e77c8273e177a7c727b" | |
| }, | |
| { | |
| "dataPath": "params_shard_68.bin", | |
| "format": "raw-shard", | |
| "nbytes": 35389440, | |
| "records": [ | |
| { | |
| "name": "model.layers.2.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 1728 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 35389440, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "53e42ab6aeae032df6b8155be25f0cae" | |
| }, | |
| { | |
| "dataPath": "params_shard_69.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33443840, | |
| "records": [ | |
| { | |
| "name": "model.layers.14.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 27648, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8847360, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.14.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 8847360 | |
| }, | |
| { | |
| "name": "model.layers.14.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 15360, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4915200, | |
| "byteOffset": 8857600 | |
| }, | |
| { | |
| "name": "model.layers.14.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 13772800 | |
| }, | |
| { | |
| "name": "model.layers.14.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 26880000 | |
| }, | |
| { | |
| "name": "model.layers.15.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 15360, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4915200, | |
| "byteOffset": 28518400 | |
| }, | |
| { | |
| "name": "model.layers.2.input_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 33433600 | |
| } | |
| ], | |
| "md5sum": "1c488252ab909bdfc63cb70cdf811980" | |
| }, | |
| { | |
| "dataPath": "params_shard_70.bin", | |
| "format": "raw-shard", | |
| "nbytes": 70778880, | |
| "records": [ | |
| { | |
| "name": "model.layers.2.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 27648, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 70778880, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d32e5bd7d7fa460e6e8dc79d605a4606" | |
| }, | |
| { | |
| "dataPath": "params_shard_71.bin", | |
| "format": "raw-shard", | |
| "nbytes": 39321600, | |
| "records": [ | |
| { | |
| "name": "model.layers.2.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 15360, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 39321600, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "9261b1b1771af309e4ec57426920d252" | |
| }, | |
| { | |
| "dataPath": "params_shard_72.bin", | |
| "format": "raw-shard", | |
| "nbytes": 35389440, | |
| "records": [ | |
| { | |
| "name": "model.layers.3.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 1728 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 35389440, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "abf156c140170daf4d77d11e5bdf8f83" | |
| }, | |
| { | |
| "dataPath": "params_shard_73.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32952320, | |
| "records": [ | |
| { | |
| "name": "model.layers.2.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 432 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4423680, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.2.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 27648, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8847360, | |
| "byteOffset": 4423680 | |
| }, | |
| { | |
| "name": "model.layers.2.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 13271040 | |
| }, | |
| { | |
| "name": "model.layers.2.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 15360, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4915200, | |
| "byteOffset": 13281280 | |
| }, | |
| { | |
| "name": "model.layers.2.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 18196480 | |
| }, | |
| { | |
| "name": "model.layers.2.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 31303680 | |
| }, | |
| { | |
| "name": "model.layers.3.input_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 32942080 | |
| } | |
| ], | |
| "md5sum": "e82d5bab595caa8561ece0cfc15c534c" | |
| }, | |
| { | |
| "dataPath": "params_shard_74.bin", | |
| "format": "raw-shard", | |
| "nbytes": 70778880, | |
| "records": [ | |
| { | |
| "name": "model.layers.3.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 27648, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 70778880, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "540db4381342a7488860297e861fe97b" | |
| }, | |
| { | |
| "dataPath": "params_shard_75.bin", | |
| "format": "raw-shard", | |
| "nbytes": 39321600, | |
| "records": [ | |
| { | |
| "name": "model.layers.3.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 15360, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 39321600, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "853b9dceeddde2b5218a4baa9abd1ff5" | |
| }, | |
| { | |
| "dataPath": "params_shard_76.bin", | |
| "format": "raw-shard", | |
| "nbytes": 35389440, | |
| "records": [ | |
| { | |
| "name": "model.layers.4.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 1728 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 35389440, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "51d569c430671b480db860cdb408873b" | |
| }, | |
| { | |
| "dataPath": "params_shard_77.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32952320, | |
| "records": [ | |
| { | |
| "name": "model.layers.3.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 432 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4423680, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.3.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 27648, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8847360, | |
| "byteOffset": 4423680 | |
| }, | |
| { | |
| "name": "model.layers.3.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 13271040 | |
| }, | |
| { | |
| "name": "model.layers.3.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 15360, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4915200, | |
| "byteOffset": 13281280 | |
| }, | |
| { | |
| "name": "model.layers.3.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 18196480 | |
| }, | |
| { | |
| "name": "model.layers.3.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 31303680 | |
| }, | |
| { | |
| "name": "model.layers.4.input_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 32942080 | |
| } | |
| ], | |
| "md5sum": "4bfc5a380aeb9da8307c9aedc1022723" | |
| }, | |
| { | |
| "dataPath": "params_shard_78.bin", | |
| "format": "raw-shard", | |
| "nbytes": 70778880, | |
| "records": [ | |
| { | |
| "name": "model.layers.4.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 27648, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 70778880, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d080925001246d3e726094efe849febd" | |
| }, | |
| { | |
| "dataPath": "params_shard_79.bin", | |
| "format": "raw-shard", | |
| "nbytes": 39321600, | |
| "records": [ | |
| { | |
| "name": "model.layers.4.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 15360, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 39321600, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "9561e1b1950ce20e219b466551f6b17a" | |
| }, | |
| { | |
| "dataPath": "params_shard_80.bin", | |
| "format": "raw-shard", | |
| "nbytes": 35389440, | |
| "records": [ | |
| { | |
| "name": "model.layers.5.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 1728 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 35389440, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "da9d264963605c307a0d33ac82a92141" | |
| }, | |
| { | |
| "dataPath": "params_shard_81.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32952320, | |
| "records": [ | |
| { | |
| "name": "model.layers.4.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 432 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4423680, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.4.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 27648, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8847360, | |
| "byteOffset": 4423680 | |
| }, | |
| { | |
| "name": "model.layers.4.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 13271040 | |
| }, | |
| { | |
| "name": "model.layers.4.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 15360, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4915200, | |
| "byteOffset": 13281280 | |
| }, | |
| { | |
| "name": "model.layers.4.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 18196480 | |
| }, | |
| { | |
| "name": "model.layers.4.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 31303680 | |
| }, | |
| { | |
| "name": "model.layers.5.input_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 32942080 | |
| } | |
| ], | |
| "md5sum": "8022d753c913e4b4e6307ae1083fa479" | |
| }, | |
| { | |
| "dataPath": "params_shard_82.bin", | |
| "format": "raw-shard", | |
| "nbytes": 70778880, | |
| "records": [ | |
| { | |
| "name": "model.layers.5.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 27648, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 70778880, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "79fb98a7eb4156fc67c9339eeb7a2f38" | |
| }, | |
| { | |
| "dataPath": "params_shard_83.bin", | |
| "format": "raw-shard", | |
| "nbytes": 39321600, | |
| "records": [ | |
| { | |
| "name": "model.layers.5.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 15360, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 39321600, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "42c10b441749be85c0d2d41199dfca13" | |
| }, | |
| { | |
| "dataPath": "params_shard_84.bin", | |
| "format": "raw-shard", | |
| "nbytes": 35389440, | |
| "records": [ | |
| { | |
| "name": "model.layers.6.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 1728 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 35389440, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "4430e655c931605d82815d5321d0b652" | |
| }, | |
| { | |
| "dataPath": "params_shard_85.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32952320, | |
| "records": [ | |
| { | |
| "name": "model.layers.5.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 432 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4423680, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.5.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 27648, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8847360, | |
| "byteOffset": 4423680 | |
| }, | |
| { | |
| "name": "model.layers.5.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 13271040 | |
| }, | |
| { | |
| "name": "model.layers.5.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 15360, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4915200, | |
| "byteOffset": 13281280 | |
| }, | |
| { | |
| "name": "model.layers.5.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 18196480 | |
| }, | |
| { | |
| "name": "model.layers.5.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 31303680 | |
| }, | |
| { | |
| "name": "model.layers.6.input_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 32942080 | |
| } | |
| ], | |
| "md5sum": "732a7a6463af0fb6866e24f4bef539b0" | |
| }, | |
| { | |
| "dataPath": "params_shard_86.bin", | |
| "format": "raw-shard", | |
| "nbytes": 70778880, | |
| "records": [ | |
| { | |
| "name": "model.layers.6.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 27648, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 70778880, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "df31ec3aa4037eb694da3a5b42415e4f" | |
| }, | |
| { | |
| "dataPath": "params_shard_87.bin", | |
| "format": "raw-shard", | |
| "nbytes": 39321600, | |
| "records": [ | |
| { | |
| "name": "model.layers.6.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 15360, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 39321600, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "4fec6d6943a00f62166d0f15b72ee5b1" | |
| }, | |
| { | |
| "dataPath": "params_shard_88.bin", | |
| "format": "raw-shard", | |
| "nbytes": 35389440, | |
| "records": [ | |
| { | |
| "name": "model.layers.7.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 1728 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 35389440, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "4452ee224b311c4dd1ff001bd98adf0e" | |
| }, | |
| { | |
| "dataPath": "params_shard_89.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32952320, | |
| "records": [ | |
| { | |
| "name": "model.layers.6.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 432 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4423680, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.6.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 27648, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8847360, | |
| "byteOffset": 4423680 | |
| }, | |
| { | |
| "name": "model.layers.6.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 13271040 | |
| }, | |
| { | |
| "name": "model.layers.6.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 15360, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4915200, | |
| "byteOffset": 13281280 | |
| }, | |
| { | |
| "name": "model.layers.6.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 18196480 | |
| }, | |
| { | |
| "name": "model.layers.6.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 31303680 | |
| }, | |
| { | |
| "name": "model.layers.7.input_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 32942080 | |
| } | |
| ], | |
| "md5sum": "bca0b3919ee6df0b9a1b27a1584ff51d" | |
| }, | |
| { | |
| "dataPath": "params_shard_90.bin", | |
| "format": "raw-shard", | |
| "nbytes": 70778880, | |
| "records": [ | |
| { | |
| "name": "model.layers.7.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 27648, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 70778880, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "1a2493e72cbb1ca3c893fed25308cc05" | |
| }, | |
| { | |
| "dataPath": "params_shard_91.bin", | |
| "format": "raw-shard", | |
| "nbytes": 39321600, | |
| "records": [ | |
| { | |
| "name": "model.layers.7.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 15360, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 39321600, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "0f19d1f3abc79c0b32bad6c83550924a" | |
| }, | |
| { | |
| "dataPath": "params_shard_92.bin", | |
| "format": "raw-shard", | |
| "nbytes": 35389440, | |
| "records": [ | |
| { | |
| "name": "model.layers.8.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 1728 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 35389440, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "75ab4aa347fc737a6d337b49fef45305" | |
| }, | |
| { | |
| "dataPath": "params_shard_93.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32952320, | |
| "records": [ | |
| { | |
| "name": "model.layers.7.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 432 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4423680, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.7.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 27648, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8847360, | |
| "byteOffset": 4423680 | |
| }, | |
| { | |
| "name": "model.layers.7.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 13271040 | |
| }, | |
| { | |
| "name": "model.layers.7.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 15360, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4915200, | |
| "byteOffset": 13281280 | |
| }, | |
| { | |
| "name": "model.layers.7.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 18196480 | |
| }, | |
| { | |
| "name": "model.layers.7.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 31303680 | |
| }, | |
| { | |
| "name": "model.layers.8.input_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 32942080 | |
| } | |
| ], | |
| "md5sum": "25731185c756658137af2ba74ccfefb3" | |
| }, | |
| { | |
| "dataPath": "params_shard_94.bin", | |
| "format": "raw-shard", | |
| "nbytes": 70778880, | |
| "records": [ | |
| { | |
| "name": "model.layers.8.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 27648, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 70778880, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "cb8652c73776cc4f69e34a877ff32f0b" | |
| }, | |
| { | |
| "dataPath": "params_shard_95.bin", | |
| "format": "raw-shard", | |
| "nbytes": 39321600, | |
| "records": [ | |
| { | |
| "name": "model.layers.8.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 15360, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 39321600, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f40de4e892a03358c88011060a8a7234" | |
| }, | |
| { | |
| "dataPath": "params_shard_96.bin", | |
| "format": "raw-shard", | |
| "nbytes": 35389440, | |
| "records": [ | |
| { | |
| "name": "model.layers.9.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 1728 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 35389440, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "a8566129e65d7d890236e08066c0ba1e" | |
| }, | |
| { | |
| "dataPath": "params_shard_97.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32952320, | |
| "records": [ | |
| { | |
| "name": "model.layers.8.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 432 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4423680, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.8.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 27648, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8847360, | |
| "byteOffset": 4423680 | |
| }, | |
| { | |
| "name": "model.layers.8.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 13271040 | |
| }, | |
| { | |
| "name": "model.layers.8.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 15360, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4915200, | |
| "byteOffset": 13281280 | |
| }, | |
| { | |
| "name": "model.layers.8.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 18196480 | |
| }, | |
| { | |
| "name": "model.layers.8.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 31303680 | |
| }, | |
| { | |
| "name": "model.layers.9.input_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 32942080 | |
| } | |
| ], | |
| "md5sum": "3fe041e0a67ba7246b628352e1bee397" | |
| }, | |
| { | |
| "dataPath": "params_shard_98.bin", | |
| "format": "raw-shard", | |
| "nbytes": 70778880, | |
| "records": [ | |
| { | |
| "name": "model.layers.9.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 27648, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 70778880, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "a5f66cbf386fb4b6a854ade9662e21c1" | |
| }, | |
| { | |
| "dataPath": "params_shard_99.bin", | |
| "format": "raw-shard", | |
| "nbytes": 39321600, | |
| "records": [ | |
| { | |
| "name": "model.layers.9.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 15360, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 39321600, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d285bd8a840134311a66352dbd9f9a59" | |
| }, | |
| { | |
| "dataPath": "params_shard_100.bin", | |
| "format": "raw-shard", | |
| "nbytes": 35389440, | |
| "records": [ | |
| { | |
| "name": "model.layers.15.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 1728 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 35389440, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "9607b7bf8776c5ef86a538a8176327b9" | |
| }, | |
| { | |
| "dataPath": "params_shard_101.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32952320, | |
| "records": [ | |
| { | |
| "name": "model.layers.9.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 432 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4423680, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.9.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 27648, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8847360, | |
| "byteOffset": 4423680 | |
| }, | |
| { | |
| "name": "model.layers.9.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 13271040 | |
| }, | |
| { | |
| "name": "model.layers.9.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 15360, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4915200, | |
| "byteOffset": 13281280 | |
| }, | |
| { | |
| "name": "model.layers.9.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 18196480 | |
| }, | |
| { | |
| "name": "model.layers.9.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 31303680 | |
| }, | |
| { | |
| "name": "model.layers.15.input_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 32942080 | |
| } | |
| ], | |
| "md5sum": "fbef371fddc63ea8dafc2b55ca10584a" | |
| }, | |
| { | |
| "dataPath": "params_shard_102.bin", | |
| "format": "raw-shard", | |
| "nbytes": 70778880, | |
| "records": [ | |
| { | |
| "name": "model.layers.15.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 27648, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 70778880, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f26b4f3981aa5d08b576009814ab3841" | |
| }, | |
| { | |
| "dataPath": "params_shard_103.bin", | |
| "format": "raw-shard", | |
| "nbytes": 35389440, | |
| "records": [ | |
| { | |
| "name": "model.layers.16.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 1728 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 35389440, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "30ad8347351c1d87ba8673426f2ce024" | |
| }, | |
| { | |
| "dataPath": "params_shard_104.bin", | |
| "format": "raw-shard", | |
| "nbytes": 70778880, | |
| "records": [ | |
| { | |
| "name": "model.layers.16.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 27648, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 70778880, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "a4604989eb9d8347f9561d7b5f8b5bcc" | |
| }, | |
| { | |
| "dataPath": "params_shard_105.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32460800, | |
| "records": [ | |
| { | |
| "name": "model.layers.15.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 432 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4423680, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.15.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 27648, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8847360, | |
| "byteOffset": 4423680 | |
| }, | |
| { | |
| "name": "model.layers.15.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 13271040 | |
| }, | |
| { | |
| "name": "model.layers.15.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 13281280 | |
| }, | |
| { | |
| "name": "model.layers.15.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 26388480 | |
| }, | |
| { | |
| "name": "model.layers.16.input_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 28026880 | |
| }, | |
| { | |
| "name": "model.layers.16.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 432 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4423680, | |
| "byteOffset": 28037120 | |
| } | |
| ], | |
| "md5sum": "33e612f6fceff8f256ae0568b324c771" | |
| }, | |
| { | |
| "dataPath": "params_shard_106.bin", | |
| "format": "raw-shard", | |
| "nbytes": 39321600, | |
| "records": [ | |
| { | |
| "name": "model.layers.16.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 15360, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 39321600, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "1e92cb5833aad1ef5c9f28fb114b7527" | |
| }, | |
| { | |
| "dataPath": "params_shard_107.bin", | |
| "format": "raw-shard", | |
| "nbytes": 35389440, | |
| "records": [ | |
| { | |
| "name": "model.layers.17.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 1728 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 35389440, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "101824ad8957c21fc0063e5c4bfaa723" | |
| }, | |
| { | |
| "dataPath": "params_shard_108.bin", | |
| "format": "raw-shard", | |
| "nbytes": 70778880, | |
| "records": [ | |
| { | |
| "name": "model.layers.17.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 27648, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 70778880, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d75a2da5a7013e803ae13535cda5f8f7" | |
| }, | |
| { | |
| "dataPath": "params_shard_109.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32952320, | |
| "records": [ | |
| { | |
| "name": "model.layers.16.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 27648, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8847360, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.16.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 8847360 | |
| }, | |
| { | |
| "name": "model.layers.16.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 15360, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4915200, | |
| "byteOffset": 8857600 | |
| }, | |
| { | |
| "name": "model.layers.16.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 13772800 | |
| }, | |
| { | |
| "name": "model.layers.16.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 26880000 | |
| }, | |
| { | |
| "name": "model.layers.17.input_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 28518400 | |
| }, | |
| { | |
| "name": "model.layers.17.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 432 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4423680, | |
| "byteOffset": 28528640 | |
| } | |
| ], | |
| "md5sum": "3c558c3705abd79968f6da839046e6df" | |
| }, | |
| { | |
| "dataPath": "params_shard_110.bin", | |
| "format": "raw-shard", | |
| "nbytes": 39321600, | |
| "records": [ | |
| { | |
| "name": "model.layers.17.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 15360, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 39321600, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d8b0b2b888b57c26f232be7732caad26" | |
| }, | |
| { | |
| "dataPath": "params_shard_111.bin", | |
| "format": "raw-shard", | |
| "nbytes": 35389440, | |
| "records": [ | |
| { | |
| "name": "model.layers.18.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 1728 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 35389440, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b7a974297093f530feb160330bf01fa7" | |
| }, | |
| { | |
| "dataPath": "params_shard_112.bin", | |
| "format": "raw-shard", | |
| "nbytes": 70778880, | |
| "records": [ | |
| { | |
| "name": "model.layers.18.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 27648, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 70778880, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d7d63a91bf1493c5f17427c037254251" | |
| }, | |
| { | |
| "dataPath": "params_shard_113.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32952320, | |
| "records": [ | |
| { | |
| "name": "model.layers.17.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 27648, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8847360, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.17.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 8847360 | |
| }, | |
| { | |
| "name": "model.layers.17.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 15360, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4915200, | |
| "byteOffset": 8857600 | |
| }, | |
| { | |
| "name": "model.layers.17.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 13772800 | |
| }, | |
| { | |
| "name": "model.layers.17.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 26880000 | |
| }, | |
| { | |
| "name": "model.layers.18.input_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 28518400 | |
| }, | |
| { | |
| "name": "model.layers.18.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 432 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4423680, | |
| "byteOffset": 28528640 | |
| } | |
| ], | |
| "md5sum": "a177b97787847e4de56033a7f7fa7783" | |
| }, | |
| { | |
| "dataPath": "params_shard_114.bin", | |
| "format": "raw-shard", | |
| "nbytes": 39321600, | |
| "records": [ | |
| { | |
| "name": "model.layers.18.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 15360, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 39321600, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f035dd4fdd0a596b520ba45de69b5657" | |
| }, | |
| { | |
| "dataPath": "params_shard_115.bin", | |
| "format": "raw-shard", | |
| "nbytes": 35389440, | |
| "records": [ | |
| { | |
| "name": "model.layers.19.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 1728 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 35389440, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e77ba8b34c70f7a6b452a14efc12ccd6" | |
| }, | |
| { | |
| "dataPath": "params_shard_116.bin", | |
| "format": "raw-shard", | |
| "nbytes": 70778880, | |
| "records": [ | |
| { | |
| "name": "model.layers.19.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 27648, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 70778880, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c3f5c154f63938ecd90a4e66e069a02f" | |
| }, | |
| { | |
| "dataPath": "params_shard_117.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32952320, | |
| "records": [ | |
| { | |
| "name": "model.layers.18.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 27648, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8847360, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.18.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 8847360 | |
| }, | |
| { | |
| "name": "model.layers.18.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 15360, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4915200, | |
| "byteOffset": 8857600 | |
| }, | |
| { | |
| "name": "model.layers.18.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 13772800 | |
| }, | |
| { | |
| "name": "model.layers.18.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 26880000 | |
| }, | |
| { | |
| "name": "model.layers.19.input_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 28518400 | |
| }, | |
| { | |
| "name": "model.layers.19.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 432 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4423680, | |
| "byteOffset": 28528640 | |
| } | |
| ], | |
| "md5sum": "29e42b9e6a502d7412b1fd317675e0e4" | |
| }, | |
| { | |
| "dataPath": "params_shard_118.bin", | |
| "format": "raw-shard", | |
| "nbytes": 39321600, | |
| "records": [ | |
| { | |
| "name": "model.layers.19.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 15360, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 39321600, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "903d203f2f7e9279616a47c199b49778" | |
| }, | |
| { | |
| "dataPath": "params_shard_119.bin", | |
| "format": "raw-shard", | |
| "nbytes": 35389440, | |
| "records": [ | |
| { | |
| "name": "model.layers.20.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 1728 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 35389440, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "eafd5f199be9c3c7d4000fdf2ec8ce91" | |
| }, | |
| { | |
| "dataPath": "params_shard_120.bin", | |
| "format": "raw-shard", | |
| "nbytes": 70778880, | |
| "records": [ | |
| { | |
| "name": "model.layers.20.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 27648, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 70778880, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b6a969076811180539643e97632b1cc9" | |
| }, | |
| { | |
| "dataPath": "params_shard_121.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32952320, | |
| "records": [ | |
| { | |
| "name": "model.layers.19.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 27648, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8847360, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.19.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 8847360 | |
| }, | |
| { | |
| "name": "model.layers.19.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 15360, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4915200, | |
| "byteOffset": 8857600 | |
| }, | |
| { | |
| "name": "model.layers.19.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 13772800 | |
| }, | |
| { | |
| "name": "model.layers.19.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 26880000 | |
| }, | |
| { | |
| "name": "model.layers.20.input_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 28518400 | |
| }, | |
| { | |
| "name": "model.layers.20.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 432 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4423680, | |
| "byteOffset": 28528640 | |
| } | |
| ], | |
| "md5sum": "6e5850c2598a0106d018ef8bcbeac6d2" | |
| }, | |
| { | |
| "dataPath": "params_shard_122.bin", | |
| "format": "raw-shard", | |
| "nbytes": 39321600, | |
| "records": [ | |
| { | |
| "name": "model.layers.20.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 15360, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 39321600, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "a7d5849c1054507cc03a62a3508a5eb9" | |
| }, | |
| { | |
| "dataPath": "params_shard_123.bin", | |
| "format": "raw-shard", | |
| "nbytes": 35389440, | |
| "records": [ | |
| { | |
| "name": "model.layers.21.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 1728 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 35389440, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "23fcd2224f533ddd913a726828fb8033" | |
| }, | |
| { | |
| "dataPath": "params_shard_124.bin", | |
| "format": "raw-shard", | |
| "nbytes": 70778880, | |
| "records": [ | |
| { | |
| "name": "model.layers.21.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 27648, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 70778880, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e0348f3195aad97580751aa6658fd182" | |
| }, | |
| { | |
| "dataPath": "params_shard_125.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32952320, | |
| "records": [ | |
| { | |
| "name": "model.layers.20.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 27648, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8847360, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.20.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 8847360 | |
| }, | |
| { | |
| "name": "model.layers.20.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 15360, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4915200, | |
| "byteOffset": 8857600 | |
| }, | |
| { | |
| "name": "model.layers.20.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 13772800 | |
| }, | |
| { | |
| "name": "model.layers.20.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 26880000 | |
| }, | |
| { | |
| "name": "model.layers.21.input_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 28518400 | |
| }, | |
| { | |
| "name": "model.layers.21.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 432 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4423680, | |
| "byteOffset": 28528640 | |
| } | |
| ], | |
| "md5sum": "5c8bc0d3a93109105ce1a0af822a9ba2" | |
| }, | |
| { | |
| "dataPath": "params_shard_126.bin", | |
| "format": "raw-shard", | |
| "nbytes": 39321600, | |
| "records": [ | |
| { | |
| "name": "model.layers.21.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 15360, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 39321600, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "6c78a0ca5d708b22350d223591503d24" | |
| }, | |
| { | |
| "dataPath": "params_shard_127.bin", | |
| "format": "raw-shard", | |
| "nbytes": 35389440, | |
| "records": [ | |
| { | |
| "name": "model.layers.22.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 1728 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 35389440, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "37d183f70d2e30a187ab001724277e57" | |
| }, | |
| { | |
| "dataPath": "params_shard_128.bin", | |
| "format": "raw-shard", | |
| "nbytes": 70778880, | |
| "records": [ | |
| { | |
| "name": "model.layers.22.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 27648, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 70778880, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "679639d2d7d6004e07dd0d28a56e5375" | |
| }, | |
| { | |
| "dataPath": "params_shard_129.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32952320, | |
| "records": [ | |
| { | |
| "name": "model.layers.21.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 27648, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8847360, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.21.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 8847360 | |
| }, | |
| { | |
| "name": "model.layers.21.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 15360, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4915200, | |
| "byteOffset": 8857600 | |
| }, | |
| { | |
| "name": "model.layers.21.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 13772800 | |
| }, | |
| { | |
| "name": "model.layers.21.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 26880000 | |
| }, | |
| { | |
| "name": "model.layers.22.input_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 28518400 | |
| }, | |
| { | |
| "name": "model.layers.22.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 432 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4423680, | |
| "byteOffset": 28528640 | |
| } | |
| ], | |
| "md5sum": "43e70c8f1e37bf35882295f404c36a60" | |
| }, | |
| { | |
| "dataPath": "params_shard_130.bin", | |
| "format": "raw-shard", | |
| "nbytes": 39321600, | |
| "records": [ | |
| { | |
| "name": "model.layers.22.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 15360, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 39321600, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "adc64af4456533af8d6ff40a678f9180" | |
| }, | |
| { | |
| "dataPath": "params_shard_131.bin", | |
| "format": "raw-shard", | |
| "nbytes": 35389440, | |
| "records": [ | |
| { | |
| "name": "model.layers.23.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 1728 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 35389440, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "a3caf89f59015af96e8038630e4e81fd" | |
| }, | |
| { | |
| "dataPath": "params_shard_132.bin", | |
| "format": "raw-shard", | |
| "nbytes": 70778880, | |
| "records": [ | |
| { | |
| "name": "model.layers.23.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 27648, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 70778880, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d4d98a04dbbfc53d264ed6b50e4b639d" | |
| }, | |
| { | |
| "dataPath": "params_shard_133.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32952320, | |
| "records": [ | |
| { | |
| "name": "model.layers.22.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 27648, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8847360, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.22.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 8847360 | |
| }, | |
| { | |
| "name": "model.layers.22.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 15360, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4915200, | |
| "byteOffset": 8857600 | |
| }, | |
| { | |
| "name": "model.layers.22.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 13772800 | |
| }, | |
| { | |
| "name": "model.layers.22.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 26880000 | |
| }, | |
| { | |
| "name": "model.layers.23.input_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 28518400 | |
| }, | |
| { | |
| "name": "model.layers.23.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 432 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4423680, | |
| "byteOffset": 28528640 | |
| } | |
| ], | |
| "md5sum": "854a0e19057033ea8916c2b68d33a9db" | |
| }, | |
| { | |
| "dataPath": "params_shard_134.bin", | |
| "format": "raw-shard", | |
| "nbytes": 39321600, | |
| "records": [ | |
| { | |
| "name": "model.layers.23.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 15360, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 39321600, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "bf62e061a31b2d48c87d19f9e3a510f0" | |
| }, | |
| { | |
| "dataPath": "params_shard_135.bin", | |
| "format": "raw-shard", | |
| "nbytes": 35389440, | |
| "records": [ | |
| { | |
| "name": "model.layers.24.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 1728 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 35389440, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d539fb51a77dbeabafa9ffe6adf812d5" | |
| }, | |
| { | |
| "dataPath": "params_shard_136.bin", | |
| "format": "raw-shard", | |
| "nbytes": 70778880, | |
| "records": [ | |
| { | |
| "name": "model.layers.24.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 27648, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 70778880, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d68b605c1c82c5c244c073c6a4c576bf" | |
| }, | |
| { | |
| "dataPath": "params_shard_137.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32952320, | |
| "records": [ | |
| { | |
| "name": "model.layers.23.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 27648, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8847360, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.23.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 8847360 | |
| }, | |
| { | |
| "name": "model.layers.23.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 15360, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4915200, | |
| "byteOffset": 8857600 | |
| }, | |
| { | |
| "name": "model.layers.23.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 13772800 | |
| }, | |
| { | |
| "name": "model.layers.23.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 26880000 | |
| }, | |
| { | |
| "name": "model.layers.24.input_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 28518400 | |
| }, | |
| { | |
| "name": "model.layers.24.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 432 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4423680, | |
| "byteOffset": 28528640 | |
| } | |
| ], | |
| "md5sum": "9c5bbf8f99acb2c0efbbeea2e624652f" | |
| }, | |
| { | |
| "dataPath": "params_shard_138.bin", | |
| "format": "raw-shard", | |
| "nbytes": 39321600, | |
| "records": [ | |
| { | |
| "name": "model.layers.24.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 15360, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 39321600, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "8635343d6c4208dc0460806ed4949700" | |
| }, | |
| { | |
| "dataPath": "params_shard_139.bin", | |
| "format": "raw-shard", | |
| "nbytes": 35389440, | |
| "records": [ | |
| { | |
| "name": "model.layers.25.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 1728 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 35389440, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "70c51f7d3e42494f628f340ead0f08be" | |
| }, | |
| { | |
| "dataPath": "params_shard_140.bin", | |
| "format": "raw-shard", | |
| "nbytes": 70778880, | |
| "records": [ | |
| { | |
| "name": "model.layers.25.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 27648, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 70778880, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "cba1e6ac59a4a5d448e16781e0d96027" | |
| }, | |
| { | |
| "dataPath": "params_shard_141.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32952320, | |
| "records": [ | |
| { | |
| "name": "model.layers.24.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 27648, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8847360, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.24.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 8847360 | |
| }, | |
| { | |
| "name": "model.layers.24.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 15360, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4915200, | |
| "byteOffset": 8857600 | |
| }, | |
| { | |
| "name": "model.layers.24.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 13772800 | |
| }, | |
| { | |
| "name": "model.layers.24.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 26880000 | |
| }, | |
| { | |
| "name": "model.layers.25.input_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 28518400 | |
| }, | |
| { | |
| "name": "model.layers.25.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 432 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4423680, | |
| "byteOffset": 28528640 | |
| } | |
| ], | |
| "md5sum": "c796fa84348b750e99a7a099db8ebdf9" | |
| }, | |
| { | |
| "dataPath": "params_shard_142.bin", | |
| "format": "raw-shard", | |
| "nbytes": 39321600, | |
| "records": [ | |
| { | |
| "name": "model.layers.25.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 15360, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 39321600, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "dfa367f85f4e4a1ad71872ffbde09b98" | |
| }, | |
| { | |
| "dataPath": "params_shard_143.bin", | |
| "format": "raw-shard", | |
| "nbytes": 35389440, | |
| "records": [ | |
| { | |
| "name": "model.layers.26.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 1728 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 35389440, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "be8935adcb6179f1e6bff41c0b7483c2" | |
| }, | |
| { | |
| "dataPath": "params_shard_144.bin", | |
| "format": "raw-shard", | |
| "nbytes": 70778880, | |
| "records": [ | |
| { | |
| "name": "model.layers.26.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 27648, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 70778880, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c386846f91bd5c32cd53fbfd2a5e17b3" | |
| }, | |
| { | |
| "dataPath": "params_shard_145.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32952320, | |
| "records": [ | |
| { | |
| "name": "model.layers.25.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 27648, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8847360, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.25.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 8847360 | |
| }, | |
| { | |
| "name": "model.layers.25.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 15360, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4915200, | |
| "byteOffset": 8857600 | |
| }, | |
| { | |
| "name": "model.layers.25.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 13772800 | |
| }, | |
| { | |
| "name": "model.layers.25.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 26880000 | |
| }, | |
| { | |
| "name": "model.layers.26.input_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 28518400 | |
| }, | |
| { | |
| "name": "model.layers.26.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 432 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4423680, | |
| "byteOffset": 28528640 | |
| } | |
| ], | |
| "md5sum": "5e9328cf6fd8f344a1a7e5a96648d507" | |
| }, | |
| { | |
| "dataPath": "params_shard_146.bin", | |
| "format": "raw-shard", | |
| "nbytes": 39321600, | |
| "records": [ | |
| { | |
| "name": "model.layers.26.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 15360, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 39321600, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "a4d925a8216c383a07a1af0af1f392a5" | |
| }, | |
| { | |
| "dataPath": "params_shard_147.bin", | |
| "format": "raw-shard", | |
| "nbytes": 35389440, | |
| "records": [ | |
| { | |
| "name": "model.layers.27.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 1728 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 35389440, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "9514e08e4292b90db1c478aecd047526" | |
| }, | |
| { | |
| "dataPath": "params_shard_148.bin", | |
| "format": "raw-shard", | |
| "nbytes": 70778880, | |
| "records": [ | |
| { | |
| "name": "model.layers.27.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 27648, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 70778880, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b978b1d48b344cbd55d86e1cddc045d7" | |
| }, | |
| { | |
| "dataPath": "params_shard_149.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32952320, | |
| "records": [ | |
| { | |
| "name": "model.layers.26.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 27648, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8847360, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.26.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 8847360 | |
| }, | |
| { | |
| "name": "model.layers.26.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 15360, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4915200, | |
| "byteOffset": 8857600 | |
| }, | |
| { | |
| "name": "model.layers.26.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 13772800 | |
| }, | |
| { | |
| "name": "model.layers.26.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 26880000 | |
| }, | |
| { | |
| "name": "model.layers.27.input_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 28518400 | |
| }, | |
| { | |
| "name": "model.layers.27.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 432 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4423680, | |
| "byteOffset": 28528640 | |
| } | |
| ], | |
| "md5sum": "9c1ca00e545aa0c4cf053111bea0538a" | |
| }, | |
| { | |
| "dataPath": "params_shard_150.bin", | |
| "format": "raw-shard", | |
| "nbytes": 39321600, | |
| "records": [ | |
| { | |
| "name": "model.layers.27.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 15360, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 39321600, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "0fb8a921b328e67fa1ca41d2cfaf2966" | |
| }, | |
| { | |
| "dataPath": "params_shard_151.bin", | |
| "format": "raw-shard", | |
| "nbytes": 35389440, | |
| "records": [ | |
| { | |
| "name": "model.layers.28.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 1728 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 35389440, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "06b2b3116b7e1322d42880a13322daa2" | |
| }, | |
| { | |
| "dataPath": "params_shard_152.bin", | |
| "format": "raw-shard", | |
| "nbytes": 70778880, | |
| "records": [ | |
| { | |
| "name": "model.layers.28.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 27648, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 70778880, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f63db6b503b8cc1fcfbbf851a4cb69dc" | |
| }, | |
| { | |
| "dataPath": "params_shard_153.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32952320, | |
| "records": [ | |
| { | |
| "name": "model.layers.27.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 27648, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8847360, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.27.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 8847360 | |
| }, | |
| { | |
| "name": "model.layers.27.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 15360, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4915200, | |
| "byteOffset": 8857600 | |
| }, | |
| { | |
| "name": "model.layers.27.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 13772800 | |
| }, | |
| { | |
| "name": "model.layers.27.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 26880000 | |
| }, | |
| { | |
| "name": "model.layers.28.input_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 28518400 | |
| }, | |
| { | |
| "name": "model.layers.28.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 432 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4423680, | |
| "byteOffset": 28528640 | |
| } | |
| ], | |
| "md5sum": "41b97eef34cecdafb21b84745e770994" | |
| }, | |
| { | |
| "dataPath": "params_shard_154.bin", | |
| "format": "raw-shard", | |
| "nbytes": 39321600, | |
| "records": [ | |
| { | |
| "name": "model.layers.28.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 15360, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 39321600, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "5978507942bd42f13dde12bd8c32356b" | |
| }, | |
| { | |
| "dataPath": "params_shard_155.bin", | |
| "format": "raw-shard", | |
| "nbytes": 35389440, | |
| "records": [ | |
| { | |
| "name": "model.layers.29.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 1728 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 35389440, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "818e0e9740b4bbfbd909bb2d96c05452" | |
| }, | |
| { | |
| "dataPath": "params_shard_156.bin", | |
| "format": "raw-shard", | |
| "nbytes": 70778880, | |
| "records": [ | |
| { | |
| "name": "model.layers.29.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 27648, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 70778880, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ce723d4de67cc9d59127f3b0df856bfc" | |
| }, | |
| { | |
| "dataPath": "params_shard_157.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32952320, | |
| "records": [ | |
| { | |
| "name": "model.layers.28.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 27648, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8847360, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.28.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 8847360 | |
| }, | |
| { | |
| "name": "model.layers.28.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 15360, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4915200, | |
| "byteOffset": 8857600 | |
| }, | |
| { | |
| "name": "model.layers.28.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 13772800 | |
| }, | |
| { | |
| "name": "model.layers.28.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 26880000 | |
| }, | |
| { | |
| "name": "model.layers.29.input_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 28518400 | |
| }, | |
| { | |
| "name": "model.layers.29.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 432 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4423680, | |
| "byteOffset": 28528640 | |
| } | |
| ], | |
| "md5sum": "0f285b31bd65a31c614da8f5fcf0d39c" | |
| }, | |
| { | |
| "dataPath": "params_shard_158.bin", | |
| "format": "raw-shard", | |
| "nbytes": 39321600, | |
| "records": [ | |
| { | |
| "name": "model.layers.29.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 15360, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 39321600, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "954b1c25c5bea4a5fed7e67c41921632" | |
| }, | |
| { | |
| "dataPath": "params_shard_159.bin", | |
| "format": "raw-shard", | |
| "nbytes": 70778880, | |
| "records": [ | |
| { | |
| "name": "model.layers.30.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 27648, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 70778880, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e6007e560bdb7f5ec867ae03d4b99a63" | |
| }, | |
| { | |
| "dataPath": "params_shard_160.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28518400, | |
| "records": [ | |
| { | |
| "name": "model.layers.29.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 27648, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8847360, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.29.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 8847360 | |
| }, | |
| { | |
| "name": "model.layers.29.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 15360, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4915200, | |
| "byteOffset": 8857600 | |
| }, | |
| { | |
| "name": "model.layers.29.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 13772800 | |
| }, | |
| { | |
| "name": "model.layers.29.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 26880000 | |
| } | |
| ], | |
| "md5sum": "beeeae454e623ff608e36738cf6abcad" | |
| }, | |
| { | |
| "dataPath": "params_shard_161.bin", | |
| "format": "raw-shard", | |
| "nbytes": 39321600, | |
| "records": [ | |
| { | |
| "name": "model.layers.30.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 15360, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 39321600, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d6430bf70772a1b73db6ab7e26c4b4e6" | |
| }, | |
| { | |
| "dataPath": "params_shard_162.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28508160, | |
| "records": [ | |
| { | |
| "name": "model.layers.30.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 27648, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8847360, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.30.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 15360, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4915200, | |
| "byteOffset": 8847360 | |
| }, | |
| { | |
| "name": "model.layers.30.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 13762560 | |
| }, | |
| { | |
| "name": "model.layers.30.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 26869760 | |
| } | |
| ], | |
| "md5sum": "f0429fc74fe3630dd4d66262259124c4" | |
| } | |
| ] | |
| } |