| { | |
| "metadata": { | |
| "ParamSize": 405, | |
| "ParamBytes": 8246568960.0, | |
| "BitsPerParam": 4.500325336993593 | |
| }, | |
| "records": [ | |
| { | |
| "dataPath": "params_shard_0.bin", | |
| "format": "raw-shard", | |
| "nbytes": 256901120, | |
| "records": [ | |
| { | |
| "name": "lm_head.q_weight", | |
| "shape": [ | |
| 100352, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256901120, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ec3ed539e35e33734d055e56419839b4" | |
| }, | |
| { | |
| "dataPath": "params_shard_1.bin", | |
| "format": "raw-shard", | |
| "nbytes": 45875200, | |
| "records": [ | |
| { | |
| "name": "transformer.h.34.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 2240 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 45875200, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "4de4a3a5bb5791ce6025786b1fa66507" | |
| }, | |
| { | |
| "dataPath": "params_shard_2.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32122880, | |
| "records": [ | |
| { | |
| "name": "lm_head.q_scale", | |
| "shape": [ | |
| 100352, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32112640, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.34.ln.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 32112640 | |
| } | |
| ], | |
| "md5sum": "f91eca43c8f79eb826e4101b6ddab68b" | |
| }, | |
| { | |
| "dataPath": "params_shard_3.bin", | |
| "format": "raw-shard", | |
| "nbytes": 91750400, | |
| "records": [ | |
| { | |
| "name": "transformer.h.34.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 35840, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 91750400, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "2de9537dcb67fcb9ca6de76117d3b7fc" | |
| }, | |
| { | |
| "dataPath": "params_shard_4.bin", | |
| "format": "raw-shard", | |
| "nbytes": 45875200, | |
| "records": [ | |
| { | |
| "name": "transformer.h.35.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 2240 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 45875200, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b18e6c43b7d9e359b926ea40eb491796" | |
| }, | |
| { | |
| "dataPath": "params_shard_5.bin", | |
| "format": "raw-shard", | |
| "nbytes": 91750400, | |
| "records": [ | |
| { | |
| "name": "transformer.h.35.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 35840, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 91750400, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "4980037a28448777e82c2add912b76d3" | |
| }, | |
| { | |
| "dataPath": "params_shard_6.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22958080, | |
| "records": [ | |
| { | |
| "name": "transformer.h.34.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5734400, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.34.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 35840, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11468800, | |
| "byteOffset": 5734400 | |
| }, | |
| { | |
| "name": "transformer.h.34.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 17203200 | |
| }, | |
| { | |
| "name": "transformer.h.35.ln.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 17213440 | |
| }, | |
| { | |
| "name": "transformer.h.35.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5734400, | |
| "byteOffset": 17223680 | |
| } | |
| ], | |
| "md5sum": "3bc9209175b288282a9400ea58fed515" | |
| }, | |
| { | |
| "dataPath": "params_shard_7.bin", | |
| "format": "raw-shard", | |
| "nbytes": 19660800, | |
| "records": [ | |
| { | |
| "name": "transformer.h.35.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 7680, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 19660800, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "838c059157cd794e0afe251c9d83a41b" | |
| }, | |
| { | |
| "dataPath": "params_shard_8.bin", | |
| "format": "raw-shard", | |
| "nbytes": 45875200, | |
| "records": [ | |
| { | |
| "name": "transformer.h.36.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 2240 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 45875200, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d81272b797a17b348626f8f2c7610ab9" | |
| }, | |
| { | |
| "dataPath": "params_shard_9.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28692480, | |
| "records": [ | |
| { | |
| "name": "transformer.h.35.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 35840, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11468800, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.35.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 11468800 | |
| }, | |
| { | |
| "name": "transformer.h.35.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 11479040 | |
| }, | |
| { | |
| "name": "transformer.h.35.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 24586240 | |
| }, | |
| { | |
| "name": "transformer.h.35.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 7680, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2457600, | |
| "byteOffset": 26224640 | |
| }, | |
| { | |
| "name": "transformer.h.36.ln.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 28682240 | |
| } | |
| ], | |
| "md5sum": "96f96489adb1130eab13f07c1014e9b7" | |
| }, | |
| { | |
| "dataPath": "params_shard_10.bin", | |
| "format": "raw-shard", | |
| "nbytes": 91750400, | |
| "records": [ | |
| { | |
| "name": "transformer.h.36.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 35840, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 91750400, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e72aac3b489667f5f9adc7a1b2a6dc71" | |
| }, | |
| { | |
| "dataPath": "params_shard_11.bin", | |
| "format": "raw-shard", | |
| "nbytes": 19660800, | |
| "records": [ | |
| { | |
| "name": "transformer.h.36.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 7680, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 19660800, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "fad38404cf2cae443c6b6bcf97a5974c" | |
| }, | |
| { | |
| "dataPath": "params_shard_12.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31959040, | |
| "records": [ | |
| { | |
| "name": "transformer.h.36.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5734400, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.36.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 35840, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11468800, | |
| "byteOffset": 5734400 | |
| }, | |
| { | |
| "name": "transformer.h.36.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 17203200 | |
| }, | |
| { | |
| "name": "transformer.h.36.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 17213440 | |
| }, | |
| { | |
| "name": "transformer.h.36.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 30320640 | |
| } | |
| ], | |
| "md5sum": "98cfd2568ffe0b73e163279bc98c6bf0" | |
| }, | |
| { | |
| "dataPath": "params_shard_13.bin", | |
| "format": "raw-shard", | |
| "nbytes": 45875200, | |
| "records": [ | |
| { | |
| "name": "transformer.h.37.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 2240 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 45875200, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "787c5041b51b737c404e5848a8330565" | |
| }, | |
| { | |
| "dataPath": "params_shard_14.bin", | |
| "format": "raw-shard", | |
| "nbytes": 91750400, | |
| "records": [ | |
| { | |
| "name": "transformer.h.37.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 35840, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 91750400, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "39017d2b0cefd59fd40a0416558ba738" | |
| }, | |
| { | |
| "dataPath": "params_shard_15.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32788480, | |
| "records": [ | |
| { | |
| "name": "transformer.h.36.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 7680, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2457600, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.37.ln.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 2457600 | |
| }, | |
| { | |
| "name": "transformer.h.37.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5734400, | |
| "byteOffset": 2467840 | |
| }, | |
| { | |
| "name": "transformer.h.37.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 35840, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11468800, | |
| "byteOffset": 8202240 | |
| }, | |
| { | |
| "name": "transformer.h.37.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 19671040 | |
| }, | |
| { | |
| "name": "transformer.h.37.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 19681280 | |
| } | |
| ], | |
| "md5sum": "5f63eb7a21e2a27165a91cef41069ef7" | |
| }, | |
| { | |
| "dataPath": "params_shard_16.bin", | |
| "format": "raw-shard", | |
| "nbytes": 45875200, | |
| "records": [ | |
| { | |
| "name": "transformer.h.38.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 2240 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 45875200, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "a7494d4b1d2e7d7b28a4a77a16fdf8c0" | |
| }, | |
| { | |
| "dataPath": "params_shard_17.bin", | |
| "format": "raw-shard", | |
| "nbytes": 91750400, | |
| "records": [ | |
| { | |
| "name": "transformer.h.38.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 35840, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 91750400, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d8327b99b5b5ce78c144fbedc34ffc9b" | |
| }, | |
| { | |
| "dataPath": "params_shard_18.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29501440, | |
| "records": [ | |
| { | |
| "name": "transformer.h.37.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.37.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 7680, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 19660800, | |
| "byteOffset": 1638400 | |
| }, | |
| { | |
| "name": "transformer.h.37.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 7680, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2457600, | |
| "byteOffset": 21299200 | |
| }, | |
| { | |
| "name": "transformer.h.38.ln.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 23756800 | |
| }, | |
| { | |
| "name": "transformer.h.38.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5734400, | |
| "byteOffset": 23767040 | |
| } | |
| ], | |
| "md5sum": "998157f5f027db4cc067b2e59f476132" | |
| }, | |
| { | |
| "dataPath": "params_shard_19.bin", | |
| "format": "raw-shard", | |
| "nbytes": 19660800, | |
| "records": [ | |
| { | |
| "name": "transformer.h.38.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 7680, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 19660800, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f5f47383ee3ed5c9bd1d9b797066620f" | |
| }, | |
| { | |
| "dataPath": "params_shard_20.bin", | |
| "format": "raw-shard", | |
| "nbytes": 45875200, | |
| "records": [ | |
| { | |
| "name": "transformer.h.39.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 2240 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 45875200, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "bbb9e0b10b242adbd74ea31b6c691b86" | |
| }, | |
| { | |
| "dataPath": "params_shard_21.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28692480, | |
| "records": [ | |
| { | |
| "name": "transformer.h.38.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 35840, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11468800, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.38.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 11468800 | |
| }, | |
| { | |
| "name": "transformer.h.38.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 11479040 | |
| }, | |
| { | |
| "name": "transformer.h.38.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 24586240 | |
| }, | |
| { | |
| "name": "transformer.h.38.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 7680, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2457600, | |
| "byteOffset": 26224640 | |
| }, | |
| { | |
| "name": "transformer.h.39.ln.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 28682240 | |
| } | |
| ], | |
| "md5sum": "ef26480c2fe56a73530e61d64d925da9" | |
| }, | |
| { | |
| "dataPath": "params_shard_22.bin", | |
| "format": "raw-shard", | |
| "nbytes": 91750400, | |
| "records": [ | |
| { | |
| "name": "transformer.h.39.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 35840, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 91750400, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "698897dcd3922498a48e297393e6e827" | |
| }, | |
| { | |
| "dataPath": "params_shard_23.bin", | |
| "format": "raw-shard", | |
| "nbytes": 19660800, | |
| "records": [ | |
| { | |
| "name": "transformer.h.39.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 7680, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 19660800, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "2171a93955dc719d92a1ad7b401a680e" | |
| }, | |
| { | |
| "dataPath": "params_shard_24.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31959040, | |
| "records": [ | |
| { | |
| "name": "transformer.h.39.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5734400, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.39.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 35840, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11468800, | |
| "byteOffset": 5734400 | |
| }, | |
| { | |
| "name": "transformer.h.39.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 17203200 | |
| }, | |
| { | |
| "name": "transformer.h.39.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 17213440 | |
| }, | |
| { | |
| "name": "transformer.h.39.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 30320640 | |
| } | |
| ], | |
| "md5sum": "703a4725a8a9baad362d49c02fa9bb11" | |
| }, | |
| { | |
| "dataPath": "params_shard_25.bin", | |
| "format": "raw-shard", | |
| "nbytes": 256901120, | |
| "records": [ | |
| { | |
| "name": "transformer.embd.q_weight", | |
| "shape": [ | |
| 100352, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256901120, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "3904674291357de8ab6bcd69543d7867" | |
| }, | |
| { | |
| "dataPath": "params_shard_26.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32112640, | |
| "records": [ | |
| { | |
| "name": "transformer.embd.q_scale", | |
| "shape": [ | |
| 100352, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32112640, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "fd0dc321f03afb13070a749a479693d8" | |
| }, | |
| { | |
| "dataPath": "params_shard_27.bin", | |
| "format": "raw-shard", | |
| "nbytes": 45875200, | |
| "records": [ | |
| { | |
| "name": "transformer.h.0.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 2240 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 45875200, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "37bad9d50a6ee67cde0357f25af354b0" | |
| }, | |
| { | |
| "dataPath": "params_shard_28.bin", | |
| "format": "raw-shard", | |
| "nbytes": 91750400, | |
| "records": [ | |
| { | |
| "name": "transformer.h.0.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 35840, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 91750400, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "3f6a169ddf9ef6ad6b346075a7b1c760" | |
| }, | |
| { | |
| "dataPath": "params_shard_29.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32798720, | |
| "records": [ | |
| { | |
| "name": "transformer.h.39.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 7680, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2457600, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.norm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 2457600 | |
| }, | |
| { | |
| "name": "transformer.h.0.ln.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 2467840 | |
| }, | |
| { | |
| "name": "transformer.h.0.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5734400, | |
| "byteOffset": 2478080 | |
| }, | |
| { | |
| "name": "transformer.h.0.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 35840, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11468800, | |
| "byteOffset": 8212480 | |
| }, | |
| { | |
| "name": "transformer.h.0.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 19681280 | |
| }, | |
| { | |
| "name": "transformer.h.0.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 19691520 | |
| } | |
| ], | |
| "md5sum": "235ec4f8339e1076fd4c30af9521a1ea" | |
| }, | |
| { | |
| "dataPath": "params_shard_30.bin", | |
| "format": "raw-shard", | |
| "nbytes": 45875200, | |
| "records": [ | |
| { | |
| "name": "transformer.h.1.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 2240 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 45875200, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "90e899de33d38a39ff1e5783fee4057d" | |
| }, | |
| { | |
| "dataPath": "params_shard_31.bin", | |
| "format": "raw-shard", | |
| "nbytes": 91750400, | |
| "records": [ | |
| { | |
| "name": "transformer.h.1.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 35840, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 91750400, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f3ae42cc301ab48cfdff46f76b48b5e9" | |
| }, | |
| { | |
| "dataPath": "params_shard_32.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29501440, | |
| "records": [ | |
| { | |
| "name": "transformer.h.0.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.0.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 7680, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 19660800, | |
| "byteOffset": 1638400 | |
| }, | |
| { | |
| "name": "transformer.h.0.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 7680, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2457600, | |
| "byteOffset": 21299200 | |
| }, | |
| { | |
| "name": "transformer.h.1.ln.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 23756800 | |
| }, | |
| { | |
| "name": "transformer.h.1.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5734400, | |
| "byteOffset": 23767040 | |
| } | |
| ], | |
| "md5sum": "468a32ebe20554de52266d2d92ffce6e" | |
| }, | |
| { | |
| "dataPath": "params_shard_33.bin", | |
| "format": "raw-shard", | |
| "nbytes": 19660800, | |
| "records": [ | |
| { | |
| "name": "transformer.h.1.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 7680, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 19660800, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "7a154bfad76108aa8584f5919c0d5c24" | |
| }, | |
| { | |
| "dataPath": "params_shard_34.bin", | |
| "format": "raw-shard", | |
| "nbytes": 45875200, | |
| "records": [ | |
| { | |
| "name": "transformer.h.2.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 2240 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 45875200, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "adf96e2fb29f69274d5338373c477ca1" | |
| }, | |
| { | |
| "dataPath": "params_shard_35.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28692480, | |
| "records": [ | |
| { | |
| "name": "transformer.h.1.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 35840, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11468800, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.1.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 11468800 | |
| }, | |
| { | |
| "name": "transformer.h.1.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 11479040 | |
| }, | |
| { | |
| "name": "transformer.h.1.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 24586240 | |
| }, | |
| { | |
| "name": "transformer.h.1.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 7680, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2457600, | |
| "byteOffset": 26224640 | |
| }, | |
| { | |
| "name": "transformer.h.2.ln.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 28682240 | |
| } | |
| ], | |
| "md5sum": "ec9181a19b874c9cb2656db100110e21" | |
| }, | |
| { | |
| "dataPath": "params_shard_36.bin", | |
| "format": "raw-shard", | |
| "nbytes": 91750400, | |
| "records": [ | |
| { | |
| "name": "transformer.h.2.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 35840, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 91750400, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "878d5e0fabdf8529bd09a242fce4d6d7" | |
| }, | |
| { | |
| "dataPath": "params_shard_37.bin", | |
| "format": "raw-shard", | |
| "nbytes": 19660800, | |
| "records": [ | |
| { | |
| "name": "transformer.h.2.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 7680, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 19660800, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c234a9144cde486068118d250ad17522" | |
| }, | |
| { | |
| "dataPath": "params_shard_38.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31959040, | |
| "records": [ | |
| { | |
| "name": "transformer.h.2.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5734400, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.2.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 35840, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11468800, | |
| "byteOffset": 5734400 | |
| }, | |
| { | |
| "name": "transformer.h.2.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 17203200 | |
| }, | |
| { | |
| "name": "transformer.h.2.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 17213440 | |
| }, | |
| { | |
| "name": "transformer.h.2.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 30320640 | |
| } | |
| ], | |
| "md5sum": "2f3119cd8b79f2948595c19a5758d9c4" | |
| }, | |
| { | |
| "dataPath": "params_shard_39.bin", | |
| "format": "raw-shard", | |
| "nbytes": 45875200, | |
| "records": [ | |
| { | |
| "name": "transformer.h.3.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 2240 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 45875200, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f1e7acc08504187db4c93f81645da12f" | |
| }, | |
| { | |
| "dataPath": "params_shard_40.bin", | |
| "format": "raw-shard", | |
| "nbytes": 91750400, | |
| "records": [ | |
| { | |
| "name": "transformer.h.3.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 35840, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 91750400, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "898fe33bd28e18e2c172a82f21834731" | |
| }, | |
| { | |
| "dataPath": "params_shard_41.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32788480, | |
| "records": [ | |
| { | |
| "name": "transformer.h.2.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 7680, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2457600, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.3.ln.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 2457600 | |
| }, | |
| { | |
| "name": "transformer.h.3.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5734400, | |
| "byteOffset": 2467840 | |
| }, | |
| { | |
| "name": "transformer.h.3.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 35840, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11468800, | |
| "byteOffset": 8202240 | |
| }, | |
| { | |
| "name": "transformer.h.3.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 19671040 | |
| }, | |
| { | |
| "name": "transformer.h.3.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 19681280 | |
| } | |
| ], | |
| "md5sum": "e81235d701152bbc01c3a32a52571b42" | |
| }, | |
| { | |
| "dataPath": "params_shard_42.bin", | |
| "format": "raw-shard", | |
| "nbytes": 45875200, | |
| "records": [ | |
| { | |
| "name": "transformer.h.4.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 2240 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 45875200, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "9d23e046482379d9b376c07fd89925a1" | |
| }, | |
| { | |
| "dataPath": "params_shard_43.bin", | |
| "format": "raw-shard", | |
| "nbytes": 91750400, | |
| "records": [ | |
| { | |
| "name": "transformer.h.4.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 35840, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 91750400, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "3da6a5bd16773e6c09c99d8762daa739" | |
| }, | |
| { | |
| "dataPath": "params_shard_44.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29501440, | |
| "records": [ | |
| { | |
| "name": "transformer.h.3.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.3.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 7680, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 19660800, | |
| "byteOffset": 1638400 | |
| }, | |
| { | |
| "name": "transformer.h.3.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 7680, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2457600, | |
| "byteOffset": 21299200 | |
| }, | |
| { | |
| "name": "transformer.h.4.ln.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 23756800 | |
| }, | |
| { | |
| "name": "transformer.h.4.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5734400, | |
| "byteOffset": 23767040 | |
| } | |
| ], | |
| "md5sum": "21a4bc683231c3979395fb4aeca6fad8" | |
| }, | |
| { | |
| "dataPath": "params_shard_45.bin", | |
| "format": "raw-shard", | |
| "nbytes": 19660800, | |
| "records": [ | |
| { | |
| "name": "transformer.h.4.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 7680, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 19660800, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d30e63556e6e28e67eb39f2a4c5b5a8a" | |
| }, | |
| { | |
| "dataPath": "params_shard_46.bin", | |
| "format": "raw-shard", | |
| "nbytes": 91750400, | |
| "records": [ | |
| { | |
| "name": "transformer.h.5.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 35840, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 91750400, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b7783024545c038a5e60f574f7b58df8" | |
| }, | |
| { | |
| "dataPath": "params_shard_47.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28682240, | |
| "records": [ | |
| { | |
| "name": "transformer.h.4.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 35840, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11468800, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.4.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 11468800 | |
| }, | |
| { | |
| "name": "transformer.h.4.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 11479040 | |
| }, | |
| { | |
| "name": "transformer.h.4.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 24586240 | |
| }, | |
| { | |
| "name": "transformer.h.4.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 7680, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2457600, | |
| "byteOffset": 26224640 | |
| } | |
| ], | |
| "md5sum": "379e91caa318cc53e6a27b43609b3463" | |
| }, | |
| { | |
| "dataPath": "params_shard_48.bin", | |
| "format": "raw-shard", | |
| "nbytes": 19660800, | |
| "records": [ | |
| { | |
| "name": "transformer.h.5.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 7680, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 19660800, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "3e08572787b7adb220713767283afb30" | |
| }, | |
| { | |
| "dataPath": "params_shard_49.bin", | |
| "format": "raw-shard", | |
| "nbytes": 45875200, | |
| "records": [ | |
| { | |
| "name": "transformer.h.10.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 2240 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 45875200, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "a2a056d3427a964165dac3a6395deb79" | |
| }, | |
| { | |
| "dataPath": "params_shard_50.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28682240, | |
| "records": [ | |
| { | |
| "name": "transformer.h.5.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 35840, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11468800, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.5.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 11468800 | |
| }, | |
| { | |
| "name": "transformer.h.5.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 24576000 | |
| }, | |
| { | |
| "name": "transformer.h.5.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 7680, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2457600, | |
| "byteOffset": 26214400 | |
| }, | |
| { | |
| "name": "transformer.h.10.ln.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 28672000 | |
| } | |
| ], | |
| "md5sum": "a66cbd20b819839708ab8c7a1159f7ac" | |
| }, | |
| { | |
| "dataPath": "params_shard_51.bin", | |
| "format": "raw-shard", | |
| "nbytes": 91750400, | |
| "records": [ | |
| { | |
| "name": "transformer.h.10.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 35840, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 91750400, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "3623049149207b8902c62261da3aac00" | |
| }, | |
| { | |
| "dataPath": "params_shard_52.bin", | |
| "format": "raw-shard", | |
| "nbytes": 19660800, | |
| "records": [ | |
| { | |
| "name": "transformer.h.10.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 7680, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 19660800, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b8fcac62d77b5e706712a0112ac955b6" | |
| }, | |
| { | |
| "dataPath": "params_shard_53.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31959040, | |
| "records": [ | |
| { | |
| "name": "transformer.h.10.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5734400, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.10.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 35840, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11468800, | |
| "byteOffset": 5734400 | |
| }, | |
| { | |
| "name": "transformer.h.10.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 17203200 | |
| }, | |
| { | |
| "name": "transformer.h.10.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 17213440 | |
| }, | |
| { | |
| "name": "transformer.h.10.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 30320640 | |
| } | |
| ], | |
| "md5sum": "aae7104f432ee997d16f207819891354" | |
| }, | |
| { | |
| "dataPath": "params_shard_54.bin", | |
| "format": "raw-shard", | |
| "nbytes": 45875200, | |
| "records": [ | |
| { | |
| "name": "transformer.h.11.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 2240 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 45875200, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "59792346620d12b617c19033818e4430" | |
| }, | |
| { | |
| "dataPath": "params_shard_55.bin", | |
| "format": "raw-shard", | |
| "nbytes": 91750400, | |
| "records": [ | |
| { | |
| "name": "transformer.h.11.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 35840, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 91750400, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "12ceb6e32e5c272cafc6d810bf865f64" | |
| }, | |
| { | |
| "dataPath": "params_shard_56.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32788480, | |
| "records": [ | |
| { | |
| "name": "transformer.h.10.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 7680, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2457600, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.11.ln.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 2457600 | |
| }, | |
| { | |
| "name": "transformer.h.11.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5734400, | |
| "byteOffset": 2467840 | |
| }, | |
| { | |
| "name": "transformer.h.11.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 35840, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11468800, | |
| "byteOffset": 8202240 | |
| }, | |
| { | |
| "name": "transformer.h.11.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 19671040 | |
| }, | |
| { | |
| "name": "transformer.h.11.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 19681280 | |
| } | |
| ], | |
| "md5sum": "cb764a330a738b74a5c81c7b795cef92" | |
| }, | |
| { | |
| "dataPath": "params_shard_57.bin", | |
| "format": "raw-shard", | |
| "nbytes": 45875200, | |
| "records": [ | |
| { | |
| "name": "transformer.h.12.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 2240 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 45875200, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "883df7d76f5dd38dae6c2d1c0c0616af" | |
| }, | |
| { | |
| "dataPath": "params_shard_58.bin", | |
| "format": "raw-shard", | |
| "nbytes": 91750400, | |
| "records": [ | |
| { | |
| "name": "transformer.h.12.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 35840, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 91750400, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c255d0bcf530d64cf53df0af14f247c0" | |
| }, | |
| { | |
| "dataPath": "params_shard_59.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29501440, | |
| "records": [ | |
| { | |
| "name": "transformer.h.11.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.11.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 7680, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 19660800, | |
| "byteOffset": 1638400 | |
| }, | |
| { | |
| "name": "transformer.h.11.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 7680, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2457600, | |
| "byteOffset": 21299200 | |
| }, | |
| { | |
| "name": "transformer.h.12.ln.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 23756800 | |
| }, | |
| { | |
| "name": "transformer.h.12.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5734400, | |
| "byteOffset": 23767040 | |
| } | |
| ], | |
| "md5sum": "595d468e4f75498ae25a74531d9e38f5" | |
| }, | |
| { | |
| "dataPath": "params_shard_60.bin", | |
| "format": "raw-shard", | |
| "nbytes": 19660800, | |
| "records": [ | |
| { | |
| "name": "transformer.h.12.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 7680, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 19660800, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "6829b6a5d95f951b6f9bd63647999883" | |
| }, | |
| { | |
| "dataPath": "params_shard_61.bin", | |
| "format": "raw-shard", | |
| "nbytes": 45875200, | |
| "records": [ | |
| { | |
| "name": "transformer.h.5.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 2240 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 45875200, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "70117c4dab88d3be52d31a4caa7a762f" | |
| }, | |
| { | |
| "dataPath": "params_shard_62.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28692480, | |
| "records": [ | |
| { | |
| "name": "transformer.h.12.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 35840, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11468800, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.12.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 11468800 | |
| }, | |
| { | |
| "name": "transformer.h.12.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 11479040 | |
| }, | |
| { | |
| "name": "transformer.h.12.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 24586240 | |
| }, | |
| { | |
| "name": "transformer.h.12.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 7680, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2457600, | |
| "byteOffset": 26224640 | |
| }, | |
| { | |
| "name": "transformer.h.5.ln.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 28682240 | |
| } | |
| ], | |
| "md5sum": "2919ff9a28276ec99e869791912bc282" | |
| }, | |
| { | |
| "dataPath": "params_shard_63.bin", | |
| "format": "raw-shard", | |
| "nbytes": 45875200, | |
| "records": [ | |
| { | |
| "name": "transformer.h.6.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 2240 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 45875200, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "8b221146c228d6506e7e6464ce1736f0" | |
| }, | |
| { | |
| "dataPath": "params_shard_64.bin", | |
| "format": "raw-shard", | |
| "nbytes": 91750400, | |
| "records": [ | |
| { | |
| "name": "transformer.h.6.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 35840, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 91750400, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c2d998f333cddb5abcd91096093658f1" | |
| }, | |
| { | |
| "dataPath": "params_shard_65.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22968320, | |
| "records": [ | |
| { | |
| "name": "transformer.h.5.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5734400, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.5.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 5734400 | |
| }, | |
| { | |
| "name": "transformer.h.6.ln.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 5744640 | |
| }, | |
| { | |
| "name": "transformer.h.6.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5734400, | |
| "byteOffset": 5754880 | |
| }, | |
| { | |
| "name": "transformer.h.6.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 35840, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11468800, | |
| "byteOffset": 11489280 | |
| }, | |
| { | |
| "name": "transformer.h.6.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 22958080 | |
| } | |
| ], | |
| "md5sum": "e380e9c72a3e712f8854baa17ef685a7" | |
| }, | |
| { | |
| "dataPath": "params_shard_66.bin", | |
| "format": "raw-shard", | |
| "nbytes": 19660800, | |
| "records": [ | |
| { | |
| "name": "transformer.h.6.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 7680, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 19660800, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "4d8728826a946de5f139cf2e568b7f2b" | |
| }, | |
| { | |
| "dataPath": "params_shard_67.bin", | |
| "format": "raw-shard", | |
| "nbytes": 45875200, | |
| "records": [ | |
| { | |
| "name": "transformer.h.7.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 2240 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 45875200, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "1ca19ab730216219908f65857202de08" | |
| }, | |
| { | |
| "dataPath": "params_shard_68.bin", | |
| "format": "raw-shard", | |
| "nbytes": 91750400, | |
| "records": [ | |
| { | |
| "name": "transformer.h.7.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 35840, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 91750400, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "4fe13737df09928b58b0f374e52ad79a" | |
| }, | |
| { | |
| "dataPath": "params_shard_69.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22947840, | |
| "records": [ | |
| { | |
| "name": "transformer.h.6.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.6.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 13107200 | |
| }, | |
| { | |
| "name": "transformer.h.6.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 7680, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2457600, | |
| "byteOffset": 14745600 | |
| }, | |
| { | |
| "name": "transformer.h.7.ln.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 17203200 | |
| }, | |
| { | |
| "name": "transformer.h.7.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5734400, | |
| "byteOffset": 17213440 | |
| } | |
| ], | |
| "md5sum": "3cd729ee8197a5b0ac34c3b38e9bcc53" | |
| }, | |
| { | |
| "dataPath": "params_shard_70.bin", | |
| "format": "raw-shard", | |
| "nbytes": 19660800, | |
| "records": [ | |
| { | |
| "name": "transformer.h.7.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 7680, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 19660800, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "fd26085c3c1281d6396dce19012e9f5b" | |
| }, | |
| { | |
| "dataPath": "params_shard_71.bin", | |
| "format": "raw-shard", | |
| "nbytes": 45875200, | |
| "records": [ | |
| { | |
| "name": "transformer.h.8.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 2240 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 45875200, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ec491695613117b774ee690da6f9ebad" | |
| }, | |
| { | |
| "dataPath": "params_shard_72.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28692480, | |
| "records": [ | |
| { | |
| "name": "transformer.h.7.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 35840, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11468800, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.7.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 11468800 | |
| }, | |
| { | |
| "name": "transformer.h.7.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 11479040 | |
| }, | |
| { | |
| "name": "transformer.h.7.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 24586240 | |
| }, | |
| { | |
| "name": "transformer.h.7.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 7680, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2457600, | |
| "byteOffset": 26224640 | |
| }, | |
| { | |
| "name": "transformer.h.8.ln.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 28682240 | |
| } | |
| ], | |
| "md5sum": "30054d33c23e3109b661af2136a2b5fc" | |
| }, | |
| { | |
| "dataPath": "params_shard_73.bin", | |
| "format": "raw-shard", | |
| "nbytes": 91750400, | |
| "records": [ | |
| { | |
| "name": "transformer.h.8.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 35840, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 91750400, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d09fc62aecba8847e605f4040a505ade" | |
| }, | |
| { | |
| "dataPath": "params_shard_74.bin", | |
| "format": "raw-shard", | |
| "nbytes": 19660800, | |
| "records": [ | |
| { | |
| "name": "transformer.h.8.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 7680, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 19660800, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ed850c4869f5fe34612a54ff38d2a7f3" | |
| }, | |
| { | |
| "dataPath": "params_shard_75.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31959040, | |
| "records": [ | |
| { | |
| "name": "transformer.h.8.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5734400, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.8.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 35840, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11468800, | |
| "byteOffset": 5734400 | |
| }, | |
| { | |
| "name": "transformer.h.8.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 17203200 | |
| }, | |
| { | |
| "name": "transformer.h.8.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 17213440 | |
| }, | |
| { | |
| "name": "transformer.h.8.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 30320640 | |
| } | |
| ], | |
| "md5sum": "3f7972ca5adc70f31224324ac310b1cb" | |
| }, | |
| { | |
| "dataPath": "params_shard_76.bin", | |
| "format": "raw-shard", | |
| "nbytes": 45875200, | |
| "records": [ | |
| { | |
| "name": "transformer.h.9.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 2240 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 45875200, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "a42ec1de0e33bb38a04b3a7e4ef0e6e5" | |
| }, | |
| { | |
| "dataPath": "params_shard_77.bin", | |
| "format": "raw-shard", | |
| "nbytes": 91750400, | |
| "records": [ | |
| { | |
| "name": "transformer.h.9.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 35840, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 91750400, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "7a3b31765e0526ec2c812ddba2576624" | |
| }, | |
| { | |
| "dataPath": "params_shard_78.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32788480, | |
| "records": [ | |
| { | |
| "name": "transformer.h.8.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 7680, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2457600, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.9.ln.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 2457600 | |
| }, | |
| { | |
| "name": "transformer.h.9.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5734400, | |
| "byteOffset": 2467840 | |
| }, | |
| { | |
| "name": "transformer.h.9.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 35840, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11468800, | |
| "byteOffset": 8202240 | |
| }, | |
| { | |
| "name": "transformer.h.9.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 19671040 | |
| }, | |
| { | |
| "name": "transformer.h.9.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 19681280 | |
| } | |
| ], | |
| "md5sum": "3f5f58dbf2b68730e1527ed3757370a7" | |
| }, | |
| { | |
| "dataPath": "params_shard_79.bin", | |
| "format": "raw-shard", | |
| "nbytes": 45875200, | |
| "records": [ | |
| { | |
| "name": "transformer.h.13.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 2240 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 45875200, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "cb04b969a003e3e2b58fe7b8ba7ac0fd" | |
| }, | |
| { | |
| "dataPath": "params_shard_80.bin", | |
| "format": "raw-shard", | |
| "nbytes": 91750400, | |
| "records": [ | |
| { | |
| "name": "transformer.h.13.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 35840, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 91750400, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "583a94ed54db0df4c0ce4bf5d273cc78" | |
| }, | |
| { | |
| "dataPath": "params_shard_81.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29501440, | |
| "records": [ | |
| { | |
| "name": "transformer.h.9.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.9.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 7680, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 19660800, | |
| "byteOffset": 1638400 | |
| }, | |
| { | |
| "name": "transformer.h.9.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 7680, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2457600, | |
| "byteOffset": 21299200 | |
| }, | |
| { | |
| "name": "transformer.h.13.ln.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 23756800 | |
| }, | |
| { | |
| "name": "transformer.h.13.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5734400, | |
| "byteOffset": 23767040 | |
| } | |
| ], | |
| "md5sum": "654791d864a3dbbf8b7d477ba999b35c" | |
| }, | |
| { | |
| "dataPath": "params_shard_82.bin", | |
| "format": "raw-shard", | |
| "nbytes": 19660800, | |
| "records": [ | |
| { | |
| "name": "transformer.h.13.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 7680, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 19660800, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "fbd8f3a073ee6b6ce0961f22c60cbb3c" | |
| }, | |
| { | |
| "dataPath": "params_shard_83.bin", | |
| "format": "raw-shard", | |
| "nbytes": 45875200, | |
| "records": [ | |
| { | |
| "name": "transformer.h.14.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 2240 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 45875200, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d95c85e5e0d634267d4d2942c122a269" | |
| }, | |
| { | |
| "dataPath": "params_shard_84.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28692480, | |
| "records": [ | |
| { | |
| "name": "transformer.h.13.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 35840, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11468800, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.13.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 11468800 | |
| }, | |
| { | |
| "name": "transformer.h.13.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 11479040 | |
| }, | |
| { | |
| "name": "transformer.h.13.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 24586240 | |
| }, | |
| { | |
| "name": "transformer.h.13.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 7680, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2457600, | |
| "byteOffset": 26224640 | |
| }, | |
| { | |
| "name": "transformer.h.14.ln.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 28682240 | |
| } | |
| ], | |
| "md5sum": "d499ea4a9704768e052252223d52d86c" | |
| }, | |
| { | |
| "dataPath": "params_shard_85.bin", | |
| "format": "raw-shard", | |
| "nbytes": 91750400, | |
| "records": [ | |
| { | |
| "name": "transformer.h.14.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 35840, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 91750400, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "84fe35019339678ab7f9a7f902944727" | |
| }, | |
| { | |
| "dataPath": "params_shard_86.bin", | |
| "format": "raw-shard", | |
| "nbytes": 19660800, | |
| "records": [ | |
| { | |
| "name": "transformer.h.14.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 7680, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 19660800, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f6d59439cdc5f6a7c30fe0536aab5051" | |
| }, | |
| { | |
| "dataPath": "params_shard_87.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31959040, | |
| "records": [ | |
| { | |
| "name": "transformer.h.14.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5734400, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.14.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 35840, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11468800, | |
| "byteOffset": 5734400 | |
| }, | |
| { | |
| "name": "transformer.h.14.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 17203200 | |
| }, | |
| { | |
| "name": "transformer.h.14.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 17213440 | |
| }, | |
| { | |
| "name": "transformer.h.14.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 30320640 | |
| } | |
| ], | |
| "md5sum": "451ed6cdfa8764c87c8a0bc0642b785b" | |
| }, | |
| { | |
| "dataPath": "params_shard_88.bin", | |
| "format": "raw-shard", | |
| "nbytes": 45875200, | |
| "records": [ | |
| { | |
| "name": "transformer.h.15.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 2240 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 45875200, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "59e6cca0afb98a46abb2bfaab1119fcd" | |
| }, | |
| { | |
| "dataPath": "params_shard_89.bin", | |
| "format": "raw-shard", | |
| "nbytes": 91750400, | |
| "records": [ | |
| { | |
| "name": "transformer.h.15.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 35840, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 91750400, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "2d820ac8de0cb889047ac2b767641266" | |
| }, | |
| { | |
| "dataPath": "params_shard_90.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32788480, | |
| "records": [ | |
| { | |
| "name": "transformer.h.14.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 7680, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2457600, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.15.ln.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 2457600 | |
| }, | |
| { | |
| "name": "transformer.h.15.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5734400, | |
| "byteOffset": 2467840 | |
| }, | |
| { | |
| "name": "transformer.h.15.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 35840, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11468800, | |
| "byteOffset": 8202240 | |
| }, | |
| { | |
| "name": "transformer.h.15.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 19671040 | |
| }, | |
| { | |
| "name": "transformer.h.15.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 19681280 | |
| } | |
| ], | |
| "md5sum": "a57dc4d1f57f087079b56932d3b24414" | |
| }, | |
| { | |
| "dataPath": "params_shard_91.bin", | |
| "format": "raw-shard", | |
| "nbytes": 45875200, | |
| "records": [ | |
| { | |
| "name": "transformer.h.16.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 2240 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 45875200, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "6b6346e2218d35d0d016bf98c855e0ab" | |
| }, | |
| { | |
| "dataPath": "params_shard_92.bin", | |
| "format": "raw-shard", | |
| "nbytes": 91750400, | |
| "records": [ | |
| { | |
| "name": "transformer.h.16.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 35840, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 91750400, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "7a35dcd1070e61c2cc476dcebab88153" | |
| }, | |
| { | |
| "dataPath": "params_shard_93.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29501440, | |
| "records": [ | |
| { | |
| "name": "transformer.h.15.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.15.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 7680, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 19660800, | |
| "byteOffset": 1638400 | |
| }, | |
| { | |
| "name": "transformer.h.15.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 7680, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2457600, | |
| "byteOffset": 21299200 | |
| }, | |
| { | |
| "name": "transformer.h.16.ln.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 23756800 | |
| }, | |
| { | |
| "name": "transformer.h.16.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5734400, | |
| "byteOffset": 23767040 | |
| } | |
| ], | |
| "md5sum": "6fd8b4771938c75f914b1288b0f1e03d" | |
| }, | |
| { | |
| "dataPath": "params_shard_94.bin", | |
| "format": "raw-shard", | |
| "nbytes": 19660800, | |
| "records": [ | |
| { | |
| "name": "transformer.h.16.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 7680, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 19660800, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ee848fe7526563f34a663c18ff8ffff5" | |
| }, | |
| { | |
| "dataPath": "params_shard_95.bin", | |
| "format": "raw-shard", | |
| "nbytes": 45875200, | |
| "records": [ | |
| { | |
| "name": "transformer.h.17.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 2240 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 45875200, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "318f0539207fd1de700c3ed97765e5f9" | |
| }, | |
| { | |
| "dataPath": "params_shard_96.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28692480, | |
| "records": [ | |
| { | |
| "name": "transformer.h.16.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 35840, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11468800, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.16.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 11468800 | |
| }, | |
| { | |
| "name": "transformer.h.16.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 11479040 | |
| }, | |
| { | |
| "name": "transformer.h.16.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 24586240 | |
| }, | |
| { | |
| "name": "transformer.h.16.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 7680, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2457600, | |
| "byteOffset": 26224640 | |
| }, | |
| { | |
| "name": "transformer.h.17.ln.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 28682240 | |
| } | |
| ], | |
| "md5sum": "c644c5dc7192f2289998df7307b7ca35" | |
| }, | |
| { | |
| "dataPath": "params_shard_97.bin", | |
| "format": "raw-shard", | |
| "nbytes": 91750400, | |
| "records": [ | |
| { | |
| "name": "transformer.h.17.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 35840, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 91750400, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "4df83cdb7ea5d635dc19b6b4c3ae32e7" | |
| }, | |
| { | |
| "dataPath": "params_shard_98.bin", | |
| "format": "raw-shard", | |
| "nbytes": 19660800, | |
| "records": [ | |
| { | |
| "name": "transformer.h.17.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 7680, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 19660800, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "49f9de32bc8baedbd5b92e26d86df2ee" | |
| }, | |
| { | |
| "dataPath": "params_shard_99.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31959040, | |
| "records": [ | |
| { | |
| "name": "transformer.h.17.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5734400, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.17.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 35840, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11468800, | |
| "byteOffset": 5734400 | |
| }, | |
| { | |
| "name": "transformer.h.17.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 17203200 | |
| }, | |
| { | |
| "name": "transformer.h.17.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 17213440 | |
| }, | |
| { | |
| "name": "transformer.h.17.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 30320640 | |
| } | |
| ], | |
| "md5sum": "e59356cfcd687d42c4eb45cc3afe45a3" | |
| }, | |
| { | |
| "dataPath": "params_shard_100.bin", | |
| "format": "raw-shard", | |
| "nbytes": 45875200, | |
| "records": [ | |
| { | |
| "name": "transformer.h.18.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 2240 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 45875200, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d882ae5ee6c77a128a03308b13e76b57" | |
| }, | |
| { | |
| "dataPath": "params_shard_101.bin", | |
| "format": "raw-shard", | |
| "nbytes": 91750400, | |
| "records": [ | |
| { | |
| "name": "transformer.h.18.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 35840, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 91750400, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "557c53cb2b190e54ee9eaac6e5c11af1" | |
| }, | |
| { | |
| "dataPath": "params_shard_102.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32788480, | |
| "records": [ | |
| { | |
| "name": "transformer.h.17.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 7680, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2457600, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.18.ln.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 2457600 | |
| }, | |
| { | |
| "name": "transformer.h.18.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5734400, | |
| "byteOffset": 2467840 | |
| }, | |
| { | |
| "name": "transformer.h.18.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 35840, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11468800, | |
| "byteOffset": 8202240 | |
| }, | |
| { | |
| "name": "transformer.h.18.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 19671040 | |
| }, | |
| { | |
| "name": "transformer.h.18.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 19681280 | |
| } | |
| ], | |
| "md5sum": "238e749371c1f3a637800979bb2e7647" | |
| }, | |
| { | |
| "dataPath": "params_shard_103.bin", | |
| "format": "raw-shard", | |
| "nbytes": 45875200, | |
| "records": [ | |
| { | |
| "name": "transformer.h.19.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 2240 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 45875200, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "2ff9f904e0217600b4ed1d3824249c14" | |
| }, | |
| { | |
| "dataPath": "params_shard_104.bin", | |
| "format": "raw-shard", | |
| "nbytes": 91750400, | |
| "records": [ | |
| { | |
| "name": "transformer.h.19.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 35840, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 91750400, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "45137761dee13d0a4a3698712ace3681" | |
| }, | |
| { | |
| "dataPath": "params_shard_105.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29501440, | |
| "records": [ | |
| { | |
| "name": "transformer.h.18.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.18.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 7680, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 19660800, | |
| "byteOffset": 1638400 | |
| }, | |
| { | |
| "name": "transformer.h.18.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 7680, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2457600, | |
| "byteOffset": 21299200 | |
| }, | |
| { | |
| "name": "transformer.h.19.ln.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 23756800 | |
| }, | |
| { | |
| "name": "transformer.h.19.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5734400, | |
| "byteOffset": 23767040 | |
| } | |
| ], | |
| "md5sum": "cc93f9e700741d776e48524ccc15a5f0" | |
| }, | |
| { | |
| "dataPath": "params_shard_106.bin", | |
| "format": "raw-shard", | |
| "nbytes": 19660800, | |
| "records": [ | |
| { | |
| "name": "transformer.h.19.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 7680, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 19660800, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "dcfbadc9ba447e96ac40d009b6afa347" | |
| }, | |
| { | |
| "dataPath": "params_shard_107.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28682240, | |
| "records": [ | |
| { | |
| "name": "transformer.h.19.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 35840, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11468800, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.19.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 11468800 | |
| }, | |
| { | |
| "name": "transformer.h.19.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 11479040 | |
| }, | |
| { | |
| "name": "transformer.h.19.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 24586240 | |
| }, | |
| { | |
| "name": "transformer.h.19.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 7680, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2457600, | |
| "byteOffset": 26224640 | |
| } | |
| ], | |
| "md5sum": "01b4eccdafb7c19ee6c70be7b46ffc2c" | |
| }, | |
| { | |
| "dataPath": "params_shard_108.bin", | |
| "format": "raw-shard", | |
| "nbytes": 19660800, | |
| "records": [ | |
| { | |
| "name": "transformer.h.20.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 7680, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 19660800, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "653426db6b65ed70a63a0c06c72a79c1" | |
| }, | |
| { | |
| "dataPath": "params_shard_109.bin", | |
| "format": "raw-shard", | |
| "nbytes": 45875200, | |
| "records": [ | |
| { | |
| "name": "transformer.h.20.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 2240 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 45875200, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "14bdf820305ee594132d00ef77fdbdf0" | |
| }, | |
| { | |
| "dataPath": "params_shard_110.bin", | |
| "format": "raw-shard", | |
| "nbytes": 91750400, | |
| "records": [ | |
| { | |
| "name": "transformer.h.20.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 35840, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 91750400, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "dd24cdbb0b89ea6cbe4746cecec66101" | |
| }, | |
| { | |
| "dataPath": "params_shard_111.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22947840, | |
| "records": [ | |
| { | |
| "name": "transformer.h.20.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.20.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 13107200 | |
| }, | |
| { | |
| "name": "transformer.h.20.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 7680, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2457600, | |
| "byteOffset": 14745600 | |
| }, | |
| { | |
| "name": "transformer.h.20.ln.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 17203200 | |
| }, | |
| { | |
| "name": "transformer.h.20.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5734400, | |
| "byteOffset": 17213440 | |
| } | |
| ], | |
| "md5sum": "b3770e65f62ed5bf742ea1c18bb2d7d9" | |
| }, | |
| { | |
| "dataPath": "params_shard_112.bin", | |
| "format": "raw-shard", | |
| "nbytes": 45875200, | |
| "records": [ | |
| { | |
| "name": "transformer.h.21.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 2240 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 45875200, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e0921cf640063e8f5c85c974fbf6665c" | |
| }, | |
| { | |
| "dataPath": "params_shard_113.bin", | |
| "format": "raw-shard", | |
| "nbytes": 91750400, | |
| "records": [ | |
| { | |
| "name": "transformer.h.21.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 35840, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 91750400, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "0409aca65194fdf15b9d6bb5240aaca0" | |
| }, | |
| { | |
| "dataPath": "params_shard_114.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28702720, | |
| "records": [ | |
| { | |
| "name": "transformer.h.20.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 35840, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11468800, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.20.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 11468800 | |
| }, | |
| { | |
| "name": "transformer.h.21.ln.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 11479040 | |
| }, | |
| { | |
| "name": "transformer.h.21.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5734400, | |
| "byteOffset": 11489280 | |
| }, | |
| { | |
| "name": "transformer.h.21.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 35840, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11468800, | |
| "byteOffset": 17223680 | |
| }, | |
| { | |
| "name": "transformer.h.21.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 28692480 | |
| } | |
| ], | |
| "md5sum": "201f0325cff32c4830c53702d97a2d39" | |
| }, | |
| { | |
| "dataPath": "params_shard_115.bin", | |
| "format": "raw-shard", | |
| "nbytes": 19660800, | |
| "records": [ | |
| { | |
| "name": "transformer.h.21.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 7680, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 19660800, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "50bfacc323b07c7c775006178f2f5bef" | |
| }, | |
| { | |
| "dataPath": "params_shard_116.bin", | |
| "format": "raw-shard", | |
| "nbytes": 45875200, | |
| "records": [ | |
| { | |
| "name": "transformer.h.22.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 2240 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 45875200, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d2025004b017f7721dcc199be17f7a1c" | |
| }, | |
| { | |
| "dataPath": "params_shard_117.bin", | |
| "format": "raw-shard", | |
| "nbytes": 91750400, | |
| "records": [ | |
| { | |
| "name": "transformer.h.22.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 35840, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 91750400, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "3883e9d2004e52d155b36118d14790de" | |
| }, | |
| { | |
| "dataPath": "params_shard_118.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22947840, | |
| "records": [ | |
| { | |
| "name": "transformer.h.21.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.21.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 13107200 | |
| }, | |
| { | |
| "name": "transformer.h.21.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 7680, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2457600, | |
| "byteOffset": 14745600 | |
| }, | |
| { | |
| "name": "transformer.h.22.ln.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 17203200 | |
| }, | |
| { | |
| "name": "transformer.h.22.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5734400, | |
| "byteOffset": 17213440 | |
| } | |
| ], | |
| "md5sum": "765bd68ec1c5c94408ccdb08fbe634cb" | |
| }, | |
| { | |
| "dataPath": "params_shard_119.bin", | |
| "format": "raw-shard", | |
| "nbytes": 19660800, | |
| "records": [ | |
| { | |
| "name": "transformer.h.22.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 7680, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 19660800, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "abd715f3f826850a45c3b9690af9e9a7" | |
| }, | |
| { | |
| "dataPath": "params_shard_120.bin", | |
| "format": "raw-shard", | |
| "nbytes": 45875200, | |
| "records": [ | |
| { | |
| "name": "transformer.h.23.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 2240 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 45875200, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "a0944e9f111b0b7bf73941174560d079" | |
| }, | |
| { | |
| "dataPath": "params_shard_121.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28692480, | |
| "records": [ | |
| { | |
| "name": "transformer.h.22.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 35840, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11468800, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.22.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 11468800 | |
| }, | |
| { | |
| "name": "transformer.h.22.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 11479040 | |
| }, | |
| { | |
| "name": "transformer.h.22.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 24586240 | |
| }, | |
| { | |
| "name": "transformer.h.22.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 7680, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2457600, | |
| "byteOffset": 26224640 | |
| }, | |
| { | |
| "name": "transformer.h.23.ln.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 28682240 | |
| } | |
| ], | |
| "md5sum": "a40db7b1256977ea848407102446da7b" | |
| }, | |
| { | |
| "dataPath": "params_shard_122.bin", | |
| "format": "raw-shard", | |
| "nbytes": 91750400, | |
| "records": [ | |
| { | |
| "name": "transformer.h.23.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 35840, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 91750400, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "8166706e49b1ea6d34d34b32ad9bb594" | |
| }, | |
| { | |
| "dataPath": "params_shard_123.bin", | |
| "format": "raw-shard", | |
| "nbytes": 19660800, | |
| "records": [ | |
| { | |
| "name": "transformer.h.23.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 7680, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 19660800, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "8f67cacae1deddea9dbb1c42ebe0b26b" | |
| }, | |
| { | |
| "dataPath": "params_shard_124.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31959040, | |
| "records": [ | |
| { | |
| "name": "transformer.h.23.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5734400, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.23.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 35840, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11468800, | |
| "byteOffset": 5734400 | |
| }, | |
| { | |
| "name": "transformer.h.23.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 17203200 | |
| }, | |
| { | |
| "name": "transformer.h.23.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 17213440 | |
| }, | |
| { | |
| "name": "transformer.h.23.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 30320640 | |
| } | |
| ], | |
| "md5sum": "da77780d9d2f52f08d9ffdbfced2bb16" | |
| }, | |
| { | |
| "dataPath": "params_shard_125.bin", | |
| "format": "raw-shard", | |
| "nbytes": 45875200, | |
| "records": [ | |
| { | |
| "name": "transformer.h.24.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 2240 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 45875200, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "fabaa0119e733f29b74bccd6a17c28b0" | |
| }, | |
| { | |
| "dataPath": "params_shard_126.bin", | |
| "format": "raw-shard", | |
| "nbytes": 91750400, | |
| "records": [ | |
| { | |
| "name": "transformer.h.24.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 35840, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 91750400, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "2e1caa0363849837d58436e36a8af950" | |
| }, | |
| { | |
| "dataPath": "params_shard_127.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32788480, | |
| "records": [ | |
| { | |
| "name": "transformer.h.23.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 7680, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2457600, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.24.ln.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 2457600 | |
| }, | |
| { | |
| "name": "transformer.h.24.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5734400, | |
| "byteOffset": 2467840 | |
| }, | |
| { | |
| "name": "transformer.h.24.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 35840, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11468800, | |
| "byteOffset": 8202240 | |
| }, | |
| { | |
| "name": "transformer.h.24.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 19671040 | |
| }, | |
| { | |
| "name": "transformer.h.24.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 19681280 | |
| } | |
| ], | |
| "md5sum": "6a7e39b2777a1ad8c228fbc37f97f1ce" | |
| }, | |
| { | |
| "dataPath": "params_shard_128.bin", | |
| "format": "raw-shard", | |
| "nbytes": 45875200, | |
| "records": [ | |
| { | |
| "name": "transformer.h.25.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 2240 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 45875200, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e12108fd5ca0592084f3483e0e8e57fe" | |
| }, | |
| { | |
| "dataPath": "params_shard_129.bin", | |
| "format": "raw-shard", | |
| "nbytes": 91750400, | |
| "records": [ | |
| { | |
| "name": "transformer.h.25.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 35840, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 91750400, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "687a5de87e5c8b85e850883a505f0c25" | |
| }, | |
| { | |
| "dataPath": "params_shard_130.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29501440, | |
| "records": [ | |
| { | |
| "name": "transformer.h.24.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.24.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 7680, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 19660800, | |
| "byteOffset": 1638400 | |
| }, | |
| { | |
| "name": "transformer.h.24.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 7680, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2457600, | |
| "byteOffset": 21299200 | |
| }, | |
| { | |
| "name": "transformer.h.25.ln.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 23756800 | |
| }, | |
| { | |
| "name": "transformer.h.25.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5734400, | |
| "byteOffset": 23767040 | |
| } | |
| ], | |
| "md5sum": "e7e145b434a58f76dd1b79c3053fb21d" | |
| }, | |
| { | |
| "dataPath": "params_shard_131.bin", | |
| "format": "raw-shard", | |
| "nbytes": 19660800, | |
| "records": [ | |
| { | |
| "name": "transformer.h.25.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 7680, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 19660800, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "2a9d317100ce29998840d75e72ed9a09" | |
| }, | |
| { | |
| "dataPath": "params_shard_132.bin", | |
| "format": "raw-shard", | |
| "nbytes": 45875200, | |
| "records": [ | |
| { | |
| "name": "transformer.h.26.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 2240 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 45875200, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "4c0568f3be30fce4717bdf2b77b196df" | |
| }, | |
| { | |
| "dataPath": "params_shard_133.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28692480, | |
| "records": [ | |
| { | |
| "name": "transformer.h.25.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 35840, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11468800, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.25.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 11468800 | |
| }, | |
| { | |
| "name": "transformer.h.25.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 11479040 | |
| }, | |
| { | |
| "name": "transformer.h.25.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 24586240 | |
| }, | |
| { | |
| "name": "transformer.h.25.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 7680, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2457600, | |
| "byteOffset": 26224640 | |
| }, | |
| { | |
| "name": "transformer.h.26.ln.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 28682240 | |
| } | |
| ], | |
| "md5sum": "eff2e390a822beb782a4164f4ab59206" | |
| }, | |
| { | |
| "dataPath": "params_shard_134.bin", | |
| "format": "raw-shard", | |
| "nbytes": 91750400, | |
| "records": [ | |
| { | |
| "name": "transformer.h.26.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 35840, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 91750400, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "10cfd7143b89b57485c0d1f49e9e6fb9" | |
| }, | |
| { | |
| "dataPath": "params_shard_135.bin", | |
| "format": "raw-shard", | |
| "nbytes": 19660800, | |
| "records": [ | |
| { | |
| "name": "transformer.h.26.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 7680, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 19660800, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "90a2b502979aedcfe6b7959a71bce7cb" | |
| }, | |
| { | |
| "dataPath": "params_shard_136.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31959040, | |
| "records": [ | |
| { | |
| "name": "transformer.h.26.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5734400, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.26.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 35840, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11468800, | |
| "byteOffset": 5734400 | |
| }, | |
| { | |
| "name": "transformer.h.26.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 17203200 | |
| }, | |
| { | |
| "name": "transformer.h.26.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 17213440 | |
| }, | |
| { | |
| "name": "transformer.h.26.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 30320640 | |
| } | |
| ], | |
| "md5sum": "a5809205747e05746932a126a1f5e599" | |
| }, | |
| { | |
| "dataPath": "params_shard_137.bin", | |
| "format": "raw-shard", | |
| "nbytes": 19660800, | |
| "records": [ | |
| { | |
| "name": "transformer.h.27.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 7680, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 19660800, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ff2f0db89b5c750b8faeee80cd6e79b1" | |
| }, | |
| { | |
| "dataPath": "params_shard_138.bin", | |
| "format": "raw-shard", | |
| "nbytes": 45875200, | |
| "records": [ | |
| { | |
| "name": "transformer.h.27.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 2240 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 45875200, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f30d84f7da073c87a2033deb5aec75fb" | |
| }, | |
| { | |
| "dataPath": "params_shard_139.bin", | |
| "format": "raw-shard", | |
| "nbytes": 91750400, | |
| "records": [ | |
| { | |
| "name": "transformer.h.27.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 35840, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 91750400, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "46ccc4016f39ad16dd818594570c4421" | |
| }, | |
| { | |
| "dataPath": "params_shard_140.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25405440, | |
| "records": [ | |
| { | |
| "name": "transformer.h.26.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 7680, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2457600, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.27.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 2457600 | |
| }, | |
| { | |
| "name": "transformer.h.27.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 15564800 | |
| }, | |
| { | |
| "name": "transformer.h.27.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 7680, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2457600, | |
| "byteOffset": 17203200 | |
| }, | |
| { | |
| "name": "transformer.h.27.ln.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 19660800 | |
| }, | |
| { | |
| "name": "transformer.h.27.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5734400, | |
| "byteOffset": 19671040 | |
| } | |
| ], | |
| "md5sum": "104db2983ec55400f17e70d0b5a35d95" | |
| }, | |
| { | |
| "dataPath": "params_shard_141.bin", | |
| "format": "raw-shard", | |
| "nbytes": 45875200, | |
| "records": [ | |
| { | |
| "name": "transformer.h.28.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 2240 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 45875200, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "7957737e1037c4c3026ad44d6558a9d4" | |
| }, | |
| { | |
| "dataPath": "params_shard_142.bin", | |
| "format": "raw-shard", | |
| "nbytes": 91750400, | |
| "records": [ | |
| { | |
| "name": "transformer.h.28.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 35840, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 91750400, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "1f25293127b70aacd24e256db1b8155c" | |
| }, | |
| { | |
| "dataPath": "params_shard_143.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28702720, | |
| "records": [ | |
| { | |
| "name": "transformer.h.27.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 35840, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11468800, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.27.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 11468800 | |
| }, | |
| { | |
| "name": "transformer.h.28.ln.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 11479040 | |
| }, | |
| { | |
| "name": "transformer.h.28.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5734400, | |
| "byteOffset": 11489280 | |
| }, | |
| { | |
| "name": "transformer.h.28.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 35840, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11468800, | |
| "byteOffset": 17223680 | |
| }, | |
| { | |
| "name": "transformer.h.28.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 28692480 | |
| } | |
| ], | |
| "md5sum": "1b3cbec4187ecd7510ce45e56f36cd81" | |
| }, | |
| { | |
| "dataPath": "params_shard_144.bin", | |
| "format": "raw-shard", | |
| "nbytes": 19660800, | |
| "records": [ | |
| { | |
| "name": "transformer.h.28.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 7680, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 19660800, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "82c89446825cf4f6c4932d30138e7990" | |
| }, | |
| { | |
| "dataPath": "params_shard_145.bin", | |
| "format": "raw-shard", | |
| "nbytes": 45875200, | |
| "records": [ | |
| { | |
| "name": "transformer.h.29.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 2240 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 45875200, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "9d3b38f314fa76e85f4eb565bb5d1c44" | |
| }, | |
| { | |
| "dataPath": "params_shard_146.bin", | |
| "format": "raw-shard", | |
| "nbytes": 91750400, | |
| "records": [ | |
| { | |
| "name": "transformer.h.29.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 35840, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 91750400, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "4492beaa3d4a211be9a470a935850fb6" | |
| }, | |
| { | |
| "dataPath": "params_shard_147.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22947840, | |
| "records": [ | |
| { | |
| "name": "transformer.h.28.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.28.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 13107200 | |
| }, | |
| { | |
| "name": "transformer.h.28.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 7680, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2457600, | |
| "byteOffset": 14745600 | |
| }, | |
| { | |
| "name": "transformer.h.29.ln.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 17203200 | |
| }, | |
| { | |
| "name": "transformer.h.29.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5734400, | |
| "byteOffset": 17213440 | |
| } | |
| ], | |
| "md5sum": "2b2fe34b085032f830cf1d859aed7dea" | |
| }, | |
| { | |
| "dataPath": "params_shard_148.bin", | |
| "format": "raw-shard", | |
| "nbytes": 19660800, | |
| "records": [ | |
| { | |
| "name": "transformer.h.29.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 7680, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 19660800, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "5f0fb2169b2db2884339ca27b573afb3" | |
| }, | |
| { | |
| "dataPath": "params_shard_149.bin", | |
| "format": "raw-shard", | |
| "nbytes": 45875200, | |
| "records": [ | |
| { | |
| "name": "transformer.h.30.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 2240 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 45875200, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "05684b76cc17f59133468f6304a37a2a" | |
| }, | |
| { | |
| "dataPath": "params_shard_150.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28692480, | |
| "records": [ | |
| { | |
| "name": "transformer.h.29.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 35840, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11468800, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.29.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 11468800 | |
| }, | |
| { | |
| "name": "transformer.h.29.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 11479040 | |
| }, | |
| { | |
| "name": "transformer.h.29.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 24586240 | |
| }, | |
| { | |
| "name": "transformer.h.29.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 7680, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2457600, | |
| "byteOffset": 26224640 | |
| }, | |
| { | |
| "name": "transformer.h.30.ln.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 28682240 | |
| } | |
| ], | |
| "md5sum": "f004002fa77babc9bacbef6bd4c537c4" | |
| }, | |
| { | |
| "dataPath": "params_shard_151.bin", | |
| "format": "raw-shard", | |
| "nbytes": 91750400, | |
| "records": [ | |
| { | |
| "name": "transformer.h.30.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 35840, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 91750400, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "8b4bb303f6ba925402cc85e0e02fa590" | |
| }, | |
| { | |
| "dataPath": "params_shard_152.bin", | |
| "format": "raw-shard", | |
| "nbytes": 19660800, | |
| "records": [ | |
| { | |
| "name": "transformer.h.30.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 7680, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 19660800, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "da7f32d22d118f2d2036eb8d4a033094" | |
| }, | |
| { | |
| "dataPath": "params_shard_153.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31959040, | |
| "records": [ | |
| { | |
| "name": "transformer.h.30.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5734400, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.30.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 35840, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11468800, | |
| "byteOffset": 5734400 | |
| }, | |
| { | |
| "name": "transformer.h.30.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 17203200 | |
| }, | |
| { | |
| "name": "transformer.h.30.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 17213440 | |
| }, | |
| { | |
| "name": "transformer.h.30.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 30320640 | |
| } | |
| ], | |
| "md5sum": "6a932e7e28e54e35eeb6e950444ea045" | |
| }, | |
| { | |
| "dataPath": "params_shard_154.bin", | |
| "format": "raw-shard", | |
| "nbytes": 45875200, | |
| "records": [ | |
| { | |
| "name": "transformer.h.31.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 2240 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 45875200, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e3c4c0f5ed3cda6b9899fafef30079e7" | |
| }, | |
| { | |
| "dataPath": "params_shard_155.bin", | |
| "format": "raw-shard", | |
| "nbytes": 91750400, | |
| "records": [ | |
| { | |
| "name": "transformer.h.31.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 35840, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 91750400, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ebc40df92ff2f178e7789073cd557ced" | |
| }, | |
| { | |
| "dataPath": "params_shard_156.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32788480, | |
| "records": [ | |
| { | |
| "name": "transformer.h.30.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 7680, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2457600, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.31.ln.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 2457600 | |
| }, | |
| { | |
| "name": "transformer.h.31.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5734400, | |
| "byteOffset": 2467840 | |
| }, | |
| { | |
| "name": "transformer.h.31.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 35840, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11468800, | |
| "byteOffset": 8202240 | |
| }, | |
| { | |
| "name": "transformer.h.31.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 19671040 | |
| }, | |
| { | |
| "name": "transformer.h.31.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 19681280 | |
| } | |
| ], | |
| "md5sum": "031a136a4da6e8061efa3f8128ef024f" | |
| }, | |
| { | |
| "dataPath": "params_shard_157.bin", | |
| "format": "raw-shard", | |
| "nbytes": 45875200, | |
| "records": [ | |
| { | |
| "name": "transformer.h.32.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 2240 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 45875200, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "0c48aefbdc8deed7f226139252d44caf" | |
| }, | |
| { | |
| "dataPath": "params_shard_158.bin", | |
| "format": "raw-shard", | |
| "nbytes": 91750400, | |
| "records": [ | |
| { | |
| "name": "transformer.h.32.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 35840, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 91750400, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "a0135db99fe883b0a7c9220d56cc955d" | |
| }, | |
| { | |
| "dataPath": "params_shard_159.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29501440, | |
| "records": [ | |
| { | |
| "name": "transformer.h.31.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.31.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 7680, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 19660800, | |
| "byteOffset": 1638400 | |
| }, | |
| { | |
| "name": "transformer.h.31.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 7680, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2457600, | |
| "byteOffset": 21299200 | |
| }, | |
| { | |
| "name": "transformer.h.32.ln.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 23756800 | |
| }, | |
| { | |
| "name": "transformer.h.32.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5734400, | |
| "byteOffset": 23767040 | |
| } | |
| ], | |
| "md5sum": "03d808f0f5b264664611e3934c409fd5" | |
| }, | |
| { | |
| "dataPath": "params_shard_160.bin", | |
| "format": "raw-shard", | |
| "nbytes": 19660800, | |
| "records": [ | |
| { | |
| "name": "transformer.h.32.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 7680, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 19660800, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c20a8b6e9c39cf8ecdd51f4a9a97e1f9" | |
| }, | |
| { | |
| "dataPath": "params_shard_161.bin", | |
| "format": "raw-shard", | |
| "nbytes": 45875200, | |
| "records": [ | |
| { | |
| "name": "transformer.h.33.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 2240 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 45875200, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "362d86d928a17a4c05192d4f80924775" | |
| }, | |
| { | |
| "dataPath": "params_shard_162.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28692480, | |
| "records": [ | |
| { | |
| "name": "transformer.h.32.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 35840, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11468800, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.32.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 11468800 | |
| }, | |
| { | |
| "name": "transformer.h.32.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 11479040 | |
| }, | |
| { | |
| "name": "transformer.h.32.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 24586240 | |
| }, | |
| { | |
| "name": "transformer.h.32.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 7680, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2457600, | |
| "byteOffset": 26224640 | |
| }, | |
| { | |
| "name": "transformer.h.33.ln.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 28682240 | |
| } | |
| ], | |
| "md5sum": "e9ca9aab629381e699491d3c6a53e7c1" | |
| }, | |
| { | |
| "dataPath": "params_shard_163.bin", | |
| "format": "raw-shard", | |
| "nbytes": 91750400, | |
| "records": [ | |
| { | |
| "name": "transformer.h.33.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 35840, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 91750400, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "2a8ea13ddf2357b5a35014745644def3" | |
| }, | |
| { | |
| "dataPath": "params_shard_164.bin", | |
| "format": "raw-shard", | |
| "nbytes": 19660800, | |
| "records": [ | |
| { | |
| "name": "transformer.h.33.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 7680, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 19660800, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "01977e615616fd6a79a484ac4ddaa1e2" | |
| }, | |
| { | |
| "dataPath": "params_shard_165.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31959040, | |
| "records": [ | |
| { | |
| "name": "transformer.h.33.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5734400, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.33.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 35840, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11468800, | |
| "byteOffset": 5734400 | |
| }, | |
| { | |
| "name": "transformer.h.33.post_attention_layernorm.weight", | |
| "shape": [ | |
| 5120 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 10240, | |
| "byteOffset": 17203200 | |
| }, | |
| { | |
| "name": "transformer.h.33.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 17213440 | |
| }, | |
| { | |
| "name": "transformer.h.33.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 30320640 | |
| } | |
| ], | |
| "md5sum": "6b808e7bcd73a46ee1579f5ad692e18e" | |
| }, | |
| { | |
| "dataPath": "params_shard_166.bin", | |
| "format": "raw-shard", | |
| "nbytes": 19660800, | |
| "records": [ | |
| { | |
| "name": "transformer.h.34.mixer.qkv_proj.q_weight", | |
| "shape": [ | |
| 7680, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 19660800, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d035aa71b72ea033a696800c3d670fc2" | |
| }, | |
| { | |
| "dataPath": "params_shard_167.bin", | |
| "format": "raw-shard", | |
| "nbytes": 19660800, | |
| "records": [ | |
| { | |
| "name": "transformer.h.33.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 7680, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2457600, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.34.mixer.out_proj.q_weight", | |
| "shape": [ | |
| 5120, | |
| 640 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 13107200, | |
| "byteOffset": 2457600 | |
| }, | |
| { | |
| "name": "transformer.h.34.mixer.out_proj.q_scale", | |
| "shape": [ | |
| 5120, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1638400, | |
| "byteOffset": 15564800 | |
| }, | |
| { | |
| "name": "transformer.h.34.mixer.qkv_proj.q_scale", | |
| "shape": [ | |
| 7680, | |
| 160 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2457600, | |
| "byteOffset": 17203200 | |
| } | |
| ], | |
| "md5sum": "47480ec3d3fcc7638d36f99bc3f38684" | |
| } | |
| ] | |
| } |