phi-4-q4f16_1-MLC / ndarray-cache.json
jdp8's picture
phi-4 model weights
6ec5fd3
{
"metadata": {
"ParamSize": 405,
"ParamBytes": 8246568960.0,
"BitsPerParam": 4.500325336993593
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 256901120,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
100352,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 256901120,
"byteOffset": 0
}
],
"md5sum": "ec3ed539e35e33734d055e56419839b4"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 45875200,
"records": [
{
"name": "transformer.h.34.mlp.down_proj.q_weight",
"shape": [
5120,
2240
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45875200,
"byteOffset": 0
}
],
"md5sum": "4de4a3a5bb5791ce6025786b1fa66507"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 32122880,
"records": [
{
"name": "lm_head.q_scale",
"shape": [
100352,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32112640,
"byteOffset": 0
},
{
"name": "transformer.h.34.ln.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 32112640
}
],
"md5sum": "f91eca43c8f79eb826e4101b6ddab68b"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 91750400,
"records": [
{
"name": "transformer.h.34.mlp.gate_up_proj.q_weight",
"shape": [
35840,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 91750400,
"byteOffset": 0
}
],
"md5sum": "2de9537dcb67fcb9ca6de76117d3b7fc"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 45875200,
"records": [
{
"name": "transformer.h.35.mlp.down_proj.q_weight",
"shape": [
5120,
2240
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45875200,
"byteOffset": 0
}
],
"md5sum": "b18e6c43b7d9e359b926ea40eb491796"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 91750400,
"records": [
{
"name": "transformer.h.35.mlp.gate_up_proj.q_weight",
"shape": [
35840,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 91750400,
"byteOffset": 0
}
],
"md5sum": "4980037a28448777e82c2add912b76d3"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 22958080,
"records": [
{
"name": "transformer.h.34.mlp.down_proj.q_scale",
"shape": [
5120,
560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5734400,
"byteOffset": 0
},
{
"name": "transformer.h.34.mlp.gate_up_proj.q_scale",
"shape": [
35840,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11468800,
"byteOffset": 5734400
},
{
"name": "transformer.h.34.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 17203200
},
{
"name": "transformer.h.35.ln.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 17213440
},
{
"name": "transformer.h.35.mlp.down_proj.q_scale",
"shape": [
5120,
560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5734400,
"byteOffset": 17223680
}
],
"md5sum": "3bc9209175b288282a9400ea58fed515"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 19660800,
"records": [
{
"name": "transformer.h.35.mixer.qkv_proj.q_weight",
"shape": [
7680,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 0
}
],
"md5sum": "838c059157cd794e0afe251c9d83a41b"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 45875200,
"records": [
{
"name": "transformer.h.36.mlp.down_proj.q_weight",
"shape": [
5120,
2240
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45875200,
"byteOffset": 0
}
],
"md5sum": "d81272b797a17b348626f8f2c7610ab9"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 28692480,
"records": [
{
"name": "transformer.h.35.mlp.gate_up_proj.q_scale",
"shape": [
35840,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11468800,
"byteOffset": 0
},
{
"name": "transformer.h.35.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11468800
},
{
"name": "transformer.h.35.mixer.out_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 11479040
},
{
"name": "transformer.h.35.mixer.out_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 24586240
},
{
"name": "transformer.h.35.mixer.qkv_proj.q_scale",
"shape": [
7680,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 26224640
},
{
"name": "transformer.h.36.ln.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28682240
}
],
"md5sum": "96f96489adb1130eab13f07c1014e9b7"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 91750400,
"records": [
{
"name": "transformer.h.36.mlp.gate_up_proj.q_weight",
"shape": [
35840,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 91750400,
"byteOffset": 0
}
],
"md5sum": "e72aac3b489667f5f9adc7a1b2a6dc71"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 19660800,
"records": [
{
"name": "transformer.h.36.mixer.qkv_proj.q_weight",
"shape": [
7680,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 0
}
],
"md5sum": "fad38404cf2cae443c6b6bcf97a5974c"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 31959040,
"records": [
{
"name": "transformer.h.36.mlp.down_proj.q_scale",
"shape": [
5120,
560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5734400,
"byteOffset": 0
},
{
"name": "transformer.h.36.mlp.gate_up_proj.q_scale",
"shape": [
35840,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11468800,
"byteOffset": 5734400
},
{
"name": "transformer.h.36.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 17203200
},
{
"name": "transformer.h.36.mixer.out_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 17213440
},
{
"name": "transformer.h.36.mixer.out_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 30320640
}
],
"md5sum": "98cfd2568ffe0b73e163279bc98c6bf0"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 45875200,
"records": [
{
"name": "transformer.h.37.mlp.down_proj.q_weight",
"shape": [
5120,
2240
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45875200,
"byteOffset": 0
}
],
"md5sum": "787c5041b51b737c404e5848a8330565"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 91750400,
"records": [
{
"name": "transformer.h.37.mlp.gate_up_proj.q_weight",
"shape": [
35840,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 91750400,
"byteOffset": 0
}
],
"md5sum": "39017d2b0cefd59fd40a0416558ba738"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 32788480,
"records": [
{
"name": "transformer.h.36.mixer.qkv_proj.q_scale",
"shape": [
7680,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 0
},
{
"name": "transformer.h.37.ln.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 2457600
},
{
"name": "transformer.h.37.mlp.down_proj.q_scale",
"shape": [
5120,
560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5734400,
"byteOffset": 2467840
},
{
"name": "transformer.h.37.mlp.gate_up_proj.q_scale",
"shape": [
35840,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11468800,
"byteOffset": 8202240
},
{
"name": "transformer.h.37.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 19671040
},
{
"name": "transformer.h.37.mixer.out_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 19681280
}
],
"md5sum": "5f63eb7a21e2a27165a91cef41069ef7"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 45875200,
"records": [
{
"name": "transformer.h.38.mlp.down_proj.q_weight",
"shape": [
5120,
2240
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45875200,
"byteOffset": 0
}
],
"md5sum": "a7494d4b1d2e7d7b28a4a77a16fdf8c0"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 91750400,
"records": [
{
"name": "transformer.h.38.mlp.gate_up_proj.q_weight",
"shape": [
35840,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 91750400,
"byteOffset": 0
}
],
"md5sum": "d8327b99b5b5ce78c144fbedc34ffc9b"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 29501440,
"records": [
{
"name": "transformer.h.37.mixer.out_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 0
},
{
"name": "transformer.h.37.mixer.qkv_proj.q_weight",
"shape": [
7680,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 1638400
},
{
"name": "transformer.h.37.mixer.qkv_proj.q_scale",
"shape": [
7680,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 21299200
},
{
"name": "transformer.h.38.ln.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 23756800
},
{
"name": "transformer.h.38.mlp.down_proj.q_scale",
"shape": [
5120,
560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5734400,
"byteOffset": 23767040
}
],
"md5sum": "998157f5f027db4cc067b2e59f476132"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 19660800,
"records": [
{
"name": "transformer.h.38.mixer.qkv_proj.q_weight",
"shape": [
7680,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 0
}
],
"md5sum": "f5f47383ee3ed5c9bd1d9b797066620f"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 45875200,
"records": [
{
"name": "transformer.h.39.mlp.down_proj.q_weight",
"shape": [
5120,
2240
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45875200,
"byteOffset": 0
}
],
"md5sum": "bbb9e0b10b242adbd74ea31b6c691b86"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 28692480,
"records": [
{
"name": "transformer.h.38.mlp.gate_up_proj.q_scale",
"shape": [
35840,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11468800,
"byteOffset": 0
},
{
"name": "transformer.h.38.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11468800
},
{
"name": "transformer.h.38.mixer.out_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 11479040
},
{
"name": "transformer.h.38.mixer.out_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 24586240
},
{
"name": "transformer.h.38.mixer.qkv_proj.q_scale",
"shape": [
7680,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 26224640
},
{
"name": "transformer.h.39.ln.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28682240
}
],
"md5sum": "ef26480c2fe56a73530e61d64d925da9"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 91750400,
"records": [
{
"name": "transformer.h.39.mlp.gate_up_proj.q_weight",
"shape": [
35840,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 91750400,
"byteOffset": 0
}
],
"md5sum": "698897dcd3922498a48e297393e6e827"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 19660800,
"records": [
{
"name": "transformer.h.39.mixer.qkv_proj.q_weight",
"shape": [
7680,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 0
}
],
"md5sum": "2171a93955dc719d92a1ad7b401a680e"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 31959040,
"records": [
{
"name": "transformer.h.39.mlp.down_proj.q_scale",
"shape": [
5120,
560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5734400,
"byteOffset": 0
},
{
"name": "transformer.h.39.mlp.gate_up_proj.q_scale",
"shape": [
35840,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11468800,
"byteOffset": 5734400
},
{
"name": "transformer.h.39.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 17203200
},
{
"name": "transformer.h.39.mixer.out_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 17213440
},
{
"name": "transformer.h.39.mixer.out_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 30320640
}
],
"md5sum": "703a4725a8a9baad362d49c02fa9bb11"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 256901120,
"records": [
{
"name": "transformer.embd.q_weight",
"shape": [
100352,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 256901120,
"byteOffset": 0
}
],
"md5sum": "3904674291357de8ab6bcd69543d7867"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 32112640,
"records": [
{
"name": "transformer.embd.q_scale",
"shape": [
100352,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32112640,
"byteOffset": 0
}
],
"md5sum": "fd0dc321f03afb13070a749a479693d8"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 45875200,
"records": [
{
"name": "transformer.h.0.mlp.down_proj.q_weight",
"shape": [
5120,
2240
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45875200,
"byteOffset": 0
}
],
"md5sum": "37bad9d50a6ee67cde0357f25af354b0"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 91750400,
"records": [
{
"name": "transformer.h.0.mlp.gate_up_proj.q_weight",
"shape": [
35840,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 91750400,
"byteOffset": 0
}
],
"md5sum": "3f6a169ddf9ef6ad6b346075a7b1c760"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 32798720,
"records": [
{
"name": "transformer.h.39.mixer.qkv_proj.q_scale",
"shape": [
7680,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 0
},
{
"name": "transformer.norm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 2457600
},
{
"name": "transformer.h.0.ln.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 2467840
},
{
"name": "transformer.h.0.mlp.down_proj.q_scale",
"shape": [
5120,
560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5734400,
"byteOffset": 2478080
},
{
"name": "transformer.h.0.mlp.gate_up_proj.q_scale",
"shape": [
35840,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11468800,
"byteOffset": 8212480
},
{
"name": "transformer.h.0.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 19681280
},
{
"name": "transformer.h.0.mixer.out_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 19691520
}
],
"md5sum": "235ec4f8339e1076fd4c30af9521a1ea"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 45875200,
"records": [
{
"name": "transformer.h.1.mlp.down_proj.q_weight",
"shape": [
5120,
2240
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45875200,
"byteOffset": 0
}
],
"md5sum": "90e899de33d38a39ff1e5783fee4057d"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 91750400,
"records": [
{
"name": "transformer.h.1.mlp.gate_up_proj.q_weight",
"shape": [
35840,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 91750400,
"byteOffset": 0
}
],
"md5sum": "f3ae42cc301ab48cfdff46f76b48b5e9"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 29501440,
"records": [
{
"name": "transformer.h.0.mixer.out_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 0
},
{
"name": "transformer.h.0.mixer.qkv_proj.q_weight",
"shape": [
7680,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 1638400
},
{
"name": "transformer.h.0.mixer.qkv_proj.q_scale",
"shape": [
7680,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 21299200
},
{
"name": "transformer.h.1.ln.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 23756800
},
{
"name": "transformer.h.1.mlp.down_proj.q_scale",
"shape": [
5120,
560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5734400,
"byteOffset": 23767040
}
],
"md5sum": "468a32ebe20554de52266d2d92ffce6e"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 19660800,
"records": [
{
"name": "transformer.h.1.mixer.qkv_proj.q_weight",
"shape": [
7680,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 0
}
],
"md5sum": "7a154bfad76108aa8584f5919c0d5c24"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 45875200,
"records": [
{
"name": "transformer.h.2.mlp.down_proj.q_weight",
"shape": [
5120,
2240
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45875200,
"byteOffset": 0
}
],
"md5sum": "adf96e2fb29f69274d5338373c477ca1"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 28692480,
"records": [
{
"name": "transformer.h.1.mlp.gate_up_proj.q_scale",
"shape": [
35840,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11468800,
"byteOffset": 0
},
{
"name": "transformer.h.1.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11468800
},
{
"name": "transformer.h.1.mixer.out_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 11479040
},
{
"name": "transformer.h.1.mixer.out_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 24586240
},
{
"name": "transformer.h.1.mixer.qkv_proj.q_scale",
"shape": [
7680,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 26224640
},
{
"name": "transformer.h.2.ln.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28682240
}
],
"md5sum": "ec9181a19b874c9cb2656db100110e21"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 91750400,
"records": [
{
"name": "transformer.h.2.mlp.gate_up_proj.q_weight",
"shape": [
35840,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 91750400,
"byteOffset": 0
}
],
"md5sum": "878d5e0fabdf8529bd09a242fce4d6d7"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 19660800,
"records": [
{
"name": "transformer.h.2.mixer.qkv_proj.q_weight",
"shape": [
7680,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 0
}
],
"md5sum": "c234a9144cde486068118d250ad17522"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 31959040,
"records": [
{
"name": "transformer.h.2.mlp.down_proj.q_scale",
"shape": [
5120,
560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5734400,
"byteOffset": 0
},
{
"name": "transformer.h.2.mlp.gate_up_proj.q_scale",
"shape": [
35840,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11468800,
"byteOffset": 5734400
},
{
"name": "transformer.h.2.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 17203200
},
{
"name": "transformer.h.2.mixer.out_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 17213440
},
{
"name": "transformer.h.2.mixer.out_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 30320640
}
],
"md5sum": "2f3119cd8b79f2948595c19a5758d9c4"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 45875200,
"records": [
{
"name": "transformer.h.3.mlp.down_proj.q_weight",
"shape": [
5120,
2240
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45875200,
"byteOffset": 0
}
],
"md5sum": "f1e7acc08504187db4c93f81645da12f"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 91750400,
"records": [
{
"name": "transformer.h.3.mlp.gate_up_proj.q_weight",
"shape": [
35840,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 91750400,
"byteOffset": 0
}
],
"md5sum": "898fe33bd28e18e2c172a82f21834731"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 32788480,
"records": [
{
"name": "transformer.h.2.mixer.qkv_proj.q_scale",
"shape": [
7680,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 0
},
{
"name": "transformer.h.3.ln.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 2457600
},
{
"name": "transformer.h.3.mlp.down_proj.q_scale",
"shape": [
5120,
560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5734400,
"byteOffset": 2467840
},
{
"name": "transformer.h.3.mlp.gate_up_proj.q_scale",
"shape": [
35840,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11468800,
"byteOffset": 8202240
},
{
"name": "transformer.h.3.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 19671040
},
{
"name": "transformer.h.3.mixer.out_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 19681280
}
],
"md5sum": "e81235d701152bbc01c3a32a52571b42"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 45875200,
"records": [
{
"name": "transformer.h.4.mlp.down_proj.q_weight",
"shape": [
5120,
2240
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45875200,
"byteOffset": 0
}
],
"md5sum": "9d23e046482379d9b376c07fd89925a1"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 91750400,
"records": [
{
"name": "transformer.h.4.mlp.gate_up_proj.q_weight",
"shape": [
35840,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 91750400,
"byteOffset": 0
}
],
"md5sum": "3da6a5bd16773e6c09c99d8762daa739"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 29501440,
"records": [
{
"name": "transformer.h.3.mixer.out_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 0
},
{
"name": "transformer.h.3.mixer.qkv_proj.q_weight",
"shape": [
7680,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 1638400
},
{
"name": "transformer.h.3.mixer.qkv_proj.q_scale",
"shape": [
7680,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 21299200
},
{
"name": "transformer.h.4.ln.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 23756800
},
{
"name": "transformer.h.4.mlp.down_proj.q_scale",
"shape": [
5120,
560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5734400,
"byteOffset": 23767040
}
],
"md5sum": "21a4bc683231c3979395fb4aeca6fad8"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 19660800,
"records": [
{
"name": "transformer.h.4.mixer.qkv_proj.q_weight",
"shape": [
7680,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 0
}
],
"md5sum": "d30e63556e6e28e67eb39f2a4c5b5a8a"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 91750400,
"records": [
{
"name": "transformer.h.5.mlp.gate_up_proj.q_weight",
"shape": [
35840,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 91750400,
"byteOffset": 0
}
],
"md5sum": "b7783024545c038a5e60f574f7b58df8"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 28682240,
"records": [
{
"name": "transformer.h.4.mlp.gate_up_proj.q_scale",
"shape": [
35840,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11468800,
"byteOffset": 0
},
{
"name": "transformer.h.4.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11468800
},
{
"name": "transformer.h.4.mixer.out_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 11479040
},
{
"name": "transformer.h.4.mixer.out_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 24586240
},
{
"name": "transformer.h.4.mixer.qkv_proj.q_scale",
"shape": [
7680,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 26224640
}
],
"md5sum": "379e91caa318cc53e6a27b43609b3463"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 19660800,
"records": [
{
"name": "transformer.h.5.mixer.qkv_proj.q_weight",
"shape": [
7680,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 0
}
],
"md5sum": "3e08572787b7adb220713767283afb30"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 45875200,
"records": [
{
"name": "transformer.h.10.mlp.down_proj.q_weight",
"shape": [
5120,
2240
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45875200,
"byteOffset": 0
}
],
"md5sum": "a2a056d3427a964165dac3a6395deb79"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 28682240,
"records": [
{
"name": "transformer.h.5.mlp.gate_up_proj.q_scale",
"shape": [
35840,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11468800,
"byteOffset": 0
},
{
"name": "transformer.h.5.mixer.out_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 11468800
},
{
"name": "transformer.h.5.mixer.out_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 24576000
},
{
"name": "transformer.h.5.mixer.qkv_proj.q_scale",
"shape": [
7680,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 26214400
},
{
"name": "transformer.h.10.ln.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28672000
}
],
"md5sum": "a66cbd20b819839708ab8c7a1159f7ac"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 91750400,
"records": [
{
"name": "transformer.h.10.mlp.gate_up_proj.q_weight",
"shape": [
35840,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 91750400,
"byteOffset": 0
}
],
"md5sum": "3623049149207b8902c62261da3aac00"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 19660800,
"records": [
{
"name": "transformer.h.10.mixer.qkv_proj.q_weight",
"shape": [
7680,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 0
}
],
"md5sum": "b8fcac62d77b5e706712a0112ac955b6"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 31959040,
"records": [
{
"name": "transformer.h.10.mlp.down_proj.q_scale",
"shape": [
5120,
560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5734400,
"byteOffset": 0
},
{
"name": "transformer.h.10.mlp.gate_up_proj.q_scale",
"shape": [
35840,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11468800,
"byteOffset": 5734400
},
{
"name": "transformer.h.10.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 17203200
},
{
"name": "transformer.h.10.mixer.out_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 17213440
},
{
"name": "transformer.h.10.mixer.out_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 30320640
}
],
"md5sum": "aae7104f432ee997d16f207819891354"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 45875200,
"records": [
{
"name": "transformer.h.11.mlp.down_proj.q_weight",
"shape": [
5120,
2240
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45875200,
"byteOffset": 0
}
],
"md5sum": "59792346620d12b617c19033818e4430"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 91750400,
"records": [
{
"name": "transformer.h.11.mlp.gate_up_proj.q_weight",
"shape": [
35840,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 91750400,
"byteOffset": 0
}
],
"md5sum": "12ceb6e32e5c272cafc6d810bf865f64"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 32788480,
"records": [
{
"name": "transformer.h.10.mixer.qkv_proj.q_scale",
"shape": [
7680,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 0
},
{
"name": "transformer.h.11.ln.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 2457600
},
{
"name": "transformer.h.11.mlp.down_proj.q_scale",
"shape": [
5120,
560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5734400,
"byteOffset": 2467840
},
{
"name": "transformer.h.11.mlp.gate_up_proj.q_scale",
"shape": [
35840,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11468800,
"byteOffset": 8202240
},
{
"name": "transformer.h.11.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 19671040
},
{
"name": "transformer.h.11.mixer.out_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 19681280
}
],
"md5sum": "cb764a330a738b74a5c81c7b795cef92"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 45875200,
"records": [
{
"name": "transformer.h.12.mlp.down_proj.q_weight",
"shape": [
5120,
2240
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45875200,
"byteOffset": 0
}
],
"md5sum": "883df7d76f5dd38dae6c2d1c0c0616af"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 91750400,
"records": [
{
"name": "transformer.h.12.mlp.gate_up_proj.q_weight",
"shape": [
35840,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 91750400,
"byteOffset": 0
}
],
"md5sum": "c255d0bcf530d64cf53df0af14f247c0"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 29501440,
"records": [
{
"name": "transformer.h.11.mixer.out_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 0
},
{
"name": "transformer.h.11.mixer.qkv_proj.q_weight",
"shape": [
7680,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 1638400
},
{
"name": "transformer.h.11.mixer.qkv_proj.q_scale",
"shape": [
7680,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 21299200
},
{
"name": "transformer.h.12.ln.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 23756800
},
{
"name": "transformer.h.12.mlp.down_proj.q_scale",
"shape": [
5120,
560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5734400,
"byteOffset": 23767040
}
],
"md5sum": "595d468e4f75498ae25a74531d9e38f5"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 19660800,
"records": [
{
"name": "transformer.h.12.mixer.qkv_proj.q_weight",
"shape": [
7680,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 0
}
],
"md5sum": "6829b6a5d95f951b6f9bd63647999883"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 45875200,
"records": [
{
"name": "transformer.h.5.mlp.down_proj.q_weight",
"shape": [
5120,
2240
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45875200,
"byteOffset": 0
}
],
"md5sum": "70117c4dab88d3be52d31a4caa7a762f"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 28692480,
"records": [
{
"name": "transformer.h.12.mlp.gate_up_proj.q_scale",
"shape": [
35840,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11468800,
"byteOffset": 0
},
{
"name": "transformer.h.12.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11468800
},
{
"name": "transformer.h.12.mixer.out_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 11479040
},
{
"name": "transformer.h.12.mixer.out_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 24586240
},
{
"name": "transformer.h.12.mixer.qkv_proj.q_scale",
"shape": [
7680,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 26224640
},
{
"name": "transformer.h.5.ln.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28682240
}
],
"md5sum": "2919ff9a28276ec99e869791912bc282"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 45875200,
"records": [
{
"name": "transformer.h.6.mlp.down_proj.q_weight",
"shape": [
5120,
2240
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45875200,
"byteOffset": 0
}
],
"md5sum": "8b221146c228d6506e7e6464ce1736f0"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 91750400,
"records": [
{
"name": "transformer.h.6.mlp.gate_up_proj.q_weight",
"shape": [
35840,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 91750400,
"byteOffset": 0
}
],
"md5sum": "c2d998f333cddb5abcd91096093658f1"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 22968320,
"records": [
{
"name": "transformer.h.5.mlp.down_proj.q_scale",
"shape": [
5120,
560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5734400,
"byteOffset": 0
},
{
"name": "transformer.h.5.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 5734400
},
{
"name": "transformer.h.6.ln.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 5744640
},
{
"name": "transformer.h.6.mlp.down_proj.q_scale",
"shape": [
5120,
560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5734400,
"byteOffset": 5754880
},
{
"name": "transformer.h.6.mlp.gate_up_proj.q_scale",
"shape": [
35840,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11468800,
"byteOffset": 11489280
},
{
"name": "transformer.h.6.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 22958080
}
],
"md5sum": "e380e9c72a3e712f8854baa17ef685a7"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 19660800,
"records": [
{
"name": "transformer.h.6.mixer.qkv_proj.q_weight",
"shape": [
7680,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 0
}
],
"md5sum": "4d8728826a946de5f139cf2e568b7f2b"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 45875200,
"records": [
{
"name": "transformer.h.7.mlp.down_proj.q_weight",
"shape": [
5120,
2240
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45875200,
"byteOffset": 0
}
],
"md5sum": "1ca19ab730216219908f65857202de08"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 91750400,
"records": [
{
"name": "transformer.h.7.mlp.gate_up_proj.q_weight",
"shape": [
35840,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 91750400,
"byteOffset": 0
}
],
"md5sum": "4fe13737df09928b58b0f374e52ad79a"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 22947840,
"records": [
{
"name": "transformer.h.6.mixer.out_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "transformer.h.6.mixer.out_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "transformer.h.6.mixer.qkv_proj.q_scale",
"shape": [
7680,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 14745600
},
{
"name": "transformer.h.7.ln.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 17203200
},
{
"name": "transformer.h.7.mlp.down_proj.q_scale",
"shape": [
5120,
560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5734400,
"byteOffset": 17213440
}
],
"md5sum": "3cd729ee8197a5b0ac34c3b38e9bcc53"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 19660800,
"records": [
{
"name": "transformer.h.7.mixer.qkv_proj.q_weight",
"shape": [
7680,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 0
}
],
"md5sum": "fd26085c3c1281d6396dce19012e9f5b"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 45875200,
"records": [
{
"name": "transformer.h.8.mlp.down_proj.q_weight",
"shape": [
5120,
2240
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45875200,
"byteOffset": 0
}
],
"md5sum": "ec491695613117b774ee690da6f9ebad"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 28692480,
"records": [
{
"name": "transformer.h.7.mlp.gate_up_proj.q_scale",
"shape": [
35840,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11468800,
"byteOffset": 0
},
{
"name": "transformer.h.7.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11468800
},
{
"name": "transformer.h.7.mixer.out_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 11479040
},
{
"name": "transformer.h.7.mixer.out_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 24586240
},
{
"name": "transformer.h.7.mixer.qkv_proj.q_scale",
"shape": [
7680,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 26224640
},
{
"name": "transformer.h.8.ln.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28682240
}
],
"md5sum": "30054d33c23e3109b661af2136a2b5fc"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 91750400,
"records": [
{
"name": "transformer.h.8.mlp.gate_up_proj.q_weight",
"shape": [
35840,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 91750400,
"byteOffset": 0
}
],
"md5sum": "d09fc62aecba8847e605f4040a505ade"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 19660800,
"records": [
{
"name": "transformer.h.8.mixer.qkv_proj.q_weight",
"shape": [
7680,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 0
}
],
"md5sum": "ed850c4869f5fe34612a54ff38d2a7f3"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 31959040,
"records": [
{
"name": "transformer.h.8.mlp.down_proj.q_scale",
"shape": [
5120,
560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5734400,
"byteOffset": 0
},
{
"name": "transformer.h.8.mlp.gate_up_proj.q_scale",
"shape": [
35840,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11468800,
"byteOffset": 5734400
},
{
"name": "transformer.h.8.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 17203200
},
{
"name": "transformer.h.8.mixer.out_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 17213440
},
{
"name": "transformer.h.8.mixer.out_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 30320640
}
],
"md5sum": "3f7972ca5adc70f31224324ac310b1cb"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 45875200,
"records": [
{
"name": "transformer.h.9.mlp.down_proj.q_weight",
"shape": [
5120,
2240
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45875200,
"byteOffset": 0
}
],
"md5sum": "a42ec1de0e33bb38a04b3a7e4ef0e6e5"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 91750400,
"records": [
{
"name": "transformer.h.9.mlp.gate_up_proj.q_weight",
"shape": [
35840,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 91750400,
"byteOffset": 0
}
],
"md5sum": "7a3b31765e0526ec2c812ddba2576624"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 32788480,
"records": [
{
"name": "transformer.h.8.mixer.qkv_proj.q_scale",
"shape": [
7680,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 0
},
{
"name": "transformer.h.9.ln.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 2457600
},
{
"name": "transformer.h.9.mlp.down_proj.q_scale",
"shape": [
5120,
560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5734400,
"byteOffset": 2467840
},
{
"name": "transformer.h.9.mlp.gate_up_proj.q_scale",
"shape": [
35840,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11468800,
"byteOffset": 8202240
},
{
"name": "transformer.h.9.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 19671040
},
{
"name": "transformer.h.9.mixer.out_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 19681280
}
],
"md5sum": "3f5f58dbf2b68730e1527ed3757370a7"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 45875200,
"records": [
{
"name": "transformer.h.13.mlp.down_proj.q_weight",
"shape": [
5120,
2240
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45875200,
"byteOffset": 0
}
],
"md5sum": "cb04b969a003e3e2b58fe7b8ba7ac0fd"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 91750400,
"records": [
{
"name": "transformer.h.13.mlp.gate_up_proj.q_weight",
"shape": [
35840,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 91750400,
"byteOffset": 0
}
],
"md5sum": "583a94ed54db0df4c0ce4bf5d273cc78"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 29501440,
"records": [
{
"name": "transformer.h.9.mixer.out_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 0
},
{
"name": "transformer.h.9.mixer.qkv_proj.q_weight",
"shape": [
7680,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 1638400
},
{
"name": "transformer.h.9.mixer.qkv_proj.q_scale",
"shape": [
7680,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 21299200
},
{
"name": "transformer.h.13.ln.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 23756800
},
{
"name": "transformer.h.13.mlp.down_proj.q_scale",
"shape": [
5120,
560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5734400,
"byteOffset": 23767040
}
],
"md5sum": "654791d864a3dbbf8b7d477ba999b35c"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 19660800,
"records": [
{
"name": "transformer.h.13.mixer.qkv_proj.q_weight",
"shape": [
7680,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 0
}
],
"md5sum": "fbd8f3a073ee6b6ce0961f22c60cbb3c"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 45875200,
"records": [
{
"name": "transformer.h.14.mlp.down_proj.q_weight",
"shape": [
5120,
2240
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45875200,
"byteOffset": 0
}
],
"md5sum": "d95c85e5e0d634267d4d2942c122a269"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 28692480,
"records": [
{
"name": "transformer.h.13.mlp.gate_up_proj.q_scale",
"shape": [
35840,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11468800,
"byteOffset": 0
},
{
"name": "transformer.h.13.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11468800
},
{
"name": "transformer.h.13.mixer.out_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 11479040
},
{
"name": "transformer.h.13.mixer.out_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 24586240
},
{
"name": "transformer.h.13.mixer.qkv_proj.q_scale",
"shape": [
7680,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 26224640
},
{
"name": "transformer.h.14.ln.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28682240
}
],
"md5sum": "d499ea4a9704768e052252223d52d86c"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 91750400,
"records": [
{
"name": "transformer.h.14.mlp.gate_up_proj.q_weight",
"shape": [
35840,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 91750400,
"byteOffset": 0
}
],
"md5sum": "84fe35019339678ab7f9a7f902944727"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 19660800,
"records": [
{
"name": "transformer.h.14.mixer.qkv_proj.q_weight",
"shape": [
7680,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 0
}
],
"md5sum": "f6d59439cdc5f6a7c30fe0536aab5051"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 31959040,
"records": [
{
"name": "transformer.h.14.mlp.down_proj.q_scale",
"shape": [
5120,
560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5734400,
"byteOffset": 0
},
{
"name": "transformer.h.14.mlp.gate_up_proj.q_scale",
"shape": [
35840,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11468800,
"byteOffset": 5734400
},
{
"name": "transformer.h.14.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 17203200
},
{
"name": "transformer.h.14.mixer.out_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 17213440
},
{
"name": "transformer.h.14.mixer.out_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 30320640
}
],
"md5sum": "451ed6cdfa8764c87c8a0bc0642b785b"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 45875200,
"records": [
{
"name": "transformer.h.15.mlp.down_proj.q_weight",
"shape": [
5120,
2240
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45875200,
"byteOffset": 0
}
],
"md5sum": "59e6cca0afb98a46abb2bfaab1119fcd"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 91750400,
"records": [
{
"name": "transformer.h.15.mlp.gate_up_proj.q_weight",
"shape": [
35840,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 91750400,
"byteOffset": 0
}
],
"md5sum": "2d820ac8de0cb889047ac2b767641266"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 32788480,
"records": [
{
"name": "transformer.h.14.mixer.qkv_proj.q_scale",
"shape": [
7680,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 0
},
{
"name": "transformer.h.15.ln.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 2457600
},
{
"name": "transformer.h.15.mlp.down_proj.q_scale",
"shape": [
5120,
560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5734400,
"byteOffset": 2467840
},
{
"name": "transformer.h.15.mlp.gate_up_proj.q_scale",
"shape": [
35840,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11468800,
"byteOffset": 8202240
},
{
"name": "transformer.h.15.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 19671040
},
{
"name": "transformer.h.15.mixer.out_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 19681280
}
],
"md5sum": "a57dc4d1f57f087079b56932d3b24414"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 45875200,
"records": [
{
"name": "transformer.h.16.mlp.down_proj.q_weight",
"shape": [
5120,
2240
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45875200,
"byteOffset": 0
}
],
"md5sum": "6b6346e2218d35d0d016bf98c855e0ab"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 91750400,
"records": [
{
"name": "transformer.h.16.mlp.gate_up_proj.q_weight",
"shape": [
35840,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 91750400,
"byteOffset": 0
}
],
"md5sum": "7a35dcd1070e61c2cc476dcebab88153"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 29501440,
"records": [
{
"name": "transformer.h.15.mixer.out_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 0
},
{
"name": "transformer.h.15.mixer.qkv_proj.q_weight",
"shape": [
7680,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 1638400
},
{
"name": "transformer.h.15.mixer.qkv_proj.q_scale",
"shape": [
7680,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 21299200
},
{
"name": "transformer.h.16.ln.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 23756800
},
{
"name": "transformer.h.16.mlp.down_proj.q_scale",
"shape": [
5120,
560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5734400,
"byteOffset": 23767040
}
],
"md5sum": "6fd8b4771938c75f914b1288b0f1e03d"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 19660800,
"records": [
{
"name": "transformer.h.16.mixer.qkv_proj.q_weight",
"shape": [
7680,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 0
}
],
"md5sum": "ee848fe7526563f34a663c18ff8ffff5"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 45875200,
"records": [
{
"name": "transformer.h.17.mlp.down_proj.q_weight",
"shape": [
5120,
2240
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45875200,
"byteOffset": 0
}
],
"md5sum": "318f0539207fd1de700c3ed97765e5f9"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 28692480,
"records": [
{
"name": "transformer.h.16.mlp.gate_up_proj.q_scale",
"shape": [
35840,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11468800,
"byteOffset": 0
},
{
"name": "transformer.h.16.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11468800
},
{
"name": "transformer.h.16.mixer.out_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 11479040
},
{
"name": "transformer.h.16.mixer.out_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 24586240
},
{
"name": "transformer.h.16.mixer.qkv_proj.q_scale",
"shape": [
7680,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 26224640
},
{
"name": "transformer.h.17.ln.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28682240
}
],
"md5sum": "c644c5dc7192f2289998df7307b7ca35"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 91750400,
"records": [
{
"name": "transformer.h.17.mlp.gate_up_proj.q_weight",
"shape": [
35840,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 91750400,
"byteOffset": 0
}
],
"md5sum": "4df83cdb7ea5d635dc19b6b4c3ae32e7"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 19660800,
"records": [
{
"name": "transformer.h.17.mixer.qkv_proj.q_weight",
"shape": [
7680,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 0
}
],
"md5sum": "49f9de32bc8baedbd5b92e26d86df2ee"
},
{
"dataPath": "params_shard_99.bin",
"format": "raw-shard",
"nbytes": 31959040,
"records": [
{
"name": "transformer.h.17.mlp.down_proj.q_scale",
"shape": [
5120,
560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5734400,
"byteOffset": 0
},
{
"name": "transformer.h.17.mlp.gate_up_proj.q_scale",
"shape": [
35840,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11468800,
"byteOffset": 5734400
},
{
"name": "transformer.h.17.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 17203200
},
{
"name": "transformer.h.17.mixer.out_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 17213440
},
{
"name": "transformer.h.17.mixer.out_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 30320640
}
],
"md5sum": "e59356cfcd687d42c4eb45cc3afe45a3"
},
{
"dataPath": "params_shard_100.bin",
"format": "raw-shard",
"nbytes": 45875200,
"records": [
{
"name": "transformer.h.18.mlp.down_proj.q_weight",
"shape": [
5120,
2240
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45875200,
"byteOffset": 0
}
],
"md5sum": "d882ae5ee6c77a128a03308b13e76b57"
},
{
"dataPath": "params_shard_101.bin",
"format": "raw-shard",
"nbytes": 91750400,
"records": [
{
"name": "transformer.h.18.mlp.gate_up_proj.q_weight",
"shape": [
35840,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 91750400,
"byteOffset": 0
}
],
"md5sum": "557c53cb2b190e54ee9eaac6e5c11af1"
},
{
"dataPath": "params_shard_102.bin",
"format": "raw-shard",
"nbytes": 32788480,
"records": [
{
"name": "transformer.h.17.mixer.qkv_proj.q_scale",
"shape": [
7680,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 0
},
{
"name": "transformer.h.18.ln.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 2457600
},
{
"name": "transformer.h.18.mlp.down_proj.q_scale",
"shape": [
5120,
560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5734400,
"byteOffset": 2467840
},
{
"name": "transformer.h.18.mlp.gate_up_proj.q_scale",
"shape": [
35840,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11468800,
"byteOffset": 8202240
},
{
"name": "transformer.h.18.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 19671040
},
{
"name": "transformer.h.18.mixer.out_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 19681280
}
],
"md5sum": "238e749371c1f3a637800979bb2e7647"
},
{
"dataPath": "params_shard_103.bin",
"format": "raw-shard",
"nbytes": 45875200,
"records": [
{
"name": "transformer.h.19.mlp.down_proj.q_weight",
"shape": [
5120,
2240
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45875200,
"byteOffset": 0
}
],
"md5sum": "2ff9f904e0217600b4ed1d3824249c14"
},
{
"dataPath": "params_shard_104.bin",
"format": "raw-shard",
"nbytes": 91750400,
"records": [
{
"name": "transformer.h.19.mlp.gate_up_proj.q_weight",
"shape": [
35840,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 91750400,
"byteOffset": 0
}
],
"md5sum": "45137761dee13d0a4a3698712ace3681"
},
{
"dataPath": "params_shard_105.bin",
"format": "raw-shard",
"nbytes": 29501440,
"records": [
{
"name": "transformer.h.18.mixer.out_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 0
},
{
"name": "transformer.h.18.mixer.qkv_proj.q_weight",
"shape": [
7680,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 1638400
},
{
"name": "transformer.h.18.mixer.qkv_proj.q_scale",
"shape": [
7680,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 21299200
},
{
"name": "transformer.h.19.ln.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 23756800
},
{
"name": "transformer.h.19.mlp.down_proj.q_scale",
"shape": [
5120,
560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5734400,
"byteOffset": 23767040
}
],
"md5sum": "cc93f9e700741d776e48524ccc15a5f0"
},
{
"dataPath": "params_shard_106.bin",
"format": "raw-shard",
"nbytes": 19660800,
"records": [
{
"name": "transformer.h.19.mixer.qkv_proj.q_weight",
"shape": [
7680,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 0
}
],
"md5sum": "dcfbadc9ba447e96ac40d009b6afa347"
},
{
"dataPath": "params_shard_107.bin",
"format": "raw-shard",
"nbytes": 28682240,
"records": [
{
"name": "transformer.h.19.mlp.gate_up_proj.q_scale",
"shape": [
35840,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11468800,
"byteOffset": 0
},
{
"name": "transformer.h.19.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11468800
},
{
"name": "transformer.h.19.mixer.out_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 11479040
},
{
"name": "transformer.h.19.mixer.out_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 24586240
},
{
"name": "transformer.h.19.mixer.qkv_proj.q_scale",
"shape": [
7680,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 26224640
}
],
"md5sum": "01b4eccdafb7c19ee6c70be7b46ffc2c"
},
{
"dataPath": "params_shard_108.bin",
"format": "raw-shard",
"nbytes": 19660800,
"records": [
{
"name": "transformer.h.20.mixer.qkv_proj.q_weight",
"shape": [
7680,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 0
}
],
"md5sum": "653426db6b65ed70a63a0c06c72a79c1"
},
{
"dataPath": "params_shard_109.bin",
"format": "raw-shard",
"nbytes": 45875200,
"records": [
{
"name": "transformer.h.20.mlp.down_proj.q_weight",
"shape": [
5120,
2240
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45875200,
"byteOffset": 0
}
],
"md5sum": "14bdf820305ee594132d00ef77fdbdf0"
},
{
"dataPath": "params_shard_110.bin",
"format": "raw-shard",
"nbytes": 91750400,
"records": [
{
"name": "transformer.h.20.mlp.gate_up_proj.q_weight",
"shape": [
35840,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 91750400,
"byteOffset": 0
}
],
"md5sum": "dd24cdbb0b89ea6cbe4746cecec66101"
},
{
"dataPath": "params_shard_111.bin",
"format": "raw-shard",
"nbytes": 22947840,
"records": [
{
"name": "transformer.h.20.mixer.out_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "transformer.h.20.mixer.out_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "transformer.h.20.mixer.qkv_proj.q_scale",
"shape": [
7680,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 14745600
},
{
"name": "transformer.h.20.ln.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 17203200
},
{
"name": "transformer.h.20.mlp.down_proj.q_scale",
"shape": [
5120,
560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5734400,
"byteOffset": 17213440
}
],
"md5sum": "b3770e65f62ed5bf742ea1c18bb2d7d9"
},
{
"dataPath": "params_shard_112.bin",
"format": "raw-shard",
"nbytes": 45875200,
"records": [
{
"name": "transformer.h.21.mlp.down_proj.q_weight",
"shape": [
5120,
2240
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45875200,
"byteOffset": 0
}
],
"md5sum": "e0921cf640063e8f5c85c974fbf6665c"
},
{
"dataPath": "params_shard_113.bin",
"format": "raw-shard",
"nbytes": 91750400,
"records": [
{
"name": "transformer.h.21.mlp.gate_up_proj.q_weight",
"shape": [
35840,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 91750400,
"byteOffset": 0
}
],
"md5sum": "0409aca65194fdf15b9d6bb5240aaca0"
},
{
"dataPath": "params_shard_114.bin",
"format": "raw-shard",
"nbytes": 28702720,
"records": [
{
"name": "transformer.h.20.mlp.gate_up_proj.q_scale",
"shape": [
35840,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11468800,
"byteOffset": 0
},
{
"name": "transformer.h.20.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11468800
},
{
"name": "transformer.h.21.ln.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11479040
},
{
"name": "transformer.h.21.mlp.down_proj.q_scale",
"shape": [
5120,
560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5734400,
"byteOffset": 11489280
},
{
"name": "transformer.h.21.mlp.gate_up_proj.q_scale",
"shape": [
35840,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11468800,
"byteOffset": 17223680
},
{
"name": "transformer.h.21.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28692480
}
],
"md5sum": "201f0325cff32c4830c53702d97a2d39"
},
{
"dataPath": "params_shard_115.bin",
"format": "raw-shard",
"nbytes": 19660800,
"records": [
{
"name": "transformer.h.21.mixer.qkv_proj.q_weight",
"shape": [
7680,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 0
}
],
"md5sum": "50bfacc323b07c7c775006178f2f5bef"
},
{
"dataPath": "params_shard_116.bin",
"format": "raw-shard",
"nbytes": 45875200,
"records": [
{
"name": "transformer.h.22.mlp.down_proj.q_weight",
"shape": [
5120,
2240
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45875200,
"byteOffset": 0
}
],
"md5sum": "d2025004b017f7721dcc199be17f7a1c"
},
{
"dataPath": "params_shard_117.bin",
"format": "raw-shard",
"nbytes": 91750400,
"records": [
{
"name": "transformer.h.22.mlp.gate_up_proj.q_weight",
"shape": [
35840,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 91750400,
"byteOffset": 0
}
],
"md5sum": "3883e9d2004e52d155b36118d14790de"
},
{
"dataPath": "params_shard_118.bin",
"format": "raw-shard",
"nbytes": 22947840,
"records": [
{
"name": "transformer.h.21.mixer.out_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "transformer.h.21.mixer.out_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "transformer.h.21.mixer.qkv_proj.q_scale",
"shape": [
7680,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 14745600
},
{
"name": "transformer.h.22.ln.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 17203200
},
{
"name": "transformer.h.22.mlp.down_proj.q_scale",
"shape": [
5120,
560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5734400,
"byteOffset": 17213440
}
],
"md5sum": "765bd68ec1c5c94408ccdb08fbe634cb"
},
{
"dataPath": "params_shard_119.bin",
"format": "raw-shard",
"nbytes": 19660800,
"records": [
{
"name": "transformer.h.22.mixer.qkv_proj.q_weight",
"shape": [
7680,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 0
}
],
"md5sum": "abd715f3f826850a45c3b9690af9e9a7"
},
{
"dataPath": "params_shard_120.bin",
"format": "raw-shard",
"nbytes": 45875200,
"records": [
{
"name": "transformer.h.23.mlp.down_proj.q_weight",
"shape": [
5120,
2240
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45875200,
"byteOffset": 0
}
],
"md5sum": "a0944e9f111b0b7bf73941174560d079"
},
{
"dataPath": "params_shard_121.bin",
"format": "raw-shard",
"nbytes": 28692480,
"records": [
{
"name": "transformer.h.22.mlp.gate_up_proj.q_scale",
"shape": [
35840,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11468800,
"byteOffset": 0
},
{
"name": "transformer.h.22.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11468800
},
{
"name": "transformer.h.22.mixer.out_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 11479040
},
{
"name": "transformer.h.22.mixer.out_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 24586240
},
{
"name": "transformer.h.22.mixer.qkv_proj.q_scale",
"shape": [
7680,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 26224640
},
{
"name": "transformer.h.23.ln.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28682240
}
],
"md5sum": "a40db7b1256977ea848407102446da7b"
},
{
"dataPath": "params_shard_122.bin",
"format": "raw-shard",
"nbytes": 91750400,
"records": [
{
"name": "transformer.h.23.mlp.gate_up_proj.q_weight",
"shape": [
35840,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 91750400,
"byteOffset": 0
}
],
"md5sum": "8166706e49b1ea6d34d34b32ad9bb594"
},
{
"dataPath": "params_shard_123.bin",
"format": "raw-shard",
"nbytes": 19660800,
"records": [
{
"name": "transformer.h.23.mixer.qkv_proj.q_weight",
"shape": [
7680,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 0
}
],
"md5sum": "8f67cacae1deddea9dbb1c42ebe0b26b"
},
{
"dataPath": "params_shard_124.bin",
"format": "raw-shard",
"nbytes": 31959040,
"records": [
{
"name": "transformer.h.23.mlp.down_proj.q_scale",
"shape": [
5120,
560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5734400,
"byteOffset": 0
},
{
"name": "transformer.h.23.mlp.gate_up_proj.q_scale",
"shape": [
35840,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11468800,
"byteOffset": 5734400
},
{
"name": "transformer.h.23.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 17203200
},
{
"name": "transformer.h.23.mixer.out_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 17213440
},
{
"name": "transformer.h.23.mixer.out_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 30320640
}
],
"md5sum": "da77780d9d2f52f08d9ffdbfced2bb16"
},
{
"dataPath": "params_shard_125.bin",
"format": "raw-shard",
"nbytes": 45875200,
"records": [
{
"name": "transformer.h.24.mlp.down_proj.q_weight",
"shape": [
5120,
2240
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45875200,
"byteOffset": 0
}
],
"md5sum": "fabaa0119e733f29b74bccd6a17c28b0"
},
{
"dataPath": "params_shard_126.bin",
"format": "raw-shard",
"nbytes": 91750400,
"records": [
{
"name": "transformer.h.24.mlp.gate_up_proj.q_weight",
"shape": [
35840,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 91750400,
"byteOffset": 0
}
],
"md5sum": "2e1caa0363849837d58436e36a8af950"
},
{
"dataPath": "params_shard_127.bin",
"format": "raw-shard",
"nbytes": 32788480,
"records": [
{
"name": "transformer.h.23.mixer.qkv_proj.q_scale",
"shape": [
7680,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 0
},
{
"name": "transformer.h.24.ln.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 2457600
},
{
"name": "transformer.h.24.mlp.down_proj.q_scale",
"shape": [
5120,
560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5734400,
"byteOffset": 2467840
},
{
"name": "transformer.h.24.mlp.gate_up_proj.q_scale",
"shape": [
35840,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11468800,
"byteOffset": 8202240
},
{
"name": "transformer.h.24.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 19671040
},
{
"name": "transformer.h.24.mixer.out_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 19681280
}
],
"md5sum": "6a7e39b2777a1ad8c228fbc37f97f1ce"
},
{
"dataPath": "params_shard_128.bin",
"format": "raw-shard",
"nbytes": 45875200,
"records": [
{
"name": "transformer.h.25.mlp.down_proj.q_weight",
"shape": [
5120,
2240
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45875200,
"byteOffset": 0
}
],
"md5sum": "e12108fd5ca0592084f3483e0e8e57fe"
},
{
"dataPath": "params_shard_129.bin",
"format": "raw-shard",
"nbytes": 91750400,
"records": [
{
"name": "transformer.h.25.mlp.gate_up_proj.q_weight",
"shape": [
35840,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 91750400,
"byteOffset": 0
}
],
"md5sum": "687a5de87e5c8b85e850883a505f0c25"
},
{
"dataPath": "params_shard_130.bin",
"format": "raw-shard",
"nbytes": 29501440,
"records": [
{
"name": "transformer.h.24.mixer.out_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 0
},
{
"name": "transformer.h.24.mixer.qkv_proj.q_weight",
"shape": [
7680,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 1638400
},
{
"name": "transformer.h.24.mixer.qkv_proj.q_scale",
"shape": [
7680,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 21299200
},
{
"name": "transformer.h.25.ln.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 23756800
},
{
"name": "transformer.h.25.mlp.down_proj.q_scale",
"shape": [
5120,
560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5734400,
"byteOffset": 23767040
}
],
"md5sum": "e7e145b434a58f76dd1b79c3053fb21d"
},
{
"dataPath": "params_shard_131.bin",
"format": "raw-shard",
"nbytes": 19660800,
"records": [
{
"name": "transformer.h.25.mixer.qkv_proj.q_weight",
"shape": [
7680,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 0
}
],
"md5sum": "2a9d317100ce29998840d75e72ed9a09"
},
{
"dataPath": "params_shard_132.bin",
"format": "raw-shard",
"nbytes": 45875200,
"records": [
{
"name": "transformer.h.26.mlp.down_proj.q_weight",
"shape": [
5120,
2240
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45875200,
"byteOffset": 0
}
],
"md5sum": "4c0568f3be30fce4717bdf2b77b196df"
},
{
"dataPath": "params_shard_133.bin",
"format": "raw-shard",
"nbytes": 28692480,
"records": [
{
"name": "transformer.h.25.mlp.gate_up_proj.q_scale",
"shape": [
35840,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11468800,
"byteOffset": 0
},
{
"name": "transformer.h.25.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11468800
},
{
"name": "transformer.h.25.mixer.out_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 11479040
},
{
"name": "transformer.h.25.mixer.out_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 24586240
},
{
"name": "transformer.h.25.mixer.qkv_proj.q_scale",
"shape": [
7680,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 26224640
},
{
"name": "transformer.h.26.ln.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28682240
}
],
"md5sum": "eff2e390a822beb782a4164f4ab59206"
},
{
"dataPath": "params_shard_134.bin",
"format": "raw-shard",
"nbytes": 91750400,
"records": [
{
"name": "transformer.h.26.mlp.gate_up_proj.q_weight",
"shape": [
35840,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 91750400,
"byteOffset": 0
}
],
"md5sum": "10cfd7143b89b57485c0d1f49e9e6fb9"
},
{
"dataPath": "params_shard_135.bin",
"format": "raw-shard",
"nbytes": 19660800,
"records": [
{
"name": "transformer.h.26.mixer.qkv_proj.q_weight",
"shape": [
7680,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 0
}
],
"md5sum": "90a2b502979aedcfe6b7959a71bce7cb"
},
{
"dataPath": "params_shard_136.bin",
"format": "raw-shard",
"nbytes": 31959040,
"records": [
{
"name": "transformer.h.26.mlp.down_proj.q_scale",
"shape": [
5120,
560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5734400,
"byteOffset": 0
},
{
"name": "transformer.h.26.mlp.gate_up_proj.q_scale",
"shape": [
35840,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11468800,
"byteOffset": 5734400
},
{
"name": "transformer.h.26.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 17203200
},
{
"name": "transformer.h.26.mixer.out_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 17213440
},
{
"name": "transformer.h.26.mixer.out_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 30320640
}
],
"md5sum": "a5809205747e05746932a126a1f5e599"
},
{
"dataPath": "params_shard_137.bin",
"format": "raw-shard",
"nbytes": 19660800,
"records": [
{
"name": "transformer.h.27.mixer.qkv_proj.q_weight",
"shape": [
7680,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 0
}
],
"md5sum": "ff2f0db89b5c750b8faeee80cd6e79b1"
},
{
"dataPath": "params_shard_138.bin",
"format": "raw-shard",
"nbytes": 45875200,
"records": [
{
"name": "transformer.h.27.mlp.down_proj.q_weight",
"shape": [
5120,
2240
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45875200,
"byteOffset": 0
}
],
"md5sum": "f30d84f7da073c87a2033deb5aec75fb"
},
{
"dataPath": "params_shard_139.bin",
"format": "raw-shard",
"nbytes": 91750400,
"records": [
{
"name": "transformer.h.27.mlp.gate_up_proj.q_weight",
"shape": [
35840,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 91750400,
"byteOffset": 0
}
],
"md5sum": "46ccc4016f39ad16dd818594570c4421"
},
{
"dataPath": "params_shard_140.bin",
"format": "raw-shard",
"nbytes": 25405440,
"records": [
{
"name": "transformer.h.26.mixer.qkv_proj.q_scale",
"shape": [
7680,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 0
},
{
"name": "transformer.h.27.mixer.out_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 2457600
},
{
"name": "transformer.h.27.mixer.out_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 15564800
},
{
"name": "transformer.h.27.mixer.qkv_proj.q_scale",
"shape": [
7680,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 17203200
},
{
"name": "transformer.h.27.ln.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 19660800
},
{
"name": "transformer.h.27.mlp.down_proj.q_scale",
"shape": [
5120,
560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5734400,
"byteOffset": 19671040
}
],
"md5sum": "104db2983ec55400f17e70d0b5a35d95"
},
{
"dataPath": "params_shard_141.bin",
"format": "raw-shard",
"nbytes": 45875200,
"records": [
{
"name": "transformer.h.28.mlp.down_proj.q_weight",
"shape": [
5120,
2240
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45875200,
"byteOffset": 0
}
],
"md5sum": "7957737e1037c4c3026ad44d6558a9d4"
},
{
"dataPath": "params_shard_142.bin",
"format": "raw-shard",
"nbytes": 91750400,
"records": [
{
"name": "transformer.h.28.mlp.gate_up_proj.q_weight",
"shape": [
35840,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 91750400,
"byteOffset": 0
}
],
"md5sum": "1f25293127b70aacd24e256db1b8155c"
},
{
"dataPath": "params_shard_143.bin",
"format": "raw-shard",
"nbytes": 28702720,
"records": [
{
"name": "transformer.h.27.mlp.gate_up_proj.q_scale",
"shape": [
35840,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11468800,
"byteOffset": 0
},
{
"name": "transformer.h.27.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11468800
},
{
"name": "transformer.h.28.ln.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11479040
},
{
"name": "transformer.h.28.mlp.down_proj.q_scale",
"shape": [
5120,
560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5734400,
"byteOffset": 11489280
},
{
"name": "transformer.h.28.mlp.gate_up_proj.q_scale",
"shape": [
35840,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11468800,
"byteOffset": 17223680
},
{
"name": "transformer.h.28.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28692480
}
],
"md5sum": "1b3cbec4187ecd7510ce45e56f36cd81"
},
{
"dataPath": "params_shard_144.bin",
"format": "raw-shard",
"nbytes": 19660800,
"records": [
{
"name": "transformer.h.28.mixer.qkv_proj.q_weight",
"shape": [
7680,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 0
}
],
"md5sum": "82c89446825cf4f6c4932d30138e7990"
},
{
"dataPath": "params_shard_145.bin",
"format": "raw-shard",
"nbytes": 45875200,
"records": [
{
"name": "transformer.h.29.mlp.down_proj.q_weight",
"shape": [
5120,
2240
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45875200,
"byteOffset": 0
}
],
"md5sum": "9d3b38f314fa76e85f4eb565bb5d1c44"
},
{
"dataPath": "params_shard_146.bin",
"format": "raw-shard",
"nbytes": 91750400,
"records": [
{
"name": "transformer.h.29.mlp.gate_up_proj.q_weight",
"shape": [
35840,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 91750400,
"byteOffset": 0
}
],
"md5sum": "4492beaa3d4a211be9a470a935850fb6"
},
{
"dataPath": "params_shard_147.bin",
"format": "raw-shard",
"nbytes": 22947840,
"records": [
{
"name": "transformer.h.28.mixer.out_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "transformer.h.28.mixer.out_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "transformer.h.28.mixer.qkv_proj.q_scale",
"shape": [
7680,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 14745600
},
{
"name": "transformer.h.29.ln.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 17203200
},
{
"name": "transformer.h.29.mlp.down_proj.q_scale",
"shape": [
5120,
560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5734400,
"byteOffset": 17213440
}
],
"md5sum": "2b2fe34b085032f830cf1d859aed7dea"
},
{
"dataPath": "params_shard_148.bin",
"format": "raw-shard",
"nbytes": 19660800,
"records": [
{
"name": "transformer.h.29.mixer.qkv_proj.q_weight",
"shape": [
7680,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 0
}
],
"md5sum": "5f0fb2169b2db2884339ca27b573afb3"
},
{
"dataPath": "params_shard_149.bin",
"format": "raw-shard",
"nbytes": 45875200,
"records": [
{
"name": "transformer.h.30.mlp.down_proj.q_weight",
"shape": [
5120,
2240
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45875200,
"byteOffset": 0
}
],
"md5sum": "05684b76cc17f59133468f6304a37a2a"
},
{
"dataPath": "params_shard_150.bin",
"format": "raw-shard",
"nbytes": 28692480,
"records": [
{
"name": "transformer.h.29.mlp.gate_up_proj.q_scale",
"shape": [
35840,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11468800,
"byteOffset": 0
},
{
"name": "transformer.h.29.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11468800
},
{
"name": "transformer.h.29.mixer.out_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 11479040
},
{
"name": "transformer.h.29.mixer.out_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 24586240
},
{
"name": "transformer.h.29.mixer.qkv_proj.q_scale",
"shape": [
7680,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 26224640
},
{
"name": "transformer.h.30.ln.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28682240
}
],
"md5sum": "f004002fa77babc9bacbef6bd4c537c4"
},
{
"dataPath": "params_shard_151.bin",
"format": "raw-shard",
"nbytes": 91750400,
"records": [
{
"name": "transformer.h.30.mlp.gate_up_proj.q_weight",
"shape": [
35840,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 91750400,
"byteOffset": 0
}
],
"md5sum": "8b4bb303f6ba925402cc85e0e02fa590"
},
{
"dataPath": "params_shard_152.bin",
"format": "raw-shard",
"nbytes": 19660800,
"records": [
{
"name": "transformer.h.30.mixer.qkv_proj.q_weight",
"shape": [
7680,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 0
}
],
"md5sum": "da7f32d22d118f2d2036eb8d4a033094"
},
{
"dataPath": "params_shard_153.bin",
"format": "raw-shard",
"nbytes": 31959040,
"records": [
{
"name": "transformer.h.30.mlp.down_proj.q_scale",
"shape": [
5120,
560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5734400,
"byteOffset": 0
},
{
"name": "transformer.h.30.mlp.gate_up_proj.q_scale",
"shape": [
35840,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11468800,
"byteOffset": 5734400
},
{
"name": "transformer.h.30.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 17203200
},
{
"name": "transformer.h.30.mixer.out_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 17213440
},
{
"name": "transformer.h.30.mixer.out_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 30320640
}
],
"md5sum": "6a932e7e28e54e35eeb6e950444ea045"
},
{
"dataPath": "params_shard_154.bin",
"format": "raw-shard",
"nbytes": 45875200,
"records": [
{
"name": "transformer.h.31.mlp.down_proj.q_weight",
"shape": [
5120,
2240
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45875200,
"byteOffset": 0
}
],
"md5sum": "e3c4c0f5ed3cda6b9899fafef30079e7"
},
{
"dataPath": "params_shard_155.bin",
"format": "raw-shard",
"nbytes": 91750400,
"records": [
{
"name": "transformer.h.31.mlp.gate_up_proj.q_weight",
"shape": [
35840,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 91750400,
"byteOffset": 0
}
],
"md5sum": "ebc40df92ff2f178e7789073cd557ced"
},
{
"dataPath": "params_shard_156.bin",
"format": "raw-shard",
"nbytes": 32788480,
"records": [
{
"name": "transformer.h.30.mixer.qkv_proj.q_scale",
"shape": [
7680,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 0
},
{
"name": "transformer.h.31.ln.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 2457600
},
{
"name": "transformer.h.31.mlp.down_proj.q_scale",
"shape": [
5120,
560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5734400,
"byteOffset": 2467840
},
{
"name": "transformer.h.31.mlp.gate_up_proj.q_scale",
"shape": [
35840,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11468800,
"byteOffset": 8202240
},
{
"name": "transformer.h.31.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 19671040
},
{
"name": "transformer.h.31.mixer.out_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 19681280
}
],
"md5sum": "031a136a4da6e8061efa3f8128ef024f"
},
{
"dataPath": "params_shard_157.bin",
"format": "raw-shard",
"nbytes": 45875200,
"records": [
{
"name": "transformer.h.32.mlp.down_proj.q_weight",
"shape": [
5120,
2240
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45875200,
"byteOffset": 0
}
],
"md5sum": "0c48aefbdc8deed7f226139252d44caf"
},
{
"dataPath": "params_shard_158.bin",
"format": "raw-shard",
"nbytes": 91750400,
"records": [
{
"name": "transformer.h.32.mlp.gate_up_proj.q_weight",
"shape": [
35840,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 91750400,
"byteOffset": 0
}
],
"md5sum": "a0135db99fe883b0a7c9220d56cc955d"
},
{
"dataPath": "params_shard_159.bin",
"format": "raw-shard",
"nbytes": 29501440,
"records": [
{
"name": "transformer.h.31.mixer.out_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 0
},
{
"name": "transformer.h.31.mixer.qkv_proj.q_weight",
"shape": [
7680,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 1638400
},
{
"name": "transformer.h.31.mixer.qkv_proj.q_scale",
"shape": [
7680,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 21299200
},
{
"name": "transformer.h.32.ln.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 23756800
},
{
"name": "transformer.h.32.mlp.down_proj.q_scale",
"shape": [
5120,
560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5734400,
"byteOffset": 23767040
}
],
"md5sum": "03d808f0f5b264664611e3934c409fd5"
},
{
"dataPath": "params_shard_160.bin",
"format": "raw-shard",
"nbytes": 19660800,
"records": [
{
"name": "transformer.h.32.mixer.qkv_proj.q_weight",
"shape": [
7680,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 0
}
],
"md5sum": "c20a8b6e9c39cf8ecdd51f4a9a97e1f9"
},
{
"dataPath": "params_shard_161.bin",
"format": "raw-shard",
"nbytes": 45875200,
"records": [
{
"name": "transformer.h.33.mlp.down_proj.q_weight",
"shape": [
5120,
2240
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45875200,
"byteOffset": 0
}
],
"md5sum": "362d86d928a17a4c05192d4f80924775"
},
{
"dataPath": "params_shard_162.bin",
"format": "raw-shard",
"nbytes": 28692480,
"records": [
{
"name": "transformer.h.32.mlp.gate_up_proj.q_scale",
"shape": [
35840,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11468800,
"byteOffset": 0
},
{
"name": "transformer.h.32.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 11468800
},
{
"name": "transformer.h.32.mixer.out_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 11479040
},
{
"name": "transformer.h.32.mixer.out_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 24586240
},
{
"name": "transformer.h.32.mixer.qkv_proj.q_scale",
"shape": [
7680,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 26224640
},
{
"name": "transformer.h.33.ln.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28682240
}
],
"md5sum": "e9ca9aab629381e699491d3c6a53e7c1"
},
{
"dataPath": "params_shard_163.bin",
"format": "raw-shard",
"nbytes": 91750400,
"records": [
{
"name": "transformer.h.33.mlp.gate_up_proj.q_weight",
"shape": [
35840,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 91750400,
"byteOffset": 0
}
],
"md5sum": "2a8ea13ddf2357b5a35014745644def3"
},
{
"dataPath": "params_shard_164.bin",
"format": "raw-shard",
"nbytes": 19660800,
"records": [
{
"name": "transformer.h.33.mixer.qkv_proj.q_weight",
"shape": [
7680,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 0
}
],
"md5sum": "01977e615616fd6a79a484ac4ddaa1e2"
},
{
"dataPath": "params_shard_165.bin",
"format": "raw-shard",
"nbytes": 31959040,
"records": [
{
"name": "transformer.h.33.mlp.down_proj.q_scale",
"shape": [
5120,
560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5734400,
"byteOffset": 0
},
{
"name": "transformer.h.33.mlp.gate_up_proj.q_scale",
"shape": [
35840,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11468800,
"byteOffset": 5734400
},
{
"name": "transformer.h.33.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 17203200
},
{
"name": "transformer.h.33.mixer.out_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 17213440
},
{
"name": "transformer.h.33.mixer.out_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 30320640
}
],
"md5sum": "6b808e7bcd73a46ee1579f5ad692e18e"
},
{
"dataPath": "params_shard_166.bin",
"format": "raw-shard",
"nbytes": 19660800,
"records": [
{
"name": "transformer.h.34.mixer.qkv_proj.q_weight",
"shape": [
7680,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 0
}
],
"md5sum": "d035aa71b72ea033a696800c3d670fc2"
},
{
"dataPath": "params_shard_167.bin",
"format": "raw-shard",
"nbytes": 19660800,
"records": [
{
"name": "transformer.h.33.mixer.qkv_proj.q_scale",
"shape": [
7680,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 0
},
{
"name": "transformer.h.34.mixer.out_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 2457600
},
{
"name": "transformer.h.34.mixer.out_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 15564800
},
{
"name": "transformer.h.34.mixer.qkv_proj.q_scale",
"shape": [
7680,
160
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 17203200
}
],
"md5sum": "47480ec3d3fcc7638d36f99bc3f38684"
}
]
}