{ "metadata": { "ParamSize": 445, "ParamBytes": 2222822400.0, "BitsPerParam": 4.501497946035061 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 194478080, "records": [ { "name": "lm_head.q_weight", "shape": [ 151936, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 194478080, "byteOffset": 0 } ], "md5sum": "c3e9d6c33f156c21a7d4aa1aabe20140" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 194478080, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 151936, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 194478080, "byteOffset": 0 } ], "md5sum": "f24bd841edada926b1bdf5a7628c6c68" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 24309760, "records": [ { "name": "model.embed_tokens.q_scale", "shape": [ 151936, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24309760, "byteOffset": 0 } ], "md5sum": "4929a87aedb9dce94ca343bfea1befb7" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 33162240, "records": [ { "name": "lm_head.q_scale", "shape": [ 151936, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24309760, "byteOffset": 0 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 24309760 }, { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 2560, 864 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 24314880 } ], "md5sum": "5eeace80b3449b997cd503ca9056cda2" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 32092160, "records": [ { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 2560, 216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 0 }, { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 13824, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 1105920 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 13824, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 18800640 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 21012480 }, { "name": "model.layers.0.self_attn.c_attn.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 21017600 }, { "name": "model.layers.0.self_attn.c_attn.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 21032960 }, { "name": "model.layers.0.self_attn.c_attn.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 30863360 } ], "md5sum": "fcd57d38a484b7ef1fe8d65b5bcd52f1" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 33551360, "records": [ { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 3276800 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 3686400 }, { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 2560, 864 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 3691520 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 2560, 216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 12538880 }, { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 13824, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 13644800 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 13824, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 31339520 } ], "md5sum": "e5005ef4912dd09e9ec1c598f56fb543" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 13824, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "ae42282f108fb585809ad6c53936c787" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 26956800, "records": [ { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 0 }, { "name": "model.layers.1.self_attn.c_attn.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 5120 }, { "name": "model.layers.1.self_attn.c_attn.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 20480 }, { "name": "model.layers.1.self_attn.c_attn.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9850880 }, { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11079680 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14356480 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14766080 }, { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 2560, 864 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14771200 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 2560, 216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 23618560 }, { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 13824, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 24724480 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26936320 }, { "name": "model.layers.10.self_attn.c_attn.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 26941440 } ], "md5sum": "62cba6f40711457fce1e1a7ef934473b" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 13824, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "69c2b88960ffa81d56e1840e7d6313f3" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 26936320, "records": [ { "name": "model.layers.10.self_attn.c_attn.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "model.layers.10.self_attn.c_attn.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 2560, 864 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14750720 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 2560, 216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 23598080 }, { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 13824, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 24704000 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26915840 }, { "name": "model.layers.11.self_attn.c_attn.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 26920960 } ], "md5sum": "7d062ea11e26664d25dbd9c6ed956b1a" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 13824, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "00bdad9477a2069a84733fa9a41cea74" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 26936320, "records": [ { "name": "model.layers.11.self_attn.c_attn.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.c_attn.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 2560, 864 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14750720 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 2560, 216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 23598080 }, { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 13824, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 24704000 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26915840 }, { "name": "model.layers.12.self_attn.c_attn.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 26920960 } ], "md5sum": "52054698012966698c9c800381fac35f" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 13824, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "01ab6ea004e12b8f24dbc1781c9da92b" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 26936320, "records": [ { "name": "model.layers.12.self_attn.c_attn.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "model.layers.12.self_attn.c_attn.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 2560, 864 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14750720 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 2560, 216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 23598080 }, { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 13824, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 24704000 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26915840 }, { "name": "model.layers.13.self_attn.c_attn.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 26920960 } ], "md5sum": "afd636f7d4a778a77ba27ea0d32667dd" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 13824, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "07054519cceff55193e4b3466a9de282" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 26936320, "records": [ { "name": "model.layers.13.self_attn.c_attn.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "model.layers.13.self_attn.c_attn.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 2560, 864 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14750720 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 2560, 216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 23598080 }, { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 13824, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 24704000 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26915840 }, { "name": "model.layers.14.self_attn.c_attn.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 26920960 } ], "md5sum": "9f48b4aaaa4067b1ed088629b83aacd1" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 13824, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "d4613f5d96a59ba51021a772ce147e15" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 26936320, "records": [ { "name": "model.layers.14.self_attn.c_attn.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.c_attn.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 2560, 864 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14750720 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 2560, 216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 23598080 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 13824, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 24704000 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26915840 }, { "name": "model.layers.15.self_attn.c_attn.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 26920960 } ], "md5sum": "14f549e41346b253782dcaa0dc8b5d52" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 13824, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "54ec764f189559cd4c8fbe8f6e531df9" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 26936320, "records": [ { "name": "model.layers.15.self_attn.c_attn.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "model.layers.15.self_attn.c_attn.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 2560, 864 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14750720 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 2560, 216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 23598080 }, { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 13824, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 24704000 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26915840 }, { "name": "model.layers.16.self_attn.c_attn.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 26920960 } ], "md5sum": "5a148d8c913b4637519c5145c9dceb9d" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 13824, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "c4edc332889d20125202949e236e7da3" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 26936320, "records": [ { "name": "model.layers.16.self_attn.c_attn.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "model.layers.16.self_attn.c_attn.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 2560, 864 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14750720 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 2560, 216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 23598080 }, { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 13824, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 24704000 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26915840 }, { "name": "model.layers.17.self_attn.c_attn.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 26920960 } ], "md5sum": "875e6a6e016aa93d481f9b9213678fff" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 13824, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "ff058a135e3dde2f85108795d4155fb4" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 26936320, "records": [ { "name": "model.layers.17.self_attn.c_attn.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "model.layers.17.self_attn.c_attn.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 2560, 864 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14750720 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 2560, 216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 23598080 }, { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 13824, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 24704000 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26915840 }, { "name": "model.layers.18.self_attn.c_attn.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 26920960 } ], "md5sum": "d1843f31d174acdf34360a3239f80b75" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 13824, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "e21763103fc1ac0a230c07e66988e695" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 26936320, "records": [ { "name": "model.layers.18.self_attn.c_attn.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "model.layers.18.self_attn.c_attn.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 2560, 864 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14750720 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 2560, 216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 23598080 }, { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 13824, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 24704000 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26915840 }, { "name": "model.layers.19.self_attn.c_attn.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 26920960 } ], "md5sum": "adc3c2220368d0df014a7495bbffcc20" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 13824, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "1958b95c37d4019d1a3bcbfdabdfebf3" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 26936320, "records": [ { "name": "model.layers.19.self_attn.c_attn.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "model.layers.19.self_attn.c_attn.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 2560, 864 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14750720 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 2560, 216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 23598080 }, { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 13824, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 24704000 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26915840 }, { "name": "model.layers.2.self_attn.c_attn.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 26920960 } ], "md5sum": "0fd7f9598737ee79e02545d9904d7ad5" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 13824, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "ff75698217adacbbd1f004449b1f2c37" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 26936320, "records": [ { "name": "model.layers.2.self_attn.c_attn.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "model.layers.2.self_attn.c_attn.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 2560, 864 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14750720 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 2560, 216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 23598080 }, { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 13824, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 24704000 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26915840 }, { "name": "model.layers.20.self_attn.c_attn.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 26920960 } ], "md5sum": "8b846bfba9b69fae849c2036449d6277" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 13824, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "3fd3a93c99504e7a51da96cf6ef06d47" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 26936320, "records": [ { "name": "model.layers.20.self_attn.c_attn.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.c_attn.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 2560, 864 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14750720 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 2560, 216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 23598080 }, { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 13824, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 24704000 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26915840 }, { "name": "model.layers.21.self_attn.c_attn.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 26920960 } ], "md5sum": "55527b116c6cc470042780e7dfdf81b1" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 13824, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "8cf1441c5e6dc697f78c1e104d773476" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 26936320, "records": [ { "name": "model.layers.21.self_attn.c_attn.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "model.layers.21.self_attn.c_attn.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 2560, 864 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14750720 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 2560, 216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 23598080 }, { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 13824, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 24704000 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26915840 }, { "name": "model.layers.22.self_attn.c_attn.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 26920960 } ], "md5sum": "28c5e9285bd3ef8c35c0a176b2db9b2c" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 13824, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "0e23d1829142a240347097fb8842f2af" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 26936320, "records": [ { "name": "model.layers.22.self_attn.c_attn.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "model.layers.22.self_attn.c_attn.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 2560, 864 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14750720 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 2560, 216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 23598080 }, { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 13824, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 24704000 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26915840 }, { "name": "model.layers.23.self_attn.c_attn.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 26920960 } ], "md5sum": "0009e02fa270772a0c69f86e483c4cc2" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 13824, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "6dc454761156189a8aa7b63ffe3b2174" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 26936320, "records": [ { "name": "model.layers.23.self_attn.c_attn.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "model.layers.23.self_attn.c_attn.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 2560, 864 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14750720 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 2560, 216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 23598080 }, { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 13824, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 24704000 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26915840 }, { "name": "model.layers.24.self_attn.c_attn.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 26920960 } ], "md5sum": "15242419611f19a4b8b02b23dda09478" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 13824, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "b4869bdd51d1db3fc60d901438451d9b" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 26936320, "records": [ { "name": "model.layers.24.self_attn.c_attn.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "model.layers.24.self_attn.c_attn.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 2560, 864 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14750720 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 2560, 216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 23598080 }, { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 13824, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 24704000 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26915840 }, { "name": "model.layers.25.self_attn.c_attn.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 26920960 } ], "md5sum": "13a74b94fdd380f971b6c061d8ea7e7a" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 13824, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "ea3c05a7e131a966e3cecfde6d4287c3" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 26936320, "records": [ { "name": "model.layers.25.self_attn.c_attn.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "model.layers.25.self_attn.c_attn.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 2560, 864 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14750720 }, { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 2560, 216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 23598080 }, { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 13824, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 24704000 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26915840 }, { "name": "model.layers.26.self_attn.c_attn.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 26920960 } ], "md5sum": "114a0b47e7ac4312caf647b38d4f9a6f" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 13824, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "e0b159604e469642c61b962083024d89" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 26936320, "records": [ { "name": "model.layers.26.self_attn.c_attn.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "model.layers.26.self_attn.c_attn.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 2560, 864 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14750720 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 2560, 216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 23598080 }, { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 13824, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 24704000 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26915840 }, { "name": "model.layers.27.self_attn.c_attn.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 26920960 } ], "md5sum": "b8bf6785f24ade67f8b2fd7359da956e" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 13824, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "3d461879ca83df8c8c4833d3806bcc60" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 26936320, "records": [ { "name": "model.layers.27.self_attn.c_attn.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "model.layers.27.self_attn.c_attn.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 2560, 864 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14750720 }, { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 2560, 216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 23598080 }, { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 13824, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 24704000 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26915840 }, { "name": "model.layers.28.self_attn.c_attn.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 26920960 } ], "md5sum": "e8f97a666a66b143c26a553f5548ec5b" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 13824, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "f579c1d4ed1988f7cb894fd7dcb2f7d1" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 26936320, "records": [ { "name": "model.layers.28.self_attn.c_attn.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "model.layers.28.self_attn.c_attn.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 2560, 864 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14750720 }, { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 2560, 216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 23598080 }, { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 13824, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 24704000 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26915840 }, { "name": "model.layers.29.self_attn.c_attn.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 26920960 } ], "md5sum": "51ad70ec0ab01f458e1906106aa94b72" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 13824, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "48dd8d60a0e4fcddcaf4f2c383fbd0b2" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 26936320, "records": [ { "name": "model.layers.29.self_attn.c_attn.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "model.layers.29.self_attn.c_attn.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 2560, 864 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14750720 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 2560, 216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 23598080 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 13824, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 24704000 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26915840 }, { "name": "model.layers.3.self_attn.c_attn.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 26920960 } ], "md5sum": "db3b75264014ede0b092fbde337b7e80" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 13824, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "884f40ef9d4291063da28fb4056f980a" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 26936320, "records": [ { "name": "model.layers.3.self_attn.c_attn.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "model.layers.3.self_attn.c_attn.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 2560, 864 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14750720 }, { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 2560, 216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 23598080 }, { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 13824, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 24704000 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26915840 }, { "name": "model.layers.30.self_attn.c_attn.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 26920960 } ], "md5sum": "f7bb94bf44235da4500bacfdc6cd3ea4" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 13824, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "fac20210e4a2197d17f52f17303ab1ff" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 26936320, "records": [ { "name": "model.layers.30.self_attn.c_attn.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "model.layers.30.self_attn.c_attn.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 2560, 864 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14750720 }, { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 2560, 216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 23598080 }, { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 13824, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 24704000 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26915840 }, { "name": "model.layers.31.self_attn.c_attn.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 26920960 } ], "md5sum": "e93e5b4c83b0a61114347d87ef0623bc" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_weight", "shape": [ 13824, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "62bc40a1075f946e1c08548151c3a7e0" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 26936320, "records": [ { "name": "model.layers.31.self_attn.c_attn.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "model.layers.31.self_attn.c_attn.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.layers.32.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "model.layers.32.mlp.down_proj.q_weight", "shape": [ 2560, 864 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14750720 }, { "name": "model.layers.32.mlp.down_proj.q_scale", "shape": [ 2560, 216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 23598080 }, { "name": "model.layers.32.mlp.gate_up_proj.q_scale", "shape": [ 13824, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 24704000 }, { "name": "model.layers.32.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26915840 }, { "name": "model.layers.32.self_attn.c_attn.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 26920960 } ], "md5sum": "865bd3ea9bb7856dd47ff029765bd48b" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_weight", "shape": [ 13824, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "7081ca9671acfceb7f5d65553d592893" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 26936320, "records": [ { "name": "model.layers.32.self_attn.c_attn.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "model.layers.32.self_attn.c_attn.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "model.layers.32.self_attn.o_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.layers.32.self_attn.o_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.layers.33.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "model.layers.33.mlp.down_proj.q_weight", "shape": [ 2560, 864 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14750720 }, { "name": "model.layers.33.mlp.down_proj.q_scale", "shape": [ 2560, 216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 23598080 }, { "name": "model.layers.33.mlp.gate_up_proj.q_scale", "shape": [ 13824, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 24704000 }, { "name": "model.layers.33.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26915840 }, { "name": "model.layers.33.self_attn.c_attn.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 26920960 } ], "md5sum": "a948d14e82c2cb8f7eba3d8f4db9b61b" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_weight", "shape": [ 13824, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "dc5bbd920427c74436ec2ad881e05f47" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 26936320, "records": [ { "name": "model.layers.33.self_attn.c_attn.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "model.layers.33.self_attn.c_attn.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "model.layers.33.self_attn.o_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.layers.33.self_attn.o_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.layers.34.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "model.layers.34.mlp.down_proj.q_weight", "shape": [ 2560, 864 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14750720 }, { "name": "model.layers.34.mlp.down_proj.q_scale", "shape": [ 2560, 216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 23598080 }, { "name": "model.layers.34.mlp.gate_up_proj.q_scale", "shape": [ 13824, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 24704000 }, { "name": "model.layers.34.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26915840 }, { "name": "model.layers.34.self_attn.c_attn.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 26920960 } ], "md5sum": "673bfa3cd6796d21de363141ca59d934" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_weight", "shape": [ 13824, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "86146f29f1edcf0556aedfb3488479ee" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 26936320, "records": [ { "name": "model.layers.34.self_attn.c_attn.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "model.layers.34.self_attn.c_attn.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "model.layers.34.self_attn.o_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.layers.34.self_attn.o_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.layers.35.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "model.layers.35.mlp.down_proj.q_weight", "shape": [ 2560, 864 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14750720 }, { "name": "model.layers.35.mlp.down_proj.q_scale", "shape": [ 2560, 216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 23598080 }, { "name": "model.layers.35.mlp.gate_up_proj.q_scale", "shape": [ 13824, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 24704000 }, { "name": "model.layers.35.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26915840 }, { "name": "model.layers.35.self_attn.c_attn.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 26920960 } ], "md5sum": "53cb760e1a70c97e57998b447a6bd2c3" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_weight", "shape": [ 13824, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "7d2ea3116aca9b1b7e035f197eb69046" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 26936320, "records": [ { "name": "model.layers.35.self_attn.c_attn.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "model.layers.35.self_attn.c_attn.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "model.layers.35.self_attn.o_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.layers.35.self_attn.o_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.layers.36.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "model.layers.36.mlp.down_proj.q_weight", "shape": [ 2560, 864 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14750720 }, { "name": "model.layers.36.mlp.down_proj.q_scale", "shape": [ 2560, 216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 23598080 }, { "name": "model.layers.36.mlp.gate_up_proj.q_scale", "shape": [ 13824, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 24704000 }, { "name": "model.layers.36.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26915840 }, { "name": "model.layers.36.self_attn.c_attn.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 26920960 } ], "md5sum": "74d37dff56b8e1ae7e0c3dfdad0dbfb5" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_weight", "shape": [ 13824, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "4dd36960c740f3081022ad3e094806e9" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 26936320, "records": [ { "name": "model.layers.36.self_attn.c_attn.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "model.layers.36.self_attn.c_attn.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "model.layers.36.self_attn.o_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.layers.36.self_attn.o_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.layers.37.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "model.layers.37.mlp.down_proj.q_weight", "shape": [ 2560, 864 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14750720 }, { "name": "model.layers.37.mlp.down_proj.q_scale", "shape": [ 2560, 216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 23598080 }, { "name": "model.layers.37.mlp.gate_up_proj.q_scale", "shape": [ 13824, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 24704000 }, { "name": "model.layers.37.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26915840 }, { "name": "model.layers.37.self_attn.c_attn.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 26920960 } ], "md5sum": "dd7f6a16b300317606dec91e9b0010f7" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_weight", "shape": [ 13824, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "0b186e7154dfb607d3641b64943b4e88" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 26936320, "records": [ { "name": "model.layers.37.self_attn.c_attn.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "model.layers.37.self_attn.c_attn.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "model.layers.37.self_attn.o_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.layers.37.self_attn.o_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.layers.38.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "model.layers.38.mlp.down_proj.q_weight", "shape": [ 2560, 864 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14750720 }, { "name": "model.layers.38.mlp.down_proj.q_scale", "shape": [ 2560, 216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 23598080 }, { "name": "model.layers.38.mlp.gate_up_proj.q_scale", "shape": [ 13824, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 24704000 }, { "name": "model.layers.38.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26915840 }, { "name": "model.layers.38.self_attn.c_attn.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 26920960 } ], "md5sum": "9d41c8040c08778155500dd03725a44b" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_weight", "shape": [ 13824, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "d28fa20bacb9a18dadd36cbc7d32518e" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 26936320, "records": [ { "name": "model.layers.38.self_attn.c_attn.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "model.layers.38.self_attn.c_attn.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "model.layers.38.self_attn.o_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.layers.38.self_attn.o_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.layers.39.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "model.layers.39.mlp.down_proj.q_weight", "shape": [ 2560, 864 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14750720 }, { "name": "model.layers.39.mlp.down_proj.q_scale", "shape": [ 2560, 216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 23598080 }, { "name": "model.layers.39.mlp.gate_up_proj.q_scale", "shape": [ 13824, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 24704000 }, { "name": "model.layers.39.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26915840 }, { "name": "model.layers.39.self_attn.c_attn.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 26920960 } ], "md5sum": "1d7bfdd204c4ba7538d49579cf6875ed" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 13824, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "631eaff454cdfe910020247dde81a3f5" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 26936320, "records": [ { "name": "model.layers.39.self_attn.c_attn.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "model.layers.39.self_attn.c_attn.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "model.layers.39.self_attn.o_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.layers.39.self_attn.o_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 2560, 864 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14750720 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 2560, 216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 23598080 }, { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 13824, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 24704000 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26915840 }, { "name": "model.layers.4.self_attn.c_attn.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 26920960 } ], "md5sum": "5464caf27c74761a7e7bd6170aa628a9" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 13824, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "b77f59a22acb13e53dab87501e0e4fea" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 26936320, "records": [ { "name": "model.layers.4.self_attn.c_attn.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "model.layers.4.self_attn.c_attn.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 2560, 864 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14750720 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 2560, 216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 23598080 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 13824, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 24704000 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26915840 }, { "name": "model.layers.5.self_attn.c_attn.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 26920960 } ], "md5sum": "63bfe1e304873d9f132d9c1a74d9c528" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 13824, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "9eca403e3152dcad61f60f664e91b32c" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 26936320, "records": [ { "name": "model.layers.5.self_attn.c_attn.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "model.layers.5.self_attn.c_attn.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 2560, 864 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14750720 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 2560, 216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 23598080 }, { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 13824, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 24704000 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26915840 }, { "name": "model.layers.6.self_attn.c_attn.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 26920960 } ], "md5sum": "f1a38ab0222625abc9fd27f1c8fc593f" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 13824, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "754ff8facf6f79fe93d660da4ae354a1" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 26936320, "records": [ { "name": "model.layers.6.self_attn.c_attn.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.c_attn.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 2560, 864 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14750720 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 2560, 216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 23598080 }, { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 13824, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 24704000 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26915840 }, { "name": "model.layers.7.self_attn.c_attn.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 26920960 } ], "md5sum": "82967b78a426c0ed945d3943c2369502" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 13824, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "165c83c894c64dbae6f5e6baebdb7772" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 26936320, "records": [ { "name": "model.layers.7.self_attn.c_attn.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "model.layers.7.self_attn.c_attn.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 2560, 864 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14750720 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 2560, 216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 23598080 }, { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 13824, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 24704000 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26915840 }, { "name": "model.layers.8.self_attn.c_attn.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 26920960 } ], "md5sum": "34cfb6db23c3b9e4505fc516a8bc84fa" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 13824, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "ccc46ef1645b4a9e8d9d025bb0215993" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 26936320, "records": [ { "name": "model.layers.8.self_attn.c_attn.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.c_attn.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 2560, 864 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14750720 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 2560, 216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1105920, "byteOffset": 23598080 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 13824, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 24704000 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26915840 }, { "name": "model.layers.9.self_attn.c_attn.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 26920960 } ], "md5sum": "b20564c77c4bab0433f0c14bf2e43033" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 14750720, "records": [ { "name": "model.layers.9.self_attn.c_attn.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "model.layers.9.self_attn.c_attn.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.norm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 } ], "md5sum": "c21a0d5b00ddef9b73a7ff9bcee560bf" } ] }