diff --git a/README.md b/README.md index 32897cd3e640101ba184f8c4ccd896981de3804a..12e048bb805b706349783df5d9e6d2d2a25f7079 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,5 @@ --- license: mit --- + +4-bit [OmniQuant](https://arxiv.org/abs/2308.13137) quantized version of [CodeNinja-1.0-OpenChat-7B](https://huggingface.co/beowolx/CodeNinja-1.0-OpenChat-7B). diff --git a/added_tokens.json b/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..12a98694bf3dbf213d189451d772b42275487ac7 --- /dev/null +++ b/added_tokens.json @@ -0,0 +1,4 @@ +{ + "<|end_of_turn|>": 32000, + "<|pad_0|>": 32001 +} diff --git a/ndarray-cache.json b/ndarray-cache.json new file mode 100644 index 0000000000000000000000000000000000000000..a79f68e6770546ef50cc9e7e6c932c22e54ba81d --- /dev/null +++ b/ndarray-cache.json @@ -0,0 +1,4303 @@ +{ + "metadata": { + "ParamSize": 325, + "ParamBytes": 3734421760.0, + "BitsPerParam": 4.125436579647682 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 65540096, + "records": [ + { + "name": "lm_head.q_weight", + "shape": [ + 32002, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65540096, + "byteOffset": 0 + } + ], + "md5sum": "a2dcbae1b9da5dc0a6c3c0d92204e084" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "83d236bafe1f4fe2df57ee3e59210e8a" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 33267840, + "records": [ + { + "name": "lm_head.q_scale", + "shape": [ + 32002, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048128, + "byteOffset": 0 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 2048128 + }, + { + "name": "model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 2056320 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3891328 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3899520 + }, + { + "name": "model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 3907712 + } + ], + "md5sum": "48e3f5aaee401df406fe8b5d5cf02340" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "d24919297c1578f08573f50253fe4fe9" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "f5ec61db27a552416027d179fef9f6c3" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "4bd600247c94f2a07e2e95dc5ec41b41" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 27156480, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 0 + }, + { + "name": "model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 917504 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 2752512 + }, + { + "name": "model.layers.22.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 2760704 + }, + { + "name": "model.layers.22.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 15343616 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 15736832 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 24125440 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + }, + { + "name": "model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 24395776 + }, + { + "name": "model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 25313280 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 27148288 + } + ], + "md5sum": "b1329eda509598a774fd4ffa8ccdd29e" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "ddaaba3dd2ed60ee094e7682c2b8f583" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "ff13def09c82fd9016e9ef77075c762f" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.23.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.23.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.24.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "9d3bfa844f96298ecc8e84df26c517c3" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "9292f48c3348c599d53782c349b4e617" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "63df8f737221b1cc9173a7ac474cd49f" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.24.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.24.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.25.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "0f0197ac9549f84d969cbaec0f0660ca" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "cb4b28bff6be68ca6893e928f3071333" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "bc4f8aa5d6ee3fe2901b288b60601e1c" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.25.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.25.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.26.mlp.down_proj.q_scale", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.26.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "147d5b047be05dbecfe132adc7e64813" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "a4ea4ca3db831ee6304e2c02a8002713" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "45cca5221fe0d57a749fb26efcf96b83" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.26.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.26.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.27.mlp.down_proj.q_scale", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.27.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "6ef55cad6cf4f72306a52c5de6ce1c77" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "8a2878cd12ab54d6231891c069dc35ba" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "27618303ad03a70296c46204f9725e31" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.27.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.27.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.28.mlp.down_proj.q_scale", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.28.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "ef620e9ab6162bcf91e0cfde0560bb2d" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "68de916d90e822962bb31d946137bff4" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "60db7ee525b9499b86ab26100f08fab5" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.28.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.28.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.29.mlp.down_proj.q_scale", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.29.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "83c282f64c73925dce544e4fd0c6852e" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.30.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "7ada513ebbab639fe17a6c4840841871" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "8dd322f5237ccf283b0ca57384a38287" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.29.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.29.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.30.mlp.down_proj.q_scale", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.30.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.30.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "3129e08f6435a5ce8e4c9bf2e20dde0e" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "88fef360fb0e484a5a3a201d99f9f7fd" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "cb7c069a499e444a63bf609c7a188bcb" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.30.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.30.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.31.mlp.down_proj.q_scale", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.31.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.31.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "e48ae4d277f0b432863a476c60c9efc6" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 65540096, + "records": [ + { + "name": "model.embed_tokens.q_weight", + "shape": [ + 32002, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65540096, + "byteOffset": 0 + } + ], + "md5sum": "41d72cfa6a998ac42c933bf43736317a" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "9e64acbc2d61b8ed5a3dd3a5a9169f45" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "62434b3ea3974556e447ee65787d4470" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 26452096, + "records": [ + { + "name": "model.layers.31.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.31.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.norm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.embed_tokens.q_scale", + "shape": [ + 32002, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048128, + "byteOffset": 21635072 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 23683200 + }, + { + "name": "model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 23691392 + }, + { + "name": "model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 24608896 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26443904 + } + ], + "md5sum": "a1b48063008d27e101709f5006b1128f" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "19695f3f8aaba5fcf125612eb63805c9" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "268c624d8ee69b80747bb33083cb30a4" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.0.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.0.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "1984db4e2bb4168ac7a65c480b274d61" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "d04caed45004c643f8d1a6d5235caaea" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 23461888, + "records": [ + { + "name": "model.layers.1.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.1.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 21626880 + } + ], + "md5sum": "c5a5e65b57e2903078cea7639cad59f3" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "03fe4d45b5cf91b06c3c2263c21eacfd" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "b4a7538eda60756accaa7cf4618e8f05" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.10.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.10.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "d165ad44d05dc0dc7ed4ff241759c4bd" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "5618861549f5fcd09ed1d990cf300315" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "11efa9e49218f042fde3fe697e67ea85" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.2.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.2.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "096f2675cb591877f05aa9e05df32ca5" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "52629650bed64ec10a5ed703e49f964f" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "61c1743e9c2de3cdfdaba1e18cf46271" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.3.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.3.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "69010f229afb21730902412d384ec180" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "e66d0309b513e97dea61c68324123cae" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "a79b714c7517f425bd3eb56ee19403d0" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.4.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.4.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "c76c812a70ede4a292bdf386a47ab204" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "21e167050c9e0138b0e842204baf5cda" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "672494dac3786bd614aec3c2a7c029b8" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.5.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.5.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "cd65106c1e03eea4d13ca24a4480913b" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "c6911a644e228972caa090b068f12dcd" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "24f73714b9da83336bebcda7d55949c0" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.6.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.6.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "e0639ef629ac1c2cec30d2b2086a7b00" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "aebb52c9079bf033c9e16cadd3fa3190" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "895ed5fabf987aef512a17fe2b6b1272" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.7.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.7.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "763c8aaf3a8763b5b8a9c247ca6a372b" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "9b87be10614eeb9adca298400f881050" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "d7219b4a29ad692f86591f88e35ab950" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.8.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.8.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "e96ead9b66405440997a0519ff39d442" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "b8836e4db86947f3b6550d53ead4bc9b" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "47cc59797fa11e7373f0f1db4a2ada52" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "1a9976f5767df8b96af3a01209358023" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 25329664, + "records": [ + { + "name": "model.layers.9.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.9.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 22552576 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 22560768 + }, + { + "name": "model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 22568960 + }, + { + "name": "model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 23486464 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25321472 + } + ], + "md5sum": "27848f08616a3cd909ebe1ec992c9272" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "58c8374ace36df6fdb5f10cc6d582311" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "29fe6c53bff6d5705480af000df0ead1" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.11.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.11.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "b52dd4e2e63daaf261d59ba810e43b02" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "fab048c7c9b1083dfb7e59c5a3962cbc" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "c2e74e61c3ea2634422e62cee2590d6d" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.12.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.12.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "0b9976a40db6424b4f81e024320717c0" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "242a037008ab1204fc40d4f7952b0e5a" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "e6d0f7b44d4b7ef692c2d549b80fc491" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.13.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.13.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "8647388f778b2873ffa8b9d0b8c7c36e" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "12302bbfe427e1469df507bcdb4dc116" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "cbe307e63a18a96e30ab0f80dd39dd84" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.14.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.14.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "e6fe0da3100611b3970a86159812f829" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "2fdbfc1430bd6fa6ebeb0ffed8c73d32" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "1bb49941227586f7fad8c2aa8b1638bb" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.15.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.15.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "126fb265d8f6539d906c9259becd3448" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "5353ed99e1cd6cfa5444fb9126f92bd3" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "0d199d1394283d1db29155ed95efc958" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.16.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.16.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "8878cac4d8bd34004a5592abcc04f4cd" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "54a45fcfc0bb6d50a2913e3938154140" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "d76e70e1f0d151791d3316b21378c8f5" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.17.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.17.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "9243ab251bafbbc27b1b6d24ac04f502" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "dab33c7cca7f7f2ee991673a9b17ee74" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "f18a1327483a207cc5e9917d5e04ac63" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.18.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.18.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "a51aabb9867b11936905257a6b4a03a9" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "7dd081c9448894e3e4a255a51fae7ec6" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "68686552ab9d8babad7b7cce542d654b" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.19.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.19.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "879a759c941e8acb778beb20fab0acd9" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "5329091d33c187d85ad5df711d023c16" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.20.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.20.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21626880 + } + ], + "md5sum": "85f6a1e37b4a8a750a31b73216d1b965" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 21626880, + "records": [ + { + "name": "model.layers.21.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.21.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + } + ], + "md5sum": "cc10962f06a5ccd4789d86fd011ef70c" + } + ] +} \ No newline at end of file diff --git a/params_shard_0.bin b/params_shard_0.bin new file mode 100644 index 0000000000000000000000000000000000000000..edbbdca859c7d63538e1b7a279910c72011dded1 --- /dev/null +++ b/params_shard_0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:217b137978385b9baf58173c103a01511bf4646d653964df09c5bcdca83f1cb1 +size 65540096 diff --git a/params_shard_1.bin b/params_shard_1.bin new file mode 100644 index 0000000000000000000000000000000000000000..f696a492de6284914bd547ac3ee5c5339b7843cc --- /dev/null +++ b/params_shard_1.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ca84455efbb4687299c7a6902a10ca0cae196e68fff24909a4b2eb9d3869353 +size 58720256 diff --git a/params_shard_10.bin b/params_shard_10.bin new file mode 100644 index 0000000000000000000000000000000000000000..7e661b864d9e9b0ca70c1c3e1895fe133d635d67 --- /dev/null +++ b/params_shard_10.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f73298943eb9a227d85fcfa5f15f50d2c1ec91fe7cf491a8ded807f1e2e824b9 +size 29360128 diff --git a/params_shard_11.bin b/params_shard_11.bin new file mode 100644 index 0000000000000000000000000000000000000000..c3064641c60a6c732ea434c112f183a1ed948e8d --- /dev/null +++ b/params_shard_11.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3077db1b31d351ad22bcd80e4d2817fead8dec3dae3d981adcfd994eeff88f2d +size 58720256 diff --git a/params_shard_12.bin b/params_shard_12.bin new file mode 100644 index 0000000000000000000000000000000000000000..84c21c1f6e38161f1c1ac466ab1745c12a1cda3d --- /dev/null +++ b/params_shard_12.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a69a8b274b86f5f35a810cf300263800d52151ec1936d1ec2399265b37f9f37b +size 24395776 diff --git a/params_shard_13.bin b/params_shard_13.bin new file mode 100644 index 0000000000000000000000000000000000000000..efd2256f5e665fd4dbe8049f61a5b130d1a8ec95 --- /dev/null +++ b/params_shard_13.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72aadd1b34ac82e96eabbabd5b4b4f2e494c61f7413bff11a03cff8efe8e86be +size 29360128 diff --git a/params_shard_14.bin b/params_shard_14.bin new file mode 100644 index 0000000000000000000000000000000000000000..7319e39f0e539587a75dc7e66d65ddfbb75385e6 --- /dev/null +++ b/params_shard_14.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17f78136b0e5e3828b04a49ca7cd993925e5f613ec00e98fa79cc658a8fa4e6b +size 58720256 diff --git a/params_shard_15.bin b/params_shard_15.bin new file mode 100644 index 0000000000000000000000000000000000000000..7453c36e39cf255f55e4edf2d33b648cbab80b41 --- /dev/null +++ b/params_shard_15.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8715d60904b5d25e29a8f59ebea57233f12d5979c14aebf0b469637c2c1bc0c5 +size 24395776 diff --git a/params_shard_16.bin b/params_shard_16.bin new file mode 100644 index 0000000000000000000000000000000000000000..1905831e7f04008e7aa53218d209cfc9542e2a05 --- /dev/null +++ b/params_shard_16.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f26ce990db14408e163f600d79418d0ec53cd3af0d950a046405d95c3eb8c0c4 +size 29360128 diff --git a/params_shard_17.bin b/params_shard_17.bin new file mode 100644 index 0000000000000000000000000000000000000000..bce1051594079dc1e9bf00d966ff284ad89d2996 --- /dev/null +++ b/params_shard_17.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41731a5ba727577c9be4c814745fec75cb8d6cbe713ac2b1d5868cd78db75534 +size 58720256 diff --git a/params_shard_18.bin b/params_shard_18.bin new file mode 100644 index 0000000000000000000000000000000000000000..40a12bcd30be4ed036902a78aa38e8e1f7263516 --- /dev/null +++ b/params_shard_18.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd936f364b7a2f46959faa4578a66d2f8d9bf7dfd26e816b8c42ebd54a1ad013 +size 24395776 diff --git a/params_shard_19.bin b/params_shard_19.bin new file mode 100644 index 0000000000000000000000000000000000000000..b750fd26038e0d88c3466c90b7511bdff9cd07e4 --- /dev/null +++ b/params_shard_19.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:582d01969007bc4cc4561a29f09835a8136d3afbc929e180eb99c0f946bbae70 +size 29360128 diff --git a/params_shard_2.bin b/params_shard_2.bin new file mode 100644 index 0000000000000000000000000000000000000000..e37cc1db65dac88acf51eaab1780eb5f99209394 --- /dev/null +++ b/params_shard_2.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff8a9252ed54331c805a3c2a9dd166499233985541299ce3d3850f7f8f80a0a3 +size 33267840 diff --git a/params_shard_20.bin b/params_shard_20.bin new file mode 100644 index 0000000000000000000000000000000000000000..2d197aa46c1e0526a04b57d05d01faa063b833b9 --- /dev/null +++ b/params_shard_20.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8035e86dfed010270714a160a33f35d91381563e1dc98f356fe8a9e74fd9a26 +size 58720256 diff --git a/params_shard_21.bin b/params_shard_21.bin new file mode 100644 index 0000000000000000000000000000000000000000..a65dd5b83b108a48417ca765d2c3420573b150c3 --- /dev/null +++ b/params_shard_21.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:104f7aa98e148f35ff6d6c5ac8e31458afdcca04677121175276f92737d879ee +size 24395776 diff --git a/params_shard_22.bin b/params_shard_22.bin new file mode 100644 index 0000000000000000000000000000000000000000..eba7e55edf49088f71bf5bcdb27f9d69f29e918d --- /dev/null +++ b/params_shard_22.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f093d37cb488137bff98411178bfee3ac228fe1de32912a3bbcce2c441924e87 +size 29360128 diff --git a/params_shard_23.bin b/params_shard_23.bin new file mode 100644 index 0000000000000000000000000000000000000000..6724a743a4fd4c2bcb80c1fea7e56d7b77b1eec2 --- /dev/null +++ b/params_shard_23.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2175cd4add0c21fadfef1c9e65011b3b1271950eac9372932c26580dc037684 +size 58720256 diff --git a/params_shard_24.bin b/params_shard_24.bin new file mode 100644 index 0000000000000000000000000000000000000000..77d9e9d6c4f1a7aabf60e09601f75a078d203140 --- /dev/null +++ b/params_shard_24.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2341a2a168447fbcdc71aefc4c04dd0572a837d38e77e79c60fde8c11c49de05 +size 24395776 diff --git a/params_shard_25.bin b/params_shard_25.bin new file mode 100644 index 0000000000000000000000000000000000000000..3f26ce56267a5dfb9f63acd98bf77076f07e99e1 --- /dev/null +++ b/params_shard_25.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ed7c6e4bb84975261a7609bfe4050707efcf03646a3182cdf07bb0680587463 +size 29360128 diff --git a/params_shard_26.bin b/params_shard_26.bin new file mode 100644 index 0000000000000000000000000000000000000000..63cfb146fb887f6cd2d320a689c9290ff53913cf --- /dev/null +++ b/params_shard_26.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28bee287f7a5d23aafc83c9e6266e2a3277773ca78a7638d9802d0a930e9eee5 +size 58720256 diff --git a/params_shard_27.bin b/params_shard_27.bin new file mode 100644 index 0000000000000000000000000000000000000000..01a59a8919692ad0b6b1beeed3cab2a5f9a54eeb --- /dev/null +++ b/params_shard_27.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a94ad550af8ebc796d2e82601f3f241853d63e4c8056878b529d197804c855c8 +size 24395776 diff --git a/params_shard_28.bin b/params_shard_28.bin new file mode 100644 index 0000000000000000000000000000000000000000..d7f4680441d46bfb0dc7b2b0a13b1cc23b746eed --- /dev/null +++ b/params_shard_28.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b6233399a6b5ad4c44b2a0279acef93ec242b51caeb2c942e86228118c1a93c +size 29360128 diff --git a/params_shard_29.bin b/params_shard_29.bin new file mode 100644 index 0000000000000000000000000000000000000000..a22bd8ce9eda4806228208ae1d4bbce27c5d1d87 --- /dev/null +++ b/params_shard_29.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74a0317a6b26cca1f503c89a8ad5a4aabfe1c4db1903b53651c838101f10b05a +size 58720256 diff --git a/params_shard_3.bin b/params_shard_3.bin new file mode 100644 index 0000000000000000000000000000000000000000..12468d0c75fd00fdbcdeee9ac7356d6fc69841fc --- /dev/null +++ b/params_shard_3.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf3c9371fb762908b755968ff7db5824fb1be05d3ac9c88ab5a294188abc174d +size 58720256 diff --git a/params_shard_30.bin b/params_shard_30.bin new file mode 100644 index 0000000000000000000000000000000000000000..0a84deab26c2813559ca2ee44d28b53d6c8d2feb --- /dev/null +++ b/params_shard_30.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49c1f2e630cbea0551494d69791d9abd427f73078359461c4c4988b9c29993bf +size 24395776 diff --git a/params_shard_31.bin b/params_shard_31.bin new file mode 100644 index 0000000000000000000000000000000000000000..21f9cb56393e52d9e029e3846a9e13cfef149b8a --- /dev/null +++ b/params_shard_31.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:940cfdc668beeb2783735c3d2c18616156a2e84cbb01bea689fde2e0f46bc955 +size 65540096 diff --git a/params_shard_32.bin b/params_shard_32.bin new file mode 100644 index 0000000000000000000000000000000000000000..7e202b6fe14c3b8763e10703e1d0659e89031e77 --- /dev/null +++ b/params_shard_32.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57f4c7db6ca84b66d15c30475eb9c84f1e416f44e0ee123236563f2deafa6806 +size 29360128 diff --git a/params_shard_33.bin b/params_shard_33.bin new file mode 100644 index 0000000000000000000000000000000000000000..3b0491b75f9db02c17b5168a892ffa49820b9a16 --- /dev/null +++ b/params_shard_33.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d8b861dc22a4d64e5d397cf38b02eaf25e6fb565538f0940aff1cbc321ddeab +size 58720256 diff --git a/params_shard_34.bin b/params_shard_34.bin new file mode 100644 index 0000000000000000000000000000000000000000..396c4dbe393ae4f0f36212ede8070935f18c4106 --- /dev/null +++ b/params_shard_34.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3109c85e8c5a3511b87556c7f77d4393ed56fe4a810a00845834d691fe27f97b +size 26452096 diff --git a/params_shard_35.bin b/params_shard_35.bin new file mode 100644 index 0000000000000000000000000000000000000000..0b8b1849045bfe000df8bffec5e257ef949325c4 --- /dev/null +++ b/params_shard_35.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd442d3fa4ac9f98a9c066dc1e08ed9a9136f2d30fbf23bcade1fbcc97a78b01 +size 29360128 diff --git a/params_shard_36.bin b/params_shard_36.bin new file mode 100644 index 0000000000000000000000000000000000000000..9557cce71493d0cf3e4d4903c39387c65b3f3a2a --- /dev/null +++ b/params_shard_36.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4243dc300320464cd540ecdb70982ab4ce0c52cd16f8f2c37f24f251010bcdd9 +size 58720256 diff --git a/params_shard_37.bin b/params_shard_37.bin new file mode 100644 index 0000000000000000000000000000000000000000..7d8b3c21845b2e6089f868a9d2e9876034ddd5b7 --- /dev/null +++ b/params_shard_37.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ad8cf9f8b009a8b28d7258175c32e6fd0caa204accf9ef045985c7d4d794aec +size 24395776 diff --git a/params_shard_38.bin b/params_shard_38.bin new file mode 100644 index 0000000000000000000000000000000000000000..1232aa5913957e3f0c36de7666c359fac6520183 --- /dev/null +++ b/params_shard_38.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0f9e10608fd9a51a49455e4816d2d2cee640dbd8d2d5896fe184ac570ec8d9b +size 58720256 diff --git a/params_shard_39.bin b/params_shard_39.bin new file mode 100644 index 0000000000000000000000000000000000000000..89a52fa250bd23b4447efbfe00a4d9734a08669f --- /dev/null +++ b/params_shard_39.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a8b097c35489aecb340576f4dec9717bd0bdc40adfc932857fcedd5c1fa977c +size 23461888 diff --git a/params_shard_4.bin b/params_shard_4.bin new file mode 100644 index 0000000000000000000000000000000000000000..9e9e794f9438d4dde338dd318aa433ad313baaeb --- /dev/null +++ b/params_shard_4.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aadadcfa1531295464b2299ff4280ce80babe59f3b3f587058c8887d2fc79319 +size 29360128 diff --git a/params_shard_40.bin b/params_shard_40.bin new file mode 100644 index 0000000000000000000000000000000000000000..4c0de0c744e5c03d222757366398e620f3df4d78 --- /dev/null +++ b/params_shard_40.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6162a34bc3426d6ccf0693968a018d81b39202b414d34314dc220b60b6e9f776 +size 29360128 diff --git a/params_shard_41.bin b/params_shard_41.bin new file mode 100644 index 0000000000000000000000000000000000000000..fca0ce340ef196481987577709443f19e4498fb2 --- /dev/null +++ b/params_shard_41.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:175cb23ccbcfb3cba00fd2e93f4e8594baec7bc7433f695ecd200102304256da +size 58720256 diff --git a/params_shard_42.bin b/params_shard_42.bin new file mode 100644 index 0000000000000000000000000000000000000000..c3a25a47284638790f65a7f80a275077e3889523 --- /dev/null +++ b/params_shard_42.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c381122d63faf3f2779f1f1b913dc2de3a733369fcd779a9b8a5ed469360a595 +size 24395776 diff --git a/params_shard_43.bin b/params_shard_43.bin new file mode 100644 index 0000000000000000000000000000000000000000..bce8af47f9f9d43a46da91eaa80f8a39505ee1ff --- /dev/null +++ b/params_shard_43.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff5468f487bb6fc8cab2f0950ee039498bfdb318d7b77f582aca4ccd82dae103 +size 29360128 diff --git a/params_shard_44.bin b/params_shard_44.bin new file mode 100644 index 0000000000000000000000000000000000000000..e13763a9e69ff30f4d4842892a0c963531e159c1 --- /dev/null +++ b/params_shard_44.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69cdb751147fb44d68582fc4a65ed1b86d7567164c65ce421cb461ef0fb3f8da +size 58720256 diff --git a/params_shard_45.bin b/params_shard_45.bin new file mode 100644 index 0000000000000000000000000000000000000000..f16795c28623e6fa2002b920de68081c2b09fd0e --- /dev/null +++ b/params_shard_45.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4b4603ac5e83ea745bc581843a2de59bd2ac07538cec55364abed39597f163d +size 24395776 diff --git a/params_shard_46.bin b/params_shard_46.bin new file mode 100644 index 0000000000000000000000000000000000000000..72cec572fb937fa21b348fc40bc56f5994f93284 --- /dev/null +++ b/params_shard_46.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:888566a5c272a7047b59a969f06fdc77fe1d60e31c7c12bfc97628aaea7e565b +size 29360128 diff --git a/params_shard_47.bin b/params_shard_47.bin new file mode 100644 index 0000000000000000000000000000000000000000..ff815b52bb0f32b759201cc175a1557072dede3b --- /dev/null +++ b/params_shard_47.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58ebbe88d687041c32280eca8532f08e707b36beece4ec6b53aef38c55f57c87 +size 58720256 diff --git a/params_shard_48.bin b/params_shard_48.bin new file mode 100644 index 0000000000000000000000000000000000000000..df33aadcc0f8d6dcede161ee132fe3c8253f7cac --- /dev/null +++ b/params_shard_48.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31f9d0e9627556c47fc50484b8ecdb8b7065ff453b1078a41681912e8bc10dc8 +size 24395776 diff --git a/params_shard_49.bin b/params_shard_49.bin new file mode 100644 index 0000000000000000000000000000000000000000..89122527a5ce7ecf2fafe12f76df55133d9608d6 --- /dev/null +++ b/params_shard_49.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93c24a7406323e6f1301ab3f9f78c3082b39f1deb9a87d08e51affaa371a851c +size 29360128 diff --git a/params_shard_5.bin b/params_shard_5.bin new file mode 100644 index 0000000000000000000000000000000000000000..e51f5b0702929ac21828376c891d073d261e4730 --- /dev/null +++ b/params_shard_5.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44399b7a6f41809baf42caaf9b28130aa53efc8c93ca1bd4963cdb9116d733b3 +size 58720256 diff --git a/params_shard_50.bin b/params_shard_50.bin new file mode 100644 index 0000000000000000000000000000000000000000..a9cebe5d6cc9a6882cafbc5768ac4ca10fe32783 --- /dev/null +++ b/params_shard_50.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce0ec6fdae35a7b28ac7252f0af9ff5cd83c12736b170d7a44d1fab31dab05b9 +size 58720256 diff --git a/params_shard_51.bin b/params_shard_51.bin new file mode 100644 index 0000000000000000000000000000000000000000..6c83b38082b3d1215adb883e6dba374501cc3a7e --- /dev/null +++ b/params_shard_51.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:731a086d3d6359fe61744b8497270f204212b2ad7ba7cd550717779335a4e0e6 +size 24395776 diff --git a/params_shard_52.bin b/params_shard_52.bin new file mode 100644 index 0000000000000000000000000000000000000000..a4432895f82ae8054d562e33ac23867b69e08496 --- /dev/null +++ b/params_shard_52.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2bc51dfb7644f3898a20634a0d2752e9384cd7fbabb5979ebd6b2aca9c67148 +size 29360128 diff --git a/params_shard_53.bin b/params_shard_53.bin new file mode 100644 index 0000000000000000000000000000000000000000..418c16a5947d87c6977b86339f2e5027b6484b9a --- /dev/null +++ b/params_shard_53.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee48dcc34350e07ec52cccd8e4ca833869a37ae198d97c154743e7c37accd0f4 +size 58720256 diff --git a/params_shard_54.bin b/params_shard_54.bin new file mode 100644 index 0000000000000000000000000000000000000000..b98401f78dd9ba695a4bd941b261f6c18c7393ae --- /dev/null +++ b/params_shard_54.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36b135c6f31612927dda060c64f8f73fb3ff6a5976a403b7d879418934d4316e +size 24395776 diff --git a/params_shard_55.bin b/params_shard_55.bin new file mode 100644 index 0000000000000000000000000000000000000000..0fa8ffb6b9b61af3179e20d8b10e10b25f7ec588 --- /dev/null +++ b/params_shard_55.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b7c7d861a8602cd0214d80a9bbc14e3d3c11cd374a9560afd1e82900afb3e13 +size 29360128 diff --git a/params_shard_56.bin b/params_shard_56.bin new file mode 100644 index 0000000000000000000000000000000000000000..a1598c9e5a99b6f0857fd1e669673916f3d34ad8 --- /dev/null +++ b/params_shard_56.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd7c5a49ab862a6330833b3a071638c4812b193c2ca9d744949c110612d76da1 +size 58720256 diff --git a/params_shard_57.bin b/params_shard_57.bin new file mode 100644 index 0000000000000000000000000000000000000000..5b91dde6a2901d2ce3625794069928e030221c93 --- /dev/null +++ b/params_shard_57.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8192e9953adfe18eff91033856995a3ba03f7b24d6c001edb8315b64053b4558 +size 24395776 diff --git a/params_shard_58.bin b/params_shard_58.bin new file mode 100644 index 0000000000000000000000000000000000000000..e8ad793a88cf683d8eeb75f4963d7550741d8512 --- /dev/null +++ b/params_shard_58.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9b4fe8dfa07dd9d06bf426cde829c0d7831eee9135eabb0dc401f2cab7b6ae1 +size 29360128 diff --git a/params_shard_59.bin b/params_shard_59.bin new file mode 100644 index 0000000000000000000000000000000000000000..f5ae842fc5c2ee3046890fb99555e83b3260d83c --- /dev/null +++ b/params_shard_59.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f818e514ee53daa98de65b65598a0d028838b52e88bc7cdbc5d28d104276825 +size 58720256 diff --git a/params_shard_6.bin b/params_shard_6.bin new file mode 100644 index 0000000000000000000000000000000000000000..3be809e21b9a14920385bd7e2afaa629cb360743 --- /dev/null +++ b/params_shard_6.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23ce29716ecf4e5c01047ba047171de24fa96bf9dd41343b13ddc34bf34fa0ef +size 27156480 diff --git a/params_shard_60.bin b/params_shard_60.bin new file mode 100644 index 0000000000000000000000000000000000000000..7a3ab568ccf8fc39128b5c1e32be195f398d4ac0 --- /dev/null +++ b/params_shard_60.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ed353e9db30804f4140d9101b7c5169048b88632535fb64bd190c9f24ab71a4 +size 24395776 diff --git a/params_shard_61.bin b/params_shard_61.bin new file mode 100644 index 0000000000000000000000000000000000000000..6d6191366855e50a8d962629781d6623b717e5d3 --- /dev/null +++ b/params_shard_61.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ba68e6839351050475dc1a3264d9b44d079e3b9cc4e1e063fd39e091e583914 +size 29360128 diff --git a/params_shard_62.bin b/params_shard_62.bin new file mode 100644 index 0000000000000000000000000000000000000000..215a1640b6ed02a352e56ec9b5c25905da9e924b --- /dev/null +++ b/params_shard_62.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:299bbfadaa79af6a2272ed0f88c4d67250fb277374a735798c90b643ba682bf0 +size 58720256 diff --git a/params_shard_63.bin b/params_shard_63.bin new file mode 100644 index 0000000000000000000000000000000000000000..69d873200121440cee1bbd3a94800209a8d614b9 --- /dev/null +++ b/params_shard_63.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f03fdb0cb059ac3d33af6daf964027dace0453cd7462b771908f281df7b78421 +size 24395776 diff --git a/params_shard_64.bin b/params_shard_64.bin new file mode 100644 index 0000000000000000000000000000000000000000..75718cece8352e00df688240ddb3a29056f67d89 --- /dev/null +++ b/params_shard_64.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6b7ab18f92e8caab5dab2acb81fe6012c41dcdcdc7bc09fa2f53e8aae7d2afa +size 29360128 diff --git a/params_shard_65.bin b/params_shard_65.bin new file mode 100644 index 0000000000000000000000000000000000000000..d3198042bfb269ae588b917c54ce98b85e47baea --- /dev/null +++ b/params_shard_65.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7857c48bca4c247628ea3b524270459e4890578110b6b4b47bc7390afd559143 +size 29360128 diff --git a/params_shard_66.bin b/params_shard_66.bin new file mode 100644 index 0000000000000000000000000000000000000000..b8cfb0d83c255f639f8928017d8917043340b207 --- /dev/null +++ b/params_shard_66.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ecde51e54eccfafdd9e2da8f4a86ea8bed165f5c70954e8dc6202c66e3548a4 +size 58720256 diff --git a/params_shard_67.bin b/params_shard_67.bin new file mode 100644 index 0000000000000000000000000000000000000000..bc1ca2929838512e286cec0cd2e784e0cadc61ee --- /dev/null +++ b/params_shard_67.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fddcca501a70cba5a138a9caa93788797ccfef8680d4639f8c21b96fa01ba3a +size 25329664 diff --git a/params_shard_68.bin b/params_shard_68.bin new file mode 100644 index 0000000000000000000000000000000000000000..865a7a946a9c796e4dea95ceaed33d6e38d7002e --- /dev/null +++ b/params_shard_68.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb3e5e852aed00452e9e5f8128b95c0f4d5ce1b98275c60788454a479841c4c6 +size 29360128 diff --git a/params_shard_69.bin b/params_shard_69.bin new file mode 100644 index 0000000000000000000000000000000000000000..00e79ffed9700f8dde995292622fbf70b6ba7c64 --- /dev/null +++ b/params_shard_69.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fef3e5272cce8ec0d545dc3e997f6bcf66d0436180f61f53383fa0e90329a7b9 +size 58720256 diff --git a/params_shard_7.bin b/params_shard_7.bin new file mode 100644 index 0000000000000000000000000000000000000000..3c9fb0f837c69c30c8e1fa77f155d93f365490d5 --- /dev/null +++ b/params_shard_7.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e56570a06cc676cfe5f4934cdae1b73640cfaf50174d210c63330a4ea7691d1a +size 29360128 diff --git a/params_shard_70.bin b/params_shard_70.bin new file mode 100644 index 0000000000000000000000000000000000000000..ea2371ca7afa1a940948cccecd3cdc3ca1b7a952 --- /dev/null +++ b/params_shard_70.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a62403ae423823f761a8ec45de7f4ec9386489489f01f39ad8e2bbe8df35b8ac +size 24395776 diff --git a/params_shard_71.bin b/params_shard_71.bin new file mode 100644 index 0000000000000000000000000000000000000000..6657d9b0d0847a1441dd3b9feb9a524fb77af5d2 --- /dev/null +++ b/params_shard_71.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8754608e65a9ce594ef976e32d82bc29c65cb696a06a701825aaf777324f923f +size 29360128 diff --git a/params_shard_72.bin b/params_shard_72.bin new file mode 100644 index 0000000000000000000000000000000000000000..154ad86fb572dfc8ac875ea197f923f4674239d2 --- /dev/null +++ b/params_shard_72.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1791533cb829bbe3eb343878d00e3969d672cc774dbd6591d83a6310810b0df +size 58720256 diff --git a/params_shard_73.bin b/params_shard_73.bin new file mode 100644 index 0000000000000000000000000000000000000000..1e543bfb3ba6f1b90f4752f8742ae4141f33045d --- /dev/null +++ b/params_shard_73.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:912051fe7f8fa0d424bf1d0585820ca2d39cf84cab0548c9605a0cf63f4d2417 +size 24395776 diff --git a/params_shard_74.bin b/params_shard_74.bin new file mode 100644 index 0000000000000000000000000000000000000000..98d45cefd5c7786d8e07fafff48fdd64643d8554 --- /dev/null +++ b/params_shard_74.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0649050ddbd53b3e7ab6f620714a795b84fd529f207f76b120376869a6f44ad0 +size 29360128 diff --git a/params_shard_75.bin b/params_shard_75.bin new file mode 100644 index 0000000000000000000000000000000000000000..da0fa23795ca767c01be2207d5193a51a60d6807 --- /dev/null +++ b/params_shard_75.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e0af8a02343b3b6f18f1686f6499d4ace7c8b1b1155616d771d8dd4fb26e6d3 +size 58720256 diff --git a/params_shard_76.bin b/params_shard_76.bin new file mode 100644 index 0000000000000000000000000000000000000000..cf4770ab2929d7366643aed8302dcfaa9968bf1a --- /dev/null +++ b/params_shard_76.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5ccddb2342870ed730249ee26c852cd59c7e4dc9e4117103df56260e4b769cb +size 24395776 diff --git a/params_shard_77.bin b/params_shard_77.bin new file mode 100644 index 0000000000000000000000000000000000000000..b2ca84e5e38a6e23f9936627178a61c1a632ca85 --- /dev/null +++ b/params_shard_77.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00452d99e2e3c32f05a56cee56a774d9891582c759743caa3ccda4216d341c67 +size 29360128 diff --git a/params_shard_78.bin b/params_shard_78.bin new file mode 100644 index 0000000000000000000000000000000000000000..2e4478d867739d6407043d38089340fa872a88f5 --- /dev/null +++ b/params_shard_78.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1699beaef91afe05a9716ad77cd912c2e10f98a2ef423c562037e3669be2aeaf +size 58720256 diff --git a/params_shard_79.bin b/params_shard_79.bin new file mode 100644 index 0000000000000000000000000000000000000000..7fffa5fa0bed57f69d40136e91853827db65ad1d --- /dev/null +++ b/params_shard_79.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b3a93292034c14118771ede75fc4864a3f28c2d9f05045db66624113d2932d2 +size 24395776 diff --git a/params_shard_8.bin b/params_shard_8.bin new file mode 100644 index 0000000000000000000000000000000000000000..390207ffd8be1e34c07140cfd4cb776a37c2989f --- /dev/null +++ b/params_shard_8.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97a4324fdbe90b515cbe04a842d4478d88ae714f0318b9cca1e6493c4c7d87de +size 58720256 diff --git a/params_shard_80.bin b/params_shard_80.bin new file mode 100644 index 0000000000000000000000000000000000000000..cc1774f8f0e06e89445d5ccd82cee5835aa14d20 --- /dev/null +++ b/params_shard_80.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a12ea308e4e058f989160bd5191ce809b1fb2596c905519be6d0e97b81c1ac55 +size 29360128 diff --git a/params_shard_81.bin b/params_shard_81.bin new file mode 100644 index 0000000000000000000000000000000000000000..72de98b207f2de559cd3b43f9a36d80f4d6662b1 --- /dev/null +++ b/params_shard_81.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f4a830fcaf2e615a00481720ea827d70db2219f31ce5633d4de8f0a375e9002 +size 58720256 diff --git a/params_shard_82.bin b/params_shard_82.bin new file mode 100644 index 0000000000000000000000000000000000000000..378b322725a9cfed900c36a40b868f6d9037ca4b --- /dev/null +++ b/params_shard_82.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a80b86e78586edaf4f98099437872d6198e5c8b237a04ad6534f8742817e727c +size 24395776 diff --git a/params_shard_83.bin b/params_shard_83.bin new file mode 100644 index 0000000000000000000000000000000000000000..3c23688a8dce1e5fb7cf8d1781268054ce062b47 --- /dev/null +++ b/params_shard_83.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bcd7f845b25e595771af31bf43a5fa34f420d5d34b4f4b068bb2070376a0fc3 +size 29360128 diff --git a/params_shard_84.bin b/params_shard_84.bin new file mode 100644 index 0000000000000000000000000000000000000000..b4d8bceb9fd6ece9be3d6490448e15d0e71f762a --- /dev/null +++ b/params_shard_84.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abd9210df70bd74e1b47934b34a200926460b94bb047426bdddb9df361e9b942 +size 58720256 diff --git a/params_shard_85.bin b/params_shard_85.bin new file mode 100644 index 0000000000000000000000000000000000000000..4f85c690053bfc8aedb14691dda19e3383b54556 --- /dev/null +++ b/params_shard_85.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e576dd7f0cd5b23d43b6eb36cf375eff5b7e6a626fe4f77a8b1280714f21611b +size 24395776 diff --git a/params_shard_86.bin b/params_shard_86.bin new file mode 100644 index 0000000000000000000000000000000000000000..1efbebe70e2ba6a483e55503b8b9fe06d2970c09 --- /dev/null +++ b/params_shard_86.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef5bbedf37564ff29047f1356e765182f41b27091f6b8aba25096b3804ddfb9f +size 29360128 diff --git a/params_shard_87.bin b/params_shard_87.bin new file mode 100644 index 0000000000000000000000000000000000000000..99005d9638d5496caaa5fd04946c546da2eb052a --- /dev/null +++ b/params_shard_87.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f62bbcbe38281599f2631d9860e1d072aec0ff818b194af3e44aaefb20cf1d1e +size 58720256 diff --git a/params_shard_88.bin b/params_shard_88.bin new file mode 100644 index 0000000000000000000000000000000000000000..bfdaf1f12aa6581167f99c59d0cf846ece0b2c03 --- /dev/null +++ b/params_shard_88.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d78acaa73a514c8fbb41fb0a734473c807c32d837531110948862e83b908df6 +size 24395776 diff --git a/params_shard_89.bin b/params_shard_89.bin new file mode 100644 index 0000000000000000000000000000000000000000..72bb094af27c2331ee718b2c9e6e2c355adb01d8 --- /dev/null +++ b/params_shard_89.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d57dbe1e4fadd7d072fb8070edc875fd496671913d2b0b3c4f4924816f6669b +size 29360128 diff --git a/params_shard_9.bin b/params_shard_9.bin new file mode 100644 index 0000000000000000000000000000000000000000..176988bd66bf7b3929f3f1df17e41211252befbb --- /dev/null +++ b/params_shard_9.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9275477f28b1399ab98b2eb91c1ef446c5f3045eebaeb4339da66df447f1470 +size 24395776 diff --git a/params_shard_90.bin b/params_shard_90.bin new file mode 100644 index 0000000000000000000000000000000000000000..93c1873f8ebb19aa34cfd5af65d929bab99f9762 --- /dev/null +++ b/params_shard_90.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45fa7ea8ac45491d1f26441658fb29a2d0445974a81b26495ed778878259200f +size 58720256 diff --git a/params_shard_91.bin b/params_shard_91.bin new file mode 100644 index 0000000000000000000000000000000000000000..c88d2d206670a1f00c28b0c17e159383683613c5 --- /dev/null +++ b/params_shard_91.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbf9bfdc3625be17ef127b1a328a16f2c11c101eda42c7eb7d5f70493e7819ef +size 24395776 diff --git a/params_shard_92.bin b/params_shard_92.bin new file mode 100644 index 0000000000000000000000000000000000000000..4e065fcac292db773f0c0301723b229cbe1bb8a4 --- /dev/null +++ b/params_shard_92.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6aed70b94b592254fcfef6c725fb98da182fce0aa6f11d74979c7a99d22a6360 +size 29360128 diff --git a/params_shard_93.bin b/params_shard_93.bin new file mode 100644 index 0000000000000000000000000000000000000000..a6d99c72cf263ee6960931a563c131cc9b4cc3ed --- /dev/null +++ b/params_shard_93.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:987e7efcd05f25d73a9260f807da58160abfd3ba175a851b4f75f0b323fdfa07 +size 58720256 diff --git a/params_shard_94.bin b/params_shard_94.bin new file mode 100644 index 0000000000000000000000000000000000000000..de0b41a12074a27299058d20eb6003b0fd3a56c2 --- /dev/null +++ b/params_shard_94.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccbbe9f777260739d83ae82ccea9033867ab863d6b174c55f9007b78b745cbd6 +size 24395776 diff --git a/params_shard_95.bin b/params_shard_95.bin new file mode 100644 index 0000000000000000000000000000000000000000..5c09ae1ff8cdd31056956316f6c42d7941f0b2a6 --- /dev/null +++ b/params_shard_95.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2ef8dca7d48fe7db3bc3b918023ae5920ab31b552598670cffea24143806060 +size 29360128 diff --git a/params_shard_96.bin b/params_shard_96.bin new file mode 100644 index 0000000000000000000000000000000000000000..508704c2fa446f1ef8a5a9d409a2705d8d28a62f --- /dev/null +++ b/params_shard_96.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a161563257c432461e0bd1f5aea14592210bc67715c33fb9aa284aafcd222a81 +size 22544384 diff --git a/params_shard_97.bin b/params_shard_97.bin new file mode 100644 index 0000000000000000000000000000000000000000..67c273237a1020d5b9c36f249b3416788c24fcaa --- /dev/null +++ b/params_shard_97.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ccb3d372df4609a4c0e048f001318fea5f2635f1347f81bfae4a2b2fc6cd58b +size 21626880 diff --git a/private-llm-config.json b/private-llm-config.json new file mode 100644 index 0000000000000000000000000000000000000000..0c8c23c840f431a32925668399836121f066f815 --- /dev/null +++ b/private-llm-config.json @@ -0,0 +1,42 @@ +{ + "model_type": "mistral", + "quantization": "w4a16g128asym", + "model_config": { + "hidden_size": 4096, + "intermediate_size": 14336, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "rms_norm_eps": 1e-05, + "vocab_size": 32002, + "position_embedding_base": 10000.0, + "num_key_value_heads": 8, + "head_dim": 128, + "sliding_window_size": 4096, + "prefill_chunk_size": 128, + "attention_sink_size": 4, + "tensor_parallel_shards": 1 + }, + "vocab_size": 32002, + "context_window_size": -1, + "sliding_window_size": 4096, + "prefill_chunk_size": 128, + "attention_sink_size": 4, + "tensor_parallel_shards": 1, + "max_batch_size": 80, + "mean_gen_len": 128, + "max_gen_len": 512, + "shift_fill_factor": 0.3, + "temperature": 0.7, + "repetition_penalty": 1.0, + "top_p": 0.95, + "conv_template": "openchat3.5", + "pad_token_id": 0, + "bos_token_id": 1, + "eos_token_id": 2, + "tokenizer_files": [ + "tokenizer.model", + "added_tokens.json", + "tokenizer_config.json" + ], + "version": "0.1.0" +} \ No newline at end of file diff --git a/tokenizer.model b/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..8b443ef19c2a19acc3ac64fb9c3db4a72921dff6 --- /dev/null +++ b/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055 +size 493443 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a095bf823907e335f8cafe119c7199388e66fb5 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,64 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32000": { + "content": "<|end_of_turn|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32001": { + "content": "<|pad_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "<|end_of_turn|>", + "<|pad_0|>" + ], + "bos_token": "", + "chat_template": "{{ bos_token }}{% for message in messages %}{{ 'GPT4 Correct ' + message['role'].title() + ': ' + message['content'] + '<|end_of_turn|>'}}{% endfor %}{% if add_generation_prompt %}{{ 'GPT4 Correct Assistant:' }}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "legacy": false, + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "LlamaTokenizer", + "trust_remote_code": false, + "unk_token": "", + "use_default_system_prompt": true, + "use_fast": true +}