diff --git a/README.md b/README.md index 7b95401dc46245ac339fc25059d4a56d90b4cde5..764d76fa75d80eb41b1420e768258a3adb05a9b9 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,33 @@ --- license: apache-2.0 +tags: +- mlx +base_model: GreenBitAI/DeepSeek-R1-671B-layer-mix-bpw-4.0 --- + +# GreenBitAI/DeepSeek-R1-671B-layer-mix-bpw-4.0-mlx + +This quantized low-bit model [GreenBitAI/DeepSeek-R1-671B-layer-mix-bpw-4.0-mlx](https://huggingface.co/GreenBitAI/DeepSeek-R1-671B-layer-mix-bpw-4.0-mlx) was converted to MLX format from [`GreenBitAI/DeepSeek-R1-671B-layer-mix-bpw-4.0`](https://huggingface.co/GreenBitAI/DeepSeek-R1-671B-layer-mix-bpw-4.0) using gbx-lm version **0.4.0**. +Refer to the [original model card](https://huggingface.co/GreenBitAI/DeepSeek-R1-671B-layer-mix-bpw-4.0) for more details on the model. + +## Use with mlx + +```bash +pip install gbx-lm +``` + +```python +from gbx_lm import load, generate + +model, tokenizer = load("GreenBitAI/DeepSeek-R1-671B-layer-mix-bpw-4.0-mlx") + +prompt = "hello" + +if tokenizer.chat_template is not None: + messages = [{"role": "user", "content": prompt}] + prompt = tokenizer.apply_chat_template( + messages, add_generation_prompt=True + ) + +response = generate(model, tokenizer, prompt=prompt, verbose=True) +``` diff --git a/model-00001-of-00073.safetensors b/model-00001-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..df4a3eb85af15a6256e35067097196d42bb5f65c --- /dev/null +++ b/model-00001-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5afba22a8b5a10511ba27c43e5f840fb58be94db8d8c61c83cfc6f0c5413e773 +size 4995184776 diff --git a/model-00001-of-00083.safetensors b/model-00001-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0f1258ed70fad8bf0e4d6dd2df4282444c0ef53d --- /dev/null +++ b/model-00001-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52cfb6b029ec1cac22ee887fa18d5e91b0e2a9731f20082e3247ceb65ae2765f +size 4901155076 diff --git a/model-00002-of-00073.safetensors b/model-00002-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..68a06c9b9757bf4bde5823ee736cae6f2acb4548 --- /dev/null +++ b/model-00002-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77447e93d421f32a315a06629cf39e9fb118dd4482efabdee9ac545a8b01ab0b +size 4997775232 diff --git a/model-00002-of-00083.safetensors b/model-00002-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7a84d713b31ede62fdace4d2824f7c5fcc75123b --- /dev/null +++ b/model-00002-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e0a65b84b6e27097c39bf51fd2ae091ab6f2b88da7bd9da81afaa8ba8ecd391 +size 4465578998 diff --git a/model-00003-of-00073.safetensors b/model-00003-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7698afc225ff7d52c9b44b1dc9562477c2b8180b --- /dev/null +++ b/model-00003-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db145492b24a239309d4372cee9502f85b4b9c099c0fcd20b3d12f5e03807853 +size 4999329648 diff --git a/model-00003-of-00083.safetensors b/model-00003-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ffedb190fa7ef777cc867bf51e48a530023ede84 --- /dev/null +++ b/model-00003-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fbc8677434db966cb58b1b6c27ce677f161929ecc8b7de6a280cee12c55d5b3 +size 4483120096 diff --git a/model-00004-of-00073.safetensors b/model-00004-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9fdbece69f3e294cd6338cf1bdca5f555ba63e8d --- /dev/null +++ b/model-00004-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab4c5bdf4650ddbacd45305e3eab07f75535b88831ebf70138efa274121e9798 +size 4999329640 diff --git a/model-00004-of-00083.safetensors b/model-00004-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a06a6fa9a891722c0df599119c01f091817095a6 --- /dev/null +++ b/model-00004-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8ea30630454e332a2b2928c3d92354e056ad18d5a9850380979ec6f68a38709 +size 4697621266 diff --git a/model-00005-of-00073.safetensors b/model-00005-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..68062c0390341e25833e0ca6276ea4fad576ee67 --- /dev/null +++ b/model-00005-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fc23323d6d2c4d6c760588d893ce2b982959f8282252cc0283dec91c04af318 +size 4999350432 diff --git a/model-00005-of-00083.safetensors b/model-00005-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..09aba0be2cddfebefe97535cca22c0c4b1f294f4 --- /dev/null +++ b/model-00005-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5b5dd50047f7d6065254b5fda80564d8dc841ae66b055f2cdfee09dd75cc339 +size 4845992687 diff --git a/model-00006-of-00073.safetensors b/model-00006-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b586f4759a67c6b49ba3619d305664253cda7a1e --- /dev/null +++ b/model-00006-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f1460479dd51afcafb608dd511d2936e2e35298e8d698df12c6771cddc157de +size 4999330368 diff --git a/model-00006-of-00083.safetensors b/model-00006-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cdf23c9e3dee717b20f764edbbcb33e224cabe50 --- /dev/null +++ b/model-00006-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a92e99551976636fb62961234ee81c5deedc7b93ac609b23798c8694044d705 +size 4845992603 diff --git a/model-00007-of-00073.safetensors b/model-00007-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e0ca2335f3250ddaa226836a7fb77f6caa8e8007 --- /dev/null +++ b/model-00007-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ebd193d9ecb2e980628002bf22f786ec09650378458f93b552226008438acb4 +size 4997775528 diff --git a/model-00007-of-00083.safetensors b/model-00007-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2ee1cf1277db992fa23e4fc8d4103564b7181afe --- /dev/null +++ b/model-00007-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:112a74e250ddb6b72d475a9de71b9fe8f838236256e3776c77e5bbcd6a41b6c6 +size 4697621264 diff --git a/model-00008-of-00073.safetensors b/model-00008-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cae8470255ee78c2b261ace612d626f6a44e3d3b --- /dev/null +++ b/model-00008-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:939a66289e32e51a05ef223de15ef2dcac1e2df8a1f7a14ad0cba418248bd535 +size 4999329640 diff --git a/model-00008-of-00083.safetensors b/model-00008-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d268bcbc30f8e285415d269ff26558cc90c9d4e9 --- /dev/null +++ b/model-00008-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8af213f7c5a35d5ec6d60717272ee372e6ee91619677378bb5ec78542241474 +size 4845992705 diff --git a/model-00009-of-00073.safetensors b/model-00009-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a8138424603c649ce03a0a60d5947c88233555b3 --- /dev/null +++ b/model-00009-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d780df3809663fa68dfd6b849c36fe034a6407b89abb3d6bd039c37c56e66c60 +size 4999350120 diff --git a/model-00009-of-00083.safetensors b/model-00009-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5b8ecb2d894e283aaf85f6081ec7961290e8a073 --- /dev/null +++ b/model-00009-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3478ce2dd2bd11aeb8e75add6c480ab6a71beff355b12a93307846fffa5038a6 +size 4845992627 diff --git a/model-00010-of-00073.safetensors b/model-00010-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d4a39d0e24b86afd80bb1238de1e10ef7646b6bc --- /dev/null +++ b/model-00010-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:252372d9e15ac50e38f063bba6b1821e47f6e462f3ca907c940fce57105a5b5f +size 4999331168 diff --git a/model-00010-of-00083.safetensors b/model-00010-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a9397f25e8a8f66257f5748cd0f018a0afd2c8d4 --- /dev/null +++ b/model-00010-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a9bf8618db97a1d415cea4f3f0e319deeedc955e56380d1efb5008784afddaa +size 4697621264 diff --git a/model-00011-of-00073.safetensors b/model-00011-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7e92b610d79779b15c1f848ac71839748073b39e --- /dev/null +++ b/model-00011-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d342323f912911a6846a4d7f1b10662eafbd50a7207862c641ce97dded67814 +size 4999333528 diff --git a/model-00011-of-00083.safetensors b/model-00011-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..381d48805eeb75188f5611b1cd28c02b2e5e4dea --- /dev/null +++ b/model-00011-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62f276430176f4e17aac8785cc52a61f7c75f288a3a75b650abd4cbcc48d3996 +size 4845992697 diff --git a/model-00012-of-00073.safetensors b/model-00012-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..88e0afc7d03ebc1ae35342d0b9ea473f8ffb0475 --- /dev/null +++ b/model-00012-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd227bd4fc3889537081aa7fcf49add38c920f08a68f879a032c62f4f995b885 +size 4999354120 diff --git a/model-00012-of-00083.safetensors b/model-00012-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d116bb50373776a1a241a4b8fbaf481f4c016eb1 --- /dev/null +++ b/model-00012-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cff7d6ceef96b1b096d893f45634980c4ba5b5f9696d9b08a6f6fa66fc9a73c5 +size 4845992643 diff --git a/model-00013-of-00073.safetensors b/model-00013-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..452777077ad3e26eadd47e8d20e0f70268c1118b --- /dev/null +++ b/model-00013-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49e600b82550800e94ed6e7229315d8753d76786d850728cb2bc121b282d8b67 +size 4997778176 diff --git a/model-00013-of-00083.safetensors b/model-00013-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..63abeb684f657032d5a003a4090843a9565c1c89 --- /dev/null +++ b/model-00013-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73573c98f0617810b1c060b2cae6e7e32a0906e59a994b2ad954d93b020b30da +size 4697621270 diff --git a/model-00014-of-00073.safetensors b/model-00014-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3ce3bcf13a289fbaa2000daaf54803fca61a17b5 --- /dev/null +++ b/model-00014-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6bcfef3131ce3cf51222078569ea864011e1fe42381c6dd4a12922b48506b3e3 +size 4999332808 diff --git a/model-00014-of-00083.safetensors b/model-00014-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8cfbc0575c44c507dd6ea2b13fd6d435950fd642 --- /dev/null +++ b/model-00014-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98d4e75223718fe88e22271fc11d36978f9a5fc32eae0602f1b94fd536b95b71 +size 4845992701 diff --git a/model-00015-of-00073.safetensors b/model-00015-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f15e86ead82ebd757edc9662ec071a5c53ce1152 --- /dev/null +++ b/model-00015-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f146d7f33416e29dfec4696de5755dfdf907caf27b1bd9797f078c458fbfe1f2 +size 4999332800 diff --git a/model-00015-of-00083.safetensors b/model-00015-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d2b8a7c4958c3462d60619767664c621e5b34be6 --- /dev/null +++ b/model-00015-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ba2fbcc10cb99460d18aefec2d16ee72c70a9525f02f02c77c81d237ca9045c +size 4845992645 diff --git a/model-00016-of-00073.safetensors b/model-00016-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..34a4e6b105d691fa29422caa1e1dd4b6038b7ab4 --- /dev/null +++ b/model-00016-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f19eebbc5f83810f16ebbfb20fb18a6cd0920a0615f49952c5c6db1324ddf4f +size 4999353728 diff --git a/model-00016-of-00083.safetensors b/model-00016-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3c4a085ffce1e9c8be251641e6d9712e0a9c6f76 --- /dev/null +++ b/model-00016-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97726d8ae1e5803bf8848c51727278c36b064cd4d1a32b44b6941ba28eefe764 +size 4697621266 diff --git a/model-00017-of-00073.safetensors b/model-00017-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..17fb16f8bda3edf33d3b2a594c1982dde5b30974 --- /dev/null +++ b/model-00017-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1bd8c5817f180d905dbce74cdbfaf32d4968e913067a51cb90714b7c763be51d +size 4999333528 diff --git a/model-00017-of-00083.safetensors b/model-00017-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..809d95de36497d369dbd974cfb8adbf17c86f82e --- /dev/null +++ b/model-00017-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:249c5e61917dd75f61a5ec96f24d6880561245d01588ea3fc35b8cce827f8733 +size 4476130045 diff --git a/model-00018-of-00073.safetensors b/model-00018-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b382cc981fd1755c90606f04d3484c0bf34722ce --- /dev/null +++ b/model-00018-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8574a66d4e69cc62d7aa5346ac149caff1dd648668525b45cfa632b84eee56ec +size 4997778592 diff --git a/model-00018-of-00083.safetensors b/model-00018-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..70b8e41b2617ddb639b4aaa50dfd67048c63d30d --- /dev/null +++ b/model-00018-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a87de84e8d679caebe11bd61525f81b59e4a8a862d795b222a09d760d0905851 +size 5294085231 diff --git a/model-00019-of-00073.safetensors b/model-00019-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..20b463bd367286c57cc5b3f68cecc5ecd4935d52 --- /dev/null +++ b/model-00019-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a96fde1105521c41a4eb35afe3f8d0724886d87fd6e78ed3e0bed0b9c908712b +size 4999332800 diff --git a/model-00019-of-00083.safetensors b/model-00019-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d2f11a3695a2a3fdadc0ee41dcf6a3e4f7770d57 --- /dev/null +++ b/model-00019-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ec6763c00efe237c0c6e55cc52a986de6ac175d6c3bfa58926669308cdb5adc +size 4465579059 diff --git a/model-00020-of-00073.safetensors b/model-00020-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5cd2032f1abef61d865e6c764a30218916228cff --- /dev/null +++ b/model-00020-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8c741784786c1ec9afb3f8e97e5e2cb076eb8932380cbb543dd6b315940de05 +size 4999353280 diff --git a/model-00020-of-00083.safetensors b/model-00020-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..770e684319123b058e8e9d0a1c8e522e0f5ea38c --- /dev/null +++ b/model-00020-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f450291e05ed8e53361c9fce5a4d5e367a38763dbe03c945371db11b3d7cf6e0 +size 4465579175 diff --git a/model-00021-of-00073.safetensors b/model-00021-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0830dd6b0aeef2d4fe07a139cd30d5c73be01970 --- /dev/null +++ b/model-00021-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b44cbfbdce45a9ae34b78f0b267d7dba1a3c7f9cfe2d23ba6adb32f121aa941 +size 4999332960 diff --git a/model-00021-of-00083.safetensors b/model-00021-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..61740335da503c45d8258c915a133d502ebfb874 --- /dev/null +++ b/model-00021-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d95649781ec30c36e23ef5c3f0f175353b91be252666ae2b2c7c71ec3f9fdce4 +size 4465579127 diff --git a/model-00022-of-00073.safetensors b/model-00022-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7bf8b6db6a502509491bc9f35d6cefc4670f08e5 --- /dev/null +++ b/model-00022-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76279e0e0ae17de8987037638536a4f6aa3fe6b1e73a5c1504f7b630f8928492 +size 4999333528 diff --git a/model-00022-of-00083.safetensors b/model-00022-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dd255567629bdcddfd7e724ee6c41e28fdfd7a96 --- /dev/null +++ b/model-00022-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5582d5c8102ce7fba0b1366b665e7743599feea3905ab1b35b875b9f8b3287f +size 4465579171 diff --git a/model-00023-of-00073.safetensors b/model-00023-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cd37e2d9f20603f1cbd00b69f0eb054fb0ccc6e4 --- /dev/null +++ b/model-00023-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59ff5f39985a920204f8cf0734350cc6338a5ab32811e1d203737aabcfe9265d +size 4997778888 diff --git a/model-00023-of-00083.safetensors b/model-00023-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cc19c56c5767a3b564a4991a869fb119487de1e4 --- /dev/null +++ b/model-00023-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:065597764fcecd35063477a21d16a7c1b3906121521015bc88a6ed468746fd0e +size 4465579175 diff --git a/model-00024-of-00073.safetensors b/model-00024-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d19fe6341771af4df78bdff8ae2b3d7e587e1026 --- /dev/null +++ b/model-00024-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a4ea3bbb69ade327e423da0dd788a202fcd249d2d2144c5402e354ee879f961 +size 4999353280 diff --git a/model-00024-of-00083.safetensors b/model-00024-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a1f4d192cbcf94beecebb7b8aad686e0c1241c6d --- /dev/null +++ b/model-00024-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ae5e90d5e1b189e109e8616c6e1da765917ad1ffb5b0a794089c70322f48512 +size 5287662759 diff --git a/model-00025-of-00073.safetensors b/model-00025-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bfb51a628bbbdec44525462c208bce98ee5fc498 --- /dev/null +++ b/model-00025-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47fd3f066e87591efd0f007d75fa5bf9556fb66e9637cc30b8def80b2c485db0 +size 4999332808 diff --git a/model-00025-of-00083.safetensors b/model-00025-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..553173f5b586799f95f33667170106c9ede04f4d --- /dev/null +++ b/model-00025-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c54c3f5047f60b40fb5bde6f13cd48deb8994be04af326344e4d8c56fe87e6d3 +size 4137220771 diff --git a/model-00026-of-00073.safetensors b/model-00026-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..18661c5209c5473ac6aec0045ae31f19717c370d --- /dev/null +++ b/model-00026-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c02bfc889446ec4fa834e4e91c88d004d58f5344aa537ece7e1f629e63a6e182 +size 4999332800 diff --git a/model-00026-of-00083.safetensors b/model-00026-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..be6a75ae78fdc7619efc186ab5ca31523e5c0e50 --- /dev/null +++ b/model-00026-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef436a970d8dfd470b705a360d057367e8ffe1f82e2e77f93891653f9a87a3fb +size 4697621272 diff --git a/model-00027-of-00073.safetensors b/model-00027-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9a960eb7e396de71735df42c186f077e13649698 --- /dev/null +++ b/model-00027-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:426ffce4d1363e0229d8a304cd0e2be859957dd8f0be6d0b9f17ffcd4f87db8b +size 4999353864 diff --git a/model-00027-of-00083.safetensors b/model-00027-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..80ef51dddbc645eca61fdbeb4008577dc9672408 --- /dev/null +++ b/model-00027-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70d0394035b107b236e91fef37285805c61fc5da9bd6402606331821e393fe19 +size 4845992755 diff --git a/model-00028-of-00073.safetensors b/model-00028-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..40014b116c6d429138237e6f25f9153cd15e9f2b --- /dev/null +++ b/model-00028-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c0e7bb4210d163e1042e808bd6537792db55a8cdd04208b06ee4295a7985d36 +size 4999333528 diff --git a/model-00028-of-00083.safetensors b/model-00028-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bde9383d950b7ae1ff89c4f167a1a04e9818f53d --- /dev/null +++ b/model-00028-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56dd6dafc164dd4445c7d907d66308cce1fd5f3965b767047ab809490a79614e +size 4845992669 diff --git a/model-00029-of-00073.safetensors b/model-00029-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ef94186467e73b3a359a9c1a44755dfdb625c3ed --- /dev/null +++ b/model-00029-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9730503005d5523e02d183680f773da49edd7e857457844b6d206b7f994ff81 +size 4997778456 diff --git a/model-00029-of-00083.safetensors b/model-00029-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e7552a6e6950f9d3827840fb14248df0c123132f --- /dev/null +++ b/model-00029-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4a4dbd784c29b00e609d69fdcc1af1e512192963af94ca302263074642189e1 +size 4697621268 diff --git a/model-00030-of-00073.safetensors b/model-00030-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..64007890212a4af72601a60237ced0fb8d61d395 --- /dev/null +++ b/model-00030-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55cb4cb8c7b8a400be2c62919447101e81fc134afe8e6ec52088b0f37fa55988 +size 4999332800 diff --git a/model-00030-of-00083.safetensors b/model-00030-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8b66dc62f97c142876793e6b5cec4405142cddb2 --- /dev/null +++ b/model-00030-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70b81dd497abb0873f258f08cdda06fa30eeebeb183e79b0ef264d64813896bb +size 4845992757 diff --git a/model-00031-of-00073.safetensors b/model-00031-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0bdc251e4b050af61291a01672e519e11792a2df --- /dev/null +++ b/model-00031-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbd763c8ed7a5aa791e23d280ba5a847857c0a39ec561b5ba7ff877cbae7f1bb +size 4999353280 diff --git a/model-00031-of-00083.safetensors b/model-00031-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..14fb22078a38fb7b5099e55002d8452a78de3bc4 --- /dev/null +++ b/model-00031-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecd08198a0ac5e7011cd4d22030fc34331de22ac582323d8755a6a6ed16638e6 +size 4845992663 diff --git a/model-00032-of-00073.safetensors b/model-00032-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..330235cf0d4ef85a03856fa241e1200ee4a3c743 --- /dev/null +++ b/model-00032-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0727fa56189d2851c1e75d82a7df7e8d53373473251373bcd651878f906be5fd +size 4999333096 diff --git a/model-00032-of-00083.safetensors b/model-00032-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..46b1891402341165c0f58182ec6fe6e1fcd91d0d --- /dev/null +++ b/model-00032-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dffbce5bebb0a9ae1f1a4b88c88962a0f5212d4b1aaaf50b68e798cd4ddc80b3 +size 4697621272 diff --git a/model-00033-of-00073.safetensors b/model-00033-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e0961e3fae21cd23065ac8d0fc645895ffcd9bec --- /dev/null +++ b/model-00033-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e961c667fbd929b9d75dee9eb1700ba225b40e6e3e1d60b06ff28881ccb4de4e +size 4999333528 diff --git a/model-00033-of-00083.safetensors b/model-00033-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..655ddb1d11d68157387ba55cc2b2199ac31cadb9 --- /dev/null +++ b/model-00033-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:265b8a4f79a41a6023efa7ba25cdd51d70326a9a91b7a5ad70f3accfe68ecaf7 +size 4828451975 diff --git a/model-00034-of-00073.safetensors b/model-00034-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..60baa359f2694dc3ce43be49e4cea4e7a481081a --- /dev/null +++ b/model-00034-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:253826ec8238e57bb3c4fd0019574e09fdecd50f36264f37cd61703ace779d46 +size 4997778752 diff --git a/model-00034-of-00083.safetensors b/model-00034-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8bed35e7e5fbd98ee6b38dd34eb01fdc72da2573 --- /dev/null +++ b/model-00034-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98fcb4f7f2cbba842868c2e22f6626ea36128137a2d5f68f59f00a79476fef4f +size 4483119923 diff --git a/model-00035-of-00073.safetensors b/model-00035-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5958c81ffa292c30cccde3779d83267517edf1df --- /dev/null +++ b/model-00035-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3e89c0d4ad618e7e5d5ca3df1ee8b337cfa6a80db645097b36ac9257ff4a92e +size 4999353280 diff --git a/model-00035-of-00083.safetensors b/model-00035-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7082a7e25c2600ae845844c61edffd2b3692c6d7 --- /dev/null +++ b/model-00035-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3e97810bb1361d5b1a4b98a5bbd16138ca317bf959c00e44b939b1ba66f7d49 +size 4845992661 diff --git a/model-00036-of-00073.safetensors b/model-00036-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..aaf111298b0859374e276cf26d701f95bed26c19 --- /dev/null +++ b/model-00036-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f191a38df677510268c0188fdb70485ee82d41f3b1db22ada9f0c5de7320f5f8 +size 4999332808 diff --git a/model-00036-of-00083.safetensors b/model-00036-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..af9077ad4f078d9521e4a17f66fa8acd15f121ce --- /dev/null +++ b/model-00036-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15cae63c55cce4377809863c7db4cc0d54308b0ca53281b33b3237d1086a5653 +size 4697621272 diff --git a/model-00037-of-00073.safetensors b/model-00037-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..668a19f654b1f415fb836c280de1b9117670504b --- /dev/null +++ b/model-00037-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bdf7194996aaa2f299efc1d8de468fffaa986d91f1ae872cb09bb32c679bd34e +size 4999332800 diff --git a/model-00037-of-00083.safetensors b/model-00037-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..23368fbf5d32d91af319545e73cfaff97d133d92 --- /dev/null +++ b/model-00037-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e558ed5a918f14f154b012da748ad501fd2080da1eef165fa995457b85ed707 +size 4845992755 diff --git a/model-00038-of-00073.safetensors b/model-00038-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..029c5b5d9b072107ac5f072a7ba13beb1ebda6f5 --- /dev/null +++ b/model-00038-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de6ea5deecf1a5ae24a69134303d86063e1cb1069cad1e0785aaa55bad14e5fe +size 4999354000 diff --git a/model-00038-of-00083.safetensors b/model-00038-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0a8a2940d91f98a2568005ae738a88475b55f618 --- /dev/null +++ b/model-00038-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25a349424e2c4c129d26a221c117efc89a9fee310ef3a3423db83e6072b38db2 +size 4845992657 diff --git a/model-00039-of-00073.safetensors b/model-00039-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..849e0d1b08a2738feb4ed0fbd7fce75773508a74 --- /dev/null +++ b/model-00039-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60efead34f6579e753195f80bc372a1a17a9649fa84b3983b8e1aa1de1439455 +size 4999333624 diff --git a/model-00039-of-00083.safetensors b/model-00039-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..408d8cdccb8d6d5eb51f4d69649feaca501dec43 --- /dev/null +++ b/model-00039-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16ab782e1855aea3c4325f1cb134c22a2391873292813bd273dcc89ffc74c4ee +size 4697621270 diff --git a/model-00040-of-00073.safetensors b/model-00040-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..548e49a874d7a03bdeb529e4f7c30ed46e96c4a0 --- /dev/null +++ b/model-00040-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:344ec15f93397bea3d89ac9b306cf1361a29e0f91932acee75d15fa781720763 +size 4997778224 diff --git a/model-00040-of-00083.safetensors b/model-00040-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cb53be810dbb58a6046759dd9a560dd6dcd9162d --- /dev/null +++ b/model-00040-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e19b4e38122e71d4789ebbac4d388f01f99a552996cdb8e27524d5f5068a06c2 +size 4845992721 diff --git a/model-00041-of-00073.safetensors b/model-00041-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ff1867702f6d99607deb5e33f463dc08eefcd49c --- /dev/null +++ b/model-00041-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5bb62d2ac05b776122d4e1f1b767a4e57eac5765538705b1a32f39376063471d +size 4999332800 diff --git a/model-00041-of-00083.safetensors b/model-00041-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4a5dae978bdb418273120ece36ae21efdb1f2638 --- /dev/null +++ b/model-00041-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd5b6ac4f59690756191ac8411d9b13bf9059309f78c8780a59e3c7fb5590c76 +size 4845992675 diff --git a/model-00042-of-00073.safetensors b/model-00042-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a205da9482a5a770d8c299759bc397d96e7a6b1d --- /dev/null +++ b/model-00042-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa0f62e8e9830829e8de0bdec49862e44154a66180edb3b55594465e7fb09f0e +size 4999353280 diff --git a/model-00042-of-00083.safetensors b/model-00042-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7b44c5221a679df0b8c6fd688c94eb127cf8a2a8 --- /dev/null +++ b/model-00042-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0acae0c7cb69fc1981d2292d7d2b77dcf90da316440ee97dfb955b0491b667e +size 4697621272 diff --git a/model-00043-of-00073.safetensors b/model-00043-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4f739a5addaa64e411b92fb228f1ea74492a2aff --- /dev/null +++ b/model-00043-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffb43c8b2af091bc61311c03f4221b910a5f35c4deda882550eb8470f07cee51 +size 4999333232 diff --git a/model-00043-of-00083.safetensors b/model-00043-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2bb6c7eceaca8fa9c48f902ac12564e33174fc26 --- /dev/null +++ b/model-00043-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9f6269b6d622e4ba29fa9a7b51c5e64dd697a9b4f6fdb985988e06fc9727bcf +size 4845992733 diff --git a/model-00044-of-00073.safetensors b/model-00044-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2860ce2a8273bc82eddac275850d88bbf0c5a251 --- /dev/null +++ b/model-00044-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:207c914cf19f14d992155de1da0203d5f381080a7549a36a15dcd49834602ca6 +size 4999333528 diff --git a/model-00044-of-00083.safetensors b/model-00044-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dd463ffe68742dd477d3636294a9945e303cde2b --- /dev/null +++ b/model-00044-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:315316b24bfae42dfd468698908fa809a2a84300c5b742886a463013494f49b0 +size 4845992675 diff --git a/model-00045-of-00073.safetensors b/model-00045-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cea3b5d9b2cea8bc7db85c0805ca01ed820db438 --- /dev/null +++ b/model-00045-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:222e99670adf7ab43714749d8fef3a96eab89b82458f662baa9a39de3350c746 +size 4997778616 diff --git a/model-00045-of-00083.safetensors b/model-00045-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a4cef6938cb1e56aa4e698541990f2a89f05b09c --- /dev/null +++ b/model-00045-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc19be07dee2628ced6714e1499ca1333cb9f3a7c147da17c4cf25cabde4a7b1 +size 4697621270 diff --git a/model-00046-of-00073.safetensors b/model-00046-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a892bff5f770932e83a946fbf2af2a1f0cdeea01 --- /dev/null +++ b/model-00046-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bdfd36e8c4a73db68098d3c4dd3ba3f920bc1ae723a66cf0052d324bd3a29bfd +size 4999353280 diff --git a/model-00046-of-00083.safetensors b/model-00046-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..35a6f4dc5c7fe1bf3da92f635655fa20d1ab3662 --- /dev/null +++ b/model-00046-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ea7a047de7095857d989f88fc18d94068735e33997bb335e0347baa8cc10bd1 +size 4828452007 diff --git a/model-00047-of-00073.safetensors b/model-00047-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cd975ecc168009bc0542b4611e2c01056739d02a --- /dev/null +++ b/model-00047-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:446a883e44a0834fe8278abe89e57e58c01e4936456f5e510a75196565b0b0b5 +size 4999332808 diff --git a/model-00047-of-00083.safetensors b/model-00047-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e91d0e95cecc3b3f4906afb4def99d55251df645 --- /dev/null +++ b/model-00047-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef9424a217f2f77858a60c20606384eef6f031cb8e5d4568dff8ca251b73a22b +size 4465579175 diff --git a/model-00048-of-00073.safetensors b/model-00048-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..52212f372ae26fc79de82873317b33f1db699896 --- /dev/null +++ b/model-00048-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8311eb05675eee176902deeffeeecc4ea7c366ae2f9712f93715e1579fd3a46 +size 4999332928 diff --git a/model-00048-of-00083.safetensors b/model-00048-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2cb929e3116048aaf9d8ab5dbb7afc3e5937ef41 --- /dev/null +++ b/model-00048-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:827aa56db98217d970dacf5dd2c729f7d186522debc19d3a657bda0fb05deef7 +size 4483119869 diff --git a/model-00049-of-00073.safetensors b/model-00049-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..400e9a75090e59db03d1f4128dd2f62281b397f6 --- /dev/null +++ b/model-00049-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99f74b9b89ed71cfcbe4950ab4915e8234488bae703cafb371919834ec7a7516 +size 4999354008 diff --git a/model-00049-of-00083.safetensors b/model-00049-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..67ea691a29126e3ee71ab466a362f25d690795a7 --- /dev/null +++ b/model-00049-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b947fe72237337ff0cfe9ffc84c6a751fa4b8de954ea05bd7e24d55ef3a909e0 +size 4845992657 diff --git a/model-00050-of-00073.safetensors b/model-00050-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..743daa7f411deffa25688b236cf43ccc267fd817 --- /dev/null +++ b/model-00050-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4689976092b0187c44b28fc9b46b97f664fba628a6577c18bc734a9b181b1997 +size 4997749984 diff --git a/model-00050-of-00083.safetensors b/model-00050-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fcba91ce61d6ab8574a990eef592bbb994816a01 --- /dev/null +++ b/model-00050-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:826c086b6d95845d3fe1025ebe15e8ea21809d5480cec807b8769526f071fd05 +size 4697621268 diff --git a/model-00051-of-00073.safetensors b/model-00051-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e1f6abe055ed9cbab86feb379e80bc11a56001af --- /dev/null +++ b/model-00051-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab66f8ca2f772a44070da8b4b8948f71bbfc1ae2569715bacbdf4d186ce400bf +size 4999361728 diff --git a/model-00051-of-00083.safetensors b/model-00051-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6edd43a14da5329588c9c71514e1958681e37a2f --- /dev/null +++ b/model-00051-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fac001813b4538c8c734aeccb93a9053476e00d64df401afc363f19aac36d3e +size 4845992723 diff --git a/model-00052-of-00073.safetensors b/model-00052-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..932d74a04aa5ac7c2798908fc196b22220941a91 --- /dev/null +++ b/model-00052-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:928c61185dfa00ee6e8a6d72d37a88efe3ec8fc1a37090b0ddde284774b77e8e +size 4999332800 diff --git a/model-00052-of-00083.safetensors b/model-00052-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..71fc87a40e110d15b3e54d8c87b133a6919f4d84 --- /dev/null +++ b/model-00052-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7efecfcdeb90be43fa70433c5dbb155df6e0f7ef8926dc41d52bf272bb92a7b +size 4845992679 diff --git a/model-00053-of-00073.safetensors b/model-00053-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a3235d33f0dd4344372f01a59f89015e3dcb6f8f --- /dev/null +++ b/model-00053-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a5c9f83bd1b30e42c6ab49910b8ec3c2eed205ce6e4db7114f5aa301c794eab +size 4999353280 diff --git a/model-00053-of-00083.safetensors b/model-00053-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bb1391863631adc34a8ce4ea59ed5d137b450bda --- /dev/null +++ b/model-00053-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:759fd216561b21158ca402b184e10d18a5d5bc1817289522c572e794e106b4cf +size 4697621270 diff --git a/model-00054-of-00073.safetensors b/model-00054-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..65115c87f30c57e959a29150224c37a3103aa2d3 --- /dev/null +++ b/model-00054-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6b3c7c84298318c5822451dc575f99fd3f809df79d5fef38b1f750e878272a6 +size 4999333360 diff --git a/model-00054-of-00083.safetensors b/model-00054-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e7628da48a4add8c0d87d9cd7f3463bc62c43acf --- /dev/null +++ b/model-00054-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6cf91ded711a54f312035648c6663dff24d3c6f291c443b45dc65171596ece0 +size 4845992755 diff --git a/model-00055-of-00073.safetensors b/model-00055-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1eac3710ce9014c6a711203b0686fe117e8cc0ea --- /dev/null +++ b/model-00055-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c31d883951ae567678d3a6d8871b33f2f58f961ad21642568198100e86beb08a +size 4999333528 diff --git a/model-00055-of-00083.safetensors b/model-00055-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a8a7a2a41bf6f96aa3749d6b7213ee153cf2227b --- /dev/null +++ b/model-00055-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9139b91ba16bedfa9e427832850e67dd2808264613c37920b5eb474ba6b772a8 +size 4845992671 diff --git a/model-00056-of-00073.safetensors b/model-00056-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1db47503d3d1a258f2c2689600b740ece3646352 --- /dev/null +++ b/model-00056-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be7bf035e5d95ec537fcbd0c0f294a944819c4c7ad7251641b737d311994f10c +size 4997778480 diff --git a/model-00056-of-00083.safetensors b/model-00056-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..16b71dd2ee4e28c9ca93694de0c6f5703519a8f5 --- /dev/null +++ b/model-00056-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:329d278cf950deb2ca4750acb1c93ab3124664108609fc9aae0bb949c75f8663 +size 4697621272 diff --git a/model-00057-of-00073.safetensors b/model-00057-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6ce26dbce71ef6df8339f49b46fcfa536f836f46 --- /dev/null +++ b/model-00057-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c90fda138850b979845bea1d105ce58451376d02b3d41d537bd629b42cb3a5dc +size 4999353280 diff --git a/model-00057-of-00083.safetensors b/model-00057-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..efeb43a6769380fbf3f017f08f208dde3c0cbfc2 --- /dev/null +++ b/model-00057-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e88bbff7d1fd890750f8c7567f6b9535765a187ead8f9272ca7e8f9df8cea8f +size 4845992755 diff --git a/model-00058-of-00073.safetensors b/model-00058-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..344d854c9da5058fbf629dfd6ba0a1ea2cfecb3e --- /dev/null +++ b/model-00058-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d4af0554f291164588cbad42a861562cce240794f1ba67405d36e7f7f5c38bb +size 4999332808 diff --git a/model-00058-of-00083.safetensors b/model-00058-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ba761a226b95f324e2fb3fa1c7a62404f2d153fa --- /dev/null +++ b/model-00058-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:962e3c6ca03b98a7f457156a3c2935db0d66ef1ec2d84fa7c0a5fd2562c5aa06 +size 4845992655 diff --git a/model-00059-of-00073.safetensors b/model-00059-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e9644b3ae20e41320d4dbb2390981f24d6370317 --- /dev/null +++ b/model-00059-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7eb285262dfc815136787b13d85176a6e86f258067cf0a1a28dbddcb9d7cd445 +size 4999333064 diff --git a/model-00059-of-00083.safetensors b/model-00059-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..15916bd22caa5efde9347aab39b0e0fa7860a7e0 --- /dev/null +++ b/model-00059-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:825e6f06b5fd5104e070ec945283ce867e743d26d4121c99fef67eb7ea3bbd7e +size 4697621268 diff --git a/model-00060-of-00073.safetensors b/model-00060-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..653f4f366966975e33ef85a703b3994b8db7f91f --- /dev/null +++ b/model-00060-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa5a1b850c4c2539312cbae47a951d6972a38f635871be685489ae8db64002ad +size 4999354008 diff --git a/model-00060-of-00083.safetensors b/model-00060-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..27780a4b6aeac669190e50dfee9df0c97fbbde83 --- /dev/null +++ b/model-00060-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85c4f67c0fb94d2754d814b334ac4309c9933467bb5ab70723623b76ae2650cf +size 4845992757 diff --git a/model-00061-of-00073.safetensors b/model-00061-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4ade6150bc0e63247c763ec16cd148087ff9c637 --- /dev/null +++ b/model-00061-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01f675896b048729be4b080268ff348ed0b881872c9d5d2969f4b09c98a16821 +size 4997778776 diff --git a/model-00061-of-00083.safetensors b/model-00061-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..af873b950c4e7b2931de2a7eafcce83f875541c5 --- /dev/null +++ b/model-00061-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38725737e815c277c4af2ba0834d01920d6291553fd079370b7a853f882f79ae +size 4845992693 diff --git a/model-00062-of-00073.safetensors b/model-00062-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cf888df945495937ce6e8c401a9958095c97161f --- /dev/null +++ b/model-00062-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2c661dd2f2a9979ab653d534eb4f8a4df30ef36034e0191ec7d36c650d50939 +size 4999332808 diff --git a/model-00062-of-00083.safetensors b/model-00062-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7fb76a2b89677f839bc438f36a3125acb797f696 --- /dev/null +++ b/model-00062-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a131e836e717e6a3f4ce3186d4e632e850d3150fa09aab145f8476b0cf34e6af +size 4697621270 diff --git a/model-00063-of-00073.safetensors b/model-00063-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4b06ac553306e05150712c1e45b8dcc7b4caab4f --- /dev/null +++ b/model-00063-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1abb49cf5a938a3d687825cdc262408e81a643f42007f6fde3110c097417fca0 +size 4999332800 diff --git a/model-00063-of-00083.safetensors b/model-00063-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..88c3c288d778c2c7e019ede6f282440042a61ed2 --- /dev/null +++ b/model-00063-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea551057c3153d4210a21315b1790f2190306aac47e8ea789a3d23805110825f +size 4845992731 diff --git a/model-00064-of-00073.safetensors b/model-00064-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..04c4418a55280a409e934edc81d2d1dbb0ad5888 --- /dev/null +++ b/model-00064-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c2e6308866cc69249943c62df13ac2a753279beda92fcc1ebc69447062a881d +size 4999353280 diff --git a/model-00064-of-00083.safetensors b/model-00064-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1cafa67f5e8559bb89043032ebfd86e9f66fd376 --- /dev/null +++ b/model-00064-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2edd03a5a968d1c21829db53a567ba2581583947ba02c2fe9b257c332d749b59 +size 4845992629 diff --git a/model-00065-of-00073.safetensors b/model-00065-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8433e44170bab1e24e895dfe926c1d6db0dddd49 --- /dev/null +++ b/model-00065-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:905865443d1f6c2e9f9ef4135bb0077d1ab6358c839cace16a593219f6254668 +size 4999333496 diff --git a/model-00065-of-00083.safetensors b/model-00065-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..89a60e95e0b05aa4b0d668b7c941d26cb1685c2e --- /dev/null +++ b/model-00065-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c0cc2cbd0361c1a3782168946217688362353d1b0914af6c2ab05caa3e3da4e +size 4697621268 diff --git a/model-00066-of-00073.safetensors b/model-00066-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..974a396569870eb9319bc9ed12886dd2dab8c95e --- /dev/null +++ b/model-00066-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbb0a688576c3805bb9cce1c353c643cda997a9e362b96fef0a52b0850be2f6a +size 4999333600 diff --git a/model-00066-of-00083.safetensors b/model-00066-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..df5acf8a908278bc7aa2192b55327f69891b6a07 --- /dev/null +++ b/model-00066-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abc1872cbbd880bd1a5110598d232f91338d18ec594b1e7bfa219a4b216dc3eb +size 4845992753 diff --git a/model-00067-of-00073.safetensors b/model-00067-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..23cabacd62187e0c8bf58716991b1f1bcfd4b738 --- /dev/null +++ b/model-00067-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f6cf8c0737a28896b7330e3d1b4cceb86d7c96b5dd3d504a01a8a544ee70c72 +size 4997778280 diff --git a/model-00067-of-00083.safetensors b/model-00067-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..60b8c627c5a206b66cdabfc385e1d4a10a504398 --- /dev/null +++ b/model-00067-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:141f400203c44952c36610053a26b246f848b98b7558ae582c4a632e7bfc11a5 +size 4845992643 diff --git a/model-00068-of-00073.safetensors b/model-00068-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d69ed53bf3e3925368ecfaadf39862a80249e0eb --- /dev/null +++ b/model-00068-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:666e75c9a532b9dcd4343b7cb41391f17d6e997256a269b599ecb24577baa50a +size 4999353280 diff --git a/model-00068-of-00083.safetensors b/model-00068-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..174663d16a8964bda8d5cc0afcd86e9503cfc13d --- /dev/null +++ b/model-00068-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:113e861665a2b77ae93be8596985d0679097bcfe6c4877e9a79033dfca8078b3 +size 4697621270 diff --git a/model-00069-of-00073.safetensors b/model-00069-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5ab3b0d70a0de4acf8d75da06880e890ead16151 --- /dev/null +++ b/model-00069-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f64d4eb5652dca4b2a30aeab6954b096c602acc3ac754a3792f2bbd8ed0585f +size 4999332808 diff --git a/model-00069-of-00083.safetensors b/model-00069-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..233e0d17b9021172e27fc91c13b1bdd62b1929d0 --- /dev/null +++ b/model-00069-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ba45520ecdf1f9255650602b847b480133bbac7c079603f69589e7e5ecdf036 +size 4845992757 diff --git a/model-00070-of-00073.safetensors b/model-00070-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d5eed95daa02043a1fffbb858fc8ad636e42ed54 --- /dev/null +++ b/model-00070-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5149d97aac58abb5b900605e3f123dc367611f2e2419bb9f3e9becf98328fa75 +size 4999333200 diff --git a/model-00070-of-00083.safetensors b/model-00070-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4cafe6656a2ddf4ebc7f0fb014cac09225430360 --- /dev/null +++ b/model-00070-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ac123e1b114254a0bc7c52726fff42b4c4151c2d4c935d9f8b3f3d1e4df49b7 +size 4845992701 diff --git a/model-00071-of-00073.safetensors b/model-00071-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bbfe3bf5d561cb57ee2175773e86624c2c73690d --- /dev/null +++ b/model-00071-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1509de5a416b8ece7753ebe3ed065f41c0d4257a040d42ff70e96a64d4487eb +size 4999354008 diff --git a/model-00071-of-00083.safetensors b/model-00071-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e78d4e5e57f920e5a73de9bcb6772d11036fda3d --- /dev/null +++ b/model-00071-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eacd529700206336305f4320ed3bb1f512d1d092defc9bcf3b6384747b5a3f9e +size 4697621270 diff --git a/model-00072-of-00073.safetensors b/model-00072-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..83c76593de0d57ff9729c10ea83592815b6c0ade --- /dev/null +++ b/model-00072-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:819583dd77ddb68a3f7ff5541d86c7eb360bab6cfe42b5dce1f7ce9f5270cac5 +size 4997778640 diff --git a/model-00072-of-00083.safetensors b/model-00072-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a34936096c5e5c17f2af4c41efa6ba1335bacf24 --- /dev/null +++ b/model-00072-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f043d9f6c08f643d63f9447bd54d99357201b301d56d35507d07cec0f0e9337 +size 4845992755 diff --git a/model-00073-of-00073.safetensors b/model-00073-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0481f81f5b8e0dadb9f6306811b6777fbc41355b --- /dev/null +++ b/model-00073-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8df4985e6116084103d01af8b127e550afc05d0ef13820a8eea6a6a0428f12d +size 4156227192 diff --git a/model-00073-of-00083.safetensors b/model-00073-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b70c312c0b2387b77a2d433a2c49c6ffbe388d03 --- /dev/null +++ b/model-00073-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d4e988ada300a6b7714772d654d9a6a11cc6296f8f107628fda29f9f92b579b +size 4845992693 diff --git a/model-00074-of-00083.safetensors b/model-00074-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b70c3821f882610b4ef2bd415966d58d0d499c2 --- /dev/null +++ b/model-00074-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13c448e373e18804b0e8a562cf90dc5b46fa5c802c0c6d571aa54a34bb69c36e +size 4697621270 diff --git a/model-00075-of-00083.safetensors b/model-00075-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dcaaecebb293161d8d9cf73fd8949cd88faf535c --- /dev/null +++ b/model-00075-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c437d69d7723f0fdb492fbd2ac60935dd9a4a039446c13f06eb08d9acf9d20f8 +size 4845992753 diff --git a/model-00076-of-00083.safetensors b/model-00076-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c7316d53eca76ae955efb62ac3ceec294e5f9119 --- /dev/null +++ b/model-00076-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:104371aa1d43ae41d5b44b1b842c103329021473bec4c8171517af4e085ede2e +size 4828451499 diff --git a/model-00077-of-00083.safetensors b/model-00077-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6833bacc6a66454a8729ce3e5b214f6615e9de57 --- /dev/null +++ b/model-00077-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b0c4297c4a974aecb33e67f8f85772418e886a5e381741bb1871ee4dcaf887c +size 4483120185 diff --git a/model-00078-of-00083.safetensors b/model-00078-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4012268e203ae91dcc79fee66f1b0a9595f9e94f --- /dev/null +++ b/model-00078-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38b653a660bded600fc0da088c2e05e39a10519ebf0dd07e650ded40155df559 +size 4697621268 diff --git a/model-00079-of-00083.safetensors b/model-00079-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6afe34a4bbef049824a4fd9c9a3651bd1ea05a7c --- /dev/null +++ b/model-00079-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5db1465ee733975fe4dc82fc00add45264cda69cadedf4979a1fee1ea390afa +size 4845992743 diff --git a/model-00080-of-00083.safetensors b/model-00080-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..263d238c5304a18a3e87d596e093d3c1e6fb47b0 --- /dev/null +++ b/model-00080-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66d48b725b88eccd216cd5af07d9404eda8951d8e1d8828c1f01a85f6573a923 +size 4845992637 diff --git a/model-00081-of-00083.safetensors b/model-00081-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ff5275678b14a58e5f525d36a6cc374ad6e787b2 --- /dev/null +++ b/model-00081-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db52ce83c818146cfb6ae984e770a2b8a0947115b447c9d97fbb0f24a944e890 +size 4697621272 diff --git a/model-00082-of-00083.safetensors b/model-00082-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c5395cab2d82e5a3304a0863d574f9c55efd570a --- /dev/null +++ b/model-00082-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ba473801dcf78003e112029beefb98629fe7c029c9f3033d6890bc90d667d5c +size 2380131624 diff --git a/model-00083-of-00083.safetensors b/model-00083-of-00083.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b1bc22d6ad9c9604d0537c915c239c2ecac89fb2 --- /dev/null +++ b/model-00083-of-00083.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1da8585fe3781db00c56319811d28f7f099016d73d0e8928b1d2d4155b0aaded +size 3706716286 diff --git a/model.safetensors.index.json b/model.safetensors.index.json index 7439adc4a800e53a4ec5cd8bb2f8cd6c30f5c058..a1e2a34c3d4b018454a76a63d2b1968fae96b1df 100644 --- a/model.safetensors.index.json +++ b/model.safetensors.index.json @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:eb34714a209a8a0b81a6044e0f93b2bd98c156d0fa74255a5509cbccfc470dd7 +oid sha256:5aa3dfbc630c05df14e80eb583c148f830ba0614eba2f3f5c3a83c603c785444 size 20783505 diff --git a/quant_strategy.json b/quant_strategy.json index 90deef9716ba5d161b7013a23c8d1cc8060ea0d4..f5636e371a0f463f899a2936f1ad60c957c4f648 100644 --- a/quant_strategy.json +++ b/quant_strategy.json @@ -1,11 +1,11 @@ { "measurement": { "model.layers.0": { - "accuracy": 0.9836017141933553, - "total_bits": 2516201472.0, + "accuracy": 0.9817549798171967, + "total_bits": 2379451392.0, "q_a_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -18,7 +18,7 @@ }, "q_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -31,7 +31,7 @@ }, "kv_a_proj_with_mqa": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -44,7 +44,7 @@ }, "kv_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -57,7 +57,7 @@ }, "o_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -70,7 +70,7 @@ }, "gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -83,7 +83,7 @@ }, "up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -96,7 +96,7 @@ }, "down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -109,11 +109,11 @@ } }, "model.layers.1": { - "accuracy": 0.9947275305603398, - "total_bits": 2516201472.0, + "accuracy": 0.9940149370813742, + "total_bits": 2379451392.0, "q_a_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -126,7 +126,7 @@ }, "q_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -139,7 +139,7 @@ }, "kv_a_proj_with_mqa": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -152,7 +152,7 @@ }, "kv_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -165,7 +165,7 @@ }, "o_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -178,7 +178,7 @@ }, "gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -191,7 +191,7 @@ }, "up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -204,7 +204,7 @@ }, "down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -217,11 +217,11 @@ } }, "model.layers.2": { - "accuracy": 0.9892190659011248, - "total_bits": 2516201472.0, + "accuracy": 0.9847580479981843, + "total_bits": 2379451392.0, "q_a_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -234,7 +234,7 @@ }, "q_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -247,7 +247,7 @@ }, "kv_a_proj_with_mqa": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -260,7 +260,7 @@ }, "kv_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -273,7 +273,7 @@ }, "o_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -286,7 +286,7 @@ }, "gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -299,7 +299,7 @@ }, "up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -312,7 +312,7 @@ }, "down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -325,8 +325,8 @@ } }, "model.layers.3": { - "accuracy": 0.9843865929287858, - "total_bits": 32448351232.0, + "accuracy": 0.9904986454348546, + "total_bits": 46949960704.0, "q_a_proj": { "group_size": { "4": 128 @@ -394,10 +394,10 @@ }, "moe_expert_gate_proj": { "group_size": { - "2": 64 + "4": 128 }, "bits": [ - 2 + 4 ], "bits_prop": [ 1 @@ -407,10 +407,10 @@ }, "moe_expert_up_proj": { "group_size": { - "2": 64 + "4": 128 }, "bits": [ - 2 + 4 ], "bits_prop": [ 1 @@ -433,10 +433,10 @@ }, "moe_shared_expert_gate_proj": { "group_size": { - "2": 64 + "4": 128 }, "bits": [ - 2 + 4 ], "bits_prop": [ 1 @@ -446,10 +446,10 @@ }, "moe_shared_expert_up_proj": { "group_size": { - "2": 64 + "4": 128 }, "bits": [ - 2 + 4 ], "bits_prop": [ 1 @@ -477,8 +477,8 @@ } }, "model.layers.4": { - "accuracy": 0.9804863141616806, - "total_bits": 32448351232.0, + "accuracy": 0.9895119229331613, + "total_bits": 46949960704.0, "q_a_proj": { "group_size": { "4": 128 @@ -546,10 +546,10 @@ }, "moe_expert_gate_proj": { "group_size": { - "2": 64 + "4": 128 }, "bits": [ - 2 + 4 ], "bits_prop": [ 1 @@ -559,10 +559,10 @@ }, "moe_expert_up_proj": { "group_size": { - "2": 64 + "4": 128 }, "bits": [ - 2 + 4 ], "bits_prop": [ 1 @@ -585,10 +585,10 @@ }, "moe_shared_expert_gate_proj": { "group_size": { - "2": 64 + "4": 128 }, "bits": [ - 2 + 4 ], "bits_prop": [ 1 @@ -598,10 +598,10 @@ }, "moe_shared_expert_up_proj": { "group_size": { - "2": 64 + "4": 128 }, "bits": [ - 2 + 4 ], "bits_prop": [ 1 @@ -629,11 +629,11 @@ } }, "model.layers.5": { - "accuracy": 0.9889985889894888, - "total_bits": 49646546944.0, + "accuracy": 0.9867525280278642, + "total_bits": 46949960704.0, "q_a_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -646,7 +646,7 @@ }, "q_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -659,7 +659,7 @@ }, "kv_a_proj_with_mqa": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -672,7 +672,7 @@ }, "kv_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -685,7 +685,7 @@ }, "o_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -698,7 +698,7 @@ }, "moe_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -711,7 +711,7 @@ }, "moe_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -724,7 +724,7 @@ }, "moe_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -737,7 +737,7 @@ }, "moe_shared_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -750,7 +750,7 @@ }, "moe_shared_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -763,7 +763,7 @@ }, "moe_shared_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -781,11 +781,11 @@ } }, "model.layers.6": { - "accuracy": 0.9873386551626027, - "total_bits": 49646546944.0, + "accuracy": 0.9837914108939003, + "total_bits": 46949960704.0, "q_a_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -798,7 +798,7 @@ }, "q_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -811,7 +811,7 @@ }, "kv_a_proj_with_mqa": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -824,7 +824,7 @@ }, "kv_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -837,7 +837,7 @@ }, "o_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -850,7 +850,7 @@ }, "moe_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -863,7 +863,7 @@ }, "moe_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -876,7 +876,7 @@ }, "moe_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -889,7 +889,7 @@ }, "moe_shared_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -902,7 +902,7 @@ }, "moe_shared_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -915,7 +915,7 @@ }, "moe_shared_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -933,11 +933,11 @@ } }, "model.layers.7": { - "accuracy": 0.9860153485496994, - "total_bits": 49646546944.0, + "accuracy": 0.982880576106254, + "total_bits": 46949960704.0, "q_a_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -950,7 +950,7 @@ }, "q_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -963,7 +963,7 @@ }, "kv_a_proj_with_mqa": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -976,7 +976,7 @@ }, "kv_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -989,7 +989,7 @@ }, "o_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -1002,7 +1002,7 @@ }, "moe_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -1015,7 +1015,7 @@ }, "moe_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -1028,7 +1028,7 @@ }, "moe_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -1041,7 +1041,7 @@ }, "moe_shared_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -1054,7 +1054,7 @@ }, "moe_shared_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -1067,7 +1067,7 @@ }, "moe_shared_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -1085,11 +1085,11 @@ } }, "model.layers.8": { - "accuracy": 0.9847448625660036, - "total_bits": 49646546944.0, + "accuracy": 0.9805211594211869, + "total_bits": 46949960704.0, "q_a_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -1102,7 +1102,7 @@ }, "q_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -1115,7 +1115,7 @@ }, "kv_a_proj_with_mqa": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -1128,7 +1128,7 @@ }, "kv_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -1141,7 +1141,7 @@ }, "o_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -1154,7 +1154,7 @@ }, "moe_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -1167,7 +1167,7 @@ }, "moe_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -1180,7 +1180,7 @@ }, "moe_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -1193,7 +1193,7 @@ }, "moe_shared_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -1206,7 +1206,7 @@ }, "moe_shared_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -1219,7 +1219,7 @@ }, "moe_shared_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -1237,11 +1237,11 @@ } }, "model.layers.9": { - "accuracy": 0.9817010213155299, - "total_bits": 49646546944.0, + "accuracy": 0.9770258169737644, + "total_bits": 46949960704.0, "q_a_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -1254,7 +1254,7 @@ }, "q_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -1267,7 +1267,7 @@ }, "kv_a_proj_with_mqa": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -1280,7 +1280,7 @@ }, "kv_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -1293,7 +1293,7 @@ }, "o_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -1306,7 +1306,7 @@ }, "moe_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -1319,7 +1319,7 @@ }, "moe_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -1332,7 +1332,7 @@ }, "moe_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -1345,7 +1345,7 @@ }, "moe_shared_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -1358,7 +1358,7 @@ }, "moe_shared_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -1371,7 +1371,7 @@ }, "moe_shared_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -1389,11 +1389,11 @@ } }, "model.layers.10": { - "accuracy": 0.9794488882180303, - "total_bits": 49646546944.0, + "accuracy": 0.9744746365468018, + "total_bits": 46949960704.0, "q_a_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -1406,7 +1406,7 @@ }, "q_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -1419,7 +1419,7 @@ }, "kv_a_proj_with_mqa": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -1432,7 +1432,7 @@ }, "kv_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -1445,7 +1445,7 @@ }, "o_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -1458,7 +1458,7 @@ }, "moe_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -1471,7 +1471,7 @@ }, "moe_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -1484,7 +1484,7 @@ }, "moe_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -1497,7 +1497,7 @@ }, "moe_shared_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -1510,7 +1510,7 @@ }, "moe_shared_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -1523,7 +1523,7 @@ }, "moe_shared_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -1541,11 +1541,11 @@ } }, "model.layers.11": { - "accuracy": 0.9824990088818595, - "total_bits": 49646546944.0, + "accuracy": 0.9777291420614347, + "total_bits": 46949960704.0, "q_a_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -1558,7 +1558,7 @@ }, "q_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -1571,7 +1571,7 @@ }, "kv_a_proj_with_mqa": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -1584,7 +1584,7 @@ }, "kv_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -1597,7 +1597,7 @@ }, "o_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -1610,7 +1610,7 @@ }, "moe_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -1623,7 +1623,7 @@ }, "moe_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -1636,7 +1636,7 @@ }, "moe_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -1649,7 +1649,7 @@ }, "moe_shared_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -1662,7 +1662,7 @@ }, "moe_shared_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -1675,7 +1675,7 @@ }, "moe_shared_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -1693,11 +1693,11 @@ } }, "model.layers.12": { - "accuracy": 0.9849340560904238, - "total_bits": 49646546944.0, + "accuracy": 0.9809753063018434, + "total_bits": 46949960704.0, "q_a_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -1710,7 +1710,7 @@ }, "q_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -1723,7 +1723,7 @@ }, "kv_a_proj_with_mqa": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -1736,7 +1736,7 @@ }, "kv_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -1749,7 +1749,7 @@ }, "o_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -1762,7 +1762,7 @@ }, "moe_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -1775,7 +1775,7 @@ }, "moe_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -1788,7 +1788,7 @@ }, "moe_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -1801,7 +1801,7 @@ }, "moe_shared_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -1814,7 +1814,7 @@ }, "moe_shared_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -1827,7 +1827,7 @@ }, "moe_shared_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -1845,11 +1845,11 @@ } }, "model.layers.13": { - "accuracy": 0.9906857509049587, - "total_bits": 49646546944.0, + "accuracy": 0.9880705887626391, + "total_bits": 46949960704.0, "q_a_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -1862,7 +1862,7 @@ }, "q_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -1875,7 +1875,7 @@ }, "kv_a_proj_with_mqa": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -1888,7 +1888,7 @@ }, "kv_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -1901,7 +1901,7 @@ }, "o_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -1914,7 +1914,7 @@ }, "moe_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -1927,7 +1927,7 @@ }, "moe_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -1940,7 +1940,7 @@ }, "moe_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -1953,7 +1953,7 @@ }, "moe_shared_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -1966,7 +1966,7 @@ }, "moe_shared_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -1979,7 +1979,7 @@ }, "moe_shared_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -2149,8 +2149,8 @@ } }, "model.layers.15": { - "accuracy": 0.9876436085323803, - "total_bits": 32448351232.0, + "accuracy": 0.9941651911940426, + "total_bits": 46949960704.0, "q_a_proj": { "group_size": { "4": 128 @@ -2218,10 +2218,10 @@ }, "moe_expert_gate_proj": { "group_size": { - "2": 64 + "4": 128 }, "bits": [ - 2 + 4 ], "bits_prop": [ 1 @@ -2231,10 +2231,10 @@ }, "moe_expert_up_proj": { "group_size": { - "2": 64 + "4": 128 }, "bits": [ - 2 + 4 ], "bits_prop": [ 1 @@ -2257,10 +2257,10 @@ }, "moe_shared_expert_gate_proj": { "group_size": { - "2": 64 + "4": 128 }, "bits": [ - 2 + 4 ], "bits_prop": [ 1 @@ -2270,10 +2270,10 @@ }, "moe_shared_expert_up_proj": { "group_size": { - "2": 64 + "4": 128 }, "bits": [ - 2 + 4 ], "bits_prop": [ 1 @@ -2301,8 +2301,8 @@ } }, "model.layers.16": { - "accuracy": 0.9869564180844463, - "total_bits": 32448351232.0, + "accuracy": 0.9934399027988547, + "total_bits": 46949960704.0, "q_a_proj": { "group_size": { "4": 128 @@ -2370,10 +2370,10 @@ }, "moe_expert_gate_proj": { "group_size": { - "2": 64 + "4": 128 }, "bits": [ - 2 + 4 ], "bits_prop": [ 1 @@ -2383,10 +2383,10 @@ }, "moe_expert_up_proj": { "group_size": { - "2": 64 + "4": 128 }, "bits": [ - 2 + 4 ], "bits_prop": [ 1 @@ -2409,10 +2409,10 @@ }, "moe_shared_expert_gate_proj": { "group_size": { - "2": 64 + "4": 128 }, "bits": [ - 2 + 4 ], "bits_prop": [ 1 @@ -2422,10 +2422,10 @@ }, "moe_shared_expert_up_proj": { "group_size": { - "2": 64 + "4": 128 }, "bits": [ - 2 + 4 ], "bits_prop": [ 1 @@ -2453,8 +2453,8 @@ } }, "model.layers.17": { - "accuracy": 0.9861695145664271, - "total_bits": 32448351232.0, + "accuracy": 0.9944614085834473, + "total_bits": 46949960704.0, "q_a_proj": { "group_size": { "4": 128 @@ -2522,10 +2522,10 @@ }, "moe_expert_gate_proj": { "group_size": { - "2": 64 + "4": 128 }, "bits": [ - 2 + 4 ], "bits_prop": [ 1 @@ -2535,10 +2535,10 @@ }, "moe_expert_up_proj": { "group_size": { - "2": 64 + "4": 128 }, "bits": [ - 2 + 4 ], "bits_prop": [ 1 @@ -2561,10 +2561,10 @@ }, "moe_shared_expert_gate_proj": { "group_size": { - "2": 64 + "4": 128 }, "bits": [ - 2 + 4 ], "bits_prop": [ 1 @@ -2574,10 +2574,10 @@ }, "moe_shared_expert_up_proj": { "group_size": { - "2": 64 + "4": 128 }, "bits": [ - 2 + 4 ], "bits_prop": [ 1 @@ -2605,8 +2605,8 @@ } }, "model.layers.18": { - "accuracy": 0.9839566865412053, - "total_bits": 32448351232.0, + "accuracy": 0.9921934259618865, + "total_bits": 46949960704.0, "q_a_proj": { "group_size": { "4": 128 @@ -2674,10 +2674,10 @@ }, "moe_expert_gate_proj": { "group_size": { - "2": 64 + "4": 128 }, "bits": [ - 2 + 4 ], "bits_prop": [ 1 @@ -2687,10 +2687,10 @@ }, "moe_expert_up_proj": { "group_size": { - "2": 64 + "4": 128 }, "bits": [ - 2 + 4 ], "bits_prop": [ 1 @@ -2713,10 +2713,10 @@ }, "moe_shared_expert_gate_proj": { "group_size": { - "2": 64 + "4": 128 }, "bits": [ - 2 + 4 ], "bits_prop": [ 1 @@ -2726,10 +2726,10 @@ }, "moe_shared_expert_up_proj": { "group_size": { - "2": 64 + "4": 128 }, "bits": [ - 2 + 4 ], "bits_prop": [ 1 @@ -2757,8 +2757,8 @@ } }, "model.layers.19": { - "accuracy": 0.982039811933646, - "total_bits": 32448351232.0, + "accuracy": 0.9912811828980921, + "total_bits": 46949960704.0, "q_a_proj": { "group_size": { "4": 128 @@ -2826,10 +2826,10 @@ }, "moe_expert_gate_proj": { "group_size": { - "2": 64 + "4": 128 }, "bits": [ - 2 + 4 ], "bits_prop": [ 1 @@ -2839,10 +2839,10 @@ }, "moe_expert_up_proj": { "group_size": { - "2": 64 + "4": 128 }, "bits": [ - 2 + 4 ], "bits_prop": [ 1 @@ -2865,10 +2865,10 @@ }, "moe_shared_expert_gate_proj": { "group_size": { - "2": 64 + "4": 128 }, "bits": [ - 2 + 4 ], "bits_prop": [ 1 @@ -2878,10 +2878,10 @@ }, "moe_shared_expert_up_proj": { "group_size": { - "2": 64 + "4": 128 }, "bits": [ - 2 + 4 ], "bits_prop": [ 1 @@ -2909,8 +2909,8 @@ } }, "model.layers.20": { - "accuracy": 0.9796012884471565, - "total_bits": 32448351232.0, + "accuracy": 0.9900320051528979, + "total_bits": 46949960704.0, "q_a_proj": { "group_size": { "4": 128 @@ -2978,10 +2978,10 @@ }, "moe_expert_gate_proj": { "group_size": { - "2": 64 + "4": 128 }, "bits": [ - 2 + 4 ], "bits_prop": [ 1 @@ -2991,10 +2991,10 @@ }, "moe_expert_up_proj": { "group_size": { - "2": 64 + "4": 128 }, "bits": [ - 2 + 4 ], "bits_prop": [ 1 @@ -3017,10 +3017,10 @@ }, "moe_shared_expert_gate_proj": { "group_size": { - "2": 64 + "4": 128 }, "bits": [ - 2 + 4 ], "bits_prop": [ 1 @@ -3030,10 +3030,10 @@ }, "moe_shared_expert_up_proj": { "group_size": { - "2": 64 + "4": 128 }, "bits": [ - 2 + 4 ], "bits_prop": [ 1 @@ -3213,11 +3213,11 @@ } }, "model.layers.22": { - "accuracy": 0.9890681402757764, - "total_bits": 49646546944.0, + "accuracy": 0.9864137352560647, + "total_bits": 46949960704.0, "q_a_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -3230,7 +3230,7 @@ }, "q_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -3243,7 +3243,7 @@ }, "kv_a_proj_with_mqa": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -3256,7 +3256,7 @@ }, "kv_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -3269,7 +3269,7 @@ }, "o_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -3282,7 +3282,7 @@ }, "moe_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -3295,7 +3295,7 @@ }, "moe_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -3308,7 +3308,7 @@ }, "moe_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -3321,7 +3321,7 @@ }, "moe_shared_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -3334,7 +3334,7 @@ }, "moe_shared_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -3347,7 +3347,7 @@ }, "moe_shared_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -3365,11 +3365,11 @@ } }, "model.layers.23": { - "accuracy": 0.9887834941036999, - "total_bits": 49646546944.0, + "accuracy": 0.9857180436956696, + "total_bits": 46949960704.0, "q_a_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -3382,7 +3382,7 @@ }, "q_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -3395,7 +3395,7 @@ }, "kv_a_proj_with_mqa": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -3408,7 +3408,7 @@ }, "kv_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -3421,7 +3421,7 @@ }, "o_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -3434,7 +3434,7 @@ }, "moe_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -3447,7 +3447,7 @@ }, "moe_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -3460,7 +3460,7 @@ }, "moe_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -3473,7 +3473,7 @@ }, "moe_shared_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -3486,7 +3486,7 @@ }, "moe_shared_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -3499,7 +3499,7 @@ }, "moe_shared_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -3517,11 +3517,11 @@ } }, "model.layers.24": { - "accuracy": 0.9874449702329002, - "total_bits": 49646546944.0, + "accuracy": 0.9841050353425089, + "total_bits": 46949960704.0, "q_a_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -3534,7 +3534,7 @@ }, "q_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -3547,7 +3547,7 @@ }, "kv_a_proj_with_mqa": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -3560,7 +3560,7 @@ }, "kv_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -3573,7 +3573,7 @@ }, "o_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -3586,7 +3586,7 @@ }, "moe_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -3599,7 +3599,7 @@ }, "moe_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -3612,7 +3612,7 @@ }, "moe_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -3625,7 +3625,7 @@ }, "moe_shared_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -3638,7 +3638,7 @@ }, "moe_shared_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -3651,7 +3651,7 @@ }, "moe_shared_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -3669,11 +3669,11 @@ } }, "model.layers.25": { - "accuracy": 0.9868336729414295, - "total_bits": 49646546944.0, + "accuracy": 0.9832171808811836, + "total_bits": 46949960704.0, "q_a_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -3686,7 +3686,7 @@ }, "q_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -3699,7 +3699,7 @@ }, "kv_a_proj_with_mqa": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -3712,7 +3712,7 @@ }, "kv_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -3725,7 +3725,7 @@ }, "o_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -3738,7 +3738,7 @@ }, "moe_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -3751,7 +3751,7 @@ }, "moe_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -3764,7 +3764,7 @@ }, "moe_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -3777,7 +3777,7 @@ }, "moe_shared_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -3790,7 +3790,7 @@ }, "moe_shared_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -3803,7 +3803,7 @@ }, "moe_shared_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -3821,11 +3821,11 @@ } }, "model.layers.26": { - "accuracy": 0.986997331638122, - "total_bits": 49646546944.0, + "accuracy": 0.9834447980683763, + "total_bits": 46949960704.0, "q_a_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -3838,7 +3838,7 @@ }, "q_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -3851,7 +3851,7 @@ }, "kv_a_proj_with_mqa": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -3864,7 +3864,7 @@ }, "kv_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -3877,7 +3877,7 @@ }, "o_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -3890,7 +3890,7 @@ }, "moe_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -3903,7 +3903,7 @@ }, "moe_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -3916,7 +3916,7 @@ }, "moe_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -3929,7 +3929,7 @@ }, "moe_shared_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -3942,7 +3942,7 @@ }, "moe_shared_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -3955,7 +3955,7 @@ }, "moe_shared_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -3973,8 +3973,8 @@ } }, "model.layers.27": { - "accuracy": 0.9610166220227256, - "total_bits": 32448351232.0, + "accuracy": 0.9812710456317291, + "total_bits": 46949960704.0, "q_a_proj": { "group_size": { "4": 128 @@ -4042,10 +4042,10 @@ }, "moe_expert_gate_proj": { "group_size": { - "2": 64 + "4": 128 }, "bits": [ - 2 + 4 ], "bits_prop": [ 1 @@ -4055,10 +4055,10 @@ }, "moe_expert_up_proj": { "group_size": { - "2": 64 + "4": 128 }, "bits": [ - 2 + 4 ], "bits_prop": [ 1 @@ -4081,10 +4081,10 @@ }, "moe_shared_expert_gate_proj": { "group_size": { - "2": 64 + "4": 128 }, "bits": [ - 2 + 4 ], "bits_prop": [ 1 @@ -4094,10 +4094,10 @@ }, "moe_shared_expert_up_proj": { "group_size": { - "2": 64 + "4": 128 }, "bits": [ - 2 + 4 ], "bits_prop": [ 1 @@ -4125,11 +4125,11 @@ } }, "model.layers.28": { - "accuracy": 0.9852554257959127, - "total_bits": 49646546944.0, + "accuracy": 0.9811981529928744, + "total_bits": 46949960704.0, "q_a_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -4142,7 +4142,7 @@ }, "q_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -4155,7 +4155,7 @@ }, "kv_a_proj_with_mqa": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -4168,7 +4168,7 @@ }, "kv_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -4181,7 +4181,7 @@ }, "o_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -4194,7 +4194,7 @@ }, "moe_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -4207,7 +4207,7 @@ }, "moe_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -4220,7 +4220,7 @@ }, "moe_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -4233,7 +4233,7 @@ }, "moe_shared_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -4246,7 +4246,7 @@ }, "moe_shared_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -4259,7 +4259,7 @@ }, "moe_shared_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -4277,11 +4277,11 @@ } }, "model.layers.29": { - "accuracy": 0.9843449779436924, - "total_bits": 49646546944.0, + "accuracy": 0.9799808135721833, + "total_bits": 46949960704.0, "q_a_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -4294,7 +4294,7 @@ }, "q_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -4307,7 +4307,7 @@ }, "kv_a_proj_with_mqa": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -4320,7 +4320,7 @@ }, "kv_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -4333,7 +4333,7 @@ }, "o_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -4346,7 +4346,7 @@ }, "moe_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -4359,7 +4359,7 @@ }, "moe_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -4372,7 +4372,7 @@ }, "moe_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -4385,7 +4385,7 @@ }, "moe_shared_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -4398,7 +4398,7 @@ }, "moe_shared_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -4411,7 +4411,7 @@ }, "moe_shared_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -4429,11 +4429,11 @@ } }, "model.layers.30": { - "accuracy": 0.9839022597298026, - "total_bits": 49646546944.0, + "accuracy": 0.9797754119499587, + "total_bits": 46949960704.0, "q_a_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -4446,7 +4446,7 @@ }, "q_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -4459,7 +4459,7 @@ }, "kv_a_proj_with_mqa": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -4472,7 +4472,7 @@ }, "kv_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -4485,7 +4485,7 @@ }, "o_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -4498,7 +4498,7 @@ }, "moe_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -4511,7 +4511,7 @@ }, "moe_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -4524,7 +4524,7 @@ }, "moe_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -4537,7 +4537,7 @@ }, "moe_shared_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -4550,7 +4550,7 @@ }, "moe_shared_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -4563,7 +4563,7 @@ }, "moe_shared_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -4581,11 +4581,11 @@ } }, "model.layers.31": { - "accuracy": 0.9827811672585085, - "total_bits": 49646546944.0, + "accuracy": 0.9786722971475683, + "total_bits": 46949960704.0, "q_a_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -4598,7 +4598,7 @@ }, "q_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -4611,7 +4611,7 @@ }, "kv_a_proj_with_mqa": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -4624,7 +4624,7 @@ }, "kv_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -4637,7 +4637,7 @@ }, "o_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -4650,7 +4650,7 @@ }, "moe_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -4663,7 +4663,7 @@ }, "moe_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -4676,7 +4676,7 @@ }, "moe_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -4689,7 +4689,7 @@ }, "moe_shared_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -4702,7 +4702,7 @@ }, "moe_shared_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -4715,7 +4715,7 @@ }, "moe_shared_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -4733,11 +4733,11 @@ } }, "model.layers.32": { - "accuracy": 0.9849323070375249, - "total_bits": 49646546944.0, + "accuracy": 0.9808280722354539, + "total_bits": 46949960704.0, "q_a_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -4750,7 +4750,7 @@ }, "q_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -4763,7 +4763,7 @@ }, "kv_a_proj_with_mqa": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -4776,7 +4776,7 @@ }, "kv_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -4789,7 +4789,7 @@ }, "o_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -4802,7 +4802,7 @@ }, "moe_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -4815,7 +4815,7 @@ }, "moe_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -4828,7 +4828,7 @@ }, "moe_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -4841,7 +4841,7 @@ }, "moe_shared_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -4854,7 +4854,7 @@ }, "moe_shared_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -4867,7 +4867,7 @@ }, "moe_shared_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -4885,11 +4885,11 @@ } }, "model.layers.33": { - "accuracy": 0.9866606635332573, - "total_bits": 49646546944.0, + "accuracy": 0.9827347988612019, + "total_bits": 46949960704.0, "q_a_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -4902,7 +4902,7 @@ }, "q_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -4915,7 +4915,7 @@ }, "kv_a_proj_with_mqa": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -4928,7 +4928,7 @@ }, "kv_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -4941,7 +4941,7 @@ }, "o_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -4954,7 +4954,7 @@ }, "moe_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -4967,7 +4967,7 @@ }, "moe_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -4980,7 +4980,7 @@ }, "moe_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -4993,7 +4993,7 @@ }, "moe_shared_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -5006,7 +5006,7 @@ }, "moe_shared_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -5019,7 +5019,7 @@ }, "moe_shared_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -5037,11 +5037,11 @@ } }, "model.layers.34": { - "accuracy": 0.9865586688974872, - "total_bits": 49646546944.0, + "accuracy": 0.9827491079631727, + "total_bits": 46949960704.0, "q_a_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -5054,7 +5054,7 @@ }, "q_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -5067,7 +5067,7 @@ }, "kv_a_proj_with_mqa": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -5080,7 +5080,7 @@ }, "kv_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -5093,7 +5093,7 @@ }, "o_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -5106,7 +5106,7 @@ }, "moe_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -5119,7 +5119,7 @@ }, "moe_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -5132,7 +5132,7 @@ }, "moe_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -5145,7 +5145,7 @@ }, "moe_shared_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -5158,7 +5158,7 @@ }, "moe_shared_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -5171,7 +5171,7 @@ }, "moe_shared_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -5189,11 +5189,11 @@ } }, "model.layers.35": { - "accuracy": 0.9868608236429282, - "total_bits": 49646546944.0, + "accuracy": 0.9832003622432239, + "total_bits": 46949960704.0, "q_a_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -5206,7 +5206,7 @@ }, "q_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -5219,7 +5219,7 @@ }, "kv_a_proj_with_mqa": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -5232,7 +5232,7 @@ }, "kv_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -5245,7 +5245,7 @@ }, "o_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -5258,7 +5258,7 @@ }, "moe_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -5271,7 +5271,7 @@ }, "moe_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -5284,7 +5284,7 @@ }, "moe_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -5297,7 +5297,7 @@ }, "moe_shared_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -5310,7 +5310,7 @@ }, "moe_shared_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -5323,7 +5323,7 @@ }, "moe_shared_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -5341,8 +5341,8 @@ } }, "model.layers.36": { - "accuracy": 0.9563268288038671, - "total_bits": 32448351232.0, + "accuracy": 0.9827660944429226, + "total_bits": 46949960704.0, "q_a_proj": { "group_size": { "4": 128 @@ -5410,10 +5410,10 @@ }, "moe_expert_gate_proj": { "group_size": { - "2": 64 + "4": 128 }, "bits": [ - 2 + 4 ], "bits_prop": [ 1 @@ -5423,10 +5423,10 @@ }, "moe_expert_up_proj": { "group_size": { - "2": 64 + "4": 128 }, "bits": [ - 2 + 4 ], "bits_prop": [ 1 @@ -5449,10 +5449,10 @@ }, "moe_shared_expert_gate_proj": { "group_size": { - "2": 64 + "4": 128 }, "bits": [ - 2 + 4 ], "bits_prop": [ 1 @@ -5462,10 +5462,10 @@ }, "moe_shared_expert_up_proj": { "group_size": { - "2": 64 + "4": 128 }, "bits": [ - 2 + 4 ], "bits_prop": [ 1 @@ -5493,8 +5493,8 @@ } }, "model.layers.37": { - "accuracy": 0.955550791346468, - "total_bits": 32448351232.0, + "accuracy": 0.98276356497081, + "total_bits": 46949960704.0, "q_a_proj": { "group_size": { "4": 128 @@ -5562,10 +5562,10 @@ }, "moe_expert_gate_proj": { "group_size": { - "2": 64 + "4": 128 }, "bits": [ - 2 + 4 ], "bits_prop": [ 1 @@ -5575,10 +5575,10 @@ }, "moe_expert_up_proj": { "group_size": { - "2": 64 + "4": 128 }, "bits": [ - 2 + 4 ], "bits_prop": [ 1 @@ -5601,10 +5601,10 @@ }, "moe_shared_expert_gate_proj": { "group_size": { - "2": 64 + "4": 128 }, "bits": [ - 2 + 4 ], "bits_prop": [ 1 @@ -5614,10 +5614,10 @@ }, "moe_shared_expert_up_proj": { "group_size": { - "2": 64 + "4": 128 }, "bits": [ - 2 + 4 ], "bits_prop": [ 1 @@ -5645,11 +5645,11 @@ } }, "model.layers.38": { - "accuracy": 0.9879953919735271, - "total_bits": 49646546944.0, + "accuracy": 0.9846183339541312, + "total_bits": 46949960704.0, "q_a_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -5662,7 +5662,7 @@ }, "q_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -5675,7 +5675,7 @@ }, "kv_a_proj_with_mqa": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -5688,7 +5688,7 @@ }, "kv_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -5701,7 +5701,7 @@ }, "o_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -5714,7 +5714,7 @@ }, "moe_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -5727,7 +5727,7 @@ }, "moe_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -5740,7 +5740,7 @@ }, "moe_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -5753,7 +5753,7 @@ }, "moe_shared_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -5766,7 +5766,7 @@ }, "moe_shared_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -5779,7 +5779,7 @@ }, "moe_shared_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -5797,11 +5797,11 @@ } }, "model.layers.39": { - "accuracy": 0.9868617769097909, - "total_bits": 49646546944.0, + "accuracy": 0.9832595270127058, + "total_bits": 46949960704.0, "q_a_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -5814,7 +5814,7 @@ }, "q_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -5827,7 +5827,7 @@ }, "kv_a_proj_with_mqa": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -5840,7 +5840,7 @@ }, "kv_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -5853,7 +5853,7 @@ }, "o_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -5866,7 +5866,7 @@ }, "moe_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -5879,7 +5879,7 @@ }, "moe_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -5892,7 +5892,7 @@ }, "moe_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -5905,7 +5905,7 @@ }, "moe_shared_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -5918,7 +5918,7 @@ }, "moe_shared_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -5931,7 +5931,7 @@ }, "moe_shared_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -5949,11 +5949,11 @@ } }, "model.layers.40": { - "accuracy": 0.9849735620082356, - "total_bits": 49646546944.0, + "accuracy": 0.9807395663810894, + "total_bits": 46949960704.0, "q_a_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -5966,7 +5966,7 @@ }, "q_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -5979,7 +5979,7 @@ }, "kv_a_proj_with_mqa": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -5992,7 +5992,7 @@ }, "kv_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -6005,7 +6005,7 @@ }, "o_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -6018,7 +6018,7 @@ }, "moe_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -6031,7 +6031,7 @@ }, "moe_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -6044,7 +6044,7 @@ }, "moe_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -6057,7 +6057,7 @@ }, "moe_shared_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -6070,7 +6070,7 @@ }, "moe_shared_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -6083,7 +6083,7 @@ }, "moe_shared_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -6101,11 +6101,11 @@ } }, "model.layers.41": { - "accuracy": 0.9857438872568309, - "total_bits": 49646546944.0, + "accuracy": 0.9818256291910075, + "total_bits": 46949960704.0, "q_a_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -6118,7 +6118,7 @@ }, "q_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -6131,7 +6131,7 @@ }, "kv_a_proj_with_mqa": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -6144,7 +6144,7 @@ }, "kv_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -6157,7 +6157,7 @@ }, "o_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -6170,7 +6170,7 @@ }, "moe_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -6183,7 +6183,7 @@ }, "moe_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -6196,7 +6196,7 @@ }, "moe_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -6209,7 +6209,7 @@ }, "moe_shared_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -6222,7 +6222,7 @@ }, "moe_shared_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -6235,7 +6235,7 @@ }, "moe_shared_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -6253,11 +6253,11 @@ } }, "model.layers.42": { - "accuracy": 0.9868348813324701, - "total_bits": 49646546944.0, + "accuracy": 0.9831473883823492, + "total_bits": 46949960704.0, "q_a_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -6270,7 +6270,7 @@ }, "q_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -6283,7 +6283,7 @@ }, "kv_a_proj_with_mqa": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -6296,7 +6296,7 @@ }, "kv_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -6309,7 +6309,7 @@ }, "o_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -6322,7 +6322,7 @@ }, "moe_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -6335,7 +6335,7 @@ }, "moe_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -6348,7 +6348,7 @@ }, "moe_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -6361,7 +6361,7 @@ }, "moe_shared_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -6374,7 +6374,7 @@ }, "moe_shared_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -6387,7 +6387,7 @@ }, "moe_shared_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -6405,11 +6405,11 @@ } }, "model.layers.43": { - "accuracy": 0.9866155976196751, - "total_bits": 49646546944.0, + "accuracy": 0.9827884282858577, + "total_bits": 46949960704.0, "q_a_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -6422,7 +6422,7 @@ }, "q_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -6435,7 +6435,7 @@ }, "kv_a_proj_with_mqa": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -6448,7 +6448,7 @@ }, "kv_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -6461,7 +6461,7 @@ }, "o_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -6474,7 +6474,7 @@ }, "moe_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -6487,7 +6487,7 @@ }, "moe_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -6500,7 +6500,7 @@ }, "moe_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -6513,7 +6513,7 @@ }, "moe_shared_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -6526,7 +6526,7 @@ }, "moe_shared_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -6539,7 +6539,7 @@ }, "moe_shared_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -6557,11 +6557,11 @@ } }, "model.layers.44": { - "accuracy": 0.9863877832249273, - "total_bits": 49646546944.0, + "accuracy": 0.9825569812674075, + "total_bits": 46949960704.0, "q_a_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -6574,7 +6574,7 @@ }, "q_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -6587,7 +6587,7 @@ }, "kv_a_proj_with_mqa": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -6600,7 +6600,7 @@ }, "kv_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -6613,7 +6613,7 @@ }, "o_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -6626,7 +6626,7 @@ }, "moe_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -6639,7 +6639,7 @@ }, "moe_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -6652,7 +6652,7 @@ }, "moe_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -6665,7 +6665,7 @@ }, "moe_shared_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -6678,7 +6678,7 @@ }, "moe_shared_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -6691,7 +6691,7 @@ }, "moe_shared_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -6709,11 +6709,11 @@ } }, "model.layers.45": { - "accuracy": 0.9854023810476065, - "total_bits": 49646546944.0, + "accuracy": 0.9813435177784413, + "total_bits": 46949960704.0, "q_a_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -6726,7 +6726,7 @@ }, "q_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -6739,7 +6739,7 @@ }, "kv_a_proj_with_mqa": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -6752,7 +6752,7 @@ }, "kv_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -6765,7 +6765,7 @@ }, "o_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -6778,7 +6778,7 @@ }, "moe_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -6791,7 +6791,7 @@ }, "moe_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -6804,7 +6804,7 @@ }, "moe_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -6817,7 +6817,7 @@ }, "moe_shared_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -6830,7 +6830,7 @@ }, "moe_shared_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -6843,7 +6843,7 @@ }, "moe_shared_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -6861,11 +6861,11 @@ } }, "model.layers.46": { - "accuracy": 0.9850968104728963, - "total_bits": 49646546944.0, + "accuracy": 0.9808638134854846, + "total_bits": 46949960704.0, "q_a_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -6878,7 +6878,7 @@ }, "q_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -6891,7 +6891,7 @@ }, "kv_a_proj_with_mqa": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -6904,7 +6904,7 @@ }, "kv_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -6917,7 +6917,7 @@ }, "o_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -6930,7 +6930,7 @@ }, "moe_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -6943,7 +6943,7 @@ }, "moe_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -6956,7 +6956,7 @@ }, "moe_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -6969,7 +6969,7 @@ }, "moe_shared_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -6982,7 +6982,7 @@ }, "moe_shared_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -6995,7 +6995,7 @@ }, "moe_shared_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -7013,11 +7013,11 @@ } }, "model.layers.47": { - "accuracy": 0.9855538352276199, - "total_bits": 49646546944.0, + "accuracy": 0.9815339095075615, + "total_bits": 46949960704.0, "q_a_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -7030,7 +7030,7 @@ }, "q_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -7043,7 +7043,7 @@ }, "kv_a_proj_with_mqa": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -7056,7 +7056,7 @@ }, "kv_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -7069,7 +7069,7 @@ }, "o_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -7082,7 +7082,7 @@ }, "moe_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -7095,7 +7095,7 @@ }, "moe_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -7108,7 +7108,7 @@ }, "moe_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -7121,7 +7121,7 @@ }, "moe_shared_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -7134,7 +7134,7 @@ }, "moe_shared_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -7147,7 +7147,7 @@ }, "moe_shared_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -7165,11 +7165,11 @@ } }, "model.layers.48": { - "accuracy": 0.9850831855437718, - "total_bits": 49646546944.0, + "accuracy": 0.9809085759334266, + "total_bits": 46949960704.0, "q_a_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -7182,7 +7182,7 @@ }, "q_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -7195,7 +7195,7 @@ }, "kv_a_proj_with_mqa": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -7208,7 +7208,7 @@ }, "kv_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -7221,7 +7221,7 @@ }, "o_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -7234,7 +7234,7 @@ }, "moe_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -7247,7 +7247,7 @@ }, "moe_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -7260,7 +7260,7 @@ }, "moe_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -7273,7 +7273,7 @@ }, "moe_shared_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -7286,7 +7286,7 @@ }, "moe_shared_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -7299,7 +7299,7 @@ }, "moe_shared_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -7317,11 +7317,11 @@ } }, "model.layers.49": { - "accuracy": 0.9848428060940932, - "total_bits": 49646546944.0, + "accuracy": 0.9807316974038258, + "total_bits": 46949960704.0, "q_a_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -7334,7 +7334,7 @@ }, "q_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -7347,7 +7347,7 @@ }, "kv_a_proj_with_mqa": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -7360,7 +7360,7 @@ }, "kv_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -7373,7 +7373,7 @@ }, "o_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -7386,7 +7386,7 @@ }, "moe_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -7399,7 +7399,7 @@ }, "moe_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -7412,7 +7412,7 @@ }, "moe_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -7425,7 +7425,7 @@ }, "moe_shared_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -7438,7 +7438,7 @@ }, "moe_shared_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -7451,7 +7451,7 @@ }, "moe_shared_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -7469,11 +7469,11 @@ } }, "model.layers.50": { - "accuracy": 0.9842289972875733, - "total_bits": 49646546944.0, + "accuracy": 0.9797588220098987, + "total_bits": 46949960704.0, "q_a_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -7486,7 +7486,7 @@ }, "q_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -7499,7 +7499,7 @@ }, "kv_a_proj_with_mqa": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -7512,7 +7512,7 @@ }, "kv_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -7525,7 +7525,7 @@ }, "o_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -7538,7 +7538,7 @@ }, "moe_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -7551,7 +7551,7 @@ }, "moe_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -7564,7 +7564,7 @@ }, "moe_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -7577,7 +7577,7 @@ }, "moe_shared_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -7590,7 +7590,7 @@ }, "moe_shared_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -7603,7 +7603,7 @@ }, "moe_shared_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -7621,11 +7621,11 @@ } }, "model.layers.51": { - "accuracy": 0.9855613028921653, - "total_bits": 49646546944.0, + "accuracy": 0.9814481334178708, + "total_bits": 46949960704.0, "q_a_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -7638,7 +7638,7 @@ }, "q_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -7651,7 +7651,7 @@ }, "kv_a_proj_with_mqa": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -7664,7 +7664,7 @@ }, "kv_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -7677,7 +7677,7 @@ }, "o_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -7690,7 +7690,7 @@ }, "moe_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -7703,7 +7703,7 @@ }, "moe_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -7716,7 +7716,7 @@ }, "moe_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -7729,7 +7729,7 @@ }, "moe_shared_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -7742,7 +7742,7 @@ }, "moe_shared_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -7755,7 +7755,7 @@ }, "moe_shared_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -7773,11 +7773,11 @@ } }, "model.layers.52": { - "accuracy": 0.984318298578728, - "total_bits": 49646546944.0, + "accuracy": 0.9798667979193851, + "total_bits": 46949960704.0, "q_a_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -7790,7 +7790,7 @@ }, "q_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -7803,7 +7803,7 @@ }, "kv_a_proj_with_mqa": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -7816,7 +7816,7 @@ }, "kv_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -7829,7 +7829,7 @@ }, "o_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -7842,7 +7842,7 @@ }, "moe_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -7855,7 +7855,7 @@ }, "moe_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -7868,7 +7868,7 @@ }, "moe_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -7881,7 +7881,7 @@ }, "moe_shared_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -7894,7 +7894,7 @@ }, "moe_shared_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -7907,7 +7907,7 @@ }, "moe_shared_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -7925,11 +7925,11 @@ } }, "model.layers.53": { - "accuracy": 0.9847224070690572, - "total_bits": 49646546944.0, + "accuracy": 0.9805318327853456, + "total_bits": 46949960704.0, "q_a_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -7942,7 +7942,7 @@ }, "q_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -7955,7 +7955,7 @@ }, "kv_a_proj_with_mqa": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -7968,7 +7968,7 @@ }, "kv_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -7981,7 +7981,7 @@ }, "o_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -7994,7 +7994,7 @@ }, "moe_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -8007,7 +8007,7 @@ }, "moe_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -8020,7 +8020,7 @@ }, "moe_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -8033,7 +8033,7 @@ }, "moe_shared_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -8046,7 +8046,7 @@ }, "moe_shared_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -8059,7 +8059,7 @@ }, "moe_shared_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -8077,11 +8077,11 @@ } }, "model.layers.54": { - "accuracy": 0.9827672768151388, - "total_bits": 49646546944.0, + "accuracy": 0.9779171335394494, + "total_bits": 46949960704.0, "q_a_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -8094,7 +8094,7 @@ }, "q_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -8107,7 +8107,7 @@ }, "kv_a_proj_with_mqa": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -8120,7 +8120,7 @@ }, "kv_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -8133,7 +8133,7 @@ }, "o_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -8146,7 +8146,7 @@ }, "moe_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -8159,7 +8159,7 @@ }, "moe_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -8172,7 +8172,7 @@ }, "moe_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -8185,7 +8185,7 @@ }, "moe_shared_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -8198,7 +8198,7 @@ }, "moe_shared_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -8211,7 +8211,7 @@ }, "moe_shared_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -8229,11 +8229,11 @@ } }, "model.layers.55": { - "accuracy": 0.9829433120903559, - "total_bits": 49646546944.0, + "accuracy": 0.9781052877078764, + "total_bits": 46949960704.0, "q_a_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -8246,7 +8246,7 @@ }, "q_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -8259,7 +8259,7 @@ }, "kv_a_proj_with_mqa": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -8272,7 +8272,7 @@ }, "kv_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -8285,7 +8285,7 @@ }, "o_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -8298,7 +8298,7 @@ }, "moe_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -8311,7 +8311,7 @@ }, "moe_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -8324,7 +8324,7 @@ }, "moe_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -8337,7 +8337,7 @@ }, "moe_shared_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -8350,7 +8350,7 @@ }, "moe_shared_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -8363,7 +8363,7 @@ }, "moe_shared_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -8381,11 +8381,11 @@ } }, "model.layers.56": { - "accuracy": 0.9832319259585347, - "total_bits": 49646546944.0, + "accuracy": 0.9785591780673712, + "total_bits": 46949960704.0, "q_a_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -8398,7 +8398,7 @@ }, "q_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -8411,7 +8411,7 @@ }, "kv_a_proj_with_mqa": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -8424,7 +8424,7 @@ }, "kv_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -8437,7 +8437,7 @@ }, "o_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -8450,7 +8450,7 @@ }, "moe_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -8463,7 +8463,7 @@ }, "moe_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -8476,7 +8476,7 @@ }, "moe_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -8489,7 +8489,7 @@ }, "moe_shared_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -8502,7 +8502,7 @@ }, "moe_shared_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -8515,7 +8515,7 @@ }, "moe_shared_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -8533,8 +8533,8 @@ } }, "model.layers.57": { - "accuracy": 0.9462334433337674, - "total_bits": 32448351232.0, + "accuracy": 0.9786476686713286, + "total_bits": 46949960704.0, "q_a_proj": { "group_size": { "4": 128 @@ -8602,10 +8602,10 @@ }, "moe_expert_gate_proj": { "group_size": { - "2": 64 + "4": 128 }, "bits": [ - 2 + 4 ], "bits_prop": [ 1 @@ -8615,10 +8615,10 @@ }, "moe_expert_up_proj": { "group_size": { - "2": 64 + "4": 128 }, "bits": [ - 2 + 4 ], "bits_prop": [ 1 @@ -8641,10 +8641,10 @@ }, "moe_shared_expert_gate_proj": { "group_size": { - "2": 64 + "4": 128 }, "bits": [ - 2 + 4 ], "bits_prop": [ 1 @@ -8654,10 +8654,10 @@ }, "moe_shared_expert_up_proj": { "group_size": { - "2": 64 + "4": 128 }, "bits": [ - 2 + 4 ], "bits_prop": [ 1 @@ -8685,11 +8685,11 @@ } }, "model.layers.58": { - "accuracy": 0.984666926873615, - "total_bits": 49646546944.0, + "accuracy": 0.9801497724256478, + "total_bits": 46949960704.0, "q_a_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -8702,7 +8702,7 @@ }, "q_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -8715,7 +8715,7 @@ }, "kv_a_proj_with_mqa": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -8728,7 +8728,7 @@ }, "kv_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -8741,7 +8741,7 @@ }, "o_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -8754,7 +8754,7 @@ }, "moe_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -8767,7 +8767,7 @@ }, "moe_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -8780,7 +8780,7 @@ }, "moe_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -8793,7 +8793,7 @@ }, "moe_shared_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -8806,7 +8806,7 @@ }, "moe_shared_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -8819,7 +8819,7 @@ }, "moe_shared_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -8837,11 +8837,11 @@ } }, "model.layers.59": { - "accuracy": 0.9812533727963455, - "total_bits": 49646546944.0, + "accuracy": 0.9751776319462806, + "total_bits": 46949960704.0, "q_a_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -8854,7 +8854,7 @@ }, "q_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -8867,7 +8867,7 @@ }, "kv_a_proj_with_mqa": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -8880,7 +8880,7 @@ }, "kv_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -8893,7 +8893,7 @@ }, "o_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -8906,7 +8906,7 @@ }, "moe_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -8919,7 +8919,7 @@ }, "moe_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -8932,7 +8932,7 @@ }, "moe_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -8945,7 +8945,7 @@ }, "moe_shared_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -8958,7 +8958,7 @@ }, "moe_shared_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -8971,7 +8971,7 @@ }, "moe_shared_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -8990,10 +8990,10 @@ }, "model.layers.60": { "accuracy": 1e-06, - "total_bits": 49646546944.0, + "total_bits": 46949960704.0, "q_a_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -9006,7 +9006,7 @@ }, "q_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -9019,7 +9019,7 @@ }, "kv_a_proj_with_mqa": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -9032,7 +9032,7 @@ }, "kv_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -9045,7 +9045,7 @@ }, "o_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -9058,7 +9058,7 @@ }, "moe_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -9071,7 +9071,7 @@ }, "moe_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -9084,7 +9084,7 @@ }, "moe_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -9097,7 +9097,7 @@ }, "moe_shared_expert_gate_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -9110,7 +9110,7 @@ }, "moe_shared_expert_up_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -9123,7 +9123,7 @@ }, "moe_shared_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4