diff --git a/model-00001-of-00079.safetensors b/model-00001-of-00079.safetensors index 81cdab91538f37b10926e1366e7d0e9795f6306e..6f9ac00152241574c7a7d992917a1ee8bcaeaa54 100644 --- a/model-00001-of-00079.safetensors +++ b/model-00001-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:04d008e2d409aacd9006df3ee93ae7df387ba6c26fcb3199d2038752079184b8 +oid sha256:2289a2d256532db267c39dab0a4581a84741a5c97415db9d2723fa185abaac67 size 4998814392 diff --git a/model-00002-of-00079.safetensors b/model-00002-of-00079.safetensors index 9e89227dd1b5a63edab0b24324ecd751cb23920d..7923819a7d5d200e87cabd6658893426374557f0 100644 --- a/model-00002-of-00079.safetensors +++ b/model-00002-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:308a30da4214294aa838695be38fa2004ffd6f353732066693726833dbd6a729 +oid sha256:f2c8b53dad8d749ca0f067d0bb8c3bfe99670f7f89caa1946cbb383cd916fbe7 size 4998681264 diff --git a/model-00003-of-00079.safetensors b/model-00003-of-00079.safetensors index df7cd40151a042a85246899ad444d6853f40a8ed..59e22ccbd56567754ac360ae672acad0e43decc0 100644 --- a/model-00003-of-00079.safetensors +++ b/model-00003-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:438f2b9a922997e65eaff0eb1d1a591eb5594cf609b024eeac34dc787a9736c7 +oid sha256:ed30318833504e2ec323c9ac017f62da94a6d1e4c225188c79c4d1a181c242e6 size 4999770088 diff --git a/model-00004-of-00079.safetensors b/model-00004-of-00079.safetensors index 02959eca8accee8303acb68bfde6286ba5276833..2449f0b13ec6ad90c50a66bb099bc517d3da0648 100644 --- a/model-00004-of-00079.safetensors +++ b/model-00004-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f15f1d0175aec1ef1f7df45317b9b9f5d127de5d5b90900db6e23b69baa6d626 +oid sha256:dbbb572524ba28a235290991334ad4e436427acee6f3f1b4d2c467c91860a51b size 4994243696 diff --git a/model-00005-of-00079.safetensors b/model-00005-of-00079.safetensors index 0dbef77eb2c4b68ca27a8ab330b22412845b7390..77ec5a0afea945aac833531806d97cfb4d0bd99e 100644 --- a/model-00005-of-00079.safetensors +++ b/model-00005-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:96040742ad063899e011ecd0f3455f67ed213ce99737b24606dc013eab57e9ba +oid sha256:8e5055e44ff917e067f4b975cdc393dea95a31442b2ca0c6e03680e6ae24b39f size 4995467920 diff --git a/model-00006-of-00079.safetensors b/model-00006-of-00079.safetensors index 5b5a890acb28537c07b71c2b9a1c331ae1049955..285f20311c7e2ab1e09a8ce8f7fba6e7812e8d7f 100644 --- a/model-00006-of-00079.safetensors +++ b/model-00006-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:267ec2b1954f061ccd7d87abdd670e9380c38bd285c6b719c49e70a77f542c00 +oid sha256:e93c7e9d655f6518eacff7a6cba0c160d6ab7673ae542ac207340b9b46db3c1a size 4995448032 diff --git a/model-00007-of-00079.safetensors b/model-00007-of-00079.safetensors index f85bbeb0973cb641146f2c832f9fbe92a8484e34..2b344a23d687490f40768078a7355133f8f0cc2c 100644 --- a/model-00007-of-00079.safetensors +++ b/model-00007-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f63d0825a38363c1de7baacb34cc35924ae80e279b7a796da6b7d682816f3f71 +oid sha256:16d43f045eace61cd1706f051b260485b56110545e16236d3a38b6b3174c449b size 4994244000 diff --git a/model-00008-of-00079.safetensors b/model-00008-of-00079.safetensors index 474706f12053145e3718ea1a931a322e0b97fa4b..6ddc3f016326e96293f74d8e4ff237fdc99de62b 100644 --- a/model-00008-of-00079.safetensors +++ b/model-00008-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ffd61bd2e3b3d42ea2a5f89e07fd7a4954b1a58b0a1a98d6d0d95dcef84802dd +oid sha256:636609a73c3a77ad9e55b35fd530c8d0d25f1ed62e26a982b71634d3b613b4ba size 4995447448 diff --git a/model-00009-of-00079.safetensors b/model-00009-of-00079.safetensors index 3b398316a64bb21472311460a86d4437ed335063..dd103ee8ec98f95fc4f058cf01e6143cc1e705f2 100644 --- a/model-00009-of-00079.safetensors +++ b/model-00009-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:78b570fa16f5f9cec2bf51738e0b6bf0eff43ddbf96940b444b29506fca3618c +oid sha256:703e171cce6a0b78399973301312b43b4e8380bc3a204009169fa1c827f9a52d size 4995468200 diff --git a/model-00010-of-00079.safetensors b/model-00010-of-00079.safetensors index a079bfc3309972ebf18a20988509bc7c63d570bf..903b330a22cec737e25e59cd4317b5e917ccbc9f 100644 --- a/model-00010-of-00079.safetensors +++ b/model-00010-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c848ad6b25d27c5708fd913a974471fda5f6a57271f66027893549f63112ce7a +oid sha256:9ce24221b302fe1fbaf90918cb900087a4ed5d677d67bbb6a0e3c21a2c2b6377 size 4995448856 diff --git a/model-00011-of-00079.safetensors b/model-00011-of-00079.safetensors index 22c64db397530a5109fcfd6b59975cf1be7a580d..739a80dcd61ff6879dd431af77725fad07cdb85e 100644 --- a/model-00011-of-00079.safetensors +++ b/model-00011-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bb6595067cd75b9ac11bf6653cc91f09c209884862296c57ec82c948791b4a56 +oid sha256:94906d608ce406e98c68510f3346c2b3e7eb9a13ecebd78c8e788e17473dfb77 size 4994245680 diff --git a/model-00012-of-00079.safetensors b/model-00012-of-00079.safetensors index 07649445d1efb4c63e869ad9aa81d2f6c8913626..1324afad766aba4feffedc14337abaf50c34dfd5 100644 --- a/model-00012-of-00079.safetensors +++ b/model-00012-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d341d46c6ae5ada9504b78b2d74956e57b9100cbc2233d545e83c8e7937946b7 +oid sha256:b176b0728393969599c7533c4b2670818660c9b62bfd947a4aa91693d50b294f size 4995450128 diff --git a/model-00013-of-00079.safetensors b/model-00013-of-00079.safetensors index 1b494b93b8d26b030a66848c52e57ce0da765b25..30dea2bcb47a512ba74e185e7c771162be2f683b 100644 --- a/model-00013-of-00079.safetensors +++ b/model-00013-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fd14313d151b66a4f431f0e16c8c5939839273b6a2b6b87b899e089c938f4770 +oid sha256:6c7acd40d384370e6b07310ccd60cabe8a56b89a6aaa2c490a37981975f4cef0 size 4995471784 diff --git a/model-00014-of-00079.safetensors b/model-00014-of-00079.safetensors index 67e40b8072eb28b6921d0a90f7c388987ab3d31a..d044234cd94bcfe298f360370e85fde256bfb84a 100644 --- a/model-00014-of-00079.safetensors +++ b/model-00014-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4dd7ca9f2904a7fadb5797bc9459fd27603e36128db9dedaa73be3d97c2f0ed2 +oid sha256:dd05499ebaf1942bb81887ad6ae28cd35db9b1eb6a92664859e6497b4432c54b size 4994246120 diff --git a/model-00015-of-00079.safetensors b/model-00015-of-00079.safetensors index a89b1c6b0c97dac3fc0a8ea802be9bb0534ab33a..35f0d691023b3975ee471bc09de3432de34dc126 100644 --- a/model-00015-of-00079.safetensors +++ b/model-00015-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d9a570722f33ceddb4140f3348dadc443b002e4ea448e5e5d63eb70bd1216d29 -size 4999585192 +oid sha256:7a706001dd76c101de3a52fd200af2831ddd4d7a3dd13a3788fd2583bd907bc6 +size 4995450128 diff --git a/model-00016-of-00079.safetensors b/model-00016-of-00079.safetensors index f2532e45e6849611b25ffdf1e243a38ed5d17d0f..f4cac89270f6f5cbe76a9d731b7be31d51b04386 100644 --- a/model-00016-of-00079.safetensors +++ b/model-00016-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c2a40e4064630c5cf3fd4926225c166858602c0be5d854f884d4a0ae08f4eb92 -size 4999055592 +oid sha256:abb83961ace94fe808836bb65dbcfac6ffe18c3c29c3fffe0915e0e14218a4e0 +size 4994963896 diff --git a/model-00017-of-00079.safetensors b/model-00017-of-00079.safetensors index 27f2b57eeba6c303fe91fb722366654476626977..7549482eeef40eda57bbd127c2081d35193cee1d 100644 --- a/model-00017-of-00079.safetensors +++ b/model-00017-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1a83d2c6da1435854ef55f1d7f261f2fbd3f3982c20bc7b81b8deb5649070fb4 -size 4999332808 +oid sha256:dc52df01cf2b398983e8c064a8ac47cfcedef948de7815a6e047b92979eeb0c9 +size 4997527944 diff --git a/model-00018-of-00079.safetensors b/model-00018-of-00079.safetensors index c65ba700163536ede66dbabb453a93d476a01385..057d24cbf7435736ad39c53338a77c8d350ef54d 100644 --- a/model-00018-of-00079.safetensors +++ b/model-00018-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:992e6064e163857c67333d46cd23115a6df87f3b9cf226605803898d6223481a -size 4996039488 +oid sha256:9664b03d8988d9784b0ad46a4ba04f131552b30cb799261925cb0af2ca3e3f6e +size 4994068736 diff --git a/model-00019-of-00079.safetensors b/model-00019-of-00079.safetensors index 81a532608e7bfa0fe4faacb6f88dd7e9eef52b94..50cd68ca07967f84345474b6569b47ebcef5cf5a 100644 --- a/model-00019-of-00079.safetensors +++ b/model-00019-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:52a61e935884d3563e22b1d72ef49bf02b7c7a8a535ff6d6424e288106822206 -size 4994069240 +oid sha256:b7e886282e977c547d9c29937152d5507e8078c23138cbce55152d32e5efc6fe +size 4994068864 diff --git a/model-00020-of-00079.safetensors b/model-00020-of-00079.safetensors index 6d6deb71bbf146bd758c2fd7a7f01c301e226dee..1115c1e4d6fd23d209aa3bbc87b6bf09994a5819 100644 --- a/model-00020-of-00079.safetensors +++ b/model-00020-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:228db4bb48e54ce0eb20a7a568039ab90ff033b39aeff41466c4a4cc8407a3ec -size 4996097224 +oid sha256:4c917ce98ee2a3749b38d7373547126a588992b46cb1a4bf6f46235d0ec559bb +size 4994068864 diff --git a/model-00021-of-00079.safetensors b/model-00021-of-00079.safetensors index cfe656e8cac3616951f89d70105b18c15b62df33..30054ec72fd94db9d11e617d6f8576a094cdb6b6 100644 --- a/model-00021-of-00079.safetensors +++ b/model-00021-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:38462646d85acc421d8745448884543a26aa025406186730767b15040567263c -size 4995471808 +oid sha256:bc82fc9fde06b7eb4c6dcce461f8461d3bc73ef29d589648fa939ad94cbcb5e8 +size 4994069184 diff --git a/model-00022-of-00079.safetensors b/model-00022-of-00079.safetensors index 7c9ae6e3f810a992f118d35faf4ade33c269f035..0f7d76a3f74a5771ecf77f2e1611af015d4c9dee 100644 --- a/model-00022-of-00079.safetensors +++ b/model-00022-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8c09ec3106fc87a1e19181d49305478a1a3f25c6582c7f163404bf72a1ee280f -size 4994246032 +oid sha256:caf7e9e6a6de91baf01de7982bc2d0047151043d967954b901298b83ea947d38 +size 4994069312 diff --git a/model-00023-of-00079.safetensors b/model-00023-of-00079.safetensors index 8612ba2188d045dac7c0104b889858ac7dacd93f..f303ad134c4b4d15eb56a00d6f33fa398cde5a16 100644 --- a/model-00023-of-00079.safetensors +++ b/model-00023-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:687241da1f606c15f341d5ad59a994cd3dde87ffcc0b4ddeceeccbe0e239e3df -size 4994728040 +oid sha256:042a086e7f261a05fa8386d46f06bdb8c1b63436bae01dfe4aef53785c1da1af +size 4994397696 diff --git a/model-00024-of-00079.safetensors b/model-00024-of-00079.safetensors index 4e64310e3a20af69e78c9e6c17fe527834493c95..56a1207a2d4477d7ed76514e38cb45f15d80e54e 100644 --- a/model-00024-of-00079.safetensors +++ b/model-00024-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4c0dc60f33ae9210fc65efe96f0b133efaef793f99dd2635a061d3c772dc5609 -size 4998608136 +oid sha256:4f8856fc6453c2f6e81861effefdafd43b3fcb2f1535e9f71c7885387acd797f +size 4993051984 diff --git a/model-00025-of-00079.safetensors b/model-00025-of-00079.safetensors index 87dfe478b62559b36d4c11a40f8e87f73fada378..288ed378e003dd78f9dccffc574fb871f8bd0fe8 100644 --- a/model-00025-of-00079.safetensors +++ b/model-00025-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f7429e3c12879bb5bd99696e912844da1749c4632af7e846fcb0ce057619264c -size 4999985960 +oid sha256:981ba2705feaa0aa1447a2ae55ace1539dc1ca74db9dc9cc22819bb6fff7ec01 +size 4994246856 diff --git a/model-00026-of-00079.safetensors b/model-00026-of-00079.safetensors index 8ed006d1c593c37231952612f3908639e80f2830..bed2c7934404d0a27233aa1aa0ce72e0b6270f88 100644 --- a/model-00026-of-00079.safetensors +++ b/model-00026-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0f8c4e1aa21e15bde223cd5b41ffe762b0b9e16b5ec041d94413f97d62d63313 -size 4998685960 +oid sha256:a47fdbe24e8e93e658c3d36dcc5e77f7ddbd63b91dd06ee7c02b27767ba3579a +size 4995450128 diff --git a/model-00027-of-00079.safetensors b/model-00027-of-00079.safetensors index 9e9642935b3200ede9c9c301e7d31d502136e1e2..fce6c8a9011faa94415187ec8e024b272c20c8b2 100644 --- a/model-00027-of-00079.safetensors +++ b/model-00027-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:95a541ae4a0fc59b5d103592494c9d33df3effe1d8c2bc33756d4ba3967275eb -size 4998665648 +oid sha256:88dd0d48c6e04c80ba6ff6de83111736eb4f80c0dbfce2fb04483dee37e1416f +size 4995450272 diff --git a/model-00028-of-00079.safetensors b/model-00028-of-00079.safetensors index a11a2459df0db9d41647222fab8836cd0755ec5d..07943ce40ade57c013136a7ceadcd02299da9d8b 100644 --- a/model-00028-of-00079.safetensors +++ b/model-00028-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0214c0d1e9c19997dc36f1f98763ad764cc693d3dea56c2b11b57dcaa74fbd08 -size 5000069440 +oid sha256:81f91e1070cd58904f0db15954504770d684faa482bf2ec79be82977c328ce9b +size 4995471808 diff --git a/model-00029-of-00079.safetensors b/model-00029-of-00079.safetensors index 6ca469331b7942e227c1fc97b40098eca93b6e70..fcf3ae142739c007c3cd7061bcf832ecc2df2aab 100644 --- a/model-00029-of-00079.safetensors +++ b/model-00029-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ecc28701dfb18b88fa69875153001ba56bb3d49a19c39881a90345e850c49c2a -size 4996397720 +oid sha256:b0cc9dc1781166bb4f68f7f5370e50f432c8278e83e7ccb9871fc31ccaf1dd05 +size 4994245952 diff --git a/model-00030-of-00079.safetensors b/model-00030-of-00079.safetensors index bd8f61e510771096bb628d23dc4959a915e7bf7e..6e5902b009eb93cdbf85bf2a9d164b3aee4712e2 100644 --- a/model-00030-of-00079.safetensors +++ b/model-00030-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9d2345922423da00baf3e38703cb7d9c75c2dd5116db1228e0185e9dbe0c7f2e -size 4994246256 +oid sha256:a44ab131d0abf2a80a298902eea77a9cd3467e5bbb97c3657fa443c5b43f5cb3 +size 4995450128 diff --git a/model-00031-of-00079.safetensors b/model-00031-of-00079.safetensors index bd3757fb761e6aaf481c9d955464a2acef8f49ea..3532b9e9346d56eb60d2555966c0e848bda80822 100644 --- a/model-00031-of-00079.safetensors +++ b/model-00031-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5e924a03e540453bf0bc72ea43e5c8d7c077a6f523c9d6ea17cf9eb0c1430685 -size 4997657768 +oid sha256:2815d06c77055d5ea54f000a514cfa48c870611a6be073948595d5972fecd6eb +size 4997607984 diff --git a/model-00032-of-00079.safetensors b/model-00032-of-00079.safetensors index 43fc69e27e2338293ec7b78074ef6718ddd96674..5c3a5f8ad6c4b20cf4c41131f991f4128c26cfd4 100644 --- a/model-00032-of-00079.safetensors +++ b/model-00032-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bfbc11afce5a07f6a3ffc90e6cda6586fd912ac5b8a7322023865f419a6175c1 -size 4995678456 +oid sha256:96323ef69ad48ea3817a49fbeadc5b0643a9624baaef356d50899c92723b41e4 +size 4995749432 diff --git a/model-00033-of-00079.safetensors b/model-00033-of-00079.safetensors index 64993b78fa30d833b9e67b8e6e7499550db4af1c..d5dddb076ff22bcf014121188006729b3899c8ed 100644 --- a/model-00033-of-00079.safetensors +++ b/model-00033-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b844c5e6e2f8194259cd680a1a7d1dc4624e68d6eccc031ef3a5e05fb122c30a -size 4995471064 +oid sha256:3d2422b2c7c76e6b711a1714a2d1fefc42a8ffbbaad0c907df2167b37db7438f +size 4994246536 diff --git a/model-00034-of-00079.safetensors b/model-00034-of-00079.safetensors index c48b292285bd566d52b01c438892b1ef14e605ed..76d9b03e9634c9039a9acb13cdbf009f36624ef2 100644 --- a/model-00034-of-00079.safetensors +++ b/model-00034-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4bc4f1a54314c89c0014b8b2abf10108f64499cb806125cc6ed965336946e357 -size 4994246840 +oid sha256:03adee106e7885063b0d2e3f0583ea5a3c7c3faf4302924cc834f8156b3fd2c8 +size 4995450128 diff --git a/model-00035-of-00079.safetensors b/model-00035-of-00079.safetensors index ebb32ccde846b75c2541958abdbf97121fb915ce..1d3691fee3e8b4fe909804cabbcb18a258b6543b 100644 --- a/model-00035-of-00079.safetensors +++ b/model-00035-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4ef038da5e8fdc785b5504346948d1a1c529296d209c17d7543b1212a1a1bd92 -size 4995450128 +oid sha256:0fccc8cebe20179932ffcfdf6254ee8fa004f25441bdd232314885e58b71708b +size 4995450584 diff --git a/model-00036-of-00079.safetensors b/model-00036-of-00079.safetensors index 8ed96983c3a0026de2c0561a014c0a1621337b97..6b2101ee3fc80a08506afaf34665f323d0dcde0e 100644 --- a/model-00036-of-00079.safetensors +++ b/model-00036-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e56bf3fd4a1bcb46e2f031de41d1fb0be269c4cf44c7332a6a96039a2028ac1b -size 4995450280 +oid sha256:7b6f1466e4287a5f0474424434d3c98e082a4c84d1a97265bccb577785b6878e +size 4994246848 diff --git a/model-00037-of-00079.safetensors b/model-00037-of-00079.safetensors index 8508bee9357861187abe2be53b2cb8b612a29375..1e856eeac62d7d2d93b4033483317eeb6b27eda0 100644 --- a/model-00037-of-00079.safetensors +++ b/model-00037-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:107040b83f0561a0a0b0b64714b77f754fa1bd30181c7aad4c3ccf09acd09415 -size 4995471816 +oid sha256:5989589e1d6dcb23fe6d39b739de34d8eaad1a5024094284af9d1f33306fdff5 +size 4995470600 diff --git a/model-00038-of-00079.safetensors b/model-00038-of-00079.safetensors index a3eb3e1d9a0528030ce1c900daece22ec1a27f92..4089360a3266dadad4a7204017b38cd975c40af8 100644 --- a/model-00038-of-00079.safetensors +++ b/model-00038-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0e50b3cfd6b1221ff0d83d777fdc720a9e198c043c055b47ad20266f24720034 -size 4994245928 +oid sha256:07fcc900d47518a0667800e983ee807d80635df58da1c75f5981619eac07e6f5 +size 4995450280 diff --git a/model-00039-of-00079.safetensors b/model-00039-of-00079.safetensors index 92561550de5ffb404d7b316216d52fc16ebb5b11..5740eea2e8b333b6fe4458b727e803afa4079f85 100644 --- a/model-00039-of-00079.safetensors +++ b/model-00039-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e1dd6578881cc976f46754174731722a3714dbf49ba97b6f52b2a790a870ae12 -size 4995450128 +oid sha256:5cf09ed283d878d452f9d562a6d659d98dfdfb926640ff488d6aab5526ee505c +size 4995451336 diff --git a/model-00040-of-00079.safetensors b/model-00040-of-00079.safetensors index a47d7d686a81398ad7ea23c0a5e7ce575bcd6391..e1bb868ee630c7f208c4db1aaa306bf4c07e1ad8 100644 --- a/model-00040-of-00079.safetensors +++ b/model-00040-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7d719a0ae212f3c7ae4ed8062edc01e026d882f13c9c6212d96459e5c7b6a466 -size 4995451184 +oid sha256:9bb7e29836a8996030e08dd7b39b68a89d8f289b6b32d6de64285aa1e338cc97 +size 4994245936 diff --git a/model-00041-of-00079.safetensors b/model-00041-of-00079.safetensors index 9dd1f6386d8ce0cda6d646ca0fbbf1ebd8fe7175..a807597347fb2e130530abc120dbf046e68f071d 100644 --- a/model-00041-of-00079.safetensors +++ b/model-00041-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:482507dc2cc318f25ebe2a7ddd41e3d2da6b5b5f71f0dbdec2d436f659a47710 -size 4994246248 +oid sha256:e586256e5a48794dd94f64280683b347f8fb95ade2913ad011f646a97a2b647e +size 4995470600 diff --git a/model-00042-of-00079.safetensors b/model-00042-of-00079.safetensors index 86c4949cdc61bf3ec633917be3d61ef98c1d486c..7c3965542579c78203cede50604f8a2d87338245 100644 --- a/model-00042-of-00079.safetensors +++ b/model-00042-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:12dee9589ef1dd0a46be6dbd83cb0b40b373921cd2e3e3cfefd4ec83d66482ef -size 4999025200 +oid sha256:9d55a15816ef7b374020f5bd9041ba764b020231ee01f959c463ca853a7f35c7 +size 4995451176 diff --git a/model-00043-of-00079.safetensors b/model-00043-of-00079.safetensors index 370b88cc8ee199ebd482acd6e63a2d4d3c8ca262..2b21f2aa84988f61a1db2e1c6fff9bdce92583dc 100644 --- a/model-00043-of-00079.safetensors +++ b/model-00043-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2409d3636b1fe3fe5bac573a5a4f0a915cad502b8e1b227567433dd02a2e4f6b -size 4994331488 +oid sha256:83e99db8bd34f9abb7949e489bc354e122fcdabecf696c7ef335842606cedb99 +size 4994246248 diff --git a/model-00044-of-00079.safetensors b/model-00044-of-00079.safetensors index a337627354a85acf3a15fd81c213d6754c9fca85..f7f5d045d138e2d5d26b32b24a911d65b2d2adf1 100644 --- a/model-00044-of-00079.safetensors +++ b/model-00044-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ee3040c7c4a929e1c794bd968e36bc1df2e288cbe370e6c6a74e5ebeaa576b4c -size 4995450600 +oid sha256:0749d1e2db50bd2ef9446ee881cf5d4a53cd57c48c387a6db3cbb004107fc7e0 +size 4999034024 diff --git a/model-00045-of-00079.safetensors b/model-00045-of-00079.safetensors index 575cdcd340303b4649884689bfcb4fa50eda616b..32c66b344a82ce4b0f9ff1fc46c2e8ee4f818dbe 100644 --- a/model-00045-of-00079.safetensors +++ b/model-00045-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e171472cae45bdb822dd69d8e3e909610c4a876544024afe1fe7a6094a3abbd0 -size 4994246832 +oid sha256:b570cda3528df1758e757a839a8b257e8b74c0ba3f3dc1111054e2cd644b0f8c +size 4994069312 diff --git a/model-00046-of-00079.safetensors b/model-00046-of-00079.safetensors index 7ea50e54279c5e05d2f158e3669f75c24df62a2e..7813e37385546ca1db93926ea1737f5b3bd65f24 100644 --- a/model-00046-of-00079.safetensors +++ b/model-00046-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7e3f4018c9ef7d93166415fb47f65041cf13411baa2af726a677807244605090 -size 4995470600 +oid sha256:a143d56531526512a6ff72c22ac34e4c16cab93f9f3ed4305bd55f926ec2a9ec +size 4998139472 diff --git a/model-00047-of-00079.safetensors b/model-00047-of-00079.safetensors index ec1c5b88f26ae0e404edaefb069f9c4b6807a9ec..c4dbf3e831c5ad8a9147689709089a8187915097 100644 --- a/model-00047-of-00079.safetensors +++ b/model-00047-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0c84f6b789cfc63d1ffb2c634723ae041d6621f57b269d3307dfb0bcae678a71 -size 4995450296 +oid sha256:893ff7c01453f7fa864fa4eb52474ed61367b0eff80d54a7e8362ca27099a340 +size 4995450312 diff --git a/model-00048-of-00079.safetensors b/model-00048-of-00079.safetensors index d99ad37889bff6c186b6c15ec6ea9246f04d60e5..1dc68651180847ecb1c2b5f0f9cf067e4b636aa6 100644 --- a/model-00048-of-00079.safetensors +++ b/model-00048-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:38a492e2f4a5c1cb110a203e949c4fa3d6be38b0d651b2658e143a05139a1270 -size 4995451352 +oid sha256:ff0360ae893fc8a25dde73f625d1db71caef6a3ba87a0c7994367c898456fe77 +size 4995471848 diff --git a/model-00049-of-00079.safetensors b/model-00049-of-00079.safetensors index e0198c7e422233ee3e6d49f5a20196502145f7fe..d8f0a199470486d1e5343f8513598258e50a0f64 100644 --- a/model-00049-of-00079.safetensors +++ b/model-00049-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4b4aa132fcc8a9da8e1e9ca6026cd1d1ec91e59e02e2e6e7c77009d759270b52 -size 4994245904 +oid sha256:7729399a9959fa469a849b48254a3a9478383c3b4ad233c2ddfa0077584019c2 +size 4994245864 diff --git a/model-00050-of-00079.safetensors b/model-00050-of-00079.safetensors index 2b139bd6f6fdf06a4876c2b57d0ecde12d0b79c8..f62db09a9cb965d1eb31d61a6829c82d5548d831 100644 --- a/model-00050-of-00079.safetensors +++ b/model-00050-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c6305a86b724962b3c62a2cc3db3ea9eb17235f833f3a349b06a14fd1ec52167 -size 4995470600 +oid sha256:bb173ef7eb36b2cd85cd2e6a096e49b198cba051448a35719f2bf9e018f42d40 +size 4995450128 diff --git a/model-00051-of-00079.safetensors b/model-00051-of-00079.safetensors index db4fd4948c9c3b246f26b3ae27eccc03abba826e..02a02e2b24bce051fb764630d1390cf49748aa88 100644 --- a/model-00051-of-00079.safetensors +++ b/model-00051-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9447b3296828b534c8f3c86fa3a0f3723eebd197d225efca377d0413a5c75096 -size 4995451192 +oid sha256:954798da375d6c0e771899eba96c64dc25bee1f61a2e353304601ce336a85bf2 +size 4995451216 diff --git a/model-00052-of-00079.safetensors b/model-00052-of-00079.safetensors index 1585797f8e1876327ecfdca16b61ab331ed94036..0e9bde13060954545e1a3c07585df2de1087112d 100644 --- a/model-00052-of-00079.safetensors +++ b/model-00052-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1713b85d84efc329bc594a8f9f7671431b41ef94db40bff52001b425817057c0 -size 4994246232 +oid sha256:ee258ca26fc60760cee52e5f28b2969a58c8c6058623a2f6adfbadd9bf77ea46 +size 4994246216 diff --git a/model-00053-of-00079.safetensors b/model-00053-of-00079.safetensors index 642733288a105f586b436a1f02fdfc4a83a1b955..f35b20050b4d515e86b639ba264fe16fa669b0e8 100644 --- a/model-00053-of-00079.safetensors +++ b/model-00053-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d839b4714d73e8f2e106d763900935cfa90ebe094294e5546e97057eecbc100d -size 4995450128 +oid sha256:ec1ad2b41aa98a483aac46ec6f03c8c37fef43b178fb423f441b41dcabe1b476 +size 4995470600 diff --git a/model-00054-of-00079.safetensors b/model-00054-of-00079.safetensors index e1e96c10aa93756e98e1f93b5ddfad17f57faf91..dafcee57961e5336c22839e46cd29353b42b0baf 100644 --- a/model-00054-of-00079.safetensors +++ b/model-00054-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:576f586fa44968701ceeb0f41363421383e87e3cece76a4b389aadad1e19a18f -size 4995471360 +oid sha256:2ac5add8585b30489780d4a475336f9b8d64ca8df8c7ba60e25c7294de7f076b +size 4995450912 diff --git a/model-00055-of-00079.safetensors b/model-00055-of-00079.safetensors index 91e8d3d9c097e17b2eca21338d6a1b099d2fad7d..3d489134057d92036999f628d8e681cfba7b5d94 100644 --- a/model-00055-of-00079.safetensors +++ b/model-00055-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:821ef9ddcf14cc6e6f5ae0358551f9f4375eeb5ba80f35098a37247879fc9a97 -size 4994246536 +oid sha256:5d82b456ee3b14fd0de712b7abd5e00c729fe07ceae79451c0dce16b21253439 +size 4994246520 diff --git a/model-00056-of-00079.safetensors b/model-00056-of-00079.safetensors index 7907cd432149786216047d4263cb2bd4ea1ff9a3..7314f036a5fb1cff663643aa04f87c0c107109ea 100644 --- a/model-00056-of-00079.safetensors +++ b/model-00056-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ff7025ed54d03feb0d6a58676b34a1e384212561b7dc3f8c195be1575a20c28f +oid sha256:6486bfdae82e4e306d5f8ab4c213f94c260c4891e6833272234b375fe0d5ee6b size 4995450128 diff --git a/model-00057-of-00079.safetensors b/model-00057-of-00079.safetensors index a1ec45b79eaaf690a9cd2b62ed415290ca59465b..aae3eb3516df571ed16627852c99c9a8e2e8863e 100644 --- a/model-00057-of-00079.safetensors +++ b/model-00057-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8e0b034d69d8e4bda7b71740e93852aef0f29b7022375b6868d4e76d62fd8891 -size 4995450584 +oid sha256:789d669cfd498dd74e6e751616efe15e59a342bdfbf14d0f0758afdb1c3c9b08 +size 4995471080 diff --git a/model-00058-of-00079.safetensors b/model-00058-of-00079.safetensors index 2393767fc4e56f7d673846f7b4699b3eb6aef722..a72a04d66e34b5a69dc8a40abc58967046c5f745 100644 --- a/model-00058-of-00079.safetensors +++ b/model-00058-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2cccb0b444af9aab7b910b13dd766bbe9787dd1c3e77ef295a6daf1bcd3c36ef -size 4994246848 +oid sha256:b61235330567be2525add4b89016ec07407b3f1e4b9d51a809d1aa3eea7531d0 +size 4994246824 diff --git a/model-00059-of-00079.safetensors b/model-00059-of-00079.safetensors index c37633fe5b179c8c7b813dd2d81437f18826cf88..2176cdb914bd06be4460769304c133fe4b67161d 100644 --- a/model-00059-of-00079.safetensors +++ b/model-00059-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f8f68cefa917f453787029b5bc2d60c27ce72d99c9bfb3163e7446ca79f3e23f -size 4995470600 +oid sha256:b9e6c843f3a64469d98ba6a7212d7cce6e3c02201d2e8c0c1fef94bad06e777f +size 4995450128 diff --git a/model-00060-of-00079.safetensors b/model-00060-of-00079.safetensors index 364fa2c24e93b3d81efb83d1d7912eed63f3b074..db859a0c07519743ae4fcfcbf41137b628625cd0 100644 --- a/model-00060-of-00079.safetensors +++ b/model-00060-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:29a8377d7c3fc8d04d3ecc7cd6fa1afabbe9d59106f85d605a23c299429de17b -size 4995450280 +oid sha256:570ebbba67e9af86ff0000b707a376e9e9ccde01e9e8d27f57fb619ef0af48a7 +size 4995450296 diff --git a/model-00061-of-00079.safetensors b/model-00061-of-00079.safetensors index 4afeec4cd5becf71209a0a5117f31cc107cd2cc1..85cfc76ef68f6ee93f3a6c54a90f66fa1717958e 100644 --- a/model-00061-of-00079.safetensors +++ b/model-00061-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:33ac4df3bfd436cea427918739fa248ccda7c128a3087ba5ecba5ad5667b0d74 -size 4995451336 +oid sha256:139fcff5c6d5f2d68362ddd0d1dcefc7df3792f7926cca9a258e4336101d7b1c +size 4995471832 diff --git a/model-00062-of-00079.safetensors b/model-00062-of-00079.safetensors index 1c61be44f5ab4792d1ad6a87c419334291cdcbed..eed6a45c03a042b9a267b3af07f22b44e1137ca0 100644 --- a/model-00062-of-00079.safetensors +++ b/model-00062-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:90454dfe7d6565437a5068969cfcf11fea3b368ac91ade84e53087fa1d257b84 -size 4994245936 +oid sha256:61aebac7cd3cd615bf7f1560769c782468561fc4d1d87d50113facbea920e161 +size 4994245896 diff --git a/model-00063-of-00079.safetensors b/model-00063-of-00079.safetensors index 40db1fc7c4a3dcde58d4c93f041dd03e500adf73..686234ca0fbad1313eea2843dd6cd1b7eaa42632 100644 --- a/model-00063-of-00079.safetensors +++ b/model-00063-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cfc358d1da7f30392580c070cdce7d3b8d0c65204dbf89ff89e4ae02aa2d52c0 -size 4995470600 +oid sha256:282b57c1a02f4a52684498a45bed28d86790679f94947a0c13bf827d07d1b567 +size 4995450128 diff --git a/model-00064-of-00079.safetensors b/model-00064-of-00079.safetensors index 72d20c2f8eef907736976071a6a2709b9db490a6..486727f2050225814984b713a27a847db6398750 100644 --- a/model-00064-of-00079.safetensors +++ b/model-00064-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5ddfd3db443bf862e3ee9f384a244de957c9d465bceb4e88721f571ddb5a17fb -size 4995451176 +oid sha256:3b68e34e734a239d8d2de9304492776e24275c6dc3b8954520bc789728fd6f2d +size 4995451200 diff --git a/model-00065-of-00079.safetensors b/model-00065-of-00079.safetensors index 0aa294d025ce800afade3f1aaa15a49ccbac4e20..946354cf3927f11d0170b33a043c8bc597697e0e 100644 --- a/model-00065-of-00079.safetensors +++ b/model-00065-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ac2f1fae640077be9e5dafac509f99cba78987b26f81c9649cf2a68aca62122e -size 4994246248 +oid sha256:f87c78fa583ecbdb4a8e5fd74477a8b91fb47221b64f7fc5d6ab532fea0a8835 +size 4994246232 diff --git a/model-00066-of-00079.safetensors b/model-00066-of-00079.safetensors index a72ad415191f37f106edefeb0c5bc1f1efd6c551..98302c500172c501f58fd6cbcd804e82a1c25509 100644 --- a/model-00066-of-00079.safetensors +++ b/model-00066-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c20d842e993db734c6c657d640bc53e1a654d766bc59c265f82ce6329dc3ac5c -size 4995450128 +oid sha256:8abd4d9e42d7d592280358f8e73358ecc14327e26fe57f1e09a836e82d0f06db +size 4995470600 diff --git a/model-00067-of-00079.safetensors b/model-00067-of-00079.safetensors index 706deabb4c9462827cdf8df361aaf8ead8255b9d..d59747b5436cc613c4f5e83af5631cd463939e20 100644 --- a/model-00067-of-00079.safetensors +++ b/model-00067-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5c68b6bc28b2de1a408cef497c5dc2cd6ae1d3c98b0a3c5cd84bb897f40f6453 -size 4995471344 +oid sha256:96dd5fd802eb2a972528b74ba861818008ef9d2167e4abe94d81e6629da5c545 +size 4995450896 diff --git a/model-00068-of-00079.safetensors b/model-00068-of-00079.safetensors index a0be4d297c39752cd50ca24418e4a41e8ef28a5f..f8165117773b68d681aa6c2be545d275af0dd615 100644 --- a/model-00068-of-00079.safetensors +++ b/model-00068-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e10cba6a054b82e73a9c5cce8bacc9f4148085a6c4068252955f68ea6445b070 -size 4994246552 +oid sha256:e492097cde3bf40994858736c66bd9e7525618b0f96fcd204d0d5398cfb24d42 +size 4994246536 diff --git a/model-00069-of-00079.safetensors b/model-00069-of-00079.safetensors index 680f35c6d22693a322e60695cdea8f15196a6ded..0c1dba1643c99031f1122ff8b62717ac0bac7793 100644 --- a/model-00069-of-00079.safetensors +++ b/model-00069-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f450be82718eac9660dd72159a8e54aa8cf0419355cd9aa2b45a1ea2e084e439 +oid sha256:8ff8d1c3fcbaf6c5aeb68b6cee77a585c203526bb1c982d827285c2249faed31 size 4995450128 diff --git a/model-00070-of-00079.safetensors b/model-00070-of-00079.safetensors index cd67569f2b0cd9ca9e17ca1dbf605c8e40efb169..4929d4d1d30e9ce0f879cafafd0ce0c3121c145b 100644 --- a/model-00070-of-00079.safetensors +++ b/model-00070-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:16ae95e527bb3cb4951cd20d8dc3220d78892a5c55e8bd82ade865f0135b040e -size 4995450568 +oid sha256:5c2c5fb0d23aaa061100e759f37e95a8095986d57ef8effecf7961333e61c23e +size 4995471064 diff --git a/model-00071-of-00079.safetensors b/model-00071-of-00079.safetensors index 6c9b5c99455d816dc77ad735f0657cd108bfc383..ac85d347f31dc2dd5aca9bee6169e4d57f635f98 100644 --- a/model-00071-of-00079.safetensors +++ b/model-00071-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c2c73a0feb6244961777aed9de6b5bad53b2f81063a88cbb38d748983e433100 -size 4994246856 +oid sha256:afc08b458ea83a925390e441640e130e5178e159fed704ee3b906cb8b1ceec26 +size 4994246840 diff --git a/model-00072-of-00079.safetensors b/model-00072-of-00079.safetensors index 2957ff70b29ca4bb4f768bd164ad1c1cc6c37baf..9f2f77c40a54ee8d3ac9eea9afde8f8f9a1f78b5 100644 --- a/model-00072-of-00079.safetensors +++ b/model-00072-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d4a023801bc6b730752fda37282eb2773f6316c9296512c648fc0fe0fa06350b -size 4999110768 +oid sha256:a5782523359026db0d351facf663e8916809738a849941d3332e39ffee1b20b7 +size 4995450128 diff --git a/model-00073-of-00079.safetensors b/model-00073-of-00079.safetensors index 1df7e0adaaf738f6982534ca8989ad8f5951be4f..dc9494ffd7a98da6ec845214cb223e15815a1c7a 100644 --- a/model-00073-of-00079.safetensors +++ b/model-00073-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fd481dfea312dd0290706e9247685eed238afca625b774a0f10828644a2b473f -size 4994245920 +oid sha256:1ad09e08ce8aa69e11007e14664052232747abe155c8331499017cfcf1dad7d7 +size 5000063344 diff --git a/model-00074-of-00079.safetensors b/model-00074-of-00079.safetensors index 634ca2990b841f00a60a1d275fbaadf8cc84e8e3..8799d822b7bd02670c6e9e56fae6963ab4b56b39 100644 --- a/model-00074-of-00079.safetensors +++ b/model-00074-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:48b76da535c9d50078049b3bd0d94777dbe033c5c32c6c05e984ef1ee1000a38 -size 4995450128 +oid sha256:65403d444fb219a92ba277a3dfb9e44348c8a6c301a509850ea73b51decc70ab +size 4993293304 diff --git a/model-00075-of-00079.safetensors b/model-00075-of-00079.safetensors index 0d022ecbcf4847b7af32eba304f3e78c0e7d7114..b7439f47dabc46bbc3a3eea108b740d21f253bf2 100644 --- a/model-00075-of-00079.safetensors +++ b/model-00075-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:23929635d89f088fa72a44629df6d1618113295d1fcc19322ea30225dfb84f9a -size 4995471664 +oid sha256:f19a10a063b4fbf5a781711d2be16a103065f300d387972779042aad06ca9e70 +size 4995451208 diff --git a/model-00076-of-00079.safetensors b/model-00076-of-00079.safetensors index 7c8df5a5993a2875da456828547ad353fa050c18..211ecb4224fdeba78915e86853de374f8cd8751b 100644 --- a/model-00076-of-00079.safetensors +++ b/model-00076-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4422724f00bbed6859c8435d90a400ad1f00cce2a7a54241c3e099e952344f76 -size 4994246240 +oid sha256:fc3bd0deed9d901e65eda16d2894fa484f2bbbf3b21276b23b2b8b31c5032254 +size 4994246216 diff --git a/model-00077-of-00079.safetensors b/model-00077-of-00079.safetensors index b79f49a6e21034e319df34ad9f1443a3efb85258..b9811ff66aa71c58559baf16eb57872140f492d0 100644 --- a/model-00077-of-00079.safetensors +++ b/model-00077-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ddddbf84870daa2f122b4000e8e08c5daae2cfb044cc59eb7a381f103ac0ca80 +oid sha256:f453c00257240da33b32d42dcdc482e68369b61b0cd2d7f76be05c7a53f88b9e size 4995450128 diff --git a/model-00078-of-00079.safetensors b/model-00078-of-00079.safetensors index 08c15cf198bccec8450396407a91a41ee500165f..f5b7f95f9f7a1fe6cf5d10f821045576711825d5 100644 --- a/model-00078-of-00079.safetensors +++ b/model-00078-of-00079.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e487554cbd935d33046fe3d39ee6748b0943bdcec0db7cdadde7071e3c6b6b10 -size 3517154656 +oid sha256:7663b25745fa64a56b756368dbe2edc0b53d34ef2070e4bb55a09856d3575799 +size 3553951520 diff --git a/model.safetensors.index.json b/model.safetensors.index.json index 44fac9848cda53c1b5500f67ea56992add29ab01..7439adc4a800e53a4ec5cd8bb2f8cd6c30f5c058 100644 --- a/model.safetensors.index.json +++ b/model.safetensors.index.json @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:46693ed0d45667997b4ec060a61d01a63643b5904a7a146b1a18cb1056f8b5a2 +oid sha256:eb34714a209a8a0b81a6044e0f93b2bd98c156d0fa74255a5509cbccfc470dd7 size 20783505 diff --git a/quant_strategy.json b/quant_strategy.json index fd02a4d52324c12a2c3d442388150ac4ada7a73e..90deef9716ba5d161b7013a23c8d1cc8060ea0d4 100644 --- a/quant_strategy.json +++ b/quant_strategy.json @@ -1,7 +1,7 @@ { "measurement": { "model.layers.0": { - "accuracy": 0.9837261077482253, + "accuracy": 0.9836017141933553, "total_bits": 2516201472.0, "q_a_proj": { "group_size": { @@ -109,7 +109,7 @@ } }, "model.layers.1": { - "accuracy": 0.9947622956242412, + "accuracy": 0.9947275305603398, "total_bits": 2516201472.0, "q_a_proj": { "group_size": { @@ -217,7 +217,7 @@ } }, "model.layers.2": { - "accuracy": 0.9893310712650418, + "accuracy": 0.9892190659011248, "total_bits": 2516201472.0, "q_a_proj": { "group_size": { @@ -325,8 +325,8 @@ } }, "model.layers.3": { - "accuracy": 0.9887572305742651, - "total_bits": 32325176320.0, + "accuracy": 0.9843865929287858, + "total_bits": 32448351232.0, "q_a_proj": { "group_size": { "4": 128 @@ -431,6 +431,45 @@ "scale_bits": 4, "scale_groups:": 32 }, + "moe_shared_expert_gate_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_shared_expert_up_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_shared_expert_down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, "moe_gate": { "bits": 16, "group_size": 0, @@ -438,8 +477,8 @@ } }, "model.layers.4": { - "accuracy": 0.9887091068085283, - "total_bits": 32325176320.0, + "accuracy": 0.9804863141616806, + "total_bits": 32448351232.0, "q_a_proj": { "group_size": { "4": 128 @@ -544,21 +583,12 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_gate": { - "bits": 16, - "group_size": 0, - "desc": "Not quantized (original precision)" - } - }, - "model.layers.5": { - "accuracy": 0.9895120826549828, - "total_bits": 49456623616.0, - "q_a_proj": { + "moe_shared_expert_gate_proj": { "group_size": { - "4": 32 + "2": 64 }, "bits": [ - 4 + 2 ], "bits_prop": [ 1 @@ -566,9 +596,22 @@ "scale_bits": 4, "scale_groups:": 32 }, - "q_b_proj": { + "moe_shared_expert_up_proj": { "group_size": { - "4": 32 + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_shared_expert_down_proj": { + "group_size": { + "4": 128 }, "bits": [ 4 @@ -579,7 +622,16 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_a_proj_with_mqa": { + "moe_gate": { + "bits": 16, + "group_size": 0, + "desc": "Not quantized (original precision)" + } + }, + "model.layers.5": { + "accuracy": 0.9889985889894888, + "total_bits": 49646546944.0, + "q_a_proj": { "group_size": { "4": 32 }, @@ -592,7 +644,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_b_proj": { + "q_b_proj": { "group_size": { "4": 32 }, @@ -605,7 +657,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "o_proj": { + "kv_a_proj_with_mqa": { "group_size": { "4": 32 }, @@ -618,7 +670,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_gate_proj": { + "kv_b_proj": { "group_size": { "4": 32 }, @@ -631,7 +683,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_up_proj": { + "o_proj": { "group_size": { "4": 32 }, @@ -644,7 +696,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_down_proj": { + "moe_expert_gate_proj": { "group_size": { "4": 32 }, @@ -657,16 +709,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_gate": { - "bits": 16, - "group_size": 0, - "desc": "Not quantized (original precision)" - } - }, - "model.layers.6": { - "accuracy": 0.9870037506334484, - "total_bits": 49456623616.0, - "q_a_proj": { + "moe_expert_up_proj": { "group_size": { "4": 32 }, @@ -679,7 +722,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "q_b_proj": { + "moe_expert_down_proj": { "group_size": { "4": 32 }, @@ -692,7 +735,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_a_proj_with_mqa": { + "moe_shared_expert_gate_proj": { "group_size": { "4": 32 }, @@ -705,7 +748,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_b_proj": { + "moe_shared_expert_up_proj": { "group_size": { "4": 32 }, @@ -718,7 +761,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "o_proj": { + "moe_shared_expert_down_proj": { "group_size": { "4": 32 }, @@ -731,7 +774,16 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_gate_proj": { + "moe_gate": { + "bits": 16, + "group_size": 0, + "desc": "Not quantized (original precision)" + } + }, + "model.layers.6": { + "accuracy": 0.9873386551626027, + "total_bits": 49646546944.0, + "q_a_proj": { "group_size": { "4": 32 }, @@ -744,7 +796,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_up_proj": { + "q_b_proj": { "group_size": { "4": 32 }, @@ -757,7 +809,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_down_proj": { + "kv_a_proj_with_mqa": { "group_size": { "4": 32 }, @@ -770,16 +822,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_gate": { - "bits": 16, - "group_size": 0, - "desc": "Not quantized (original precision)" - } - }, - "model.layers.7": { - "accuracy": 0.9861991163343191, - "total_bits": 49456623616.0, - "q_a_proj": { + "kv_b_proj": { "group_size": { "4": 32 }, @@ -792,7 +835,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "q_b_proj": { + "o_proj": { "group_size": { "4": 32 }, @@ -805,7 +848,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_a_proj_with_mqa": { + "moe_expert_gate_proj": { "group_size": { "4": 32 }, @@ -818,7 +861,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_b_proj": { + "moe_expert_up_proj": { "group_size": { "4": 32 }, @@ -831,7 +874,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "o_proj": { + "moe_expert_down_proj": { "group_size": { "4": 32 }, @@ -844,7 +887,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_gate_proj": { + "moe_shared_expert_gate_proj": { "group_size": { "4": 32 }, @@ -857,7 +900,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_up_proj": { + "moe_shared_expert_up_proj": { "group_size": { "4": 32 }, @@ -870,7 +913,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_down_proj": { + "moe_shared_expert_down_proj": { "group_size": { "4": 32 }, @@ -889,9 +932,9 @@ "desc": "Not quantized (original precision)" } }, - "model.layers.8": { - "accuracy": 0.9886232852004468, - "total_bits": 49456623616.0, + "model.layers.7": { + "accuracy": 0.9860153485496994, + "total_bits": 49646546944.0, "q_a_proj": { "group_size": { "4": 32 @@ -996,16 +1039,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_gate": { - "bits": 16, - "group_size": 0, - "desc": "Not quantized (original precision)" - } - }, - "model.layers.9": { - "accuracy": 0.9861124339513481, - "total_bits": 49456623616.0, - "q_a_proj": { + "moe_shared_expert_gate_proj": { "group_size": { "4": 32 }, @@ -1018,7 +1052,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "q_b_proj": { + "moe_shared_expert_up_proj": { "group_size": { "4": 32 }, @@ -1031,7 +1065,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_a_proj_with_mqa": { + "moe_shared_expert_down_proj": { "group_size": { "4": 32 }, @@ -1044,7 +1078,16 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_b_proj": { + "moe_gate": { + "bits": 16, + "group_size": 0, + "desc": "Not quantized (original precision)" + } + }, + "model.layers.8": { + "accuracy": 0.9847448625660036, + "total_bits": 49646546944.0, + "q_a_proj": { "group_size": { "4": 32 }, @@ -1057,7 +1100,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "o_proj": { + "q_b_proj": { "group_size": { "4": 32 }, @@ -1070,7 +1113,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_gate_proj": { + "kv_a_proj_with_mqa": { "group_size": { "4": 32 }, @@ -1083,7 +1126,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_up_proj": { + "kv_b_proj": { "group_size": { "4": 32 }, @@ -1096,7 +1139,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_down_proj": { + "o_proj": { "group_size": { "4": 32 }, @@ -1109,16 +1152,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_gate": { - "bits": 16, - "group_size": 0, - "desc": "Not quantized (original precision)" - } - }, - "model.layers.10": { - "accuracy": 0.9829977352637798, - "total_bits": 49456623616.0, - "q_a_proj": { + "moe_expert_gate_proj": { "group_size": { "4": 32 }, @@ -1131,7 +1165,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "q_b_proj": { + "moe_expert_up_proj": { "group_size": { "4": 32 }, @@ -1144,7 +1178,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_a_proj_with_mqa": { + "moe_expert_down_proj": { "group_size": { "4": 32 }, @@ -1157,7 +1191,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_b_proj": { + "moe_shared_expert_gate_proj": { "group_size": { "4": 32 }, @@ -1170,7 +1204,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "o_proj": { + "moe_shared_expert_up_proj": { "group_size": { "4": 32 }, @@ -1183,7 +1217,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_gate_proj": { + "moe_shared_expert_down_proj": { "group_size": { "4": 32 }, @@ -1196,7 +1230,16 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_up_proj": { + "moe_gate": { + "bits": 16, + "group_size": 0, + "desc": "Not quantized (original precision)" + } + }, + "model.layers.9": { + "accuracy": 0.9817010213155299, + "total_bits": 49646546944.0, + "q_a_proj": { "group_size": { "4": 32 }, @@ -1209,7 +1252,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_down_proj": { + "q_b_proj": { "group_size": { "4": 32 }, @@ -1222,16 +1265,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_gate": { - "bits": 16, - "group_size": 0, - "desc": "Not quantized (original precision)" - } - }, - "model.layers.11": { - "accuracy": 0.9850956411100924, - "total_bits": 49456623616.0, - "q_a_proj": { + "kv_a_proj_with_mqa": { "group_size": { "4": 32 }, @@ -1244,7 +1278,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "q_b_proj": { + "kv_b_proj": { "group_size": { "4": 32 }, @@ -1257,7 +1291,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_a_proj_with_mqa": { + "o_proj": { "group_size": { "4": 32 }, @@ -1270,7 +1304,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_b_proj": { + "moe_expert_gate_proj": { "group_size": { "4": 32 }, @@ -1283,7 +1317,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "o_proj": { + "moe_expert_up_proj": { "group_size": { "4": 32 }, @@ -1296,7 +1330,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_gate_proj": { + "moe_expert_down_proj": { "group_size": { "4": 32 }, @@ -1309,7 +1343,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_up_proj": { + "moe_shared_expert_gate_proj": { "group_size": { "4": 32 }, @@ -1322,7 +1356,20 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_down_proj": { + "moe_shared_expert_up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_shared_expert_down_proj": { "group_size": { "4": 32 }, @@ -1341,9 +1388,9 @@ "desc": "Not quantized (original precision)" } }, - "model.layers.12": { - "accuracy": 0.9900272490922362, - "total_bits": 49456623616.0, + "model.layers.10": { + "accuracy": 0.9794488882180303, + "total_bits": 49646546944.0, "q_a_proj": { "group_size": { "4": 32 @@ -1448,18 +1495,9 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_gate": { - "bits": 16, - "group_size": 0, - "desc": "Not quantized (original precision)" - } - }, - "model.layers.13": { - "accuracy": 0.9851942050736398, - "total_bits": 32325176320.0, - "q_a_proj": { + "moe_shared_expert_gate_proj": { "group_size": { - "4": 128 + "4": 32 }, "bits": [ 4 @@ -1470,9 +1508,9 @@ "scale_bits": 4, "scale_groups:": 32 }, - "q_b_proj": { + "moe_shared_expert_up_proj": { "group_size": { - "4": 128 + "4": 32 }, "bits": [ 4 @@ -1483,9 +1521,9 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_a_proj_with_mqa": { + "moe_shared_expert_down_proj": { "group_size": { - "4": 128 + "4": 32 }, "bits": [ 4 @@ -1496,9 +1534,18 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_b_proj": { + "moe_gate": { + "bits": 16, + "group_size": 0, + "desc": "Not quantized (original precision)" + } + }, + "model.layers.11": { + "accuracy": 0.9824990088818595, + "total_bits": 49646546944.0, + "q_a_proj": { "group_size": { - "4": 128 + "4": 32 }, "bits": [ 4 @@ -1509,9 +1556,9 @@ "scale_bits": 4, "scale_groups:": 32 }, - "o_proj": { + "q_b_proj": { "group_size": { - "4": 128 + "4": 32 }, "bits": [ 4 @@ -1522,12 +1569,2027 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_gate_proj": { + "kv_a_proj_with_mqa": { "group_size": { - "2": 64 + "4": 32 }, "bits": [ - 2 + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "kv_b_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_expert_gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_expert_up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_expert_down_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_shared_expert_gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_shared_expert_up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_shared_expert_down_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_gate": { + "bits": 16, + "group_size": 0, + "desc": "Not quantized (original precision)" + } + }, + "model.layers.12": { + "accuracy": 0.9849340560904238, + "total_bits": 49646546944.0, + "q_a_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "q_b_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "kv_a_proj_with_mqa": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "kv_b_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_expert_gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_expert_up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_expert_down_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_shared_expert_gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_shared_expert_up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_shared_expert_down_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_gate": { + "bits": 16, + "group_size": 0, + "desc": "Not quantized (original precision)" + } + }, + "model.layers.13": { + "accuracy": 0.9906857509049587, + "total_bits": 49646546944.0, + "q_a_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "q_b_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "kv_a_proj_with_mqa": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "kv_b_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_expert_gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_expert_up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_expert_down_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_shared_expert_gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_shared_expert_up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_shared_expert_down_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_gate": { + "bits": 16, + "group_size": 0, + "desc": "Not quantized (original precision)" + } + }, + "model.layers.14": { + "accuracy": 0.9904032560880296, + "total_bits": 46949960704.0, + "q_a_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "q_b_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "kv_a_proj_with_mqa": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "kv_b_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_expert_gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_expert_up_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_expert_down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_shared_expert_gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_shared_expert_up_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_shared_expert_down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_gate": { + "bits": 16, + "group_size": 0, + "desc": "Not quantized (original precision)" + } + }, + "model.layers.15": { + "accuracy": 0.9876436085323803, + "total_bits": 32448351232.0, + "q_a_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "q_b_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "kv_a_proj_with_mqa": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "kv_b_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_expert_gate_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_expert_up_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_expert_down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_shared_expert_gate_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_shared_expert_up_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_shared_expert_down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_gate": { + "bits": 16, + "group_size": 0, + "desc": "Not quantized (original precision)" + } + }, + "model.layers.16": { + "accuracy": 0.9869564180844463, + "total_bits": 32448351232.0, + "q_a_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "q_b_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "kv_a_proj_with_mqa": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "kv_b_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_expert_gate_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_expert_up_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_expert_down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_shared_expert_gate_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_shared_expert_up_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_shared_expert_down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_gate": { + "bits": 16, + "group_size": 0, + "desc": "Not quantized (original precision)" + } + }, + "model.layers.17": { + "accuracy": 0.9861695145664271, + "total_bits": 32448351232.0, + "q_a_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "q_b_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "kv_a_proj_with_mqa": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "kv_b_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_expert_gate_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_expert_up_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_expert_down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_shared_expert_gate_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_shared_expert_up_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_shared_expert_down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_gate": { + "bits": 16, + "group_size": 0, + "desc": "Not quantized (original precision)" + } + }, + "model.layers.18": { + "accuracy": 0.9839566865412053, + "total_bits": 32448351232.0, + "q_a_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "q_b_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "kv_a_proj_with_mqa": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "kv_b_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_expert_gate_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_expert_up_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_expert_down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_shared_expert_gate_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_shared_expert_up_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_shared_expert_down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_gate": { + "bits": 16, + "group_size": 0, + "desc": "Not quantized (original precision)" + } + }, + "model.layers.19": { + "accuracy": 0.982039811933646, + "total_bits": 32448351232.0, + "q_a_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "q_b_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "kv_a_proj_with_mqa": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "kv_b_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_expert_gate_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_expert_up_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_expert_down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_shared_expert_gate_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_shared_expert_up_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_shared_expert_down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_gate": { + "bits": 16, + "group_size": 0, + "desc": "Not quantized (original precision)" + } + }, + "model.layers.20": { + "accuracy": 0.9796012884471565, + "total_bits": 32448351232.0, + "q_a_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "q_b_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "kv_a_proj_with_mqa": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "kv_b_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_expert_gate_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_expert_up_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_expert_down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_shared_expert_gate_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_shared_expert_up_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_shared_expert_down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_gate": { + "bits": 16, + "group_size": 0, + "desc": "Not quantized (original precision)" + } + }, + "model.layers.21": { + "accuracy": 0.9901153031969443, + "total_bits": 46949960704.0, + "q_a_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "q_b_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "kv_a_proj_with_mqa": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "kv_b_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_expert_gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_expert_up_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_expert_down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_shared_expert_gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_shared_expert_up_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_shared_expert_down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_gate": { + "bits": 16, + "group_size": 0, + "desc": "Not quantized (original precision)" + } + }, + "model.layers.22": { + "accuracy": 0.9890681402757764, + "total_bits": 49646546944.0, + "q_a_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "q_b_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "kv_a_proj_with_mqa": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "kv_b_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_expert_gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_expert_up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_expert_down_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_shared_expert_gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_shared_expert_up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_shared_expert_down_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_gate": { + "bits": 16, + "group_size": 0, + "desc": "Not quantized (original precision)" + } + }, + "model.layers.23": { + "accuracy": 0.9887834941036999, + "total_bits": 49646546944.0, + "q_a_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "q_b_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "kv_a_proj_with_mqa": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "kv_b_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_expert_gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_expert_up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_expert_down_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_shared_expert_gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_shared_expert_up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_shared_expert_down_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_gate": { + "bits": 16, + "group_size": 0, + "desc": "Not quantized (original precision)" + } + }, + "model.layers.24": { + "accuracy": 0.9874449702329002, + "total_bits": 49646546944.0, + "q_a_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "q_b_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "kv_a_proj_with_mqa": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "kv_b_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_expert_gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 ], "bits_prop": [ 1 @@ -1537,10 +3599,10 @@ }, "moe_expert_up_proj": { "group_size": { - "2": 64 + "4": 32 }, "bits": [ - 2 + 4 ], "bits_prop": [ 1 @@ -1550,7 +3612,46 @@ }, "moe_expert_down_proj": { "group_size": { - "4": 128 + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_shared_expert_gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_shared_expert_up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_shared_expert_down_proj": { + "group_size": { + "4": 32 }, "bits": [ 4 @@ -1567,12 +3668,164 @@ "desc": "Not quantized (original precision)" } }, - "model.layers.14": { - "accuracy": 0.9977487137366552, - "total_bits": 46770359296.0, + "model.layers.25": { + "accuracy": 0.9868336729414295, + "total_bits": 49646546944.0, "q_a_proj": { "group_size": { - "4": 128 + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "q_b_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "kv_a_proj_with_mqa": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "kv_b_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_expert_gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_expert_up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_expert_down_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_shared_expert_gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_shared_expert_up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_shared_expert_down_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_gate": { + "bits": 16, + "group_size": 0, + "desc": "Not quantized (original precision)" + } + }, + "model.layers.26": { + "accuracy": 0.986997331638122, + "total_bits": 49646546944.0, + "q_a_proj": { + "group_size": { + "4": 32 }, "bits": [ 4 @@ -1585,7 +3838,7 @@ }, "q_b_proj": { "group_size": { - "4": 128 + "4": 32 }, "bits": [ 4 @@ -1598,7 +3851,7 @@ }, "kv_a_proj_with_mqa": { "group_size": { - "4": 128 + "4": 32 }, "bits": [ 4 @@ -1611,7 +3864,7 @@ }, "kv_b_proj": { "group_size": { - "4": 128 + "4": 32 }, "bits": [ 4 @@ -1624,7 +3877,7 @@ }, "o_proj": { "group_size": { - "4": 128 + "4": 32 }, "bits": [ 4 @@ -1637,7 +3890,7 @@ }, "moe_expert_gate_proj": { "group_size": { - "4": 128 + "4": 32 }, "bits": [ 4 @@ -1650,7 +3903,7 @@ }, "moe_expert_up_proj": { "group_size": { - "4": 128 + "4": 32 }, "bits": [ 4 @@ -1663,7 +3916,7 @@ }, "moe_expert_down_proj": { "group_size": { - "4": 128 + "4": 32 }, "bits": [ 4 @@ -1674,18 +3927,9 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_gate": { - "bits": 16, - "group_size": 0, - "desc": "Not quantized (original precision)" - } - }, - "model.layers.15": { - "accuracy": 0.9976791741210036, - "total_bits": 46770359296.0, - "q_a_proj": { + "moe_shared_expert_gate_proj": { "group_size": { - "4": 128 + "4": 32 }, "bits": [ 4 @@ -1696,9 +3940,9 @@ "scale_bits": 4, "scale_groups:": 32 }, - "q_b_proj": { + "moe_shared_expert_up_proj": { "group_size": { - "4": 128 + "4": 32 }, "bits": [ 4 @@ -1709,9 +3953,9 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_a_proj_with_mqa": { + "moe_shared_expert_down_proj": { "group_size": { - "4": 128 + "4": 32 }, "bits": [ 4 @@ -1722,7 +3966,16 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_b_proj": { + "moe_gate": { + "bits": 16, + "group_size": 0, + "desc": "Not quantized (original precision)" + } + }, + "model.layers.27": { + "accuracy": 0.9610166220227256, + "total_bits": 32448351232.0, + "q_a_proj": { "group_size": { "4": 128 }, @@ -1735,7 +3988,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "o_proj": { + "q_b_proj": { "group_size": { "4": 128 }, @@ -1748,7 +4001,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_gate_proj": { + "kv_a_proj_with_mqa": { "group_size": { "4": 128 }, @@ -1761,7 +4014,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_up_proj": { + "kv_b_proj": { "group_size": { "4": 128 }, @@ -1774,7 +4027,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_down_proj": { + "o_proj": { "group_size": { "4": 128 }, @@ -1787,21 +4040,12 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_gate": { - "bits": 16, - "group_size": 0, - "desc": "Not quantized (original precision)" - } - }, - "model.layers.16": { - "accuracy": 0.994229809846729, - "total_bits": 32325176320.0, - "q_a_proj": { + "moe_expert_gate_proj": { "group_size": { - "4": 128 + "2": 64 }, "bits": [ - 4 + 2 ], "bits_prop": [ 1 @@ -1809,12 +4053,12 @@ "scale_bits": 4, "scale_groups:": 32 }, - "q_b_proj": { + "moe_expert_up_proj": { "group_size": { - "4": 128 + "2": 64 }, "bits": [ - 4 + 2 ], "bits_prop": [ 1 @@ -1822,7 +4066,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_a_proj_with_mqa": { + "moe_expert_down_proj": { "group_size": { "4": 128 }, @@ -1835,12 +4079,12 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_b_proj": { + "moe_shared_expert_gate_proj": { "group_size": { - "4": 128 + "2": 64 }, "bits": [ - 4 + 2 ], "bits_prop": [ 1 @@ -1848,7 +4092,20 @@ "scale_bits": 4, "scale_groups:": 32 }, - "o_proj": { + "moe_shared_expert_up_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_shared_expert_down_proj": { "group_size": { "4": 128 }, @@ -1861,12 +4118,21 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_gate_proj": { + "moe_gate": { + "bits": 16, + "group_size": 0, + "desc": "Not quantized (original precision)" + } + }, + "model.layers.28": { + "accuracy": 0.9852554257959127, + "total_bits": 49646546944.0, + "q_a_proj": { "group_size": { - "2": 64 + "4": 32 }, "bits": [ - 2 + 4 ], "bits_prop": [ 1 @@ -1874,12 +4140,12 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_up_proj": { + "q_b_proj": { "group_size": { - "2": 64 + "4": 32 }, "bits": [ - 2 + 4 ], "bits_prop": [ 1 @@ -1887,9 +4153,9 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_down_proj": { + "kv_a_proj_with_mqa": { "group_size": { - "4": 128 + "4": 32 }, "bits": [ 4 @@ -1900,18 +4166,9 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_gate": { - "bits": 16, - "group_size": 0, - "desc": "Not quantized (original precision)" - } - }, - "model.layers.17": { - "accuracy": 0.9939945223159157, - "total_bits": 32325176320.0, - "q_a_proj": { + "kv_b_proj": { "group_size": { - "4": 128 + "4": 32 }, "bits": [ 4 @@ -1922,9 +4179,9 @@ "scale_bits": 4, "scale_groups:": 32 }, - "q_b_proj": { + "o_proj": { "group_size": { - "4": 128 + "4": 32 }, "bits": [ 4 @@ -1935,9 +4192,9 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_a_proj_with_mqa": { + "moe_expert_gate_proj": { "group_size": { - "4": 128 + "4": 32 }, "bits": [ 4 @@ -1948,9 +4205,9 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_b_proj": { + "moe_expert_up_proj": { "group_size": { - "4": 128 + "4": 32 }, "bits": [ 4 @@ -1961,9 +4218,9 @@ "scale_bits": 4, "scale_groups:": 32 }, - "o_proj": { + "moe_expert_down_proj": { "group_size": { - "4": 128 + "4": 32 }, "bits": [ 4 @@ -1974,12 +4231,12 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_gate_proj": { + "moe_shared_expert_gate_proj": { "group_size": { - "2": 64 + "4": 32 }, "bits": [ - 2 + 4 ], "bits_prop": [ 1 @@ -1987,12 +4244,12 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_up_proj": { + "moe_shared_expert_up_proj": { "group_size": { - "2": 64 + "4": 32 }, "bits": [ - 2 + 4 ], "bits_prop": [ 1 @@ -2000,9 +4257,9 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_down_proj": { + "moe_shared_expert_down_proj": { "group_size": { - "4": 128 + "4": 32 }, "bits": [ 4 @@ -2019,9 +4276,9 @@ "desc": "Not quantized (original precision)" } }, - "model.layers.18": { - "accuracy": 0.9974436602788046, - "total_bits": 49456623616.0, + "model.layers.29": { + "accuracy": 0.9843449779436924, + "total_bits": 49646546944.0, "q_a_proj": { "group_size": { "4": 32 @@ -2126,16 +4383,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_gate": { - "bits": 16, - "group_size": 0, - "desc": "Not quantized (original precision)" - } - }, - "model.layers.19": { - "accuracy": 0.9973050620756112, - "total_bits": 49456623616.0, - "q_a_proj": { + "moe_shared_expert_gate_proj": { "group_size": { "4": 32 }, @@ -2148,7 +4396,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "q_b_proj": { + "moe_shared_expert_up_proj": { "group_size": { "4": 32 }, @@ -2161,7 +4409,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_a_proj_with_mqa": { + "moe_shared_expert_down_proj": { "group_size": { "4": 32 }, @@ -2174,7 +4422,16 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_b_proj": { + "moe_gate": { + "bits": 16, + "group_size": 0, + "desc": "Not quantized (original precision)" + } + }, + "model.layers.30": { + "accuracy": 0.9839022597298026, + "total_bits": 49646546944.0, + "q_a_proj": { "group_size": { "4": 32 }, @@ -2187,7 +4444,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "o_proj": { + "q_b_proj": { "group_size": { "4": 32 }, @@ -2200,7 +4457,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_gate_proj": { + "kv_a_proj_with_mqa": { "group_size": { "4": 32 }, @@ -2213,7 +4470,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_up_proj": { + "kv_b_proj": { "group_size": { "4": 32 }, @@ -2226,7 +4483,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_down_proj": { + "o_proj": { "group_size": { "4": 32 }, @@ -2239,18 +4496,9 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_gate": { - "bits": 16, - "group_size": 0, - "desc": "Not quantized (original precision)" - } - }, - "model.layers.20": { - "accuracy": 0.9914545317878947, - "total_bits": 32325176320.0, - "q_a_proj": { + "moe_expert_gate_proj": { "group_size": { - "4": 128 + "4": 32 }, "bits": [ 4 @@ -2261,9 +4509,9 @@ "scale_bits": 4, "scale_groups:": 32 }, - "q_b_proj": { + "moe_expert_up_proj": { "group_size": { - "4": 128 + "4": 32 }, "bits": [ 4 @@ -2274,9 +4522,9 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_a_proj_with_mqa": { + "moe_expert_down_proj": { "group_size": { - "4": 128 + "4": 32 }, "bits": [ 4 @@ -2287,9 +4535,9 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_b_proj": { + "moe_shared_expert_gate_proj": { "group_size": { - "4": 128 + "4": 32 }, "bits": [ 4 @@ -2300,9 +4548,9 @@ "scale_bits": 4, "scale_groups:": 32 }, - "o_proj": { + "moe_shared_expert_up_proj": { "group_size": { - "4": 128 + "4": 32 }, "bits": [ 4 @@ -2313,12 +4561,12 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_gate_proj": { + "moe_shared_expert_down_proj": { "group_size": { - "2": 64 + "4": 32 }, "bits": [ - 2 + 4 ], "bits_prop": [ 1 @@ -2326,12 +4574,21 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_up_proj": { + "moe_gate": { + "bits": 16, + "group_size": 0, + "desc": "Not quantized (original precision)" + } + }, + "model.layers.31": { + "accuracy": 0.9827811672585085, + "total_bits": 49646546944.0, + "q_a_proj": { "group_size": { - "2": 64 + "4": 32 }, "bits": [ - 2 + 4 ], "bits_prop": [ 1 @@ -2339,9 +4596,9 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_down_proj": { + "q_b_proj": { "group_size": { - "4": 128 + "4": 32 }, "bits": [ 4 @@ -2352,16 +4609,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_gate": { - "bits": 16, - "group_size": 0, - "desc": "Not quantized (original precision)" - } - }, - "model.layers.21": { - "accuracy": 0.9969465080648661, - "total_bits": 49456623616.0, - "q_a_proj": { + "kv_a_proj_with_mqa": { "group_size": { "4": 32 }, @@ -2374,7 +4622,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "q_b_proj": { + "kv_b_proj": { "group_size": { "4": 32 }, @@ -2387,7 +4635,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_a_proj_with_mqa": { + "o_proj": { "group_size": { "4": 32 }, @@ -2400,7 +4648,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_b_proj": { + "moe_expert_gate_proj": { "group_size": { "4": 32 }, @@ -2413,7 +4661,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "o_proj": { + "moe_expert_up_proj": { "group_size": { "4": 32 }, @@ -2426,7 +4674,20 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_gate_proj": { + "moe_expert_down_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_shared_expert_gate_proj": { "group_size": { "4": 32 }, @@ -2439,7 +4700,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_up_proj": { + "moe_shared_expert_up_proj": { "group_size": { "4": 32 }, @@ -2452,7 +4713,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_down_proj": { + "moe_shared_expert_down_proj": { "group_size": { "4": 32 }, @@ -2471,12 +4732,12 @@ "desc": "Not quantized (original precision)" } }, - "model.layers.22": { - "accuracy": 0.9893084313953295, - "total_bits": 32325176320.0, + "model.layers.32": { + "accuracy": 0.9849323070375249, + "total_bits": 49646546944.0, "q_a_proj": { "group_size": { - "4": 128 + "4": 32 }, "bits": [ 4 @@ -2489,7 +4750,7 @@ }, "q_b_proj": { "group_size": { - "4": 128 + "4": 32 }, "bits": [ 4 @@ -2502,7 +4763,7 @@ }, "kv_a_proj_with_mqa": { "group_size": { - "4": 128 + "4": 32 }, "bits": [ 4 @@ -2515,7 +4776,7 @@ }, "kv_b_proj": { "group_size": { - "4": 128 + "4": 32 }, "bits": [ 4 @@ -2528,7 +4789,7 @@ }, "o_proj": { "group_size": { - "4": 128 + "4": 32 }, "bits": [ 4 @@ -2541,10 +4802,10 @@ }, "moe_expert_gate_proj": { "group_size": { - "2": 64 + "4": 32 }, "bits": [ - 2 + 4 ], "bits_prop": [ 1 @@ -2554,10 +4815,10 @@ }, "moe_expert_up_proj": { "group_size": { - "2": 64 + "4": 32 }, "bits": [ - 2 + 4 ], "bits_prop": [ 1 @@ -2567,7 +4828,7 @@ }, "moe_expert_down_proj": { "group_size": { - "4": 128 + "4": 32 }, "bits": [ 4 @@ -2578,18 +4839,9 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_gate": { - "bits": 16, - "group_size": 0, - "desc": "Not quantized (original precision)" - } - }, - "model.layers.23": { - "accuracy": 0.9866153525654227, - "total_bits": 32325176320.0, - "q_a_proj": { + "moe_shared_expert_gate_proj": { "group_size": { - "4": 128 + "4": 32 }, "bits": [ 4 @@ -2600,9 +4852,9 @@ "scale_bits": 4, "scale_groups:": 32 }, - "q_b_proj": { + "moe_shared_expert_up_proj": { "group_size": { - "4": 128 + "4": 32 }, "bits": [ 4 @@ -2613,9 +4865,9 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_a_proj_with_mqa": { + "moe_shared_expert_down_proj": { "group_size": { - "4": 128 + "4": 32 }, "bits": [ 4 @@ -2626,9 +4878,18 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_b_proj": { + "moe_gate": { + "bits": 16, + "group_size": 0, + "desc": "Not quantized (original precision)" + } + }, + "model.layers.33": { + "accuracy": 0.9866606635332573, + "total_bits": 49646546944.0, + "q_a_proj": { "group_size": { - "4": 128 + "4": 32 }, "bits": [ 4 @@ -2639,9 +4900,9 @@ "scale_bits": 4, "scale_groups:": 32 }, - "o_proj": { + "q_b_proj": { "group_size": { - "4": 128 + "4": 32 }, "bits": [ 4 @@ -2652,12 +4913,12 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_gate_proj": { + "kv_a_proj_with_mqa": { "group_size": { - "2": 64 + "4": 32 }, "bits": [ - 2 + 4 ], "bits_prop": [ 1 @@ -2665,12 +4926,12 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_up_proj": { + "kv_b_proj": { "group_size": { - "2": 64 + "4": 32 }, "bits": [ - 2 + 4 ], "bits_prop": [ 1 @@ -2678,9 +4939,9 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_down_proj": { + "o_proj": { "group_size": { - "4": 128 + "4": 32 }, "bits": [ 4 @@ -2691,18 +4952,9 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_gate": { - "bits": 16, - "group_size": 0, - "desc": "Not quantized (original precision)" - } - }, - "model.layers.24": { - "accuracy": 0.9857842645142227, - "total_bits": 32325176320.0, - "q_a_proj": { + "moe_expert_gate_proj": { "group_size": { - "4": 128 + "4": 32 }, "bits": [ 4 @@ -2713,9 +4965,9 @@ "scale_bits": 4, "scale_groups:": 32 }, - "q_b_proj": { + "moe_expert_up_proj": { "group_size": { - "4": 128 + "4": 32 }, "bits": [ 4 @@ -2726,9 +4978,9 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_a_proj_with_mqa": { + "moe_expert_down_proj": { "group_size": { - "4": 128 + "4": 32 }, "bits": [ 4 @@ -2739,9 +4991,9 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_b_proj": { + "moe_shared_expert_gate_proj": { "group_size": { - "4": 128 + "4": 32 }, "bits": [ 4 @@ -2752,9 +5004,9 @@ "scale_bits": 4, "scale_groups:": 32 }, - "o_proj": { + "moe_shared_expert_up_proj": { "group_size": { - "4": 128 + "4": 32 }, "bits": [ 4 @@ -2765,12 +5017,12 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_gate_proj": { + "moe_shared_expert_down_proj": { "group_size": { - "2": 64 + "4": 32 }, "bits": [ - 2 + 4 ], "bits_prop": [ 1 @@ -2778,12 +5030,21 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_up_proj": { + "moe_gate": { + "bits": 16, + "group_size": 0, + "desc": "Not quantized (original precision)" + } + }, + "model.layers.34": { + "accuracy": 0.9865586688974872, + "total_bits": 49646546944.0, + "q_a_proj": { "group_size": { - "2": 64 + "4": 32 }, "bits": [ - 2 + 4 ], "bits_prop": [ 1 @@ -2791,9 +5052,9 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_down_proj": { + "q_b_proj": { "group_size": { - "4": 128 + "4": 32 }, "bits": [ 4 @@ -2804,16 +5065,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_gate": { - "bits": 16, - "group_size": 0, - "desc": "Not quantized (original precision)" - } - }, - "model.layers.25": { - "accuracy": 0.9946274071699008, - "total_bits": 49456623616.0, - "q_a_proj": { + "kv_a_proj_with_mqa": { "group_size": { "4": 32 }, @@ -2826,7 +5078,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "q_b_proj": { + "kv_b_proj": { "group_size": { "4": 32 }, @@ -2839,7 +5091,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_a_proj_with_mqa": { + "o_proj": { "group_size": { "4": 32 }, @@ -2852,7 +5104,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_b_proj": { + "moe_expert_gate_proj": { "group_size": { "4": 32 }, @@ -2865,7 +5117,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "o_proj": { + "moe_expert_up_proj": { "group_size": { "4": 32 }, @@ -2878,7 +5130,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_gate_proj": { + "moe_expert_down_proj": { "group_size": { "4": 32 }, @@ -2891,7 +5143,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_up_proj": { + "moe_shared_expert_gate_proj": { "group_size": { "4": 32 }, @@ -2904,7 +5156,20 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_down_proj": { + "moe_shared_expert_up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_shared_expert_down_proj": { "group_size": { "4": 32 }, @@ -2923,9 +5188,9 @@ "desc": "Not quantized (original precision)" } }, - "model.layers.26": { - "accuracy": 0.994916282244958, - "total_bits": 49456623616.0, + "model.layers.35": { + "accuracy": 0.9868608236429282, + "total_bits": 49646546944.0, "q_a_proj": { "group_size": { "4": 32 @@ -3030,18 +5295,9 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_gate": { - "bits": 16, - "group_size": 0, - "desc": "Not quantized (original precision)" - } - }, - "model.layers.27": { - "accuracy": 0.9849393269978464, - "total_bits": 32325176320.0, - "q_a_proj": { + "moe_shared_expert_gate_proj": { "group_size": { - "4": 128 + "4": 32 }, "bits": [ 4 @@ -3052,9 +5308,9 @@ "scale_bits": 4, "scale_groups:": 32 }, - "q_b_proj": { + "moe_shared_expert_up_proj": { "group_size": { - "4": 128 + "4": 32 }, "bits": [ 4 @@ -3065,9 +5321,9 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_a_proj_with_mqa": { + "moe_shared_expert_down_proj": { "group_size": { - "4": 128 + "4": 32 }, "bits": [ 4 @@ -3078,7 +5334,16 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_b_proj": { + "moe_gate": { + "bits": 16, + "group_size": 0, + "desc": "Not quantized (original precision)" + } + }, + "model.layers.36": { + "accuracy": 0.9563268288038671, + "total_bits": 32448351232.0, + "q_a_proj": { "group_size": { "4": 128 }, @@ -3091,7 +5356,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "o_proj": { + "q_b_proj": { "group_size": { "4": 128 }, @@ -3104,33 +5369,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_gate_proj": { - "group_size": { - "2": 64 - }, - "bits": [ - 2 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4, - "scale_groups:": 32 - }, - "moe_expert_up_proj": { - "group_size": { - "2": 64 - }, - "bits": [ - 2 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4, - "scale_groups:": 32 - }, - "moe_expert_down_proj": { + "kv_a_proj_with_mqa": { "group_size": { "4": 128 }, @@ -3143,18 +5382,9 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_gate": { - "bits": 16, - "group_size": 0, - "desc": "Not quantized (original precision)" - } - }, - "model.layers.28": { - "accuracy": 0.9935862241545692, - "total_bits": 49456623616.0, - "q_a_proj": { + "kv_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -3165,9 +5395,9 @@ "scale_bits": 4, "scale_groups:": 32 }, - "q_b_proj": { + "o_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -3178,12 +5408,12 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_a_proj_with_mqa": { + "moe_expert_gate_proj": { "group_size": { - "4": 32 + "2": 64 }, "bits": [ - 4 + 2 ], "bits_prop": [ 1 @@ -3191,12 +5421,12 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_b_proj": { + "moe_expert_up_proj": { "group_size": { - "4": 32 + "2": 64 }, "bits": [ - 4 + 2 ], "bits_prop": [ 1 @@ -3204,9 +5434,9 @@ "scale_bits": 4, "scale_groups:": 32 }, - "o_proj": { + "moe_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -3217,12 +5447,12 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_gate_proj": { + "moe_shared_expert_gate_proj": { "group_size": { - "4": 32 + "2": 64 }, "bits": [ - 4 + 2 ], "bits_prop": [ 1 @@ -3230,12 +5460,12 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_up_proj": { + "moe_shared_expert_up_proj": { "group_size": { - "4": 32 + "2": 64 }, "bits": [ - 4 + 2 ], "bits_prop": [ 1 @@ -3243,9 +5473,9 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_down_proj": { + "moe_shared_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -3262,12 +5492,12 @@ "desc": "Not quantized (original precision)" } }, - "model.layers.29": { - "accuracy": 0.9933130200952291, - "total_bits": 49456623616.0, + "model.layers.37": { + "accuracy": 0.955550791346468, + "total_bits": 32448351232.0, "q_a_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -3280,7 +5510,7 @@ }, "q_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -3293,7 +5523,7 @@ }, "kv_a_proj_with_mqa": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -3306,7 +5536,7 @@ }, "kv_b_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -3319,7 +5549,7 @@ }, "o_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -3332,10 +5562,10 @@ }, "moe_expert_gate_proj": { "group_size": { - "4": 32 + "2": 64 }, "bits": [ - 4 + 2 ], "bits_prop": [ 1 @@ -3345,10 +5575,10 @@ }, "moe_expert_up_proj": { "group_size": { - "4": 32 + "2": 64 }, "bits": [ - 4 + 2 ], "bits_prop": [ 1 @@ -3358,7 +5588,7 @@ }, "moe_expert_down_proj": { "group_size": { - "4": 32 + "4": 128 }, "bits": [ 4 @@ -3369,21 +5599,12 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_gate": { - "bits": 16, - "group_size": 0, - "desc": "Not quantized (original precision)" - } - }, - "model.layers.30": { - "accuracy": 0.9928346405504271, - "total_bits": 49456623616.0, - "q_a_proj": { + "moe_shared_expert_gate_proj": { "group_size": { - "4": 32 + "2": 64 }, "bits": [ - 4 + 2 ], "bits_prop": [ 1 @@ -3391,9 +5612,22 @@ "scale_bits": 4, "scale_groups:": 32 }, - "q_b_proj": { + "moe_shared_expert_up_proj": { "group_size": { - "4": 32 + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_shared_expert_down_proj": { + "group_size": { + "4": 128 }, "bits": [ 4 @@ -3404,7 +5638,16 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_a_proj_with_mqa": { + "moe_gate": { + "bits": 16, + "group_size": 0, + "desc": "Not quantized (original precision)" + } + }, + "model.layers.38": { + "accuracy": 0.9879953919735271, + "total_bits": 49646546944.0, + "q_a_proj": { "group_size": { "4": 32 }, @@ -3417,7 +5660,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_b_proj": { + "q_b_proj": { "group_size": { "4": 32 }, @@ -3430,7 +5673,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "o_proj": { + "kv_a_proj_with_mqa": { "group_size": { "4": 32 }, @@ -3443,7 +5686,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_gate_proj": { + "kv_b_proj": { "group_size": { "4": 32 }, @@ -3456,7 +5699,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_up_proj": { + "o_proj": { "group_size": { "4": 32 }, @@ -3469,7 +5712,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_down_proj": { + "moe_expert_gate_proj": { "group_size": { "4": 32 }, @@ -3482,16 +5725,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_gate": { - "bits": 16, - "group_size": 0, - "desc": "Not quantized (original precision)" - } - }, - "model.layers.31": { - "accuracy": 0.9926120019517839, - "total_bits": 49456623616.0, - "q_a_proj": { + "moe_expert_up_proj": { "group_size": { "4": 32 }, @@ -3504,7 +5738,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "q_b_proj": { + "moe_expert_down_proj": { "group_size": { "4": 32 }, @@ -3517,7 +5751,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_a_proj_with_mqa": { + "moe_shared_expert_gate_proj": { "group_size": { "4": 32 }, @@ -3530,7 +5764,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_b_proj": { + "moe_shared_expert_up_proj": { "group_size": { "4": 32 }, @@ -3543,7 +5777,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "o_proj": { + "moe_shared_expert_down_proj": { "group_size": { "4": 32 }, @@ -3556,7 +5790,16 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_gate_proj": { + "moe_gate": { + "bits": 16, + "group_size": 0, + "desc": "Not quantized (original precision)" + } + }, + "model.layers.39": { + "accuracy": 0.9868617769097909, + "total_bits": 49646546944.0, + "q_a_proj": { "group_size": { "4": 32 }, @@ -3569,7 +5812,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_up_proj": { + "q_b_proj": { "group_size": { "4": 32 }, @@ -3582,7 +5825,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_down_proj": { + "kv_a_proj_with_mqa": { "group_size": { "4": 32 }, @@ -3595,16 +5838,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_gate": { - "bits": 16, - "group_size": 0, - "desc": "Not quantized (original precision)" - } - }, - "model.layers.32": { - "accuracy": 0.9933211682364345, - "total_bits": 49456623616.0, - "q_a_proj": { + "kv_b_proj": { "group_size": { "4": 32 }, @@ -3617,7 +5851,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "q_b_proj": { + "o_proj": { "group_size": { "4": 32 }, @@ -3630,7 +5864,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_a_proj_with_mqa": { + "moe_expert_gate_proj": { "group_size": { "4": 32 }, @@ -3643,7 +5877,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_b_proj": { + "moe_expert_up_proj": { "group_size": { "4": 32 }, @@ -3656,7 +5890,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "o_proj": { + "moe_expert_down_proj": { "group_size": { "4": 32 }, @@ -3669,7 +5903,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_gate_proj": { + "moe_shared_expert_gate_proj": { "group_size": { "4": 32 }, @@ -3682,7 +5916,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_up_proj": { + "moe_shared_expert_up_proj": { "group_size": { "4": 32 }, @@ -3695,7 +5929,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_down_proj": { + "moe_shared_expert_down_proj": { "group_size": { "4": 32 }, @@ -3714,9 +5948,9 @@ "desc": "Not quantized (original precision)" } }, - "model.layers.33": { - "accuracy": 0.9938841551775113, - "total_bits": 49456623616.0, + "model.layers.40": { + "accuracy": 0.9849735620082356, + "total_bits": 49646546944.0, "q_a_proj": { "group_size": { "4": 32 @@ -3821,16 +6055,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_gate": { - "bits": 16, - "group_size": 0, - "desc": "Not quantized (original precision)" - } - }, - "model.layers.34": { - "accuracy": 0.9937848683912307, - "total_bits": 49456623616.0, - "q_a_proj": { + "moe_shared_expert_gate_proj": { "group_size": { "4": 32 }, @@ -3843,7 +6068,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "q_b_proj": { + "moe_shared_expert_up_proj": { "group_size": { "4": 32 }, @@ -3856,7 +6081,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_a_proj_with_mqa": { + "moe_shared_expert_down_proj": { "group_size": { "4": 32 }, @@ -3869,7 +6094,16 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_b_proj": { + "moe_gate": { + "bits": 16, + "group_size": 0, + "desc": "Not quantized (original precision)" + } + }, + "model.layers.41": { + "accuracy": 0.9857438872568309, + "total_bits": 49646546944.0, + "q_a_proj": { "group_size": { "4": 32 }, @@ -3882,7 +6116,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "o_proj": { + "q_b_proj": { "group_size": { "4": 32 }, @@ -3895,7 +6129,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_gate_proj": { + "kv_a_proj_with_mqa": { "group_size": { "4": 32 }, @@ -3908,7 +6142,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_up_proj": { + "kv_b_proj": { "group_size": { "4": 32 }, @@ -3921,7 +6155,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_down_proj": { + "o_proj": { "group_size": { "4": 32 }, @@ -3934,18 +6168,9 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_gate": { - "bits": 16, - "group_size": 0, - "desc": "Not quantized (original precision)" - } - }, - "model.layers.35": { - "accuracy": 0.980633502593264, - "total_bits": 32325176320.0, - "q_a_proj": { + "moe_expert_gate_proj": { "group_size": { - "4": 128 + "4": 32 }, "bits": [ 4 @@ -3956,9 +6181,9 @@ "scale_bits": 4, "scale_groups:": 32 }, - "q_b_proj": { + "moe_expert_up_proj": { "group_size": { - "4": 128 + "4": 32 }, "bits": [ 4 @@ -3969,9 +6194,9 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_a_proj_with_mqa": { + "moe_expert_down_proj": { "group_size": { - "4": 128 + "4": 32 }, "bits": [ 4 @@ -3982,9 +6207,9 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_b_proj": { + "moe_shared_expert_gate_proj": { "group_size": { - "4": 128 + "4": 32 }, "bits": [ 4 @@ -3995,9 +6220,9 @@ "scale_bits": 4, "scale_groups:": 32 }, - "o_proj": { + "moe_shared_expert_up_proj": { "group_size": { - "4": 128 + "4": 32 }, "bits": [ 4 @@ -4008,35 +6233,9 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_gate_proj": { - "group_size": { - "2": 64 - }, - "bits": [ - 2 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4, - "scale_groups:": 32 - }, - "moe_expert_up_proj": { - "group_size": { - "2": 64 - }, - "bits": [ - 2 - ], - "bits_prop": [ - 1 - ], - "scale_bits": 4, - "scale_groups:": 32 - }, - "moe_expert_down_proj": { + "moe_shared_expert_down_proj": { "group_size": { - "4": 128 + "4": 32 }, "bits": [ 4 @@ -4053,9 +6252,9 @@ "desc": "Not quantized (original precision)" } }, - "model.layers.36": { - "accuracy": 0.9936748370528221, - "total_bits": 49456623616.0, + "model.layers.42": { + "accuracy": 0.9868348813324701, + "total_bits": 49646546944.0, "q_a_proj": { "group_size": { "4": 32 @@ -4160,16 +6359,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_gate": { - "bits": 16, - "group_size": 0, - "desc": "Not quantized (original precision)" - } - }, - "model.layers.37": { - "accuracy": 0.9934563674032688, - "total_bits": 49456623616.0, - "q_a_proj": { + "moe_shared_expert_gate_proj": { "group_size": { "4": 32 }, @@ -4182,7 +6372,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "q_b_proj": { + "moe_shared_expert_up_proj": { "group_size": { "4": 32 }, @@ -4195,7 +6385,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_a_proj_with_mqa": { + "moe_shared_expert_down_proj": { "group_size": { "4": 32 }, @@ -4208,7 +6398,16 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_b_proj": { + "moe_gate": { + "bits": 16, + "group_size": 0, + "desc": "Not quantized (original precision)" + } + }, + "model.layers.43": { + "accuracy": 0.9866155976196751, + "total_bits": 49646546944.0, + "q_a_proj": { "group_size": { "4": 32 }, @@ -4221,7 +6420,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "o_proj": { + "q_b_proj": { "group_size": { "4": 32 }, @@ -4234,7 +6433,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_gate_proj": { + "kv_a_proj_with_mqa": { "group_size": { "4": 32 }, @@ -4247,7 +6446,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_up_proj": { + "kv_b_proj": { "group_size": { "4": 32 }, @@ -4260,7 +6459,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_down_proj": { + "o_proj": { "group_size": { "4": 32 }, @@ -4273,16 +6472,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_gate": { - "bits": 16, - "group_size": 0, - "desc": "Not quantized (original precision)" - } - }, - "model.layers.38": { - "accuracy": 0.9943448413396254, - "total_bits": 49456623616.0, - "q_a_proj": { + "moe_expert_gate_proj": { "group_size": { "4": 32 }, @@ -4295,7 +6485,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "q_b_proj": { + "moe_expert_up_proj": { "group_size": { "4": 32 }, @@ -4308,7 +6498,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_a_proj_with_mqa": { + "moe_expert_down_proj": { "group_size": { "4": 32 }, @@ -4321,7 +6511,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_b_proj": { + "moe_shared_expert_gate_proj": { "group_size": { "4": 32 }, @@ -4334,7 +6524,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "o_proj": { + "moe_shared_expert_up_proj": { "group_size": { "4": 32 }, @@ -4347,7 +6537,29 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_gate_proj": { + "moe_shared_expert_down_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_gate": { + "bits": 16, + "group_size": 0, + "desc": "Not quantized (original precision)" + } + }, + "model.layers.44": { + "accuracy": 0.9863877832249273, + "total_bits": 49646546944.0, + "q_a_proj": { "group_size": { "4": 32 }, @@ -4360,7 +6572,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_up_proj": { + "q_b_proj": { "group_size": { "4": 32 }, @@ -4373,7 +6585,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_down_proj": { + "kv_a_proj_with_mqa": { "group_size": { "4": 32 }, @@ -4386,16 +6598,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_gate": { - "bits": 16, - "group_size": 0, - "desc": "Not quantized (original precision)" - } - }, - "model.layers.39": { - "accuracy": 0.9929845606675372, - "total_bits": 49456623616.0, - "q_a_proj": { + "kv_b_proj": { "group_size": { "4": 32 }, @@ -4408,7 +6611,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "q_b_proj": { + "o_proj": { "group_size": { "4": 32 }, @@ -4421,7 +6624,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_a_proj_with_mqa": { + "moe_expert_gate_proj": { "group_size": { "4": 32 }, @@ -4434,7 +6637,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_b_proj": { + "moe_expert_up_proj": { "group_size": { "4": 32 }, @@ -4447,7 +6650,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "o_proj": { + "moe_expert_down_proj": { "group_size": { "4": 32 }, @@ -4460,7 +6663,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_gate_proj": { + "moe_shared_expert_gate_proj": { "group_size": { "4": 32 }, @@ -4473,7 +6676,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_up_proj": { + "moe_shared_expert_up_proj": { "group_size": { "4": 32 }, @@ -4486,7 +6689,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_down_proj": { + "moe_shared_expert_down_proj": { "group_size": { "4": 32 }, @@ -4505,9 +6708,9 @@ "desc": "Not quantized (original precision)" } }, - "model.layers.40": { - "accuracy": 0.9924973219167441, - "total_bits": 49456623616.0, + "model.layers.45": { + "accuracy": 0.9854023810476065, + "total_bits": 49646546944.0, "q_a_proj": { "group_size": { "4": 32 @@ -4612,16 +6815,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_gate": { - "bits": 16, - "group_size": 0, - "desc": "Not quantized (original precision)" - } - }, - "model.layers.41": { - "accuracy": 0.9928232359234244, - "total_bits": 49456623616.0, - "q_a_proj": { + "moe_shared_expert_gate_proj": { "group_size": { "4": 32 }, @@ -4634,7 +6828,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "q_b_proj": { + "moe_shared_expert_up_proj": { "group_size": { "4": 32 }, @@ -4647,7 +6841,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_a_proj_with_mqa": { + "moe_shared_expert_down_proj": { "group_size": { "4": 32 }, @@ -4660,7 +6854,16 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_b_proj": { + "moe_gate": { + "bits": 16, + "group_size": 0, + "desc": "Not quantized (original precision)" + } + }, + "model.layers.46": { + "accuracy": 0.9850968104728963, + "total_bits": 49646546944.0, + "q_a_proj": { "group_size": { "4": 32 }, @@ -4673,7 +6876,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "o_proj": { + "q_b_proj": { "group_size": { "4": 32 }, @@ -4686,7 +6889,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_gate_proj": { + "kv_a_proj_with_mqa": { "group_size": { "4": 32 }, @@ -4699,7 +6902,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_up_proj": { + "kv_b_proj": { "group_size": { "4": 32 }, @@ -4712,7 +6915,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_down_proj": { + "o_proj": { "group_size": { "4": 32 }, @@ -4725,16 +6928,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_gate": { - "bits": 16, - "group_size": 0, - "desc": "Not quantized (original precision)" - } - }, - "model.layers.42": { - "accuracy": 0.9929470097413287, - "total_bits": 49456623616.0, - "q_a_proj": { + "moe_expert_gate_proj": { "group_size": { "4": 32 }, @@ -4747,7 +6941,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "q_b_proj": { + "moe_expert_up_proj": { "group_size": { "4": 32 }, @@ -4760,7 +6954,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_a_proj_with_mqa": { + "moe_expert_down_proj": { "group_size": { "4": 32 }, @@ -4773,7 +6967,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_b_proj": { + "moe_shared_expert_gate_proj": { "group_size": { "4": 32 }, @@ -4786,7 +6980,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "o_proj": { + "moe_shared_expert_up_proj": { "group_size": { "4": 32 }, @@ -4799,7 +6993,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_gate_proj": { + "moe_shared_expert_down_proj": { "group_size": { "4": 32 }, @@ -4812,7 +7006,16 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_up_proj": { + "moe_gate": { + "bits": 16, + "group_size": 0, + "desc": "Not quantized (original precision)" + } + }, + "model.layers.47": { + "accuracy": 0.9855538352276199, + "total_bits": 49646546944.0, + "q_a_proj": { "group_size": { "4": 32 }, @@ -4825,7 +7028,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_down_proj": { + "q_b_proj": { "group_size": { "4": 32 }, @@ -4838,16 +7041,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_gate": { - "bits": 16, - "group_size": 0, - "desc": "Not quantized (original precision)" - } - }, - "model.layers.43": { - "accuracy": 0.9934052276657894, - "total_bits": 49456623616.0, - "q_a_proj": { + "kv_a_proj_with_mqa": { "group_size": { "4": 32 }, @@ -4860,7 +7054,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "q_b_proj": { + "kv_b_proj": { "group_size": { "4": 32 }, @@ -4873,7 +7067,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_a_proj_with_mqa": { + "o_proj": { "group_size": { "4": 32 }, @@ -4886,7 +7080,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_b_proj": { + "moe_expert_gate_proj": { "group_size": { "4": 32 }, @@ -4899,7 +7093,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "o_proj": { + "moe_expert_up_proj": { "group_size": { "4": 32 }, @@ -4912,7 +7106,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_gate_proj": { + "moe_expert_down_proj": { "group_size": { "4": 32 }, @@ -4925,7 +7119,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_up_proj": { + "moe_shared_expert_gate_proj": { "group_size": { "4": 32 }, @@ -4938,7 +7132,20 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_down_proj": { + "moe_shared_expert_up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_shared_expert_down_proj": { "group_size": { "4": 32 }, @@ -4957,9 +7164,9 @@ "desc": "Not quantized (original precision)" } }, - "model.layers.44": { - "accuracy": 0.9925676481798291, - "total_bits": 49456623616.0, + "model.layers.48": { + "accuracy": 0.9850831855437718, + "total_bits": 49646546944.0, "q_a_proj": { "group_size": { "4": 32 @@ -5064,16 +7271,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_gate": { - "bits": 16, - "group_size": 0, - "desc": "Not quantized (original precision)" - } - }, - "model.layers.45": { - "accuracy": 0.9913281861227006, - "total_bits": 49456623616.0, - "q_a_proj": { + "moe_shared_expert_gate_proj": { "group_size": { "4": 32 }, @@ -5086,7 +7284,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "q_b_proj": { + "moe_shared_expert_up_proj": { "group_size": { "4": 32 }, @@ -5099,7 +7297,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_a_proj_with_mqa": { + "moe_shared_expert_down_proj": { "group_size": { "4": 32 }, @@ -5112,7 +7310,16 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_b_proj": { + "moe_gate": { + "bits": 16, + "group_size": 0, + "desc": "Not quantized (original precision)" + } + }, + "model.layers.49": { + "accuracy": 0.9848428060940932, + "total_bits": 49646546944.0, + "q_a_proj": { "group_size": { "4": 32 }, @@ -5125,7 +7332,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "o_proj": { + "q_b_proj": { "group_size": { "4": 32 }, @@ -5138,7 +7345,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_gate_proj": { + "kv_a_proj_with_mqa": { "group_size": { "4": 32 }, @@ -5151,7 +7358,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_up_proj": { + "kv_b_proj": { "group_size": { "4": 32 }, @@ -5164,7 +7371,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_down_proj": { + "o_proj": { "group_size": { "4": 32 }, @@ -5177,16 +7384,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_gate": { - "bits": 16, - "group_size": 0, - "desc": "Not quantized (original precision)" - } - }, - "model.layers.46": { - "accuracy": 0.9916278015589342, - "total_bits": 49456623616.0, - "q_a_proj": { + "moe_expert_gate_proj": { "group_size": { "4": 32 }, @@ -5199,7 +7397,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "q_b_proj": { + "moe_expert_up_proj": { "group_size": { "4": 32 }, @@ -5212,7 +7410,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_a_proj_with_mqa": { + "moe_expert_down_proj": { "group_size": { "4": 32 }, @@ -5225,7 +7423,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_b_proj": { + "moe_shared_expert_gate_proj": { "group_size": { "4": 32 }, @@ -5238,7 +7436,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "o_proj": { + "moe_shared_expert_up_proj": { "group_size": { "4": 32 }, @@ -5251,7 +7449,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_gate_proj": { + "moe_shared_expert_down_proj": { "group_size": { "4": 32 }, @@ -5264,7 +7462,16 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_up_proj": { + "moe_gate": { + "bits": 16, + "group_size": 0, + "desc": "Not quantized (original precision)" + } + }, + "model.layers.50": { + "accuracy": 0.9842289972875733, + "total_bits": 49646546944.0, + "q_a_proj": { "group_size": { "4": 32 }, @@ -5277,7 +7484,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_down_proj": { + "q_b_proj": { "group_size": { "4": 32 }, @@ -5290,16 +7497,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_gate": { - "bits": 16, - "group_size": 0, - "desc": "Not quantized (original precision)" - } - }, - "model.layers.47": { - "accuracy": 0.9919057273073122, - "total_bits": 49456623616.0, - "q_a_proj": { + "kv_a_proj_with_mqa": { "group_size": { "4": 32 }, @@ -5312,7 +7510,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "q_b_proj": { + "kv_b_proj": { "group_size": { "4": 32 }, @@ -5325,7 +7523,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_a_proj_with_mqa": { + "o_proj": { "group_size": { "4": 32 }, @@ -5338,7 +7536,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_b_proj": { + "moe_expert_gate_proj": { "group_size": { "4": 32 }, @@ -5351,7 +7549,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "o_proj": { + "moe_expert_up_proj": { "group_size": { "4": 32 }, @@ -5364,7 +7562,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_gate_proj": { + "moe_expert_down_proj": { "group_size": { "4": 32 }, @@ -5377,7 +7575,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_up_proj": { + "moe_shared_expert_gate_proj": { "group_size": { "4": 32 }, @@ -5390,7 +7588,20 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_down_proj": { + "moe_shared_expert_up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_shared_expert_down_proj": { "group_size": { "4": 32 }, @@ -5409,9 +7620,9 @@ "desc": "Not quantized (original precision)" } }, - "model.layers.48": { - "accuracy": 0.9904983656015247, - "total_bits": 49456623616.0, + "model.layers.51": { + "accuracy": 0.9855613028921653, + "total_bits": 49646546944.0, "q_a_proj": { "group_size": { "4": 32 @@ -5516,16 +7727,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_gate": { - "bits": 16, - "group_size": 0, - "desc": "Not quantized (original precision)" - } - }, - "model.layers.49": { - "accuracy": 0.9907608616631478, - "total_bits": 49456623616.0, - "q_a_proj": { + "moe_shared_expert_gate_proj": { "group_size": { "4": 32 }, @@ -5538,7 +7740,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "q_b_proj": { + "moe_shared_expert_up_proj": { "group_size": { "4": 32 }, @@ -5551,7 +7753,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_a_proj_with_mqa": { + "moe_shared_expert_down_proj": { "group_size": { "4": 32 }, @@ -5564,7 +7766,16 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_b_proj": { + "moe_gate": { + "bits": 16, + "group_size": 0, + "desc": "Not quantized (original precision)" + } + }, + "model.layers.52": { + "accuracy": 0.984318298578728, + "total_bits": 49646546944.0, + "q_a_proj": { "group_size": { "4": 32 }, @@ -5577,7 +7788,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "o_proj": { + "q_b_proj": { "group_size": { "4": 32 }, @@ -5590,7 +7801,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_gate_proj": { + "kv_a_proj_with_mqa": { "group_size": { "4": 32 }, @@ -5603,7 +7814,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_up_proj": { + "kv_b_proj": { "group_size": { "4": 32 }, @@ -5616,7 +7827,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_down_proj": { + "o_proj": { "group_size": { "4": 32 }, @@ -5629,16 +7840,20 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_gate": { - "bits": 16, - "group_size": 0, - "desc": "Not quantized (original precision)" - } - }, - "model.layers.50": { - "accuracy": 0.9900015126913786, - "total_bits": 49456623616.0, - "q_a_proj": { + "moe_expert_gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_expert_up_proj": { "group_size": { "4": 32 }, @@ -5651,7 +7866,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "q_b_proj": { + "moe_expert_down_proj": { "group_size": { "4": 32 }, @@ -5664,7 +7879,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_a_proj_with_mqa": { + "moe_shared_expert_gate_proj": { "group_size": { "4": 32 }, @@ -5677,7 +7892,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_b_proj": { + "moe_shared_expert_up_proj": { "group_size": { "4": 32 }, @@ -5690,7 +7905,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "o_proj": { + "moe_shared_expert_down_proj": { "group_size": { "4": 32 }, @@ -5703,7 +7918,16 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_gate_proj": { + "moe_gate": { + "bits": 16, + "group_size": 0, + "desc": "Not quantized (original precision)" + } + }, + "model.layers.53": { + "accuracy": 0.9847224070690572, + "total_bits": 49646546944.0, + "q_a_proj": { "group_size": { "4": 32 }, @@ -5716,7 +7940,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_up_proj": { + "q_b_proj": { "group_size": { "4": 32 }, @@ -5729,7 +7953,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_down_proj": { + "kv_a_proj_with_mqa": { "group_size": { "4": 32 }, @@ -5742,16 +7966,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_gate": { - "bits": 16, - "group_size": 0, - "desc": "Not quantized (original precision)" - } - }, - "model.layers.51": { - "accuracy": 0.990100669907406, - "total_bits": 49456623616.0, - "q_a_proj": { + "kv_b_proj": { "group_size": { "4": 32 }, @@ -5764,7 +7979,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "q_b_proj": { + "o_proj": { "group_size": { "4": 32 }, @@ -5777,7 +7992,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_a_proj_with_mqa": { + "moe_expert_gate_proj": { "group_size": { "4": 32 }, @@ -5790,7 +8005,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_b_proj": { + "moe_expert_up_proj": { "group_size": { "4": 32 }, @@ -5803,7 +8018,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "o_proj": { + "moe_expert_down_proj": { "group_size": { "4": 32 }, @@ -5816,7 +8031,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_gate_proj": { + "moe_shared_expert_gate_proj": { "group_size": { "4": 32 }, @@ -5829,7 +8044,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_up_proj": { + "moe_shared_expert_up_proj": { "group_size": { "4": 32 }, @@ -5842,7 +8057,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_down_proj": { + "moe_shared_expert_down_proj": { "group_size": { "4": 32 }, @@ -5861,9 +8076,9 @@ "desc": "Not quantized (original precision)" } }, - "model.layers.52": { - "accuracy": 0.9885729716625065, - "total_bits": 49456623616.0, + "model.layers.54": { + "accuracy": 0.9827672768151388, + "total_bits": 49646546944.0, "q_a_proj": { "group_size": { "4": 32 @@ -5968,16 +8183,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_gate": { - "bits": 16, - "group_size": 0, - "desc": "Not quantized (original precision)" - } - }, - "model.layers.53": { - "accuracy": 0.9882970913313329, - "total_bits": 49456623616.0, - "q_a_proj": { + "moe_shared_expert_gate_proj": { "group_size": { "4": 32 }, @@ -5990,7 +8196,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "q_b_proj": { + "moe_shared_expert_up_proj": { "group_size": { "4": 32 }, @@ -6003,7 +8209,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_a_proj_with_mqa": { + "moe_shared_expert_down_proj": { "group_size": { "4": 32 }, @@ -6016,7 +8222,16 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_b_proj": { + "moe_gate": { + "bits": 16, + "group_size": 0, + "desc": "Not quantized (original precision)" + } + }, + "model.layers.55": { + "accuracy": 0.9829433120903559, + "total_bits": 49646546944.0, + "q_a_proj": { "group_size": { "4": 32 }, @@ -6029,7 +8244,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "o_proj": { + "q_b_proj": { "group_size": { "4": 32 }, @@ -6042,7 +8257,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_gate_proj": { + "kv_a_proj_with_mqa": { "group_size": { "4": 32 }, @@ -6055,7 +8270,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_up_proj": { + "kv_b_proj": { "group_size": { "4": 32 }, @@ -6068,7 +8283,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_down_proj": { + "o_proj": { "group_size": { "4": 32 }, @@ -6081,16 +8296,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_gate": { - "bits": 16, - "group_size": 0, - "desc": "Not quantized (original precision)" - } - }, - "model.layers.54": { - "accuracy": 0.9873007377609611, - "total_bits": 49456623616.0, - "q_a_proj": { + "moe_expert_gate_proj": { "group_size": { "4": 32 }, @@ -6103,7 +8309,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "q_b_proj": { + "moe_expert_up_proj": { "group_size": { "4": 32 }, @@ -6116,7 +8322,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_a_proj_with_mqa": { + "moe_expert_down_proj": { "group_size": { "4": 32 }, @@ -6129,7 +8335,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_b_proj": { + "moe_shared_expert_gate_proj": { "group_size": { "4": 32 }, @@ -6142,7 +8348,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "o_proj": { + "moe_shared_expert_up_proj": { "group_size": { "4": 32 }, @@ -6155,7 +8361,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_gate_proj": { + "moe_shared_expert_down_proj": { "group_size": { "4": 32 }, @@ -6168,7 +8374,16 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_up_proj": { + "moe_gate": { + "bits": 16, + "group_size": 0, + "desc": "Not quantized (original precision)" + } + }, + "model.layers.56": { + "accuracy": 0.9832319259585347, + "total_bits": 49646546944.0, + "q_a_proj": { "group_size": { "4": 32 }, @@ -6181,7 +8396,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_down_proj": { + "q_b_proj": { "group_size": { "4": 32 }, @@ -6194,16 +8409,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_gate": { - "bits": 16, - "group_size": 0, - "desc": "Not quantized (original precision)" - } - }, - "model.layers.55": { - "accuracy": 0.9870856697671115, - "total_bits": 49456623616.0, - "q_a_proj": { + "kv_a_proj_with_mqa": { "group_size": { "4": 32 }, @@ -6216,7 +8422,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "q_b_proj": { + "kv_b_proj": { "group_size": { "4": 32 }, @@ -6229,7 +8435,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_a_proj_with_mqa": { + "o_proj": { "group_size": { "4": 32 }, @@ -6242,7 +8448,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_b_proj": { + "moe_expert_gate_proj": { "group_size": { "4": 32 }, @@ -6255,7 +8461,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "o_proj": { + "moe_expert_up_proj": { "group_size": { "4": 32 }, @@ -6268,7 +8474,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_gate_proj": { + "moe_expert_down_proj": { "group_size": { "4": 32 }, @@ -6281,7 +8487,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_up_proj": { + "moe_shared_expert_gate_proj": { "group_size": { "4": 32 }, @@ -6294,7 +8500,20 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_down_proj": { + "moe_shared_expert_up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_shared_expert_down_proj": { "group_size": { "4": 32 }, @@ -6313,9 +8532,9 @@ "desc": "Not quantized (original precision)" } }, - "model.layers.56": { - "accuracy": 0.9580157678574324, - "total_bits": 32325176320.0, + "model.layers.57": { + "accuracy": 0.9462334433337674, + "total_bits": 32448351232.0, "q_a_proj": { "group_size": { "4": 128 @@ -6420,15 +8639,54 @@ "scale_bits": 4, "scale_groups:": 32 }, + "moe_shared_expert_gate_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_shared_expert_up_proj": { + "group_size": { + "2": 64 + }, + "bits": [ + 2 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_shared_expert_down_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, "moe_gate": { "bits": 16, "group_size": 0, "desc": "Not quantized (original precision)" } }, - "model.layers.57": { - "accuracy": 0.9854848813265562, - "total_bits": 49456623616.0, + "model.layers.58": { + "accuracy": 0.984666926873615, + "total_bits": 49646546944.0, "q_a_proj": { "group_size": { "4": 32 @@ -6533,16 +8791,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_gate": { - "bits": 16, - "group_size": 0, - "desc": "Not quantized (original precision)" - } - }, - "model.layers.58": { - "accuracy": 0.9876069577876478, - "total_bits": 49456623616.0, - "q_a_proj": { + "moe_shared_expert_gate_proj": { "group_size": { "4": 32 }, @@ -6555,7 +8804,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "q_b_proj": { + "moe_shared_expert_up_proj": { "group_size": { "4": 32 }, @@ -6568,7 +8817,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_a_proj_with_mqa": { + "moe_shared_expert_down_proj": { "group_size": { "4": 32 }, @@ -6581,7 +8830,16 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_b_proj": { + "moe_gate": { + "bits": 16, + "group_size": 0, + "desc": "Not quantized (original precision)" + } + }, + "model.layers.59": { + "accuracy": 0.9812533727963455, + "total_bits": 49646546944.0, + "q_a_proj": { "group_size": { "4": 32 }, @@ -6594,7 +8852,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "o_proj": { + "q_b_proj": { "group_size": { "4": 32 }, @@ -6607,7 +8865,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_gate_proj": { + "kv_a_proj_with_mqa": { "group_size": { "4": 32 }, @@ -6620,7 +8878,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_up_proj": { + "kv_b_proj": { "group_size": { "4": 32 }, @@ -6633,7 +8891,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_down_proj": { + "o_proj": { "group_size": { "4": 32 }, @@ -6646,16 +8904,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_gate": { - "bits": 16, - "group_size": 0, - "desc": "Not quantized (original precision)" - } - }, - "model.layers.59": { - "accuracy": 0.9854305572807789, - "total_bits": 49456623616.0, - "q_a_proj": { + "moe_expert_gate_proj": { "group_size": { "4": 32 }, @@ -6668,7 +8917,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "q_b_proj": { + "moe_expert_up_proj": { "group_size": { "4": 32 }, @@ -6681,7 +8930,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_a_proj_with_mqa": { + "moe_expert_down_proj": { "group_size": { "4": 32 }, @@ -6694,7 +8943,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_b_proj": { + "moe_shared_expert_gate_proj": { "group_size": { "4": 32 }, @@ -6707,7 +8956,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "o_proj": { + "moe_shared_expert_up_proj": { "group_size": { "4": 32 }, @@ -6720,7 +8969,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_gate_proj": { + "moe_shared_expert_down_proj": { "group_size": { "4": 32 }, @@ -6733,7 +8982,16 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_up_proj": { + "moe_gate": { + "bits": 16, + "group_size": 0, + "desc": "Not quantized (original precision)" + } + }, + "model.layers.60": { + "accuracy": 1e-06, + "total_bits": 49646546944.0, + "q_a_proj": { "group_size": { "4": 32 }, @@ -6746,7 +9004,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_down_proj": { + "q_b_proj": { "group_size": { "4": 32 }, @@ -6759,16 +9017,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_gate": { - "bits": 16, - "group_size": 0, - "desc": "Not quantized (original precision)" - } - }, - "model.layers.60": { - "accuracy": 1e-06, - "total_bits": 49456623616.0, - "q_a_proj": { + "kv_a_proj_with_mqa": { "group_size": { "4": 32 }, @@ -6781,7 +9030,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "q_b_proj": { + "kv_b_proj": { "group_size": { "4": 32 }, @@ -6794,7 +9043,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_a_proj_with_mqa": { + "o_proj": { "group_size": { "4": 32 }, @@ -6807,7 +9056,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "kv_b_proj": { + "moe_expert_gate_proj": { "group_size": { "4": 32 }, @@ -6820,7 +9069,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "o_proj": { + "moe_expert_up_proj": { "group_size": { "4": 32 }, @@ -6833,7 +9082,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_gate_proj": { + "moe_expert_down_proj": { "group_size": { "4": 32 }, @@ -6846,7 +9095,7 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_up_proj": { + "moe_shared_expert_gate_proj": { "group_size": { "4": 32 }, @@ -6859,7 +9108,20 @@ "scale_bits": 4, "scale_groups:": 32 }, - "moe_expert_down_proj": { + "moe_shared_expert_up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4, + "scale_groups:": 32 + }, + "moe_shared_expert_down_proj": { "group_size": { "4": 32 }, @@ -6879,4 +9141,4 @@ } } } -} \ No newline at end of file +}