diff --git a/model-00001-of-00098.safetensors b/model-00001-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..18e047c60ceb712d182bda13671b93caa253addd --- /dev/null +++ b/model-00001-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8990d1029d707a899f571ae09acc86219f8d88e4dc7a23caf988d73d70e121e +size 933299536 diff --git a/model-00002-of-00098.safetensors b/model-00002-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8b7ec2695cbeecd8f624073fda5b4f4f14ee24f0 --- /dev/null +++ b/model-00002-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ddbb39b3476a49975e1fc3cb93f1cca427b87c577d9951a1e374dade3123e1e9 +size 939525160 diff --git a/model-00003-of-00098.safetensors b/model-00003-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bcf3bb1b20f5e8050e392474d99a2d4141e9c12c --- /dev/null +++ b/model-00003-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:426f70f5ef542f5fe6b635a8fdaf9cdaccb5416eb10813b723288591824ee961 +size 939525160 diff --git a/model-00004-of-00098.safetensors b/model-00004-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c08317b7285fde36433a0925ee50377186ae211e --- /dev/null +++ b/model-00004-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:721a52121f90937fd8b98d853fb5c90bbec66a46461c7bfa5f7e191bed474a1e +size 906053328 diff --git a/model-00005-of-00098.safetensors b/model-00005-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e3a425ad3e988c3dedd94e80d7c7d842ac1cf69b --- /dev/null +++ b/model-00005-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:116c7ad8065b471bd295ffc85ac4985d56dbcc7cfa5bbb501f51f8089a8c84b3 +size 939525160 diff --git a/model-00006-of-00098.safetensors b/model-00006-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b5dddb1eca55d01f292c3f5f92df24956d679ffb --- /dev/null +++ b/model-00006-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8b7c5735cbcf78bd95fba3f4251bf54b87cd00714f095ddd8da00f5c76795bf +size 939525160 diff --git a/model-00007-of-00098.safetensors b/model-00007-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5524f01a7df0639ddfa5423a2ca6fe2b17a046c4 --- /dev/null +++ b/model-00007-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70dff1389df359b5f9be178d55301c3ac7c2f020150eb6f4b5b3a7d6168ae8a9 +size 906053328 diff --git a/model-00008-of-00098.safetensors b/model-00008-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bd73548e8c269d9259bc7746434f422ecda58529 --- /dev/null +++ b/model-00008-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5b305067582c66f35a69aa7efbf37fff17f5d5df9aa870ec10fb253023252fe +size 939525160 diff --git a/model-00009-of-00098.safetensors b/model-00009-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..37dae2f2cb56ea24d1b5c9b58b349e6d638a2990 --- /dev/null +++ b/model-00009-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c52922929d0084ec247137c41eab79040cc578bdd5820f3d51380dc650064c26 +size 939525160 diff --git a/model-00010-of-00098.safetensors b/model-00010-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2fd559fdf7f8df7ef4a0cb441c88341ce590f5cd --- /dev/null +++ b/model-00010-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfe890611c37321c9c6917c325ea0d7aa539962864e1934644c6b1bdec0d8e12 +size 906053328 diff --git a/model-00011-of-00098.safetensors b/model-00011-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..20aa9f2a4160dacaa56c3aa303bfa64f4f9bc359 --- /dev/null +++ b/model-00011-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fdcbf79b3350251956d76a9c7a7d7a8302373737fd72a23d440dc79f1a88262 +size 939525160 diff --git a/model-00012-of-00098.safetensors b/model-00012-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e42fdd83ca361044d4a4945aaeca37c2fafa3952 --- /dev/null +++ b/model-00012-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:243c145c5eb72e5869832550a8afba196e9eb28ce475fc9ca7df26f0955c97e9 +size 939525160 diff --git a/model-00013-of-00098.safetensors b/model-00013-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3e266d1a70ddc40315d97c65e97811d5f8756416 --- /dev/null +++ b/model-00013-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e0c775299cbef04c6141fa04188790d2aed81a380f1cbdbaeccd7e9b3f3a93f +size 906053328 diff --git a/model-00014-of-00098.safetensors b/model-00014-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..37798d9e9f587bec4c898fc9d8defb375cb03de2 --- /dev/null +++ b/model-00014-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f73f8f3fc235066fc04007b6acf2eeadc5a7c88e84150af0c3c0170852ce2e3b +size 939525160 diff --git a/model-00015-of-00098.safetensors b/model-00015-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8451cbbfef7a90cb4b9d08dc83e4f83b04042af4 --- /dev/null +++ b/model-00015-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3720cdabb8955792117c6571b6d005e7aaeec34ac0e732e33d10a68c04b974d +size 939525160 diff --git a/model-00016-of-00098.safetensors b/model-00016-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..62972f04fa83e4238f735fe2e3601058414dfd51 --- /dev/null +++ b/model-00016-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c8248be02468fa55ac955d1b2873a1f7c507c63c29683ebe172cbf1d73abd62 +size 906053328 diff --git a/model-00017-of-00098.safetensors b/model-00017-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..de917929e8e88a106f38d7175ff04bd9ee28d5aa --- /dev/null +++ b/model-00017-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a6639edb5c05a65b85998eb58e29462f37bc6b472224bfae73977f26653f2fa +size 939525160 diff --git a/model-00018-of-00098.safetensors b/model-00018-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..eae6f3eb7989ff0c6325835eb51b082ecc59d206 --- /dev/null +++ b/model-00018-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d177e4b5519edddd0bd815aa79241c3839a85bd83c80a293e198ff9573367730 +size 939525160 diff --git a/model-00019-of-00098.safetensors b/model-00019-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a304f1da801e99cec932a7ecc6d97ad5a437a0b7 --- /dev/null +++ b/model-00019-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79161121aa7e4e63319ef7b85c7edcaf8914e6be0374f6e06d4dce22dfcae3ea +size 989873768 diff --git a/model-00020-of-00098.safetensors b/model-00020-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..01f2a2401e4e8c89f10a699674d6db8e19ed535c --- /dev/null +++ b/model-00020-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:514c79ab0e882cbbd22a4e98517c98bedcf50efa04d1c8768f73ad76ee039583 +size 973145360 diff --git a/model-00021-of-00098.safetensors b/model-00021-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ccdd64b18ae1881cc647a9733fe317367bd8f37c --- /dev/null +++ b/model-00021-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8162756844443521b4cf66ed784dac25426957980b63caa63458c4f65e88f79c +size 939525160 diff --git a/model-00022-of-00098.safetensors b/model-00022-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c21a685e9ac0b773d11c4269d61a0071b3ee64c3 --- /dev/null +++ b/model-00022-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d62d05599a311650ee723b01b416be77df188a97ebba765b087304b9d85a2ed +size 989873768 diff --git a/model-00023-of-00098.safetensors b/model-00023-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..39b4c6fe42c2462f2da6e986795ae26cad942b21 --- /dev/null +++ b/model-00023-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18e9fa9cfead9676e1d914c6b4a3dfdc701550293d12196cb209a553386640d8 +size 973145360 diff --git a/model-00024-of-00098.safetensors b/model-00024-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..15442ff4e3becd6b74d3e25b95eb0835f012b2a1 --- /dev/null +++ b/model-00024-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:880be671549171ebd91426cbad81227b85fcb6aa778747aa704e7eb4c7cc1198 +size 939525160 diff --git a/model-00025-of-00098.safetensors b/model-00025-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8c5cd00e95dd5ddeedb33f174db276d0e32da00b --- /dev/null +++ b/model-00025-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d9418bfbef1b3c1f348e7fae36db4d153b7c834cb56d6a015c83ec53b21813e +size 989873768 diff --git a/model-00026-of-00098.safetensors b/model-00026-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..af67a21d956aa4e00098f38c49f4cd50236078a0 --- /dev/null +++ b/model-00026-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07fa139d24b4506e2179ee698ec8ee152f91921f6cabc0001fe3a27d654d5004 +size 973145360 diff --git a/model-00027-of-00098.safetensors b/model-00027-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..abd4610e3a3e6724e723d4caec470af7d6ae1dfa --- /dev/null +++ b/model-00027-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15c86311c5153ef55bd07ea85c2027eba829e59329a99585408d3ec69e8d8bc6 +size 939525160 diff --git a/model-00028-of-00098.safetensors b/model-00028-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d379add01d9f6f7eab4e18a5ceb86ee3283f7380 --- /dev/null +++ b/model-00028-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee49f9f8b4a8b96c28980412119bbc8fb0b4b9439da6b8819187e825cb13aa4b +size 989873768 diff --git a/model-00029-of-00098.safetensors b/model-00029-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2990249198469cc1092e2c8e167d73e22c20a7bc --- /dev/null +++ b/model-00029-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69d66957a454fae2964d42d5d6caa380e46d40d682c1c218dcfcb7f82b0fd93c +size 973145360 diff --git a/model-00030-of-00098.safetensors b/model-00030-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f2d0a20172b8fe3968a694f94739ec42f569d1ee --- /dev/null +++ b/model-00030-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ce9b00b4107d4cfaca616f72d6b106ba847de5f7ad2fb9c0b0e1f00a7623144 +size 939525160 diff --git a/model-00031-of-00098.safetensors b/model-00031-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..95ca9249cd06817d43415a4d2d05b4c2fa9d2cea --- /dev/null +++ b/model-00031-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8755d1932036c223abe6d05b9dfb3df0f24f4b0f8b6270d3c70402e2766e601 +size 989873760 diff --git a/model-00032-of-00098.safetensors b/model-00032-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7559088170adb272f79370f0b8efd11544dcc48f --- /dev/null +++ b/model-00032-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a133bde845195c7ecfc991b352689f6190b736a6fd580ec654dcb75fa8016ec9 +size 973145376 diff --git a/model-00033-of-00098.safetensors b/model-00033-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9495c4f2d8c77fc17d17a6cf4906894c4ced663d --- /dev/null +++ b/model-00033-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd91bdc9f3a335e6db5869d202a0c43832fa4bba0bc88e5589ff81bd8c747f0e +size 939525168 diff --git a/model-00034-of-00098.safetensors b/model-00034-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8336cc9c81113bfada18ad26691062c1f2a99651 --- /dev/null +++ b/model-00034-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28135a066cef7115c6d95c91e447b5a86c9bab10d416635e75c94d2552210dbe +size 989873784 diff --git a/model-00035-of-00098.safetensors b/model-00035-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9fd4b0314470368fabcf3682c4aea59166563fb8 --- /dev/null +++ b/model-00035-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f92f5db3f39dcaccaa83f629c21b3152d58c5e4f4d36c32647d6d73fd55849d0 +size 973145376 diff --git a/model-00036-of-00098.safetensors b/model-00036-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..57fde92a656970a5c1f25a673e004f1a2bbbd8c2 --- /dev/null +++ b/model-00036-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dffd1274533a967a97a7ca73e4e31102a477662676a2572369448e3a0b66b8af +size 939525168 diff --git a/model-00037-of-00098.safetensors b/model-00037-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bd531d1c8424a4dacab8867bd4741d379f42f61f --- /dev/null +++ b/model-00037-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e47fb18d79262fb8cc59b9afce83cb7a835a4a0fcb803ce8a853d77eb02d06f +size 989873784 diff --git a/model-00038-of-00098.safetensors b/model-00038-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fd102f5fbbae67c8ac26e72dbea1824016a1c721 --- /dev/null +++ b/model-00038-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff816f60ca98c263fb5877adb95befd4321064ce16a9a0c358314d14e6d828aa +size 973145376 diff --git a/model-00039-of-00098.safetensors b/model-00039-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a8cf693a7eeea514342b48c6923248b58d59ef35 --- /dev/null +++ b/model-00039-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f39d2b643bcd37c0929a6e55adc8718923f3eab9bd6e3553d7e1a68aa66be22 +size 939525168 diff --git a/model-00040-of-00098.safetensors b/model-00040-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a3072ea512c94f98b13980cca180f7e10991a1c4 --- /dev/null +++ b/model-00040-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cec9a0e25b925b87f0b62cd1d4901380ea86e3c364a82ea63ad4be934241ff83 +size 989873784 diff --git a/model-00041-of-00098.safetensors b/model-00041-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4a7851505d8982567203a39ab9f9256427b571e5 --- /dev/null +++ b/model-00041-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02e1769e85f5878ce6b8b1136968acaaab46045e2a8b80c8bd3d0c920d9b2996 +size 973145376 diff --git a/model-00042-of-00098.safetensors b/model-00042-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0ade503c27d13176cd1df92fcd600c85c9420eb2 --- /dev/null +++ b/model-00042-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3524816aecc7216c9c0ea852e33b0b6d1c4b916e6c5c01e168a9b444c7bbd9f8 +size 939525168 diff --git a/model-00043-of-00098.safetensors b/model-00043-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..71f6c568f0a3bdbddd725c4878b233ba2b273a1c --- /dev/null +++ b/model-00043-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c22b630be7af28c858f37881abda14db05c4edd2b66efbdcc59bdfd416caa9da +size 989873784 diff --git a/model-00044-of-00098.safetensors b/model-00044-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1fad44a9fa89d15d980dbb7d014951bcf61ed922 --- /dev/null +++ b/model-00044-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b5c084958673ccc12fd1b06f33d34d217c3c5b49f27a1f819a562c7519fa75a +size 973145376 diff --git a/model-00045-of-00098.safetensors b/model-00045-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e3fd59d828b155c7583a4582b5e9ae6415ff1f10 --- /dev/null +++ b/model-00045-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6686e8ee417e48d11cd46924369512130629f9a013ae662a64b676c6c8f720ad +size 939525168 diff --git a/model-00046-of-00098.safetensors b/model-00046-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b5f33d5967942c8b8747951f57f995b83d185565 --- /dev/null +++ b/model-00046-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f383321e40829894e176bee07fd39ecd966ca76c6ed19a2a696685aa2ea6fa1 +size 989873784 diff --git a/model-00047-of-00098.safetensors b/model-00047-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6f37c00966711fcb1209433bc5c0eeb5369a5cde --- /dev/null +++ b/model-00047-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4a8f4e49b3e3fbf5a697ab6f89e50c85a58a6202110f3c250860a868bcb62a8 +size 973145376 diff --git a/model-00048-of-00098.safetensors b/model-00048-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1c30e61b883eb6a96859cd9debf7cd00a9fe555f --- /dev/null +++ b/model-00048-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f653a76058f8b84d97512ad41106ddb89e103877c0d510ef3f5cda333f7a56ca +size 939525168 diff --git a/model-00049-of-00098.safetensors b/model-00049-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..212dee004bd1b8b6e3eb5099f887b395e446f06b --- /dev/null +++ b/model-00049-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c56be83520ee3fb34a33aaf0a1811fc3fad21add98a7476d0a3248fb95a0df32 +size 989873784 diff --git a/model-00050-of-00098.safetensors b/model-00050-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f3db0b3ce2ca8dbae81cc05df6cccdba687fec49 --- /dev/null +++ b/model-00050-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9621627e51c8e60d1a3424581f5d78843b4e26123c86e1df870828e0ff1cbd20 +size 973145376 diff --git a/model-00051-of-00098.safetensors b/model-00051-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..35bf43974340ae935b1925c6c9f320b40f69c540 --- /dev/null +++ b/model-00051-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f13b24a4116306d68d96ba9e56b775abad19aa995a4153f5ef6df9bda26f9c1d +size 939525168 diff --git a/model-00052-of-00098.safetensors b/model-00052-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8969c2afb03adc3f8b2da29506cc7286927da0eb --- /dev/null +++ b/model-00052-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39e715dd405aa9b4c92f8a9e2bbc0facb112a74eff213f1de071a0e0584c0e5a +size 989873784 diff --git a/model-00053-of-00098.safetensors b/model-00053-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8e13b3b4094c2205f56d9f2026b009b7b0a0ee21 --- /dev/null +++ b/model-00053-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81b736678bc28e9774080ca46dee5d078cb4fd25883c0dc1c336f803a2331bdd +size 973145376 diff --git a/model-00054-of-00098.safetensors b/model-00054-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cb04c980893725ce9861cd424c4ec6dcafe5e59e --- /dev/null +++ b/model-00054-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3006936b85afa9b68b5fc0694e9eaed12bf0e829f0b4b198f1619b395b75a1fa +size 939525168 diff --git a/model-00055-of-00098.safetensors b/model-00055-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dbaad646395960ae5432b47c308e646519944077 --- /dev/null +++ b/model-00055-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:265facbcdb8d4dc4f153939dd8e1ea5fa029d036a7330323e5c66c46475b837b +size 989873784 diff --git a/model-00056-of-00098.safetensors b/model-00056-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3d669b394710f0500243379ae8715d934f9c2584 --- /dev/null +++ b/model-00056-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b53d604a2409460b603452e9426a6ee33bc22a2eeb78b5709251f1cb42de2831 +size 973145376 diff --git a/model-00057-of-00098.safetensors b/model-00057-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..18e411c91b8486acd4e4830f92b1b2136766fba4 --- /dev/null +++ b/model-00057-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52b5c16f8f9398ce89f00ebe1aaa38511cd351c7f56eb3504ee059abd86e3477 +size 939525168 diff --git a/model-00058-of-00098.safetensors b/model-00058-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6dd4496834d7d436ed12e859f6e39216fe6cb815 --- /dev/null +++ b/model-00058-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0781571b879572058730e23453a46736f4970c014689d322d3e2c9b4957f1a6 +size 989873784 diff --git a/model-00059-of-00098.safetensors b/model-00059-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..264c32f50821fad187a81331865d84344f7b55ff --- /dev/null +++ b/model-00059-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:666073e527ac319c768dfb806b64becdd64d83458713d81decf867c2fe558d0b +size 973145376 diff --git a/model-00060-of-00098.safetensors b/model-00060-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2aef832e63269a2a912d1367e5227dfb4e0dbbfe --- /dev/null +++ b/model-00060-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d1aacc0e8a5c86ee193bc74a62654daad8b429ee69753cde63d28e6518f95fa +size 939525168 diff --git a/model-00061-of-00098.safetensors b/model-00061-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..93d51e682582ed17d7a00c8dfd064fef8ca9cca9 --- /dev/null +++ b/model-00061-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28d43d24a93f501f6a26b998a57abde6f2fad56560bfd5caabfbe84559927a11 +size 989873784 diff --git a/model-00062-of-00098.safetensors b/model-00062-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3cca92d8654d993eebb01cec072c159e00e18238 --- /dev/null +++ b/model-00062-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e1e0e69b623e35bc47fad77ea2f787b5b05d087db96bfba3cdb890e31ceb882 +size 973145376 diff --git a/model-00063-of-00098.safetensors b/model-00063-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..747bcac6f82e0a6f333306872818a2455ed675bf --- /dev/null +++ b/model-00063-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8abe07242bfb034d17e03a38633e6cf1fb8a1ebfa91100fccb7f1e58f30d4b4c +size 939525168 diff --git a/model-00064-of-00098.safetensors b/model-00064-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2f8c035cce3da44030ac4c3db83b1805242ec1f6 --- /dev/null +++ b/model-00064-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d7136988bc43f38ab6b63a6eac9b110764c556cb665e5477cc52ef0d30639dd +size 989873784 diff --git a/model-00065-of-00098.safetensors b/model-00065-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9a1bb8e9090c715470b48daa03ce396373a49019 --- /dev/null +++ b/model-00065-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a574300bfa2c02836b11e1df40aae9c9298e69efae86f65957db8309cb83d496 +size 973145376 diff --git a/model-00066-of-00098.safetensors b/model-00066-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cda4baa27d08b40cf1f62552e8322e404aa784ac --- /dev/null +++ b/model-00066-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bf2d3132b05781978b7c8be973a52b74ecff83ab83d3cfc69bde48b0172892c +size 939525168 diff --git a/model-00067-of-00098.safetensors b/model-00067-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ec21d3fc10d2bb9c778f3e87743a07910f4e18fe --- /dev/null +++ b/model-00067-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11903ad84ed8455125ffda1e54e3360d7206635a653694fe66bce6ffd448641c +size 989873784 diff --git a/model-00068-of-00098.safetensors b/model-00068-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d55d98173d6390147af1b79af64161d1d42faccb --- /dev/null +++ b/model-00068-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0365c64cb0580652a8c111337f22d67008545a79f099db2d407e4c8ff7b466b9 +size 973145376 diff --git a/model-00069-of-00098.safetensors b/model-00069-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1ddfec0dc63b0986b67da6cd14aacb98cff6f0e7 --- /dev/null +++ b/model-00069-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9526e81118c5e95b4f8c4fa322f0f3689eec23c22c80a53578802eebeb901345 +size 939525168 diff --git a/model-00070-of-00098.safetensors b/model-00070-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..336042a3284b9c45c8aa1a8154b59cda84777376 --- /dev/null +++ b/model-00070-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8865e955599f5c496e8627098563fabe1dfd54c1329c2f70be75079124678624 +size 989873784 diff --git a/model-00071-of-00098.safetensors b/model-00071-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..da67fd2f67d0ae01ff247d0b7aada40dda0028b5 --- /dev/null +++ b/model-00071-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3392bf4ca69d8b9c47869dcdf1da2cd24cd0075e065331f37bebd4c14ee5bf8 +size 973145376 diff --git a/model-00072-of-00098.safetensors b/model-00072-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..919c85c070d4924cf8d9733e19176304fd84fb95 --- /dev/null +++ b/model-00072-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3561eff62711ccd037d6eb4a6f09dfbe77e5f1503f27ae2b0d9efef2557593fb +size 939525168 diff --git a/model-00073-of-00098.safetensors b/model-00073-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a4abb597e6d40305d77528693ccf000a0fe469c9 --- /dev/null +++ b/model-00073-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a42ad1377bfb96d143ba41e34ad69b6384503f01729b01185ba6276cd2898450 +size 989873784 diff --git a/model-00074-of-00098.safetensors b/model-00074-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..13cbb3b3993b579647deafbc9be57dc6c5f89182 --- /dev/null +++ b/model-00074-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2b2504733ec5972a44f8b3408d1bc59a57c4aff75061dd0de612439f9371c0e +size 973145376 diff --git a/model-00075-of-00098.safetensors b/model-00075-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dd6eeb326696653909185085f305eef1f097ea5b --- /dev/null +++ b/model-00075-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe3c8157c3f9a7be460d52a77090b31c7f60f703a5997080ee2b1310153e63a4 +size 939525168 diff --git a/model-00076-of-00098.safetensors b/model-00076-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8c98dc743bbe2d2f0f96d58ab093d50777a861f0 --- /dev/null +++ b/model-00076-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49a203d51c90374f3c13f525ab26fea6603691b8b91a4152543c6a3807741d32 +size 989873784 diff --git a/model-00077-of-00098.safetensors b/model-00077-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d58b25ab0cf60c87e0160893f6ea8ec9fba46dfa --- /dev/null +++ b/model-00077-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edffe54a0801ed207cb86db2394d8154a86d587a8a2fcc656d49566d7114c089 +size 973145376 diff --git a/model-00078-of-00098.safetensors b/model-00078-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..73f7ba1a5c3ee0409ce641c625ab4aa840dadeb5 --- /dev/null +++ b/model-00078-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:615a798622593b790f09ab5b00253e8008181f3e46270071b4d1d1075f6259a5 +size 939525168 diff --git a/model-00079-of-00098.safetensors b/model-00079-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3ba0b215a29a8cdd37cfcf74cabb12860fb7b291 --- /dev/null +++ b/model-00079-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e43741dc8cce72e1b91318ea06c0c4a038964d6ef7f6ff1a7167c107bd0078b5 +size 989873784 diff --git a/model-00080-of-00098.safetensors b/model-00080-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d0c85c8a99ee8123ab3a133ead1e69653133d0e6 --- /dev/null +++ b/model-00080-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:955b1c2d8c723e221bc983fc5dfb36a7bd0e3bd06d4ada8140f1626a58d6496f +size 973145376 diff --git a/model-00081-of-00098.safetensors b/model-00081-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b3d105fbdda7423046f396a102abff9de4292963 --- /dev/null +++ b/model-00081-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2a3275832a4b1532c89372b0421c41d9656e9ffa1c5727879b09c3e1b66cccb +size 939525168 diff --git a/model-00082-of-00098.safetensors b/model-00082-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4849e7ed5720ba334c5e887cc4488d6022c23ea5 --- /dev/null +++ b/model-00082-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34ab837f4bf94184a82c0642cf6551425cf78b7e41828f87611cbef8af042e60 +size 989873784 diff --git a/model-00083-of-00098.safetensors b/model-00083-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..aaca9185be0827ce864f39491c20d01fe713d274 --- /dev/null +++ b/model-00083-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e87aa33ebecc4c62e203b7e8c2437ede122735857a63c3f1b89cc7005fe05184 +size 973145376 diff --git a/model-00084-of-00098.safetensors b/model-00084-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7145636b7b9d41180e255d5a5f3d391acfe62a8e --- /dev/null +++ b/model-00084-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e760932fe3ed9b19ac49b40d84f16c39737d7b10816fd5d26244516f43abcd1 +size 939525168 diff --git a/model-00085-of-00098.safetensors b/model-00085-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e8e7b435a328241f51951a7e86976af60de3df01 --- /dev/null +++ b/model-00085-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66f012e85704a891e3c88fa633974bfc99723143b6d3d750cd2122d2ee3c4ae3 +size 989873784 diff --git a/model-00086-of-00098.safetensors b/model-00086-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6c6a906ed8f0d25bd413b9356713199da9f2bb3a --- /dev/null +++ b/model-00086-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2f60aed86deef08f197df5c53dffe784b5026f7c460fafa5d0712d5906778f7 +size 973145376 diff --git a/model-00087-of-00098.safetensors b/model-00087-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c887768c00676372d77ac6e6646314244a8dd255 --- /dev/null +++ b/model-00087-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed0eb244683a7da1221645ec90a058c0768d212700575c1e391fa5d4ac1a8f97 +size 939525168 diff --git a/model-00088-of-00098.safetensors b/model-00088-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1b892c55fc260e13f0447d73f491a4aec1ffc18a --- /dev/null +++ b/model-00088-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a87c920afe9de9ad917f210cbd41f7016c34f06d6b5554a908bb7c327c93bb11 +size 989873784 diff --git a/model-00089-of-00098.safetensors b/model-00089-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..433c217e0c1e6c04ba222e1470a45a2830cce035 --- /dev/null +++ b/model-00089-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a375478d1e44eaec7159175deb44cdc584d91109ca019fcc39de4d9608b45c2 +size 973145376 diff --git a/model-00090-of-00098.safetensors b/model-00090-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5532edc87ab5f2f4615f3c0c39d45127a56ba1d0 --- /dev/null +++ b/model-00090-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28d8c90247e9d9a11b1ff8e423953969440d383117a25fe602afaa3b751b8d20 +size 939525168 diff --git a/model-00091-of-00098.safetensors b/model-00091-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..12e6e516e6918fd667b42badff0cd9bbb8e2cea8 --- /dev/null +++ b/model-00091-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49b34a43dc43455fb7ee2c8006b3f76f47b1bda1f26b5d7dd76a606645940b78 +size 989873784 diff --git a/model-00092-of-00098.safetensors b/model-00092-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f01bd88ce8d967cb309b3b9e15374a9967aa2dd6 --- /dev/null +++ b/model-00092-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43d97299b26e32108bca7b1697af7fd06191f9fef07369bc0fe0cd20685b6f36 +size 973145376 diff --git a/model-00093-of-00098.safetensors b/model-00093-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a1fa68d1446d77d6834b8042e58532cfde0d3310 --- /dev/null +++ b/model-00093-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a953706571ffd8fbec5fd56e0007d86351813955e0af1728211856d1060852f +size 939525168 diff --git a/model-00094-of-00098.safetensors b/model-00094-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a1408862c20c14a417bc5a46631fe86cc294915a --- /dev/null +++ b/model-00094-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c74db3ecc978a2d11c7ab26292ce440baf64aa66b25c7ac6dc33800e6d4632b4 +size 989873784 diff --git a/model-00095-of-00098.safetensors b/model-00095-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b2394d48cc0fe446fed02964f334383d5da34646 --- /dev/null +++ b/model-00095-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e3c6351cc4bf879577c633a9dac06fc90d33d1e351525bdccd10d44bf1f9472 +size 973145376 diff --git a/model-00096-of-00098.safetensors b/model-00096-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8c2086061c54a64d49de5c85bad1f0cbe1543d6b --- /dev/null +++ b/model-00096-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab2495f2021e2e0957963c798771a8d08a12f7d79854f43a99464f516790a53c +size 939525168 diff --git a/model-00097-of-00098.safetensors b/model-00097-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f4a7809986633bac8bfddef20bcb6658cd3a08f4 --- /dev/null +++ b/model-00097-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c297dc47c1771370bd41decde37116d9c8f276667b7db08c0da6334b47d166fe +size 939550064 diff --git a/model-00098-of-00098.safetensors b/model-00098-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3b06c66f84f094f1271667f6fe311f6609e2844f --- /dev/null +++ b/model-00098-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:858d9595b7f5849aaf0eda8f3679ab6b7f30a01f49ba34859128c6a914453dd9 +size 262144128 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..e70e660c9d5539791271aeac2a37b01a1f9d62e7 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,1002 @@ +{ + "metadata": { + "total_size": 93405585408 + }, + "weight_map": { + "lm_head.weight": "model-00098-of-00098.safetensors", + "model.embed_tokens.weight": "model-00001-of-00098.safetensors", + "model.layers.0.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00098.safetensors", + "model.layers.0.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00098.safetensors", + "model.layers.0.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00098.safetensors", + "model.layers.0.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00098.safetensors", + "model.layers.0.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00098.safetensors", + "model.layers.0.block_sparse_moe.experts.1.w3.weight": "model-00002-of-00098.safetensors", + "model.layers.0.block_sparse_moe.experts.2.w1.weight": "model-00002-of-00098.safetensors", + "model.layers.0.block_sparse_moe.experts.2.w2.weight": "model-00002-of-00098.safetensors", + "model.layers.0.block_sparse_moe.experts.2.w3.weight": "model-00002-of-00098.safetensors", + "model.layers.0.block_sparse_moe.experts.3.w1.weight": "model-00002-of-00098.safetensors", + "model.layers.0.block_sparse_moe.experts.3.w2.weight": "model-00002-of-00098.safetensors", + "model.layers.0.block_sparse_moe.experts.3.w3.weight": "model-00002-of-00098.safetensors", + "model.layers.0.block_sparse_moe.experts.4.w1.weight": "model-00002-of-00098.safetensors", + "model.layers.0.block_sparse_moe.experts.4.w2.weight": "model-00003-of-00098.safetensors", + "model.layers.0.block_sparse_moe.experts.4.w3.weight": "model-00003-of-00098.safetensors", + "model.layers.0.block_sparse_moe.experts.5.w1.weight": "model-00003-of-00098.safetensors", + "model.layers.0.block_sparse_moe.experts.5.w2.weight": "model-00003-of-00098.safetensors", + "model.layers.0.block_sparse_moe.experts.5.w3.weight": "model-00003-of-00098.safetensors", + "model.layers.0.block_sparse_moe.experts.6.w1.weight": "model-00003-of-00098.safetensors", + "model.layers.0.block_sparse_moe.experts.6.w2.weight": "model-00003-of-00098.safetensors", + "model.layers.0.block_sparse_moe.experts.6.w3.weight": "model-00003-of-00098.safetensors", + "model.layers.0.block_sparse_moe.experts.7.w1.weight": "model-00004-of-00098.safetensors", + "model.layers.0.block_sparse_moe.experts.7.w2.weight": "model-00004-of-00098.safetensors", + "model.layers.0.block_sparse_moe.experts.7.w3.weight": "model-00004-of-00098.safetensors", + "model.layers.0.block_sparse_moe.gate.weight": "model-00001-of-00098.safetensors", + "model.layers.0.input_layernorm.weight": "model-00004-of-00098.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00004-of-00098.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00098.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00098.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00098.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00098.safetensors", + "model.layers.1.block_sparse_moe.experts.0.w1.weight": "model-00004-of-00098.safetensors", + "model.layers.1.block_sparse_moe.experts.0.w2.weight": "model-00004-of-00098.safetensors", + "model.layers.1.block_sparse_moe.experts.0.w3.weight": "model-00004-of-00098.safetensors", + "model.layers.1.block_sparse_moe.experts.1.w1.weight": "model-00004-of-00098.safetensors", + "model.layers.1.block_sparse_moe.experts.1.w2.weight": "model-00005-of-00098.safetensors", + "model.layers.1.block_sparse_moe.experts.1.w3.weight": "model-00005-of-00098.safetensors", + "model.layers.1.block_sparse_moe.experts.2.w1.weight": "model-00005-of-00098.safetensors", + "model.layers.1.block_sparse_moe.experts.2.w2.weight": "model-00005-of-00098.safetensors", + "model.layers.1.block_sparse_moe.experts.2.w3.weight": "model-00005-of-00098.safetensors", + "model.layers.1.block_sparse_moe.experts.3.w1.weight": "model-00005-of-00098.safetensors", + "model.layers.1.block_sparse_moe.experts.3.w2.weight": "model-00005-of-00098.safetensors", + "model.layers.1.block_sparse_moe.experts.3.w3.weight": "model-00005-of-00098.safetensors", + "model.layers.1.block_sparse_moe.experts.4.w1.weight": "model-00006-of-00098.safetensors", + "model.layers.1.block_sparse_moe.experts.4.w2.weight": "model-00006-of-00098.safetensors", + "model.layers.1.block_sparse_moe.experts.4.w3.weight": "model-00006-of-00098.safetensors", + "model.layers.1.block_sparse_moe.experts.5.w1.weight": "model-00006-of-00098.safetensors", + "model.layers.1.block_sparse_moe.experts.5.w2.weight": "model-00006-of-00098.safetensors", + "model.layers.1.block_sparse_moe.experts.5.w3.weight": "model-00006-of-00098.safetensors", + "model.layers.1.block_sparse_moe.experts.6.w1.weight": "model-00006-of-00098.safetensors", + "model.layers.1.block_sparse_moe.experts.6.w2.weight": "model-00006-of-00098.safetensors", + "model.layers.1.block_sparse_moe.experts.6.w3.weight": "model-00007-of-00098.safetensors", + "model.layers.1.block_sparse_moe.experts.7.w1.weight": "model-00007-of-00098.safetensors", + "model.layers.1.block_sparse_moe.experts.7.w2.weight": "model-00007-of-00098.safetensors", + "model.layers.1.block_sparse_moe.experts.7.w3.weight": "model-00007-of-00098.safetensors", + "model.layers.1.block_sparse_moe.gate.weight": "model-00004-of-00098.safetensors", + "model.layers.1.input_layernorm.weight": "model-00007-of-00098.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00007-of-00098.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00004-of-00098.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00004-of-00098.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00004-of-00098.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00004-of-00098.safetensors", + "model.layers.10.block_sparse_moe.experts.0.w1.weight": "model-00032-of-00098.safetensors", + "model.layers.10.block_sparse_moe.experts.0.w2.weight": "model-00032-of-00098.safetensors", + "model.layers.10.block_sparse_moe.experts.0.w3.weight": "model-00032-of-00098.safetensors", + "model.layers.10.block_sparse_moe.experts.1.w1.weight": "model-00032-of-00098.safetensors", + "model.layers.10.block_sparse_moe.experts.1.w2.weight": "model-00032-of-00098.safetensors", + "model.layers.10.block_sparse_moe.experts.1.w3.weight": "model-00032-of-00098.safetensors", + "model.layers.10.block_sparse_moe.experts.2.w1.weight": "model-00032-of-00098.safetensors", + "model.layers.10.block_sparse_moe.experts.2.w2.weight": "model-00032-of-00098.safetensors", + "model.layers.10.block_sparse_moe.experts.2.w3.weight": "model-00033-of-00098.safetensors", + "model.layers.10.block_sparse_moe.experts.3.w1.weight": "model-00033-of-00098.safetensors", + "model.layers.10.block_sparse_moe.experts.3.w2.weight": "model-00033-of-00098.safetensors", + "model.layers.10.block_sparse_moe.experts.3.w3.weight": "model-00033-of-00098.safetensors", + "model.layers.10.block_sparse_moe.experts.4.w1.weight": "model-00033-of-00098.safetensors", + "model.layers.10.block_sparse_moe.experts.4.w2.weight": "model-00033-of-00098.safetensors", + "model.layers.10.block_sparse_moe.experts.4.w3.weight": "model-00033-of-00098.safetensors", + "model.layers.10.block_sparse_moe.experts.5.w1.weight": "model-00033-of-00098.safetensors", + "model.layers.10.block_sparse_moe.experts.5.w2.weight": "model-00034-of-00098.safetensors", + "model.layers.10.block_sparse_moe.experts.5.w3.weight": "model-00034-of-00098.safetensors", + "model.layers.10.block_sparse_moe.experts.6.w1.weight": "model-00034-of-00098.safetensors", + "model.layers.10.block_sparse_moe.experts.6.w2.weight": "model-00034-of-00098.safetensors", + "model.layers.10.block_sparse_moe.experts.6.w3.weight": "model-00034-of-00098.safetensors", + "model.layers.10.block_sparse_moe.experts.7.w1.weight": "model-00034-of-00098.safetensors", + "model.layers.10.block_sparse_moe.experts.7.w2.weight": "model-00034-of-00098.safetensors", + "model.layers.10.block_sparse_moe.experts.7.w3.weight": "model-00034-of-00098.safetensors", + "model.layers.10.block_sparse_moe.gate.weight": "model-00032-of-00098.safetensors", + "model.layers.10.input_layernorm.weight": "model-00034-of-00098.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00034-of-00098.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00031-of-00098.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00032-of-00098.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00031-of-00098.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00031-of-00098.safetensors", + "model.layers.11.block_sparse_moe.experts.0.w1.weight": "model-00035-of-00098.safetensors", + "model.layers.11.block_sparse_moe.experts.0.w2.weight": "model-00035-of-00098.safetensors", + "model.layers.11.block_sparse_moe.experts.0.w3.weight": "model-00035-of-00098.safetensors", + "model.layers.11.block_sparse_moe.experts.1.w1.weight": "model-00035-of-00098.safetensors", + "model.layers.11.block_sparse_moe.experts.1.w2.weight": "model-00035-of-00098.safetensors", + "model.layers.11.block_sparse_moe.experts.1.w3.weight": "model-00035-of-00098.safetensors", + "model.layers.11.block_sparse_moe.experts.2.w1.weight": "model-00035-of-00098.safetensors", + "model.layers.11.block_sparse_moe.experts.2.w2.weight": "model-00035-of-00098.safetensors", + "model.layers.11.block_sparse_moe.experts.2.w3.weight": "model-00036-of-00098.safetensors", + "model.layers.11.block_sparse_moe.experts.3.w1.weight": "model-00036-of-00098.safetensors", + "model.layers.11.block_sparse_moe.experts.3.w2.weight": "model-00036-of-00098.safetensors", + "model.layers.11.block_sparse_moe.experts.3.w3.weight": "model-00036-of-00098.safetensors", + "model.layers.11.block_sparse_moe.experts.4.w1.weight": "model-00036-of-00098.safetensors", + "model.layers.11.block_sparse_moe.experts.4.w2.weight": "model-00036-of-00098.safetensors", + "model.layers.11.block_sparse_moe.experts.4.w3.weight": "model-00036-of-00098.safetensors", + "model.layers.11.block_sparse_moe.experts.5.w1.weight": "model-00036-of-00098.safetensors", + "model.layers.11.block_sparse_moe.experts.5.w2.weight": "model-00037-of-00098.safetensors", + "model.layers.11.block_sparse_moe.experts.5.w3.weight": "model-00037-of-00098.safetensors", + "model.layers.11.block_sparse_moe.experts.6.w1.weight": "model-00037-of-00098.safetensors", + "model.layers.11.block_sparse_moe.experts.6.w2.weight": "model-00037-of-00098.safetensors", + "model.layers.11.block_sparse_moe.experts.6.w3.weight": "model-00037-of-00098.safetensors", + "model.layers.11.block_sparse_moe.experts.7.w1.weight": "model-00037-of-00098.safetensors", + "model.layers.11.block_sparse_moe.experts.7.w2.weight": "model-00037-of-00098.safetensors", + "model.layers.11.block_sparse_moe.experts.7.w3.weight": "model-00037-of-00098.safetensors", + "model.layers.11.block_sparse_moe.gate.weight": "model-00035-of-00098.safetensors", + "model.layers.11.input_layernorm.weight": "model-00037-of-00098.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00037-of-00098.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00034-of-00098.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00035-of-00098.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00034-of-00098.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00034-of-00098.safetensors", + "model.layers.12.block_sparse_moe.experts.0.w1.weight": "model-00038-of-00098.safetensors", + "model.layers.12.block_sparse_moe.experts.0.w2.weight": "model-00038-of-00098.safetensors", + "model.layers.12.block_sparse_moe.experts.0.w3.weight": "model-00038-of-00098.safetensors", + "model.layers.12.block_sparse_moe.experts.1.w1.weight": "model-00038-of-00098.safetensors", + "model.layers.12.block_sparse_moe.experts.1.w2.weight": "model-00038-of-00098.safetensors", + "model.layers.12.block_sparse_moe.experts.1.w3.weight": "model-00038-of-00098.safetensors", + "model.layers.12.block_sparse_moe.experts.2.w1.weight": "model-00038-of-00098.safetensors", + "model.layers.12.block_sparse_moe.experts.2.w2.weight": "model-00038-of-00098.safetensors", + "model.layers.12.block_sparse_moe.experts.2.w3.weight": "model-00039-of-00098.safetensors", + "model.layers.12.block_sparse_moe.experts.3.w1.weight": "model-00039-of-00098.safetensors", + "model.layers.12.block_sparse_moe.experts.3.w2.weight": "model-00039-of-00098.safetensors", + "model.layers.12.block_sparse_moe.experts.3.w3.weight": "model-00039-of-00098.safetensors", + "model.layers.12.block_sparse_moe.experts.4.w1.weight": "model-00039-of-00098.safetensors", + "model.layers.12.block_sparse_moe.experts.4.w2.weight": "model-00039-of-00098.safetensors", + "model.layers.12.block_sparse_moe.experts.4.w3.weight": "model-00039-of-00098.safetensors", + "model.layers.12.block_sparse_moe.experts.5.w1.weight": "model-00039-of-00098.safetensors", + "model.layers.12.block_sparse_moe.experts.5.w2.weight": "model-00040-of-00098.safetensors", + "model.layers.12.block_sparse_moe.experts.5.w3.weight": "model-00040-of-00098.safetensors", + "model.layers.12.block_sparse_moe.experts.6.w1.weight": "model-00040-of-00098.safetensors", + "model.layers.12.block_sparse_moe.experts.6.w2.weight": "model-00040-of-00098.safetensors", + "model.layers.12.block_sparse_moe.experts.6.w3.weight": "model-00040-of-00098.safetensors", + "model.layers.12.block_sparse_moe.experts.7.w1.weight": "model-00040-of-00098.safetensors", + "model.layers.12.block_sparse_moe.experts.7.w2.weight": "model-00040-of-00098.safetensors", + "model.layers.12.block_sparse_moe.experts.7.w3.weight": "model-00040-of-00098.safetensors", + "model.layers.12.block_sparse_moe.gate.weight": "model-00038-of-00098.safetensors", + "model.layers.12.input_layernorm.weight": "model-00040-of-00098.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00040-of-00098.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00037-of-00098.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00038-of-00098.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00037-of-00098.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00037-of-00098.safetensors", + "model.layers.13.block_sparse_moe.experts.0.w1.weight": "model-00041-of-00098.safetensors", + "model.layers.13.block_sparse_moe.experts.0.w2.weight": "model-00041-of-00098.safetensors", + "model.layers.13.block_sparse_moe.experts.0.w3.weight": "model-00041-of-00098.safetensors", + "model.layers.13.block_sparse_moe.experts.1.w1.weight": "model-00041-of-00098.safetensors", + "model.layers.13.block_sparse_moe.experts.1.w2.weight": "model-00041-of-00098.safetensors", + "model.layers.13.block_sparse_moe.experts.1.w3.weight": "model-00041-of-00098.safetensors", + "model.layers.13.block_sparse_moe.experts.2.w1.weight": "model-00041-of-00098.safetensors", + "model.layers.13.block_sparse_moe.experts.2.w2.weight": "model-00041-of-00098.safetensors", + "model.layers.13.block_sparse_moe.experts.2.w3.weight": "model-00042-of-00098.safetensors", + "model.layers.13.block_sparse_moe.experts.3.w1.weight": "model-00042-of-00098.safetensors", + "model.layers.13.block_sparse_moe.experts.3.w2.weight": "model-00042-of-00098.safetensors", + "model.layers.13.block_sparse_moe.experts.3.w3.weight": "model-00042-of-00098.safetensors", + "model.layers.13.block_sparse_moe.experts.4.w1.weight": "model-00042-of-00098.safetensors", + "model.layers.13.block_sparse_moe.experts.4.w2.weight": "model-00042-of-00098.safetensors", + "model.layers.13.block_sparse_moe.experts.4.w3.weight": "model-00042-of-00098.safetensors", + "model.layers.13.block_sparse_moe.experts.5.w1.weight": "model-00042-of-00098.safetensors", + "model.layers.13.block_sparse_moe.experts.5.w2.weight": "model-00043-of-00098.safetensors", + "model.layers.13.block_sparse_moe.experts.5.w3.weight": "model-00043-of-00098.safetensors", + "model.layers.13.block_sparse_moe.experts.6.w1.weight": "model-00043-of-00098.safetensors", + "model.layers.13.block_sparse_moe.experts.6.w2.weight": "model-00043-of-00098.safetensors", + "model.layers.13.block_sparse_moe.experts.6.w3.weight": "model-00043-of-00098.safetensors", + "model.layers.13.block_sparse_moe.experts.7.w1.weight": "model-00043-of-00098.safetensors", + "model.layers.13.block_sparse_moe.experts.7.w2.weight": "model-00043-of-00098.safetensors", + "model.layers.13.block_sparse_moe.experts.7.w3.weight": "model-00043-of-00098.safetensors", + "model.layers.13.block_sparse_moe.gate.weight": "model-00041-of-00098.safetensors", + "model.layers.13.input_layernorm.weight": "model-00043-of-00098.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00043-of-00098.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00040-of-00098.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00041-of-00098.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00040-of-00098.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00040-of-00098.safetensors", + "model.layers.14.block_sparse_moe.experts.0.w1.weight": "model-00044-of-00098.safetensors", + "model.layers.14.block_sparse_moe.experts.0.w2.weight": "model-00044-of-00098.safetensors", + "model.layers.14.block_sparse_moe.experts.0.w3.weight": "model-00044-of-00098.safetensors", + "model.layers.14.block_sparse_moe.experts.1.w1.weight": "model-00044-of-00098.safetensors", + "model.layers.14.block_sparse_moe.experts.1.w2.weight": "model-00044-of-00098.safetensors", + "model.layers.14.block_sparse_moe.experts.1.w3.weight": "model-00044-of-00098.safetensors", + "model.layers.14.block_sparse_moe.experts.2.w1.weight": "model-00044-of-00098.safetensors", + "model.layers.14.block_sparse_moe.experts.2.w2.weight": "model-00044-of-00098.safetensors", + "model.layers.14.block_sparse_moe.experts.2.w3.weight": "model-00045-of-00098.safetensors", + "model.layers.14.block_sparse_moe.experts.3.w1.weight": "model-00045-of-00098.safetensors", + "model.layers.14.block_sparse_moe.experts.3.w2.weight": "model-00045-of-00098.safetensors", + "model.layers.14.block_sparse_moe.experts.3.w3.weight": "model-00045-of-00098.safetensors", + "model.layers.14.block_sparse_moe.experts.4.w1.weight": "model-00045-of-00098.safetensors", + "model.layers.14.block_sparse_moe.experts.4.w2.weight": "model-00045-of-00098.safetensors", + "model.layers.14.block_sparse_moe.experts.4.w3.weight": "model-00045-of-00098.safetensors", + "model.layers.14.block_sparse_moe.experts.5.w1.weight": "model-00045-of-00098.safetensors", + "model.layers.14.block_sparse_moe.experts.5.w2.weight": "model-00046-of-00098.safetensors", + "model.layers.14.block_sparse_moe.experts.5.w3.weight": "model-00046-of-00098.safetensors", + "model.layers.14.block_sparse_moe.experts.6.w1.weight": "model-00046-of-00098.safetensors", + "model.layers.14.block_sparse_moe.experts.6.w2.weight": "model-00046-of-00098.safetensors", + "model.layers.14.block_sparse_moe.experts.6.w3.weight": "model-00046-of-00098.safetensors", + "model.layers.14.block_sparse_moe.experts.7.w1.weight": "model-00046-of-00098.safetensors", + "model.layers.14.block_sparse_moe.experts.7.w2.weight": "model-00046-of-00098.safetensors", + "model.layers.14.block_sparse_moe.experts.7.w3.weight": "model-00046-of-00098.safetensors", + "model.layers.14.block_sparse_moe.gate.weight": "model-00044-of-00098.safetensors", + "model.layers.14.input_layernorm.weight": "model-00046-of-00098.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00046-of-00098.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00043-of-00098.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00044-of-00098.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00043-of-00098.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00043-of-00098.safetensors", + "model.layers.15.block_sparse_moe.experts.0.w1.weight": "model-00047-of-00098.safetensors", + "model.layers.15.block_sparse_moe.experts.0.w2.weight": "model-00047-of-00098.safetensors", + "model.layers.15.block_sparse_moe.experts.0.w3.weight": "model-00047-of-00098.safetensors", + "model.layers.15.block_sparse_moe.experts.1.w1.weight": "model-00047-of-00098.safetensors", + "model.layers.15.block_sparse_moe.experts.1.w2.weight": "model-00047-of-00098.safetensors", + "model.layers.15.block_sparse_moe.experts.1.w3.weight": "model-00047-of-00098.safetensors", + "model.layers.15.block_sparse_moe.experts.2.w1.weight": "model-00047-of-00098.safetensors", + "model.layers.15.block_sparse_moe.experts.2.w2.weight": "model-00047-of-00098.safetensors", + "model.layers.15.block_sparse_moe.experts.2.w3.weight": "model-00048-of-00098.safetensors", + "model.layers.15.block_sparse_moe.experts.3.w1.weight": "model-00048-of-00098.safetensors", + "model.layers.15.block_sparse_moe.experts.3.w2.weight": "model-00048-of-00098.safetensors", + "model.layers.15.block_sparse_moe.experts.3.w3.weight": "model-00048-of-00098.safetensors", + "model.layers.15.block_sparse_moe.experts.4.w1.weight": "model-00048-of-00098.safetensors", + "model.layers.15.block_sparse_moe.experts.4.w2.weight": "model-00048-of-00098.safetensors", + "model.layers.15.block_sparse_moe.experts.4.w3.weight": "model-00048-of-00098.safetensors", + "model.layers.15.block_sparse_moe.experts.5.w1.weight": "model-00048-of-00098.safetensors", + "model.layers.15.block_sparse_moe.experts.5.w2.weight": "model-00049-of-00098.safetensors", + "model.layers.15.block_sparse_moe.experts.5.w3.weight": "model-00049-of-00098.safetensors", + "model.layers.15.block_sparse_moe.experts.6.w1.weight": "model-00049-of-00098.safetensors", + "model.layers.15.block_sparse_moe.experts.6.w2.weight": "model-00049-of-00098.safetensors", + "model.layers.15.block_sparse_moe.experts.6.w3.weight": "model-00049-of-00098.safetensors", + "model.layers.15.block_sparse_moe.experts.7.w1.weight": "model-00049-of-00098.safetensors", + "model.layers.15.block_sparse_moe.experts.7.w2.weight": "model-00049-of-00098.safetensors", + "model.layers.15.block_sparse_moe.experts.7.w3.weight": "model-00049-of-00098.safetensors", + "model.layers.15.block_sparse_moe.gate.weight": "model-00047-of-00098.safetensors", + "model.layers.15.input_layernorm.weight": "model-00049-of-00098.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00049-of-00098.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00046-of-00098.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00047-of-00098.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00046-of-00098.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00046-of-00098.safetensors", + "model.layers.16.block_sparse_moe.experts.0.w1.weight": "model-00050-of-00098.safetensors", + "model.layers.16.block_sparse_moe.experts.0.w2.weight": "model-00050-of-00098.safetensors", + "model.layers.16.block_sparse_moe.experts.0.w3.weight": "model-00050-of-00098.safetensors", + "model.layers.16.block_sparse_moe.experts.1.w1.weight": "model-00050-of-00098.safetensors", + "model.layers.16.block_sparse_moe.experts.1.w2.weight": "model-00050-of-00098.safetensors", + "model.layers.16.block_sparse_moe.experts.1.w3.weight": "model-00050-of-00098.safetensors", + "model.layers.16.block_sparse_moe.experts.2.w1.weight": "model-00050-of-00098.safetensors", + "model.layers.16.block_sparse_moe.experts.2.w2.weight": "model-00050-of-00098.safetensors", + "model.layers.16.block_sparse_moe.experts.2.w3.weight": "model-00051-of-00098.safetensors", + "model.layers.16.block_sparse_moe.experts.3.w1.weight": "model-00051-of-00098.safetensors", + "model.layers.16.block_sparse_moe.experts.3.w2.weight": "model-00051-of-00098.safetensors", + "model.layers.16.block_sparse_moe.experts.3.w3.weight": "model-00051-of-00098.safetensors", + "model.layers.16.block_sparse_moe.experts.4.w1.weight": "model-00051-of-00098.safetensors", + "model.layers.16.block_sparse_moe.experts.4.w2.weight": "model-00051-of-00098.safetensors", + "model.layers.16.block_sparse_moe.experts.4.w3.weight": "model-00051-of-00098.safetensors", + "model.layers.16.block_sparse_moe.experts.5.w1.weight": "model-00051-of-00098.safetensors", + "model.layers.16.block_sparse_moe.experts.5.w2.weight": "model-00052-of-00098.safetensors", + "model.layers.16.block_sparse_moe.experts.5.w3.weight": "model-00052-of-00098.safetensors", + "model.layers.16.block_sparse_moe.experts.6.w1.weight": "model-00052-of-00098.safetensors", + "model.layers.16.block_sparse_moe.experts.6.w2.weight": "model-00052-of-00098.safetensors", + "model.layers.16.block_sparse_moe.experts.6.w3.weight": "model-00052-of-00098.safetensors", + "model.layers.16.block_sparse_moe.experts.7.w1.weight": "model-00052-of-00098.safetensors", + "model.layers.16.block_sparse_moe.experts.7.w2.weight": "model-00052-of-00098.safetensors", + "model.layers.16.block_sparse_moe.experts.7.w3.weight": "model-00052-of-00098.safetensors", + "model.layers.16.block_sparse_moe.gate.weight": "model-00050-of-00098.safetensors", + "model.layers.16.input_layernorm.weight": "model-00052-of-00098.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00052-of-00098.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00049-of-00098.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00050-of-00098.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00049-of-00098.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00049-of-00098.safetensors", + "model.layers.17.block_sparse_moe.experts.0.w1.weight": "model-00053-of-00098.safetensors", + "model.layers.17.block_sparse_moe.experts.0.w2.weight": "model-00053-of-00098.safetensors", + "model.layers.17.block_sparse_moe.experts.0.w3.weight": "model-00053-of-00098.safetensors", + "model.layers.17.block_sparse_moe.experts.1.w1.weight": "model-00053-of-00098.safetensors", + "model.layers.17.block_sparse_moe.experts.1.w2.weight": "model-00053-of-00098.safetensors", + "model.layers.17.block_sparse_moe.experts.1.w3.weight": "model-00053-of-00098.safetensors", + "model.layers.17.block_sparse_moe.experts.2.w1.weight": "model-00053-of-00098.safetensors", + "model.layers.17.block_sparse_moe.experts.2.w2.weight": "model-00053-of-00098.safetensors", + "model.layers.17.block_sparse_moe.experts.2.w3.weight": "model-00054-of-00098.safetensors", + "model.layers.17.block_sparse_moe.experts.3.w1.weight": "model-00054-of-00098.safetensors", + "model.layers.17.block_sparse_moe.experts.3.w2.weight": "model-00054-of-00098.safetensors", + "model.layers.17.block_sparse_moe.experts.3.w3.weight": "model-00054-of-00098.safetensors", + "model.layers.17.block_sparse_moe.experts.4.w1.weight": "model-00054-of-00098.safetensors", + "model.layers.17.block_sparse_moe.experts.4.w2.weight": "model-00054-of-00098.safetensors", + "model.layers.17.block_sparse_moe.experts.4.w3.weight": "model-00054-of-00098.safetensors", + "model.layers.17.block_sparse_moe.experts.5.w1.weight": "model-00054-of-00098.safetensors", + "model.layers.17.block_sparse_moe.experts.5.w2.weight": "model-00055-of-00098.safetensors", + "model.layers.17.block_sparse_moe.experts.5.w3.weight": "model-00055-of-00098.safetensors", + "model.layers.17.block_sparse_moe.experts.6.w1.weight": "model-00055-of-00098.safetensors", + "model.layers.17.block_sparse_moe.experts.6.w2.weight": "model-00055-of-00098.safetensors", + "model.layers.17.block_sparse_moe.experts.6.w3.weight": "model-00055-of-00098.safetensors", + "model.layers.17.block_sparse_moe.experts.7.w1.weight": "model-00055-of-00098.safetensors", + "model.layers.17.block_sparse_moe.experts.7.w2.weight": "model-00055-of-00098.safetensors", + "model.layers.17.block_sparse_moe.experts.7.w3.weight": "model-00055-of-00098.safetensors", + "model.layers.17.block_sparse_moe.gate.weight": "model-00053-of-00098.safetensors", + "model.layers.17.input_layernorm.weight": "model-00055-of-00098.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00055-of-00098.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00052-of-00098.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00053-of-00098.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00052-of-00098.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00052-of-00098.safetensors", + "model.layers.18.block_sparse_moe.experts.0.w1.weight": "model-00056-of-00098.safetensors", + "model.layers.18.block_sparse_moe.experts.0.w2.weight": "model-00056-of-00098.safetensors", + "model.layers.18.block_sparse_moe.experts.0.w3.weight": "model-00056-of-00098.safetensors", + "model.layers.18.block_sparse_moe.experts.1.w1.weight": "model-00056-of-00098.safetensors", + "model.layers.18.block_sparse_moe.experts.1.w2.weight": "model-00056-of-00098.safetensors", + "model.layers.18.block_sparse_moe.experts.1.w3.weight": "model-00056-of-00098.safetensors", + "model.layers.18.block_sparse_moe.experts.2.w1.weight": "model-00056-of-00098.safetensors", + "model.layers.18.block_sparse_moe.experts.2.w2.weight": "model-00056-of-00098.safetensors", + "model.layers.18.block_sparse_moe.experts.2.w3.weight": "model-00057-of-00098.safetensors", + "model.layers.18.block_sparse_moe.experts.3.w1.weight": "model-00057-of-00098.safetensors", + "model.layers.18.block_sparse_moe.experts.3.w2.weight": "model-00057-of-00098.safetensors", + "model.layers.18.block_sparse_moe.experts.3.w3.weight": "model-00057-of-00098.safetensors", + "model.layers.18.block_sparse_moe.experts.4.w1.weight": "model-00057-of-00098.safetensors", + "model.layers.18.block_sparse_moe.experts.4.w2.weight": "model-00057-of-00098.safetensors", + "model.layers.18.block_sparse_moe.experts.4.w3.weight": "model-00057-of-00098.safetensors", + "model.layers.18.block_sparse_moe.experts.5.w1.weight": "model-00057-of-00098.safetensors", + "model.layers.18.block_sparse_moe.experts.5.w2.weight": "model-00058-of-00098.safetensors", + "model.layers.18.block_sparse_moe.experts.5.w3.weight": "model-00058-of-00098.safetensors", + "model.layers.18.block_sparse_moe.experts.6.w1.weight": "model-00058-of-00098.safetensors", + "model.layers.18.block_sparse_moe.experts.6.w2.weight": "model-00058-of-00098.safetensors", + "model.layers.18.block_sparse_moe.experts.6.w3.weight": "model-00058-of-00098.safetensors", + "model.layers.18.block_sparse_moe.experts.7.w1.weight": "model-00058-of-00098.safetensors", + "model.layers.18.block_sparse_moe.experts.7.w2.weight": "model-00058-of-00098.safetensors", + "model.layers.18.block_sparse_moe.experts.7.w3.weight": "model-00058-of-00098.safetensors", + "model.layers.18.block_sparse_moe.gate.weight": "model-00056-of-00098.safetensors", + "model.layers.18.input_layernorm.weight": "model-00058-of-00098.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00058-of-00098.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00055-of-00098.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00056-of-00098.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00055-of-00098.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00055-of-00098.safetensors", + "model.layers.19.block_sparse_moe.experts.0.w1.weight": "model-00059-of-00098.safetensors", + "model.layers.19.block_sparse_moe.experts.0.w2.weight": "model-00059-of-00098.safetensors", + "model.layers.19.block_sparse_moe.experts.0.w3.weight": "model-00059-of-00098.safetensors", + "model.layers.19.block_sparse_moe.experts.1.w1.weight": "model-00059-of-00098.safetensors", + "model.layers.19.block_sparse_moe.experts.1.w2.weight": "model-00059-of-00098.safetensors", + "model.layers.19.block_sparse_moe.experts.1.w3.weight": "model-00059-of-00098.safetensors", + "model.layers.19.block_sparse_moe.experts.2.w1.weight": "model-00059-of-00098.safetensors", + "model.layers.19.block_sparse_moe.experts.2.w2.weight": "model-00059-of-00098.safetensors", + "model.layers.19.block_sparse_moe.experts.2.w3.weight": "model-00060-of-00098.safetensors", + "model.layers.19.block_sparse_moe.experts.3.w1.weight": "model-00060-of-00098.safetensors", + "model.layers.19.block_sparse_moe.experts.3.w2.weight": "model-00060-of-00098.safetensors", + "model.layers.19.block_sparse_moe.experts.3.w3.weight": "model-00060-of-00098.safetensors", + "model.layers.19.block_sparse_moe.experts.4.w1.weight": "model-00060-of-00098.safetensors", + "model.layers.19.block_sparse_moe.experts.4.w2.weight": "model-00060-of-00098.safetensors", + "model.layers.19.block_sparse_moe.experts.4.w3.weight": "model-00060-of-00098.safetensors", + "model.layers.19.block_sparse_moe.experts.5.w1.weight": "model-00060-of-00098.safetensors", + "model.layers.19.block_sparse_moe.experts.5.w2.weight": "model-00061-of-00098.safetensors", + "model.layers.19.block_sparse_moe.experts.5.w3.weight": "model-00061-of-00098.safetensors", + "model.layers.19.block_sparse_moe.experts.6.w1.weight": "model-00061-of-00098.safetensors", + "model.layers.19.block_sparse_moe.experts.6.w2.weight": "model-00061-of-00098.safetensors", + "model.layers.19.block_sparse_moe.experts.6.w3.weight": "model-00061-of-00098.safetensors", + "model.layers.19.block_sparse_moe.experts.7.w1.weight": "model-00061-of-00098.safetensors", + "model.layers.19.block_sparse_moe.experts.7.w2.weight": "model-00061-of-00098.safetensors", + "model.layers.19.block_sparse_moe.experts.7.w3.weight": "model-00061-of-00098.safetensors", + "model.layers.19.block_sparse_moe.gate.weight": "model-00059-of-00098.safetensors", + "model.layers.19.input_layernorm.weight": "model-00061-of-00098.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00061-of-00098.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00058-of-00098.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00059-of-00098.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00058-of-00098.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00058-of-00098.safetensors", + "model.layers.2.block_sparse_moe.experts.0.w1.weight": "model-00007-of-00098.safetensors", + "model.layers.2.block_sparse_moe.experts.0.w2.weight": "model-00007-of-00098.safetensors", + "model.layers.2.block_sparse_moe.experts.0.w3.weight": "model-00007-of-00098.safetensors", + "model.layers.2.block_sparse_moe.experts.1.w1.weight": "model-00008-of-00098.safetensors", + "model.layers.2.block_sparse_moe.experts.1.w2.weight": "model-00008-of-00098.safetensors", + "model.layers.2.block_sparse_moe.experts.1.w3.weight": "model-00008-of-00098.safetensors", + "model.layers.2.block_sparse_moe.experts.2.w1.weight": "model-00008-of-00098.safetensors", + "model.layers.2.block_sparse_moe.experts.2.w2.weight": "model-00008-of-00098.safetensors", + "model.layers.2.block_sparse_moe.experts.2.w3.weight": "model-00008-of-00098.safetensors", + "model.layers.2.block_sparse_moe.experts.3.w1.weight": "model-00008-of-00098.safetensors", + "model.layers.2.block_sparse_moe.experts.3.w2.weight": "model-00008-of-00098.safetensors", + "model.layers.2.block_sparse_moe.experts.3.w3.weight": "model-00009-of-00098.safetensors", + "model.layers.2.block_sparse_moe.experts.4.w1.weight": "model-00009-of-00098.safetensors", + "model.layers.2.block_sparse_moe.experts.4.w2.weight": "model-00009-of-00098.safetensors", + "model.layers.2.block_sparse_moe.experts.4.w3.weight": "model-00009-of-00098.safetensors", + "model.layers.2.block_sparse_moe.experts.5.w1.weight": "model-00009-of-00098.safetensors", + "model.layers.2.block_sparse_moe.experts.5.w2.weight": "model-00009-of-00098.safetensors", + "model.layers.2.block_sparse_moe.experts.5.w3.weight": "model-00009-of-00098.safetensors", + "model.layers.2.block_sparse_moe.experts.6.w1.weight": "model-00009-of-00098.safetensors", + "model.layers.2.block_sparse_moe.experts.6.w2.weight": "model-00010-of-00098.safetensors", + "model.layers.2.block_sparse_moe.experts.6.w3.weight": "model-00010-of-00098.safetensors", + "model.layers.2.block_sparse_moe.experts.7.w1.weight": "model-00010-of-00098.safetensors", + "model.layers.2.block_sparse_moe.experts.7.w2.weight": "model-00010-of-00098.safetensors", + "model.layers.2.block_sparse_moe.experts.7.w3.weight": "model-00010-of-00098.safetensors", + "model.layers.2.block_sparse_moe.gate.weight": "model-00007-of-00098.safetensors", + "model.layers.2.input_layernorm.weight": "model-00010-of-00098.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00010-of-00098.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00007-of-00098.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00007-of-00098.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00007-of-00098.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00007-of-00098.safetensors", + "model.layers.20.block_sparse_moe.experts.0.w1.weight": "model-00062-of-00098.safetensors", + "model.layers.20.block_sparse_moe.experts.0.w2.weight": "model-00062-of-00098.safetensors", + "model.layers.20.block_sparse_moe.experts.0.w3.weight": "model-00062-of-00098.safetensors", + "model.layers.20.block_sparse_moe.experts.1.w1.weight": "model-00062-of-00098.safetensors", + "model.layers.20.block_sparse_moe.experts.1.w2.weight": "model-00062-of-00098.safetensors", + "model.layers.20.block_sparse_moe.experts.1.w3.weight": "model-00062-of-00098.safetensors", + "model.layers.20.block_sparse_moe.experts.2.w1.weight": "model-00062-of-00098.safetensors", + "model.layers.20.block_sparse_moe.experts.2.w2.weight": "model-00062-of-00098.safetensors", + "model.layers.20.block_sparse_moe.experts.2.w3.weight": "model-00063-of-00098.safetensors", + "model.layers.20.block_sparse_moe.experts.3.w1.weight": "model-00063-of-00098.safetensors", + "model.layers.20.block_sparse_moe.experts.3.w2.weight": "model-00063-of-00098.safetensors", + "model.layers.20.block_sparse_moe.experts.3.w3.weight": "model-00063-of-00098.safetensors", + "model.layers.20.block_sparse_moe.experts.4.w1.weight": "model-00063-of-00098.safetensors", + "model.layers.20.block_sparse_moe.experts.4.w2.weight": "model-00063-of-00098.safetensors", + "model.layers.20.block_sparse_moe.experts.4.w3.weight": "model-00063-of-00098.safetensors", + "model.layers.20.block_sparse_moe.experts.5.w1.weight": "model-00063-of-00098.safetensors", + "model.layers.20.block_sparse_moe.experts.5.w2.weight": "model-00064-of-00098.safetensors", + "model.layers.20.block_sparse_moe.experts.5.w3.weight": "model-00064-of-00098.safetensors", + "model.layers.20.block_sparse_moe.experts.6.w1.weight": "model-00064-of-00098.safetensors", + "model.layers.20.block_sparse_moe.experts.6.w2.weight": "model-00064-of-00098.safetensors", + "model.layers.20.block_sparse_moe.experts.6.w3.weight": "model-00064-of-00098.safetensors", + "model.layers.20.block_sparse_moe.experts.7.w1.weight": "model-00064-of-00098.safetensors", + "model.layers.20.block_sparse_moe.experts.7.w2.weight": "model-00064-of-00098.safetensors", + "model.layers.20.block_sparse_moe.experts.7.w3.weight": "model-00064-of-00098.safetensors", + "model.layers.20.block_sparse_moe.gate.weight": "model-00062-of-00098.safetensors", + "model.layers.20.input_layernorm.weight": "model-00064-of-00098.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00064-of-00098.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00061-of-00098.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00062-of-00098.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00061-of-00098.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00061-of-00098.safetensors", + "model.layers.21.block_sparse_moe.experts.0.w1.weight": "model-00065-of-00098.safetensors", + "model.layers.21.block_sparse_moe.experts.0.w2.weight": "model-00065-of-00098.safetensors", + "model.layers.21.block_sparse_moe.experts.0.w3.weight": "model-00065-of-00098.safetensors", + "model.layers.21.block_sparse_moe.experts.1.w1.weight": "model-00065-of-00098.safetensors", + "model.layers.21.block_sparse_moe.experts.1.w2.weight": "model-00065-of-00098.safetensors", + "model.layers.21.block_sparse_moe.experts.1.w3.weight": "model-00065-of-00098.safetensors", + "model.layers.21.block_sparse_moe.experts.2.w1.weight": "model-00065-of-00098.safetensors", + "model.layers.21.block_sparse_moe.experts.2.w2.weight": "model-00065-of-00098.safetensors", + "model.layers.21.block_sparse_moe.experts.2.w3.weight": "model-00066-of-00098.safetensors", + "model.layers.21.block_sparse_moe.experts.3.w1.weight": "model-00066-of-00098.safetensors", + "model.layers.21.block_sparse_moe.experts.3.w2.weight": "model-00066-of-00098.safetensors", + "model.layers.21.block_sparse_moe.experts.3.w3.weight": "model-00066-of-00098.safetensors", + "model.layers.21.block_sparse_moe.experts.4.w1.weight": "model-00066-of-00098.safetensors", + "model.layers.21.block_sparse_moe.experts.4.w2.weight": "model-00066-of-00098.safetensors", + "model.layers.21.block_sparse_moe.experts.4.w3.weight": "model-00066-of-00098.safetensors", + "model.layers.21.block_sparse_moe.experts.5.w1.weight": "model-00066-of-00098.safetensors", + "model.layers.21.block_sparse_moe.experts.5.w2.weight": "model-00067-of-00098.safetensors", + "model.layers.21.block_sparse_moe.experts.5.w3.weight": "model-00067-of-00098.safetensors", + "model.layers.21.block_sparse_moe.experts.6.w1.weight": "model-00067-of-00098.safetensors", + "model.layers.21.block_sparse_moe.experts.6.w2.weight": "model-00067-of-00098.safetensors", + "model.layers.21.block_sparse_moe.experts.6.w3.weight": "model-00067-of-00098.safetensors", + "model.layers.21.block_sparse_moe.experts.7.w1.weight": "model-00067-of-00098.safetensors", + "model.layers.21.block_sparse_moe.experts.7.w2.weight": "model-00067-of-00098.safetensors", + "model.layers.21.block_sparse_moe.experts.7.w3.weight": "model-00067-of-00098.safetensors", + "model.layers.21.block_sparse_moe.gate.weight": "model-00065-of-00098.safetensors", + "model.layers.21.input_layernorm.weight": "model-00067-of-00098.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00067-of-00098.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00064-of-00098.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00065-of-00098.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00064-of-00098.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00064-of-00098.safetensors", + "model.layers.22.block_sparse_moe.experts.0.w1.weight": "model-00068-of-00098.safetensors", + "model.layers.22.block_sparse_moe.experts.0.w2.weight": "model-00068-of-00098.safetensors", + "model.layers.22.block_sparse_moe.experts.0.w3.weight": "model-00068-of-00098.safetensors", + "model.layers.22.block_sparse_moe.experts.1.w1.weight": "model-00068-of-00098.safetensors", + "model.layers.22.block_sparse_moe.experts.1.w2.weight": "model-00068-of-00098.safetensors", + "model.layers.22.block_sparse_moe.experts.1.w3.weight": "model-00068-of-00098.safetensors", + "model.layers.22.block_sparse_moe.experts.2.w1.weight": "model-00068-of-00098.safetensors", + "model.layers.22.block_sparse_moe.experts.2.w2.weight": "model-00068-of-00098.safetensors", + "model.layers.22.block_sparse_moe.experts.2.w3.weight": "model-00069-of-00098.safetensors", + "model.layers.22.block_sparse_moe.experts.3.w1.weight": "model-00069-of-00098.safetensors", + "model.layers.22.block_sparse_moe.experts.3.w2.weight": "model-00069-of-00098.safetensors", + "model.layers.22.block_sparse_moe.experts.3.w3.weight": "model-00069-of-00098.safetensors", + "model.layers.22.block_sparse_moe.experts.4.w1.weight": "model-00069-of-00098.safetensors", + "model.layers.22.block_sparse_moe.experts.4.w2.weight": "model-00069-of-00098.safetensors", + "model.layers.22.block_sparse_moe.experts.4.w3.weight": "model-00069-of-00098.safetensors", + "model.layers.22.block_sparse_moe.experts.5.w1.weight": "model-00069-of-00098.safetensors", + "model.layers.22.block_sparse_moe.experts.5.w2.weight": "model-00070-of-00098.safetensors", + "model.layers.22.block_sparse_moe.experts.5.w3.weight": "model-00070-of-00098.safetensors", + "model.layers.22.block_sparse_moe.experts.6.w1.weight": "model-00070-of-00098.safetensors", + "model.layers.22.block_sparse_moe.experts.6.w2.weight": "model-00070-of-00098.safetensors", + "model.layers.22.block_sparse_moe.experts.6.w3.weight": "model-00070-of-00098.safetensors", + "model.layers.22.block_sparse_moe.experts.7.w1.weight": "model-00070-of-00098.safetensors", + "model.layers.22.block_sparse_moe.experts.7.w2.weight": "model-00070-of-00098.safetensors", + "model.layers.22.block_sparse_moe.experts.7.w3.weight": "model-00070-of-00098.safetensors", + "model.layers.22.block_sparse_moe.gate.weight": "model-00068-of-00098.safetensors", + "model.layers.22.input_layernorm.weight": "model-00070-of-00098.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00070-of-00098.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00067-of-00098.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00068-of-00098.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00067-of-00098.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00067-of-00098.safetensors", + "model.layers.23.block_sparse_moe.experts.0.w1.weight": "model-00071-of-00098.safetensors", + "model.layers.23.block_sparse_moe.experts.0.w2.weight": "model-00071-of-00098.safetensors", + "model.layers.23.block_sparse_moe.experts.0.w3.weight": "model-00071-of-00098.safetensors", + "model.layers.23.block_sparse_moe.experts.1.w1.weight": "model-00071-of-00098.safetensors", + "model.layers.23.block_sparse_moe.experts.1.w2.weight": "model-00071-of-00098.safetensors", + "model.layers.23.block_sparse_moe.experts.1.w3.weight": "model-00071-of-00098.safetensors", + "model.layers.23.block_sparse_moe.experts.2.w1.weight": "model-00071-of-00098.safetensors", + "model.layers.23.block_sparse_moe.experts.2.w2.weight": "model-00071-of-00098.safetensors", + "model.layers.23.block_sparse_moe.experts.2.w3.weight": "model-00072-of-00098.safetensors", + "model.layers.23.block_sparse_moe.experts.3.w1.weight": "model-00072-of-00098.safetensors", + "model.layers.23.block_sparse_moe.experts.3.w2.weight": "model-00072-of-00098.safetensors", + "model.layers.23.block_sparse_moe.experts.3.w3.weight": "model-00072-of-00098.safetensors", + "model.layers.23.block_sparse_moe.experts.4.w1.weight": "model-00072-of-00098.safetensors", + "model.layers.23.block_sparse_moe.experts.4.w2.weight": "model-00072-of-00098.safetensors", + "model.layers.23.block_sparse_moe.experts.4.w3.weight": "model-00072-of-00098.safetensors", + "model.layers.23.block_sparse_moe.experts.5.w1.weight": "model-00072-of-00098.safetensors", + "model.layers.23.block_sparse_moe.experts.5.w2.weight": "model-00073-of-00098.safetensors", + "model.layers.23.block_sparse_moe.experts.5.w3.weight": "model-00073-of-00098.safetensors", + "model.layers.23.block_sparse_moe.experts.6.w1.weight": "model-00073-of-00098.safetensors", + "model.layers.23.block_sparse_moe.experts.6.w2.weight": "model-00073-of-00098.safetensors", + "model.layers.23.block_sparse_moe.experts.6.w3.weight": "model-00073-of-00098.safetensors", + "model.layers.23.block_sparse_moe.experts.7.w1.weight": "model-00073-of-00098.safetensors", + "model.layers.23.block_sparse_moe.experts.7.w2.weight": "model-00073-of-00098.safetensors", + "model.layers.23.block_sparse_moe.experts.7.w3.weight": "model-00073-of-00098.safetensors", + "model.layers.23.block_sparse_moe.gate.weight": "model-00071-of-00098.safetensors", + "model.layers.23.input_layernorm.weight": "model-00073-of-00098.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00073-of-00098.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00070-of-00098.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00071-of-00098.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00070-of-00098.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00070-of-00098.safetensors", + "model.layers.24.block_sparse_moe.experts.0.w1.weight": "model-00074-of-00098.safetensors", + "model.layers.24.block_sparse_moe.experts.0.w2.weight": "model-00074-of-00098.safetensors", + "model.layers.24.block_sparse_moe.experts.0.w3.weight": "model-00074-of-00098.safetensors", + "model.layers.24.block_sparse_moe.experts.1.w1.weight": "model-00074-of-00098.safetensors", + "model.layers.24.block_sparse_moe.experts.1.w2.weight": "model-00074-of-00098.safetensors", + "model.layers.24.block_sparse_moe.experts.1.w3.weight": "model-00074-of-00098.safetensors", + "model.layers.24.block_sparse_moe.experts.2.w1.weight": "model-00074-of-00098.safetensors", + "model.layers.24.block_sparse_moe.experts.2.w2.weight": "model-00074-of-00098.safetensors", + "model.layers.24.block_sparse_moe.experts.2.w3.weight": "model-00075-of-00098.safetensors", + "model.layers.24.block_sparse_moe.experts.3.w1.weight": "model-00075-of-00098.safetensors", + "model.layers.24.block_sparse_moe.experts.3.w2.weight": "model-00075-of-00098.safetensors", + "model.layers.24.block_sparse_moe.experts.3.w3.weight": "model-00075-of-00098.safetensors", + "model.layers.24.block_sparse_moe.experts.4.w1.weight": "model-00075-of-00098.safetensors", + "model.layers.24.block_sparse_moe.experts.4.w2.weight": "model-00075-of-00098.safetensors", + "model.layers.24.block_sparse_moe.experts.4.w3.weight": "model-00075-of-00098.safetensors", + "model.layers.24.block_sparse_moe.experts.5.w1.weight": "model-00075-of-00098.safetensors", + "model.layers.24.block_sparse_moe.experts.5.w2.weight": "model-00076-of-00098.safetensors", + "model.layers.24.block_sparse_moe.experts.5.w3.weight": "model-00076-of-00098.safetensors", + "model.layers.24.block_sparse_moe.experts.6.w1.weight": "model-00076-of-00098.safetensors", + "model.layers.24.block_sparse_moe.experts.6.w2.weight": "model-00076-of-00098.safetensors", + "model.layers.24.block_sparse_moe.experts.6.w3.weight": "model-00076-of-00098.safetensors", + "model.layers.24.block_sparse_moe.experts.7.w1.weight": "model-00076-of-00098.safetensors", + "model.layers.24.block_sparse_moe.experts.7.w2.weight": "model-00076-of-00098.safetensors", + "model.layers.24.block_sparse_moe.experts.7.w3.weight": "model-00076-of-00098.safetensors", + "model.layers.24.block_sparse_moe.gate.weight": "model-00074-of-00098.safetensors", + "model.layers.24.input_layernorm.weight": "model-00076-of-00098.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00076-of-00098.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00073-of-00098.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00074-of-00098.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00073-of-00098.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00073-of-00098.safetensors", + "model.layers.25.block_sparse_moe.experts.0.w1.weight": "model-00077-of-00098.safetensors", + "model.layers.25.block_sparse_moe.experts.0.w2.weight": "model-00077-of-00098.safetensors", + "model.layers.25.block_sparse_moe.experts.0.w3.weight": "model-00077-of-00098.safetensors", + "model.layers.25.block_sparse_moe.experts.1.w1.weight": "model-00077-of-00098.safetensors", + "model.layers.25.block_sparse_moe.experts.1.w2.weight": "model-00077-of-00098.safetensors", + "model.layers.25.block_sparse_moe.experts.1.w3.weight": "model-00077-of-00098.safetensors", + "model.layers.25.block_sparse_moe.experts.2.w1.weight": "model-00077-of-00098.safetensors", + "model.layers.25.block_sparse_moe.experts.2.w2.weight": "model-00077-of-00098.safetensors", + "model.layers.25.block_sparse_moe.experts.2.w3.weight": "model-00078-of-00098.safetensors", + "model.layers.25.block_sparse_moe.experts.3.w1.weight": "model-00078-of-00098.safetensors", + "model.layers.25.block_sparse_moe.experts.3.w2.weight": "model-00078-of-00098.safetensors", + "model.layers.25.block_sparse_moe.experts.3.w3.weight": "model-00078-of-00098.safetensors", + "model.layers.25.block_sparse_moe.experts.4.w1.weight": "model-00078-of-00098.safetensors", + "model.layers.25.block_sparse_moe.experts.4.w2.weight": "model-00078-of-00098.safetensors", + "model.layers.25.block_sparse_moe.experts.4.w3.weight": "model-00078-of-00098.safetensors", + "model.layers.25.block_sparse_moe.experts.5.w1.weight": "model-00078-of-00098.safetensors", + "model.layers.25.block_sparse_moe.experts.5.w2.weight": "model-00079-of-00098.safetensors", + "model.layers.25.block_sparse_moe.experts.5.w3.weight": "model-00079-of-00098.safetensors", + "model.layers.25.block_sparse_moe.experts.6.w1.weight": "model-00079-of-00098.safetensors", + "model.layers.25.block_sparse_moe.experts.6.w2.weight": "model-00079-of-00098.safetensors", + "model.layers.25.block_sparse_moe.experts.6.w3.weight": "model-00079-of-00098.safetensors", + "model.layers.25.block_sparse_moe.experts.7.w1.weight": "model-00079-of-00098.safetensors", + "model.layers.25.block_sparse_moe.experts.7.w2.weight": "model-00079-of-00098.safetensors", + "model.layers.25.block_sparse_moe.experts.7.w3.weight": "model-00079-of-00098.safetensors", + "model.layers.25.block_sparse_moe.gate.weight": "model-00077-of-00098.safetensors", + "model.layers.25.input_layernorm.weight": "model-00079-of-00098.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00079-of-00098.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00076-of-00098.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00077-of-00098.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00076-of-00098.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00076-of-00098.safetensors", + "model.layers.26.block_sparse_moe.experts.0.w1.weight": "model-00080-of-00098.safetensors", + "model.layers.26.block_sparse_moe.experts.0.w2.weight": "model-00080-of-00098.safetensors", + "model.layers.26.block_sparse_moe.experts.0.w3.weight": "model-00080-of-00098.safetensors", + "model.layers.26.block_sparse_moe.experts.1.w1.weight": "model-00080-of-00098.safetensors", + "model.layers.26.block_sparse_moe.experts.1.w2.weight": "model-00080-of-00098.safetensors", + "model.layers.26.block_sparse_moe.experts.1.w3.weight": "model-00080-of-00098.safetensors", + "model.layers.26.block_sparse_moe.experts.2.w1.weight": "model-00080-of-00098.safetensors", + "model.layers.26.block_sparse_moe.experts.2.w2.weight": "model-00080-of-00098.safetensors", + "model.layers.26.block_sparse_moe.experts.2.w3.weight": "model-00081-of-00098.safetensors", + "model.layers.26.block_sparse_moe.experts.3.w1.weight": "model-00081-of-00098.safetensors", + "model.layers.26.block_sparse_moe.experts.3.w2.weight": "model-00081-of-00098.safetensors", + "model.layers.26.block_sparse_moe.experts.3.w3.weight": "model-00081-of-00098.safetensors", + "model.layers.26.block_sparse_moe.experts.4.w1.weight": "model-00081-of-00098.safetensors", + "model.layers.26.block_sparse_moe.experts.4.w2.weight": "model-00081-of-00098.safetensors", + "model.layers.26.block_sparse_moe.experts.4.w3.weight": "model-00081-of-00098.safetensors", + "model.layers.26.block_sparse_moe.experts.5.w1.weight": "model-00081-of-00098.safetensors", + "model.layers.26.block_sparse_moe.experts.5.w2.weight": "model-00082-of-00098.safetensors", + "model.layers.26.block_sparse_moe.experts.5.w3.weight": "model-00082-of-00098.safetensors", + "model.layers.26.block_sparse_moe.experts.6.w1.weight": "model-00082-of-00098.safetensors", + "model.layers.26.block_sparse_moe.experts.6.w2.weight": "model-00082-of-00098.safetensors", + "model.layers.26.block_sparse_moe.experts.6.w3.weight": "model-00082-of-00098.safetensors", + "model.layers.26.block_sparse_moe.experts.7.w1.weight": "model-00082-of-00098.safetensors", + "model.layers.26.block_sparse_moe.experts.7.w2.weight": "model-00082-of-00098.safetensors", + "model.layers.26.block_sparse_moe.experts.7.w3.weight": "model-00082-of-00098.safetensors", + "model.layers.26.block_sparse_moe.gate.weight": "model-00080-of-00098.safetensors", + "model.layers.26.input_layernorm.weight": "model-00082-of-00098.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00082-of-00098.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00079-of-00098.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00080-of-00098.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00079-of-00098.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00079-of-00098.safetensors", + "model.layers.27.block_sparse_moe.experts.0.w1.weight": "model-00083-of-00098.safetensors", + "model.layers.27.block_sparse_moe.experts.0.w2.weight": "model-00083-of-00098.safetensors", + "model.layers.27.block_sparse_moe.experts.0.w3.weight": "model-00083-of-00098.safetensors", + "model.layers.27.block_sparse_moe.experts.1.w1.weight": "model-00083-of-00098.safetensors", + "model.layers.27.block_sparse_moe.experts.1.w2.weight": "model-00083-of-00098.safetensors", + "model.layers.27.block_sparse_moe.experts.1.w3.weight": "model-00083-of-00098.safetensors", + "model.layers.27.block_sparse_moe.experts.2.w1.weight": "model-00083-of-00098.safetensors", + "model.layers.27.block_sparse_moe.experts.2.w2.weight": "model-00083-of-00098.safetensors", + "model.layers.27.block_sparse_moe.experts.2.w3.weight": "model-00084-of-00098.safetensors", + "model.layers.27.block_sparse_moe.experts.3.w1.weight": "model-00084-of-00098.safetensors", + "model.layers.27.block_sparse_moe.experts.3.w2.weight": "model-00084-of-00098.safetensors", + "model.layers.27.block_sparse_moe.experts.3.w3.weight": "model-00084-of-00098.safetensors", + "model.layers.27.block_sparse_moe.experts.4.w1.weight": "model-00084-of-00098.safetensors", + "model.layers.27.block_sparse_moe.experts.4.w2.weight": "model-00084-of-00098.safetensors", + "model.layers.27.block_sparse_moe.experts.4.w3.weight": "model-00084-of-00098.safetensors", + "model.layers.27.block_sparse_moe.experts.5.w1.weight": "model-00084-of-00098.safetensors", + "model.layers.27.block_sparse_moe.experts.5.w2.weight": "model-00085-of-00098.safetensors", + "model.layers.27.block_sparse_moe.experts.5.w3.weight": "model-00085-of-00098.safetensors", + "model.layers.27.block_sparse_moe.experts.6.w1.weight": "model-00085-of-00098.safetensors", + "model.layers.27.block_sparse_moe.experts.6.w2.weight": "model-00085-of-00098.safetensors", + "model.layers.27.block_sparse_moe.experts.6.w3.weight": "model-00085-of-00098.safetensors", + "model.layers.27.block_sparse_moe.experts.7.w1.weight": "model-00085-of-00098.safetensors", + "model.layers.27.block_sparse_moe.experts.7.w2.weight": "model-00085-of-00098.safetensors", + "model.layers.27.block_sparse_moe.experts.7.w3.weight": "model-00085-of-00098.safetensors", + "model.layers.27.block_sparse_moe.gate.weight": "model-00083-of-00098.safetensors", + "model.layers.27.input_layernorm.weight": "model-00085-of-00098.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00085-of-00098.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00082-of-00098.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00083-of-00098.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00082-of-00098.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00082-of-00098.safetensors", + "model.layers.28.block_sparse_moe.experts.0.w1.weight": "model-00086-of-00098.safetensors", + "model.layers.28.block_sparse_moe.experts.0.w2.weight": "model-00086-of-00098.safetensors", + "model.layers.28.block_sparse_moe.experts.0.w3.weight": "model-00086-of-00098.safetensors", + "model.layers.28.block_sparse_moe.experts.1.w1.weight": "model-00086-of-00098.safetensors", + "model.layers.28.block_sparse_moe.experts.1.w2.weight": "model-00086-of-00098.safetensors", + "model.layers.28.block_sparse_moe.experts.1.w3.weight": "model-00086-of-00098.safetensors", + "model.layers.28.block_sparse_moe.experts.2.w1.weight": "model-00086-of-00098.safetensors", + "model.layers.28.block_sparse_moe.experts.2.w2.weight": "model-00086-of-00098.safetensors", + "model.layers.28.block_sparse_moe.experts.2.w3.weight": "model-00087-of-00098.safetensors", + "model.layers.28.block_sparse_moe.experts.3.w1.weight": "model-00087-of-00098.safetensors", + "model.layers.28.block_sparse_moe.experts.3.w2.weight": "model-00087-of-00098.safetensors", + "model.layers.28.block_sparse_moe.experts.3.w3.weight": "model-00087-of-00098.safetensors", + "model.layers.28.block_sparse_moe.experts.4.w1.weight": "model-00087-of-00098.safetensors", + "model.layers.28.block_sparse_moe.experts.4.w2.weight": "model-00087-of-00098.safetensors", + "model.layers.28.block_sparse_moe.experts.4.w3.weight": "model-00087-of-00098.safetensors", + "model.layers.28.block_sparse_moe.experts.5.w1.weight": "model-00087-of-00098.safetensors", + "model.layers.28.block_sparse_moe.experts.5.w2.weight": "model-00088-of-00098.safetensors", + "model.layers.28.block_sparse_moe.experts.5.w3.weight": "model-00088-of-00098.safetensors", + "model.layers.28.block_sparse_moe.experts.6.w1.weight": "model-00088-of-00098.safetensors", + "model.layers.28.block_sparse_moe.experts.6.w2.weight": "model-00088-of-00098.safetensors", + "model.layers.28.block_sparse_moe.experts.6.w3.weight": "model-00088-of-00098.safetensors", + "model.layers.28.block_sparse_moe.experts.7.w1.weight": "model-00088-of-00098.safetensors", + "model.layers.28.block_sparse_moe.experts.7.w2.weight": "model-00088-of-00098.safetensors", + "model.layers.28.block_sparse_moe.experts.7.w3.weight": "model-00088-of-00098.safetensors", + "model.layers.28.block_sparse_moe.gate.weight": "model-00086-of-00098.safetensors", + "model.layers.28.input_layernorm.weight": "model-00088-of-00098.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00088-of-00098.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00085-of-00098.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00086-of-00098.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00085-of-00098.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00085-of-00098.safetensors", + "model.layers.29.block_sparse_moe.experts.0.w1.weight": "model-00089-of-00098.safetensors", + "model.layers.29.block_sparse_moe.experts.0.w2.weight": "model-00089-of-00098.safetensors", + "model.layers.29.block_sparse_moe.experts.0.w3.weight": "model-00089-of-00098.safetensors", + "model.layers.29.block_sparse_moe.experts.1.w1.weight": "model-00089-of-00098.safetensors", + "model.layers.29.block_sparse_moe.experts.1.w2.weight": "model-00089-of-00098.safetensors", + "model.layers.29.block_sparse_moe.experts.1.w3.weight": "model-00089-of-00098.safetensors", + "model.layers.29.block_sparse_moe.experts.2.w1.weight": "model-00089-of-00098.safetensors", + "model.layers.29.block_sparse_moe.experts.2.w2.weight": "model-00089-of-00098.safetensors", + "model.layers.29.block_sparse_moe.experts.2.w3.weight": "model-00090-of-00098.safetensors", + "model.layers.29.block_sparse_moe.experts.3.w1.weight": "model-00090-of-00098.safetensors", + "model.layers.29.block_sparse_moe.experts.3.w2.weight": "model-00090-of-00098.safetensors", + "model.layers.29.block_sparse_moe.experts.3.w3.weight": "model-00090-of-00098.safetensors", + "model.layers.29.block_sparse_moe.experts.4.w1.weight": "model-00090-of-00098.safetensors", + "model.layers.29.block_sparse_moe.experts.4.w2.weight": "model-00090-of-00098.safetensors", + "model.layers.29.block_sparse_moe.experts.4.w3.weight": "model-00090-of-00098.safetensors", + "model.layers.29.block_sparse_moe.experts.5.w1.weight": "model-00090-of-00098.safetensors", + "model.layers.29.block_sparse_moe.experts.5.w2.weight": "model-00091-of-00098.safetensors", + "model.layers.29.block_sparse_moe.experts.5.w3.weight": "model-00091-of-00098.safetensors", + "model.layers.29.block_sparse_moe.experts.6.w1.weight": "model-00091-of-00098.safetensors", + "model.layers.29.block_sparse_moe.experts.6.w2.weight": "model-00091-of-00098.safetensors", + "model.layers.29.block_sparse_moe.experts.6.w3.weight": "model-00091-of-00098.safetensors", + "model.layers.29.block_sparse_moe.experts.7.w1.weight": "model-00091-of-00098.safetensors", + "model.layers.29.block_sparse_moe.experts.7.w2.weight": "model-00091-of-00098.safetensors", + "model.layers.29.block_sparse_moe.experts.7.w3.weight": "model-00091-of-00098.safetensors", + "model.layers.29.block_sparse_moe.gate.weight": "model-00089-of-00098.safetensors", + "model.layers.29.input_layernorm.weight": "model-00091-of-00098.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00091-of-00098.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00088-of-00098.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00089-of-00098.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00088-of-00098.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00088-of-00098.safetensors", + "model.layers.3.block_sparse_moe.experts.0.w1.weight": "model-00010-of-00098.safetensors", + "model.layers.3.block_sparse_moe.experts.0.w2.weight": "model-00010-of-00098.safetensors", + "model.layers.3.block_sparse_moe.experts.0.w3.weight": "model-00011-of-00098.safetensors", + "model.layers.3.block_sparse_moe.experts.1.w1.weight": "model-00011-of-00098.safetensors", + "model.layers.3.block_sparse_moe.experts.1.w2.weight": "model-00011-of-00098.safetensors", + "model.layers.3.block_sparse_moe.experts.1.w3.weight": "model-00011-of-00098.safetensors", + "model.layers.3.block_sparse_moe.experts.2.w1.weight": "model-00011-of-00098.safetensors", + "model.layers.3.block_sparse_moe.experts.2.w2.weight": "model-00011-of-00098.safetensors", + "model.layers.3.block_sparse_moe.experts.2.w3.weight": "model-00011-of-00098.safetensors", + "model.layers.3.block_sparse_moe.experts.3.w1.weight": "model-00011-of-00098.safetensors", + "model.layers.3.block_sparse_moe.experts.3.w2.weight": "model-00012-of-00098.safetensors", + "model.layers.3.block_sparse_moe.experts.3.w3.weight": "model-00012-of-00098.safetensors", + "model.layers.3.block_sparse_moe.experts.4.w1.weight": "model-00012-of-00098.safetensors", + "model.layers.3.block_sparse_moe.experts.4.w2.weight": "model-00012-of-00098.safetensors", + "model.layers.3.block_sparse_moe.experts.4.w3.weight": "model-00012-of-00098.safetensors", + "model.layers.3.block_sparse_moe.experts.5.w1.weight": "model-00012-of-00098.safetensors", + "model.layers.3.block_sparse_moe.experts.5.w2.weight": "model-00012-of-00098.safetensors", + "model.layers.3.block_sparse_moe.experts.5.w3.weight": "model-00012-of-00098.safetensors", + "model.layers.3.block_sparse_moe.experts.6.w1.weight": "model-00013-of-00098.safetensors", + "model.layers.3.block_sparse_moe.experts.6.w2.weight": "model-00013-of-00098.safetensors", + "model.layers.3.block_sparse_moe.experts.6.w3.weight": "model-00013-of-00098.safetensors", + "model.layers.3.block_sparse_moe.experts.7.w1.weight": "model-00013-of-00098.safetensors", + "model.layers.3.block_sparse_moe.experts.7.w2.weight": "model-00013-of-00098.safetensors", + "model.layers.3.block_sparse_moe.experts.7.w3.weight": "model-00013-of-00098.safetensors", + "model.layers.3.block_sparse_moe.gate.weight": "model-00010-of-00098.safetensors", + "model.layers.3.input_layernorm.weight": "model-00013-of-00098.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00013-of-00098.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00010-of-00098.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00010-of-00098.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00010-of-00098.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00010-of-00098.safetensors", + "model.layers.30.block_sparse_moe.experts.0.w1.weight": "model-00092-of-00098.safetensors", + "model.layers.30.block_sparse_moe.experts.0.w2.weight": "model-00092-of-00098.safetensors", + "model.layers.30.block_sparse_moe.experts.0.w3.weight": "model-00092-of-00098.safetensors", + "model.layers.30.block_sparse_moe.experts.1.w1.weight": "model-00092-of-00098.safetensors", + "model.layers.30.block_sparse_moe.experts.1.w2.weight": "model-00092-of-00098.safetensors", + "model.layers.30.block_sparse_moe.experts.1.w3.weight": "model-00092-of-00098.safetensors", + "model.layers.30.block_sparse_moe.experts.2.w1.weight": "model-00092-of-00098.safetensors", + "model.layers.30.block_sparse_moe.experts.2.w2.weight": "model-00092-of-00098.safetensors", + "model.layers.30.block_sparse_moe.experts.2.w3.weight": "model-00093-of-00098.safetensors", + "model.layers.30.block_sparse_moe.experts.3.w1.weight": "model-00093-of-00098.safetensors", + "model.layers.30.block_sparse_moe.experts.3.w2.weight": "model-00093-of-00098.safetensors", + "model.layers.30.block_sparse_moe.experts.3.w3.weight": "model-00093-of-00098.safetensors", + "model.layers.30.block_sparse_moe.experts.4.w1.weight": "model-00093-of-00098.safetensors", + "model.layers.30.block_sparse_moe.experts.4.w2.weight": "model-00093-of-00098.safetensors", + "model.layers.30.block_sparse_moe.experts.4.w3.weight": "model-00093-of-00098.safetensors", + "model.layers.30.block_sparse_moe.experts.5.w1.weight": "model-00093-of-00098.safetensors", + "model.layers.30.block_sparse_moe.experts.5.w2.weight": "model-00094-of-00098.safetensors", + "model.layers.30.block_sparse_moe.experts.5.w3.weight": "model-00094-of-00098.safetensors", + "model.layers.30.block_sparse_moe.experts.6.w1.weight": "model-00094-of-00098.safetensors", + "model.layers.30.block_sparse_moe.experts.6.w2.weight": "model-00094-of-00098.safetensors", + "model.layers.30.block_sparse_moe.experts.6.w3.weight": "model-00094-of-00098.safetensors", + "model.layers.30.block_sparse_moe.experts.7.w1.weight": "model-00094-of-00098.safetensors", + "model.layers.30.block_sparse_moe.experts.7.w2.weight": "model-00094-of-00098.safetensors", + "model.layers.30.block_sparse_moe.experts.7.w3.weight": "model-00094-of-00098.safetensors", + "model.layers.30.block_sparse_moe.gate.weight": "model-00092-of-00098.safetensors", + "model.layers.30.input_layernorm.weight": "model-00094-of-00098.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00094-of-00098.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00091-of-00098.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00092-of-00098.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00091-of-00098.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00091-of-00098.safetensors", + "model.layers.31.block_sparse_moe.experts.0.w1.weight": "model-00095-of-00098.safetensors", + "model.layers.31.block_sparse_moe.experts.0.w2.weight": "model-00095-of-00098.safetensors", + "model.layers.31.block_sparse_moe.experts.0.w3.weight": "model-00095-of-00098.safetensors", + "model.layers.31.block_sparse_moe.experts.1.w1.weight": "model-00095-of-00098.safetensors", + "model.layers.31.block_sparse_moe.experts.1.w2.weight": "model-00095-of-00098.safetensors", + "model.layers.31.block_sparse_moe.experts.1.w3.weight": "model-00095-of-00098.safetensors", + "model.layers.31.block_sparse_moe.experts.2.w1.weight": "model-00095-of-00098.safetensors", + "model.layers.31.block_sparse_moe.experts.2.w2.weight": "model-00095-of-00098.safetensors", + "model.layers.31.block_sparse_moe.experts.2.w3.weight": "model-00096-of-00098.safetensors", + "model.layers.31.block_sparse_moe.experts.3.w1.weight": "model-00096-of-00098.safetensors", + "model.layers.31.block_sparse_moe.experts.3.w2.weight": "model-00096-of-00098.safetensors", + "model.layers.31.block_sparse_moe.experts.3.w3.weight": "model-00096-of-00098.safetensors", + "model.layers.31.block_sparse_moe.experts.4.w1.weight": "model-00096-of-00098.safetensors", + "model.layers.31.block_sparse_moe.experts.4.w2.weight": "model-00096-of-00098.safetensors", + "model.layers.31.block_sparse_moe.experts.4.w3.weight": "model-00096-of-00098.safetensors", + "model.layers.31.block_sparse_moe.experts.5.w1.weight": "model-00096-of-00098.safetensors", + "model.layers.31.block_sparse_moe.experts.5.w2.weight": "model-00097-of-00098.safetensors", + "model.layers.31.block_sparse_moe.experts.5.w3.weight": "model-00097-of-00098.safetensors", + "model.layers.31.block_sparse_moe.experts.6.w1.weight": "model-00097-of-00098.safetensors", + "model.layers.31.block_sparse_moe.experts.6.w2.weight": "model-00097-of-00098.safetensors", + "model.layers.31.block_sparse_moe.experts.6.w3.weight": "model-00097-of-00098.safetensors", + "model.layers.31.block_sparse_moe.experts.7.w1.weight": "model-00097-of-00098.safetensors", + "model.layers.31.block_sparse_moe.experts.7.w2.weight": "model-00097-of-00098.safetensors", + "model.layers.31.block_sparse_moe.experts.7.w3.weight": "model-00097-of-00098.safetensors", + "model.layers.31.block_sparse_moe.gate.weight": "model-00095-of-00098.safetensors", + "model.layers.31.input_layernorm.weight": "model-00097-of-00098.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00097-of-00098.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00094-of-00098.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00095-of-00098.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00094-of-00098.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00094-of-00098.safetensors", + "model.layers.4.block_sparse_moe.experts.0.w1.weight": "model-00013-of-00098.safetensors", + "model.layers.4.block_sparse_moe.experts.0.w2.weight": "model-00014-of-00098.safetensors", + "model.layers.4.block_sparse_moe.experts.0.w3.weight": "model-00014-of-00098.safetensors", + "model.layers.4.block_sparse_moe.experts.1.w1.weight": "model-00014-of-00098.safetensors", + "model.layers.4.block_sparse_moe.experts.1.w2.weight": "model-00014-of-00098.safetensors", + "model.layers.4.block_sparse_moe.experts.1.w3.weight": "model-00014-of-00098.safetensors", + "model.layers.4.block_sparse_moe.experts.2.w1.weight": "model-00014-of-00098.safetensors", + "model.layers.4.block_sparse_moe.experts.2.w2.weight": "model-00014-of-00098.safetensors", + "model.layers.4.block_sparse_moe.experts.2.w3.weight": "model-00014-of-00098.safetensors", + "model.layers.4.block_sparse_moe.experts.3.w1.weight": "model-00015-of-00098.safetensors", + "model.layers.4.block_sparse_moe.experts.3.w2.weight": "model-00015-of-00098.safetensors", + "model.layers.4.block_sparse_moe.experts.3.w3.weight": "model-00015-of-00098.safetensors", + "model.layers.4.block_sparse_moe.experts.4.w1.weight": "model-00015-of-00098.safetensors", + "model.layers.4.block_sparse_moe.experts.4.w2.weight": "model-00015-of-00098.safetensors", + "model.layers.4.block_sparse_moe.experts.4.w3.weight": "model-00015-of-00098.safetensors", + "model.layers.4.block_sparse_moe.experts.5.w1.weight": "model-00015-of-00098.safetensors", + "model.layers.4.block_sparse_moe.experts.5.w2.weight": "model-00015-of-00098.safetensors", + "model.layers.4.block_sparse_moe.experts.5.w3.weight": "model-00016-of-00098.safetensors", + "model.layers.4.block_sparse_moe.experts.6.w1.weight": "model-00016-of-00098.safetensors", + "model.layers.4.block_sparse_moe.experts.6.w2.weight": "model-00016-of-00098.safetensors", + "model.layers.4.block_sparse_moe.experts.6.w3.weight": "model-00016-of-00098.safetensors", + "model.layers.4.block_sparse_moe.experts.7.w1.weight": "model-00016-of-00098.safetensors", + "model.layers.4.block_sparse_moe.experts.7.w2.weight": "model-00016-of-00098.safetensors", + "model.layers.4.block_sparse_moe.experts.7.w3.weight": "model-00016-of-00098.safetensors", + "model.layers.4.block_sparse_moe.gate.weight": "model-00013-of-00098.safetensors", + "model.layers.4.input_layernorm.weight": "model-00016-of-00098.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00016-of-00098.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00013-of-00098.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00013-of-00098.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00013-of-00098.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00013-of-00098.safetensors", + "model.layers.5.block_sparse_moe.experts.0.w1.weight": "model-00017-of-00098.safetensors", + "model.layers.5.block_sparse_moe.experts.0.w2.weight": "model-00017-of-00098.safetensors", + "model.layers.5.block_sparse_moe.experts.0.w3.weight": "model-00017-of-00098.safetensors", + "model.layers.5.block_sparse_moe.experts.1.w1.weight": "model-00017-of-00098.safetensors", + "model.layers.5.block_sparse_moe.experts.1.w2.weight": "model-00017-of-00098.safetensors", + "model.layers.5.block_sparse_moe.experts.1.w3.weight": "model-00017-of-00098.safetensors", + "model.layers.5.block_sparse_moe.experts.2.w1.weight": "model-00017-of-00098.safetensors", + "model.layers.5.block_sparse_moe.experts.2.w2.weight": "model-00017-of-00098.safetensors", + "model.layers.5.block_sparse_moe.experts.2.w3.weight": "model-00018-of-00098.safetensors", + "model.layers.5.block_sparse_moe.experts.3.w1.weight": "model-00018-of-00098.safetensors", + "model.layers.5.block_sparse_moe.experts.3.w2.weight": "model-00018-of-00098.safetensors", + "model.layers.5.block_sparse_moe.experts.3.w3.weight": "model-00018-of-00098.safetensors", + "model.layers.5.block_sparse_moe.experts.4.w1.weight": "model-00018-of-00098.safetensors", + "model.layers.5.block_sparse_moe.experts.4.w2.weight": "model-00018-of-00098.safetensors", + "model.layers.5.block_sparse_moe.experts.4.w3.weight": "model-00018-of-00098.safetensors", + "model.layers.5.block_sparse_moe.experts.5.w1.weight": "model-00018-of-00098.safetensors", + "model.layers.5.block_sparse_moe.experts.5.w2.weight": "model-00019-of-00098.safetensors", + "model.layers.5.block_sparse_moe.experts.5.w3.weight": "model-00019-of-00098.safetensors", + "model.layers.5.block_sparse_moe.experts.6.w1.weight": "model-00019-of-00098.safetensors", + "model.layers.5.block_sparse_moe.experts.6.w2.weight": "model-00019-of-00098.safetensors", + "model.layers.5.block_sparse_moe.experts.6.w3.weight": "model-00019-of-00098.safetensors", + "model.layers.5.block_sparse_moe.experts.7.w1.weight": "model-00019-of-00098.safetensors", + "model.layers.5.block_sparse_moe.experts.7.w2.weight": "model-00019-of-00098.safetensors", + "model.layers.5.block_sparse_moe.experts.7.w3.weight": "model-00019-of-00098.safetensors", + "model.layers.5.block_sparse_moe.gate.weight": "model-00016-of-00098.safetensors", + "model.layers.5.input_layernorm.weight": "model-00019-of-00098.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00019-of-00098.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00016-of-00098.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00016-of-00098.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00016-of-00098.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00016-of-00098.safetensors", + "model.layers.6.block_sparse_moe.experts.0.w1.weight": "model-00020-of-00098.safetensors", + "model.layers.6.block_sparse_moe.experts.0.w2.weight": "model-00020-of-00098.safetensors", + "model.layers.6.block_sparse_moe.experts.0.w3.weight": "model-00020-of-00098.safetensors", + "model.layers.6.block_sparse_moe.experts.1.w1.weight": "model-00020-of-00098.safetensors", + "model.layers.6.block_sparse_moe.experts.1.w2.weight": "model-00020-of-00098.safetensors", + "model.layers.6.block_sparse_moe.experts.1.w3.weight": "model-00020-of-00098.safetensors", + "model.layers.6.block_sparse_moe.experts.2.w1.weight": "model-00020-of-00098.safetensors", + "model.layers.6.block_sparse_moe.experts.2.w2.weight": "model-00020-of-00098.safetensors", + "model.layers.6.block_sparse_moe.experts.2.w3.weight": "model-00021-of-00098.safetensors", + "model.layers.6.block_sparse_moe.experts.3.w1.weight": "model-00021-of-00098.safetensors", + "model.layers.6.block_sparse_moe.experts.3.w2.weight": "model-00021-of-00098.safetensors", + "model.layers.6.block_sparse_moe.experts.3.w3.weight": "model-00021-of-00098.safetensors", + "model.layers.6.block_sparse_moe.experts.4.w1.weight": "model-00021-of-00098.safetensors", + "model.layers.6.block_sparse_moe.experts.4.w2.weight": "model-00021-of-00098.safetensors", + "model.layers.6.block_sparse_moe.experts.4.w3.weight": "model-00021-of-00098.safetensors", + "model.layers.6.block_sparse_moe.experts.5.w1.weight": "model-00021-of-00098.safetensors", + "model.layers.6.block_sparse_moe.experts.5.w2.weight": "model-00022-of-00098.safetensors", + "model.layers.6.block_sparse_moe.experts.5.w3.weight": "model-00022-of-00098.safetensors", + "model.layers.6.block_sparse_moe.experts.6.w1.weight": "model-00022-of-00098.safetensors", + "model.layers.6.block_sparse_moe.experts.6.w2.weight": "model-00022-of-00098.safetensors", + "model.layers.6.block_sparse_moe.experts.6.w3.weight": "model-00022-of-00098.safetensors", + "model.layers.6.block_sparse_moe.experts.7.w1.weight": "model-00022-of-00098.safetensors", + "model.layers.6.block_sparse_moe.experts.7.w2.weight": "model-00022-of-00098.safetensors", + "model.layers.6.block_sparse_moe.experts.7.w3.weight": "model-00022-of-00098.safetensors", + "model.layers.6.block_sparse_moe.gate.weight": "model-00020-of-00098.safetensors", + "model.layers.6.input_layernorm.weight": "model-00022-of-00098.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00022-of-00098.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00019-of-00098.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00020-of-00098.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00019-of-00098.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00019-of-00098.safetensors", + "model.layers.7.block_sparse_moe.experts.0.w1.weight": "model-00023-of-00098.safetensors", + "model.layers.7.block_sparse_moe.experts.0.w2.weight": "model-00023-of-00098.safetensors", + "model.layers.7.block_sparse_moe.experts.0.w3.weight": "model-00023-of-00098.safetensors", + "model.layers.7.block_sparse_moe.experts.1.w1.weight": "model-00023-of-00098.safetensors", + "model.layers.7.block_sparse_moe.experts.1.w2.weight": "model-00023-of-00098.safetensors", + "model.layers.7.block_sparse_moe.experts.1.w3.weight": "model-00023-of-00098.safetensors", + "model.layers.7.block_sparse_moe.experts.2.w1.weight": "model-00023-of-00098.safetensors", + "model.layers.7.block_sparse_moe.experts.2.w2.weight": "model-00023-of-00098.safetensors", + "model.layers.7.block_sparse_moe.experts.2.w3.weight": "model-00024-of-00098.safetensors", + "model.layers.7.block_sparse_moe.experts.3.w1.weight": "model-00024-of-00098.safetensors", + "model.layers.7.block_sparse_moe.experts.3.w2.weight": "model-00024-of-00098.safetensors", + "model.layers.7.block_sparse_moe.experts.3.w3.weight": "model-00024-of-00098.safetensors", + "model.layers.7.block_sparse_moe.experts.4.w1.weight": "model-00024-of-00098.safetensors", + "model.layers.7.block_sparse_moe.experts.4.w2.weight": "model-00024-of-00098.safetensors", + "model.layers.7.block_sparse_moe.experts.4.w3.weight": "model-00024-of-00098.safetensors", + "model.layers.7.block_sparse_moe.experts.5.w1.weight": "model-00024-of-00098.safetensors", + "model.layers.7.block_sparse_moe.experts.5.w2.weight": "model-00025-of-00098.safetensors", + "model.layers.7.block_sparse_moe.experts.5.w3.weight": "model-00025-of-00098.safetensors", + "model.layers.7.block_sparse_moe.experts.6.w1.weight": "model-00025-of-00098.safetensors", + "model.layers.7.block_sparse_moe.experts.6.w2.weight": "model-00025-of-00098.safetensors", + "model.layers.7.block_sparse_moe.experts.6.w3.weight": "model-00025-of-00098.safetensors", + "model.layers.7.block_sparse_moe.experts.7.w1.weight": "model-00025-of-00098.safetensors", + "model.layers.7.block_sparse_moe.experts.7.w2.weight": "model-00025-of-00098.safetensors", + "model.layers.7.block_sparse_moe.experts.7.w3.weight": "model-00025-of-00098.safetensors", + "model.layers.7.block_sparse_moe.gate.weight": "model-00023-of-00098.safetensors", + "model.layers.7.input_layernorm.weight": "model-00025-of-00098.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00025-of-00098.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00022-of-00098.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00023-of-00098.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00022-of-00098.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00022-of-00098.safetensors", + "model.layers.8.block_sparse_moe.experts.0.w1.weight": "model-00026-of-00098.safetensors", + "model.layers.8.block_sparse_moe.experts.0.w2.weight": "model-00026-of-00098.safetensors", + "model.layers.8.block_sparse_moe.experts.0.w3.weight": "model-00026-of-00098.safetensors", + "model.layers.8.block_sparse_moe.experts.1.w1.weight": "model-00026-of-00098.safetensors", + "model.layers.8.block_sparse_moe.experts.1.w2.weight": "model-00026-of-00098.safetensors", + "model.layers.8.block_sparse_moe.experts.1.w3.weight": "model-00026-of-00098.safetensors", + "model.layers.8.block_sparse_moe.experts.2.w1.weight": "model-00026-of-00098.safetensors", + "model.layers.8.block_sparse_moe.experts.2.w2.weight": "model-00026-of-00098.safetensors", + "model.layers.8.block_sparse_moe.experts.2.w3.weight": "model-00027-of-00098.safetensors", + "model.layers.8.block_sparse_moe.experts.3.w1.weight": "model-00027-of-00098.safetensors", + "model.layers.8.block_sparse_moe.experts.3.w2.weight": "model-00027-of-00098.safetensors", + "model.layers.8.block_sparse_moe.experts.3.w3.weight": "model-00027-of-00098.safetensors", + "model.layers.8.block_sparse_moe.experts.4.w1.weight": "model-00027-of-00098.safetensors", + "model.layers.8.block_sparse_moe.experts.4.w2.weight": "model-00027-of-00098.safetensors", + "model.layers.8.block_sparse_moe.experts.4.w3.weight": "model-00027-of-00098.safetensors", + "model.layers.8.block_sparse_moe.experts.5.w1.weight": "model-00027-of-00098.safetensors", + "model.layers.8.block_sparse_moe.experts.5.w2.weight": "model-00028-of-00098.safetensors", + "model.layers.8.block_sparse_moe.experts.5.w3.weight": "model-00028-of-00098.safetensors", + "model.layers.8.block_sparse_moe.experts.6.w1.weight": "model-00028-of-00098.safetensors", + "model.layers.8.block_sparse_moe.experts.6.w2.weight": "model-00028-of-00098.safetensors", + "model.layers.8.block_sparse_moe.experts.6.w3.weight": "model-00028-of-00098.safetensors", + "model.layers.8.block_sparse_moe.experts.7.w1.weight": "model-00028-of-00098.safetensors", + "model.layers.8.block_sparse_moe.experts.7.w2.weight": "model-00028-of-00098.safetensors", + "model.layers.8.block_sparse_moe.experts.7.w3.weight": "model-00028-of-00098.safetensors", + "model.layers.8.block_sparse_moe.gate.weight": "model-00026-of-00098.safetensors", + "model.layers.8.input_layernorm.weight": "model-00028-of-00098.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00028-of-00098.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00025-of-00098.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00026-of-00098.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00025-of-00098.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00025-of-00098.safetensors", + "model.layers.9.block_sparse_moe.experts.0.w1.weight": "model-00029-of-00098.safetensors", + "model.layers.9.block_sparse_moe.experts.0.w2.weight": "model-00029-of-00098.safetensors", + "model.layers.9.block_sparse_moe.experts.0.w3.weight": "model-00029-of-00098.safetensors", + "model.layers.9.block_sparse_moe.experts.1.w1.weight": "model-00029-of-00098.safetensors", + "model.layers.9.block_sparse_moe.experts.1.w2.weight": "model-00029-of-00098.safetensors", + "model.layers.9.block_sparse_moe.experts.1.w3.weight": "model-00029-of-00098.safetensors", + "model.layers.9.block_sparse_moe.experts.2.w1.weight": "model-00029-of-00098.safetensors", + "model.layers.9.block_sparse_moe.experts.2.w2.weight": "model-00029-of-00098.safetensors", + "model.layers.9.block_sparse_moe.experts.2.w3.weight": "model-00030-of-00098.safetensors", + "model.layers.9.block_sparse_moe.experts.3.w1.weight": "model-00030-of-00098.safetensors", + "model.layers.9.block_sparse_moe.experts.3.w2.weight": "model-00030-of-00098.safetensors", + "model.layers.9.block_sparse_moe.experts.3.w3.weight": "model-00030-of-00098.safetensors", + "model.layers.9.block_sparse_moe.experts.4.w1.weight": "model-00030-of-00098.safetensors", + "model.layers.9.block_sparse_moe.experts.4.w2.weight": "model-00030-of-00098.safetensors", + "model.layers.9.block_sparse_moe.experts.4.w3.weight": "model-00030-of-00098.safetensors", + "model.layers.9.block_sparse_moe.experts.5.w1.weight": "model-00030-of-00098.safetensors", + "model.layers.9.block_sparse_moe.experts.5.w2.weight": "model-00031-of-00098.safetensors", + "model.layers.9.block_sparse_moe.experts.5.w3.weight": "model-00031-of-00098.safetensors", + "model.layers.9.block_sparse_moe.experts.6.w1.weight": "model-00031-of-00098.safetensors", + "model.layers.9.block_sparse_moe.experts.6.w2.weight": "model-00031-of-00098.safetensors", + "model.layers.9.block_sparse_moe.experts.6.w3.weight": "model-00031-of-00098.safetensors", + "model.layers.9.block_sparse_moe.experts.7.w1.weight": "model-00031-of-00098.safetensors", + "model.layers.9.block_sparse_moe.experts.7.w2.weight": "model-00031-of-00098.safetensors", + "model.layers.9.block_sparse_moe.experts.7.w3.weight": "model-00031-of-00098.safetensors", + "model.layers.9.block_sparse_moe.gate.weight": "model-00029-of-00098.safetensors", + "model.layers.9.input_layernorm.weight": "model-00031-of-00098.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00031-of-00098.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00028-of-00098.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00029-of-00098.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00028-of-00098.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00028-of-00098.safetensors", + "model.norm.weight": "model-00097-of-00098.safetensors" + } +} \ No newline at end of file