diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..52373fe24473b1aa44333d318f578ae6bf04b49b 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ffa0b8b2ad1615500cbef6743aef35ef97cab369 --- /dev/null +++ b/config.json @@ -0,0 +1,42 @@ +{ + "_name_or_path": "LatitudeGames/Wayfarer-Large-70B-Llama-3.3", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128009, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 8192, + "initializer_range": 0.02, + "intermediate_size": 28672, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "mixtral", + "num_attention_heads": 64, + "num_experts_per_tok": 2, + "num_hidden_layers": 80, + "num_key_value_heads": 8, + "num_local_experts": 2, + "output_router_logits": false, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": null, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.49.0", + "use_cache": false, + "vocab_size": 128256 +} diff --git a/mergekit_moe_config.yml b/mergekit_moe_config.yml new file mode 100644 index 0000000000000000000000000000000000000000..2963bd2baae0ee1eff970b50d2a8da87c46f2019 --- /dev/null +++ b/mergekit_moe_config.yml @@ -0,0 +1,6 @@ +base_model: LatitudeGames/Wayfarer-Large-70B-Llama-3.3 +gate_mode: random +dtype: bfloat16 +experts: + - source_model: LatitudeGames/Wayfarer-Large-70B-Llama-3.3 + - source_model: SicariusSicariiStuff/Negative_LLAMA_70B diff --git a/model-00001-of-00052.safetensors b/model-00001-of-00052.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..97e0054f709bf0111479ceb769d4ca0282ac75b6 --- /dev/null +++ b/model-00001-of-00052.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:876b6749b30ad7736f360f278f3b0b54daff7f1e3111817dd160cbecaa8f06c8 +size 4752180696 diff --git a/model-00002-of-00052.safetensors b/model-00002-of-00052.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9381cfeaf1a4b1004eed79c4bcb85b5aec7f3e0a --- /dev/null +++ b/model-00002-of-00052.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6b581582355897a665bf8abfbdbe059f3a4e93b2f24f85bcecdb1dab857e496 +size 4831906352 diff --git a/model-00003-of-00052.safetensors b/model-00003-of-00052.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8055443fad7823f7ae9f647046bcefdd5ec0929a --- /dev/null +++ b/model-00003-of-00052.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e97cb7ff14a2bf37507879e8b0e3db15c44524b0dc9cc77326e776cc33a76a28 +size 4999661672 diff --git a/model-00004-of-00052.safetensors b/model-00004-of-00052.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e7edbbbf3c6f0291b7ca3b07ce5ceec17d613c65 --- /dev/null +++ b/model-00004-of-00052.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c4a35936906bb8ee103d63a6b60496abcfdc232b940fb0313a01a882b2682e5 +size 4831889856 diff --git a/model-00005-of-00052.safetensors b/model-00005-of-00052.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..08bcfffda73a946593b6a462dabff3e22afe1f47 --- /dev/null +++ b/model-00005-of-00052.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c32baadf9a53b5bb4982b0ac75e564903aff86ec71878710141bc3be7227aade +size 4831906352 diff --git a/model-00006-of-00052.safetensors b/model-00006-of-00052.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b128d6ceed61cff74e22b1199d76d937b2a3538a --- /dev/null +++ b/model-00006-of-00052.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2867e40dfcce1f28fff9e0c0383312d068d1015b0e6b46ff75ea8bcd846f019 +size 4999645176 diff --git a/model-00007-of-00052.safetensors b/model-00007-of-00052.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7730e5ef5c68df22373bcf088f32e0fd88a1250f --- /dev/null +++ b/model-00007-of-00052.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76c493102d01c40cd356d5ac1c48aa96720aa0324e9e9fa0057392f44c310d02 +size 4831906352 diff --git a/model-00008-of-00052.safetensors b/model-00008-of-00052.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..54ab6d5a605d2450dec94a7a34b9e4f26b396e1f --- /dev/null +++ b/model-00008-of-00052.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1923a12910fccde8d63d43103eb33a494971731fd05d0a6c6ffe7f261bebbf73 +size 4999645192 diff --git a/model-00009-of-00052.safetensors b/model-00009-of-00052.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f6e1cfac5164e0b92944fcc49b52f3c52019b60f --- /dev/null +++ b/model-00009-of-00052.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6aa86f7db81ac9526459412bdb27e0b8ba29a4e85f335daea5c36cd5a1a71d78 +size 4831906376 diff --git a/model-00010-of-00052.safetensors b/model-00010-of-00052.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0429354f6608651d8432c7e97f6f48458e14ebfe --- /dev/null +++ b/model-00010-of-00052.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93d655548d6d7dd7c84aa2d26c074cf06d02fd18f456296a5c1743fdcb79fa01 +size 4999661688 diff --git a/model-00011-of-00052.safetensors b/model-00011-of-00052.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1d0e9bc3af7956af9ad68846c99168d6c843f435 --- /dev/null +++ b/model-00011-of-00052.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1e04e6abb5fd5a4a9a32ee80f0acd65c156c9eb06badbcc1e2c80baffe0ea66 +size 4831889880 diff --git a/model-00012-of-00052.safetensors b/model-00012-of-00052.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ed4265131cedf5acc69b8bc528ee0576115172cb --- /dev/null +++ b/model-00012-of-00052.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43a5e09eaee2704abffc3980c393ecfc6c6bb0bd16c965b6589062fd1b0e73be +size 4831906376 diff --git a/model-00013-of-00052.safetensors b/model-00013-of-00052.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e1022e686fb63fc06a6c9ad731927236de7a5c8a --- /dev/null +++ b/model-00013-of-00052.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7759f157402f08516b5ef54fd8352ad7fefa0feab5930d58f3cbe3e3655203ea +size 4999645192 diff --git a/model-00014-of-00052.safetensors b/model-00014-of-00052.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..34e17cfe52b814fbb6427298d423c75c6fbb3e2a --- /dev/null +++ b/model-00014-of-00052.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb57570c4f8c901c8bab7f80174cf738d275561074fc8f44ce72657bf2e4f889 +size 4831906376 diff --git a/model-00015-of-00052.safetensors b/model-00015-of-00052.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4f8072b06b8e990467cbb1a581afc59006af02a1 --- /dev/null +++ b/model-00015-of-00052.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71c2160e095cbb8bfe799deeacda4578241fa59b17a6b00cd7aece298ef401bb +size 4999645192 diff --git a/model-00016-of-00052.safetensors b/model-00016-of-00052.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bc17c494d663e5f75d76ce899906e63fafcd99a0 --- /dev/null +++ b/model-00016-of-00052.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:196b296ca9446cbb8307c4f7f84231cad628b291c0298d06bc3f569501821524 +size 4831906376 diff --git a/model-00017-of-00052.safetensors b/model-00017-of-00052.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..765519f20dcf572d9838e598e6925b5353732c0f --- /dev/null +++ b/model-00017-of-00052.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c7be2b846ee89cdf0440df0847d005db561bddb296a7216cd9f6bd8058c6cd0 +size 4999661688 diff --git a/model-00018-of-00052.safetensors b/model-00018-of-00052.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4675d47d75beb64ccb32f9e58487803251098df3 --- /dev/null +++ b/model-00018-of-00052.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6e42ff589378af1c3cd1b912672326dd5a15ca10095120a31ece57cfce474f3 +size 4831889880 diff --git a/model-00019-of-00052.safetensors b/model-00019-of-00052.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..406b57ee89cd6c9f9c36edf462a13ae8f0ceac77 --- /dev/null +++ b/model-00019-of-00052.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0bca2709e10aad114e1c9dec31c7f0ec5926af813838064f98b5b4236a5f2f3 +size 4831906376 diff --git a/model-00020-of-00052.safetensors b/model-00020-of-00052.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1dc2b27abe2a66b3c898664e8146cfffa701836d --- /dev/null +++ b/model-00020-of-00052.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f78924ffbf2d07aa57e3540d9c7bef7d68c4bccbd5d37a0dadc7e4f88835686d +size 4999645192 diff --git a/model-00021-of-00052.safetensors b/model-00021-of-00052.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d35041c58623787f65c27ad865ce2c13513653f0 --- /dev/null +++ b/model-00021-of-00052.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c996094abb459272bad1020fa3f3b19cdb8687274f4ac67414966fc6bcd44caa +size 4831906376 diff --git a/model-00022-of-00052.safetensors b/model-00022-of-00052.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..17dab93fc3528e17f05eeff8e0c88409195d1bf3 --- /dev/null +++ b/model-00022-of-00052.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4889c22ccbfa516a7b736b44b5c292334551613c5f1fcf981c4bcbc5724f7735 +size 4999645192 diff --git a/model-00023-of-00052.safetensors b/model-00023-of-00052.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4be9eb6c4e606ba4f53818043955b22511a6ba58 --- /dev/null +++ b/model-00023-of-00052.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fdf0efc05b7111738dd6ac218fc8e65a2a0447e964c7d6ad61d0645ac398a3e +size 4831906376 diff --git a/model-00024-of-00052.safetensors b/model-00024-of-00052.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..380c0cced6e806bb1c9b7be5bd8a551bf87f7155 --- /dev/null +++ b/model-00024-of-00052.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b93f51063c2868e4007a7fd33982f6ee22cb3b5f963190f2155b22f8913954c +size 4999661688 diff --git a/model-00025-of-00052.safetensors b/model-00025-of-00052.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c4263d20f8870ec1b10e5ec50c6909199ed0d7fa --- /dev/null +++ b/model-00025-of-00052.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08c882a3b02e01e5b462d23831c4a87943a71204ca72dd6b5911a3e645ba9e7d +size 4831889880 diff --git a/model-00026-of-00052.safetensors b/model-00026-of-00052.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b59e92d47d531e89727bc61a6e6311f6bc8bfcbb --- /dev/null +++ b/model-00026-of-00052.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:115b1c5ed4ff40d7cfdef2606d7c8e500fbb26dc48638c8ed8340e403f7f1a3b +size 4831906376 diff --git a/model-00027-of-00052.safetensors b/model-00027-of-00052.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a53fe28a6cf14dd5a7ed8bc391fbd5bef107de1f --- /dev/null +++ b/model-00027-of-00052.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5d7be5a1415659dd7a45c21a73817472c44ca844f23fcb27c20b0805e73d73d +size 4999645192 diff --git a/model-00028-of-00052.safetensors b/model-00028-of-00052.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..972e7340ef844a9f4d32bcca3e950fa058c5320d --- /dev/null +++ b/model-00028-of-00052.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6665697f453c53010baf2d0a74b4f2489a73235ae95933560a43a3d2606bf84f +size 4831906376 diff --git a/model-00029-of-00052.safetensors b/model-00029-of-00052.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..02dfd97c5ccf4fb644c0804ae4ebe3e205ef37f3 --- /dev/null +++ b/model-00029-of-00052.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d6ac2f3e866b9f92d4e115f025ff9e0cf1332078a8ddd72feb45c4e38a7dc41 +size 4999645192 diff --git a/model-00030-of-00052.safetensors b/model-00030-of-00052.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..56039140d2601d923e8b117f127c77e11603f1cd --- /dev/null +++ b/model-00030-of-00052.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87526ccc05b95c9bbc62656eb952e10967815a4d8e32e8ba839d4809a17fd6df +size 4831906376 diff --git a/model-00031-of-00052.safetensors b/model-00031-of-00052.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6e1817289037c28a080fe7741a0330900cc66604 --- /dev/null +++ b/model-00031-of-00052.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4013dc988aa5275000e428428e6a10725433a10eeebda65c6adac144d52a230c +size 4999661688 diff --git a/model-00032-of-00052.safetensors b/model-00032-of-00052.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e3b0bf0f29cd80c2842566709a4dcc05642974f4 --- /dev/null +++ b/model-00032-of-00052.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4921008d311516014c268c351f9db5ebc261014f4b19928b1c1746d57fb5368b +size 4831889880 diff --git a/model-00033-of-00052.safetensors b/model-00033-of-00052.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0deb001a878f8ab4f67fcf0a5cc5fad9fdfe7e40 --- /dev/null +++ b/model-00033-of-00052.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:123b789dbf3b71d29996b9e1b4107066d8c392be6547bbea7f325011235d7450 +size 4831906376 diff --git a/model-00034-of-00052.safetensors b/model-00034-of-00052.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..14f5950a03308f9e1cb73f76282aa0b835f362ed --- /dev/null +++ b/model-00034-of-00052.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1075f5548a04ad6e4da209ef1a3fc73e6eb4583bce0edb2c716965b6ae69e363 +size 4999645192 diff --git a/model-00035-of-00052.safetensors b/model-00035-of-00052.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f904a7f4184a619ff79d33bdc98357f9d255b492 --- /dev/null +++ b/model-00035-of-00052.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d55d8944875821715ffb7342a67d7138b79790753ef907b5331da98e62bf095c +size 4831906376 diff --git a/model-00036-of-00052.safetensors b/model-00036-of-00052.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e26fa5b02be9ec61a3c32987753939c50a2c7e35 --- /dev/null +++ b/model-00036-of-00052.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42cae457e34083242cd9c153c4d561a31fd6cd31db5a1e344a41b3fe20677598 +size 4999645192 diff --git a/model-00037-of-00052.safetensors b/model-00037-of-00052.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c4bdb4ffc3e70fddbd369b61b716b0518425b3e0 --- /dev/null +++ b/model-00037-of-00052.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff9f6ade58567d2c0070e3bb3a8c16524cdab954429657f9ebe49a03dd6f7d14 +size 4831906376 diff --git a/model-00038-of-00052.safetensors b/model-00038-of-00052.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..247b0e8f1fdeb0db21f936cd831e5e97abbd93a6 --- /dev/null +++ b/model-00038-of-00052.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8b3451c8a072c0ed72fb9ac31d39ea180b1d0acb3f03b55bd2984e0e8cfc98a +size 4999661688 diff --git a/model-00039-of-00052.safetensors b/model-00039-of-00052.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b9a1580a53aeec6f279aa9efb3ba3c747099e0ea --- /dev/null +++ b/model-00039-of-00052.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f008b595e394807e8e7c90983b0c9d9d138bc4ab578103d3ffb0df2bf477a2b +size 4831889880 diff --git a/model-00040-of-00052.safetensors b/model-00040-of-00052.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f25586b6eadf567fc79532ac242cfb91eedf71c4 --- /dev/null +++ b/model-00040-of-00052.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c463c3c03094834a775e504b707b12535cb14769e3761cb904dab95ec0a7a520 +size 4831906376 diff --git a/model-00041-of-00052.safetensors b/model-00041-of-00052.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d65d49f9a064f2db17cf50e5a255de95287b8c56 --- /dev/null +++ b/model-00041-of-00052.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5a186d79a982dc19fb3bf912c7a9a11e4d0f765ef15886762671b25fc76f7ef +size 4999645192 diff --git a/model-00042-of-00052.safetensors b/model-00042-of-00052.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..246952981a54241a49bcee46bdf18222e255bc38 --- /dev/null +++ b/model-00042-of-00052.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5cb123ed1c9863a0c192a3230c91a4d102921304ae49108f71b1d6f156b7e715 +size 4831906376 diff --git a/model-00043-of-00052.safetensors b/model-00043-of-00052.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b7ab438c13b99b7b8ccd9b614e122228d1d0332c --- /dev/null +++ b/model-00043-of-00052.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42fbc46bb92715f4640146b0a0075e982d250f7521a0c11ad2aad86c773e0606 +size 4999645192 diff --git a/model-00044-of-00052.safetensors b/model-00044-of-00052.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a190b0105625ca0c2a5b2e19cf921b4a19c809d5 --- /dev/null +++ b/model-00044-of-00052.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c247202cf52d15f19a52e846a5204e6137c45d7654ffb3e87120e72d7043d9fd +size 4831906376 diff --git a/model-00045-of-00052.safetensors b/model-00045-of-00052.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a8056b64dc430b9d1044e4908cc61b3735f232f4 --- /dev/null +++ b/model-00045-of-00052.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b5097c2f2739248df11ca87110a8cd420ab7dd470ee25f12444d013cfb8f92a +size 4999661688 diff --git a/model-00046-of-00052.safetensors b/model-00046-of-00052.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ed7f37a4bafdcaa06fd65234c55b95d0bb1c8862 --- /dev/null +++ b/model-00046-of-00052.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99ffe044f0c8d1cceb4ea245733f75d538666c15d985d6b4316a1bda1e7b9515 +size 4831889880 diff --git a/model-00047-of-00052.safetensors b/model-00047-of-00052.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..20e01702812631f9ed11a1a12fba5d3da228d718 --- /dev/null +++ b/model-00047-of-00052.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae1f808641e9e3219cd99cd6a89861b902c2c7bc55eb81ed057ba964f82af978 +size 4831906376 diff --git a/model-00048-of-00052.safetensors b/model-00048-of-00052.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..08b47af410d3230ae0dce0a6b520cd859e650086 --- /dev/null +++ b/model-00048-of-00052.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4862e590f52e373f29c769c09225e8787dc061c90a211772ef02a57394bce1df +size 4999645192 diff --git a/model-00049-of-00052.safetensors b/model-00049-of-00052.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f4867353ee142ba25c7011fd14e4c0de032071c5 --- /dev/null +++ b/model-00049-of-00052.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbb4b40f5fab336737b3d99fb772d7bbed97caa553810ea46a27ef656723c478 +size 4831906376 diff --git a/model-00050-of-00052.safetensors b/model-00050-of-00052.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bc4fa94250bcb0c3980b8f5633239d4027005488 --- /dev/null +++ b/model-00050-of-00052.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f2ab0d8078f48a59983d02711f160a48d2d85f26331f6307ce8863dbe2d4bdc +size 4999645192 diff --git a/model-00051-of-00052.safetensors b/model-00051-of-00052.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..76c986b79bfb1417b51b59c571c78680885cb254 --- /dev/null +++ b/model-00051-of-00052.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc06a77b20cbdcdb32d95ff0e8962d18b7cefb2f6fda949e6bef679736f5534f +size 4831906376 diff --git a/model-00052-of-00052.safetensors b/model-00052-of-00052.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dd3efad14d4419ead5aa6aac64c57fb908aa3eba --- /dev/null +++ b/model-00052-of-00052.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f497bfdf2ca48fa2e3dacfce8361e60eabfb2bcc137bcc7f01f8154d012b65f4 +size 3983042656 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..6fb63118231d4c6dea41729a3f6689007c2f8a8c --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1 @@ +{"metadata": {"mergekit_version": "0.1.0"}, "weight_map": {"model.embed_tokens.weight": "model-00001-of-00052.safetensors", "model.layers.0.input_layernorm.weight": "model-00001-of-00052.safetensors", "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00052.safetensors", "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00052.safetensors", "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00052.safetensors", "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00052.safetensors", "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00052.safetensors", "model.layers.0.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00052.safetensors", "model.layers.0.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00052.safetensors", "model.layers.0.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00052.safetensors", "model.layers.0.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00052.safetensors", "model.layers.0.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00052.safetensors", "model.layers.0.block_sparse_moe.experts.1.w2.weight": "model-00002-of-00052.safetensors", "model.layers.1.input_layernorm.weight": "model-00002-of-00052.safetensors", "model.layers.1.self_attn.q_proj.weight": "model-00002-of-00052.safetensors", "model.layers.1.self_attn.k_proj.weight": "model-00002-of-00052.safetensors", "model.layers.1.self_attn.v_proj.weight": "model-00002-of-00052.safetensors", "model.layers.1.self_attn.o_proj.weight": "model-00002-of-00052.safetensors", "model.layers.1.post_attention_layernorm.weight": "model-00002-of-00052.safetensors", "model.layers.1.block_sparse_moe.experts.0.w3.weight": "model-00002-of-00052.safetensors", "model.layers.1.block_sparse_moe.experts.1.w3.weight": "model-00002-of-00052.safetensors", "model.layers.1.block_sparse_moe.experts.0.w1.weight": "model-00002-of-00052.safetensors", "model.layers.1.block_sparse_moe.experts.1.w1.weight": "model-00002-of-00052.safetensors", "model.layers.1.block_sparse_moe.experts.0.w2.weight": "model-00002-of-00052.safetensors", "model.layers.1.block_sparse_moe.experts.1.w2.weight": "model-00002-of-00052.safetensors", "model.layers.2.input_layernorm.weight": "model-00002-of-00052.safetensors", "model.layers.2.self_attn.q_proj.weight": "model-00002-of-00052.safetensors", "model.layers.2.self_attn.k_proj.weight": "model-00002-of-00052.safetensors", "model.layers.2.self_attn.v_proj.weight": "model-00002-of-00052.safetensors", "model.layers.2.self_attn.o_proj.weight": "model-00002-of-00052.safetensors", "model.layers.2.post_attention_layernorm.weight": "model-00002-of-00052.safetensors", "model.layers.2.block_sparse_moe.experts.0.w3.weight": "model-00002-of-00052.safetensors", "model.layers.2.block_sparse_moe.experts.1.w3.weight": "model-00002-of-00052.safetensors", "model.layers.2.block_sparse_moe.experts.0.w1.weight": "model-00003-of-00052.safetensors", "model.layers.2.block_sparse_moe.experts.1.w1.weight": "model-00003-of-00052.safetensors", "model.layers.2.block_sparse_moe.experts.0.w2.weight": "model-00003-of-00052.safetensors", "model.layers.2.block_sparse_moe.experts.1.w2.weight": "model-00003-of-00052.safetensors", "model.layers.3.input_layernorm.weight": "model-00003-of-00052.safetensors", "model.layers.3.self_attn.q_proj.weight": "model-00003-of-00052.safetensors", "model.layers.3.self_attn.k_proj.weight": "model-00003-of-00052.safetensors", "model.layers.3.self_attn.v_proj.weight": "model-00003-of-00052.safetensors", "model.layers.3.self_attn.o_proj.weight": "model-00003-of-00052.safetensors", "model.layers.3.post_attention_layernorm.weight": "model-00003-of-00052.safetensors", "model.layers.3.block_sparse_moe.experts.0.w3.weight": "model-00003-of-00052.safetensors", "model.layers.3.block_sparse_moe.experts.1.w3.weight": "model-00003-of-00052.safetensors", "model.layers.3.block_sparse_moe.experts.0.w1.weight": "model-00003-of-00052.safetensors", "model.layers.3.block_sparse_moe.experts.1.w1.weight": "model-00003-of-00052.safetensors", "model.layers.3.block_sparse_moe.experts.0.w2.weight": "model-00003-of-00052.safetensors", "model.layers.3.block_sparse_moe.experts.1.w2.weight": "model-00003-of-00052.safetensors", "model.layers.4.input_layernorm.weight": "model-00003-of-00052.safetensors", "model.layers.4.self_attn.q_proj.weight": "model-00004-of-00052.safetensors", "model.layers.4.self_attn.k_proj.weight": "model-00004-of-00052.safetensors", "model.layers.4.self_attn.v_proj.weight": "model-00004-of-00052.safetensors", "model.layers.4.self_attn.o_proj.weight": "model-00004-of-00052.safetensors", "model.layers.4.post_attention_layernorm.weight": "model-00004-of-00052.safetensors", "model.layers.4.block_sparse_moe.experts.0.w3.weight": "model-00004-of-00052.safetensors", "model.layers.4.block_sparse_moe.experts.1.w3.weight": "model-00004-of-00052.safetensors", "model.layers.4.block_sparse_moe.experts.0.w1.weight": "model-00004-of-00052.safetensors", "model.layers.4.block_sparse_moe.experts.1.w1.weight": "model-00004-of-00052.safetensors", "model.layers.4.block_sparse_moe.experts.0.w2.weight": "model-00004-of-00052.safetensors", "model.layers.4.block_sparse_moe.experts.1.w2.weight": "model-00004-of-00052.safetensors", "model.layers.5.input_layernorm.weight": "model-00004-of-00052.safetensors", "model.layers.5.self_attn.q_proj.weight": "model-00004-of-00052.safetensors", "model.layers.5.self_attn.k_proj.weight": "model-00004-of-00052.safetensors", "model.layers.5.self_attn.v_proj.weight": "model-00004-of-00052.safetensors", "model.layers.5.self_attn.o_proj.weight": "model-00004-of-00052.safetensors", "model.layers.5.post_attention_layernorm.weight": "model-00004-of-00052.safetensors", "model.layers.5.block_sparse_moe.experts.0.w3.weight": "model-00004-of-00052.safetensors", "model.layers.5.block_sparse_moe.experts.1.w3.weight": "model-00004-of-00052.safetensors", "model.layers.5.block_sparse_moe.experts.0.w1.weight": "model-00004-of-00052.safetensors", "model.layers.5.block_sparse_moe.experts.1.w1.weight": "model-00005-of-00052.safetensors", "model.layers.5.block_sparse_moe.experts.0.w2.weight": "model-00005-of-00052.safetensors", "model.layers.5.block_sparse_moe.experts.1.w2.weight": "model-00005-of-00052.safetensors", "model.layers.6.input_layernorm.weight": "model-00005-of-00052.safetensors", "model.layers.6.self_attn.q_proj.weight": "model-00005-of-00052.safetensors", "model.layers.6.self_attn.k_proj.weight": "model-00005-of-00052.safetensors", "model.layers.6.self_attn.v_proj.weight": "model-00005-of-00052.safetensors", "model.layers.6.self_attn.o_proj.weight": "model-00005-of-00052.safetensors", "model.layers.6.post_attention_layernorm.weight": "model-00005-of-00052.safetensors", "model.layers.6.block_sparse_moe.experts.0.w3.weight": "model-00005-of-00052.safetensors", "model.layers.6.block_sparse_moe.experts.1.w3.weight": "model-00005-of-00052.safetensors", "model.layers.6.block_sparse_moe.experts.0.w1.weight": "model-00005-of-00052.safetensors", "model.layers.6.block_sparse_moe.experts.1.w1.weight": "model-00005-of-00052.safetensors", "model.layers.6.block_sparse_moe.experts.0.w2.weight": "model-00005-of-00052.safetensors", "model.layers.6.block_sparse_moe.experts.1.w2.weight": "model-00005-of-00052.safetensors", "model.layers.7.input_layernorm.weight": "model-00005-of-00052.safetensors", "model.layers.7.self_attn.q_proj.weight": "model-00005-of-00052.safetensors", "model.layers.7.self_attn.k_proj.weight": "model-00005-of-00052.safetensors", "model.layers.7.self_attn.v_proj.weight": "model-00005-of-00052.safetensors", "model.layers.7.self_attn.o_proj.weight": "model-00005-of-00052.safetensors", "model.layers.7.post_attention_layernorm.weight": "model-00005-of-00052.safetensors", "model.layers.7.block_sparse_moe.experts.0.w3.weight": "model-00006-of-00052.safetensors", "model.layers.7.block_sparse_moe.experts.1.w3.weight": "model-00006-of-00052.safetensors", "model.layers.7.block_sparse_moe.experts.0.w1.weight": "model-00006-of-00052.safetensors", "model.layers.7.block_sparse_moe.experts.1.w1.weight": "model-00006-of-00052.safetensors", "model.layers.7.block_sparse_moe.experts.0.w2.weight": "model-00006-of-00052.safetensors", "model.layers.7.block_sparse_moe.experts.1.w2.weight": "model-00006-of-00052.safetensors", "model.layers.8.input_layernorm.weight": "model-00006-of-00052.safetensors", "model.layers.8.self_attn.q_proj.weight": "model-00006-of-00052.safetensors", "model.layers.8.self_attn.k_proj.weight": "model-00006-of-00052.safetensors", "model.layers.8.self_attn.v_proj.weight": "model-00006-of-00052.safetensors", "model.layers.8.self_attn.o_proj.weight": "model-00006-of-00052.safetensors", "model.layers.8.post_attention_layernorm.weight": "model-00006-of-00052.safetensors", "model.layers.8.block_sparse_moe.experts.0.w3.weight": "model-00006-of-00052.safetensors", "model.layers.8.block_sparse_moe.experts.1.w3.weight": "model-00006-of-00052.safetensors", "model.layers.8.block_sparse_moe.experts.0.w1.weight": "model-00006-of-00052.safetensors", "model.layers.8.block_sparse_moe.experts.1.w1.weight": "model-00006-of-00052.safetensors", "model.layers.8.block_sparse_moe.experts.0.w2.weight": "model-00007-of-00052.safetensors", "model.layers.8.block_sparse_moe.experts.1.w2.weight": "model-00007-of-00052.safetensors", "model.layers.9.input_layernorm.weight": "model-00007-of-00052.safetensors", "model.layers.9.self_attn.q_proj.weight": "model-00007-of-00052.safetensors", "model.layers.9.self_attn.k_proj.weight": "model-00007-of-00052.safetensors", "model.layers.9.self_attn.v_proj.weight": "model-00007-of-00052.safetensors", "model.layers.9.self_attn.o_proj.weight": "model-00007-of-00052.safetensors", "model.layers.9.post_attention_layernorm.weight": "model-00007-of-00052.safetensors", "model.layers.9.block_sparse_moe.experts.0.w3.weight": "model-00007-of-00052.safetensors", "model.layers.9.block_sparse_moe.experts.1.w3.weight": "model-00007-of-00052.safetensors", "model.layers.9.block_sparse_moe.experts.0.w1.weight": "model-00007-of-00052.safetensors", "model.layers.9.block_sparse_moe.experts.1.w1.weight": "model-00007-of-00052.safetensors", "model.layers.9.block_sparse_moe.experts.0.w2.weight": "model-00007-of-00052.safetensors", "model.layers.9.block_sparse_moe.experts.1.w2.weight": "model-00007-of-00052.safetensors", "model.layers.10.input_layernorm.weight": "model-00007-of-00052.safetensors", "model.layers.10.self_attn.q_proj.weight": "model-00007-of-00052.safetensors", "model.layers.10.self_attn.k_proj.weight": "model-00007-of-00052.safetensors", "model.layers.10.self_attn.v_proj.weight": "model-00007-of-00052.safetensors", "model.layers.10.self_attn.o_proj.weight": "model-00007-of-00052.safetensors", "model.layers.10.post_attention_layernorm.weight": "model-00007-of-00052.safetensors", "model.layers.10.block_sparse_moe.experts.0.w3.weight": "model-00007-of-00052.safetensors", "model.layers.10.block_sparse_moe.experts.1.w3.weight": "model-00008-of-00052.safetensors", "model.layers.10.block_sparse_moe.experts.0.w1.weight": "model-00008-of-00052.safetensors", "model.layers.10.block_sparse_moe.experts.1.w1.weight": "model-00008-of-00052.safetensors", "model.layers.10.block_sparse_moe.experts.0.w2.weight": "model-00008-of-00052.safetensors", "model.layers.10.block_sparse_moe.experts.1.w2.weight": "model-00008-of-00052.safetensors", "model.layers.11.input_layernorm.weight": "model-00008-of-00052.safetensors", "model.layers.11.self_attn.q_proj.weight": "model-00008-of-00052.safetensors", "model.layers.11.self_attn.k_proj.weight": "model-00008-of-00052.safetensors", "model.layers.11.self_attn.v_proj.weight": "model-00008-of-00052.safetensors", "model.layers.11.self_attn.o_proj.weight": "model-00008-of-00052.safetensors", "model.layers.11.post_attention_layernorm.weight": "model-00008-of-00052.safetensors", "model.layers.11.block_sparse_moe.experts.0.w3.weight": "model-00008-of-00052.safetensors", "model.layers.11.block_sparse_moe.experts.1.w3.weight": "model-00008-of-00052.safetensors", "model.layers.11.block_sparse_moe.experts.0.w1.weight": "model-00008-of-00052.safetensors", "model.layers.11.block_sparse_moe.experts.1.w1.weight": "model-00008-of-00052.safetensors", "model.layers.11.block_sparse_moe.experts.0.w2.weight": "model-00008-of-00052.safetensors", "model.layers.11.block_sparse_moe.experts.1.w2.weight": "model-00009-of-00052.safetensors", "model.layers.12.input_layernorm.weight": "model-00009-of-00052.safetensors", "model.layers.12.self_attn.q_proj.weight": "model-00009-of-00052.safetensors", "model.layers.12.self_attn.k_proj.weight": "model-00009-of-00052.safetensors", "model.layers.12.self_attn.v_proj.weight": "model-00009-of-00052.safetensors", "model.layers.12.self_attn.o_proj.weight": "model-00009-of-00052.safetensors", "model.layers.12.post_attention_layernorm.weight": "model-00009-of-00052.safetensors", "model.layers.12.block_sparse_moe.experts.0.w3.weight": "model-00009-of-00052.safetensors", "model.layers.12.block_sparse_moe.experts.1.w3.weight": "model-00009-of-00052.safetensors", "model.layers.12.block_sparse_moe.experts.0.w1.weight": "model-00009-of-00052.safetensors", "model.layers.12.block_sparse_moe.experts.1.w1.weight": "model-00009-of-00052.safetensors", "model.layers.12.block_sparse_moe.experts.0.w2.weight": "model-00009-of-00052.safetensors", "model.layers.12.block_sparse_moe.experts.1.w2.weight": "model-00009-of-00052.safetensors", "model.layers.13.input_layernorm.weight": "model-00009-of-00052.safetensors", "model.layers.13.self_attn.q_proj.weight": "model-00009-of-00052.safetensors", "model.layers.13.self_attn.k_proj.weight": "model-00009-of-00052.safetensors", "model.layers.13.self_attn.v_proj.weight": "model-00009-of-00052.safetensors", "model.layers.13.self_attn.o_proj.weight": "model-00009-of-00052.safetensors", "model.layers.13.post_attention_layernorm.weight": "model-00009-of-00052.safetensors", "model.layers.13.block_sparse_moe.experts.0.w3.weight": "model-00009-of-00052.safetensors", "model.layers.13.block_sparse_moe.experts.1.w3.weight": "model-00009-of-00052.safetensors", "model.layers.13.block_sparse_moe.experts.0.w1.weight": "model-00010-of-00052.safetensors", "model.layers.13.block_sparse_moe.experts.1.w1.weight": "model-00010-of-00052.safetensors", "model.layers.13.block_sparse_moe.experts.0.w2.weight": "model-00010-of-00052.safetensors", "model.layers.13.block_sparse_moe.experts.1.w2.weight": "model-00010-of-00052.safetensors", "model.layers.14.input_layernorm.weight": "model-00010-of-00052.safetensors", "model.layers.14.self_attn.q_proj.weight": "model-00010-of-00052.safetensors", "model.layers.14.self_attn.k_proj.weight": "model-00010-of-00052.safetensors", "model.layers.14.self_attn.v_proj.weight": "model-00010-of-00052.safetensors", "model.layers.14.self_attn.o_proj.weight": "model-00010-of-00052.safetensors", "model.layers.14.post_attention_layernorm.weight": "model-00010-of-00052.safetensors", "model.layers.14.block_sparse_moe.experts.0.w3.weight": "model-00010-of-00052.safetensors", "model.layers.14.block_sparse_moe.experts.1.w3.weight": "model-00010-of-00052.safetensors", "model.layers.14.block_sparse_moe.experts.0.w1.weight": "model-00010-of-00052.safetensors", "model.layers.14.block_sparse_moe.experts.1.w1.weight": "model-00010-of-00052.safetensors", "model.layers.14.block_sparse_moe.experts.0.w2.weight": "model-00010-of-00052.safetensors", "model.layers.14.block_sparse_moe.experts.1.w2.weight": "model-00010-of-00052.safetensors", "model.layers.15.input_layernorm.weight": "model-00010-of-00052.safetensors", "model.layers.15.self_attn.q_proj.weight": "model-00011-of-00052.safetensors", "model.layers.15.self_attn.k_proj.weight": "model-00011-of-00052.safetensors", "model.layers.15.self_attn.v_proj.weight": "model-00011-of-00052.safetensors", "model.layers.15.self_attn.o_proj.weight": "model-00011-of-00052.safetensors", "model.layers.15.post_attention_layernorm.weight": "model-00011-of-00052.safetensors", "model.layers.15.block_sparse_moe.experts.0.w3.weight": "model-00011-of-00052.safetensors", "model.layers.15.block_sparse_moe.experts.1.w3.weight": "model-00011-of-00052.safetensors", "model.layers.15.block_sparse_moe.experts.0.w1.weight": "model-00011-of-00052.safetensors", "model.layers.15.block_sparse_moe.experts.1.w1.weight": "model-00011-of-00052.safetensors", "model.layers.15.block_sparse_moe.experts.0.w2.weight": "model-00011-of-00052.safetensors", "model.layers.15.block_sparse_moe.experts.1.w2.weight": "model-00011-of-00052.safetensors", "model.layers.16.input_layernorm.weight": "model-00011-of-00052.safetensors", "model.layers.16.self_attn.q_proj.weight": "model-00011-of-00052.safetensors", "model.layers.16.self_attn.k_proj.weight": "model-00011-of-00052.safetensors", "model.layers.16.self_attn.v_proj.weight": "model-00011-of-00052.safetensors", "model.layers.16.self_attn.o_proj.weight": "model-00011-of-00052.safetensors", "model.layers.16.post_attention_layernorm.weight": "model-00011-of-00052.safetensors", "model.layers.16.block_sparse_moe.experts.0.w3.weight": "model-00011-of-00052.safetensors", "model.layers.16.block_sparse_moe.experts.1.w3.weight": "model-00011-of-00052.safetensors", "model.layers.16.block_sparse_moe.experts.0.w1.weight": "model-00011-of-00052.safetensors", "model.layers.16.block_sparse_moe.experts.1.w1.weight": "model-00012-of-00052.safetensors", "model.layers.16.block_sparse_moe.experts.0.w2.weight": "model-00012-of-00052.safetensors", "model.layers.16.block_sparse_moe.experts.1.w2.weight": "model-00012-of-00052.safetensors", "model.layers.17.input_layernorm.weight": "model-00012-of-00052.safetensors", "model.layers.17.self_attn.q_proj.weight": "model-00012-of-00052.safetensors", "model.layers.17.self_attn.k_proj.weight": "model-00012-of-00052.safetensors", "model.layers.17.self_attn.v_proj.weight": "model-00012-of-00052.safetensors", "model.layers.17.self_attn.o_proj.weight": "model-00012-of-00052.safetensors", "model.layers.17.post_attention_layernorm.weight": "model-00012-of-00052.safetensors", "model.layers.17.block_sparse_moe.experts.0.w3.weight": "model-00012-of-00052.safetensors", "model.layers.17.block_sparse_moe.experts.1.w3.weight": "model-00012-of-00052.safetensors", "model.layers.17.block_sparse_moe.experts.0.w1.weight": "model-00012-of-00052.safetensors", "model.layers.17.block_sparse_moe.experts.1.w1.weight": "model-00012-of-00052.safetensors", "model.layers.17.block_sparse_moe.experts.0.w2.weight": "model-00012-of-00052.safetensors", "model.layers.17.block_sparse_moe.experts.1.w2.weight": "model-00012-of-00052.safetensors", "model.layers.18.input_layernorm.weight": "model-00012-of-00052.safetensors", "model.layers.18.self_attn.q_proj.weight": "model-00012-of-00052.safetensors", "model.layers.18.self_attn.k_proj.weight": "model-00012-of-00052.safetensors", "model.layers.18.self_attn.v_proj.weight": "model-00012-of-00052.safetensors", "model.layers.18.self_attn.o_proj.weight": "model-00012-of-00052.safetensors", "model.layers.18.post_attention_layernorm.weight": "model-00012-of-00052.safetensors", "model.layers.18.block_sparse_moe.experts.0.w3.weight": "model-00013-of-00052.safetensors", "model.layers.18.block_sparse_moe.experts.1.w3.weight": "model-00013-of-00052.safetensors", "model.layers.18.block_sparse_moe.experts.0.w1.weight": "model-00013-of-00052.safetensors", "model.layers.18.block_sparse_moe.experts.1.w1.weight": "model-00013-of-00052.safetensors", "model.layers.18.block_sparse_moe.experts.0.w2.weight": "model-00013-of-00052.safetensors", "model.layers.18.block_sparse_moe.experts.1.w2.weight": "model-00013-of-00052.safetensors", "model.layers.19.input_layernorm.weight": "model-00013-of-00052.safetensors", "model.layers.19.self_attn.q_proj.weight": "model-00013-of-00052.safetensors", "model.layers.19.self_attn.k_proj.weight": "model-00013-of-00052.safetensors", "model.layers.19.self_attn.v_proj.weight": "model-00013-of-00052.safetensors", "model.layers.19.self_attn.o_proj.weight": "model-00013-of-00052.safetensors", "model.layers.19.post_attention_layernorm.weight": "model-00013-of-00052.safetensors", "model.layers.19.block_sparse_moe.experts.0.w3.weight": "model-00013-of-00052.safetensors", "model.layers.19.block_sparse_moe.experts.1.w3.weight": "model-00013-of-00052.safetensors", "model.layers.19.block_sparse_moe.experts.0.w1.weight": "model-00013-of-00052.safetensors", "model.layers.19.block_sparse_moe.experts.1.w1.weight": "model-00013-of-00052.safetensors", "model.layers.19.block_sparse_moe.experts.0.w2.weight": "model-00014-of-00052.safetensors", "model.layers.19.block_sparse_moe.experts.1.w2.weight": "model-00014-of-00052.safetensors", "model.layers.20.input_layernorm.weight": "model-00014-of-00052.safetensors", "model.layers.20.self_attn.q_proj.weight": "model-00014-of-00052.safetensors", "model.layers.20.self_attn.k_proj.weight": "model-00014-of-00052.safetensors", "model.layers.20.self_attn.v_proj.weight": "model-00014-of-00052.safetensors", "model.layers.20.self_attn.o_proj.weight": "model-00014-of-00052.safetensors", "model.layers.20.post_attention_layernorm.weight": "model-00014-of-00052.safetensors", "model.layers.20.block_sparse_moe.experts.0.w3.weight": "model-00014-of-00052.safetensors", "model.layers.20.block_sparse_moe.experts.1.w3.weight": "model-00014-of-00052.safetensors", "model.layers.20.block_sparse_moe.experts.0.w1.weight": "model-00014-of-00052.safetensors", "model.layers.20.block_sparse_moe.experts.1.w1.weight": "model-00014-of-00052.safetensors", "model.layers.20.block_sparse_moe.experts.0.w2.weight": "model-00014-of-00052.safetensors", "model.layers.20.block_sparse_moe.experts.1.w2.weight": "model-00014-of-00052.safetensors", "model.layers.21.input_layernorm.weight": "model-00014-of-00052.safetensors", "model.layers.21.self_attn.q_proj.weight": "model-00014-of-00052.safetensors", "model.layers.21.self_attn.k_proj.weight": "model-00014-of-00052.safetensors", "model.layers.21.self_attn.v_proj.weight": "model-00014-of-00052.safetensors", "model.layers.21.self_attn.o_proj.weight": "model-00014-of-00052.safetensors", "model.layers.21.post_attention_layernorm.weight": "model-00014-of-00052.safetensors", "model.layers.21.block_sparse_moe.experts.0.w3.weight": "model-00014-of-00052.safetensors", "model.layers.21.block_sparse_moe.experts.1.w3.weight": "model-00015-of-00052.safetensors", "model.layers.21.block_sparse_moe.experts.0.w1.weight": "model-00015-of-00052.safetensors", "model.layers.21.block_sparse_moe.experts.1.w1.weight": "model-00015-of-00052.safetensors", "model.layers.21.block_sparse_moe.experts.0.w2.weight": "model-00015-of-00052.safetensors", "model.layers.21.block_sparse_moe.experts.1.w2.weight": "model-00015-of-00052.safetensors", "model.layers.22.input_layernorm.weight": "model-00015-of-00052.safetensors", "model.layers.22.self_attn.q_proj.weight": "model-00015-of-00052.safetensors", "model.layers.22.self_attn.k_proj.weight": "model-00015-of-00052.safetensors", "model.layers.22.self_attn.v_proj.weight": "model-00015-of-00052.safetensors", "model.layers.22.self_attn.o_proj.weight": "model-00015-of-00052.safetensors", "model.layers.22.post_attention_layernorm.weight": "model-00015-of-00052.safetensors", "model.layers.22.block_sparse_moe.experts.0.w3.weight": "model-00015-of-00052.safetensors", "model.layers.22.block_sparse_moe.experts.1.w3.weight": "model-00015-of-00052.safetensors", "model.layers.22.block_sparse_moe.experts.0.w1.weight": "model-00015-of-00052.safetensors", "model.layers.22.block_sparse_moe.experts.1.w1.weight": "model-00015-of-00052.safetensors", "model.layers.22.block_sparse_moe.experts.0.w2.weight": "model-00015-of-00052.safetensors", "model.layers.22.block_sparse_moe.experts.1.w2.weight": "model-00016-of-00052.safetensors", "model.layers.23.input_layernorm.weight": "model-00016-of-00052.safetensors", "model.layers.23.self_attn.q_proj.weight": "model-00016-of-00052.safetensors", "model.layers.23.self_attn.k_proj.weight": "model-00016-of-00052.safetensors", "model.layers.23.self_attn.v_proj.weight": "model-00016-of-00052.safetensors", "model.layers.23.self_attn.o_proj.weight": "model-00016-of-00052.safetensors", "model.layers.23.post_attention_layernorm.weight": "model-00016-of-00052.safetensors", "model.layers.23.block_sparse_moe.experts.0.w3.weight": "model-00016-of-00052.safetensors", "model.layers.23.block_sparse_moe.experts.1.w3.weight": "model-00016-of-00052.safetensors", "model.layers.23.block_sparse_moe.experts.0.w1.weight": "model-00016-of-00052.safetensors", "model.layers.23.block_sparse_moe.experts.1.w1.weight": "model-00016-of-00052.safetensors", "model.layers.23.block_sparse_moe.experts.0.w2.weight": "model-00016-of-00052.safetensors", "model.layers.23.block_sparse_moe.experts.1.w2.weight": "model-00016-of-00052.safetensors", "model.layers.24.input_layernorm.weight": "model-00016-of-00052.safetensors", "model.layers.24.self_attn.q_proj.weight": "model-00016-of-00052.safetensors", "model.layers.24.self_attn.k_proj.weight": "model-00016-of-00052.safetensors", "model.layers.24.self_attn.v_proj.weight": "model-00016-of-00052.safetensors", "model.layers.24.self_attn.o_proj.weight": "model-00016-of-00052.safetensors", "model.layers.24.post_attention_layernorm.weight": "model-00016-of-00052.safetensors", "model.layers.24.block_sparse_moe.experts.0.w3.weight": "model-00016-of-00052.safetensors", "model.layers.24.block_sparse_moe.experts.1.w3.weight": "model-00016-of-00052.safetensors", "model.layers.24.block_sparse_moe.experts.0.w1.weight": "model-00017-of-00052.safetensors", "model.layers.24.block_sparse_moe.experts.1.w1.weight": "model-00017-of-00052.safetensors", "model.layers.24.block_sparse_moe.experts.0.w2.weight": "model-00017-of-00052.safetensors", "model.layers.24.block_sparse_moe.experts.1.w2.weight": "model-00017-of-00052.safetensors", "model.layers.25.input_layernorm.weight": "model-00017-of-00052.safetensors", "model.layers.25.self_attn.q_proj.weight": "model-00017-of-00052.safetensors", "model.layers.25.self_attn.k_proj.weight": "model-00017-of-00052.safetensors", "model.layers.25.self_attn.v_proj.weight": "model-00017-of-00052.safetensors", "model.layers.25.self_attn.o_proj.weight": "model-00017-of-00052.safetensors", "model.layers.25.post_attention_layernorm.weight": "model-00017-of-00052.safetensors", "model.layers.25.block_sparse_moe.experts.0.w3.weight": "model-00017-of-00052.safetensors", "model.layers.25.block_sparse_moe.experts.1.w3.weight": "model-00017-of-00052.safetensors", "model.layers.25.block_sparse_moe.experts.0.w1.weight": "model-00017-of-00052.safetensors", "model.layers.25.block_sparse_moe.experts.1.w1.weight": "model-00017-of-00052.safetensors", "model.layers.25.block_sparse_moe.experts.0.w2.weight": "model-00017-of-00052.safetensors", "model.layers.25.block_sparse_moe.experts.1.w2.weight": "model-00017-of-00052.safetensors", "model.layers.26.input_layernorm.weight": "model-00017-of-00052.safetensors", "model.layers.26.self_attn.q_proj.weight": "model-00018-of-00052.safetensors", "model.layers.26.self_attn.k_proj.weight": "model-00018-of-00052.safetensors", "model.layers.26.self_attn.v_proj.weight": "model-00018-of-00052.safetensors", "model.layers.26.self_attn.o_proj.weight": "model-00018-of-00052.safetensors", "model.layers.26.post_attention_layernorm.weight": "model-00018-of-00052.safetensors", "model.layers.26.block_sparse_moe.experts.0.w3.weight": "model-00018-of-00052.safetensors", "model.layers.26.block_sparse_moe.experts.1.w3.weight": "model-00018-of-00052.safetensors", "model.layers.26.block_sparse_moe.experts.0.w1.weight": "model-00018-of-00052.safetensors", "model.layers.26.block_sparse_moe.experts.1.w1.weight": "model-00018-of-00052.safetensors", "model.layers.26.block_sparse_moe.experts.0.w2.weight": "model-00018-of-00052.safetensors", "model.layers.26.block_sparse_moe.experts.1.w2.weight": "model-00018-of-00052.safetensors", "model.layers.27.input_layernorm.weight": "model-00018-of-00052.safetensors", "model.layers.27.self_attn.q_proj.weight": "model-00018-of-00052.safetensors", "model.layers.27.self_attn.k_proj.weight": "model-00018-of-00052.safetensors", "model.layers.27.self_attn.v_proj.weight": "model-00018-of-00052.safetensors", "model.layers.27.self_attn.o_proj.weight": "model-00018-of-00052.safetensors", "model.layers.27.post_attention_layernorm.weight": "model-00018-of-00052.safetensors", "model.layers.27.block_sparse_moe.experts.0.w3.weight": "model-00018-of-00052.safetensors", "model.layers.27.block_sparse_moe.experts.1.w3.weight": "model-00018-of-00052.safetensors", "model.layers.27.block_sparse_moe.experts.0.w1.weight": "model-00018-of-00052.safetensors", "model.layers.27.block_sparse_moe.experts.1.w1.weight": "model-00019-of-00052.safetensors", "model.layers.27.block_sparse_moe.experts.0.w2.weight": "model-00019-of-00052.safetensors", "model.layers.27.block_sparse_moe.experts.1.w2.weight": "model-00019-of-00052.safetensors", "model.layers.28.input_layernorm.weight": "model-00019-of-00052.safetensors", "model.layers.28.self_attn.q_proj.weight": "model-00019-of-00052.safetensors", "model.layers.28.self_attn.k_proj.weight": "model-00019-of-00052.safetensors", "model.layers.28.self_attn.v_proj.weight": "model-00019-of-00052.safetensors", "model.layers.28.self_attn.o_proj.weight": "model-00019-of-00052.safetensors", "model.layers.28.post_attention_layernorm.weight": "model-00019-of-00052.safetensors", "model.layers.28.block_sparse_moe.experts.0.w3.weight": "model-00019-of-00052.safetensors", "model.layers.28.block_sparse_moe.experts.1.w3.weight": "model-00019-of-00052.safetensors", "model.layers.28.block_sparse_moe.experts.0.w1.weight": "model-00019-of-00052.safetensors", "model.layers.28.block_sparse_moe.experts.1.w1.weight": "model-00019-of-00052.safetensors", "model.layers.28.block_sparse_moe.experts.0.w2.weight": "model-00019-of-00052.safetensors", "model.layers.28.block_sparse_moe.experts.1.w2.weight": "model-00019-of-00052.safetensors", "model.layers.29.input_layernorm.weight": "model-00019-of-00052.safetensors", "model.layers.29.self_attn.q_proj.weight": "model-00019-of-00052.safetensors", "model.layers.29.self_attn.k_proj.weight": "model-00019-of-00052.safetensors", "model.layers.29.self_attn.v_proj.weight": "model-00019-of-00052.safetensors", "model.layers.29.self_attn.o_proj.weight": "model-00019-of-00052.safetensors", "model.layers.29.post_attention_layernorm.weight": "model-00019-of-00052.safetensors", "model.layers.29.block_sparse_moe.experts.0.w3.weight": "model-00020-of-00052.safetensors", "model.layers.29.block_sparse_moe.experts.1.w3.weight": "model-00020-of-00052.safetensors", "model.layers.29.block_sparse_moe.experts.0.w1.weight": "model-00020-of-00052.safetensors", "model.layers.29.block_sparse_moe.experts.1.w1.weight": "model-00020-of-00052.safetensors", "model.layers.29.block_sparse_moe.experts.0.w2.weight": "model-00020-of-00052.safetensors", "model.layers.29.block_sparse_moe.experts.1.w2.weight": "model-00020-of-00052.safetensors", "model.layers.30.input_layernorm.weight": "model-00020-of-00052.safetensors", "model.layers.30.self_attn.q_proj.weight": "model-00020-of-00052.safetensors", "model.layers.30.self_attn.k_proj.weight": "model-00020-of-00052.safetensors", "model.layers.30.self_attn.v_proj.weight": "model-00020-of-00052.safetensors", "model.layers.30.self_attn.o_proj.weight": "model-00020-of-00052.safetensors", "model.layers.30.post_attention_layernorm.weight": "model-00020-of-00052.safetensors", "model.layers.30.block_sparse_moe.experts.0.w3.weight": "model-00020-of-00052.safetensors", "model.layers.30.block_sparse_moe.experts.1.w3.weight": "model-00020-of-00052.safetensors", "model.layers.30.block_sparse_moe.experts.0.w1.weight": "model-00020-of-00052.safetensors", "model.layers.30.block_sparse_moe.experts.1.w1.weight": "model-00020-of-00052.safetensors", "model.layers.30.block_sparse_moe.experts.0.w2.weight": "model-00021-of-00052.safetensors", "model.layers.30.block_sparse_moe.experts.1.w2.weight": "model-00021-of-00052.safetensors", "model.layers.31.input_layernorm.weight": "model-00021-of-00052.safetensors", "model.layers.31.self_attn.q_proj.weight": "model-00021-of-00052.safetensors", "model.layers.31.self_attn.k_proj.weight": "model-00021-of-00052.safetensors", "model.layers.31.self_attn.v_proj.weight": "model-00021-of-00052.safetensors", "model.layers.31.self_attn.o_proj.weight": "model-00021-of-00052.safetensors", "model.layers.31.post_attention_layernorm.weight": "model-00021-of-00052.safetensors", "model.layers.31.block_sparse_moe.experts.0.w3.weight": "model-00021-of-00052.safetensors", "model.layers.31.block_sparse_moe.experts.1.w3.weight": "model-00021-of-00052.safetensors", "model.layers.31.block_sparse_moe.experts.0.w1.weight": "model-00021-of-00052.safetensors", "model.layers.31.block_sparse_moe.experts.1.w1.weight": "model-00021-of-00052.safetensors", "model.layers.31.block_sparse_moe.experts.0.w2.weight": "model-00021-of-00052.safetensors", "model.layers.31.block_sparse_moe.experts.1.w2.weight": "model-00021-of-00052.safetensors", "model.layers.32.input_layernorm.weight": "model-00021-of-00052.safetensors", "model.layers.32.self_attn.q_proj.weight": "model-00021-of-00052.safetensors", "model.layers.32.self_attn.k_proj.weight": "model-00021-of-00052.safetensors", "model.layers.32.self_attn.v_proj.weight": "model-00021-of-00052.safetensors", "model.layers.32.self_attn.o_proj.weight": "model-00021-of-00052.safetensors", "model.layers.32.post_attention_layernorm.weight": "model-00021-of-00052.safetensors", "model.layers.32.block_sparse_moe.experts.0.w3.weight": "model-00021-of-00052.safetensors", "model.layers.32.block_sparse_moe.experts.1.w3.weight": "model-00022-of-00052.safetensors", "model.layers.32.block_sparse_moe.experts.0.w1.weight": "model-00022-of-00052.safetensors", "model.layers.32.block_sparse_moe.experts.1.w1.weight": "model-00022-of-00052.safetensors", "model.layers.32.block_sparse_moe.experts.0.w2.weight": "model-00022-of-00052.safetensors", "model.layers.32.block_sparse_moe.experts.1.w2.weight": "model-00022-of-00052.safetensors", "model.layers.33.input_layernorm.weight": "model-00022-of-00052.safetensors", "model.layers.33.self_attn.q_proj.weight": "model-00022-of-00052.safetensors", "model.layers.33.self_attn.k_proj.weight": "model-00022-of-00052.safetensors", "model.layers.33.self_attn.v_proj.weight": "model-00022-of-00052.safetensors", "model.layers.33.self_attn.o_proj.weight": "model-00022-of-00052.safetensors", "model.layers.33.post_attention_layernorm.weight": "model-00022-of-00052.safetensors", "model.layers.33.block_sparse_moe.experts.0.w3.weight": "model-00022-of-00052.safetensors", "model.layers.33.block_sparse_moe.experts.1.w3.weight": "model-00022-of-00052.safetensors", "model.layers.33.block_sparse_moe.experts.0.w1.weight": "model-00022-of-00052.safetensors", "model.layers.33.block_sparse_moe.experts.1.w1.weight": "model-00022-of-00052.safetensors", "model.layers.33.block_sparse_moe.experts.0.w2.weight": "model-00022-of-00052.safetensors", "model.layers.33.block_sparse_moe.experts.1.w2.weight": "model-00023-of-00052.safetensors", "model.layers.34.input_layernorm.weight": "model-00023-of-00052.safetensors", "model.layers.34.self_attn.q_proj.weight": "model-00023-of-00052.safetensors", "model.layers.34.self_attn.k_proj.weight": "model-00023-of-00052.safetensors", "model.layers.34.self_attn.v_proj.weight": "model-00023-of-00052.safetensors", "model.layers.34.self_attn.o_proj.weight": "model-00023-of-00052.safetensors", "model.layers.34.post_attention_layernorm.weight": "model-00023-of-00052.safetensors", "model.layers.34.block_sparse_moe.experts.0.w3.weight": "model-00023-of-00052.safetensors", "model.layers.34.block_sparse_moe.experts.1.w3.weight": "model-00023-of-00052.safetensors", "model.layers.34.block_sparse_moe.experts.0.w1.weight": "model-00023-of-00052.safetensors", "model.layers.34.block_sparse_moe.experts.1.w1.weight": "model-00023-of-00052.safetensors", "model.layers.34.block_sparse_moe.experts.0.w2.weight": "model-00023-of-00052.safetensors", "model.layers.34.block_sparse_moe.experts.1.w2.weight": "model-00023-of-00052.safetensors", "model.layers.35.input_layernorm.weight": "model-00023-of-00052.safetensors", "model.layers.35.self_attn.q_proj.weight": "model-00023-of-00052.safetensors", "model.layers.35.self_attn.k_proj.weight": "model-00023-of-00052.safetensors", "model.layers.35.self_attn.v_proj.weight": "model-00023-of-00052.safetensors", "model.layers.35.self_attn.o_proj.weight": "model-00023-of-00052.safetensors", "model.layers.35.post_attention_layernorm.weight": "model-00023-of-00052.safetensors", "model.layers.35.block_sparse_moe.experts.0.w3.weight": "model-00023-of-00052.safetensors", "model.layers.35.block_sparse_moe.experts.1.w3.weight": "model-00023-of-00052.safetensors", "model.layers.35.block_sparse_moe.experts.0.w1.weight": "model-00024-of-00052.safetensors", "model.layers.35.block_sparse_moe.experts.1.w1.weight": "model-00024-of-00052.safetensors", "model.layers.35.block_sparse_moe.experts.0.w2.weight": "model-00024-of-00052.safetensors", "model.layers.35.block_sparse_moe.experts.1.w2.weight": "model-00024-of-00052.safetensors", "model.layers.36.input_layernorm.weight": "model-00024-of-00052.safetensors", "model.layers.36.self_attn.q_proj.weight": "model-00024-of-00052.safetensors", "model.layers.36.self_attn.k_proj.weight": "model-00024-of-00052.safetensors", "model.layers.36.self_attn.v_proj.weight": "model-00024-of-00052.safetensors", "model.layers.36.self_attn.o_proj.weight": "model-00024-of-00052.safetensors", "model.layers.36.post_attention_layernorm.weight": "model-00024-of-00052.safetensors", "model.layers.36.block_sparse_moe.experts.0.w3.weight": "model-00024-of-00052.safetensors", "model.layers.36.block_sparse_moe.experts.1.w3.weight": "model-00024-of-00052.safetensors", "model.layers.36.block_sparse_moe.experts.0.w1.weight": "model-00024-of-00052.safetensors", "model.layers.36.block_sparse_moe.experts.1.w1.weight": "model-00024-of-00052.safetensors", "model.layers.36.block_sparse_moe.experts.0.w2.weight": "model-00024-of-00052.safetensors", "model.layers.36.block_sparse_moe.experts.1.w2.weight": "model-00024-of-00052.safetensors", "model.layers.37.input_layernorm.weight": "model-00024-of-00052.safetensors", "model.layers.37.self_attn.q_proj.weight": "model-00025-of-00052.safetensors", "model.layers.37.self_attn.k_proj.weight": "model-00025-of-00052.safetensors", "model.layers.37.self_attn.v_proj.weight": "model-00025-of-00052.safetensors", "model.layers.37.self_attn.o_proj.weight": "model-00025-of-00052.safetensors", "model.layers.37.post_attention_layernorm.weight": "model-00025-of-00052.safetensors", "model.layers.37.block_sparse_moe.experts.0.w3.weight": "model-00025-of-00052.safetensors", "model.layers.37.block_sparse_moe.experts.1.w3.weight": "model-00025-of-00052.safetensors", "model.layers.37.block_sparse_moe.experts.0.w1.weight": "model-00025-of-00052.safetensors", "model.layers.37.block_sparse_moe.experts.1.w1.weight": "model-00025-of-00052.safetensors", "model.layers.37.block_sparse_moe.experts.0.w2.weight": "model-00025-of-00052.safetensors", "model.layers.37.block_sparse_moe.experts.1.w2.weight": "model-00025-of-00052.safetensors", "model.layers.38.input_layernorm.weight": "model-00025-of-00052.safetensors", "model.layers.38.self_attn.q_proj.weight": "model-00025-of-00052.safetensors", "model.layers.38.self_attn.k_proj.weight": "model-00025-of-00052.safetensors", "model.layers.38.self_attn.v_proj.weight": "model-00025-of-00052.safetensors", "model.layers.38.self_attn.o_proj.weight": "model-00025-of-00052.safetensors", "model.layers.38.post_attention_layernorm.weight": "model-00025-of-00052.safetensors", "model.layers.38.block_sparse_moe.experts.0.w3.weight": "model-00025-of-00052.safetensors", "model.layers.38.block_sparse_moe.experts.1.w3.weight": "model-00025-of-00052.safetensors", "model.layers.38.block_sparse_moe.experts.0.w1.weight": "model-00025-of-00052.safetensors", "model.layers.38.block_sparse_moe.experts.1.w1.weight": "model-00026-of-00052.safetensors", "model.layers.38.block_sparse_moe.experts.0.w2.weight": "model-00026-of-00052.safetensors", "model.layers.38.block_sparse_moe.experts.1.w2.weight": "model-00026-of-00052.safetensors", "model.layers.39.input_layernorm.weight": "model-00026-of-00052.safetensors", "model.layers.39.self_attn.q_proj.weight": "model-00026-of-00052.safetensors", "model.layers.39.self_attn.k_proj.weight": "model-00026-of-00052.safetensors", "model.layers.39.self_attn.v_proj.weight": "model-00026-of-00052.safetensors", "model.layers.39.self_attn.o_proj.weight": "model-00026-of-00052.safetensors", "model.layers.39.post_attention_layernorm.weight": "model-00026-of-00052.safetensors", "model.layers.39.block_sparse_moe.experts.0.w3.weight": "model-00026-of-00052.safetensors", "model.layers.39.block_sparse_moe.experts.1.w3.weight": "model-00026-of-00052.safetensors", "model.layers.39.block_sparse_moe.experts.0.w1.weight": "model-00026-of-00052.safetensors", "model.layers.39.block_sparse_moe.experts.1.w1.weight": "model-00026-of-00052.safetensors", "model.layers.39.block_sparse_moe.experts.0.w2.weight": "model-00026-of-00052.safetensors", "model.layers.39.block_sparse_moe.experts.1.w2.weight": "model-00026-of-00052.safetensors", "model.layers.40.input_layernorm.weight": "model-00026-of-00052.safetensors", "model.layers.40.self_attn.q_proj.weight": "model-00026-of-00052.safetensors", "model.layers.40.self_attn.k_proj.weight": "model-00026-of-00052.safetensors", "model.layers.40.self_attn.v_proj.weight": "model-00026-of-00052.safetensors", "model.layers.40.self_attn.o_proj.weight": "model-00026-of-00052.safetensors", "model.layers.40.post_attention_layernorm.weight": "model-00026-of-00052.safetensors", "model.layers.40.block_sparse_moe.experts.0.w3.weight": "model-00027-of-00052.safetensors", "model.layers.40.block_sparse_moe.experts.1.w3.weight": "model-00027-of-00052.safetensors", "model.layers.40.block_sparse_moe.experts.0.w1.weight": "model-00027-of-00052.safetensors", "model.layers.40.block_sparse_moe.experts.1.w1.weight": "model-00027-of-00052.safetensors", "model.layers.40.block_sparse_moe.experts.0.w2.weight": "model-00027-of-00052.safetensors", "model.layers.40.block_sparse_moe.experts.1.w2.weight": "model-00027-of-00052.safetensors", "model.layers.41.input_layernorm.weight": "model-00027-of-00052.safetensors", "model.layers.41.self_attn.q_proj.weight": "model-00027-of-00052.safetensors", "model.layers.41.self_attn.k_proj.weight": "model-00027-of-00052.safetensors", "model.layers.41.self_attn.v_proj.weight": "model-00027-of-00052.safetensors", "model.layers.41.self_attn.o_proj.weight": "model-00027-of-00052.safetensors", "model.layers.41.post_attention_layernorm.weight": "model-00027-of-00052.safetensors", "model.layers.41.block_sparse_moe.experts.0.w3.weight": "model-00027-of-00052.safetensors", "model.layers.41.block_sparse_moe.experts.1.w3.weight": "model-00027-of-00052.safetensors", "model.layers.41.block_sparse_moe.experts.0.w1.weight": "model-00027-of-00052.safetensors", "model.layers.41.block_sparse_moe.experts.1.w1.weight": "model-00027-of-00052.safetensors", "model.layers.41.block_sparse_moe.experts.0.w2.weight": "model-00028-of-00052.safetensors", "model.layers.41.block_sparse_moe.experts.1.w2.weight": "model-00028-of-00052.safetensors", "model.layers.42.input_layernorm.weight": "model-00028-of-00052.safetensors", "model.layers.42.self_attn.q_proj.weight": "model-00028-of-00052.safetensors", "model.layers.42.self_attn.k_proj.weight": "model-00028-of-00052.safetensors", "model.layers.42.self_attn.v_proj.weight": "model-00028-of-00052.safetensors", "model.layers.42.self_attn.o_proj.weight": "model-00028-of-00052.safetensors", "model.layers.42.post_attention_layernorm.weight": "model-00028-of-00052.safetensors", "model.layers.42.block_sparse_moe.experts.0.w3.weight": "model-00028-of-00052.safetensors", "model.layers.42.block_sparse_moe.experts.1.w3.weight": "model-00028-of-00052.safetensors", "model.layers.42.block_sparse_moe.experts.0.w1.weight": "model-00028-of-00052.safetensors", "model.layers.42.block_sparse_moe.experts.1.w1.weight": "model-00028-of-00052.safetensors", "model.layers.42.block_sparse_moe.experts.0.w2.weight": "model-00028-of-00052.safetensors", "model.layers.42.block_sparse_moe.experts.1.w2.weight": "model-00028-of-00052.safetensors", "model.layers.43.input_layernorm.weight": "model-00028-of-00052.safetensors", "model.layers.43.self_attn.q_proj.weight": "model-00028-of-00052.safetensors", "model.layers.43.self_attn.k_proj.weight": "model-00028-of-00052.safetensors", "model.layers.43.self_attn.v_proj.weight": "model-00028-of-00052.safetensors", "model.layers.43.self_attn.o_proj.weight": "model-00028-of-00052.safetensors", "model.layers.43.post_attention_layernorm.weight": "model-00028-of-00052.safetensors", "model.layers.43.block_sparse_moe.experts.0.w3.weight": "model-00028-of-00052.safetensors", "model.layers.43.block_sparse_moe.experts.1.w3.weight": "model-00029-of-00052.safetensors", "model.layers.43.block_sparse_moe.experts.0.w1.weight": "model-00029-of-00052.safetensors", "model.layers.43.block_sparse_moe.experts.1.w1.weight": "model-00029-of-00052.safetensors", "model.layers.43.block_sparse_moe.experts.0.w2.weight": "model-00029-of-00052.safetensors", "model.layers.43.block_sparse_moe.experts.1.w2.weight": "model-00029-of-00052.safetensors", "model.layers.44.input_layernorm.weight": "model-00029-of-00052.safetensors", "model.layers.44.self_attn.q_proj.weight": "model-00029-of-00052.safetensors", "model.layers.44.self_attn.k_proj.weight": "model-00029-of-00052.safetensors", "model.layers.44.self_attn.v_proj.weight": "model-00029-of-00052.safetensors", "model.layers.44.self_attn.o_proj.weight": "model-00029-of-00052.safetensors", "model.layers.44.post_attention_layernorm.weight": "model-00029-of-00052.safetensors", "model.layers.44.block_sparse_moe.experts.0.w3.weight": "model-00029-of-00052.safetensors", "model.layers.44.block_sparse_moe.experts.1.w3.weight": "model-00029-of-00052.safetensors", "model.layers.44.block_sparse_moe.experts.0.w1.weight": "model-00029-of-00052.safetensors", "model.layers.44.block_sparse_moe.experts.1.w1.weight": "model-00029-of-00052.safetensors", "model.layers.44.block_sparse_moe.experts.0.w2.weight": "model-00029-of-00052.safetensors", "model.layers.44.block_sparse_moe.experts.1.w2.weight": "model-00030-of-00052.safetensors", "model.layers.45.input_layernorm.weight": "model-00030-of-00052.safetensors", "model.layers.45.self_attn.q_proj.weight": "model-00030-of-00052.safetensors", "model.layers.45.self_attn.k_proj.weight": "model-00030-of-00052.safetensors", "model.layers.45.self_attn.v_proj.weight": "model-00030-of-00052.safetensors", "model.layers.45.self_attn.o_proj.weight": "model-00030-of-00052.safetensors", "model.layers.45.post_attention_layernorm.weight": "model-00030-of-00052.safetensors", "model.layers.45.block_sparse_moe.experts.0.w3.weight": "model-00030-of-00052.safetensors", "model.layers.45.block_sparse_moe.experts.1.w3.weight": "model-00030-of-00052.safetensors", "model.layers.45.block_sparse_moe.experts.0.w1.weight": "model-00030-of-00052.safetensors", "model.layers.45.block_sparse_moe.experts.1.w1.weight": "model-00030-of-00052.safetensors", "model.layers.45.block_sparse_moe.experts.0.w2.weight": "model-00030-of-00052.safetensors", "model.layers.45.block_sparse_moe.experts.1.w2.weight": "model-00030-of-00052.safetensors", "model.layers.46.input_layernorm.weight": "model-00030-of-00052.safetensors", "model.layers.46.self_attn.q_proj.weight": "model-00030-of-00052.safetensors", "model.layers.46.self_attn.k_proj.weight": "model-00030-of-00052.safetensors", "model.layers.46.self_attn.v_proj.weight": "model-00030-of-00052.safetensors", "model.layers.46.self_attn.o_proj.weight": "model-00030-of-00052.safetensors", "model.layers.46.post_attention_layernorm.weight": "model-00030-of-00052.safetensors", "model.layers.46.block_sparse_moe.experts.0.w3.weight": "model-00030-of-00052.safetensors", "model.layers.46.block_sparse_moe.experts.1.w3.weight": "model-00030-of-00052.safetensors", "model.layers.46.block_sparse_moe.experts.0.w1.weight": "model-00031-of-00052.safetensors", "model.layers.46.block_sparse_moe.experts.1.w1.weight": "model-00031-of-00052.safetensors", "model.layers.46.block_sparse_moe.experts.0.w2.weight": "model-00031-of-00052.safetensors", "model.layers.46.block_sparse_moe.experts.1.w2.weight": "model-00031-of-00052.safetensors", "model.layers.47.input_layernorm.weight": "model-00031-of-00052.safetensors", "model.layers.47.self_attn.q_proj.weight": "model-00031-of-00052.safetensors", "model.layers.47.self_attn.k_proj.weight": "model-00031-of-00052.safetensors", "model.layers.47.self_attn.v_proj.weight": "model-00031-of-00052.safetensors", "model.layers.47.self_attn.o_proj.weight": "model-00031-of-00052.safetensors", "model.layers.47.post_attention_layernorm.weight": "model-00031-of-00052.safetensors", "model.layers.47.block_sparse_moe.experts.0.w3.weight": "model-00031-of-00052.safetensors", "model.layers.47.block_sparse_moe.experts.1.w3.weight": "model-00031-of-00052.safetensors", "model.layers.47.block_sparse_moe.experts.0.w1.weight": "model-00031-of-00052.safetensors", "model.layers.47.block_sparse_moe.experts.1.w1.weight": "model-00031-of-00052.safetensors", "model.layers.47.block_sparse_moe.experts.0.w2.weight": "model-00031-of-00052.safetensors", "model.layers.47.block_sparse_moe.experts.1.w2.weight": "model-00031-of-00052.safetensors", "model.layers.48.input_layernorm.weight": "model-00031-of-00052.safetensors", "model.layers.48.self_attn.q_proj.weight": "model-00032-of-00052.safetensors", "model.layers.48.self_attn.k_proj.weight": "model-00032-of-00052.safetensors", "model.layers.48.self_attn.v_proj.weight": "model-00032-of-00052.safetensors", "model.layers.48.self_attn.o_proj.weight": "model-00032-of-00052.safetensors", "model.layers.48.post_attention_layernorm.weight": "model-00032-of-00052.safetensors", "model.layers.48.block_sparse_moe.experts.0.w3.weight": "model-00032-of-00052.safetensors", "model.layers.48.block_sparse_moe.experts.1.w3.weight": "model-00032-of-00052.safetensors", "model.layers.48.block_sparse_moe.experts.0.w1.weight": "model-00032-of-00052.safetensors", "model.layers.48.block_sparse_moe.experts.1.w1.weight": "model-00032-of-00052.safetensors", "model.layers.48.block_sparse_moe.experts.0.w2.weight": "model-00032-of-00052.safetensors", "model.layers.48.block_sparse_moe.experts.1.w2.weight": "model-00032-of-00052.safetensors", "model.layers.49.input_layernorm.weight": "model-00032-of-00052.safetensors", "model.layers.49.self_attn.q_proj.weight": "model-00032-of-00052.safetensors", "model.layers.49.self_attn.k_proj.weight": "model-00032-of-00052.safetensors", "model.layers.49.self_attn.v_proj.weight": "model-00032-of-00052.safetensors", "model.layers.49.self_attn.o_proj.weight": "model-00032-of-00052.safetensors", "model.layers.49.post_attention_layernorm.weight": "model-00032-of-00052.safetensors", "model.layers.49.block_sparse_moe.experts.0.w3.weight": "model-00032-of-00052.safetensors", "model.layers.49.block_sparse_moe.experts.1.w3.weight": "model-00032-of-00052.safetensors", "model.layers.49.block_sparse_moe.experts.0.w1.weight": "model-00032-of-00052.safetensors", "model.layers.49.block_sparse_moe.experts.1.w1.weight": "model-00033-of-00052.safetensors", "model.layers.49.block_sparse_moe.experts.0.w2.weight": "model-00033-of-00052.safetensors", "model.layers.49.block_sparse_moe.experts.1.w2.weight": "model-00033-of-00052.safetensors", "model.layers.50.input_layernorm.weight": "model-00033-of-00052.safetensors", "model.layers.50.self_attn.q_proj.weight": "model-00033-of-00052.safetensors", "model.layers.50.self_attn.k_proj.weight": "model-00033-of-00052.safetensors", "model.layers.50.self_attn.v_proj.weight": "model-00033-of-00052.safetensors", "model.layers.50.self_attn.o_proj.weight": "model-00033-of-00052.safetensors", "model.layers.50.post_attention_layernorm.weight": "model-00033-of-00052.safetensors", "model.layers.50.block_sparse_moe.experts.0.w3.weight": "model-00033-of-00052.safetensors", "model.layers.50.block_sparse_moe.experts.1.w3.weight": "model-00033-of-00052.safetensors", "model.layers.50.block_sparse_moe.experts.0.w1.weight": "model-00033-of-00052.safetensors", "model.layers.50.block_sparse_moe.experts.1.w1.weight": "model-00033-of-00052.safetensors", "model.layers.50.block_sparse_moe.experts.0.w2.weight": "model-00033-of-00052.safetensors", "model.layers.50.block_sparse_moe.experts.1.w2.weight": "model-00033-of-00052.safetensors", "model.layers.51.input_layernorm.weight": "model-00033-of-00052.safetensors", "model.layers.51.self_attn.q_proj.weight": "model-00033-of-00052.safetensors", "model.layers.51.self_attn.k_proj.weight": "model-00033-of-00052.safetensors", "model.layers.51.self_attn.v_proj.weight": "model-00033-of-00052.safetensors", "model.layers.51.self_attn.o_proj.weight": "model-00033-of-00052.safetensors", "model.layers.51.post_attention_layernorm.weight": "model-00033-of-00052.safetensors", "model.layers.51.block_sparse_moe.experts.0.w3.weight": "model-00034-of-00052.safetensors", "model.layers.51.block_sparse_moe.experts.1.w3.weight": "model-00034-of-00052.safetensors", "model.layers.51.block_sparse_moe.experts.0.w1.weight": "model-00034-of-00052.safetensors", "model.layers.51.block_sparse_moe.experts.1.w1.weight": "model-00034-of-00052.safetensors", "model.layers.51.block_sparse_moe.experts.0.w2.weight": "model-00034-of-00052.safetensors", "model.layers.51.block_sparse_moe.experts.1.w2.weight": "model-00034-of-00052.safetensors", "model.layers.52.input_layernorm.weight": "model-00034-of-00052.safetensors", "model.layers.52.self_attn.q_proj.weight": "model-00034-of-00052.safetensors", "model.layers.52.self_attn.k_proj.weight": "model-00034-of-00052.safetensors", "model.layers.52.self_attn.v_proj.weight": "model-00034-of-00052.safetensors", "model.layers.52.self_attn.o_proj.weight": "model-00034-of-00052.safetensors", "model.layers.52.post_attention_layernorm.weight": "model-00034-of-00052.safetensors", "model.layers.52.block_sparse_moe.experts.0.w3.weight": "model-00034-of-00052.safetensors", "model.layers.52.block_sparse_moe.experts.1.w3.weight": "model-00034-of-00052.safetensors", "model.layers.52.block_sparse_moe.experts.0.w1.weight": "model-00034-of-00052.safetensors", "model.layers.52.block_sparse_moe.experts.1.w1.weight": "model-00034-of-00052.safetensors", "model.layers.52.block_sparse_moe.experts.0.w2.weight": "model-00035-of-00052.safetensors", "model.layers.52.block_sparse_moe.experts.1.w2.weight": "model-00035-of-00052.safetensors", "model.layers.53.input_layernorm.weight": "model-00035-of-00052.safetensors", "model.layers.53.self_attn.q_proj.weight": "model-00035-of-00052.safetensors", "model.layers.53.self_attn.k_proj.weight": "model-00035-of-00052.safetensors", "model.layers.53.self_attn.v_proj.weight": "model-00035-of-00052.safetensors", "model.layers.53.self_attn.o_proj.weight": "model-00035-of-00052.safetensors", "model.layers.53.post_attention_layernorm.weight": "model-00035-of-00052.safetensors", "model.layers.53.block_sparse_moe.experts.0.w3.weight": "model-00035-of-00052.safetensors", "model.layers.53.block_sparse_moe.experts.1.w3.weight": "model-00035-of-00052.safetensors", "model.layers.53.block_sparse_moe.experts.0.w1.weight": "model-00035-of-00052.safetensors", "model.layers.53.block_sparse_moe.experts.1.w1.weight": "model-00035-of-00052.safetensors", "model.layers.53.block_sparse_moe.experts.0.w2.weight": "model-00035-of-00052.safetensors", "model.layers.53.block_sparse_moe.experts.1.w2.weight": "model-00035-of-00052.safetensors", "model.layers.54.input_layernorm.weight": "model-00035-of-00052.safetensors", "model.layers.54.self_attn.q_proj.weight": "model-00035-of-00052.safetensors", "model.layers.54.self_attn.k_proj.weight": "model-00035-of-00052.safetensors", "model.layers.54.self_attn.v_proj.weight": "model-00035-of-00052.safetensors", "model.layers.54.self_attn.o_proj.weight": "model-00035-of-00052.safetensors", "model.layers.54.post_attention_layernorm.weight": "model-00035-of-00052.safetensors", "model.layers.54.block_sparse_moe.experts.0.w3.weight": "model-00035-of-00052.safetensors", "model.layers.54.block_sparse_moe.experts.1.w3.weight": "model-00036-of-00052.safetensors", "model.layers.54.block_sparse_moe.experts.0.w1.weight": "model-00036-of-00052.safetensors", "model.layers.54.block_sparse_moe.experts.1.w1.weight": "model-00036-of-00052.safetensors", "model.layers.54.block_sparse_moe.experts.0.w2.weight": "model-00036-of-00052.safetensors", "model.layers.54.block_sparse_moe.experts.1.w2.weight": "model-00036-of-00052.safetensors", "model.layers.55.input_layernorm.weight": "model-00036-of-00052.safetensors", "model.layers.55.self_attn.q_proj.weight": "model-00036-of-00052.safetensors", "model.layers.55.self_attn.k_proj.weight": "model-00036-of-00052.safetensors", "model.layers.55.self_attn.v_proj.weight": "model-00036-of-00052.safetensors", "model.layers.55.self_attn.o_proj.weight": "model-00036-of-00052.safetensors", "model.layers.55.post_attention_layernorm.weight": "model-00036-of-00052.safetensors", "model.layers.55.block_sparse_moe.experts.0.w3.weight": "model-00036-of-00052.safetensors", "model.layers.55.block_sparse_moe.experts.1.w3.weight": "model-00036-of-00052.safetensors", "model.layers.55.block_sparse_moe.experts.0.w1.weight": "model-00036-of-00052.safetensors", "model.layers.55.block_sparse_moe.experts.1.w1.weight": "model-00036-of-00052.safetensors", "model.layers.55.block_sparse_moe.experts.0.w2.weight": "model-00036-of-00052.safetensors", "model.layers.55.block_sparse_moe.experts.1.w2.weight": "model-00037-of-00052.safetensors", "model.layers.56.input_layernorm.weight": "model-00037-of-00052.safetensors", "model.layers.56.self_attn.q_proj.weight": "model-00037-of-00052.safetensors", "model.layers.56.self_attn.k_proj.weight": "model-00037-of-00052.safetensors", "model.layers.56.self_attn.v_proj.weight": "model-00037-of-00052.safetensors", "model.layers.56.self_attn.o_proj.weight": "model-00037-of-00052.safetensors", "model.layers.56.post_attention_layernorm.weight": "model-00037-of-00052.safetensors", "model.layers.56.block_sparse_moe.experts.0.w3.weight": "model-00037-of-00052.safetensors", "model.layers.56.block_sparse_moe.experts.1.w3.weight": "model-00037-of-00052.safetensors", "model.layers.56.block_sparse_moe.experts.0.w1.weight": "model-00037-of-00052.safetensors", "model.layers.56.block_sparse_moe.experts.1.w1.weight": "model-00037-of-00052.safetensors", "model.layers.56.block_sparse_moe.experts.0.w2.weight": "model-00037-of-00052.safetensors", "model.layers.56.block_sparse_moe.experts.1.w2.weight": "model-00037-of-00052.safetensors", "model.layers.57.input_layernorm.weight": "model-00037-of-00052.safetensors", "model.layers.57.self_attn.q_proj.weight": "model-00037-of-00052.safetensors", "model.layers.57.self_attn.k_proj.weight": "model-00037-of-00052.safetensors", "model.layers.57.self_attn.v_proj.weight": "model-00037-of-00052.safetensors", "model.layers.57.self_attn.o_proj.weight": "model-00037-of-00052.safetensors", "model.layers.57.post_attention_layernorm.weight": "model-00037-of-00052.safetensors", "model.layers.57.block_sparse_moe.experts.0.w3.weight": "model-00037-of-00052.safetensors", "model.layers.57.block_sparse_moe.experts.1.w3.weight": "model-00037-of-00052.safetensors", "model.layers.57.block_sparse_moe.experts.0.w1.weight": "model-00038-of-00052.safetensors", "model.layers.57.block_sparse_moe.experts.1.w1.weight": "model-00038-of-00052.safetensors", "model.layers.57.block_sparse_moe.experts.0.w2.weight": "model-00038-of-00052.safetensors", "model.layers.57.block_sparse_moe.experts.1.w2.weight": "model-00038-of-00052.safetensors", "model.layers.58.input_layernorm.weight": "model-00038-of-00052.safetensors", "model.layers.58.self_attn.q_proj.weight": "model-00038-of-00052.safetensors", "model.layers.58.self_attn.k_proj.weight": "model-00038-of-00052.safetensors", "model.layers.58.self_attn.v_proj.weight": "model-00038-of-00052.safetensors", "model.layers.58.self_attn.o_proj.weight": "model-00038-of-00052.safetensors", "model.layers.58.post_attention_layernorm.weight": "model-00038-of-00052.safetensors", "model.layers.58.block_sparse_moe.experts.0.w3.weight": "model-00038-of-00052.safetensors", "model.layers.58.block_sparse_moe.experts.1.w3.weight": "model-00038-of-00052.safetensors", "model.layers.58.block_sparse_moe.experts.0.w1.weight": "model-00038-of-00052.safetensors", "model.layers.58.block_sparse_moe.experts.1.w1.weight": "model-00038-of-00052.safetensors", "model.layers.58.block_sparse_moe.experts.0.w2.weight": "model-00038-of-00052.safetensors", "model.layers.58.block_sparse_moe.experts.1.w2.weight": "model-00038-of-00052.safetensors", "model.layers.59.input_layernorm.weight": "model-00038-of-00052.safetensors", "model.layers.59.self_attn.q_proj.weight": "model-00039-of-00052.safetensors", "model.layers.59.self_attn.k_proj.weight": "model-00039-of-00052.safetensors", "model.layers.59.self_attn.v_proj.weight": "model-00039-of-00052.safetensors", "model.layers.59.self_attn.o_proj.weight": "model-00039-of-00052.safetensors", "model.layers.59.post_attention_layernorm.weight": "model-00039-of-00052.safetensors", "model.layers.59.block_sparse_moe.experts.0.w3.weight": "model-00039-of-00052.safetensors", "model.layers.59.block_sparse_moe.experts.1.w3.weight": "model-00039-of-00052.safetensors", "model.layers.59.block_sparse_moe.experts.0.w1.weight": "model-00039-of-00052.safetensors", "model.layers.59.block_sparse_moe.experts.1.w1.weight": "model-00039-of-00052.safetensors", "model.layers.59.block_sparse_moe.experts.0.w2.weight": "model-00039-of-00052.safetensors", "model.layers.59.block_sparse_moe.experts.1.w2.weight": "model-00039-of-00052.safetensors", "model.layers.60.input_layernorm.weight": "model-00039-of-00052.safetensors", "model.layers.60.self_attn.q_proj.weight": "model-00039-of-00052.safetensors", "model.layers.60.self_attn.k_proj.weight": "model-00039-of-00052.safetensors", "model.layers.60.self_attn.v_proj.weight": "model-00039-of-00052.safetensors", "model.layers.60.self_attn.o_proj.weight": "model-00039-of-00052.safetensors", "model.layers.60.post_attention_layernorm.weight": "model-00039-of-00052.safetensors", "model.layers.60.block_sparse_moe.experts.0.w3.weight": "model-00039-of-00052.safetensors", "model.layers.60.block_sparse_moe.experts.1.w3.weight": "model-00039-of-00052.safetensors", "model.layers.60.block_sparse_moe.experts.0.w1.weight": "model-00039-of-00052.safetensors", "model.layers.60.block_sparse_moe.experts.1.w1.weight": "model-00040-of-00052.safetensors", "model.layers.60.block_sparse_moe.experts.0.w2.weight": "model-00040-of-00052.safetensors", "model.layers.60.block_sparse_moe.experts.1.w2.weight": "model-00040-of-00052.safetensors", "model.layers.61.input_layernorm.weight": "model-00040-of-00052.safetensors", "model.layers.61.self_attn.q_proj.weight": "model-00040-of-00052.safetensors", "model.layers.61.self_attn.k_proj.weight": "model-00040-of-00052.safetensors", "model.layers.61.self_attn.v_proj.weight": "model-00040-of-00052.safetensors", "model.layers.61.self_attn.o_proj.weight": "model-00040-of-00052.safetensors", "model.layers.61.post_attention_layernorm.weight": "model-00040-of-00052.safetensors", "model.layers.61.block_sparse_moe.experts.0.w3.weight": "model-00040-of-00052.safetensors", "model.layers.61.block_sparse_moe.experts.1.w3.weight": "model-00040-of-00052.safetensors", "model.layers.61.block_sparse_moe.experts.0.w1.weight": "model-00040-of-00052.safetensors", "model.layers.61.block_sparse_moe.experts.1.w1.weight": "model-00040-of-00052.safetensors", "model.layers.61.block_sparse_moe.experts.0.w2.weight": "model-00040-of-00052.safetensors", "model.layers.61.block_sparse_moe.experts.1.w2.weight": "model-00040-of-00052.safetensors", "model.layers.62.input_layernorm.weight": "model-00040-of-00052.safetensors", "model.layers.62.self_attn.q_proj.weight": "model-00040-of-00052.safetensors", "model.layers.62.self_attn.k_proj.weight": "model-00040-of-00052.safetensors", "model.layers.62.self_attn.v_proj.weight": "model-00040-of-00052.safetensors", "model.layers.62.self_attn.o_proj.weight": "model-00040-of-00052.safetensors", "model.layers.62.post_attention_layernorm.weight": "model-00040-of-00052.safetensors", "model.layers.62.block_sparse_moe.experts.0.w3.weight": "model-00041-of-00052.safetensors", "model.layers.62.block_sparse_moe.experts.1.w3.weight": "model-00041-of-00052.safetensors", "model.layers.62.block_sparse_moe.experts.0.w1.weight": "model-00041-of-00052.safetensors", "model.layers.62.block_sparse_moe.experts.1.w1.weight": "model-00041-of-00052.safetensors", "model.layers.62.block_sparse_moe.experts.0.w2.weight": "model-00041-of-00052.safetensors", "model.layers.62.block_sparse_moe.experts.1.w2.weight": "model-00041-of-00052.safetensors", "model.layers.63.input_layernorm.weight": "model-00041-of-00052.safetensors", "model.layers.63.self_attn.q_proj.weight": "model-00041-of-00052.safetensors", "model.layers.63.self_attn.k_proj.weight": "model-00041-of-00052.safetensors", "model.layers.63.self_attn.v_proj.weight": "model-00041-of-00052.safetensors", "model.layers.63.self_attn.o_proj.weight": "model-00041-of-00052.safetensors", "model.layers.63.post_attention_layernorm.weight": "model-00041-of-00052.safetensors", "model.layers.63.block_sparse_moe.experts.0.w3.weight": "model-00041-of-00052.safetensors", "model.layers.63.block_sparse_moe.experts.1.w3.weight": "model-00041-of-00052.safetensors", "model.layers.63.block_sparse_moe.experts.0.w1.weight": "model-00041-of-00052.safetensors", "model.layers.63.block_sparse_moe.experts.1.w1.weight": "model-00041-of-00052.safetensors", "model.layers.63.block_sparse_moe.experts.0.w2.weight": "model-00042-of-00052.safetensors", "model.layers.63.block_sparse_moe.experts.1.w2.weight": "model-00042-of-00052.safetensors", "model.layers.64.input_layernorm.weight": "model-00042-of-00052.safetensors", "model.layers.64.self_attn.q_proj.weight": "model-00042-of-00052.safetensors", "model.layers.64.self_attn.k_proj.weight": "model-00042-of-00052.safetensors", "model.layers.64.self_attn.v_proj.weight": "model-00042-of-00052.safetensors", "model.layers.64.self_attn.o_proj.weight": "model-00042-of-00052.safetensors", "model.layers.64.post_attention_layernorm.weight": "model-00042-of-00052.safetensors", "model.layers.64.block_sparse_moe.experts.0.w3.weight": "model-00042-of-00052.safetensors", "model.layers.64.block_sparse_moe.experts.1.w3.weight": "model-00042-of-00052.safetensors", "model.layers.64.block_sparse_moe.experts.0.w1.weight": "model-00042-of-00052.safetensors", "model.layers.64.block_sparse_moe.experts.1.w1.weight": "model-00042-of-00052.safetensors", "model.layers.64.block_sparse_moe.experts.0.w2.weight": "model-00042-of-00052.safetensors", "model.layers.64.block_sparse_moe.experts.1.w2.weight": "model-00042-of-00052.safetensors", "model.layers.65.input_layernorm.weight": "model-00042-of-00052.safetensors", "model.layers.65.self_attn.q_proj.weight": "model-00042-of-00052.safetensors", "model.layers.65.self_attn.k_proj.weight": "model-00042-of-00052.safetensors", "model.layers.65.self_attn.v_proj.weight": "model-00042-of-00052.safetensors", "model.layers.65.self_attn.o_proj.weight": "model-00042-of-00052.safetensors", "model.layers.65.post_attention_layernorm.weight": "model-00042-of-00052.safetensors", "model.layers.65.block_sparse_moe.experts.0.w3.weight": "model-00042-of-00052.safetensors", "model.layers.65.block_sparse_moe.experts.1.w3.weight": "model-00043-of-00052.safetensors", "model.layers.65.block_sparse_moe.experts.0.w1.weight": "model-00043-of-00052.safetensors", "model.layers.65.block_sparse_moe.experts.1.w1.weight": "model-00043-of-00052.safetensors", "model.layers.65.block_sparse_moe.experts.0.w2.weight": "model-00043-of-00052.safetensors", "model.layers.65.block_sparse_moe.experts.1.w2.weight": "model-00043-of-00052.safetensors", "model.layers.66.input_layernorm.weight": "model-00043-of-00052.safetensors", "model.layers.66.self_attn.q_proj.weight": "model-00043-of-00052.safetensors", "model.layers.66.self_attn.k_proj.weight": "model-00043-of-00052.safetensors", "model.layers.66.self_attn.v_proj.weight": "model-00043-of-00052.safetensors", "model.layers.66.self_attn.o_proj.weight": "model-00043-of-00052.safetensors", "model.layers.66.post_attention_layernorm.weight": "model-00043-of-00052.safetensors", "model.layers.66.block_sparse_moe.experts.0.w3.weight": "model-00043-of-00052.safetensors", "model.layers.66.block_sparse_moe.experts.1.w3.weight": "model-00043-of-00052.safetensors", "model.layers.66.block_sparse_moe.experts.0.w1.weight": "model-00043-of-00052.safetensors", "model.layers.66.block_sparse_moe.experts.1.w1.weight": "model-00043-of-00052.safetensors", "model.layers.66.block_sparse_moe.experts.0.w2.weight": "model-00043-of-00052.safetensors", "model.layers.66.block_sparse_moe.experts.1.w2.weight": "model-00044-of-00052.safetensors", "model.layers.67.input_layernorm.weight": "model-00044-of-00052.safetensors", "model.layers.67.self_attn.q_proj.weight": "model-00044-of-00052.safetensors", "model.layers.67.self_attn.k_proj.weight": "model-00044-of-00052.safetensors", "model.layers.67.self_attn.v_proj.weight": "model-00044-of-00052.safetensors", "model.layers.67.self_attn.o_proj.weight": "model-00044-of-00052.safetensors", "model.layers.67.post_attention_layernorm.weight": "model-00044-of-00052.safetensors", "model.layers.67.block_sparse_moe.experts.0.w3.weight": "model-00044-of-00052.safetensors", "model.layers.67.block_sparse_moe.experts.1.w3.weight": "model-00044-of-00052.safetensors", "model.layers.67.block_sparse_moe.experts.0.w1.weight": "model-00044-of-00052.safetensors", "model.layers.67.block_sparse_moe.experts.1.w1.weight": "model-00044-of-00052.safetensors", "model.layers.67.block_sparse_moe.experts.0.w2.weight": "model-00044-of-00052.safetensors", "model.layers.67.block_sparse_moe.experts.1.w2.weight": "model-00044-of-00052.safetensors", "model.layers.68.input_layernorm.weight": "model-00044-of-00052.safetensors", "model.layers.68.self_attn.q_proj.weight": "model-00044-of-00052.safetensors", "model.layers.68.self_attn.k_proj.weight": "model-00044-of-00052.safetensors", "model.layers.68.self_attn.v_proj.weight": "model-00044-of-00052.safetensors", "model.layers.68.self_attn.o_proj.weight": "model-00044-of-00052.safetensors", "model.layers.68.post_attention_layernorm.weight": "model-00044-of-00052.safetensors", "model.layers.68.block_sparse_moe.experts.0.w3.weight": "model-00044-of-00052.safetensors", "model.layers.68.block_sparse_moe.experts.1.w3.weight": "model-00044-of-00052.safetensors", "model.layers.68.block_sparse_moe.experts.0.w1.weight": "model-00045-of-00052.safetensors", "model.layers.68.block_sparse_moe.experts.1.w1.weight": "model-00045-of-00052.safetensors", "model.layers.68.block_sparse_moe.experts.0.w2.weight": "model-00045-of-00052.safetensors", "model.layers.68.block_sparse_moe.experts.1.w2.weight": "model-00045-of-00052.safetensors", "model.layers.69.input_layernorm.weight": "model-00045-of-00052.safetensors", "model.layers.69.self_attn.q_proj.weight": "model-00045-of-00052.safetensors", "model.layers.69.self_attn.k_proj.weight": "model-00045-of-00052.safetensors", "model.layers.69.self_attn.v_proj.weight": "model-00045-of-00052.safetensors", "model.layers.69.self_attn.o_proj.weight": "model-00045-of-00052.safetensors", "model.layers.69.post_attention_layernorm.weight": "model-00045-of-00052.safetensors", "model.layers.69.block_sparse_moe.experts.0.w3.weight": "model-00045-of-00052.safetensors", "model.layers.69.block_sparse_moe.experts.1.w3.weight": "model-00045-of-00052.safetensors", "model.layers.69.block_sparse_moe.experts.0.w1.weight": "model-00045-of-00052.safetensors", "model.layers.69.block_sparse_moe.experts.1.w1.weight": "model-00045-of-00052.safetensors", "model.layers.69.block_sparse_moe.experts.0.w2.weight": "model-00045-of-00052.safetensors", "model.layers.69.block_sparse_moe.experts.1.w2.weight": "model-00045-of-00052.safetensors", "model.layers.70.input_layernorm.weight": "model-00045-of-00052.safetensors", "model.layers.70.self_attn.q_proj.weight": "model-00046-of-00052.safetensors", "model.layers.70.self_attn.k_proj.weight": "model-00046-of-00052.safetensors", "model.layers.70.self_attn.v_proj.weight": "model-00046-of-00052.safetensors", "model.layers.70.self_attn.o_proj.weight": "model-00046-of-00052.safetensors", "model.layers.70.post_attention_layernorm.weight": "model-00046-of-00052.safetensors", "model.layers.70.block_sparse_moe.experts.0.w3.weight": "model-00046-of-00052.safetensors", "model.layers.70.block_sparse_moe.experts.1.w3.weight": "model-00046-of-00052.safetensors", "model.layers.70.block_sparse_moe.experts.0.w1.weight": "model-00046-of-00052.safetensors", "model.layers.70.block_sparse_moe.experts.1.w1.weight": "model-00046-of-00052.safetensors", "model.layers.70.block_sparse_moe.experts.0.w2.weight": "model-00046-of-00052.safetensors", "model.layers.70.block_sparse_moe.experts.1.w2.weight": "model-00046-of-00052.safetensors", "model.layers.71.input_layernorm.weight": "model-00046-of-00052.safetensors", "model.layers.71.self_attn.q_proj.weight": "model-00046-of-00052.safetensors", "model.layers.71.self_attn.k_proj.weight": "model-00046-of-00052.safetensors", "model.layers.71.self_attn.v_proj.weight": "model-00046-of-00052.safetensors", "model.layers.71.self_attn.o_proj.weight": "model-00046-of-00052.safetensors", "model.layers.71.post_attention_layernorm.weight": "model-00046-of-00052.safetensors", "model.layers.71.block_sparse_moe.experts.0.w3.weight": "model-00046-of-00052.safetensors", "model.layers.71.block_sparse_moe.experts.1.w3.weight": "model-00046-of-00052.safetensors", "model.layers.71.block_sparse_moe.experts.0.w1.weight": "model-00046-of-00052.safetensors", "model.layers.71.block_sparse_moe.experts.1.w1.weight": "model-00047-of-00052.safetensors", "model.layers.71.block_sparse_moe.experts.0.w2.weight": "model-00047-of-00052.safetensors", "model.layers.71.block_sparse_moe.experts.1.w2.weight": "model-00047-of-00052.safetensors", "model.layers.72.input_layernorm.weight": "model-00047-of-00052.safetensors", "model.layers.72.self_attn.q_proj.weight": "model-00047-of-00052.safetensors", "model.layers.72.self_attn.k_proj.weight": "model-00047-of-00052.safetensors", "model.layers.72.self_attn.v_proj.weight": "model-00047-of-00052.safetensors", "model.layers.72.self_attn.o_proj.weight": "model-00047-of-00052.safetensors", "model.layers.72.post_attention_layernorm.weight": "model-00047-of-00052.safetensors", "model.layers.72.block_sparse_moe.experts.0.w3.weight": "model-00047-of-00052.safetensors", "model.layers.72.block_sparse_moe.experts.1.w3.weight": "model-00047-of-00052.safetensors", "model.layers.72.block_sparse_moe.experts.0.w1.weight": "model-00047-of-00052.safetensors", "model.layers.72.block_sparse_moe.experts.1.w1.weight": "model-00047-of-00052.safetensors", "model.layers.72.block_sparse_moe.experts.0.w2.weight": "model-00047-of-00052.safetensors", "model.layers.72.block_sparse_moe.experts.1.w2.weight": "model-00047-of-00052.safetensors", "model.layers.73.input_layernorm.weight": "model-00047-of-00052.safetensors", "model.layers.73.self_attn.q_proj.weight": "model-00047-of-00052.safetensors", "model.layers.73.self_attn.k_proj.weight": "model-00047-of-00052.safetensors", "model.layers.73.self_attn.v_proj.weight": "model-00047-of-00052.safetensors", "model.layers.73.self_attn.o_proj.weight": "model-00047-of-00052.safetensors", "model.layers.73.post_attention_layernorm.weight": "model-00047-of-00052.safetensors", "model.layers.73.block_sparse_moe.experts.0.w3.weight": "model-00048-of-00052.safetensors", "model.layers.73.block_sparse_moe.experts.1.w3.weight": "model-00048-of-00052.safetensors", "model.layers.73.block_sparse_moe.experts.0.w1.weight": "model-00048-of-00052.safetensors", "model.layers.73.block_sparse_moe.experts.1.w1.weight": "model-00048-of-00052.safetensors", "model.layers.73.block_sparse_moe.experts.0.w2.weight": "model-00048-of-00052.safetensors", "model.layers.73.block_sparse_moe.experts.1.w2.weight": "model-00048-of-00052.safetensors", "model.layers.74.input_layernorm.weight": "model-00048-of-00052.safetensors", "model.layers.74.self_attn.q_proj.weight": "model-00048-of-00052.safetensors", "model.layers.74.self_attn.k_proj.weight": "model-00048-of-00052.safetensors", "model.layers.74.self_attn.v_proj.weight": "model-00048-of-00052.safetensors", "model.layers.74.self_attn.o_proj.weight": "model-00048-of-00052.safetensors", "model.layers.74.post_attention_layernorm.weight": "model-00048-of-00052.safetensors", "model.layers.74.block_sparse_moe.experts.0.w3.weight": "model-00048-of-00052.safetensors", "model.layers.74.block_sparse_moe.experts.1.w3.weight": "model-00048-of-00052.safetensors", "model.layers.74.block_sparse_moe.experts.0.w1.weight": "model-00048-of-00052.safetensors", "model.layers.74.block_sparse_moe.experts.1.w1.weight": "model-00048-of-00052.safetensors", "model.layers.74.block_sparse_moe.experts.0.w2.weight": "model-00049-of-00052.safetensors", "model.layers.74.block_sparse_moe.experts.1.w2.weight": "model-00049-of-00052.safetensors", "model.layers.75.input_layernorm.weight": "model-00049-of-00052.safetensors", "model.layers.75.self_attn.q_proj.weight": "model-00049-of-00052.safetensors", "model.layers.75.self_attn.k_proj.weight": "model-00049-of-00052.safetensors", "model.layers.75.self_attn.v_proj.weight": "model-00049-of-00052.safetensors", "model.layers.75.self_attn.o_proj.weight": "model-00049-of-00052.safetensors", "model.layers.75.post_attention_layernorm.weight": "model-00049-of-00052.safetensors", "model.layers.75.block_sparse_moe.experts.0.w3.weight": "model-00049-of-00052.safetensors", "model.layers.75.block_sparse_moe.experts.1.w3.weight": "model-00049-of-00052.safetensors", "model.layers.75.block_sparse_moe.experts.0.w1.weight": "model-00049-of-00052.safetensors", "model.layers.75.block_sparse_moe.experts.1.w1.weight": "model-00049-of-00052.safetensors", "model.layers.75.block_sparse_moe.experts.0.w2.weight": "model-00049-of-00052.safetensors", "model.layers.75.block_sparse_moe.experts.1.w2.weight": "model-00049-of-00052.safetensors", "model.layers.76.input_layernorm.weight": "model-00049-of-00052.safetensors", "model.layers.76.self_attn.q_proj.weight": "model-00049-of-00052.safetensors", "model.layers.76.self_attn.k_proj.weight": "model-00049-of-00052.safetensors", "model.layers.76.self_attn.v_proj.weight": "model-00049-of-00052.safetensors", "model.layers.76.self_attn.o_proj.weight": "model-00049-of-00052.safetensors", "model.layers.76.post_attention_layernorm.weight": "model-00049-of-00052.safetensors", "model.layers.76.block_sparse_moe.experts.0.w3.weight": "model-00049-of-00052.safetensors", "model.layers.76.block_sparse_moe.experts.1.w3.weight": "model-00050-of-00052.safetensors", "model.layers.76.block_sparse_moe.experts.0.w1.weight": "model-00050-of-00052.safetensors", "model.layers.76.block_sparse_moe.experts.1.w1.weight": "model-00050-of-00052.safetensors", "model.layers.76.block_sparse_moe.experts.0.w2.weight": "model-00050-of-00052.safetensors", "model.layers.76.block_sparse_moe.experts.1.w2.weight": "model-00050-of-00052.safetensors", "model.layers.77.input_layernorm.weight": "model-00050-of-00052.safetensors", "model.layers.77.self_attn.q_proj.weight": "model-00050-of-00052.safetensors", "model.layers.77.self_attn.k_proj.weight": "model-00050-of-00052.safetensors", "model.layers.77.self_attn.v_proj.weight": "model-00050-of-00052.safetensors", "model.layers.77.self_attn.o_proj.weight": "model-00050-of-00052.safetensors", "model.layers.77.post_attention_layernorm.weight": "model-00050-of-00052.safetensors", "model.layers.77.block_sparse_moe.experts.0.w3.weight": "model-00050-of-00052.safetensors", "model.layers.77.block_sparse_moe.experts.1.w3.weight": "model-00050-of-00052.safetensors", "model.layers.77.block_sparse_moe.experts.0.w1.weight": "model-00050-of-00052.safetensors", "model.layers.77.block_sparse_moe.experts.1.w1.weight": "model-00050-of-00052.safetensors", "model.layers.77.block_sparse_moe.experts.0.w2.weight": "model-00050-of-00052.safetensors", "model.layers.77.block_sparse_moe.experts.1.w2.weight": "model-00051-of-00052.safetensors", "model.layers.78.input_layernorm.weight": "model-00051-of-00052.safetensors", "model.layers.78.self_attn.q_proj.weight": "model-00051-of-00052.safetensors", "model.layers.78.self_attn.k_proj.weight": "model-00051-of-00052.safetensors", "model.layers.78.self_attn.v_proj.weight": "model-00051-of-00052.safetensors", "model.layers.78.self_attn.o_proj.weight": "model-00051-of-00052.safetensors", "model.layers.78.post_attention_layernorm.weight": "model-00051-of-00052.safetensors", "model.layers.78.block_sparse_moe.experts.0.w3.weight": "model-00051-of-00052.safetensors", "model.layers.78.block_sparse_moe.experts.1.w3.weight": "model-00051-of-00052.safetensors", "model.layers.78.block_sparse_moe.experts.0.w1.weight": "model-00051-of-00052.safetensors", "model.layers.78.block_sparse_moe.experts.1.w1.weight": "model-00051-of-00052.safetensors", "model.layers.78.block_sparse_moe.experts.0.w2.weight": "model-00051-of-00052.safetensors", "model.layers.78.block_sparse_moe.experts.1.w2.weight": "model-00051-of-00052.safetensors", "model.layers.79.input_layernorm.weight": "model-00051-of-00052.safetensors", "model.layers.79.self_attn.q_proj.weight": "model-00051-of-00052.safetensors", "model.layers.79.self_attn.k_proj.weight": "model-00051-of-00052.safetensors", "model.layers.79.self_attn.v_proj.weight": "model-00051-of-00052.safetensors", "model.layers.79.self_attn.o_proj.weight": "model-00051-of-00052.safetensors", "model.layers.79.post_attention_layernorm.weight": "model-00051-of-00052.safetensors", "model.layers.79.block_sparse_moe.experts.0.w3.weight": "model-00051-of-00052.safetensors", "model.layers.79.block_sparse_moe.experts.1.w3.weight": "model-00051-of-00052.safetensors", "model.layers.79.block_sparse_moe.experts.0.w1.weight": "model-00052-of-00052.safetensors", "model.layers.79.block_sparse_moe.experts.1.w1.weight": "model-00052-of-00052.safetensors", "model.layers.79.block_sparse_moe.experts.0.w2.weight": "model-00052-of-00052.safetensors", "model.layers.79.block_sparse_moe.experts.1.w2.weight": "model-00052-of-00052.safetensors", "model.norm.weight": "model-00052-of-00052.safetensors", "lm_head.weight": "model-00052-of-00052.safetensors", "model.layers.0.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.1.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.2.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.3.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.4.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.5.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.6.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.7.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.8.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.9.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.10.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.11.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.12.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.13.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.14.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.15.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.16.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.17.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.18.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.19.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.20.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.21.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.22.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.23.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.24.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.25.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.26.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.27.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.28.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.29.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.30.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.31.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.32.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.33.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.34.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.35.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.36.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.37.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.38.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.39.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.40.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.41.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.42.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.43.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.44.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.45.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.46.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.47.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.48.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.49.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.50.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.51.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.52.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.53.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.54.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.55.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.56.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.57.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.58.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.59.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.60.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.61.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.62.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.63.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.64.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.65.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.66.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.67.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.68.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.69.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.70.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.71.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.72.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.73.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.74.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.75.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.76.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.77.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.78.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors", "model.layers.79.block_sparse_moe.gate.weight": "model-00052-of-00052.safetensors"}} \ No newline at end of file diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..0ea7b34dbc9ac87cfac536219a0b8f7a380d1bbb --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|begin_of_text|>" +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c4f2cb4fdc65fd97c6fac149432c8347f2dcfc3e --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,2064 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\\n\\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n {%- for arg_name, arg_val in tool_call.arguments | items %}\n {{- arg_name + '=\"' + arg_val + '\"' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \")\" }}\n {%- else %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- endif %}\n {%- if builtin_tools is defined %}\n {#- This means we're in ipython mode #}\n {{- \"<|eom_id|>\" }}\n {%- else %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|begin_of_text|>", + "tokenizer_class": "PreTrainedTokenizer" +}