diff --git a/config.json b/config.json deleted file mode 100644 index 45fc64df6fa94a1b472c47602a76066aee7df64d..0000000000000000000000000000000000000000 --- a/config.json +++ /dev/null @@ -1,80 +0,0 @@ -{ - "architectures": [ - "GptOssForCausalLM" - ], - "attention_bias": true, - "attention_dropout": 0.0, - "eos_token_id": 200002, - "experts_per_token": 4, - "head_dim": 64, - "hidden_act": "silu", - "hidden_size": 2880, - "initial_context_length": 4096, - "initializer_range": 0.02, - "intermediate_size": 2880, - "layer_types": [ - "sliding_attention", - "full_attention", - "sliding_attention", - "full_attention", - "sliding_attention", - "full_attention", - "sliding_attention", - "full_attention", - "sliding_attention", - "full_attention", - "sliding_attention", - "full_attention", - "sliding_attention", - "full_attention", - "sliding_attention", - "full_attention", - "sliding_attention", - "full_attention", - "sliding_attention", - "full_attention", - "sliding_attention", - "full_attention", - "sliding_attention", - "full_attention", - "sliding_attention", - "full_attention", - "sliding_attention", - "full_attention", - "sliding_attention", - "full_attention", - "sliding_attention", - "full_attention", - "sliding_attention", - "full_attention", - "sliding_attention", - "full_attention" - ], - "max_position_embeddings": 131072, - "model_type": "gpt_oss", - "num_attention_heads": 64, - "num_experts_per_tok": 4, - "num_hidden_layers": 36, - "num_key_value_heads": 8, - "num_local_experts": 128, - "output_router_logits": false, - "pad_token_id": 199999, - "rms_norm_eps": 1e-05, - "rope_scaling": { - "beta_fast": 32.0, - "beta_slow": 1.0, - "factor": 32.0, - "original_max_position_embeddings": 4096, - "rope_type": "yarn", - "truncate": false - }, - "rope_theta": 150000, - "router_aux_loss_coef": 0.9, - "sliding_window": 128, - "swiglu_limit": 7.0, - "tie_word_embeddings": false, - "transformers_version": "4.55.0.dev0", - "use_cache": true, - "vocab_size": 201088, - "torch_dtype": "bfloat16" -} \ No newline at end of file diff --git a/model-00000-of-00014.safetensors b/model-00000-of-00014.safetensors deleted file mode 100644 index 8615e64cd507449d0ff278269f6f30f6d7046047..0000000000000000000000000000000000000000 --- a/model-00000-of-00014.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:695218884684c611fe08a74751ee443f971e9bd9bc062edba822da3fe45969b7 -size 4625017896 diff --git a/model-00001-of-00014.safetensors b/model-00001-of-00014.safetensors deleted file mode 100644 index f446c6f97f7d27a0efbfc027362a808b24a9e2c5..0000000000000000000000000000000000000000 --- a/model-00001-of-00014.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a881aa5f561b26a22b14a8262aa61849ace349ffd73d74769e030ac90a1fcf8a -size 4115586736 diff --git a/model-00001-of-00073.safetensors b/model-00001-of-00073.safetensors deleted file mode 100644 index 9e95ce0892cda3b58c0a218e10f62e4e9ed6bf3b..0000000000000000000000000000000000000000 --- a/model-00001-of-00073.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3914b9e47635ff09120b12a8d1b9066ca9b3680100ca20f44647f9a4c18f0c74 -size 1212106032 diff --git a/model-00002-of-00014.safetensors b/model-00002-of-00014.safetensors deleted file mode 100644 index 5cdecd0ac50e5a996b93321274c91789dd91b3cb..0000000000000000000000000000000000000000 --- a/model-00002-of-00014.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:022478dd04398c5bdb545a5be0a6437ecc2eb53d1dbd29edafcfff4b3ddf0a41 -size 4625017888 diff --git a/model-00002-of-00073.safetensors b/model-00002-of-00073.safetensors deleted file mode 100644 index 6ca79580f7ca04e9f4cb6c384d7c2ed5ae0b3126..0000000000000000000000000000000000000000 --- a/model-00002-of-00073.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:330a7853eafe265073156dbc3478fdf4c91be23e7927ac82fb66f041ea2caecc -size 4248207608 diff --git a/model-00003-of-00014.safetensors b/model-00003-of-00014.safetensors deleted file mode 100644 index b6c1b637b64b5e1a4e84895543a2a5e9cc4567b7..0000000000000000000000000000000000000000 --- a/model-00003-of-00014.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:47aee9e7b9d5bedb215042c01ccededd9bd9c30b0dddea862dc2506b9d6c74de -size 4115586752 diff --git a/model-00003-of-00073.safetensors b/model-00003-of-00073.safetensors deleted file mode 100644 index d3dbff71dc6d77387343622acfe419edc568e973..0000000000000000000000000000000000000000 --- a/model-00003-of-00073.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ccad652f94cd74f6d5571ad1cf9a4a89132e0acdb3a372939d11a919c30203e6 -size 2177954704 diff --git a/model-00004-of-00014.safetensors b/model-00004-of-00014.safetensors deleted file mode 100644 index afc330d317da75954840a29baede067bbbbc20f4..0000000000000000000000000000000000000000 --- a/model-00004-of-00014.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f6c2752acda607b1d5ca52df9e75c1b9b2761e6875ff10c9bd6ddac473c0262e -size 4625017896 diff --git a/model-00004-of-00073.safetensors b/model-00004-of-00073.safetensors deleted file mode 100644 index 2d82271f8cbe9a4a80fe44cebed2cd7adfbcf347..0000000000000000000000000000000000000000 --- a/model-00004-of-00073.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:785b41a8fd53adbd0ca6448081a0e178eb1755fea67f5712e79aaa8aac326b7e -size 4248207608 diff --git a/model-00005-of-00014.safetensors b/model-00005-of-00014.safetensors deleted file mode 100644 index f798bf13d63288c04d9ae46104d2b161baa4d298..0000000000000000000000000000000000000000 --- a/model-00005-of-00014.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0c8dd401544c31cb93b8459eee7da20ea2a07626a59455d7d92b85257df9b46c -size 4115586696 diff --git a/model-00005-of-00073.safetensors b/model-00005-of-00073.safetensors deleted file mode 100644 index a409789dd3c3733165fad6bafce3349f4a832ef7..0000000000000000000000000000000000000000 --- a/model-00005-of-00073.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c9c59e5882005748564a57d002cee9aa9bc0d7f47970cfeb2eedb859b5ad91f7 -size 2177954704 diff --git a/model-00006-of-00014.safetensors b/model-00006-of-00014.safetensors deleted file mode 100644 index 3dbd730581a2514b111231c9c09df5c2aac375c6..0000000000000000000000000000000000000000 --- a/model-00006-of-00014.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:28d839f2e027985a8b14e45f2323798862eddb7770ee9800ea6b7c803abee489 -size 4625017856 diff --git a/model-00006-of-00073.safetensors b/model-00006-of-00073.safetensors deleted file mode 100644 index 50402fd0b56886a62cc5d5e905cf066f1bc43a7f..0000000000000000000000000000000000000000 --- a/model-00006-of-00073.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d4afeef53609f547bbe4dfd1ba068671c302f056c41c5258c2f08e5718024fcd -size 4248207608 diff --git a/model-00007-of-00014.safetensors b/model-00007-of-00014.safetensors deleted file mode 100644 index b112bc3b69d95159bfad56dbcc1c152ce3482b10..0000000000000000000000000000000000000000 --- a/model-00007-of-00014.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c8958c5f183c04f6ea959cfd90562b5128124154b2bbf979b8a22b9405b30ed8 -size 4060267176 diff --git a/model-00007-of-00073.safetensors b/model-00007-of-00073.safetensors deleted file mode 100644 index 68b085a9d203bf1461057d72531dfbc2737690fd..0000000000000000000000000000000000000000 --- a/model-00007-of-00073.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d02dba1d5cf2ca878f46a11d51ee6cacb04cb911e0e58f9379469bdd84039302 -size 2177954704 diff --git a/model-00008-of-00014.safetensors b/model-00008-of-00014.safetensors deleted file mode 100644 index 265749f8e92661a91a55affc3ce6f2b6a3a56a99..0000000000000000000000000000000000000000 --- a/model-00008-of-00014.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:bf1f2a88868ffc37d520dcf77d26f0e823710b5e682d473ff10f6974fa3b7517 -size 4625017896 diff --git a/model-00008-of-00073.safetensors b/model-00008-of-00073.safetensors deleted file mode 100644 index 866af38dbbbb9ae5d3cad4d23c49e47580bc0510..0000000000000000000000000000000000000000 --- a/model-00008-of-00073.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c6cb2f3ffff5bb15388d6b87cabffa8a08c2f6748b4186509c8859264fe6bae8 -size 4248207608 diff --git a/model-00009-of-00014.safetensors b/model-00009-of-00014.safetensors deleted file mode 100644 index 51713b9ccebf39dc86d49fa4f8fcf54eedaf8464..0000000000000000000000000000000000000000 --- a/model-00009-of-00014.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f72d34a4004241b45c332b61f8ffa124e9a913bc1ab442b66e717d3e94e741ce -size 4170906304 diff --git a/model-00009-of-00073.safetensors b/model-00009-of-00073.safetensors deleted file mode 100644 index b73de47aaef29992caf8bbacb347203f7f7b271c..0000000000000000000000000000000000000000 --- a/model-00009-of-00073.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:12d8ea1c16fb77b608258658638aed138252fd7c8f61e2adb9d2d430d0daf925 -size 2177954704 diff --git a/model-00010-of-00014.safetensors b/model-00010-of-00014.safetensors deleted file mode 100644 index b990a8080b2660b011d2e505556163b90c80dd33..0000000000000000000000000000000000000000 --- a/model-00010-of-00014.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f48c867c2cb0a44bfc2f8768cb98e4aec9a350946fceacfebdcad5d32ad4a471 -size 4625017896 diff --git a/model-00010-of-00073.safetensors b/model-00010-of-00073.safetensors deleted file mode 100644 index aa511d117936c495d41952c1ba1676ba66a15867..0000000000000000000000000000000000000000 --- a/model-00010-of-00073.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d780df3aed5ab8d3e0ac2679f0e9e9c4ddec6c28edf521eebaddfb54a4c01c6e -size 4248207608 diff --git a/model-00011-of-00014.safetensors b/model-00011-of-00014.safetensors deleted file mode 100644 index d81ede9c1c10d560516d500500977997ce238a43..0000000000000000000000000000000000000000 --- a/model-00011-of-00014.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a06851b2cfd35f48722f823bc1ab8f7bcb4a878a5b8e975f4d3544f230454eeb -size 4115586752 diff --git a/model-00011-of-00073.safetensors b/model-00011-of-00073.safetensors deleted file mode 100644 index 309b65317389a003ab200403bfccb60896e4153a..0000000000000000000000000000000000000000 --- a/model-00011-of-00073.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d8e26a9cde5ade1d139621c8c4965bf5609a05a86690e352c0acd991f15bf39d -size 2177954704 diff --git a/model-00012-of-00014.safetensors b/model-00012-of-00014.safetensors deleted file mode 100644 index 9263e23d9dbe806b366ff16ae6526d78a74b4b73..0000000000000000000000000000000000000000 --- a/model-00012-of-00014.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3af33667c307e20ae2a7648ea52653de46dd0171601ec5c696e47a2f5d5bf1e4 -size 4064660808 diff --git a/model-00012-of-00073.safetensors b/model-00012-of-00073.safetensors deleted file mode 100644 index 23e3ee21df45c89477b20d17acd3860fc650daa3..0000000000000000000000000000000000000000 --- a/model-00012-of-00073.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:839223b330ca91a30f2af17520db90eae73618d2511ca684e9c5627f14dafb5c -size 4248207608 diff --git a/model-00013-of-00014.safetensors b/model-00013-of-00014.safetensors deleted file mode 100644 index fbe9a99425691ebb42a4d519afb445585a9227b6..0000000000000000000000000000000000000000 --- a/model-00013-of-00014.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c52f4cbe342e2cb0f1497ce059000973218b35c1e56816dd8683fe485bcdac82 -size 1902116864 diff --git a/model-00013-of-00073.safetensors b/model-00013-of-00073.safetensors deleted file mode 100644 index bfd38edeba7a128be23617a972ad6485292b5157..0000000000000000000000000000000000000000 --- a/model-00013-of-00073.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c06b561df31023d8ab6ee29b6ae722cecbbeffe1690f28a9373f5dc9955c1bbe -size 2177954704 diff --git a/model-00014-of-00014.safetensors b/model-00014-of-00014.safetensors deleted file mode 100644 index 0cc0d7a6c234cd77a726881f98ddb0615ae95caa..0000000000000000000000000000000000000000 --- a/model-00014-of-00014.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:54b1be1609696c307cc5ca117b1fa54feaddebffa04e9c2db117652a01964230 -size 4115586736 diff --git a/model-00014-of-00073.safetensors b/model-00014-of-00073.safetensors deleted file mode 100644 index cf7b64a455fa205f64fcbe25bde782a443f75018..0000000000000000000000000000000000000000 --- a/model-00014-of-00073.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:071b6de5fe46555142d2ae18536fc5eadd1237c8ba3576d3fcb4ee2a488aa081 -size 4248207608 diff --git a/model-00015-of-00073.safetensors b/model-00015-of-00073.safetensors deleted file mode 100644 index f40b535532fe91777ea42b4c8967304b7bbd76cf..0000000000000000000000000000000000000000 --- a/model-00015-of-00073.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f3f5c51935e3418191641055ebdacb97b7d0b368ac06b4852e40707ea59c2637 -size 2177954704 diff --git a/model-00016-of-00073.safetensors b/model-00016-of-00073.safetensors deleted file mode 100644 index 979dc3dab1afdb3648fd4277cf6cfc58ac414916..0000000000000000000000000000000000000000 --- a/model-00016-of-00073.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e0416580a4650d6f6308a34a26e3bbae381abfcec97e0e0da30c79255be586f3 -size 4248207608 diff --git a/model-00017-of-00073.safetensors b/model-00017-of-00073.safetensors deleted file mode 100644 index fb74a263c14c92142a59e0d4853652cf3baa0984..0000000000000000000000000000000000000000 --- a/model-00017-of-00073.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ddcd373d64204ce8acea5d464df161a6b97c5762ef418db7a6a33b6d190688e0 -size 2177954704 diff --git a/model-00018-of-00073.safetensors b/model-00018-of-00073.safetensors deleted file mode 100644 index f7980894dd4c5bebab5f93bb5066b2bb01645662..0000000000000000000000000000000000000000 --- a/model-00018-of-00073.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:87c76cc955ef820987d4a1e038d9c0829a5feec2b09168be23869176f6d2c599 -size 4248207608 diff --git a/model-00019-of-00073.safetensors b/model-00019-of-00073.safetensors deleted file mode 100644 index b77b2de4cb8fb7eb1b3461c1cdbd5e071f5a7b63..0000000000000000000000000000000000000000 --- a/model-00019-of-00073.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:09f12ba29a3e7827f700d5be3cf1250913babc9c8b4579c655cda949d7ec2302 -size 2177954704 diff --git a/model-00020-of-00073.safetensors b/model-00020-of-00073.safetensors deleted file mode 100644 index b02b13acc029bd6b219e9153b158cea7ec94d8f7..0000000000000000000000000000000000000000 --- a/model-00020-of-00073.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:fc462f6dd2e3ca5b53f048469872bef103ca7421cb1cd99a36540b75dfa10529 -size 4248207608 diff --git a/model-00021-of-00073.safetensors b/model-00021-of-00073.safetensors deleted file mode 100644 index 78d4e63e3cb5c0c5aab5fbae26ff13aa1b369dc0..0000000000000000000000000000000000000000 --- a/model-00021-of-00073.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f9e7f7ed117a3cfa773647e22e50b18af95d6afdfd3b27ed342bbb18a95b85ef -size 2177954656 diff --git a/model-00022-of-00073.safetensors b/model-00022-of-00073.safetensors deleted file mode 100644 index 62edf9e0380273e01b4a04966b23622b953c6471..0000000000000000000000000000000000000000 --- a/model-00022-of-00073.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:96dbeda00ec25ed81926014a97fc3fe4fc9b29251b211a5ca02adbd7669e2f2d -size 4248207608 diff --git a/model-00023-of-00073.safetensors b/model-00023-of-00073.safetensors deleted file mode 100644 index b66c163be20994e8c3b424ddd74735bb03c63d99..0000000000000000000000000000000000000000 --- a/model-00023-of-00073.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:69700f9a3a690350be808f3aae3993b294d3e504e3f09e445960ced149bb6cf7 -size 2177954720 diff --git a/model-00024-of-00073.safetensors b/model-00024-of-00073.safetensors deleted file mode 100644 index b11aeda13887a2abe88c892d2d8a8d9a1e97f94f..0000000000000000000000000000000000000000 --- a/model-00024-of-00073.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5c7c5cc68dd9bd22739bfc1241905466c9f94223cdd37c75df56e2b52fbb1433 -size 4248207608 diff --git a/model-00025-of-00073.safetensors b/model-00025-of-00073.safetensors deleted file mode 100644 index 748fd0ec4d2b7b05f55a5c454d58d4cb80fff164..0000000000000000000000000000000000000000 --- a/model-00025-of-00073.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3e169503316f450cba5ddb647672f49bf664b08ea630532647fa2074b2a4fd8b -size 2177954720 diff --git a/model-00026-of-00073.safetensors b/model-00026-of-00073.safetensors deleted file mode 100644 index c3d0c87cf9ab0b4ddca04f72a8182d0f97accb7a..0000000000000000000000000000000000000000 --- a/model-00026-of-00073.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:fab908aeec305e091da1378a7fdbd4a36cb133346639e3ae2de646885e775a92 -size 4248207608 diff --git a/model-00027-of-00073.safetensors b/model-00027-of-00073.safetensors deleted file mode 100644 index 254427f4b8dda18098afe51034e9e8a70dcd6900..0000000000000000000000000000000000000000 --- a/model-00027-of-00073.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ddf5ed49b9d8175d5df4378d07046ae55e2918a80828c33beed47c6b24537ab6 -size 2177954720 diff --git a/model-00028-of-00073.safetensors b/model-00028-of-00073.safetensors deleted file mode 100644 index 5e2e36f830f252625e3c4535b18f0fdf94d5dca1..0000000000000000000000000000000000000000 --- a/model-00028-of-00073.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b38ae58be6b3cd695a31ed996a51e4b025fc18b0b3363d26a3035e4c9107f292 -size 4248207608 diff --git a/model-00029-of-00073.safetensors b/model-00029-of-00073.safetensors deleted file mode 100644 index 982c2a6f83d39cd451dd0d03d6b349ca13e3e24b..0000000000000000000000000000000000000000 --- a/model-00029-of-00073.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:182d1533641ca2171285f062211ad318b69632ea9e5c8e74d4aa1bbe6774a345 -size 2177954720 diff --git a/model-00030-of-00073.safetensors b/model-00030-of-00073.safetensors deleted file mode 100644 index 619ecd2c3e5815835d5e9244eb705cfecc67a282..0000000000000000000000000000000000000000 --- a/model-00030-of-00073.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:801e9bba14f900af03acac078d9e2f25f56ba17a3df0cf58f497bd9dc717a3a5 -size 4248207608 diff --git a/model-00031-of-00073.safetensors b/model-00031-of-00073.safetensors deleted file mode 100644 index 60a9081e2ed2e63aef327b41ea22f1ffac4b1065..0000000000000000000000000000000000000000 --- a/model-00031-of-00073.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:77253965d6dab34b78ac8409c1443d402caa9a0913bb0f461eb6f242a44c1de6 -size 2177954720 diff --git a/model-00032-of-00073.safetensors b/model-00032-of-00073.safetensors deleted file mode 100644 index e088d1cf70d6ccc8d4ec2e53070d89bbe8f06d3d..0000000000000000000000000000000000000000 --- a/model-00032-of-00073.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:bd4b467d698348f4167d8ce1f146571ce3a115bb34e72beb2513adde7e3c7013 -size 4248207608 diff --git a/model-00033-of-00073.safetensors b/model-00033-of-00073.safetensors deleted file mode 100644 index ece0fb9042209765d9d0857515a16ecc26b2dbe6..0000000000000000000000000000000000000000 --- a/model-00033-of-00073.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:57a360500976d8d3f8397f954227dcd1ca00a892e1037a503cf2c71e888bd868 -size 2177954720 diff --git a/model-00034-of-00073.safetensors b/model-00034-of-00073.safetensors deleted file mode 100644 index b3473218ad725e2ead72901feaadbd8ca672e84d..0000000000000000000000000000000000000000 --- a/model-00034-of-00073.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2d4500db25d1e0cd4c8af3b64d35c282c791e7a00fd20a806ca9eb08d0d5af4b -size 4248207608 diff --git a/model-00035-of-00073.safetensors b/model-00035-of-00073.safetensors deleted file mode 100644 index 5037c15310216c2060d299b5736ad54be6d6293d..0000000000000000000000000000000000000000 --- a/model-00035-of-00073.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b01ea9de9f48549c8b939def2d6aa7782e937f97b2dc63316b8d8fe62bc7fbf0 -size 2177954720 diff --git a/model-00036-of-00073.safetensors b/model-00036-of-00073.safetensors deleted file mode 100644 index 8c77cdef89394d095f4865afd08184115d82779e..0000000000000000000000000000000000000000 --- a/model-00036-of-00073.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c01cd3c0bc5ff06a9a3f8b83eb7a31ca317e4b472d6502c8c2c95c751ad9071f -size 4248207608 diff --git a/model-00037-of-00073.safetensors b/model-00037-of-00073.safetensors deleted file mode 100644 index d8432b77ddd8da01f3e2fe58e716c6de61429c47..0000000000000000000000000000000000000000 --- a/model-00037-of-00073.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:130920255d797fd142ff55d48967a6ed957b4f0558ceca87859ec38ca92664d7 -size 2177954720 diff --git a/model-00038-of-00073.safetensors b/model-00038-of-00073.safetensors deleted file mode 100644 index 633d9fe345c5542e2f2de6421298ff34abc2d748..0000000000000000000000000000000000000000 --- a/model-00038-of-00073.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:706c5294efbb99d10148563a3a306d7db8a3c9db483e148ee5f6c6bb1f012b0c -size 4248207608 diff --git a/model-00039-of-00073.safetensors b/model-00039-of-00073.safetensors deleted file mode 100644 index cd3410544a998d57b29d39c4e352fa71a175029f..0000000000000000000000000000000000000000 --- a/model-00039-of-00073.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4634048ccaf93f65aba56ab70564556924d4a389d9cbf142f54e61c4fdff9455 -size 2177954720 diff --git a/model-00040-of-00073.safetensors b/model-00040-of-00073.safetensors deleted file mode 100644 index 59b46802ae309ada93f5e5a2a79900df073f55c6..0000000000000000000000000000000000000000 --- a/model-00040-of-00073.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:03a4451636a6c153fb133937597a001b68faaf30270207a401f0800815f3e32b -size 4248207608 diff --git a/model-00041-of-00073.safetensors b/model-00041-of-00073.safetensors deleted file mode 100644 index 5b7ae4fd5603f26370f933cb535c3b68f0a24f11..0000000000000000000000000000000000000000 --- a/model-00041-of-00073.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2cf83320dc211af80cef0fcdb3e0d94db898a3acdc1054690c976bee57fb5956 -size 2177954720 diff --git a/model-00042-of-00073.safetensors b/model-00042-of-00073.safetensors deleted file mode 100644 index 38ddf5bacfc6b3276123c88538f5f65d605abb68..0000000000000000000000000000000000000000 --- a/model-00042-of-00073.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3b81f70422b73125f8a04e507fcb662e5bcad40c79babf80dd2a01e6ab615838 -size 4248207608 diff --git a/model-00043-of-00073.safetensors b/model-00043-of-00073.safetensors deleted file mode 100644 index 690b90dc2b519106fda47d7631601025d6217da8..0000000000000000000000000000000000000000 --- a/model-00043-of-00073.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:35651e3087546262ec16d14bc920ea2240d2a227702563b0db52b50ac9d5437f -size 2177954720 diff --git a/model-00044-of-00073.safetensors b/model-00044-of-00073.safetensors deleted file mode 100644 index e8bf115ae1b04ea22b203f131982067568bf5454..0000000000000000000000000000000000000000 --- a/model-00044-of-00073.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:99dd26c8737824484c95c12a71bfe9d877ccfee7030d6f7215f6aa8020ec1883 -size 4248207608 diff --git a/model-00045-of-00073.safetensors b/model-00045-of-00073.safetensors deleted file mode 100644 index e259f31bda0d09983587e1a9629b9e174648df74..0000000000000000000000000000000000000000 --- a/model-00045-of-00073.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:726cc5dad9399469bb7b3bd5214779b91d9bf0ad4a0e003d8066cec09a25c14a -size 2177954720 diff --git a/model-00046-of-00073.safetensors b/model-00046-of-00073.safetensors deleted file mode 100644 index 33170bdd9887e7d7e16a95fe53b1651925a8f9b5..0000000000000000000000000000000000000000 --- a/model-00046-of-00073.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b25c699bfefc316cdb3c7014bf5ed65f103119cef2250dfe2f51cbc5a8c984ad -size 4248207608 diff --git a/model-00047-of-00073.safetensors b/model-00047-of-00073.safetensors deleted file mode 100644 index 4d53a4d625145a85e271b32ffd7eaca14b6d75d7..0000000000000000000000000000000000000000 --- a/model-00047-of-00073.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4e055c90fa9d9a884ff0dc78b229c76771813fa055edd57e72be727fcef90d05 -size 2177954720 diff --git a/model-00048-of-00073.safetensors b/model-00048-of-00073.safetensors deleted file mode 100644 index e664814bfc93124ab5c5c65dab8f96b78f4227c4..0000000000000000000000000000000000000000 --- a/model-00048-of-00073.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:26920c082406875325b7104c3fc38a263d6b3d289a734ebcc5f8fbc440f04acd -size 4248207608 diff --git a/model-00049-of-00073.safetensors b/model-00049-of-00073.safetensors deleted file mode 100644 index 121f58bb0fb14a3225e4b15a9948b1da747516e9..0000000000000000000000000000000000000000 --- a/model-00049-of-00073.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b1678d54158879646f62ff322712e2b62648bf8ac9cc037b881cf6c4fa0bf098 -size 2177954720 diff --git a/model-00050-of-00073.safetensors b/model-00050-of-00073.safetensors deleted file mode 100644 index 34673773752258b916bc88b3b3dbbff130e070ad..0000000000000000000000000000000000000000 --- a/model-00050-of-00073.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ca83dc8bbfa9759aabae63346ca1c2c0a45a9a27a29ef4b372daf12df04b7fc5 -size 4248207608 diff --git a/model-00051-of-00073.safetensors b/model-00051-of-00073.safetensors deleted file mode 100644 index 80accad8bd17f2ddf754b3acb047eb3d89181336..0000000000000000000000000000000000000000 --- a/model-00051-of-00073.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:89f9cd3ecc23ee3092843f05af93f11c277770fff745b62f760018b753451c34 -size 2177954720 diff --git a/model-00052-of-00073.safetensors b/model-00052-of-00073.safetensors deleted file mode 100644 index 701b3df82d9b62b1dda07f582f84cb40ac518cc8..0000000000000000000000000000000000000000 --- a/model-00052-of-00073.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:602d8c69f9f3a9f01ecadf09568be3d12fcd6ad204e86c2611de72a36d64eaa3 -size 4248207608 diff --git a/model-00053-of-00073.safetensors b/model-00053-of-00073.safetensors deleted file mode 100644 index b84d2a5d88ee96a4a7c2b93bcc149bd06933eb5e..0000000000000000000000000000000000000000 --- a/model-00053-of-00073.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:fed9bbf9e0b1bec5c9919605ddb3a4831e9fac1d22c64fee9ddc708a8dd0c9e9 -size 2177954720 diff --git a/model-00054-of-00073.safetensors b/model-00054-of-00073.safetensors deleted file mode 100644 index 928153264585a9716f3b6094430eb3efb876da8a..0000000000000000000000000000000000000000 --- a/model-00054-of-00073.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c402365f13f21ba4d879e2e3f872684e4f2a6efe9fcc2416b72b803e3ae69c6a -size 4248207608 diff --git a/model-00055-of-00073.safetensors b/model-00055-of-00073.safetensors deleted file mode 100644 index 75183b13e87e17aa8cc4251a48426af88dcb0afa..0000000000000000000000000000000000000000 --- a/model-00055-of-00073.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:fc93912edb6af0b8f5740154110d368dad10d5d9a79bfd439e4eddc61affd549 -size 2177954720 diff --git a/model-00056-of-00073.safetensors b/model-00056-of-00073.safetensors deleted file mode 100644 index 09c26812a88e39c6f05b9963511899d79cb8e72e..0000000000000000000000000000000000000000 --- a/model-00056-of-00073.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5bb7be770966745bcf38c79d2943609064e47c7b97a8e4d5faab27ad16a4a522 -size 4248207608 diff --git a/model-00057-of-00073.safetensors b/model-00057-of-00073.safetensors deleted file mode 100644 index 829bcb1f6d601090816439fbc857572c6e5ef887..0000000000000000000000000000000000000000 --- a/model-00057-of-00073.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ca5206836236361e82646b4397e3a0fcffece1c3d40143be50d47f4724e4f25f -size 2177954720 diff --git a/model-00058-of-00073.safetensors b/model-00058-of-00073.safetensors deleted file mode 100644 index a13e78b7fc1add6bcb1b51bd03258efff4505e62..0000000000000000000000000000000000000000 --- a/model-00058-of-00073.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9c5e87b19c194268efb94cdcbba0b2a96fa31569ac4fb2467ed6bdb42b745013 -size 4248207608 diff --git a/model-00059-of-00073.safetensors b/model-00059-of-00073.safetensors deleted file mode 100644 index fcb5d46a39f83bd11f3cf79b68de2c7a1c03d372..0000000000000000000000000000000000000000 --- a/model-00059-of-00073.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2e9378a0f1bfaa3ee2483f32a699001f038794091f55874eb616115103b1d5f0 -size 2177954720 diff --git a/model-00060-of-00073.safetensors b/model-00060-of-00073.safetensors deleted file mode 100644 index 5e967de20ab0280640138abc79298ab269fc46b6..0000000000000000000000000000000000000000 --- a/model-00060-of-00073.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:89bf89236ee5f756410e89c6c13d8c0b9c9406a5c8c571a3f8d8404e9026130b -size 4248207608 diff --git a/model-00061-of-00073.safetensors b/model-00061-of-00073.safetensors deleted file mode 100644 index 891dfac75b333f733116dd8c655dee16c043ccfe..0000000000000000000000000000000000000000 --- a/model-00061-of-00073.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1555d2ea08860e15df1175e1708af8f588df2ed72c9235dc2556f0662fec6ae9 -size 2177954720 diff --git a/model-00062-of-00073.safetensors b/model-00062-of-00073.safetensors deleted file mode 100644 index 8f5e3807891cf4b43a78453b52de1757a98d7111..0000000000000000000000000000000000000000 --- a/model-00062-of-00073.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:879b29c5e616c1ca6066b6ecca749534f205c52cb37dd89fe4a23f876a80ef2c -size 4248207608 diff --git a/model-00063-of-00073.safetensors b/model-00063-of-00073.safetensors deleted file mode 100644 index 90727cb3438bc7488603510bedacaba85046def6..0000000000000000000000000000000000000000 --- a/model-00063-of-00073.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:031db2eb404ab17b91059bd66eb5a6ab79ced7f54eab298518414547c28c1422 -size 2177954720 diff --git a/model-00064-of-00073.safetensors b/model-00064-of-00073.safetensors deleted file mode 100644 index b802cc218e98efaa311cb52721c1e94b428b73f7..0000000000000000000000000000000000000000 --- a/model-00064-of-00073.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:07416b5a8e97597bd78cfc5dddd15155737b732b45c983bd4197cb5b9e6e9971 -size 4248207608 diff --git a/model-00065-of-00073.safetensors b/model-00065-of-00073.safetensors deleted file mode 100644 index 108fa73c0028b1053fd690b65b19a98bdc458714..0000000000000000000000000000000000000000 --- a/model-00065-of-00073.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:44ed3379df65be77e9582831ad29c8eb17ebedc56ec4b62a99a892e557f4a1bc -size 2177954720 diff --git a/model-00066-of-00073.safetensors b/model-00066-of-00073.safetensors deleted file mode 100644 index 472d73e3f8381a07bedca895ab500a48abafdfe2..0000000000000000000000000000000000000000 --- a/model-00066-of-00073.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9d2d73da24ce2cca148fd32bda049e035f4a768bc4397a2d9bbf12753e1a3041 -size 4248207608 diff --git a/model-00067-of-00073.safetensors b/model-00067-of-00073.safetensors deleted file mode 100644 index 59da3ab33641be5d41adf5335f936c3bc5c94482..0000000000000000000000000000000000000000 --- a/model-00067-of-00073.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:abdf3e672e63d4fe2e7e41be37e82348275bd3f02d205cb72198bc8d719b92b4 -size 2177954720 diff --git a/model-00068-of-00073.safetensors b/model-00068-of-00073.safetensors deleted file mode 100644 index a8d7441725ed412aa8534cf23bb2b9900fe247f8..0000000000000000000000000000000000000000 --- a/model-00068-of-00073.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:84e0570dfa5e297b998d56f33740f63d2f8768d4e580d4faab27f9352c7601b9 -size 4248207608 diff --git a/model-00069-of-00073.safetensors b/model-00069-of-00073.safetensors deleted file mode 100644 index cdacc6a0e5c84e84a41911e20f8ebbead74fce38..0000000000000000000000000000000000000000 --- a/model-00069-of-00073.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f463a221cf3e4d964086efa9e3979333a07a3277777d3c72bff28c20bb44bb75 -size 2177954720 diff --git a/model-00070-of-00073.safetensors b/model-00070-of-00073.safetensors deleted file mode 100644 index a89ebbd3b10f65a3727b451800b75a6b676517d7..0000000000000000000000000000000000000000 --- a/model-00070-of-00073.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8a27f910d335741aa81cdcba6dbd2c08fc0b3f3d6fd3b830483892ea889a2b7c -size 4248207608 diff --git a/model-00071-of-00073.safetensors b/model-00071-of-00073.safetensors deleted file mode 100644 index d8192b8d8e5b2a808eae212f9c18f858238d422d..0000000000000000000000000000000000000000 --- a/model-00071-of-00073.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c912313b07b81b233d5868e9c85611f27a25e46d21c180a631cb89496872e8bc -size 2177954720 diff --git a/model-00072-of-00073.safetensors b/model-00072-of-00073.safetensors deleted file mode 100644 index 7e59f53eec8db338c9a644760ff7de0835f4964d..0000000000000000000000000000000000000000 --- a/model-00072-of-00073.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:bd5fda01c3978d13399bc7a40d66b7f829c1aecd10bab86b6d1c32f579571a08 -size 4248207608 diff --git a/model-00073-of-00073.safetensors b/model-00073-of-00073.safetensors deleted file mode 100644 index a47a886aac2f748957f45f71b5a21aed9fb6cf31..0000000000000000000000000000000000000000 --- a/model-00073-of-00073.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3b54c33edab866f4acadd8dbc354a8e81334a398c7d2eacb4908e5dce995ac2c -size 3282388504 diff --git a/model.safetensors.index.json b/model.safetensors.index.json deleted file mode 100644 index 75a1a617c9945b6a2b4fe6205e77cc983537bbcc..0000000000000000000000000000000000000000 --- a/model.safetensors.index.json +++ /dev/null @@ -1,622 +0,0 @@ -{ - "metadata": { - "total_size": 233658313344 - }, - "weight_map": { - "model.embed_tokens.weight": "model-00001-of-00073.safetensors", - "model.layers.0.self_attn.sinks": "model-00001-of-00073.safetensors", - "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00073.safetensors", - "model.layers.0.self_attn.q_proj.bias": "model-00001-of-00073.safetensors", - "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00073.safetensors", - "model.layers.0.self_attn.k_proj.bias": "model-00001-of-00073.safetensors", - "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00073.safetensors", - "model.layers.0.self_attn.v_proj.bias": "model-00001-of-00073.safetensors", - "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00073.safetensors", - "model.layers.0.self_attn.o_proj.bias": "model-00001-of-00073.safetensors", - "model.layers.0.mlp.router.weight": "model-00001-of-00073.safetensors", - "model.layers.0.mlp.router.bias": "model-00001-of-00073.safetensors", - "model.layers.0.mlp.experts.gate_up_proj": "model-00002-of-00073.safetensors", - "model.layers.0.mlp.experts.gate_up_proj_bias": "model-00002-of-00073.safetensors", - "model.layers.0.mlp.experts.down_proj": "model-00003-of-00073.safetensors", - "model.layers.0.mlp.experts.down_proj_bias": "model-00003-of-00073.safetensors", - "model.layers.0.input_layernorm.weight": "model-00003-of-00073.safetensors", - "model.layers.0.post_attention_layernorm.weight": "model-00003-of-00073.safetensors", - "model.layers.1.self_attn.sinks": "model-00003-of-00073.safetensors", - "model.layers.1.self_attn.q_proj.weight": "model-00003-of-00073.safetensors", - "model.layers.1.self_attn.q_proj.bias": "model-00003-of-00073.safetensors", - "model.layers.1.self_attn.k_proj.weight": "model-00003-of-00073.safetensors", - "model.layers.1.self_attn.k_proj.bias": "model-00003-of-00073.safetensors", - "model.layers.1.self_attn.v_proj.weight": "model-00003-of-00073.safetensors", - "model.layers.1.self_attn.v_proj.bias": "model-00003-of-00073.safetensors", - "model.layers.1.self_attn.o_proj.weight": "model-00003-of-00073.safetensors", - "model.layers.1.self_attn.o_proj.bias": "model-00003-of-00073.safetensors", - "model.layers.1.mlp.router.weight": "model-00003-of-00073.safetensors", - "model.layers.1.mlp.router.bias": "model-00003-of-00073.safetensors", - "model.layers.1.mlp.experts.gate_up_proj": "model-00004-of-00073.safetensors", - "model.layers.1.mlp.experts.gate_up_proj_bias": "model-00004-of-00073.safetensors", - "model.layers.1.mlp.experts.down_proj": "model-00005-of-00073.safetensors", - "model.layers.1.mlp.experts.down_proj_bias": "model-00005-of-00073.safetensors", - "model.layers.1.input_layernorm.weight": "model-00005-of-00073.safetensors", - "model.layers.1.post_attention_layernorm.weight": "model-00005-of-00073.safetensors", - "model.layers.2.self_attn.sinks": "model-00005-of-00073.safetensors", - "model.layers.2.self_attn.q_proj.weight": "model-00005-of-00073.safetensors", - "model.layers.2.self_attn.q_proj.bias": "model-00005-of-00073.safetensors", - "model.layers.2.self_attn.k_proj.weight": "model-00005-of-00073.safetensors", - "model.layers.2.self_attn.k_proj.bias": "model-00005-of-00073.safetensors", - "model.layers.2.self_attn.v_proj.weight": "model-00005-of-00073.safetensors", - "model.layers.2.self_attn.v_proj.bias": "model-00005-of-00073.safetensors", - "model.layers.2.self_attn.o_proj.weight": "model-00005-of-00073.safetensors", - "model.layers.2.self_attn.o_proj.bias": "model-00005-of-00073.safetensors", - "model.layers.2.mlp.router.weight": "model-00005-of-00073.safetensors", - "model.layers.2.mlp.router.bias": "model-00005-of-00073.safetensors", - "model.layers.2.mlp.experts.gate_up_proj": "model-00006-of-00073.safetensors", - "model.layers.2.mlp.experts.gate_up_proj_bias": "model-00006-of-00073.safetensors", - "model.layers.2.mlp.experts.down_proj": "model-00007-of-00073.safetensors", - "model.layers.2.mlp.experts.down_proj_bias": "model-00007-of-00073.safetensors", - "model.layers.2.input_layernorm.weight": "model-00007-of-00073.safetensors", - "model.layers.2.post_attention_layernorm.weight": "model-00007-of-00073.safetensors", - "model.layers.3.self_attn.sinks": "model-00007-of-00073.safetensors", - "model.layers.3.self_attn.q_proj.weight": "model-00007-of-00073.safetensors", - "model.layers.3.self_attn.q_proj.bias": "model-00007-of-00073.safetensors", - "model.layers.3.self_attn.k_proj.weight": "model-00007-of-00073.safetensors", - "model.layers.3.self_attn.k_proj.bias": "model-00007-of-00073.safetensors", - "model.layers.3.self_attn.v_proj.weight": "model-00007-of-00073.safetensors", - "model.layers.3.self_attn.v_proj.bias": "model-00007-of-00073.safetensors", - "model.layers.3.self_attn.o_proj.weight": "model-00007-of-00073.safetensors", - "model.layers.3.self_attn.o_proj.bias": "model-00007-of-00073.safetensors", - "model.layers.3.mlp.router.weight": "model-00007-of-00073.safetensors", - "model.layers.3.mlp.router.bias": "model-00007-of-00073.safetensors", - "model.layers.3.mlp.experts.gate_up_proj": "model-00008-of-00073.safetensors", - "model.layers.3.mlp.experts.gate_up_proj_bias": "model-00008-of-00073.safetensors", - "model.layers.3.mlp.experts.down_proj": "model-00009-of-00073.safetensors", - "model.layers.3.mlp.experts.down_proj_bias": "model-00009-of-00073.safetensors", - "model.layers.3.input_layernorm.weight": "model-00009-of-00073.safetensors", - "model.layers.3.post_attention_layernorm.weight": "model-00009-of-00073.safetensors", - "model.layers.4.self_attn.sinks": "model-00009-of-00073.safetensors", - "model.layers.4.self_attn.q_proj.weight": "model-00009-of-00073.safetensors", - "model.layers.4.self_attn.q_proj.bias": "model-00009-of-00073.safetensors", - "model.layers.4.self_attn.k_proj.weight": "model-00009-of-00073.safetensors", - "model.layers.4.self_attn.k_proj.bias": "model-00009-of-00073.safetensors", - "model.layers.4.self_attn.v_proj.weight": "model-00009-of-00073.safetensors", - "model.layers.4.self_attn.v_proj.bias": "model-00009-of-00073.safetensors", - "model.layers.4.self_attn.o_proj.weight": "model-00009-of-00073.safetensors", - "model.layers.4.self_attn.o_proj.bias": "model-00009-of-00073.safetensors", - "model.layers.4.mlp.router.weight": "model-00009-of-00073.safetensors", - "model.layers.4.mlp.router.bias": "model-00009-of-00073.safetensors", - "model.layers.4.mlp.experts.gate_up_proj": "model-00010-of-00073.safetensors", - "model.layers.4.mlp.experts.gate_up_proj_bias": "model-00010-of-00073.safetensors", - "model.layers.4.mlp.experts.down_proj": "model-00011-of-00073.safetensors", - "model.layers.4.mlp.experts.down_proj_bias": "model-00011-of-00073.safetensors", - "model.layers.4.input_layernorm.weight": "model-00011-of-00073.safetensors", - "model.layers.4.post_attention_layernorm.weight": "model-00011-of-00073.safetensors", - "model.layers.5.self_attn.sinks": "model-00011-of-00073.safetensors", - "model.layers.5.self_attn.q_proj.weight": "model-00011-of-00073.safetensors", - "model.layers.5.self_attn.q_proj.bias": "model-00011-of-00073.safetensors", - "model.layers.5.self_attn.k_proj.weight": "model-00011-of-00073.safetensors", - "model.layers.5.self_attn.k_proj.bias": "model-00011-of-00073.safetensors", - "model.layers.5.self_attn.v_proj.weight": "model-00011-of-00073.safetensors", - "model.layers.5.self_attn.v_proj.bias": "model-00011-of-00073.safetensors", - "model.layers.5.self_attn.o_proj.weight": "model-00011-of-00073.safetensors", - "model.layers.5.self_attn.o_proj.bias": "model-00011-of-00073.safetensors", - "model.layers.5.mlp.router.weight": "model-00011-of-00073.safetensors", - "model.layers.5.mlp.router.bias": "model-00011-of-00073.safetensors", - "model.layers.5.mlp.experts.gate_up_proj": "model-00012-of-00073.safetensors", - "model.layers.5.mlp.experts.gate_up_proj_bias": "model-00012-of-00073.safetensors", - "model.layers.5.mlp.experts.down_proj": "model-00013-of-00073.safetensors", - "model.layers.5.mlp.experts.down_proj_bias": "model-00013-of-00073.safetensors", - "model.layers.5.input_layernorm.weight": "model-00013-of-00073.safetensors", - "model.layers.5.post_attention_layernorm.weight": "model-00013-of-00073.safetensors", - "model.layers.6.self_attn.sinks": "model-00013-of-00073.safetensors", - "model.layers.6.self_attn.q_proj.weight": "model-00013-of-00073.safetensors", - "model.layers.6.self_attn.q_proj.bias": "model-00013-of-00073.safetensors", - "model.layers.6.self_attn.k_proj.weight": "model-00013-of-00073.safetensors", - "model.layers.6.self_attn.k_proj.bias": "model-00013-of-00073.safetensors", - "model.layers.6.self_attn.v_proj.weight": "model-00013-of-00073.safetensors", - "model.layers.6.self_attn.v_proj.bias": "model-00013-of-00073.safetensors", - "model.layers.6.self_attn.o_proj.weight": "model-00013-of-00073.safetensors", - "model.layers.6.self_attn.o_proj.bias": "model-00013-of-00073.safetensors", - "model.layers.6.mlp.router.weight": "model-00013-of-00073.safetensors", - "model.layers.6.mlp.router.bias": "model-00013-of-00073.safetensors", - "model.layers.6.mlp.experts.gate_up_proj": "model-00014-of-00073.safetensors", - "model.layers.6.mlp.experts.gate_up_proj_bias": "model-00014-of-00073.safetensors", - "model.layers.6.mlp.experts.down_proj": "model-00015-of-00073.safetensors", - "model.layers.6.mlp.experts.down_proj_bias": "model-00015-of-00073.safetensors", - "model.layers.6.input_layernorm.weight": "model-00015-of-00073.safetensors", - "model.layers.6.post_attention_layernorm.weight": "model-00015-of-00073.safetensors", - "model.layers.7.self_attn.sinks": "model-00015-of-00073.safetensors", - "model.layers.7.self_attn.q_proj.weight": "model-00015-of-00073.safetensors", - "model.layers.7.self_attn.q_proj.bias": "model-00015-of-00073.safetensors", - "model.layers.7.self_attn.k_proj.weight": "model-00015-of-00073.safetensors", - "model.layers.7.self_attn.k_proj.bias": "model-00015-of-00073.safetensors", - "model.layers.7.self_attn.v_proj.weight": "model-00015-of-00073.safetensors", - "model.layers.7.self_attn.v_proj.bias": "model-00015-of-00073.safetensors", - "model.layers.7.self_attn.o_proj.weight": "model-00015-of-00073.safetensors", - "model.layers.7.self_attn.o_proj.bias": "model-00015-of-00073.safetensors", - "model.layers.7.mlp.router.weight": "model-00015-of-00073.safetensors", - "model.layers.7.mlp.router.bias": "model-00015-of-00073.safetensors", - "model.layers.7.mlp.experts.gate_up_proj": "model-00016-of-00073.safetensors", - "model.layers.7.mlp.experts.gate_up_proj_bias": "model-00016-of-00073.safetensors", - "model.layers.7.mlp.experts.down_proj": "model-00017-of-00073.safetensors", - "model.layers.7.mlp.experts.down_proj_bias": "model-00017-of-00073.safetensors", - "model.layers.7.input_layernorm.weight": "model-00017-of-00073.safetensors", - "model.layers.7.post_attention_layernorm.weight": "model-00017-of-00073.safetensors", - "model.layers.8.self_attn.sinks": "model-00017-of-00073.safetensors", - "model.layers.8.self_attn.q_proj.weight": "model-00017-of-00073.safetensors", - "model.layers.8.self_attn.q_proj.bias": "model-00017-of-00073.safetensors", - "model.layers.8.self_attn.k_proj.weight": "model-00017-of-00073.safetensors", - "model.layers.8.self_attn.k_proj.bias": "model-00017-of-00073.safetensors", - "model.layers.8.self_attn.v_proj.weight": "model-00017-of-00073.safetensors", - "model.layers.8.self_attn.v_proj.bias": "model-00017-of-00073.safetensors", - "model.layers.8.self_attn.o_proj.weight": "model-00017-of-00073.safetensors", - "model.layers.8.self_attn.o_proj.bias": "model-00017-of-00073.safetensors", - "model.layers.8.mlp.router.weight": "model-00017-of-00073.safetensors", - "model.layers.8.mlp.router.bias": "model-00017-of-00073.safetensors", - "model.layers.8.mlp.experts.gate_up_proj": "model-00018-of-00073.safetensors", - "model.layers.8.mlp.experts.gate_up_proj_bias": "model-00018-of-00073.safetensors", - "model.layers.8.mlp.experts.down_proj": "model-00019-of-00073.safetensors", - "model.layers.8.mlp.experts.down_proj_bias": "model-00019-of-00073.safetensors", - "model.layers.8.input_layernorm.weight": "model-00019-of-00073.safetensors", - "model.layers.8.post_attention_layernorm.weight": "model-00019-of-00073.safetensors", - "model.layers.9.self_attn.sinks": "model-00019-of-00073.safetensors", - "model.layers.9.self_attn.q_proj.weight": "model-00019-of-00073.safetensors", - "model.layers.9.self_attn.q_proj.bias": "model-00019-of-00073.safetensors", - "model.layers.9.self_attn.k_proj.weight": "model-00019-of-00073.safetensors", - "model.layers.9.self_attn.k_proj.bias": "model-00019-of-00073.safetensors", - "model.layers.9.self_attn.v_proj.weight": "model-00019-of-00073.safetensors", - "model.layers.9.self_attn.v_proj.bias": "model-00019-of-00073.safetensors", - "model.layers.9.self_attn.o_proj.weight": "model-00019-of-00073.safetensors", - "model.layers.9.self_attn.o_proj.bias": "model-00019-of-00073.safetensors", - "model.layers.9.mlp.router.weight": "model-00019-of-00073.safetensors", - "model.layers.9.mlp.router.bias": "model-00019-of-00073.safetensors", - "model.layers.9.mlp.experts.gate_up_proj": "model-00020-of-00073.safetensors", - "model.layers.9.mlp.experts.gate_up_proj_bias": "model-00020-of-00073.safetensors", - "model.layers.9.mlp.experts.down_proj": "model-00021-of-00073.safetensors", - "model.layers.9.mlp.experts.down_proj_bias": "model-00021-of-00073.safetensors", - "model.layers.9.input_layernorm.weight": "model-00021-of-00073.safetensors", - "model.layers.9.post_attention_layernorm.weight": "model-00021-of-00073.safetensors", - "model.layers.10.self_attn.sinks": "model-00021-of-00073.safetensors", - "model.layers.10.self_attn.q_proj.weight": "model-00021-of-00073.safetensors", - "model.layers.10.self_attn.q_proj.bias": "model-00021-of-00073.safetensors", - "model.layers.10.self_attn.k_proj.weight": "model-00021-of-00073.safetensors", - "model.layers.10.self_attn.k_proj.bias": "model-00021-of-00073.safetensors", - "model.layers.10.self_attn.v_proj.weight": "model-00021-of-00073.safetensors", - "model.layers.10.self_attn.v_proj.bias": "model-00021-of-00073.safetensors", - "model.layers.10.self_attn.o_proj.weight": "model-00021-of-00073.safetensors", - "model.layers.10.self_attn.o_proj.bias": "model-00021-of-00073.safetensors", - "model.layers.10.mlp.router.weight": "model-00021-of-00073.safetensors", - "model.layers.10.mlp.router.bias": "model-00021-of-00073.safetensors", - "model.layers.10.mlp.experts.gate_up_proj": "model-00022-of-00073.safetensors", - "model.layers.10.mlp.experts.gate_up_proj_bias": "model-00022-of-00073.safetensors", - "model.layers.10.mlp.experts.down_proj": "model-00023-of-00073.safetensors", - "model.layers.10.mlp.experts.down_proj_bias": "model-00023-of-00073.safetensors", - "model.layers.10.input_layernorm.weight": "model-00023-of-00073.safetensors", - "model.layers.10.post_attention_layernorm.weight": "model-00023-of-00073.safetensors", - "model.layers.11.self_attn.sinks": "model-00023-of-00073.safetensors", - "model.layers.11.self_attn.q_proj.weight": "model-00023-of-00073.safetensors", - "model.layers.11.self_attn.q_proj.bias": "model-00023-of-00073.safetensors", - "model.layers.11.self_attn.k_proj.weight": "model-00023-of-00073.safetensors", - "model.layers.11.self_attn.k_proj.bias": "model-00023-of-00073.safetensors", - "model.layers.11.self_attn.v_proj.weight": "model-00023-of-00073.safetensors", - "model.layers.11.self_attn.v_proj.bias": "model-00023-of-00073.safetensors", - "model.layers.11.self_attn.o_proj.weight": "model-00023-of-00073.safetensors", - "model.layers.11.self_attn.o_proj.bias": "model-00023-of-00073.safetensors", - "model.layers.11.mlp.router.weight": "model-00023-of-00073.safetensors", - "model.layers.11.mlp.router.bias": "model-00023-of-00073.safetensors", - "model.layers.11.mlp.experts.gate_up_proj": "model-00024-of-00073.safetensors", - "model.layers.11.mlp.experts.gate_up_proj_bias": "model-00024-of-00073.safetensors", - "model.layers.11.mlp.experts.down_proj": "model-00025-of-00073.safetensors", - "model.layers.11.mlp.experts.down_proj_bias": "model-00025-of-00073.safetensors", - "model.layers.11.input_layernorm.weight": "model-00025-of-00073.safetensors", - "model.layers.11.post_attention_layernorm.weight": "model-00025-of-00073.safetensors", - "model.layers.12.self_attn.sinks": "model-00025-of-00073.safetensors", - "model.layers.12.self_attn.q_proj.weight": "model-00025-of-00073.safetensors", - "model.layers.12.self_attn.q_proj.bias": "model-00025-of-00073.safetensors", - "model.layers.12.self_attn.k_proj.weight": "model-00025-of-00073.safetensors", - "model.layers.12.self_attn.k_proj.bias": "model-00025-of-00073.safetensors", - "model.layers.12.self_attn.v_proj.weight": "model-00025-of-00073.safetensors", - "model.layers.12.self_attn.v_proj.bias": "model-00025-of-00073.safetensors", - "model.layers.12.self_attn.o_proj.weight": "model-00025-of-00073.safetensors", - "model.layers.12.self_attn.o_proj.bias": "model-00025-of-00073.safetensors", - "model.layers.12.mlp.router.weight": "model-00025-of-00073.safetensors", - "model.layers.12.mlp.router.bias": "model-00025-of-00073.safetensors", - "model.layers.12.mlp.experts.gate_up_proj": "model-00026-of-00073.safetensors", - "model.layers.12.mlp.experts.gate_up_proj_bias": "model-00026-of-00073.safetensors", - "model.layers.12.mlp.experts.down_proj": "model-00027-of-00073.safetensors", - "model.layers.12.mlp.experts.down_proj_bias": "model-00027-of-00073.safetensors", - "model.layers.12.input_layernorm.weight": "model-00027-of-00073.safetensors", - "model.layers.12.post_attention_layernorm.weight": "model-00027-of-00073.safetensors", - "model.layers.13.self_attn.sinks": "model-00027-of-00073.safetensors", - "model.layers.13.self_attn.q_proj.weight": "model-00027-of-00073.safetensors", - "model.layers.13.self_attn.q_proj.bias": "model-00027-of-00073.safetensors", - "model.layers.13.self_attn.k_proj.weight": "model-00027-of-00073.safetensors", - "model.layers.13.self_attn.k_proj.bias": "model-00027-of-00073.safetensors", - "model.layers.13.self_attn.v_proj.weight": "model-00027-of-00073.safetensors", - "model.layers.13.self_attn.v_proj.bias": "model-00027-of-00073.safetensors", - "model.layers.13.self_attn.o_proj.weight": "model-00027-of-00073.safetensors", - "model.layers.13.self_attn.o_proj.bias": "model-00027-of-00073.safetensors", - "model.layers.13.mlp.router.weight": "model-00027-of-00073.safetensors", - "model.layers.13.mlp.router.bias": "model-00027-of-00073.safetensors", - "model.layers.13.mlp.experts.gate_up_proj": "model-00028-of-00073.safetensors", - "model.layers.13.mlp.experts.gate_up_proj_bias": "model-00028-of-00073.safetensors", - "model.layers.13.mlp.experts.down_proj": "model-00029-of-00073.safetensors", - "model.layers.13.mlp.experts.down_proj_bias": "model-00029-of-00073.safetensors", - "model.layers.13.input_layernorm.weight": "model-00029-of-00073.safetensors", - "model.layers.13.post_attention_layernorm.weight": "model-00029-of-00073.safetensors", - "model.layers.14.self_attn.sinks": "model-00029-of-00073.safetensors", - "model.layers.14.self_attn.q_proj.weight": "model-00029-of-00073.safetensors", - "model.layers.14.self_attn.q_proj.bias": "model-00029-of-00073.safetensors", - "model.layers.14.self_attn.k_proj.weight": "model-00029-of-00073.safetensors", - "model.layers.14.self_attn.k_proj.bias": "model-00029-of-00073.safetensors", - "model.layers.14.self_attn.v_proj.weight": "model-00029-of-00073.safetensors", - "model.layers.14.self_attn.v_proj.bias": "model-00029-of-00073.safetensors", - "model.layers.14.self_attn.o_proj.weight": "model-00029-of-00073.safetensors", - "model.layers.14.self_attn.o_proj.bias": "model-00029-of-00073.safetensors", - "model.layers.14.mlp.router.weight": "model-00029-of-00073.safetensors", - "model.layers.14.mlp.router.bias": "model-00029-of-00073.safetensors", - "model.layers.14.mlp.experts.gate_up_proj": "model-00030-of-00073.safetensors", - "model.layers.14.mlp.experts.gate_up_proj_bias": "model-00030-of-00073.safetensors", - "model.layers.14.mlp.experts.down_proj": "model-00031-of-00073.safetensors", - "model.layers.14.mlp.experts.down_proj_bias": "model-00031-of-00073.safetensors", - "model.layers.14.input_layernorm.weight": "model-00031-of-00073.safetensors", - "model.layers.14.post_attention_layernorm.weight": "model-00031-of-00073.safetensors", - "model.layers.15.self_attn.sinks": "model-00031-of-00073.safetensors", - "model.layers.15.self_attn.q_proj.weight": "model-00031-of-00073.safetensors", - "model.layers.15.self_attn.q_proj.bias": "model-00031-of-00073.safetensors", - "model.layers.15.self_attn.k_proj.weight": "model-00031-of-00073.safetensors", - "model.layers.15.self_attn.k_proj.bias": "model-00031-of-00073.safetensors", - "model.layers.15.self_attn.v_proj.weight": "model-00031-of-00073.safetensors", - "model.layers.15.self_attn.v_proj.bias": "model-00031-of-00073.safetensors", - "model.layers.15.self_attn.o_proj.weight": "model-00031-of-00073.safetensors", - "model.layers.15.self_attn.o_proj.bias": "model-00031-of-00073.safetensors", - "model.layers.15.mlp.router.weight": "model-00031-of-00073.safetensors", - "model.layers.15.mlp.router.bias": "model-00031-of-00073.safetensors", - "model.layers.15.mlp.experts.gate_up_proj": "model-00032-of-00073.safetensors", - "model.layers.15.mlp.experts.gate_up_proj_bias": "model-00032-of-00073.safetensors", - "model.layers.15.mlp.experts.down_proj": "model-00033-of-00073.safetensors", - "model.layers.15.mlp.experts.down_proj_bias": "model-00033-of-00073.safetensors", - "model.layers.15.input_layernorm.weight": "model-00033-of-00073.safetensors", - "model.layers.15.post_attention_layernorm.weight": "model-00033-of-00073.safetensors", - "model.layers.16.self_attn.sinks": "model-00033-of-00073.safetensors", - "model.layers.16.self_attn.q_proj.weight": "model-00033-of-00073.safetensors", - "model.layers.16.self_attn.q_proj.bias": "model-00033-of-00073.safetensors", - "model.layers.16.self_attn.k_proj.weight": "model-00033-of-00073.safetensors", - "model.layers.16.self_attn.k_proj.bias": "model-00033-of-00073.safetensors", - "model.layers.16.self_attn.v_proj.weight": "model-00033-of-00073.safetensors", - "model.layers.16.self_attn.v_proj.bias": "model-00033-of-00073.safetensors", - "model.layers.16.self_attn.o_proj.weight": "model-00033-of-00073.safetensors", - "model.layers.16.self_attn.o_proj.bias": "model-00033-of-00073.safetensors", - "model.layers.16.mlp.router.weight": "model-00033-of-00073.safetensors", - "model.layers.16.mlp.router.bias": "model-00033-of-00073.safetensors", - "model.layers.16.mlp.experts.gate_up_proj": "model-00034-of-00073.safetensors", - "model.layers.16.mlp.experts.gate_up_proj_bias": "model-00034-of-00073.safetensors", - "model.layers.16.mlp.experts.down_proj": "model-00035-of-00073.safetensors", - "model.layers.16.mlp.experts.down_proj_bias": "model-00035-of-00073.safetensors", - "model.layers.16.input_layernorm.weight": "model-00035-of-00073.safetensors", - "model.layers.16.post_attention_layernorm.weight": "model-00035-of-00073.safetensors", - "model.layers.17.self_attn.sinks": "model-00035-of-00073.safetensors", - "model.layers.17.self_attn.q_proj.weight": "model-00035-of-00073.safetensors", - "model.layers.17.self_attn.q_proj.bias": "model-00035-of-00073.safetensors", - "model.layers.17.self_attn.k_proj.weight": "model-00035-of-00073.safetensors", - "model.layers.17.self_attn.k_proj.bias": "model-00035-of-00073.safetensors", - "model.layers.17.self_attn.v_proj.weight": "model-00035-of-00073.safetensors", - "model.layers.17.self_attn.v_proj.bias": "model-00035-of-00073.safetensors", - "model.layers.17.self_attn.o_proj.weight": "model-00035-of-00073.safetensors", - "model.layers.17.self_attn.o_proj.bias": "model-00035-of-00073.safetensors", - "model.layers.17.mlp.router.weight": "model-00035-of-00073.safetensors", - "model.layers.17.mlp.router.bias": "model-00035-of-00073.safetensors", - "model.layers.17.mlp.experts.gate_up_proj": "model-00036-of-00073.safetensors", - "model.layers.17.mlp.experts.gate_up_proj_bias": "model-00036-of-00073.safetensors", - "model.layers.17.mlp.experts.down_proj": "model-00037-of-00073.safetensors", - "model.layers.17.mlp.experts.down_proj_bias": "model-00037-of-00073.safetensors", - "model.layers.17.input_layernorm.weight": "model-00037-of-00073.safetensors", - "model.layers.17.post_attention_layernorm.weight": "model-00037-of-00073.safetensors", - "model.layers.18.self_attn.sinks": "model-00037-of-00073.safetensors", - "model.layers.18.self_attn.q_proj.weight": "model-00037-of-00073.safetensors", - "model.layers.18.self_attn.q_proj.bias": "model-00037-of-00073.safetensors", - "model.layers.18.self_attn.k_proj.weight": "model-00037-of-00073.safetensors", - "model.layers.18.self_attn.k_proj.bias": "model-00037-of-00073.safetensors", - "model.layers.18.self_attn.v_proj.weight": "model-00037-of-00073.safetensors", - "model.layers.18.self_attn.v_proj.bias": "model-00037-of-00073.safetensors", - "model.layers.18.self_attn.o_proj.weight": "model-00037-of-00073.safetensors", - "model.layers.18.self_attn.o_proj.bias": "model-00037-of-00073.safetensors", - "model.layers.18.mlp.router.weight": "model-00037-of-00073.safetensors", - "model.layers.18.mlp.router.bias": "model-00037-of-00073.safetensors", - "model.layers.18.mlp.experts.gate_up_proj": "model-00038-of-00073.safetensors", - "model.layers.18.mlp.experts.gate_up_proj_bias": "model-00038-of-00073.safetensors", - "model.layers.18.mlp.experts.down_proj": "model-00039-of-00073.safetensors", - "model.layers.18.mlp.experts.down_proj_bias": "model-00039-of-00073.safetensors", - "model.layers.18.input_layernorm.weight": "model-00039-of-00073.safetensors", - "model.layers.18.post_attention_layernorm.weight": "model-00039-of-00073.safetensors", - "model.layers.19.self_attn.sinks": "model-00039-of-00073.safetensors", - "model.layers.19.self_attn.q_proj.weight": "model-00039-of-00073.safetensors", - "model.layers.19.self_attn.q_proj.bias": "model-00039-of-00073.safetensors", - "model.layers.19.self_attn.k_proj.weight": "model-00039-of-00073.safetensors", - "model.layers.19.self_attn.k_proj.bias": "model-00039-of-00073.safetensors", - "model.layers.19.self_attn.v_proj.weight": "model-00039-of-00073.safetensors", - "model.layers.19.self_attn.v_proj.bias": "model-00039-of-00073.safetensors", - "model.layers.19.self_attn.o_proj.weight": "model-00039-of-00073.safetensors", - "model.layers.19.self_attn.o_proj.bias": "model-00039-of-00073.safetensors", - "model.layers.19.mlp.router.weight": "model-00039-of-00073.safetensors", - "model.layers.19.mlp.router.bias": "model-00039-of-00073.safetensors", - "model.layers.19.mlp.experts.gate_up_proj": "model-00040-of-00073.safetensors", - "model.layers.19.mlp.experts.gate_up_proj_bias": "model-00040-of-00073.safetensors", - "model.layers.19.mlp.experts.down_proj": "model-00041-of-00073.safetensors", - "model.layers.19.mlp.experts.down_proj_bias": "model-00041-of-00073.safetensors", - "model.layers.19.input_layernorm.weight": "model-00041-of-00073.safetensors", - "model.layers.19.post_attention_layernorm.weight": "model-00041-of-00073.safetensors", - "model.layers.20.self_attn.sinks": "model-00041-of-00073.safetensors", - "model.layers.20.self_attn.q_proj.weight": "model-00041-of-00073.safetensors", - "model.layers.20.self_attn.q_proj.bias": "model-00041-of-00073.safetensors", - "model.layers.20.self_attn.k_proj.weight": "model-00041-of-00073.safetensors", - "model.layers.20.self_attn.k_proj.bias": "model-00041-of-00073.safetensors", - "model.layers.20.self_attn.v_proj.weight": "model-00041-of-00073.safetensors", - "model.layers.20.self_attn.v_proj.bias": "model-00041-of-00073.safetensors", - "model.layers.20.self_attn.o_proj.weight": "model-00041-of-00073.safetensors", - "model.layers.20.self_attn.o_proj.bias": "model-00041-of-00073.safetensors", - "model.layers.20.mlp.router.weight": "model-00041-of-00073.safetensors", - "model.layers.20.mlp.router.bias": "model-00041-of-00073.safetensors", - "model.layers.20.mlp.experts.gate_up_proj": "model-00042-of-00073.safetensors", - "model.layers.20.mlp.experts.gate_up_proj_bias": "model-00042-of-00073.safetensors", - "model.layers.20.mlp.experts.down_proj": "model-00043-of-00073.safetensors", - "model.layers.20.mlp.experts.down_proj_bias": "model-00043-of-00073.safetensors", - "model.layers.20.input_layernorm.weight": "model-00043-of-00073.safetensors", - "model.layers.20.post_attention_layernorm.weight": "model-00043-of-00073.safetensors", - "model.layers.21.self_attn.sinks": "model-00043-of-00073.safetensors", - "model.layers.21.self_attn.q_proj.weight": "model-00043-of-00073.safetensors", - "model.layers.21.self_attn.q_proj.bias": "model-00043-of-00073.safetensors", - "model.layers.21.self_attn.k_proj.weight": "model-00043-of-00073.safetensors", - "model.layers.21.self_attn.k_proj.bias": "model-00043-of-00073.safetensors", - "model.layers.21.self_attn.v_proj.weight": "model-00043-of-00073.safetensors", - "model.layers.21.self_attn.v_proj.bias": "model-00043-of-00073.safetensors", - "model.layers.21.self_attn.o_proj.weight": "model-00043-of-00073.safetensors", - "model.layers.21.self_attn.o_proj.bias": "model-00043-of-00073.safetensors", - "model.layers.21.mlp.router.weight": "model-00043-of-00073.safetensors", - "model.layers.21.mlp.router.bias": "model-00043-of-00073.safetensors", - "model.layers.21.mlp.experts.gate_up_proj": "model-00044-of-00073.safetensors", - "model.layers.21.mlp.experts.gate_up_proj_bias": "model-00044-of-00073.safetensors", - "model.layers.21.mlp.experts.down_proj": "model-00045-of-00073.safetensors", - "model.layers.21.mlp.experts.down_proj_bias": "model-00045-of-00073.safetensors", - "model.layers.21.input_layernorm.weight": "model-00045-of-00073.safetensors", - "model.layers.21.post_attention_layernorm.weight": "model-00045-of-00073.safetensors", - "model.layers.22.self_attn.sinks": "model-00045-of-00073.safetensors", - "model.layers.22.self_attn.q_proj.weight": "model-00045-of-00073.safetensors", - "model.layers.22.self_attn.q_proj.bias": "model-00045-of-00073.safetensors", - "model.layers.22.self_attn.k_proj.weight": "model-00045-of-00073.safetensors", - "model.layers.22.self_attn.k_proj.bias": "model-00045-of-00073.safetensors", - "model.layers.22.self_attn.v_proj.weight": "model-00045-of-00073.safetensors", - "model.layers.22.self_attn.v_proj.bias": "model-00045-of-00073.safetensors", - "model.layers.22.self_attn.o_proj.weight": "model-00045-of-00073.safetensors", - "model.layers.22.self_attn.o_proj.bias": "model-00045-of-00073.safetensors", - "model.layers.22.mlp.router.weight": "model-00045-of-00073.safetensors", - "model.layers.22.mlp.router.bias": "model-00045-of-00073.safetensors", - "model.layers.22.mlp.experts.gate_up_proj": "model-00046-of-00073.safetensors", - "model.layers.22.mlp.experts.gate_up_proj_bias": "model-00046-of-00073.safetensors", - "model.layers.22.mlp.experts.down_proj": "model-00047-of-00073.safetensors", - "model.layers.22.mlp.experts.down_proj_bias": "model-00047-of-00073.safetensors", - "model.layers.22.input_layernorm.weight": "model-00047-of-00073.safetensors", - "model.layers.22.post_attention_layernorm.weight": "model-00047-of-00073.safetensors", - "model.layers.23.self_attn.sinks": "model-00047-of-00073.safetensors", - "model.layers.23.self_attn.q_proj.weight": "model-00047-of-00073.safetensors", - "model.layers.23.self_attn.q_proj.bias": "model-00047-of-00073.safetensors", - "model.layers.23.self_attn.k_proj.weight": "model-00047-of-00073.safetensors", - "model.layers.23.self_attn.k_proj.bias": "model-00047-of-00073.safetensors", - "model.layers.23.self_attn.v_proj.weight": "model-00047-of-00073.safetensors", - "model.layers.23.self_attn.v_proj.bias": "model-00047-of-00073.safetensors", - "model.layers.23.self_attn.o_proj.weight": "model-00047-of-00073.safetensors", - "model.layers.23.self_attn.o_proj.bias": "model-00047-of-00073.safetensors", - "model.layers.23.mlp.router.weight": "model-00047-of-00073.safetensors", - "model.layers.23.mlp.router.bias": "model-00047-of-00073.safetensors", - "model.layers.23.mlp.experts.gate_up_proj": "model-00048-of-00073.safetensors", - "model.layers.23.mlp.experts.gate_up_proj_bias": "model-00048-of-00073.safetensors", - "model.layers.23.mlp.experts.down_proj": "model-00049-of-00073.safetensors", - "model.layers.23.mlp.experts.down_proj_bias": "model-00049-of-00073.safetensors", - "model.layers.23.input_layernorm.weight": "model-00049-of-00073.safetensors", - "model.layers.23.post_attention_layernorm.weight": "model-00049-of-00073.safetensors", - "model.layers.24.self_attn.sinks": "model-00049-of-00073.safetensors", - "model.layers.24.self_attn.q_proj.weight": "model-00049-of-00073.safetensors", - "model.layers.24.self_attn.q_proj.bias": "model-00049-of-00073.safetensors", - "model.layers.24.self_attn.k_proj.weight": "model-00049-of-00073.safetensors", - "model.layers.24.self_attn.k_proj.bias": "model-00049-of-00073.safetensors", - "model.layers.24.self_attn.v_proj.weight": "model-00049-of-00073.safetensors", - "model.layers.24.self_attn.v_proj.bias": "model-00049-of-00073.safetensors", - "model.layers.24.self_attn.o_proj.weight": "model-00049-of-00073.safetensors", - "model.layers.24.self_attn.o_proj.bias": "model-00049-of-00073.safetensors", - "model.layers.24.mlp.router.weight": "model-00049-of-00073.safetensors", - "model.layers.24.mlp.router.bias": "model-00049-of-00073.safetensors", - "model.layers.24.mlp.experts.gate_up_proj": "model-00050-of-00073.safetensors", - "model.layers.24.mlp.experts.gate_up_proj_bias": "model-00050-of-00073.safetensors", - "model.layers.24.mlp.experts.down_proj": "model-00051-of-00073.safetensors", - "model.layers.24.mlp.experts.down_proj_bias": "model-00051-of-00073.safetensors", - "model.layers.24.input_layernorm.weight": "model-00051-of-00073.safetensors", - "model.layers.24.post_attention_layernorm.weight": "model-00051-of-00073.safetensors", - "model.layers.25.self_attn.sinks": "model-00051-of-00073.safetensors", - "model.layers.25.self_attn.q_proj.weight": "model-00051-of-00073.safetensors", - "model.layers.25.self_attn.q_proj.bias": "model-00051-of-00073.safetensors", - "model.layers.25.self_attn.k_proj.weight": "model-00051-of-00073.safetensors", - "model.layers.25.self_attn.k_proj.bias": "model-00051-of-00073.safetensors", - "model.layers.25.self_attn.v_proj.weight": "model-00051-of-00073.safetensors", - "model.layers.25.self_attn.v_proj.bias": "model-00051-of-00073.safetensors", - "model.layers.25.self_attn.o_proj.weight": "model-00051-of-00073.safetensors", - "model.layers.25.self_attn.o_proj.bias": "model-00051-of-00073.safetensors", - "model.layers.25.mlp.router.weight": "model-00051-of-00073.safetensors", - "model.layers.25.mlp.router.bias": "model-00051-of-00073.safetensors", - "model.layers.25.mlp.experts.gate_up_proj": "model-00052-of-00073.safetensors", - "model.layers.25.mlp.experts.gate_up_proj_bias": "model-00052-of-00073.safetensors", - "model.layers.25.mlp.experts.down_proj": "model-00053-of-00073.safetensors", - "model.layers.25.mlp.experts.down_proj_bias": "model-00053-of-00073.safetensors", - "model.layers.25.input_layernorm.weight": "model-00053-of-00073.safetensors", - "model.layers.25.post_attention_layernorm.weight": "model-00053-of-00073.safetensors", - "model.layers.26.self_attn.sinks": "model-00053-of-00073.safetensors", - "model.layers.26.self_attn.q_proj.weight": "model-00053-of-00073.safetensors", - "model.layers.26.self_attn.q_proj.bias": "model-00053-of-00073.safetensors", - "model.layers.26.self_attn.k_proj.weight": "model-00053-of-00073.safetensors", - "model.layers.26.self_attn.k_proj.bias": "model-00053-of-00073.safetensors", - "model.layers.26.self_attn.v_proj.weight": "model-00053-of-00073.safetensors", - "model.layers.26.self_attn.v_proj.bias": "model-00053-of-00073.safetensors", - "model.layers.26.self_attn.o_proj.weight": "model-00053-of-00073.safetensors", - "model.layers.26.self_attn.o_proj.bias": "model-00053-of-00073.safetensors", - "model.layers.26.mlp.router.weight": "model-00053-of-00073.safetensors", - "model.layers.26.mlp.router.bias": "model-00053-of-00073.safetensors", - "model.layers.26.mlp.experts.gate_up_proj": "model-00054-of-00073.safetensors", - "model.layers.26.mlp.experts.gate_up_proj_bias": "model-00054-of-00073.safetensors", - "model.layers.26.mlp.experts.down_proj": "model-00055-of-00073.safetensors", - "model.layers.26.mlp.experts.down_proj_bias": "model-00055-of-00073.safetensors", - "model.layers.26.input_layernorm.weight": "model-00055-of-00073.safetensors", - "model.layers.26.post_attention_layernorm.weight": "model-00055-of-00073.safetensors", - "model.layers.27.self_attn.sinks": "model-00055-of-00073.safetensors", - "model.layers.27.self_attn.q_proj.weight": "model-00055-of-00073.safetensors", - "model.layers.27.self_attn.q_proj.bias": "model-00055-of-00073.safetensors", - "model.layers.27.self_attn.k_proj.weight": "model-00055-of-00073.safetensors", - "model.layers.27.self_attn.k_proj.bias": "model-00055-of-00073.safetensors", - "model.layers.27.self_attn.v_proj.weight": "model-00055-of-00073.safetensors", - "model.layers.27.self_attn.v_proj.bias": "model-00055-of-00073.safetensors", - "model.layers.27.self_attn.o_proj.weight": "model-00055-of-00073.safetensors", - "model.layers.27.self_attn.o_proj.bias": "model-00055-of-00073.safetensors", - "model.layers.27.mlp.router.weight": "model-00055-of-00073.safetensors", - "model.layers.27.mlp.router.bias": "model-00055-of-00073.safetensors", - "model.layers.27.mlp.experts.gate_up_proj": "model-00056-of-00073.safetensors", - "model.layers.27.mlp.experts.gate_up_proj_bias": "model-00056-of-00073.safetensors", - "model.layers.27.mlp.experts.down_proj": "model-00057-of-00073.safetensors", - "model.layers.27.mlp.experts.down_proj_bias": "model-00057-of-00073.safetensors", - "model.layers.27.input_layernorm.weight": "model-00057-of-00073.safetensors", - "model.layers.27.post_attention_layernorm.weight": "model-00057-of-00073.safetensors", - "model.layers.28.self_attn.sinks": "model-00057-of-00073.safetensors", - "model.layers.28.self_attn.q_proj.weight": "model-00057-of-00073.safetensors", - "model.layers.28.self_attn.q_proj.bias": "model-00057-of-00073.safetensors", - "model.layers.28.self_attn.k_proj.weight": "model-00057-of-00073.safetensors", - "model.layers.28.self_attn.k_proj.bias": "model-00057-of-00073.safetensors", - "model.layers.28.self_attn.v_proj.weight": "model-00057-of-00073.safetensors", - "model.layers.28.self_attn.v_proj.bias": "model-00057-of-00073.safetensors", - "model.layers.28.self_attn.o_proj.weight": "model-00057-of-00073.safetensors", - "model.layers.28.self_attn.o_proj.bias": "model-00057-of-00073.safetensors", - "model.layers.28.mlp.router.weight": "model-00057-of-00073.safetensors", - "model.layers.28.mlp.router.bias": "model-00057-of-00073.safetensors", - "model.layers.28.mlp.experts.gate_up_proj": "model-00058-of-00073.safetensors", - "model.layers.28.mlp.experts.gate_up_proj_bias": "model-00058-of-00073.safetensors", - "model.layers.28.mlp.experts.down_proj": "model-00059-of-00073.safetensors", - "model.layers.28.mlp.experts.down_proj_bias": "model-00059-of-00073.safetensors", - "model.layers.28.input_layernorm.weight": "model-00059-of-00073.safetensors", - "model.layers.28.post_attention_layernorm.weight": "model-00059-of-00073.safetensors", - "model.layers.29.self_attn.sinks": "model-00059-of-00073.safetensors", - "model.layers.29.self_attn.q_proj.weight": "model-00059-of-00073.safetensors", - "model.layers.29.self_attn.q_proj.bias": "model-00059-of-00073.safetensors", - "model.layers.29.self_attn.k_proj.weight": "model-00059-of-00073.safetensors", - "model.layers.29.self_attn.k_proj.bias": "model-00059-of-00073.safetensors", - "model.layers.29.self_attn.v_proj.weight": "model-00059-of-00073.safetensors", - "model.layers.29.self_attn.v_proj.bias": "model-00059-of-00073.safetensors", - "model.layers.29.self_attn.o_proj.weight": "model-00059-of-00073.safetensors", - "model.layers.29.self_attn.o_proj.bias": "model-00059-of-00073.safetensors", - "model.layers.29.mlp.router.weight": "model-00059-of-00073.safetensors", - "model.layers.29.mlp.router.bias": "model-00059-of-00073.safetensors", - "model.layers.29.mlp.experts.gate_up_proj": "model-00060-of-00073.safetensors", - "model.layers.29.mlp.experts.gate_up_proj_bias": "model-00060-of-00073.safetensors", - "model.layers.29.mlp.experts.down_proj": "model-00061-of-00073.safetensors", - "model.layers.29.mlp.experts.down_proj_bias": "model-00061-of-00073.safetensors", - "model.layers.29.input_layernorm.weight": "model-00061-of-00073.safetensors", - "model.layers.29.post_attention_layernorm.weight": "model-00061-of-00073.safetensors", - "model.layers.30.self_attn.sinks": "model-00061-of-00073.safetensors", - "model.layers.30.self_attn.q_proj.weight": "model-00061-of-00073.safetensors", - "model.layers.30.self_attn.q_proj.bias": "model-00061-of-00073.safetensors", - "model.layers.30.self_attn.k_proj.weight": "model-00061-of-00073.safetensors", - "model.layers.30.self_attn.k_proj.bias": "model-00061-of-00073.safetensors", - "model.layers.30.self_attn.v_proj.weight": "model-00061-of-00073.safetensors", - "model.layers.30.self_attn.v_proj.bias": "model-00061-of-00073.safetensors", - "model.layers.30.self_attn.o_proj.weight": "model-00061-of-00073.safetensors", - "model.layers.30.self_attn.o_proj.bias": "model-00061-of-00073.safetensors", - "model.layers.30.mlp.router.weight": "model-00061-of-00073.safetensors", - "model.layers.30.mlp.router.bias": "model-00061-of-00073.safetensors", - "model.layers.30.mlp.experts.gate_up_proj": "model-00062-of-00073.safetensors", - "model.layers.30.mlp.experts.gate_up_proj_bias": "model-00062-of-00073.safetensors", - "model.layers.30.mlp.experts.down_proj": "model-00063-of-00073.safetensors", - "model.layers.30.mlp.experts.down_proj_bias": "model-00063-of-00073.safetensors", - "model.layers.30.input_layernorm.weight": "model-00063-of-00073.safetensors", - "model.layers.30.post_attention_layernorm.weight": "model-00063-of-00073.safetensors", - "model.layers.31.self_attn.sinks": "model-00063-of-00073.safetensors", - "model.layers.31.self_attn.q_proj.weight": "model-00063-of-00073.safetensors", - "model.layers.31.self_attn.q_proj.bias": "model-00063-of-00073.safetensors", - "model.layers.31.self_attn.k_proj.weight": "model-00063-of-00073.safetensors", - "model.layers.31.self_attn.k_proj.bias": "model-00063-of-00073.safetensors", - "model.layers.31.self_attn.v_proj.weight": "model-00063-of-00073.safetensors", - "model.layers.31.self_attn.v_proj.bias": "model-00063-of-00073.safetensors", - "model.layers.31.self_attn.o_proj.weight": "model-00063-of-00073.safetensors", - "model.layers.31.self_attn.o_proj.bias": "model-00063-of-00073.safetensors", - "model.layers.31.mlp.router.weight": "model-00063-of-00073.safetensors", - "model.layers.31.mlp.router.bias": "model-00063-of-00073.safetensors", - "model.layers.31.mlp.experts.gate_up_proj": "model-00064-of-00073.safetensors", - "model.layers.31.mlp.experts.gate_up_proj_bias": "model-00064-of-00073.safetensors", - "model.layers.31.mlp.experts.down_proj": "model-00065-of-00073.safetensors", - "model.layers.31.mlp.experts.down_proj_bias": "model-00065-of-00073.safetensors", - "model.layers.31.input_layernorm.weight": "model-00065-of-00073.safetensors", - "model.layers.31.post_attention_layernorm.weight": "model-00065-of-00073.safetensors", - "model.layers.32.self_attn.sinks": "model-00065-of-00073.safetensors", - "model.layers.32.self_attn.q_proj.weight": "model-00065-of-00073.safetensors", - "model.layers.32.self_attn.q_proj.bias": "model-00065-of-00073.safetensors", - "model.layers.32.self_attn.k_proj.weight": "model-00065-of-00073.safetensors", - "model.layers.32.self_attn.k_proj.bias": "model-00065-of-00073.safetensors", - "model.layers.32.self_attn.v_proj.weight": "model-00065-of-00073.safetensors", - "model.layers.32.self_attn.v_proj.bias": "model-00065-of-00073.safetensors", - "model.layers.32.self_attn.o_proj.weight": "model-00065-of-00073.safetensors", - "model.layers.32.self_attn.o_proj.bias": "model-00065-of-00073.safetensors", - "model.layers.32.mlp.router.weight": "model-00065-of-00073.safetensors", - "model.layers.32.mlp.router.bias": "model-00065-of-00073.safetensors", - "model.layers.32.mlp.experts.gate_up_proj": "model-00066-of-00073.safetensors", - "model.layers.32.mlp.experts.gate_up_proj_bias": "model-00066-of-00073.safetensors", - "model.layers.32.mlp.experts.down_proj": "model-00067-of-00073.safetensors", - "model.layers.32.mlp.experts.down_proj_bias": "model-00067-of-00073.safetensors", - "model.layers.32.input_layernorm.weight": "model-00067-of-00073.safetensors", - "model.layers.32.post_attention_layernorm.weight": "model-00067-of-00073.safetensors", - "model.layers.33.self_attn.sinks": "model-00067-of-00073.safetensors", - "model.layers.33.self_attn.q_proj.weight": "model-00067-of-00073.safetensors", - "model.layers.33.self_attn.q_proj.bias": "model-00067-of-00073.safetensors", - "model.layers.33.self_attn.k_proj.weight": "model-00067-of-00073.safetensors", - "model.layers.33.self_attn.k_proj.bias": "model-00067-of-00073.safetensors", - "model.layers.33.self_attn.v_proj.weight": "model-00067-of-00073.safetensors", - "model.layers.33.self_attn.v_proj.bias": "model-00067-of-00073.safetensors", - "model.layers.33.self_attn.o_proj.weight": "model-00067-of-00073.safetensors", - "model.layers.33.self_attn.o_proj.bias": "model-00067-of-00073.safetensors", - "model.layers.33.mlp.router.weight": "model-00067-of-00073.safetensors", - "model.layers.33.mlp.router.bias": "model-00067-of-00073.safetensors", - "model.layers.33.mlp.experts.gate_up_proj": "model-00068-of-00073.safetensors", - "model.layers.33.mlp.experts.gate_up_proj_bias": "model-00068-of-00073.safetensors", - "model.layers.33.mlp.experts.down_proj": "model-00069-of-00073.safetensors", - "model.layers.33.mlp.experts.down_proj_bias": "model-00069-of-00073.safetensors", - "model.layers.33.input_layernorm.weight": "model-00069-of-00073.safetensors", - "model.layers.33.post_attention_layernorm.weight": "model-00069-of-00073.safetensors", - "model.layers.34.self_attn.sinks": "model-00069-of-00073.safetensors", - "model.layers.34.self_attn.q_proj.weight": "model-00069-of-00073.safetensors", - "model.layers.34.self_attn.q_proj.bias": "model-00069-of-00073.safetensors", - "model.layers.34.self_attn.k_proj.weight": "model-00069-of-00073.safetensors", - "model.layers.34.self_attn.k_proj.bias": "model-00069-of-00073.safetensors", - "model.layers.34.self_attn.v_proj.weight": "model-00069-of-00073.safetensors", - "model.layers.34.self_attn.v_proj.bias": "model-00069-of-00073.safetensors", - "model.layers.34.self_attn.o_proj.weight": "model-00069-of-00073.safetensors", - "model.layers.34.self_attn.o_proj.bias": "model-00069-of-00073.safetensors", - "model.layers.34.mlp.router.weight": "model-00069-of-00073.safetensors", - "model.layers.34.mlp.router.bias": "model-00069-of-00073.safetensors", - "model.layers.34.mlp.experts.gate_up_proj": "model-00070-of-00073.safetensors", - "model.layers.34.mlp.experts.gate_up_proj_bias": "model-00070-of-00073.safetensors", - "model.layers.34.mlp.experts.down_proj": "model-00071-of-00073.safetensors", - "model.layers.34.mlp.experts.down_proj_bias": "model-00071-of-00073.safetensors", - "model.layers.34.input_layernorm.weight": "model-00071-of-00073.safetensors", - "model.layers.34.post_attention_layernorm.weight": "model-00071-of-00073.safetensors", - "model.layers.35.self_attn.sinks": "model-00071-of-00073.safetensors", - "model.layers.35.self_attn.q_proj.weight": "model-00071-of-00073.safetensors", - "model.layers.35.self_attn.q_proj.bias": "model-00071-of-00073.safetensors", - "model.layers.35.self_attn.k_proj.weight": "model-00071-of-00073.safetensors", - "model.layers.35.self_attn.k_proj.bias": "model-00071-of-00073.safetensors", - "model.layers.35.self_attn.v_proj.weight": "model-00071-of-00073.safetensors", - "model.layers.35.self_attn.v_proj.bias": "model-00071-of-00073.safetensors", - "model.layers.35.self_attn.o_proj.weight": "model-00071-of-00073.safetensors", - "model.layers.35.self_attn.o_proj.bias": "model-00071-of-00073.safetensors", - "model.layers.35.mlp.router.weight": "model-00071-of-00073.safetensors", - "model.layers.35.mlp.router.bias": "model-00071-of-00073.safetensors", - "model.layers.35.mlp.experts.gate_up_proj": "model-00072-of-00073.safetensors", - "model.layers.35.mlp.experts.gate_up_proj_bias": "model-00072-of-00073.safetensors", - "model.layers.35.mlp.experts.down_proj": "model-00073-of-00073.safetensors", - "model.layers.35.mlp.experts.down_proj_bias": "model-00073-of-00073.safetensors", - "model.layers.35.input_layernorm.weight": "model-00073-of-00073.safetensors", - "model.layers.35.post_attention_layernorm.weight": "model-00073-of-00073.safetensors", - "model.norm.weight": "model-00073-of-00073.safetensors", - "lm_head.weight": "model-00073-of-00073.safetensors" - } -} \ No newline at end of file