diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..52373fe24473b1aa44333d318f578ae6bf04b49b 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..b64037f2ff06eb3581699be0bf3d6448a49f6430 --- /dev/null +++ b/config.json @@ -0,0 +1,33 @@ +{ + "_name_or_path": "CohereForAI/aya-101", + "architectures": [ + "T5ForConditionalGeneration" + ], + "classifier_dropout": 0.0, + "d_ff": 10240, + "d_kv": 64, + "d_model": 4096, + "decoder_start_token_id": 0, + "dense_act_fn": "gelu_new", + "dropout_rate": 0.1, + "eos_token_id": 1, + "feed_forward_proj": "gated-gelu", + "initializer_factor": 1.0, + "is_encoder_decoder": true, + "is_gated_act": true, + "layer_norm_epsilon": 1e-06, + "model_type": "t5", + "num_decoder_layers": 24, + "num_heads": 64, + "num_layers": 24, + "output_past": true, + "pad_token_id": 0, + "relative_attention_max_distance": 128, + "relative_attention_num_buckets": 32, + "tie_word_embeddings": false, + "tokenizer_class": "T5Tokenizer", + "torch_dtype": "float32", + "transformers_version": "4.36.2", + "use_cache": true, + "vocab_size": 250112 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b8de7c0849ca645b144f2d2a2015d4de250b4a6d --- /dev/null +++ b/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "decoder_start_token_id": 0, + "eos_token_id": 1, + "pad_token_id": 0, + "transformers_version": "4.36.2" +} diff --git a/model-00001-of-00058.safetensors b/model-00001-of-00058.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..44550ea951229f11c841c0727f173ea9ee267af1 --- /dev/null +++ b/model-00001-of-00058.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ee5060b747ac7e2c1be9cd97c43f993341d6cc442c2e7ec44d8f6b0cdcbc135 +size 4097835136 diff --git a/model-00002-of-00058.safetensors b/model-00002-of-00058.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0e3a628c055a1caf361ad304e04e731c9204f082 --- /dev/null +++ b/model-00002-of-00058.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fc6a6cf4b9706b03379d15cf37d27088d75496b488ba120ff1b54115520eafc +size 838903144 diff --git a/model-00003-of-00058.safetensors b/model-00003-of-00058.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..511ccee2f826446bb7f1a2896e28fb0ea9a89980 --- /dev/null +++ b/model-00003-of-00058.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0a05afaa3b956436c6e35d9d84244fc5bcac25c08e11b344d4b7307d8fe32aa +size 838894816 diff --git a/model-00004-of-00058.safetensors b/model-00004-of-00058.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d7aa284a5b1ccaf04c10f4712a4d7d4316e65167 --- /dev/null +++ b/model-00004-of-00058.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc1477aa9f6223bc3a1c6e05c08675e7b037a0f34ed8d287d306e0bc4a34c55c +size 838894816 diff --git a/model-00005-of-00058.safetensors b/model-00005-of-00058.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0ced73f6b1b3f703e7c5c05551833fbf51afa395 --- /dev/null +++ b/model-00005-of-00058.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64ed435ff4e52ff98fbe90ad6cfac524327badccb4f58789addd366852060ec0 +size 838911312 diff --git a/model-00006-of-00058.safetensors b/model-00006-of-00058.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..37d9a8e04b2ab53ebcf984dc75c978ed84e85b96 --- /dev/null +++ b/model-00006-of-00058.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ded232c5ff01a67979c9a3b7134d9a544d8e7f6b8b49cee9fe618d774d425671 +size 771785832 diff --git a/model-00007-of-00058.safetensors b/model-00007-of-00058.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..57bba2aa4e8025db41d429290026ccbb1a645bb4 --- /dev/null +++ b/model-00007-of-00058.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a65651279806c5ba01db2d88b7547b33d757838d322b4b973f0533678cacb3a0 +size 771785832 diff --git a/model-00008-of-00058.safetensors b/model-00008-of-00058.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ef295dc0036820690a78ffe96780880edbc7f768 --- /dev/null +++ b/model-00008-of-00058.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb3ad0089dbfb62a4461cd98cb1b588235cc509b6c2c94baf34d2bcfeb8ba4a5 +size 771785832 diff --git a/model-00009-of-00058.safetensors b/model-00009-of-00058.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4b01c82d2a6a0ae45c19ab20f5fb8445aa80bb7a --- /dev/null +++ b/model-00009-of-00058.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eafebbd578ff50b822cfbb6e415e58eceeb8b90ebe7732e0fa70028240ea9264 +size 771785832 diff --git a/model-00010-of-00058.safetensors b/model-00010-of-00058.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..874891627ce4854e20bc342dcde6422d2b9284a9 --- /dev/null +++ b/model-00010-of-00058.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f96b41dcaf37c51d228cb7bb6dd6e871a18f4bab35c2450ec4a8557309a21827 +size 771785832 diff --git a/model-00011-of-00058.safetensors b/model-00011-of-00058.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..78b4edf9806049f7302250564bf902c12fe8994d --- /dev/null +++ b/model-00011-of-00058.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ae73ffd22765f358d77f406f3f1d78996828fef9981036bfde85a87801d97b0 +size 771785832 diff --git a/model-00012-of-00058.safetensors b/model-00012-of-00058.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bf2a54a3da42051ea6d6bddc72caa4b38c0620ff --- /dev/null +++ b/model-00012-of-00058.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:114cd99b4c605f31ad71feacbcce4da22dd3dec7270edc18662e85da2ecc206e +size 771785840 diff --git a/model-00013-of-00058.safetensors b/model-00013-of-00058.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..742ae3f7c7a5f611f00293764e18a0526ac051be --- /dev/null +++ b/model-00013-of-00058.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e75e93617d9a9e229675df8678476b64a7da7261d4b70c5cc79bb48436f5c704 +size 771785840 diff --git a/model-00014-of-00058.safetensors b/model-00014-of-00058.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3fb2a62dced99c8e6ee9a9d19a7957e2d06741f8 --- /dev/null +++ b/model-00014-of-00058.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebbf2c9cdc98a42961a8703194b0b17d5792b7bf4ebee580324ac0c2581f4865 +size 771785840 diff --git a/model-00015-of-00058.safetensors b/model-00015-of-00058.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7c64239d5f9e401cde187c75050149c656faa989 --- /dev/null +++ b/model-00015-of-00058.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd94e6654301436f21593bb47a74788b3e9eafe3b0a73428783a00ae9984c018 +size 771785840 diff --git a/model-00016-of-00058.safetensors b/model-00016-of-00058.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..27bcec2abffb4d9a896da34c8c598b8912eabdc8 --- /dev/null +++ b/model-00016-of-00058.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fe3d0d091a8ef2dd49fe37a5a23ce21cc690d9bec799a7543d9c20d63b5ade3 +size 771785840 diff --git a/model-00017-of-00058.safetensors b/model-00017-of-00058.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3dd209dddd87a76ef02bc279a326228db4167e27 --- /dev/null +++ b/model-00017-of-00058.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62fd739f7024acabefe0ea168e63d5275ebeecbbcaa8c9d228f7e637f77edd75 +size 771785840 diff --git a/model-00018-of-00058.safetensors b/model-00018-of-00058.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dfaca6230e0125a4d686bab55d6286a6ee8e98ff --- /dev/null +++ b/model-00018-of-00058.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ecb869ffa17e7e6056fcfcaba2ecd4a854e0d4fdafcbdd64cd62314f7153537 +size 771785840 diff --git a/model-00019-of-00058.safetensors b/model-00019-of-00058.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4cfc4e67462eb5b2284593413f7beeec01edbcf7 --- /dev/null +++ b/model-00019-of-00058.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aed4e57f19a24c26d05b8b8a89906a5fa834d5e9acbb3e1378c87bfe45987120 +size 771785840 diff --git a/model-00020-of-00058.safetensors b/model-00020-of-00058.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..beefcbe167026309aeaedbbd5e011c116591f05b --- /dev/null +++ b/model-00020-of-00058.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ae31052dc9294d98526888e4b81545017c1a11bc2b0bf18f39d6878c3c9fbfd +size 771785840 diff --git a/model-00021-of-00058.safetensors b/model-00021-of-00058.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..08a06aff568fb084cd89faa1088ae14484e11a30 --- /dev/null +++ b/model-00021-of-00058.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6c454fe1f9908d53b06b85ee4efb5ee95fa1dd8dc6fa56323a9b6796ad9d8f6 +size 771785840 diff --git a/model-00022-of-00058.safetensors b/model-00022-of-00058.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..df21ef86a8b3b22e0c8760b8e392295f9c4f5769 --- /dev/null +++ b/model-00022-of-00058.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:278bf79e73b12529614eb6eae2863eb4db644ad5dd1f7b6081e9abba2b7f8692 +size 771785840 diff --git a/model-00023-of-00058.safetensors b/model-00023-of-00058.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..27babf719fe309b8fd21aba4644ad4465ffef800 --- /dev/null +++ b/model-00023-of-00058.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cecc02c74b05dad3754c76f56eef4875bf6f16cb8d9265a414687e2de91c8e10 +size 771785840 diff --git a/model-00024-of-00058.safetensors b/model-00024-of-00058.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b5d61174db311b7dfe1f8505927c6ee9d26a3230 --- /dev/null +++ b/model-00024-of-00058.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f4676fd29a15463bb30a1d580709cea57ee1a612c1150c8dd87291fb538f006 +size 771785840 diff --git a/model-00025-of-00058.safetensors b/model-00025-of-00058.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ecb736231e7ca7d9f8ebccb640377f309eea72f1 --- /dev/null +++ b/model-00025-of-00058.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1675e6458803892fa69702dc5d71d53ce2a03a6e9a68e5db1b0a1922566e5d1 +size 838919640 diff --git a/model-00026-of-00058.safetensors b/model-00026-of-00058.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..79f8724871687edaad0de2089a1d30ae2da3839d --- /dev/null +++ b/model-00026-of-00058.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f8a66a3f378c7e87a4911b116a7a63c7e00e3b7d38d955d00da993dc5c0a879 +size 838894824 diff --git a/model-00027-of-00058.safetensors b/model-00027-of-00058.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b4485386828ae9d878edf0d817ef29469f6eb78a --- /dev/null +++ b/model-00027-of-00058.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68248e87be6aa20c413261e8a47e06ebeea41dd33d22617391a9ba5f8d24c3ef +size 738231520 diff --git a/model-00028-of-00058.safetensors b/model-00028-of-00058.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..468c366f8ecbf41a64f524ca28d7697a83eea073 --- /dev/null +++ b/model-00028-of-00058.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aded28e602ba7497fe84f3595e6f87183b72a47f532b7f81164ccf3195aeab68 +size 704693704 diff --git a/model-00029-of-00058.safetensors b/model-00029-of-00058.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d743ee7290833f81b23492b590f5da8d7e5e7f06 --- /dev/null +++ b/model-00029-of-00058.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb61c6be6a65b8ecb356a8c78fc47935cef9f3b1e18ea1a2a777e08730cde4df +size 838894816 diff --git a/model-00030-of-00058.safetensors b/model-00030-of-00058.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..791d14c6ee7c05ad6825592a61c5e5cc58d3e9af --- /dev/null +++ b/model-00030-of-00058.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:777c8f9a5ac296282b025a4fe664fd718c1d3dc5da5cff9288ae82cd93d1602d +size 838894824 diff --git a/model-00031-of-00058.safetensors b/model-00031-of-00058.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c21559afe1f48a60057d51e19840e7fb6cf87fe3 --- /dev/null +++ b/model-00031-of-00058.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29ac25648527804f241393fe0c2b01e811071f9124511e60cf48413f991a98bd +size 738231520 diff --git a/model-00032-of-00058.safetensors b/model-00032-of-00058.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..101f1f79363386c3ba93529af4a08cb65fe6cfef --- /dev/null +++ b/model-00032-of-00058.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b792023be12084c3327137ba48d4bb1ce060a3e651fafdfbb090e4ce1d85506 +size 704693704 diff --git a/model-00033-of-00058.safetensors b/model-00033-of-00058.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4f662e10a7df168c416a44aa428cf60a2751e7d9 --- /dev/null +++ b/model-00033-of-00058.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4dc32aaaff354f03ddde730e1efd7dc2908f24c8bcec12ca156d50ed03d4e4a +size 838894816 diff --git a/model-00034-of-00058.safetensors b/model-00034-of-00058.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..80b79200ebeb45b4f2d18ba13daf4079ed08ccaa --- /dev/null +++ b/model-00034-of-00058.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3591afe70cfb8dc911750d114b2b72a687835ab4eeae8c214840be3607722f16 +size 838894824 diff --git a/model-00035-of-00058.safetensors b/model-00035-of-00058.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1a10cd96ec8ede202fe6d9ea0ffdbf14473a0117 --- /dev/null +++ b/model-00035-of-00058.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a26c449662d5b6359c41b131a4eafa6257db948eef198f453d9e220a0fa44b72 +size 738231520 diff --git a/model-00036-of-00058.safetensors b/model-00036-of-00058.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d19a00fe7b7ebf1d0fe2b3c3ef2be76ff85f6b1c --- /dev/null +++ b/model-00036-of-00058.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62a483d7c4913ce639d67bb9f8f1ee238f91654460240fbeb65f2090b392c1e9 +size 704693704 diff --git a/model-00037-of-00058.safetensors b/model-00037-of-00058.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0c12c1a506ca1718f26fc666ce4b0071233d5969 --- /dev/null +++ b/model-00037-of-00058.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a06773b745e76a91802e3afbf07e153884561770b020da1e6bbeb38e63b3067 +size 838894816 diff --git a/model-00038-of-00058.safetensors b/model-00038-of-00058.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..df1b4a621127f416c3d62130937ebb92622a6137 --- /dev/null +++ b/model-00038-of-00058.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b906cab98c13c7a05fa26552be1f0797ba1a8dcf7ad3218c2cd83933758efd7 +size 838894824 diff --git a/model-00039-of-00058.safetensors b/model-00039-of-00058.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..297405901d6a7acec45d9f62417257c1098331c7 --- /dev/null +++ b/model-00039-of-00058.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a2d93cfb543368fc54c9033ed6cb46cb2474a771e49fe1ef690d2b5534f400 +size 738231536 diff --git a/model-00040-of-00058.safetensors b/model-00040-of-00058.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e75a2c89132cafefd0e680604168205c3cb311f6 --- /dev/null +++ b/model-00040-of-00058.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3edb4cb42f0212f3f50f67b7be0c2aef7329afb51bc330d2899b022bb905e73 +size 704693720 diff --git a/model-00041-of-00058.safetensors b/model-00041-of-00058.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..31301bf8f813d0e9637ba988aaabeabb9d72dee6 --- /dev/null +++ b/model-00041-of-00058.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:752539296b750fe6d73fa93ee71e1cfdcbe18f229b3d0766ba371df873f9710b +size 838894832 diff --git a/model-00042-of-00058.safetensors b/model-00042-of-00058.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8e66d3163438c3283c4ef65a3bfb3d21544fe11e --- /dev/null +++ b/model-00042-of-00058.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c11e4e96a379c079868284857981448505ff408e37afeea6221792d2e3e39e05 +size 838894832 diff --git a/model-00043-of-00058.safetensors b/model-00043-of-00058.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1c43285dc48b06d486df1ac5070256a1edfaa429 --- /dev/null +++ b/model-00043-of-00058.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70cc615c7bd3b2cc50155873392afff29ac41bd8dc5412a6172c73e5492cc3a5 +size 738231536 diff --git a/model-00044-of-00058.safetensors b/model-00044-of-00058.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5a5114a8e1f10529d925894f61116494ab4c23c6 --- /dev/null +++ b/model-00044-of-00058.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1562c9c77c11c8eef4631d5c98d8dda9f0ab5fdd7c34f39dcb7163a28befde8d +size 704693720 diff --git a/model-00045-of-00058.safetensors b/model-00045-of-00058.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..aac97f538386db1ae4d137d6091bdbccae487927 --- /dev/null +++ b/model-00045-of-00058.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4012e5fd75fda64c6d66a2a0f41b9084b0271a0a34fd4fffb4663024f0a300b9 +size 838894832 diff --git a/model-00046-of-00058.safetensors b/model-00046-of-00058.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d01f367549cecc6c125bcd2c62de63f8df70fb9c --- /dev/null +++ b/model-00046-of-00058.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96514604172115763f9c02d65e68429924d90899bcce54f254d69fb96b752449 +size 838894832 diff --git a/model-00047-of-00058.safetensors b/model-00047-of-00058.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3489f78fe868c4bd7b18d3a9e1ad1892c795f5b5 --- /dev/null +++ b/model-00047-of-00058.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f007b3876baaebb7e38f0b7c8a9486a4c068cb17c6a975142c59d34e541583c9 +size 738231536 diff --git a/model-00048-of-00058.safetensors b/model-00048-of-00058.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..22e30b331d962f64d93766e6f4b90c6ac5761acf --- /dev/null +++ b/model-00048-of-00058.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3c8165111d11ff4166d2ae5ea25149fbdbc6b16428c3b185bb4adb5828efddb +size 704693720 diff --git a/model-00049-of-00058.safetensors b/model-00049-of-00058.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fa996abce5550d7495987ff30de2267ed7db6e12 --- /dev/null +++ b/model-00049-of-00058.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5fea1b915c69ae8a41d3cc7308de05a3754cbe94bf9a96d309db3186196f0b5 +size 838894832 diff --git a/model-00050-of-00058.safetensors b/model-00050-of-00058.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..515caab3efa7ae15c38f74a22e5d3c71fb410275 --- /dev/null +++ b/model-00050-of-00058.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93da747408a271187929ee16cf1173d360a44f077af4648e6d54244ec368fdf2 +size 838894832 diff --git a/model-00051-of-00058.safetensors b/model-00051-of-00058.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a0fe69ebca574345a54816bfd9983a81859149a6 --- /dev/null +++ b/model-00051-of-00058.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:215db6f83da7d7eea10fd1901987f42a3527261a6586abc73a4eab61c6eb8d5c +size 738231536 diff --git a/model-00052-of-00058.safetensors b/model-00052-of-00058.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..09fcd758b6f06ed335b8f746a12aa4ff63cfbade --- /dev/null +++ b/model-00052-of-00058.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf1f40b0cbe9af39ffb55010073522dae766ce683b1f9d7e1eb1db976af4e2ba +size 704693720 diff --git a/model-00053-of-00058.safetensors b/model-00053-of-00058.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..545d1a535172af90f0f0d395c6a602d3a597d9cd --- /dev/null +++ b/model-00053-of-00058.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0767d5df6537475141fa7d361d518f93fcbdfd0c011b126508898e40d1efd84 +size 838894832 diff --git a/model-00054-of-00058.safetensors b/model-00054-of-00058.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..aeb6bde3ca08b54823e315a3caf4a033ade910b4 --- /dev/null +++ b/model-00054-of-00058.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4cbfe8cb67654f46bfaa070121d27868e89932a1b76416248a200c5006c673fc +size 838894832 diff --git a/model-00055-of-00058.safetensors b/model-00055-of-00058.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..30978f6a6d666bda43525c2fc260089f62ba1924 --- /dev/null +++ b/model-00055-of-00058.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b9fb5160990a58d684d084266b84c7259b6e507bffe327eac80c4b11b159f83 +size 738231536 diff --git a/model-00056-of-00058.safetensors b/model-00056-of-00058.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ec74ec756aa593baa100b49e252a64a00da269b4 --- /dev/null +++ b/model-00056-of-00058.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e8d0b8cb2be98e368be31d884d6b8afb53d5664056dd90b17d7005a098a15e5 +size 704693720 diff --git a/model-00057-of-00058.safetensors b/model-00057-of-00058.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..05f2422212fc18284d0b5c745dc539140bc47ab4 --- /dev/null +++ b/model-00057-of-00058.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5f2113f343becc1a3d91a857572895e0aeec38d5e31a996f4ab791e2aee872c +size 503349880 diff --git a/model-00058-of-00058.safetensors b/model-00058-of-00058.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a300afc1d9eb403d3f16c494443bb089c30e3a32 --- /dev/null +++ b/model-00058-of-00058.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af58c4bfa00af282c826f2d1322be3ee5c264e976f56808357d37e800a2d7fca +size 4097835136 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..36826d2cf891b00b2ef1d920754c49eb9357db91 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,565 @@ +{ + "metadata": { + "total_size": 51684229120 + }, + "weight_map": { + "decoder.block.0.layer.0.SelfAttention.k.weight": "model-00025-of-00058.safetensors", + "decoder.block.0.layer.0.SelfAttention.o.weight": "model-00025-of-00058.safetensors", + "decoder.block.0.layer.0.SelfAttention.q.weight": "model-00025-of-00058.safetensors", + "decoder.block.0.layer.0.SelfAttention.relative_attention_bias.weight": "model-00025-of-00058.safetensors", + "decoder.block.0.layer.0.SelfAttention.v.weight": "model-00025-of-00058.safetensors", + "decoder.block.0.layer.0.layer_norm.weight": "model-00025-of-00058.safetensors", + "decoder.block.0.layer.1.EncDecAttention.k.weight": "model-00026-of-00058.safetensors", + "decoder.block.0.layer.1.EncDecAttention.o.weight": "model-00026-of-00058.safetensors", + "decoder.block.0.layer.1.EncDecAttention.q.weight": "model-00025-of-00058.safetensors", + "decoder.block.0.layer.1.EncDecAttention.v.weight": "model-00026-of-00058.safetensors", + "decoder.block.0.layer.1.layer_norm.weight": "model-00026-of-00058.safetensors", + "decoder.block.0.layer.2.DenseReluDense.wi_0.weight": "model-00026-of-00058.safetensors", + "decoder.block.0.layer.2.DenseReluDense.wi_1.weight": "model-00026-of-00058.safetensors", + "decoder.block.0.layer.2.DenseReluDense.wo.weight": "model-00026-of-00058.safetensors", + "decoder.block.0.layer.2.layer_norm.weight": "model-00026-of-00058.safetensors", + "decoder.block.1.layer.0.SelfAttention.k.weight": "model-00026-of-00058.safetensors", + "decoder.block.1.layer.0.SelfAttention.o.weight": "model-00027-of-00058.safetensors", + "decoder.block.1.layer.0.SelfAttention.q.weight": "model-00026-of-00058.safetensors", + "decoder.block.1.layer.0.SelfAttention.v.weight": "model-00027-of-00058.safetensors", + "decoder.block.1.layer.0.layer_norm.weight": "model-00027-of-00058.safetensors", + "decoder.block.1.layer.1.EncDecAttention.k.weight": "model-00027-of-00058.safetensors", + "decoder.block.1.layer.1.EncDecAttention.o.weight": "model-00027-of-00058.safetensors", + "decoder.block.1.layer.1.EncDecAttention.q.weight": "model-00027-of-00058.safetensors", + "decoder.block.1.layer.1.EncDecAttention.v.weight": "model-00027-of-00058.safetensors", + "decoder.block.1.layer.1.layer_norm.weight": "model-00027-of-00058.safetensors", + "decoder.block.1.layer.2.DenseReluDense.wi_0.weight": "model-00027-of-00058.safetensors", + "decoder.block.1.layer.2.DenseReluDense.wi_1.weight": "model-00027-of-00058.safetensors", + "decoder.block.1.layer.2.DenseReluDense.wo.weight": "model-00028-of-00058.safetensors", + "decoder.block.1.layer.2.layer_norm.weight": "model-00028-of-00058.safetensors", + "decoder.block.10.layer.0.SelfAttention.k.weight": "model-00038-of-00058.safetensors", + "decoder.block.10.layer.0.SelfAttention.o.weight": "model-00039-of-00058.safetensors", + "decoder.block.10.layer.0.SelfAttention.q.weight": "model-00038-of-00058.safetensors", + "decoder.block.10.layer.0.SelfAttention.v.weight": "model-00039-of-00058.safetensors", + "decoder.block.10.layer.0.layer_norm.weight": "model-00039-of-00058.safetensors", + "decoder.block.10.layer.1.EncDecAttention.k.weight": "model-00039-of-00058.safetensors", + "decoder.block.10.layer.1.EncDecAttention.o.weight": "model-00039-of-00058.safetensors", + "decoder.block.10.layer.1.EncDecAttention.q.weight": "model-00039-of-00058.safetensors", + "decoder.block.10.layer.1.EncDecAttention.v.weight": "model-00039-of-00058.safetensors", + "decoder.block.10.layer.1.layer_norm.weight": "model-00039-of-00058.safetensors", + "decoder.block.10.layer.2.DenseReluDense.wi_0.weight": "model-00039-of-00058.safetensors", + "decoder.block.10.layer.2.DenseReluDense.wi_1.weight": "model-00039-of-00058.safetensors", + "decoder.block.10.layer.2.DenseReluDense.wo.weight": "model-00040-of-00058.safetensors", + "decoder.block.10.layer.2.layer_norm.weight": "model-00040-of-00058.safetensors", + "decoder.block.11.layer.0.SelfAttention.k.weight": "model-00040-of-00058.safetensors", + "decoder.block.11.layer.0.SelfAttention.o.weight": "model-00040-of-00058.safetensors", + "decoder.block.11.layer.0.SelfAttention.q.weight": "model-00040-of-00058.safetensors", + "decoder.block.11.layer.0.SelfAttention.v.weight": "model-00040-of-00058.safetensors", + "decoder.block.11.layer.0.layer_norm.weight": "model-00040-of-00058.safetensors", + "decoder.block.11.layer.1.EncDecAttention.k.weight": "model-00040-of-00058.safetensors", + "decoder.block.11.layer.1.EncDecAttention.o.weight": "model-00040-of-00058.safetensors", + "decoder.block.11.layer.1.EncDecAttention.q.weight": "model-00040-of-00058.safetensors", + "decoder.block.11.layer.1.EncDecAttention.v.weight": "model-00040-of-00058.safetensors", + "decoder.block.11.layer.1.layer_norm.weight": "model-00040-of-00058.safetensors", + "decoder.block.11.layer.2.DenseReluDense.wi_0.weight": "model-00041-of-00058.safetensors", + "decoder.block.11.layer.2.DenseReluDense.wi_1.weight": "model-00041-of-00058.safetensors", + "decoder.block.11.layer.2.DenseReluDense.wo.weight": "model-00041-of-00058.safetensors", + "decoder.block.11.layer.2.layer_norm.weight": "model-00041-of-00058.safetensors", + "decoder.block.12.layer.0.SelfAttention.k.weight": "model-00041-of-00058.safetensors", + "decoder.block.12.layer.0.SelfAttention.o.weight": "model-00041-of-00058.safetensors", + "decoder.block.12.layer.0.SelfAttention.q.weight": "model-00041-of-00058.safetensors", + "decoder.block.12.layer.0.SelfAttention.v.weight": "model-00041-of-00058.safetensors", + "decoder.block.12.layer.0.layer_norm.weight": "model-00041-of-00058.safetensors", + "decoder.block.12.layer.1.EncDecAttention.k.weight": "model-00042-of-00058.safetensors", + "decoder.block.12.layer.1.EncDecAttention.o.weight": "model-00042-of-00058.safetensors", + "decoder.block.12.layer.1.EncDecAttention.q.weight": "model-00041-of-00058.safetensors", + "decoder.block.12.layer.1.EncDecAttention.v.weight": "model-00042-of-00058.safetensors", + "decoder.block.12.layer.1.layer_norm.weight": "model-00042-of-00058.safetensors", + "decoder.block.12.layer.2.DenseReluDense.wi_0.weight": "model-00042-of-00058.safetensors", + "decoder.block.12.layer.2.DenseReluDense.wi_1.weight": "model-00042-of-00058.safetensors", + "decoder.block.12.layer.2.DenseReluDense.wo.weight": "model-00042-of-00058.safetensors", + "decoder.block.12.layer.2.layer_norm.weight": "model-00042-of-00058.safetensors", + "decoder.block.13.layer.0.SelfAttention.k.weight": "model-00042-of-00058.safetensors", + "decoder.block.13.layer.0.SelfAttention.o.weight": "model-00043-of-00058.safetensors", + "decoder.block.13.layer.0.SelfAttention.q.weight": "model-00042-of-00058.safetensors", + "decoder.block.13.layer.0.SelfAttention.v.weight": "model-00043-of-00058.safetensors", + "decoder.block.13.layer.0.layer_norm.weight": "model-00043-of-00058.safetensors", + "decoder.block.13.layer.1.EncDecAttention.k.weight": "model-00043-of-00058.safetensors", + "decoder.block.13.layer.1.EncDecAttention.o.weight": "model-00043-of-00058.safetensors", + "decoder.block.13.layer.1.EncDecAttention.q.weight": "model-00043-of-00058.safetensors", + "decoder.block.13.layer.1.EncDecAttention.v.weight": "model-00043-of-00058.safetensors", + "decoder.block.13.layer.1.layer_norm.weight": "model-00043-of-00058.safetensors", + "decoder.block.13.layer.2.DenseReluDense.wi_0.weight": "model-00043-of-00058.safetensors", + "decoder.block.13.layer.2.DenseReluDense.wi_1.weight": "model-00043-of-00058.safetensors", + "decoder.block.13.layer.2.DenseReluDense.wo.weight": "model-00044-of-00058.safetensors", + "decoder.block.13.layer.2.layer_norm.weight": "model-00044-of-00058.safetensors", + "decoder.block.14.layer.0.SelfAttention.k.weight": "model-00044-of-00058.safetensors", + "decoder.block.14.layer.0.SelfAttention.o.weight": "model-00044-of-00058.safetensors", + "decoder.block.14.layer.0.SelfAttention.q.weight": "model-00044-of-00058.safetensors", + "decoder.block.14.layer.0.SelfAttention.v.weight": "model-00044-of-00058.safetensors", + "decoder.block.14.layer.0.layer_norm.weight": "model-00044-of-00058.safetensors", + "decoder.block.14.layer.1.EncDecAttention.k.weight": "model-00044-of-00058.safetensors", + "decoder.block.14.layer.1.EncDecAttention.o.weight": "model-00044-of-00058.safetensors", + "decoder.block.14.layer.1.EncDecAttention.q.weight": "model-00044-of-00058.safetensors", + "decoder.block.14.layer.1.EncDecAttention.v.weight": "model-00044-of-00058.safetensors", + "decoder.block.14.layer.1.layer_norm.weight": "model-00044-of-00058.safetensors", + "decoder.block.14.layer.2.DenseReluDense.wi_0.weight": "model-00045-of-00058.safetensors", + "decoder.block.14.layer.2.DenseReluDense.wi_1.weight": "model-00045-of-00058.safetensors", + "decoder.block.14.layer.2.DenseReluDense.wo.weight": "model-00045-of-00058.safetensors", + "decoder.block.14.layer.2.layer_norm.weight": "model-00045-of-00058.safetensors", + "decoder.block.15.layer.0.SelfAttention.k.weight": "model-00045-of-00058.safetensors", + "decoder.block.15.layer.0.SelfAttention.o.weight": "model-00045-of-00058.safetensors", + "decoder.block.15.layer.0.SelfAttention.q.weight": "model-00045-of-00058.safetensors", + "decoder.block.15.layer.0.SelfAttention.v.weight": "model-00045-of-00058.safetensors", + "decoder.block.15.layer.0.layer_norm.weight": "model-00045-of-00058.safetensors", + "decoder.block.15.layer.1.EncDecAttention.k.weight": "model-00046-of-00058.safetensors", + "decoder.block.15.layer.1.EncDecAttention.o.weight": "model-00046-of-00058.safetensors", + "decoder.block.15.layer.1.EncDecAttention.q.weight": "model-00045-of-00058.safetensors", + "decoder.block.15.layer.1.EncDecAttention.v.weight": "model-00046-of-00058.safetensors", + "decoder.block.15.layer.1.layer_norm.weight": "model-00046-of-00058.safetensors", + "decoder.block.15.layer.2.DenseReluDense.wi_0.weight": "model-00046-of-00058.safetensors", + "decoder.block.15.layer.2.DenseReluDense.wi_1.weight": "model-00046-of-00058.safetensors", + "decoder.block.15.layer.2.DenseReluDense.wo.weight": "model-00046-of-00058.safetensors", + "decoder.block.15.layer.2.layer_norm.weight": "model-00046-of-00058.safetensors", + "decoder.block.16.layer.0.SelfAttention.k.weight": "model-00046-of-00058.safetensors", + "decoder.block.16.layer.0.SelfAttention.o.weight": "model-00047-of-00058.safetensors", + "decoder.block.16.layer.0.SelfAttention.q.weight": "model-00046-of-00058.safetensors", + "decoder.block.16.layer.0.SelfAttention.v.weight": "model-00047-of-00058.safetensors", + "decoder.block.16.layer.0.layer_norm.weight": "model-00047-of-00058.safetensors", + "decoder.block.16.layer.1.EncDecAttention.k.weight": "model-00047-of-00058.safetensors", + "decoder.block.16.layer.1.EncDecAttention.o.weight": "model-00047-of-00058.safetensors", + "decoder.block.16.layer.1.EncDecAttention.q.weight": "model-00047-of-00058.safetensors", + "decoder.block.16.layer.1.EncDecAttention.v.weight": "model-00047-of-00058.safetensors", + "decoder.block.16.layer.1.layer_norm.weight": "model-00047-of-00058.safetensors", + "decoder.block.16.layer.2.DenseReluDense.wi_0.weight": "model-00047-of-00058.safetensors", + "decoder.block.16.layer.2.DenseReluDense.wi_1.weight": "model-00047-of-00058.safetensors", + "decoder.block.16.layer.2.DenseReluDense.wo.weight": "model-00048-of-00058.safetensors", + "decoder.block.16.layer.2.layer_norm.weight": "model-00048-of-00058.safetensors", + "decoder.block.17.layer.0.SelfAttention.k.weight": "model-00048-of-00058.safetensors", + "decoder.block.17.layer.0.SelfAttention.o.weight": "model-00048-of-00058.safetensors", + "decoder.block.17.layer.0.SelfAttention.q.weight": "model-00048-of-00058.safetensors", + "decoder.block.17.layer.0.SelfAttention.v.weight": "model-00048-of-00058.safetensors", + "decoder.block.17.layer.0.layer_norm.weight": "model-00048-of-00058.safetensors", + "decoder.block.17.layer.1.EncDecAttention.k.weight": "model-00048-of-00058.safetensors", + "decoder.block.17.layer.1.EncDecAttention.o.weight": "model-00048-of-00058.safetensors", + "decoder.block.17.layer.1.EncDecAttention.q.weight": "model-00048-of-00058.safetensors", + "decoder.block.17.layer.1.EncDecAttention.v.weight": "model-00048-of-00058.safetensors", + "decoder.block.17.layer.1.layer_norm.weight": "model-00048-of-00058.safetensors", + "decoder.block.17.layer.2.DenseReluDense.wi_0.weight": "model-00049-of-00058.safetensors", + "decoder.block.17.layer.2.DenseReluDense.wi_1.weight": "model-00049-of-00058.safetensors", + "decoder.block.17.layer.2.DenseReluDense.wo.weight": "model-00049-of-00058.safetensors", + "decoder.block.17.layer.2.layer_norm.weight": "model-00049-of-00058.safetensors", + "decoder.block.18.layer.0.SelfAttention.k.weight": "model-00049-of-00058.safetensors", + "decoder.block.18.layer.0.SelfAttention.o.weight": "model-00049-of-00058.safetensors", + "decoder.block.18.layer.0.SelfAttention.q.weight": "model-00049-of-00058.safetensors", + "decoder.block.18.layer.0.SelfAttention.v.weight": "model-00049-of-00058.safetensors", + "decoder.block.18.layer.0.layer_norm.weight": "model-00049-of-00058.safetensors", + "decoder.block.18.layer.1.EncDecAttention.k.weight": "model-00050-of-00058.safetensors", + "decoder.block.18.layer.1.EncDecAttention.o.weight": "model-00050-of-00058.safetensors", + "decoder.block.18.layer.1.EncDecAttention.q.weight": "model-00049-of-00058.safetensors", + "decoder.block.18.layer.1.EncDecAttention.v.weight": "model-00050-of-00058.safetensors", + "decoder.block.18.layer.1.layer_norm.weight": "model-00050-of-00058.safetensors", + "decoder.block.18.layer.2.DenseReluDense.wi_0.weight": "model-00050-of-00058.safetensors", + "decoder.block.18.layer.2.DenseReluDense.wi_1.weight": "model-00050-of-00058.safetensors", + "decoder.block.18.layer.2.DenseReluDense.wo.weight": "model-00050-of-00058.safetensors", + "decoder.block.18.layer.2.layer_norm.weight": "model-00050-of-00058.safetensors", + "decoder.block.19.layer.0.SelfAttention.k.weight": "model-00050-of-00058.safetensors", + "decoder.block.19.layer.0.SelfAttention.o.weight": "model-00051-of-00058.safetensors", + "decoder.block.19.layer.0.SelfAttention.q.weight": "model-00050-of-00058.safetensors", + "decoder.block.19.layer.0.SelfAttention.v.weight": "model-00051-of-00058.safetensors", + "decoder.block.19.layer.0.layer_norm.weight": "model-00051-of-00058.safetensors", + "decoder.block.19.layer.1.EncDecAttention.k.weight": "model-00051-of-00058.safetensors", + "decoder.block.19.layer.1.EncDecAttention.o.weight": "model-00051-of-00058.safetensors", + "decoder.block.19.layer.1.EncDecAttention.q.weight": "model-00051-of-00058.safetensors", + "decoder.block.19.layer.1.EncDecAttention.v.weight": "model-00051-of-00058.safetensors", + "decoder.block.19.layer.1.layer_norm.weight": "model-00051-of-00058.safetensors", + "decoder.block.19.layer.2.DenseReluDense.wi_0.weight": "model-00051-of-00058.safetensors", + "decoder.block.19.layer.2.DenseReluDense.wi_1.weight": "model-00051-of-00058.safetensors", + "decoder.block.19.layer.2.DenseReluDense.wo.weight": "model-00052-of-00058.safetensors", + "decoder.block.19.layer.2.layer_norm.weight": "model-00052-of-00058.safetensors", + "decoder.block.2.layer.0.SelfAttention.k.weight": "model-00028-of-00058.safetensors", + "decoder.block.2.layer.0.SelfAttention.o.weight": "model-00028-of-00058.safetensors", + "decoder.block.2.layer.0.SelfAttention.q.weight": "model-00028-of-00058.safetensors", + "decoder.block.2.layer.0.SelfAttention.v.weight": "model-00028-of-00058.safetensors", + "decoder.block.2.layer.0.layer_norm.weight": "model-00028-of-00058.safetensors", + "decoder.block.2.layer.1.EncDecAttention.k.weight": "model-00028-of-00058.safetensors", + "decoder.block.2.layer.1.EncDecAttention.o.weight": "model-00028-of-00058.safetensors", + "decoder.block.2.layer.1.EncDecAttention.q.weight": "model-00028-of-00058.safetensors", + "decoder.block.2.layer.1.EncDecAttention.v.weight": "model-00028-of-00058.safetensors", + "decoder.block.2.layer.1.layer_norm.weight": "model-00028-of-00058.safetensors", + "decoder.block.2.layer.2.DenseReluDense.wi_0.weight": "model-00029-of-00058.safetensors", + "decoder.block.2.layer.2.DenseReluDense.wi_1.weight": "model-00029-of-00058.safetensors", + "decoder.block.2.layer.2.DenseReluDense.wo.weight": "model-00029-of-00058.safetensors", + "decoder.block.2.layer.2.layer_norm.weight": "model-00029-of-00058.safetensors", + "decoder.block.20.layer.0.SelfAttention.k.weight": "model-00052-of-00058.safetensors", + "decoder.block.20.layer.0.SelfAttention.o.weight": "model-00052-of-00058.safetensors", + "decoder.block.20.layer.0.SelfAttention.q.weight": "model-00052-of-00058.safetensors", + "decoder.block.20.layer.0.SelfAttention.v.weight": "model-00052-of-00058.safetensors", + "decoder.block.20.layer.0.layer_norm.weight": "model-00052-of-00058.safetensors", + "decoder.block.20.layer.1.EncDecAttention.k.weight": "model-00052-of-00058.safetensors", + "decoder.block.20.layer.1.EncDecAttention.o.weight": "model-00052-of-00058.safetensors", + "decoder.block.20.layer.1.EncDecAttention.q.weight": "model-00052-of-00058.safetensors", + "decoder.block.20.layer.1.EncDecAttention.v.weight": "model-00052-of-00058.safetensors", + "decoder.block.20.layer.1.layer_norm.weight": "model-00052-of-00058.safetensors", + "decoder.block.20.layer.2.DenseReluDense.wi_0.weight": "model-00053-of-00058.safetensors", + "decoder.block.20.layer.2.DenseReluDense.wi_1.weight": "model-00053-of-00058.safetensors", + "decoder.block.20.layer.2.DenseReluDense.wo.weight": "model-00053-of-00058.safetensors", + "decoder.block.20.layer.2.layer_norm.weight": "model-00053-of-00058.safetensors", + "decoder.block.21.layer.0.SelfAttention.k.weight": "model-00053-of-00058.safetensors", + "decoder.block.21.layer.0.SelfAttention.o.weight": "model-00053-of-00058.safetensors", + "decoder.block.21.layer.0.SelfAttention.q.weight": "model-00053-of-00058.safetensors", + "decoder.block.21.layer.0.SelfAttention.v.weight": "model-00053-of-00058.safetensors", + "decoder.block.21.layer.0.layer_norm.weight": "model-00053-of-00058.safetensors", + "decoder.block.21.layer.1.EncDecAttention.k.weight": "model-00054-of-00058.safetensors", + "decoder.block.21.layer.1.EncDecAttention.o.weight": "model-00054-of-00058.safetensors", + "decoder.block.21.layer.1.EncDecAttention.q.weight": "model-00053-of-00058.safetensors", + "decoder.block.21.layer.1.EncDecAttention.v.weight": "model-00054-of-00058.safetensors", + "decoder.block.21.layer.1.layer_norm.weight": "model-00054-of-00058.safetensors", + "decoder.block.21.layer.2.DenseReluDense.wi_0.weight": "model-00054-of-00058.safetensors", + "decoder.block.21.layer.2.DenseReluDense.wi_1.weight": "model-00054-of-00058.safetensors", + "decoder.block.21.layer.2.DenseReluDense.wo.weight": "model-00054-of-00058.safetensors", + "decoder.block.21.layer.2.layer_norm.weight": "model-00054-of-00058.safetensors", + "decoder.block.22.layer.0.SelfAttention.k.weight": "model-00054-of-00058.safetensors", + "decoder.block.22.layer.0.SelfAttention.o.weight": "model-00055-of-00058.safetensors", + "decoder.block.22.layer.0.SelfAttention.q.weight": "model-00054-of-00058.safetensors", + "decoder.block.22.layer.0.SelfAttention.v.weight": "model-00055-of-00058.safetensors", + "decoder.block.22.layer.0.layer_norm.weight": "model-00055-of-00058.safetensors", + "decoder.block.22.layer.1.EncDecAttention.k.weight": "model-00055-of-00058.safetensors", + "decoder.block.22.layer.1.EncDecAttention.o.weight": "model-00055-of-00058.safetensors", + "decoder.block.22.layer.1.EncDecAttention.q.weight": "model-00055-of-00058.safetensors", + "decoder.block.22.layer.1.EncDecAttention.v.weight": "model-00055-of-00058.safetensors", + "decoder.block.22.layer.1.layer_norm.weight": "model-00055-of-00058.safetensors", + "decoder.block.22.layer.2.DenseReluDense.wi_0.weight": "model-00055-of-00058.safetensors", + "decoder.block.22.layer.2.DenseReluDense.wi_1.weight": "model-00055-of-00058.safetensors", + "decoder.block.22.layer.2.DenseReluDense.wo.weight": "model-00056-of-00058.safetensors", + "decoder.block.22.layer.2.layer_norm.weight": "model-00056-of-00058.safetensors", + "decoder.block.23.layer.0.SelfAttention.k.weight": "model-00056-of-00058.safetensors", + "decoder.block.23.layer.0.SelfAttention.o.weight": "model-00056-of-00058.safetensors", + "decoder.block.23.layer.0.SelfAttention.q.weight": "model-00056-of-00058.safetensors", + "decoder.block.23.layer.0.SelfAttention.v.weight": "model-00056-of-00058.safetensors", + "decoder.block.23.layer.0.layer_norm.weight": "model-00056-of-00058.safetensors", + "decoder.block.23.layer.1.EncDecAttention.k.weight": "model-00056-of-00058.safetensors", + "decoder.block.23.layer.1.EncDecAttention.o.weight": "model-00056-of-00058.safetensors", + "decoder.block.23.layer.1.EncDecAttention.q.weight": "model-00056-of-00058.safetensors", + "decoder.block.23.layer.1.EncDecAttention.v.weight": "model-00056-of-00058.safetensors", + "decoder.block.23.layer.1.layer_norm.weight": "model-00056-of-00058.safetensors", + "decoder.block.23.layer.2.DenseReluDense.wi_0.weight": "model-00057-of-00058.safetensors", + "decoder.block.23.layer.2.DenseReluDense.wi_1.weight": "model-00057-of-00058.safetensors", + "decoder.block.23.layer.2.DenseReluDense.wo.weight": "model-00057-of-00058.safetensors", + "decoder.block.23.layer.2.layer_norm.weight": "model-00057-of-00058.safetensors", + "decoder.block.3.layer.0.SelfAttention.k.weight": "model-00029-of-00058.safetensors", + "decoder.block.3.layer.0.SelfAttention.o.weight": "model-00029-of-00058.safetensors", + "decoder.block.3.layer.0.SelfAttention.q.weight": "model-00029-of-00058.safetensors", + "decoder.block.3.layer.0.SelfAttention.v.weight": "model-00029-of-00058.safetensors", + "decoder.block.3.layer.0.layer_norm.weight": "model-00029-of-00058.safetensors", + "decoder.block.3.layer.1.EncDecAttention.k.weight": "model-00030-of-00058.safetensors", + "decoder.block.3.layer.1.EncDecAttention.o.weight": "model-00030-of-00058.safetensors", + "decoder.block.3.layer.1.EncDecAttention.q.weight": "model-00029-of-00058.safetensors", + "decoder.block.3.layer.1.EncDecAttention.v.weight": "model-00030-of-00058.safetensors", + "decoder.block.3.layer.1.layer_norm.weight": "model-00030-of-00058.safetensors", + "decoder.block.3.layer.2.DenseReluDense.wi_0.weight": "model-00030-of-00058.safetensors", + "decoder.block.3.layer.2.DenseReluDense.wi_1.weight": "model-00030-of-00058.safetensors", + "decoder.block.3.layer.2.DenseReluDense.wo.weight": "model-00030-of-00058.safetensors", + "decoder.block.3.layer.2.layer_norm.weight": "model-00030-of-00058.safetensors", + "decoder.block.4.layer.0.SelfAttention.k.weight": "model-00030-of-00058.safetensors", + "decoder.block.4.layer.0.SelfAttention.o.weight": "model-00031-of-00058.safetensors", + "decoder.block.4.layer.0.SelfAttention.q.weight": "model-00030-of-00058.safetensors", + "decoder.block.4.layer.0.SelfAttention.v.weight": "model-00031-of-00058.safetensors", + "decoder.block.4.layer.0.layer_norm.weight": "model-00031-of-00058.safetensors", + "decoder.block.4.layer.1.EncDecAttention.k.weight": "model-00031-of-00058.safetensors", + "decoder.block.4.layer.1.EncDecAttention.o.weight": "model-00031-of-00058.safetensors", + "decoder.block.4.layer.1.EncDecAttention.q.weight": "model-00031-of-00058.safetensors", + "decoder.block.4.layer.1.EncDecAttention.v.weight": "model-00031-of-00058.safetensors", + "decoder.block.4.layer.1.layer_norm.weight": "model-00031-of-00058.safetensors", + "decoder.block.4.layer.2.DenseReluDense.wi_0.weight": "model-00031-of-00058.safetensors", + "decoder.block.4.layer.2.DenseReluDense.wi_1.weight": "model-00031-of-00058.safetensors", + "decoder.block.4.layer.2.DenseReluDense.wo.weight": "model-00032-of-00058.safetensors", + "decoder.block.4.layer.2.layer_norm.weight": "model-00032-of-00058.safetensors", + "decoder.block.5.layer.0.SelfAttention.k.weight": "model-00032-of-00058.safetensors", + "decoder.block.5.layer.0.SelfAttention.o.weight": "model-00032-of-00058.safetensors", + "decoder.block.5.layer.0.SelfAttention.q.weight": "model-00032-of-00058.safetensors", + "decoder.block.5.layer.0.SelfAttention.v.weight": "model-00032-of-00058.safetensors", + "decoder.block.5.layer.0.layer_norm.weight": "model-00032-of-00058.safetensors", + "decoder.block.5.layer.1.EncDecAttention.k.weight": "model-00032-of-00058.safetensors", + "decoder.block.5.layer.1.EncDecAttention.o.weight": "model-00032-of-00058.safetensors", + "decoder.block.5.layer.1.EncDecAttention.q.weight": "model-00032-of-00058.safetensors", + "decoder.block.5.layer.1.EncDecAttention.v.weight": "model-00032-of-00058.safetensors", + "decoder.block.5.layer.1.layer_norm.weight": "model-00032-of-00058.safetensors", + "decoder.block.5.layer.2.DenseReluDense.wi_0.weight": "model-00033-of-00058.safetensors", + "decoder.block.5.layer.2.DenseReluDense.wi_1.weight": "model-00033-of-00058.safetensors", + "decoder.block.5.layer.2.DenseReluDense.wo.weight": "model-00033-of-00058.safetensors", + "decoder.block.5.layer.2.layer_norm.weight": "model-00033-of-00058.safetensors", + "decoder.block.6.layer.0.SelfAttention.k.weight": "model-00033-of-00058.safetensors", + "decoder.block.6.layer.0.SelfAttention.o.weight": "model-00033-of-00058.safetensors", + "decoder.block.6.layer.0.SelfAttention.q.weight": "model-00033-of-00058.safetensors", + "decoder.block.6.layer.0.SelfAttention.v.weight": "model-00033-of-00058.safetensors", + "decoder.block.6.layer.0.layer_norm.weight": "model-00033-of-00058.safetensors", + "decoder.block.6.layer.1.EncDecAttention.k.weight": "model-00034-of-00058.safetensors", + "decoder.block.6.layer.1.EncDecAttention.o.weight": "model-00034-of-00058.safetensors", + "decoder.block.6.layer.1.EncDecAttention.q.weight": "model-00033-of-00058.safetensors", + "decoder.block.6.layer.1.EncDecAttention.v.weight": "model-00034-of-00058.safetensors", + "decoder.block.6.layer.1.layer_norm.weight": "model-00034-of-00058.safetensors", + "decoder.block.6.layer.2.DenseReluDense.wi_0.weight": "model-00034-of-00058.safetensors", + "decoder.block.6.layer.2.DenseReluDense.wi_1.weight": "model-00034-of-00058.safetensors", + "decoder.block.6.layer.2.DenseReluDense.wo.weight": "model-00034-of-00058.safetensors", + "decoder.block.6.layer.2.layer_norm.weight": "model-00034-of-00058.safetensors", + "decoder.block.7.layer.0.SelfAttention.k.weight": "model-00034-of-00058.safetensors", + "decoder.block.7.layer.0.SelfAttention.o.weight": "model-00035-of-00058.safetensors", + "decoder.block.7.layer.0.SelfAttention.q.weight": "model-00034-of-00058.safetensors", + "decoder.block.7.layer.0.SelfAttention.v.weight": "model-00035-of-00058.safetensors", + "decoder.block.7.layer.0.layer_norm.weight": "model-00035-of-00058.safetensors", + "decoder.block.7.layer.1.EncDecAttention.k.weight": "model-00035-of-00058.safetensors", + "decoder.block.7.layer.1.EncDecAttention.o.weight": "model-00035-of-00058.safetensors", + "decoder.block.7.layer.1.EncDecAttention.q.weight": "model-00035-of-00058.safetensors", + "decoder.block.7.layer.1.EncDecAttention.v.weight": "model-00035-of-00058.safetensors", + "decoder.block.7.layer.1.layer_norm.weight": "model-00035-of-00058.safetensors", + "decoder.block.7.layer.2.DenseReluDense.wi_0.weight": "model-00035-of-00058.safetensors", + "decoder.block.7.layer.2.DenseReluDense.wi_1.weight": "model-00035-of-00058.safetensors", + "decoder.block.7.layer.2.DenseReluDense.wo.weight": "model-00036-of-00058.safetensors", + "decoder.block.7.layer.2.layer_norm.weight": "model-00036-of-00058.safetensors", + "decoder.block.8.layer.0.SelfAttention.k.weight": "model-00036-of-00058.safetensors", + "decoder.block.8.layer.0.SelfAttention.o.weight": "model-00036-of-00058.safetensors", + "decoder.block.8.layer.0.SelfAttention.q.weight": "model-00036-of-00058.safetensors", + "decoder.block.8.layer.0.SelfAttention.v.weight": "model-00036-of-00058.safetensors", + "decoder.block.8.layer.0.layer_norm.weight": "model-00036-of-00058.safetensors", + "decoder.block.8.layer.1.EncDecAttention.k.weight": "model-00036-of-00058.safetensors", + "decoder.block.8.layer.1.EncDecAttention.o.weight": "model-00036-of-00058.safetensors", + "decoder.block.8.layer.1.EncDecAttention.q.weight": "model-00036-of-00058.safetensors", + "decoder.block.8.layer.1.EncDecAttention.v.weight": "model-00036-of-00058.safetensors", + "decoder.block.8.layer.1.layer_norm.weight": "model-00036-of-00058.safetensors", + "decoder.block.8.layer.2.DenseReluDense.wi_0.weight": "model-00037-of-00058.safetensors", + "decoder.block.8.layer.2.DenseReluDense.wi_1.weight": "model-00037-of-00058.safetensors", + "decoder.block.8.layer.2.DenseReluDense.wo.weight": "model-00037-of-00058.safetensors", + "decoder.block.8.layer.2.layer_norm.weight": "model-00037-of-00058.safetensors", + "decoder.block.9.layer.0.SelfAttention.k.weight": "model-00037-of-00058.safetensors", + "decoder.block.9.layer.0.SelfAttention.o.weight": "model-00037-of-00058.safetensors", + "decoder.block.9.layer.0.SelfAttention.q.weight": "model-00037-of-00058.safetensors", + "decoder.block.9.layer.0.SelfAttention.v.weight": "model-00037-of-00058.safetensors", + "decoder.block.9.layer.0.layer_norm.weight": "model-00037-of-00058.safetensors", + "decoder.block.9.layer.1.EncDecAttention.k.weight": "model-00038-of-00058.safetensors", + "decoder.block.9.layer.1.EncDecAttention.o.weight": "model-00038-of-00058.safetensors", + "decoder.block.9.layer.1.EncDecAttention.q.weight": "model-00037-of-00058.safetensors", + "decoder.block.9.layer.1.EncDecAttention.v.weight": "model-00038-of-00058.safetensors", + "decoder.block.9.layer.1.layer_norm.weight": "model-00038-of-00058.safetensors", + "decoder.block.9.layer.2.DenseReluDense.wi_0.weight": "model-00038-of-00058.safetensors", + "decoder.block.9.layer.2.DenseReluDense.wi_1.weight": "model-00038-of-00058.safetensors", + "decoder.block.9.layer.2.DenseReluDense.wo.weight": "model-00038-of-00058.safetensors", + "decoder.block.9.layer.2.layer_norm.weight": "model-00038-of-00058.safetensors", + "decoder.final_layer_norm.weight": "model-00057-of-00058.safetensors", + "encoder.block.0.layer.0.SelfAttention.k.weight": "model-00002-of-00058.safetensors", + "encoder.block.0.layer.0.SelfAttention.o.weight": "model-00002-of-00058.safetensors", + "encoder.block.0.layer.0.SelfAttention.q.weight": "model-00002-of-00058.safetensors", + "encoder.block.0.layer.0.SelfAttention.relative_attention_bias.weight": "model-00002-of-00058.safetensors", + "encoder.block.0.layer.0.SelfAttention.v.weight": "model-00002-of-00058.safetensors", + "encoder.block.0.layer.0.layer_norm.weight": "model-00002-of-00058.safetensors", + "encoder.block.0.layer.1.DenseReluDense.wi_0.weight": "model-00002-of-00058.safetensors", + "encoder.block.0.layer.1.DenseReluDense.wi_1.weight": "model-00002-of-00058.safetensors", + "encoder.block.0.layer.1.DenseReluDense.wo.weight": "model-00002-of-00058.safetensors", + "encoder.block.0.layer.1.layer_norm.weight": "model-00002-of-00058.safetensors", + "encoder.block.1.layer.0.SelfAttention.k.weight": "model-00003-of-00058.safetensors", + "encoder.block.1.layer.0.SelfAttention.o.weight": "model-00003-of-00058.safetensors", + "encoder.block.1.layer.0.SelfAttention.q.weight": "model-00002-of-00058.safetensors", + "encoder.block.1.layer.0.SelfAttention.v.weight": "model-00003-of-00058.safetensors", + "encoder.block.1.layer.0.layer_norm.weight": "model-00003-of-00058.safetensors", + "encoder.block.1.layer.1.DenseReluDense.wi_0.weight": "model-00003-of-00058.safetensors", + "encoder.block.1.layer.1.DenseReluDense.wi_1.weight": "model-00003-of-00058.safetensors", + "encoder.block.1.layer.1.DenseReluDense.wo.weight": "model-00003-of-00058.safetensors", + "encoder.block.1.layer.1.layer_norm.weight": "model-00003-of-00058.safetensors", + "encoder.block.10.layer.0.SelfAttention.k.weight": "model-00011-of-00058.safetensors", + "encoder.block.10.layer.0.SelfAttention.o.weight": "model-00011-of-00058.safetensors", + "encoder.block.10.layer.0.SelfAttention.q.weight": "model-00011-of-00058.safetensors", + "encoder.block.10.layer.0.SelfAttention.v.weight": "model-00011-of-00058.safetensors", + "encoder.block.10.layer.0.layer_norm.weight": "model-00011-of-00058.safetensors", + "encoder.block.10.layer.1.DenseReluDense.wi_0.weight": "model-00012-of-00058.safetensors", + "encoder.block.10.layer.1.DenseReluDense.wi_1.weight": "model-00012-of-00058.safetensors", + "encoder.block.10.layer.1.DenseReluDense.wo.weight": "model-00012-of-00058.safetensors", + "encoder.block.10.layer.1.layer_norm.weight": "model-00012-of-00058.safetensors", + "encoder.block.11.layer.0.SelfAttention.k.weight": "model-00012-of-00058.safetensors", + "encoder.block.11.layer.0.SelfAttention.o.weight": "model-00012-of-00058.safetensors", + "encoder.block.11.layer.0.SelfAttention.q.weight": "model-00012-of-00058.safetensors", + "encoder.block.11.layer.0.SelfAttention.v.weight": "model-00012-of-00058.safetensors", + "encoder.block.11.layer.0.layer_norm.weight": "model-00012-of-00058.safetensors", + "encoder.block.11.layer.1.DenseReluDense.wi_0.weight": "model-00013-of-00058.safetensors", + "encoder.block.11.layer.1.DenseReluDense.wi_1.weight": "model-00013-of-00058.safetensors", + "encoder.block.11.layer.1.DenseReluDense.wo.weight": "model-00013-of-00058.safetensors", + "encoder.block.11.layer.1.layer_norm.weight": "model-00013-of-00058.safetensors", + "encoder.block.12.layer.0.SelfAttention.k.weight": "model-00013-of-00058.safetensors", + "encoder.block.12.layer.0.SelfAttention.o.weight": "model-00013-of-00058.safetensors", + "encoder.block.12.layer.0.SelfAttention.q.weight": "model-00013-of-00058.safetensors", + "encoder.block.12.layer.0.SelfAttention.v.weight": "model-00013-of-00058.safetensors", + "encoder.block.12.layer.0.layer_norm.weight": "model-00013-of-00058.safetensors", + "encoder.block.12.layer.1.DenseReluDense.wi_0.weight": "model-00014-of-00058.safetensors", + "encoder.block.12.layer.1.DenseReluDense.wi_1.weight": "model-00014-of-00058.safetensors", + "encoder.block.12.layer.1.DenseReluDense.wo.weight": "model-00014-of-00058.safetensors", + "encoder.block.12.layer.1.layer_norm.weight": "model-00014-of-00058.safetensors", + "encoder.block.13.layer.0.SelfAttention.k.weight": "model-00014-of-00058.safetensors", + "encoder.block.13.layer.0.SelfAttention.o.weight": "model-00014-of-00058.safetensors", + "encoder.block.13.layer.0.SelfAttention.q.weight": "model-00014-of-00058.safetensors", + "encoder.block.13.layer.0.SelfAttention.v.weight": "model-00014-of-00058.safetensors", + "encoder.block.13.layer.0.layer_norm.weight": "model-00014-of-00058.safetensors", + "encoder.block.13.layer.1.DenseReluDense.wi_0.weight": "model-00015-of-00058.safetensors", + "encoder.block.13.layer.1.DenseReluDense.wi_1.weight": "model-00015-of-00058.safetensors", + "encoder.block.13.layer.1.DenseReluDense.wo.weight": "model-00015-of-00058.safetensors", + "encoder.block.13.layer.1.layer_norm.weight": "model-00015-of-00058.safetensors", + "encoder.block.14.layer.0.SelfAttention.k.weight": "model-00015-of-00058.safetensors", + "encoder.block.14.layer.0.SelfAttention.o.weight": "model-00015-of-00058.safetensors", + "encoder.block.14.layer.0.SelfAttention.q.weight": "model-00015-of-00058.safetensors", + "encoder.block.14.layer.0.SelfAttention.v.weight": "model-00015-of-00058.safetensors", + "encoder.block.14.layer.0.layer_norm.weight": "model-00015-of-00058.safetensors", + "encoder.block.14.layer.1.DenseReluDense.wi_0.weight": "model-00016-of-00058.safetensors", + "encoder.block.14.layer.1.DenseReluDense.wi_1.weight": "model-00016-of-00058.safetensors", + "encoder.block.14.layer.1.DenseReluDense.wo.weight": "model-00016-of-00058.safetensors", + "encoder.block.14.layer.1.layer_norm.weight": "model-00016-of-00058.safetensors", + "encoder.block.15.layer.0.SelfAttention.k.weight": "model-00016-of-00058.safetensors", + "encoder.block.15.layer.0.SelfAttention.o.weight": "model-00016-of-00058.safetensors", + "encoder.block.15.layer.0.SelfAttention.q.weight": "model-00016-of-00058.safetensors", + "encoder.block.15.layer.0.SelfAttention.v.weight": "model-00016-of-00058.safetensors", + "encoder.block.15.layer.0.layer_norm.weight": "model-00016-of-00058.safetensors", + "encoder.block.15.layer.1.DenseReluDense.wi_0.weight": "model-00017-of-00058.safetensors", + "encoder.block.15.layer.1.DenseReluDense.wi_1.weight": "model-00017-of-00058.safetensors", + "encoder.block.15.layer.1.DenseReluDense.wo.weight": "model-00017-of-00058.safetensors", + "encoder.block.15.layer.1.layer_norm.weight": "model-00017-of-00058.safetensors", + "encoder.block.16.layer.0.SelfAttention.k.weight": "model-00017-of-00058.safetensors", + "encoder.block.16.layer.0.SelfAttention.o.weight": "model-00017-of-00058.safetensors", + "encoder.block.16.layer.0.SelfAttention.q.weight": "model-00017-of-00058.safetensors", + "encoder.block.16.layer.0.SelfAttention.v.weight": "model-00017-of-00058.safetensors", + "encoder.block.16.layer.0.layer_norm.weight": "model-00017-of-00058.safetensors", + "encoder.block.16.layer.1.DenseReluDense.wi_0.weight": "model-00018-of-00058.safetensors", + "encoder.block.16.layer.1.DenseReluDense.wi_1.weight": "model-00018-of-00058.safetensors", + "encoder.block.16.layer.1.DenseReluDense.wo.weight": "model-00018-of-00058.safetensors", + "encoder.block.16.layer.1.layer_norm.weight": "model-00018-of-00058.safetensors", + "encoder.block.17.layer.0.SelfAttention.k.weight": "model-00018-of-00058.safetensors", + "encoder.block.17.layer.0.SelfAttention.o.weight": "model-00018-of-00058.safetensors", + "encoder.block.17.layer.0.SelfAttention.q.weight": "model-00018-of-00058.safetensors", + "encoder.block.17.layer.0.SelfAttention.v.weight": "model-00018-of-00058.safetensors", + "encoder.block.17.layer.0.layer_norm.weight": "model-00018-of-00058.safetensors", + "encoder.block.17.layer.1.DenseReluDense.wi_0.weight": "model-00019-of-00058.safetensors", + "encoder.block.17.layer.1.DenseReluDense.wi_1.weight": "model-00019-of-00058.safetensors", + "encoder.block.17.layer.1.DenseReluDense.wo.weight": "model-00019-of-00058.safetensors", + "encoder.block.17.layer.1.layer_norm.weight": "model-00019-of-00058.safetensors", + "encoder.block.18.layer.0.SelfAttention.k.weight": "model-00019-of-00058.safetensors", + "encoder.block.18.layer.0.SelfAttention.o.weight": "model-00019-of-00058.safetensors", + "encoder.block.18.layer.0.SelfAttention.q.weight": "model-00019-of-00058.safetensors", + "encoder.block.18.layer.0.SelfAttention.v.weight": "model-00019-of-00058.safetensors", + "encoder.block.18.layer.0.layer_norm.weight": "model-00019-of-00058.safetensors", + "encoder.block.18.layer.1.DenseReluDense.wi_0.weight": "model-00020-of-00058.safetensors", + "encoder.block.18.layer.1.DenseReluDense.wi_1.weight": "model-00020-of-00058.safetensors", + "encoder.block.18.layer.1.DenseReluDense.wo.weight": "model-00020-of-00058.safetensors", + "encoder.block.18.layer.1.layer_norm.weight": "model-00020-of-00058.safetensors", + "encoder.block.19.layer.0.SelfAttention.k.weight": "model-00020-of-00058.safetensors", + "encoder.block.19.layer.0.SelfAttention.o.weight": "model-00020-of-00058.safetensors", + "encoder.block.19.layer.0.SelfAttention.q.weight": "model-00020-of-00058.safetensors", + "encoder.block.19.layer.0.SelfAttention.v.weight": "model-00020-of-00058.safetensors", + "encoder.block.19.layer.0.layer_norm.weight": "model-00020-of-00058.safetensors", + "encoder.block.19.layer.1.DenseReluDense.wi_0.weight": "model-00021-of-00058.safetensors", + "encoder.block.19.layer.1.DenseReluDense.wi_1.weight": "model-00021-of-00058.safetensors", + "encoder.block.19.layer.1.DenseReluDense.wo.weight": "model-00021-of-00058.safetensors", + "encoder.block.19.layer.1.layer_norm.weight": "model-00021-of-00058.safetensors", + "encoder.block.2.layer.0.SelfAttention.k.weight": "model-00003-of-00058.safetensors", + "encoder.block.2.layer.0.SelfAttention.o.weight": "model-00004-of-00058.safetensors", + "encoder.block.2.layer.0.SelfAttention.q.weight": "model-00003-of-00058.safetensors", + "encoder.block.2.layer.0.SelfAttention.v.weight": "model-00004-of-00058.safetensors", + "encoder.block.2.layer.0.layer_norm.weight": "model-00004-of-00058.safetensors", + "encoder.block.2.layer.1.DenseReluDense.wi_0.weight": "model-00004-of-00058.safetensors", + "encoder.block.2.layer.1.DenseReluDense.wi_1.weight": "model-00004-of-00058.safetensors", + "encoder.block.2.layer.1.DenseReluDense.wo.weight": "model-00004-of-00058.safetensors", + "encoder.block.2.layer.1.layer_norm.weight": "model-00004-of-00058.safetensors", + "encoder.block.20.layer.0.SelfAttention.k.weight": "model-00021-of-00058.safetensors", + "encoder.block.20.layer.0.SelfAttention.o.weight": "model-00021-of-00058.safetensors", + "encoder.block.20.layer.0.SelfAttention.q.weight": "model-00021-of-00058.safetensors", + "encoder.block.20.layer.0.SelfAttention.v.weight": "model-00021-of-00058.safetensors", + "encoder.block.20.layer.0.layer_norm.weight": "model-00021-of-00058.safetensors", + "encoder.block.20.layer.1.DenseReluDense.wi_0.weight": "model-00022-of-00058.safetensors", + "encoder.block.20.layer.1.DenseReluDense.wi_1.weight": "model-00022-of-00058.safetensors", + "encoder.block.20.layer.1.DenseReluDense.wo.weight": "model-00022-of-00058.safetensors", + "encoder.block.20.layer.1.layer_norm.weight": "model-00022-of-00058.safetensors", + "encoder.block.21.layer.0.SelfAttention.k.weight": "model-00022-of-00058.safetensors", + "encoder.block.21.layer.0.SelfAttention.o.weight": "model-00022-of-00058.safetensors", + "encoder.block.21.layer.0.SelfAttention.q.weight": "model-00022-of-00058.safetensors", + "encoder.block.21.layer.0.SelfAttention.v.weight": "model-00022-of-00058.safetensors", + "encoder.block.21.layer.0.layer_norm.weight": "model-00022-of-00058.safetensors", + "encoder.block.21.layer.1.DenseReluDense.wi_0.weight": "model-00023-of-00058.safetensors", + "encoder.block.21.layer.1.DenseReluDense.wi_1.weight": "model-00023-of-00058.safetensors", + "encoder.block.21.layer.1.DenseReluDense.wo.weight": "model-00023-of-00058.safetensors", + "encoder.block.21.layer.1.layer_norm.weight": "model-00023-of-00058.safetensors", + "encoder.block.22.layer.0.SelfAttention.k.weight": "model-00023-of-00058.safetensors", + "encoder.block.22.layer.0.SelfAttention.o.weight": "model-00023-of-00058.safetensors", + "encoder.block.22.layer.0.SelfAttention.q.weight": "model-00023-of-00058.safetensors", + "encoder.block.22.layer.0.SelfAttention.v.weight": "model-00023-of-00058.safetensors", + "encoder.block.22.layer.0.layer_norm.weight": "model-00023-of-00058.safetensors", + "encoder.block.22.layer.1.DenseReluDense.wi_0.weight": "model-00024-of-00058.safetensors", + "encoder.block.22.layer.1.DenseReluDense.wi_1.weight": "model-00024-of-00058.safetensors", + "encoder.block.22.layer.1.DenseReluDense.wo.weight": "model-00024-of-00058.safetensors", + "encoder.block.22.layer.1.layer_norm.weight": "model-00024-of-00058.safetensors", + "encoder.block.23.layer.0.SelfAttention.k.weight": "model-00024-of-00058.safetensors", + "encoder.block.23.layer.0.SelfAttention.o.weight": "model-00024-of-00058.safetensors", + "encoder.block.23.layer.0.SelfAttention.q.weight": "model-00024-of-00058.safetensors", + "encoder.block.23.layer.0.SelfAttention.v.weight": "model-00024-of-00058.safetensors", + "encoder.block.23.layer.0.layer_norm.weight": "model-00024-of-00058.safetensors", + "encoder.block.23.layer.1.DenseReluDense.wi_0.weight": "model-00025-of-00058.safetensors", + "encoder.block.23.layer.1.DenseReluDense.wi_1.weight": "model-00025-of-00058.safetensors", + "encoder.block.23.layer.1.DenseReluDense.wo.weight": "model-00025-of-00058.safetensors", + "encoder.block.23.layer.1.layer_norm.weight": "model-00025-of-00058.safetensors", + "encoder.block.3.layer.0.SelfAttention.k.weight": "model-00004-of-00058.safetensors", + "encoder.block.3.layer.0.SelfAttention.o.weight": "model-00005-of-00058.safetensors", + "encoder.block.3.layer.0.SelfAttention.q.weight": "model-00004-of-00058.safetensors", + "encoder.block.3.layer.0.SelfAttention.v.weight": "model-00004-of-00058.safetensors", + "encoder.block.3.layer.0.layer_norm.weight": "model-00005-of-00058.safetensors", + "encoder.block.3.layer.1.DenseReluDense.wi_0.weight": "model-00005-of-00058.safetensors", + "encoder.block.3.layer.1.DenseReluDense.wi_1.weight": "model-00005-of-00058.safetensors", + "encoder.block.3.layer.1.DenseReluDense.wo.weight": "model-00005-of-00058.safetensors", + "encoder.block.3.layer.1.layer_norm.weight": "model-00005-of-00058.safetensors", + "encoder.block.4.layer.0.SelfAttention.k.weight": "model-00005-of-00058.safetensors", + "encoder.block.4.layer.0.SelfAttention.o.weight": "model-00005-of-00058.safetensors", + "encoder.block.4.layer.0.SelfAttention.q.weight": "model-00005-of-00058.safetensors", + "encoder.block.4.layer.0.SelfAttention.v.weight": "model-00005-of-00058.safetensors", + "encoder.block.4.layer.0.layer_norm.weight": "model-00005-of-00058.safetensors", + "encoder.block.4.layer.1.DenseReluDense.wi_0.weight": "model-00006-of-00058.safetensors", + "encoder.block.4.layer.1.DenseReluDense.wi_1.weight": "model-00006-of-00058.safetensors", + "encoder.block.4.layer.1.DenseReluDense.wo.weight": "model-00006-of-00058.safetensors", + "encoder.block.4.layer.1.layer_norm.weight": "model-00006-of-00058.safetensors", + "encoder.block.5.layer.0.SelfAttention.k.weight": "model-00006-of-00058.safetensors", + "encoder.block.5.layer.0.SelfAttention.o.weight": "model-00006-of-00058.safetensors", + "encoder.block.5.layer.0.SelfAttention.q.weight": "model-00006-of-00058.safetensors", + "encoder.block.5.layer.0.SelfAttention.v.weight": "model-00006-of-00058.safetensors", + "encoder.block.5.layer.0.layer_norm.weight": "model-00006-of-00058.safetensors", + "encoder.block.5.layer.1.DenseReluDense.wi_0.weight": "model-00007-of-00058.safetensors", + "encoder.block.5.layer.1.DenseReluDense.wi_1.weight": "model-00007-of-00058.safetensors", + "encoder.block.5.layer.1.DenseReluDense.wo.weight": "model-00007-of-00058.safetensors", + "encoder.block.5.layer.1.layer_norm.weight": "model-00007-of-00058.safetensors", + "encoder.block.6.layer.0.SelfAttention.k.weight": "model-00007-of-00058.safetensors", + "encoder.block.6.layer.0.SelfAttention.o.weight": "model-00007-of-00058.safetensors", + "encoder.block.6.layer.0.SelfAttention.q.weight": "model-00007-of-00058.safetensors", + "encoder.block.6.layer.0.SelfAttention.v.weight": "model-00007-of-00058.safetensors", + "encoder.block.6.layer.0.layer_norm.weight": "model-00007-of-00058.safetensors", + "encoder.block.6.layer.1.DenseReluDense.wi_0.weight": "model-00008-of-00058.safetensors", + "encoder.block.6.layer.1.DenseReluDense.wi_1.weight": "model-00008-of-00058.safetensors", + "encoder.block.6.layer.1.DenseReluDense.wo.weight": "model-00008-of-00058.safetensors", + "encoder.block.6.layer.1.layer_norm.weight": "model-00008-of-00058.safetensors", + "encoder.block.7.layer.0.SelfAttention.k.weight": "model-00008-of-00058.safetensors", + "encoder.block.7.layer.0.SelfAttention.o.weight": "model-00008-of-00058.safetensors", + "encoder.block.7.layer.0.SelfAttention.q.weight": "model-00008-of-00058.safetensors", + "encoder.block.7.layer.0.SelfAttention.v.weight": "model-00008-of-00058.safetensors", + "encoder.block.7.layer.0.layer_norm.weight": "model-00008-of-00058.safetensors", + "encoder.block.7.layer.1.DenseReluDense.wi_0.weight": "model-00009-of-00058.safetensors", + "encoder.block.7.layer.1.DenseReluDense.wi_1.weight": "model-00009-of-00058.safetensors", + "encoder.block.7.layer.1.DenseReluDense.wo.weight": "model-00009-of-00058.safetensors", + "encoder.block.7.layer.1.layer_norm.weight": "model-00009-of-00058.safetensors", + "encoder.block.8.layer.0.SelfAttention.k.weight": "model-00009-of-00058.safetensors", + "encoder.block.8.layer.0.SelfAttention.o.weight": "model-00009-of-00058.safetensors", + "encoder.block.8.layer.0.SelfAttention.q.weight": "model-00009-of-00058.safetensors", + "encoder.block.8.layer.0.SelfAttention.v.weight": "model-00009-of-00058.safetensors", + "encoder.block.8.layer.0.layer_norm.weight": "model-00009-of-00058.safetensors", + "encoder.block.8.layer.1.DenseReluDense.wi_0.weight": "model-00010-of-00058.safetensors", + "encoder.block.8.layer.1.DenseReluDense.wi_1.weight": "model-00010-of-00058.safetensors", + "encoder.block.8.layer.1.DenseReluDense.wo.weight": "model-00010-of-00058.safetensors", + "encoder.block.8.layer.1.layer_norm.weight": "model-00010-of-00058.safetensors", + "encoder.block.9.layer.0.SelfAttention.k.weight": "model-00010-of-00058.safetensors", + "encoder.block.9.layer.0.SelfAttention.o.weight": "model-00010-of-00058.safetensors", + "encoder.block.9.layer.0.SelfAttention.q.weight": "model-00010-of-00058.safetensors", + "encoder.block.9.layer.0.SelfAttention.v.weight": "model-00010-of-00058.safetensors", + "encoder.block.9.layer.0.layer_norm.weight": "model-00010-of-00058.safetensors", + "encoder.block.9.layer.1.DenseReluDense.wi_0.weight": "model-00011-of-00058.safetensors", + "encoder.block.9.layer.1.DenseReluDense.wi_1.weight": "model-00011-of-00058.safetensors", + "encoder.block.9.layer.1.DenseReluDense.wo.weight": "model-00011-of-00058.safetensors", + "encoder.block.9.layer.1.layer_norm.weight": "model-00011-of-00058.safetensors", + "encoder.final_layer_norm.weight": "model-00025-of-00058.safetensors", + "lm_head.weight": "model-00058-of-00058.safetensors", + "shared.weight": "model-00001-of-00058.safetensors" + } +} diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..05592dd7745eb8b9b7e19753208af804f6393e45 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..ef3ae43c0ae4b2aedd634b4f363c71578aad9314 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49e22a3bc6d8c5ed0f152cdfdb2473fcc41f5ac03b1c2df6e2e16998ce709bb9 +size 16315314 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e4c482eab17547e4adf2d991c695320cffcb8d3c --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,38 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [], + "clean_up_tokenization_spaces": true, + "eos_token": "", + "extra_ids": 0, + "legacy": true, + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "tokenizer_class": "T5Tokenizer", + "unk_token": "" +}