reinit

Browse files

Files changed (15) hide show

.gitattributes +36 -0
README.md +130 -0
config.json +32 -0
generation_config.json +7 -0
pytorch_model-00001-of-00006.bin +3 -0
pytorch_model-00002-of-00006.bin +3 -0
pytorch_model-00003-of-00006.bin +3 -0
pytorch_model-00004-of-00006.bin +3 -0
pytorch_model-00005-of-00006.bin +3 -0
pytorch_model-00006-of-00006.bin +3 -0
pytorch_model.bin.index.json +613 -0
special_tokens_map.json +308 -0
spiece.model +3 -0
tokenizer.json +3 -0
tokenizer_config.json +314 -0

.gitattributes ADDED Viewed

	@@ -0,0 +1,36 @@

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text

README.md ADDED Viewed

	@@ -0,0 +1,130 @@

+---
+language:
+- multilingual
+- af
+- am
+- ar
+- az
+- be
+- bg
+- bn
+- ca
+- ceb
+- co
+- cs
+- cy
+- da
+- de
+- el
+- en
+- eo
+- es
+- et
+- eu
+- fa
+- fi
+- fil
+- fr
+- fy
+- ga
+- gd
+- gl
+- gu
+- ha
+- haw
+- hi
+- hmn
+- ht
+- hu
+- hy
+- ig
+- is
+- it
+- iw
+- ja
+- jv
+- ka
+- kk
+- km
+- kn
+- ko
+- ku
+- ky
+- la
+- lb
+- lo
+- lt
+- lv
+- mg
+- mi
+- mk
+- ml
+- mn
+- mr
+- ms
+- mt
+- my
+- ne
+- nl
+- no
+- ny
+- pa
+- pl
+- ps
+- pt
+- ro
+- ru
+- sd
+- si
+- sk
+- sl
+- sm
+- sn
+- so
+- sq
+- sr
+- st
+- su
+- sv
+- sw
+- ta
+- te
+- tg
+- th
+- tr
+- uk
+- und
+- ur
+- uz
+- vi
+- xh
+- yi
+- yo
+- zh
+- zu
+datasets:
+- mc4
+license: apache-2.0
+---
+[Google's UMT5](https://github.com/google-research/multilingual-t5)
+UMT5 is pretrained on an updated version of the [mC4](https://www.tensorflow.org/datasets/catalog/c4#c4multilingual) corpus, covering 107 languages:
+Afrikaans, Albanian, Amharic, Arabic, Armenian, Azerbaijani, Basque, Belarusian, Bengali, Bulgarian, Burmese, Catalan, Cebuano, Chichewa, Chinese, Corsican, Czech, Danish, Dutch, English, Esperanto, Estonian, Filipino, Finnish, French, Galician, Georgian, German, Greek, Gujarati, Haitian Creole, Hausa, Hawaiian, Hebrew, Hindi, Hmong, Hungarian, Icelandic, Igbo, Indonesian, Irish, Italian, Japanese, Javanese, Kannada, Kazakh, Khmer, Korean, Kurdish, Kyrgyz, Lao, Latin, Latvian, Lithuanian, Luxembourgish, Macedonian, Malagasy, Malay, Malayalam, Maltese, Maori, Marathi, Mongolian, Nepali, Norwegian, Pashto, Persian, Polish, Portuguese, Punjabi, Romanian, Russian, Samoan, Scottish Gaelic, Serbian, Shona, Sindhi, Sinhala, Slovak, Slovenian, Somali, Sotho, Spanish, Sundanese, Swahili, Swedish, Tajik, Tamil, Telugu, Thai, Turkish, Ukrainian, Urdu, Uzbek, Vietnamese, Welsh, West Frisian, Xhosa, Yiddish, Yoruba, Zulu.
+**Note**: UMT5 was pre-trained only on mC4, without any supervised training. Therefore, this model has to be fine-tuned before it is usable on a downstream task.
+Pretraining Dataset: [mC4](https://www.tensorflow.org/datasets/catalog/c4#c4multilingual)
+Other Community Checkpoints: [here](https://huggingface.co/models?search=umt5)
+Paper: [UniMax, Fairer and More Effective Language Sampling for Large-Scale Multilingual Pretraining](https://openreview.net/forum?id=kXwdL1cWOAi)
+Authors: *Hyung Won Chung, Xavier Garcia, Adam Roberts, Yi Tay, Orhan Firat, Sharan Narang, Noah Constant*
+## Abstract
+*Pretrained multilingual large language models have typically used heuristic temperature-based sampling to balance between different languages. However previous work has not systematically evaluated the efficacy of different pretraining language distributions across model scales. In this paper, we propose a new sampling method, UniMax, that delivers more uniform coverage of head languages while mitigating overfitting on tail languages by explicitly capping the number of repeats over each language's corpus. We perform an extensive series of ablations testing a range of sampling strategies on a suite of multilingual benchmarks, while varying model scale. We find that UniMax outperforms standard temperature-based sampling, and the benefits persist as scale increases. As part of our contribution, we release: (i) an improved and refreshed mC4 multilingual corpus consisting of 29 trillion characters across 107 languages, and (ii) a suite of pretrained umT5 model checkpoints trained with UniMax sampling.*

config.json ADDED Viewed

	@@ -0,0 +1,32 @@

+{
+  "_name_or_path": ".",
+  "architectures": [
+    "UMT5ForConditionalGeneration"
+  ],
+  "d_ff": 10240,
+  "d_kv": 64,
+  "d_model": 4096,
+  "decoder_start_token_id": 0,
+  "dense_act_fn": "gelu_new",
+  "dropout_rate": 0.1,
+  "eos_token_id": 1,
+  "feed_forward_proj": "gated-gelu",
+  "initializer_factor": 1.0,
+  "is_encoder_decoder": true,
+  "is_gated_act": true,
+  "layer_norm_epsilon": 1e-06,
+  "num_decoder_layers": 24,
+  "num_heads": 64,
+  "num_layers": 24,
+  "output_past": true,
+  "pad_token_id": 0,
+  "relative_attention_max_distance": 128,
+  "relative_attention_num_buckets": 32,
+  "scalable_attention": true,
+  "tie_word_embeddings": false,
+  "tokenizer_class": "T5Tokenizer",
+  "torch_dtype": "float32",
+  "transformers_version": "4.31.0.dev0",
+  "use_cache": true,
+  "vocab_size": 256384
+}

generation_config.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "_from_model_config": true,
+  "decoder_start_token_id": 0,
+  "eos_token_id": 1,
+  "pad_token_id": 0,
+  "transformers_version": "4.31.0.dev0"
+}

pytorch_model-00001-of-00006.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:382094214dfe74d782769f61ad95cfe32fdd297ae51f16f9208afa180b355e61
+size 9871633465

pytorch_model-00002-of-00006.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b49efce006c907ea93eb38658577d5c8d4e85b4bab398a0c6ba25141927529d4
+size 9966219296

pytorch_model-00003-of-00006.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:da3d39fffe6464247531c20696715860ccaabdaaad3d5a2979dc2e2fbb7789fc
+size 9999835818

pytorch_model-00004-of-00006.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9da344dda8103ca0adadeca000913b1340f3df2e9e31aa1de30a8b5cd6e2499f
+size 9999829533

pytorch_model-00005-of-00006.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0ad06915eba0878cafe3f65540b35daeaeea0378ef55919da6f0769fa04a9179
+size 7852227059

pytorch_model-00006-of-00006.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5798b8aa388b4d3e530f18a9eefc38c831cc6a3c83289c06cabf3634c24799dd
+size 4200596394

pytorch_model.bin.index.json ADDED Viewed

	@@ -0,0 +1,613 @@

+{
+  "metadata": {
+    "total_size": 51890126848
+  },
+  "weight_map": {
+    "decoder.block.0.layer.0.SelfAttention.k.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.0.layer.0.SelfAttention.o.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.0.layer.0.SelfAttention.q.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.0.layer.0.SelfAttention.relative_attention_bias.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.0.layer.0.SelfAttention.v.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.0.layer.0.layer_norm.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.0.layer.1.EncDecAttention.k.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.0.layer.1.EncDecAttention.o.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.0.layer.1.EncDecAttention.q.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.0.layer.1.EncDecAttention.v.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.0.layer.1.layer_norm.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.0.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.0.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.0.layer.2.DenseReluDense.wo.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.0.layer.2.layer_norm.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.1.layer.0.SelfAttention.k.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.1.layer.0.SelfAttention.o.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.1.layer.0.SelfAttention.q.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.1.layer.0.SelfAttention.relative_attention_bias.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.1.layer.0.SelfAttention.v.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.1.layer.0.layer_norm.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.1.layer.1.EncDecAttention.k.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.1.layer.1.EncDecAttention.o.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.1.layer.1.EncDecAttention.q.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.1.layer.1.EncDecAttention.v.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.1.layer.1.layer_norm.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.1.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.1.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.1.layer.2.DenseReluDense.wo.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.1.layer.2.layer_norm.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.10.layer.0.SelfAttention.k.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.10.layer.0.SelfAttention.o.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.10.layer.0.SelfAttention.q.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.10.layer.0.SelfAttention.relative_attention_bias.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.10.layer.0.SelfAttention.v.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.10.layer.0.layer_norm.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.10.layer.1.EncDecAttention.k.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.10.layer.1.EncDecAttention.o.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.10.layer.1.EncDecAttention.q.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.10.layer.1.EncDecAttention.v.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.10.layer.1.layer_norm.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.10.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.10.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.10.layer.2.DenseReluDense.wo.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.10.layer.2.layer_norm.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.11.layer.0.SelfAttention.k.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.11.layer.0.SelfAttention.o.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.11.layer.0.SelfAttention.q.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.11.layer.0.SelfAttention.relative_attention_bias.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.11.layer.0.SelfAttention.v.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.11.layer.0.layer_norm.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.11.layer.1.EncDecAttention.k.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.11.layer.1.EncDecAttention.o.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.11.layer.1.EncDecAttention.q.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.11.layer.1.EncDecAttention.v.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.11.layer.1.layer_norm.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.11.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.11.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.11.layer.2.DenseReluDense.wo.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.11.layer.2.layer_norm.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.12.layer.0.SelfAttention.k.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.12.layer.0.SelfAttention.o.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.12.layer.0.SelfAttention.q.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.12.layer.0.SelfAttention.relative_attention_bias.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.12.layer.0.SelfAttention.v.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.12.layer.0.layer_norm.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.12.layer.1.EncDecAttention.k.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.12.layer.1.EncDecAttention.o.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.12.layer.1.EncDecAttention.q.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.12.layer.1.EncDecAttention.v.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.12.layer.1.layer_norm.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.12.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.12.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.12.layer.2.DenseReluDense.wo.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.12.layer.2.layer_norm.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.13.layer.0.SelfAttention.k.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.13.layer.0.SelfAttention.o.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.13.layer.0.SelfAttention.q.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.13.layer.0.SelfAttention.relative_attention_bias.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.13.layer.0.SelfAttention.v.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.13.layer.0.layer_norm.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.13.layer.1.EncDecAttention.k.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.13.layer.1.EncDecAttention.o.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.13.layer.1.EncDecAttention.q.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.13.layer.1.EncDecAttention.v.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.13.layer.1.layer_norm.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.13.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.13.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.13.layer.2.DenseReluDense.wo.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.13.layer.2.layer_norm.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.14.layer.0.SelfAttention.k.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.14.layer.0.SelfAttention.o.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.14.layer.0.SelfAttention.q.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.14.layer.0.SelfAttention.relative_attention_bias.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.14.layer.0.SelfAttention.v.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.14.layer.0.layer_norm.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.14.layer.1.EncDecAttention.k.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.14.layer.1.EncDecAttention.o.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.14.layer.1.EncDecAttention.q.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.14.layer.1.EncDecAttention.v.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.14.layer.1.layer_norm.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.14.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.14.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.14.layer.2.DenseReluDense.wo.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.14.layer.2.layer_norm.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.15.layer.0.SelfAttention.k.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.15.layer.0.SelfAttention.o.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.15.layer.0.SelfAttention.q.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.15.layer.0.SelfAttention.relative_attention_bias.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.15.layer.0.SelfAttention.v.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.15.layer.0.layer_norm.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.15.layer.1.EncDecAttention.k.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.15.layer.1.EncDecAttention.o.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.15.layer.1.EncDecAttention.q.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.15.layer.1.EncDecAttention.v.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.15.layer.1.layer_norm.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.15.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.15.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.15.layer.2.DenseReluDense.wo.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.15.layer.2.layer_norm.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.16.layer.0.SelfAttention.k.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.16.layer.0.SelfAttention.o.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.16.layer.0.SelfAttention.q.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.16.layer.0.SelfAttention.relative_attention_bias.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.16.layer.0.SelfAttention.v.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.16.layer.0.layer_norm.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.16.layer.1.EncDecAttention.k.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.16.layer.1.EncDecAttention.o.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.16.layer.1.EncDecAttention.q.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.16.layer.1.EncDecAttention.v.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.16.layer.1.layer_norm.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.16.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.16.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.16.layer.2.DenseReluDense.wo.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.16.layer.2.layer_norm.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.17.layer.0.SelfAttention.k.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.17.layer.0.SelfAttention.o.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.17.layer.0.SelfAttention.q.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.17.layer.0.SelfAttention.relative_attention_bias.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.17.layer.0.SelfAttention.v.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.17.layer.0.layer_norm.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.17.layer.1.EncDecAttention.k.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.17.layer.1.EncDecAttention.o.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.17.layer.1.EncDecAttention.q.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.17.layer.1.EncDecAttention.v.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.17.layer.1.layer_norm.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.17.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.17.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.17.layer.2.DenseReluDense.wo.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.17.layer.2.layer_norm.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.18.layer.0.SelfAttention.k.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.18.layer.0.SelfAttention.o.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.18.layer.0.SelfAttention.q.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.18.layer.0.SelfAttention.relative_attention_bias.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.18.layer.0.SelfAttention.v.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.18.layer.0.layer_norm.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.18.layer.1.EncDecAttention.k.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.18.layer.1.EncDecAttention.o.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.18.layer.1.EncDecAttention.q.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.18.layer.1.EncDecAttention.v.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.18.layer.1.layer_norm.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.18.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.18.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.18.layer.2.DenseReluDense.wo.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.18.layer.2.layer_norm.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.19.layer.0.SelfAttention.k.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.19.layer.0.SelfAttention.o.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.19.layer.0.SelfAttention.q.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.19.layer.0.SelfAttention.relative_attention_bias.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.19.layer.0.SelfAttention.v.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.19.layer.0.layer_norm.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.19.layer.1.EncDecAttention.k.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.19.layer.1.EncDecAttention.o.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.19.layer.1.EncDecAttention.q.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.19.layer.1.EncDecAttention.v.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.19.layer.1.layer_norm.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.19.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.19.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.19.layer.2.DenseReluDense.wo.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.19.layer.2.layer_norm.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.2.layer.0.SelfAttention.k.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.2.layer.0.SelfAttention.o.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.2.layer.0.SelfAttention.q.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.2.layer.0.SelfAttention.relative_attention_bias.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.2.layer.0.SelfAttention.v.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.2.layer.0.layer_norm.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.2.layer.1.EncDecAttention.k.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.2.layer.1.EncDecAttention.o.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.2.layer.1.EncDecAttention.q.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.2.layer.1.EncDecAttention.v.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.2.layer.1.layer_norm.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.2.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.2.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.2.layer.2.DenseReluDense.wo.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.2.layer.2.layer_norm.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.20.layer.0.SelfAttention.k.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.20.layer.0.SelfAttention.o.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.20.layer.0.SelfAttention.q.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.20.layer.0.SelfAttention.relative_attention_bias.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.20.layer.0.SelfAttention.v.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.20.layer.0.layer_norm.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.20.layer.1.EncDecAttention.k.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.20.layer.1.EncDecAttention.o.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.20.layer.1.EncDecAttention.q.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.20.layer.1.EncDecAttention.v.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.20.layer.1.layer_norm.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.20.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.20.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.20.layer.2.DenseReluDense.wo.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.20.layer.2.layer_norm.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.21.layer.0.SelfAttention.k.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.21.layer.0.SelfAttention.o.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.21.layer.0.SelfAttention.q.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.21.layer.0.SelfAttention.relative_attention_bias.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.21.layer.0.SelfAttention.v.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.21.layer.0.layer_norm.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.21.layer.1.EncDecAttention.k.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.21.layer.1.EncDecAttention.o.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.21.layer.1.EncDecAttention.q.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.21.layer.1.EncDecAttention.v.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.21.layer.1.layer_norm.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.21.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.21.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.21.layer.2.DenseReluDense.wo.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.21.layer.2.layer_norm.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.22.layer.0.SelfAttention.k.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.22.layer.0.SelfAttention.o.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.22.layer.0.SelfAttention.q.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.22.layer.0.SelfAttention.relative_attention_bias.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.22.layer.0.SelfAttention.v.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.22.layer.0.layer_norm.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.22.layer.1.EncDecAttention.k.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.22.layer.1.EncDecAttention.o.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.22.layer.1.EncDecAttention.q.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.22.layer.1.EncDecAttention.v.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.22.layer.1.layer_norm.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.22.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.22.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.22.layer.2.DenseReluDense.wo.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.22.layer.2.layer_norm.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.23.layer.0.SelfAttention.k.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.23.layer.0.SelfAttention.o.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.23.layer.0.SelfAttention.q.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.23.layer.0.SelfAttention.relative_attention_bias.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.23.layer.0.SelfAttention.v.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.23.layer.0.layer_norm.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.23.layer.1.EncDecAttention.k.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.23.layer.1.EncDecAttention.o.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.23.layer.1.EncDecAttention.q.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.23.layer.1.EncDecAttention.v.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.23.layer.1.layer_norm.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.23.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.23.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.23.layer.2.DenseReluDense.wo.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.23.layer.2.layer_norm.weight": "pytorch_model-00005-of-00006.bin",
+    "decoder.block.3.layer.0.SelfAttention.k.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.3.layer.0.SelfAttention.o.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.3.layer.0.SelfAttention.q.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.3.layer.0.SelfAttention.relative_attention_bias.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.3.layer.0.SelfAttention.v.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.3.layer.0.layer_norm.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.3.layer.1.EncDecAttention.k.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.3.layer.1.EncDecAttention.o.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.3.layer.1.EncDecAttention.q.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.3.layer.1.EncDecAttention.v.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.3.layer.1.layer_norm.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.3.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.3.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.3.layer.2.DenseReluDense.wo.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.3.layer.2.layer_norm.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.4.layer.0.SelfAttention.k.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.4.layer.0.SelfAttention.o.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.4.layer.0.SelfAttention.q.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.4.layer.0.SelfAttention.relative_attention_bias.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.4.layer.0.SelfAttention.v.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.4.layer.0.layer_norm.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.4.layer.1.EncDecAttention.k.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.4.layer.1.EncDecAttention.o.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.4.layer.1.EncDecAttention.q.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.4.layer.1.EncDecAttention.v.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.4.layer.1.layer_norm.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.4.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.4.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.4.layer.2.DenseReluDense.wo.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.4.layer.2.layer_norm.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.5.layer.0.SelfAttention.k.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.5.layer.0.SelfAttention.o.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.5.layer.0.SelfAttention.q.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.5.layer.0.SelfAttention.relative_attention_bias.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.5.layer.0.SelfAttention.v.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.5.layer.0.layer_norm.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.5.layer.1.EncDecAttention.k.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.5.layer.1.EncDecAttention.o.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.5.layer.1.EncDecAttention.q.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.5.layer.1.EncDecAttention.v.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.5.layer.1.layer_norm.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.5.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.5.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.5.layer.2.DenseReluDense.wo.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.5.layer.2.layer_norm.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.6.layer.0.SelfAttention.k.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.6.layer.0.SelfAttention.o.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.6.layer.0.SelfAttention.q.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.6.layer.0.SelfAttention.relative_attention_bias.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.6.layer.0.SelfAttention.v.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.6.layer.0.layer_norm.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.6.layer.1.EncDecAttention.k.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.6.layer.1.EncDecAttention.o.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.6.layer.1.EncDecAttention.q.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.6.layer.1.EncDecAttention.v.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.6.layer.1.layer_norm.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.6.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.6.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00003-of-00006.bin",
+    "decoder.block.6.layer.2.DenseReluDense.wo.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.6.layer.2.layer_norm.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.7.layer.0.SelfAttention.k.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.7.layer.0.SelfAttention.o.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.7.layer.0.SelfAttention.q.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.7.layer.0.SelfAttention.relative_attention_bias.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.7.layer.0.SelfAttention.v.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.7.layer.0.layer_norm.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.7.layer.1.EncDecAttention.k.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.7.layer.1.EncDecAttention.o.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.7.layer.1.EncDecAttention.q.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.7.layer.1.EncDecAttention.v.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.7.layer.1.layer_norm.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.7.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.7.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.7.layer.2.DenseReluDense.wo.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.7.layer.2.layer_norm.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.8.layer.0.SelfAttention.k.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.8.layer.0.SelfAttention.o.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.8.layer.0.SelfAttention.q.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.8.layer.0.SelfAttention.relative_attention_bias.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.8.layer.0.SelfAttention.v.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.8.layer.0.layer_norm.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.8.layer.1.EncDecAttention.k.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.8.layer.1.EncDecAttention.o.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.8.layer.1.EncDecAttention.q.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.8.layer.1.EncDecAttention.v.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.8.layer.1.layer_norm.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.8.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.8.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.8.layer.2.DenseReluDense.wo.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.8.layer.2.layer_norm.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.9.layer.0.SelfAttention.k.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.9.layer.0.SelfAttention.o.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.9.layer.0.SelfAttention.q.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.9.layer.0.SelfAttention.relative_attention_bias.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.9.layer.0.SelfAttention.v.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.9.layer.0.layer_norm.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.9.layer.1.EncDecAttention.k.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.9.layer.1.EncDecAttention.o.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.9.layer.1.EncDecAttention.q.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.9.layer.1.EncDecAttention.v.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.9.layer.1.layer_norm.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.9.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.9.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.9.layer.2.DenseReluDense.wo.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.block.9.layer.2.layer_norm.weight": "pytorch_model-00004-of-00006.bin",
+    "decoder.embed_tokens.weight": "pytorch_model-00001-of-00006.bin",
+    "decoder.final_layer_norm.weight": "pytorch_model-00005-of-00006.bin",
+    "encoder.block.0.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.block.0.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.block.0.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.block.0.layer.0.SelfAttention.relative_attention_bias.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.block.0.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.block.0.layer.0.layer_norm.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.block.0.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.block.0.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.block.0.layer.1.DenseReluDense.wo.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.block.0.layer.1.layer_norm.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.block.1.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.block.1.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.block.1.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.block.1.layer.0.SelfAttention.relative_attention_bias.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.block.1.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.block.1.layer.0.layer_norm.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.block.1.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.block.1.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.block.1.layer.1.DenseReluDense.wo.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.block.1.layer.1.layer_norm.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.block.10.layer.0.SelfAttention.k.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.10.layer.0.SelfAttention.o.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.10.layer.0.SelfAttention.q.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.10.layer.0.SelfAttention.relative_attention_bias.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.10.layer.0.SelfAttention.v.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.10.layer.0.layer_norm.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.10.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.10.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.10.layer.1.DenseReluDense.wo.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.10.layer.1.layer_norm.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.11.layer.0.SelfAttention.k.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.11.layer.0.SelfAttention.o.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.11.layer.0.SelfAttention.q.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.11.layer.0.SelfAttention.relative_attention_bias.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.11.layer.0.SelfAttention.v.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.11.layer.0.layer_norm.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.11.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.11.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.11.layer.1.DenseReluDense.wo.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.11.layer.1.layer_norm.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.12.layer.0.SelfAttention.k.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.12.layer.0.SelfAttention.o.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.12.layer.0.SelfAttention.q.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.12.layer.0.SelfAttention.relative_attention_bias.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.12.layer.0.SelfAttention.v.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.12.layer.0.layer_norm.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.12.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.12.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.12.layer.1.DenseReluDense.wo.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.12.layer.1.layer_norm.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.13.layer.0.SelfAttention.k.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.13.layer.0.SelfAttention.o.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.13.layer.0.SelfAttention.q.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.13.layer.0.SelfAttention.relative_attention_bias.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.13.layer.0.SelfAttention.v.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.13.layer.0.layer_norm.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.13.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.13.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.13.layer.1.DenseReluDense.wo.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.13.layer.1.layer_norm.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.14.layer.0.SelfAttention.k.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.14.layer.0.SelfAttention.o.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.14.layer.0.SelfAttention.q.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.14.layer.0.SelfAttention.relative_attention_bias.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.14.layer.0.SelfAttention.v.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.14.layer.0.layer_norm.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.14.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.14.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.14.layer.1.DenseReluDense.wo.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.14.layer.1.layer_norm.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.15.layer.0.SelfAttention.k.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.15.layer.0.SelfAttention.o.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.15.layer.0.SelfAttention.q.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.15.layer.0.SelfAttention.relative_attention_bias.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.15.layer.0.SelfAttention.v.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.15.layer.0.layer_norm.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.15.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.15.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.15.layer.1.DenseReluDense.wo.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.15.layer.1.layer_norm.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.16.layer.0.SelfAttention.k.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.16.layer.0.SelfAttention.o.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.16.layer.0.SelfAttention.q.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.16.layer.0.SelfAttention.relative_attention_bias.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.16.layer.0.SelfAttention.v.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.16.layer.0.layer_norm.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.16.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.16.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.16.layer.1.DenseReluDense.wo.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.16.layer.1.layer_norm.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.17.layer.0.SelfAttention.k.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.17.layer.0.SelfAttention.o.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.17.layer.0.SelfAttention.q.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.17.layer.0.SelfAttention.relative_attention_bias.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.17.layer.0.SelfAttention.v.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.17.layer.0.layer_norm.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.17.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.17.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.17.layer.1.DenseReluDense.wo.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.17.layer.1.layer_norm.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.18.layer.0.SelfAttention.k.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.18.layer.0.SelfAttention.o.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.18.layer.0.SelfAttention.q.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.18.layer.0.SelfAttention.relative_attention_bias.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.18.layer.0.SelfAttention.v.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.18.layer.0.layer_norm.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.18.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.18.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.18.layer.1.DenseReluDense.wo.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.18.layer.1.layer_norm.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.19.layer.0.SelfAttention.k.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.19.layer.0.SelfAttention.o.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.19.layer.0.SelfAttention.q.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.19.layer.0.SelfAttention.relative_attention_bias.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.19.layer.0.SelfAttention.v.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.19.layer.0.layer_norm.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.19.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.19.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.19.layer.1.DenseReluDense.wo.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.19.layer.1.layer_norm.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.2.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.block.2.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.block.2.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.block.2.layer.0.SelfAttention.relative_attention_bias.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.block.2.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.block.2.layer.0.layer_norm.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.block.2.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.block.2.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.block.2.layer.1.DenseReluDense.wo.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.block.2.layer.1.layer_norm.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.block.20.layer.0.SelfAttention.k.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.20.layer.0.SelfAttention.o.weight": "pytorch_model-00003-of-00006.bin",
+    "encoder.block.20.layer.0.SelfAttention.q.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.20.layer.0.SelfAttention.relative_attention_bias.weight": "pytorch_model-00003-of-00006.bin",
+    "encoder.block.20.layer.0.SelfAttention.v.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.20.layer.0.layer_norm.weight": "pytorch_model-00003-of-00006.bin",
+    "encoder.block.20.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00003-of-00006.bin",
+    "encoder.block.20.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00003-of-00006.bin",
+    "encoder.block.20.layer.1.DenseReluDense.wo.weight": "pytorch_model-00003-of-00006.bin",
+    "encoder.block.20.layer.1.layer_norm.weight": "pytorch_model-00003-of-00006.bin",
+    "encoder.block.21.layer.0.SelfAttention.k.weight": "pytorch_model-00003-of-00006.bin",
+    "encoder.block.21.layer.0.SelfAttention.o.weight": "pytorch_model-00003-of-00006.bin",
+    "encoder.block.21.layer.0.SelfAttention.q.weight": "pytorch_model-00003-of-00006.bin",
+    "encoder.block.21.layer.0.SelfAttention.relative_attention_bias.weight": "pytorch_model-00003-of-00006.bin",
+    "encoder.block.21.layer.0.SelfAttention.v.weight": "pytorch_model-00003-of-00006.bin",
+    "encoder.block.21.layer.0.layer_norm.weight": "pytorch_model-00003-of-00006.bin",
+    "encoder.block.21.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00003-of-00006.bin",
+    "encoder.block.21.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00003-of-00006.bin",
+    "encoder.block.21.layer.1.DenseReluDense.wo.weight": "pytorch_model-00003-of-00006.bin",
+    "encoder.block.21.layer.1.layer_norm.weight": "pytorch_model-00003-of-00006.bin",
+    "encoder.block.22.layer.0.SelfAttention.k.weight": "pytorch_model-00003-of-00006.bin",
+    "encoder.block.22.layer.0.SelfAttention.o.weight": "pytorch_model-00003-of-00006.bin",
+    "encoder.block.22.layer.0.SelfAttention.q.weight": "pytorch_model-00003-of-00006.bin",
+    "encoder.block.22.layer.0.SelfAttention.relative_attention_bias.weight": "pytorch_model-00003-of-00006.bin",
+    "encoder.block.22.layer.0.SelfAttention.v.weight": "pytorch_model-00003-of-00006.bin",
+    "encoder.block.22.layer.0.layer_norm.weight": "pytorch_model-00003-of-00006.bin",
+    "encoder.block.22.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00003-of-00006.bin",
+    "encoder.block.22.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00003-of-00006.bin",
+    "encoder.block.22.layer.1.DenseReluDense.wo.weight": "pytorch_model-00003-of-00006.bin",
+    "encoder.block.22.layer.1.layer_norm.weight": "pytorch_model-00003-of-00006.bin",
+    "encoder.block.23.layer.0.SelfAttention.k.weight": "pytorch_model-00003-of-00006.bin",
+    "encoder.block.23.layer.0.SelfAttention.o.weight": "pytorch_model-00003-of-00006.bin",
+    "encoder.block.23.layer.0.SelfAttention.q.weight": "pytorch_model-00003-of-00006.bin",
+    "encoder.block.23.layer.0.SelfAttention.relative_attention_bias.weight": "pytorch_model-00003-of-00006.bin",
+    "encoder.block.23.layer.0.SelfAttention.v.weight": "pytorch_model-00003-of-00006.bin",
+    "encoder.block.23.layer.0.layer_norm.weight": "pytorch_model-00003-of-00006.bin",
+    "encoder.block.23.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00003-of-00006.bin",
+    "encoder.block.23.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00003-of-00006.bin",
+    "encoder.block.23.layer.1.DenseReluDense.wo.weight": "pytorch_model-00003-of-00006.bin",
+    "encoder.block.23.layer.1.layer_norm.weight": "pytorch_model-00003-of-00006.bin",
+    "encoder.block.3.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.block.3.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.block.3.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.block.3.layer.0.SelfAttention.relative_attention_bias.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.block.3.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.block.3.layer.0.layer_norm.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.block.3.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.block.3.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.block.3.layer.1.DenseReluDense.wo.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.block.3.layer.1.layer_norm.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.block.4.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.block.4.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.block.4.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.block.4.layer.0.SelfAttention.relative_attention_bias.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.block.4.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.block.4.layer.0.layer_norm.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.block.4.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.block.4.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.block.4.layer.1.DenseReluDense.wo.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.block.4.layer.1.layer_norm.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.block.5.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.block.5.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.block.5.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.block.5.layer.0.SelfAttention.relative_attention_bias.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.block.5.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.block.5.layer.0.layer_norm.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.block.5.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.block.5.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.block.5.layer.1.DenseReluDense.wo.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.block.5.layer.1.layer_norm.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.block.6.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.block.6.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.block.6.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.block.6.layer.0.SelfAttention.relative_attention_bias.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.block.6.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.block.6.layer.0.layer_norm.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.block.6.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.block.6.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.block.6.layer.1.DenseReluDense.wo.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.block.6.layer.1.layer_norm.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.block.7.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.block.7.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.block.7.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.block.7.layer.0.SelfAttention.relative_attention_bias.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.block.7.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.block.7.layer.0.layer_norm.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.block.7.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.7.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.7.layer.1.DenseReluDense.wo.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.7.layer.1.layer_norm.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.8.layer.0.SelfAttention.k.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.8.layer.0.SelfAttention.o.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.8.layer.0.SelfAttention.q.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.8.layer.0.SelfAttention.relative_attention_bias.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.8.layer.0.SelfAttention.v.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.8.layer.0.layer_norm.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.8.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.8.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.8.layer.1.DenseReluDense.wo.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.8.layer.1.layer_norm.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.9.layer.0.SelfAttention.k.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.9.layer.0.SelfAttention.o.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.9.layer.0.SelfAttention.q.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.9.layer.0.SelfAttention.relative_attention_bias.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.9.layer.0.SelfAttention.v.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.9.layer.0.layer_norm.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.9.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.9.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.9.layer.1.DenseReluDense.wo.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.block.9.layer.1.layer_norm.weight": "pytorch_model-00002-of-00006.bin",
+    "encoder.embed_tokens.weight": "pytorch_model-00001-of-00006.bin",
+    "encoder.final_layer_norm.weight": "pytorch_model-00003-of-00006.bin",
+    "lm_head.weight": "pytorch_model-00006-of-00006.bin",
+    "shared.weight": "pytorch_model-00001-of-00006.bin"
+  }
+}

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,308 @@

+{
+  "additional_special_tokens": [
+    "<extra_id_299>",
+    "<extra_id_298>",
+    "<extra_id_297>",
+    "<extra_id_296>",
+    "<extra_id_295>",
+    "<extra_id_294>",
+    "<extra_id_293>",
+    "<extra_id_292>",
+    "<extra_id_291>",
+    "<extra_id_290>",
+    "<extra_id_289>",
+    "<extra_id_288>",
+    "<extra_id_287>",
+    "<extra_id_286>",
+    "<extra_id_285>",
+    "<extra_id_284>",
+    "<extra_id_283>",
+    "<extra_id_282>",
+    "<extra_id_281>",
+    "<extra_id_280>",
+    "<extra_id_279>",
+    "<extra_id_278>",
+    "<extra_id_277>",
+    "<extra_id_276>",
+    "<extra_id_275>",
+    "<extra_id_274>",
+    "<extra_id_273>",
+    "<extra_id_272>",
+    "<extra_id_271>",
+    "<extra_id_270>",
+    "<extra_id_269>",
+    "<extra_id_268>",
+    "<extra_id_267>",
+    "<extra_id_266>",
+    "<extra_id_265>",
+    "<extra_id_264>",
+    "<extra_id_263>",
+    "<extra_id_262>",
+    "<extra_id_261>",
+    "<extra_id_260>",
+    "<extra_id_259>",
+    "<extra_id_258>",
+    "<extra_id_257>",
+    "<extra_id_256>",
+    "<extra_id_255>",
+    "<extra_id_254>",
+    "<extra_id_253>",
+    "<extra_id_252>",
+    "<extra_id_251>",
+    "<extra_id_250>",
+    "<extra_id_249>",
+    "<extra_id_248>",
+    "<extra_id_247>",
+    "<extra_id_246>",
+    "<extra_id_245>",
+    "<extra_id_244>",
+    "<extra_id_243>",
+    "<extra_id_242>",
+    "<extra_id_241>",
+    "<extra_id_240>",
+    "<extra_id_239>",
+    "<extra_id_238>",
+    "<extra_id_237>",
+    "<extra_id_236>",
+    "<extra_id_235>",
+    "<extra_id_234>",
+    "<extra_id_233>",
+    "<extra_id_232>",
+    "<extra_id_231>",
+    "<extra_id_230>",
+    "<extra_id_229>",
+    "<extra_id_228>",
+    "<extra_id_227>",
+    "<extra_id_226>",
+    "<extra_id_225>",
+    "<extra_id_224>",
+    "<extra_id_223>",
+    "<extra_id_222>",
+    "<extra_id_221>",
+    "<extra_id_220>",
+    "<extra_id_219>",
+    "<extra_id_218>",
+    "<extra_id_217>",
+    "<extra_id_216>",
+    "<extra_id_215>",
+    "<extra_id_214>",
+    "<extra_id_213>",
+    "<extra_id_212>",
+    "<extra_id_211>",
+    "<extra_id_210>",
+    "<extra_id_209>",
+    "<extra_id_208>",
+    "<extra_id_207>",
+    "<extra_id_206>",
+    "<extra_id_205>",
+    "<extra_id_204>",
+    "<extra_id_203>",
+    "<extra_id_202>",
+    "<extra_id_201>",
+    "<extra_id_200>",
+    "<extra_id_199>",
+    "<extra_id_198>",
+    "<extra_id_197>",
+    "<extra_id_196>",
+    "<extra_id_195>",
+    "<extra_id_194>",
+    "<extra_id_193>",
+    "<extra_id_192>",
+    "<extra_id_191>",
+    "<extra_id_190>",
+    "<extra_id_189>",
+    "<extra_id_188>",
+    "<extra_id_187>",
+    "<extra_id_186>",
+    "<extra_id_185>",
+    "<extra_id_184>",
+    "<extra_id_183>",
+    "<extra_id_182>",
+    "<extra_id_181>",
+    "<extra_id_180>",
+    "<extra_id_179>",
+    "<extra_id_178>",
+    "<extra_id_177>",
+    "<extra_id_176>",
+    "<extra_id_175>",
+    "<extra_id_174>",
+    "<extra_id_173>",
+    "<extra_id_172>",
+    "<extra_id_171>",
+    "<extra_id_170>",
+    "<extra_id_169>",
+    "<extra_id_168>",
+    "<extra_id_167>",
+    "<extra_id_166>",
+    "<extra_id_165>",
+    "<extra_id_164>",
+    "<extra_id_163>",
+    "<extra_id_162>",
+    "<extra_id_161>",
+    "<extra_id_160>",
+    "<extra_id_159>",
+    "<extra_id_158>",
+    "<extra_id_157>",
+    "<extra_id_156>",
+    "<extra_id_155>",
+    "<extra_id_154>",
+    "<extra_id_153>",
+    "<extra_id_152>",
+    "<extra_id_151>",
+    "<extra_id_150>",
+    "<extra_id_149>",
+    "<extra_id_148>",
+    "<extra_id_147>",
+    "<extra_id_146>",
+    "<extra_id_145>",
+    "<extra_id_144>",
+    "<extra_id_143>",
+    "<extra_id_142>",
+    "<extra_id_141>",
+    "<extra_id_140>",
+    "<extra_id_139>",
+    "<extra_id_138>",
+    "<extra_id_137>",
+    "<extra_id_136>",
+    "<extra_id_135>",
+    "<extra_id_134>",
+    "<extra_id_133>",
+    "<extra_id_132>",
+    "<extra_id_131>",
+    "<extra_id_130>",
+    "<extra_id_129>",
+    "<extra_id_128>",
+    "<extra_id_127>",
+    "<extra_id_126>",
+    "<extra_id_125>",
+    "<extra_id_124>",
+    "<extra_id_123>",
+    "<extra_id_122>",
+    "<extra_id_121>",
+    "<extra_id_120>",
+    "<extra_id_119>",
+    "<extra_id_118>",
+    "<extra_id_117>",
+    "<extra_id_116>",
+    "<extra_id_115>",
+    "<extra_id_114>",
+    "<extra_id_113>",
+    "<extra_id_112>",
+    "<extra_id_111>",
+    "<extra_id_110>",
+    "<extra_id_109>",
+    "<extra_id_108>",
+    "<extra_id_107>",
+    "<extra_id_106>",
+    "<extra_id_105>",
+    "<extra_id_104>",
+    "<extra_id_103>",
+    "<extra_id_102>",
+    "<extra_id_101>",
+    "<extra_id_100>",
+    "<extra_id_99>",
+    "<extra_id_98>",
+    "<extra_id_97>",
+    "<extra_id_96>",
+    "<extra_id_95>",
+    "<extra_id_94>",
+    "<extra_id_93>",
+    "<extra_id_92>",
+    "<extra_id_91>",
+    "<extra_id_90>",
+    "<extra_id_89>",
+    "<extra_id_88>",
+    "<extra_id_87>",
+    "<extra_id_86>",
+    "<extra_id_85>",
+    "<extra_id_84>",
+    "<extra_id_83>",
+    "<extra_id_82>",
+    "<extra_id_81>",
+    "<extra_id_80>",
+    "<extra_id_79>",
+    "<extra_id_78>",
+    "<extra_id_77>",
+    "<extra_id_76>",
+    "<extra_id_75>",
+    "<extra_id_74>",
+    "<extra_id_73>",
+    "<extra_id_72>",
+    "<extra_id_71>",
+    "<extra_id_70>",
+    "<extra_id_69>",
+    "<extra_id_68>",
+    "<extra_id_67>",
+    "<extra_id_66>",
+    "<extra_id_65>",
+    "<extra_id_64>",
+    "<extra_id_63>",
+    "<extra_id_62>",
+    "<extra_id_61>",
+    "<extra_id_60>",
+    "<extra_id_59>",
+    "<extra_id_58>",
+    "<extra_id_57>",
+    "<extra_id_56>",
+    "<extra_id_55>",
+    "<extra_id_54>",
+    "<extra_id_53>",
+    "<extra_id_52>",
+    "<extra_id_51>",
+    "<extra_id_50>",
+    "<extra_id_49>",
+    "<extra_id_48>",
+    "<extra_id_47>",
+    "<extra_id_46>",
+    "<extra_id_45>",
+    "<extra_id_44>",
+    "<extra_id_43>",
+    "<extra_id_42>",
+    "<extra_id_41>",
+    "<extra_id_40>",
+    "<extra_id_39>",
+    "<extra_id_38>",
+    "<extra_id_37>",
+    "<extra_id_36>",
+    "<extra_id_35>",
+    "<extra_id_34>",
+    "<extra_id_33>",
+    "<extra_id_32>",
+    "<extra_id_31>",
+    "<extra_id_30>",
+    "<extra_id_29>",
+    "<extra_id_28>",
+    "<extra_id_27>",
+    "<extra_id_26>",
+    "<extra_id_25>",
+    "<extra_id_24>",
+    "<extra_id_23>",
+    "<extra_id_22>",
+    "<extra_id_21>",
+    "<extra_id_20>",
+    "<extra_id_19>",
+    "<extra_id_18>",
+    "<extra_id_17>",
+    "<extra_id_16>",
+    "<extra_id_15>",
+    "<extra_id_14>",
+    "<extra_id_13>",
+    "<extra_id_12>",
+    "<extra_id_11>",
+    "<extra_id_10>",
+    "<extra_id_9>",
+    "<extra_id_8>",
+    "<extra_id_7>",
+    "<extra_id_6>",
+    "<extra_id_5>",
+    "<extra_id_4>",
+    "<extra_id_3>",
+    "<extra_id_2>",
+    "<extra_id_1>",
+    "<extra_id_0>"
+  ],
+  "bos_token": "<s>",
+  "eos_token": "</s>",
+  "pad_token": "<pad>",
+  "unk_token": "<unk>"
+}

spiece.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e3909a67b780650b35cf529ac782ad2b6b26e6d1f849d3fbb6a872905f452458
+size 4548313

tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:af904105ce1071b1202bba0059a841f4a7b85b48b6ec179c4948e3483476e0dd
+size 16853013

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,314 @@

+{
+  "additional_special_tokens": [
+    "<extra_id_0>",
+    "<extra_id_1>",
+    "<extra_id_2>",
+    "<extra_id_3>",
+    "<extra_id_4>",
+    "<extra_id_5>",
+    "<extra_id_6>",
+    "<extra_id_7>",
+    "<extra_id_8>",
+    "<extra_id_9>",
+    "<extra_id_10>",
+    "<extra_id_11>",
+    "<extra_id_12>",
+    "<extra_id_13>",
+    "<extra_id_14>",
+    "<extra_id_15>",
+    "<extra_id_16>",
+    "<extra_id_17>",
+    "<extra_id_18>",
+    "<extra_id_19>",
+    "<extra_id_20>",
+    "<extra_id_21>",
+    "<extra_id_22>",
+    "<extra_id_23>",
+    "<extra_id_24>",
+    "<extra_id_25>",
+    "<extra_id_26>",
+    "<extra_id_27>",
+    "<extra_id_28>",
+    "<extra_id_29>",
+    "<extra_id_30>",
+    "<extra_id_31>",
+    "<extra_id_32>",
+    "<extra_id_33>",
+    "<extra_id_34>",
+    "<extra_id_35>",
+    "<extra_id_36>",
+    "<extra_id_37>",
+    "<extra_id_38>",
+    "<extra_id_39>",
+    "<extra_id_40>",
+    "<extra_id_41>",
+    "<extra_id_42>",
+    "<extra_id_43>",
+    "<extra_id_44>",
+    "<extra_id_45>",
+    "<extra_id_46>",
+    "<extra_id_47>",
+    "<extra_id_48>",
+    "<extra_id_49>",
+    "<extra_id_50>",
+    "<extra_id_51>",
+    "<extra_id_52>",
+    "<extra_id_53>",
+    "<extra_id_54>",
+    "<extra_id_55>",
+    "<extra_id_56>",
+    "<extra_id_57>",
+    "<extra_id_58>",
+    "<extra_id_59>",
+    "<extra_id_60>",
+    "<extra_id_61>",
+    "<extra_id_62>",
+    "<extra_id_63>",
+    "<extra_id_64>",
+    "<extra_id_65>",
+    "<extra_id_66>",
+    "<extra_id_67>",
+    "<extra_id_68>",
+    "<extra_id_69>",
+    "<extra_id_70>",
+    "<extra_id_71>",
+    "<extra_id_72>",
+    "<extra_id_73>",
+    "<extra_id_74>",
+    "<extra_id_75>",
+    "<extra_id_76>",
+    "<extra_id_77>",
+    "<extra_id_78>",
+    "<extra_id_79>",
+    "<extra_id_80>",
+    "<extra_id_81>",
+    "<extra_id_82>",
+    "<extra_id_83>",
+    "<extra_id_84>",
+    "<extra_id_85>",
+    "<extra_id_86>",
+    "<extra_id_87>",
+    "<extra_id_88>",
+    "<extra_id_89>",
+    "<extra_id_90>",
+    "<extra_id_91>",
+    "<extra_id_92>",
+    "<extra_id_93>",
+    "<extra_id_94>",
+    "<extra_id_95>",
+    "<extra_id_96>",
+    "<extra_id_97>",
+    "<extra_id_98>",
+    "<extra_id_99>",
+    "<extra_id_100>",
+    "<extra_id_101>",
+    "<extra_id_102>",
+    "<extra_id_103>",
+    "<extra_id_104>",
+    "<extra_id_105>",
+    "<extra_id_106>",
+    "<extra_id_107>",
+    "<extra_id_108>",
+    "<extra_id_109>",
+    "<extra_id_110>",
+    "<extra_id_111>",
+    "<extra_id_112>",
+    "<extra_id_113>",
+    "<extra_id_114>",
+    "<extra_id_115>",
+    "<extra_id_116>",
+    "<extra_id_117>",
+    "<extra_id_118>",
+    "<extra_id_119>",
+    "<extra_id_120>",
+    "<extra_id_121>",
+    "<extra_id_122>",
+    "<extra_id_123>",
+    "<extra_id_124>",
+    "<extra_id_125>",
+    "<extra_id_126>",
+    "<extra_id_127>",
+    "<extra_id_128>",
+    "<extra_id_129>",
+    "<extra_id_130>",
+    "<extra_id_131>",
+    "<extra_id_132>",
+    "<extra_id_133>",
+    "<extra_id_134>",
+    "<extra_id_135>",
+    "<extra_id_136>",
+    "<extra_id_137>",
+    "<extra_id_138>",
+    "<extra_id_139>",
+    "<extra_id_140>",
+    "<extra_id_141>",
+    "<extra_id_142>",
+    "<extra_id_143>",
+    "<extra_id_144>",
+    "<extra_id_145>",
+    "<extra_id_146>",
+    "<extra_id_147>",
+    "<extra_id_148>",
+    "<extra_id_149>",
+    "<extra_id_150>",
+    "<extra_id_151>",
+    "<extra_id_152>",
+    "<extra_id_153>",
+    "<extra_id_154>",
+    "<extra_id_155>",
+    "<extra_id_156>",
+    "<extra_id_157>",
+    "<extra_id_158>",
+    "<extra_id_159>",
+    "<extra_id_160>",
+    "<extra_id_161>",
+    "<extra_id_162>",
+    "<extra_id_163>",
+    "<extra_id_164>",
+    "<extra_id_165>",
+    "<extra_id_166>",
+    "<extra_id_167>",
+    "<extra_id_168>",
+    "<extra_id_169>",
+    "<extra_id_170>",
+    "<extra_id_171>",
+    "<extra_id_172>",
+    "<extra_id_173>",
+    "<extra_id_174>",
+    "<extra_id_175>",
+    "<extra_id_176>",
+    "<extra_id_177>",
+    "<extra_id_178>",
+    "<extra_id_179>",
+    "<extra_id_180>",
+    "<extra_id_181>",
+    "<extra_id_182>",
+    "<extra_id_183>",
+    "<extra_id_184>",
+    "<extra_id_185>",
+    "<extra_id_186>",
+    "<extra_id_187>",
+    "<extra_id_188>",
+    "<extra_id_189>",
+    "<extra_id_190>",
+    "<extra_id_191>",
+    "<extra_id_192>",
+    "<extra_id_193>",
+    "<extra_id_194>",
+    "<extra_id_195>",
+    "<extra_id_196>",
+    "<extra_id_197>",
+    "<extra_id_198>",
+    "<extra_id_199>",
+    "<extra_id_200>",
+    "<extra_id_201>",
+    "<extra_id_202>",
+    "<extra_id_203>",
+    "<extra_id_204>",
+    "<extra_id_205>",
+    "<extra_id_206>",
+    "<extra_id_207>",
+    "<extra_id_208>",
+    "<extra_id_209>",
+    "<extra_id_210>",
+    "<extra_id_211>",
+    "<extra_id_212>",
+    "<extra_id_213>",
+    "<extra_id_214>",
+    "<extra_id_215>",
+    "<extra_id_216>",
+    "<extra_id_217>",
+    "<extra_id_218>",
+    "<extra_id_219>",
+    "<extra_id_220>",
+    "<extra_id_221>",
+    "<extra_id_222>",
+    "<extra_id_223>",
+    "<extra_id_224>",
+    "<extra_id_225>",
+    "<extra_id_226>",
+    "<extra_id_227>",
+    "<extra_id_228>",
+    "<extra_id_229>",
+    "<extra_id_230>",
+    "<extra_id_231>",
+    "<extra_id_232>",
+    "<extra_id_233>",
+    "<extra_id_234>",
+    "<extra_id_235>",
+    "<extra_id_236>",
+    "<extra_id_237>",
+    "<extra_id_238>",
+    "<extra_id_239>",
+    "<extra_id_240>",
+    "<extra_id_241>",
+    "<extra_id_242>",
+    "<extra_id_243>",
+    "<extra_id_244>",
+    "<extra_id_245>",
+    "<extra_id_246>",
+    "<extra_id_247>",
+    "<extra_id_248>",
+    "<extra_id_249>",
+    "<extra_id_250>",
+    "<extra_id_251>",
+    "<extra_id_252>",
+    "<extra_id_253>",
+    "<extra_id_254>",
+    "<extra_id_255>",
+    "<extra_id_256>",
+    "<extra_id_257>",
+    "<extra_id_258>",
+    "<extra_id_259>",
+    "<extra_id_260>",
+    "<extra_id_261>",
+    "<extra_id_262>",
+    "<extra_id_263>",
+    "<extra_id_264>",
+    "<extra_id_265>",
+    "<extra_id_266>",
+    "<extra_id_267>",
+    "<extra_id_268>",
+    "<extra_id_269>",
+    "<extra_id_270>",
+    "<extra_id_271>",
+    "<extra_id_272>",
+    "<extra_id_273>",
+    "<extra_id_274>",
+    "<extra_id_275>",
+    "<extra_id_276>",
+    "<extra_id_277>",
+    "<extra_id_278>",
+    "<extra_id_279>",
+    "<extra_id_280>",
+    "<extra_id_281>",
+    "<extra_id_282>",
+    "<extra_id_283>",
+    "<extra_id_284>",
+    "<extra_id_285>",
+    "<extra_id_286>",
+    "<extra_id_287>",
+    "<extra_id_288>",
+    "<extra_id_289>",
+    "<extra_id_290>",
+    "<extra_id_291>",
+    "<extra_id_292>",
+    "<extra_id_293>",
+    "<extra_id_294>",
+    "<extra_id_295>",
+    "<extra_id_296>",
+    "<extra_id_297>",
+    "<extra_id_298>",
+    "<extra_id_299>"
+  ],
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": true,
+  "eos_token": "</s>",
+  "extra_ids": 300,
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "<pad>",
+  "sp_model_kwargs": {},
+  "spaces_between_special_tokens": false,
+  "tokenizer_class": "T5Tokenizer",
+  "unk_token": "<unk>"
+}