aoiandroid committed on 26 days ago

Commit

ecb101f

verified ·

1 Parent(s): 3da9842

Add files using upload-large-folder tool

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

Helsinki-NLP-opus-mt-en-to/tokenizer.json +0 -0
Helsinki-NLP-opus-mt-en-to/tokenizer_config.json +39 -0
Helsinki-NLP-opus-mt-en-to/vocab.json +0 -0
Helsinki-NLP-opus-mt-en-toi/README.md +25 -0
Helsinki-NLP-opus-mt-en-toi/config.json +56 -0
Helsinki-NLP-opus-mt-en-toi/generation_config.json +16 -0
Helsinki-NLP-opus-mt-en-toi/special_tokens_map.json +5 -0
Helsinki-NLP-opus-mt-en-toi/tokenizer.json +0 -0
Helsinki-NLP-opus-mt-en-toi/tokenizer_config.json +39 -0
Helsinki-NLP-opus-mt-en-toi/vocab.json +0 -0
Helsinki-NLP-opus-mt-en-tpi/README.md +25 -0
Helsinki-NLP-opus-mt-en-tpi/config.json +56 -0
Helsinki-NLP-opus-mt-en-tpi/generation_config.json +16 -0
Helsinki-NLP-opus-mt-en-tpi/special_tokens_map.json +5 -0
Helsinki-NLP-opus-mt-en-tpi/tokenizer.json +0 -0
Helsinki-NLP-opus-mt-en-tpi/tokenizer_config.json +39 -0
Helsinki-NLP-opus-mt-en-tpi/vocab.json +0 -0
Helsinki-NLP-opus-mt-en-trk/README.md +123 -0
Helsinki-NLP-opus-mt-en-trk/config.json +56 -0
Helsinki-NLP-opus-mt-en-trk/generation_config.json +16 -0
Helsinki-NLP-opus-mt-en-trk/special_tokens_map.json +5 -0
Helsinki-NLP-opus-mt-en-trk/tokenizer.json +0 -0
Helsinki-NLP-opus-mt-en-trk/tokenizer_config.json +39 -0
Helsinki-NLP-opus-mt-en-trk/vocab.json +0 -0
Helsinki-NLP-opus-mt-en-ts/README.md +25 -0
Helsinki-NLP-opus-mt-en-ts/config.json +56 -0
Helsinki-NLP-opus-mt-en-ts/generation_config.json +16 -0
Helsinki-NLP-opus-mt-en-ts/special_tokens_map.json +5 -0
Helsinki-NLP-opus-mt-en-ts/tokenizer.json +0 -0
Helsinki-NLP-opus-mt-en-ts/tokenizer_config.json +39 -0
Helsinki-NLP-opus-mt-en-ts/vocab.json +0 -0
Helsinki-NLP-opus-mt-en-tut/README.md +121 -0
Helsinki-NLP-opus-mt-en-tut/config.json +56 -0
Helsinki-NLP-opus-mt-en-tut/generation_config.json +16 -0
Helsinki-NLP-opus-mt-en-tut/special_tokens_map.json +5 -0
Helsinki-NLP-opus-mt-en-tut/tokenizer.json +0 -0
Helsinki-NLP-opus-mt-en-tut/tokenizer_config.json +39 -0
Helsinki-NLP-opus-mt-en-tut/vocab.json +0 -0
Helsinki-NLP-opus-mt-en-tvl/README.md +25 -0
Helsinki-NLP-opus-mt-en-tvl/config.json +56 -0
Helsinki-NLP-opus-mt-en-tvl/generation_config.json +16 -0
Helsinki-NLP-opus-mt-en-tvl/special_tokens_map.json +5 -0
Helsinki-NLP-opus-mt-en-tvl/tokenizer.json +0 -0
Helsinki-NLP-opus-mt-en-tvl/tokenizer_config.json +39 -0
Helsinki-NLP-opus-mt-en-tvl/vocab.json +0 -0
Helsinki-NLP-opus-mt-en-tw/README.md +25 -0
Helsinki-NLP-opus-mt-en-tw/config.json +56 -0
Helsinki-NLP-opus-mt-en-tw/generation_config.json +16 -0
Helsinki-NLP-opus-mt-en-tw/special_tokens_map.json +5 -0
Helsinki-NLP-opus-mt-en-tw/tokenizer.json +0 -0

Helsinki-NLP-opus-mt-en-to/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

Helsinki-NLP-opus-mt-en-to/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,39 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "57445": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "extra_special_tokens": {},
+  "model_max_length": 512,
+  "pad_token": "<pad>",
+  "separate_vocabs": false,
+  "source_lang": "en",
+  "sp_model_kwargs": {},
+  "target_lang": "to",
+  "tokenizer_class": "MarianTokenizer",
+  "unk_token": "<unk>"
+}

Helsinki-NLP-opus-mt-en-to/vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff

Helsinki-NLP-opus-mt-en-toi/README.md ADDED Viewed

	@@ -0,0 +1,25 @@

+---
+tags:
+- translation
+license: apache-2.0
+---
+### opus-mt-en-toi
+* source languages: en
+* target languages: toi
+*  OPUS readme: [en-toi](https://github.com/Helsinki-NLP/OPUS-MT-train/blob/master/models/en-toi/README.md)
+*  dataset: opus
+* model: transformer-align
+* pre-processing: normalization + SentencePiece
+* download original weights: [opus-2020-01-08.zip](https://object.pouta.csc.fi/OPUS-MT-models/en-toi/opus-2020-01-08.zip)
+* test set translations: [opus-2020-01-08.test.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-toi/opus-2020-01-08.test.txt)
+* test set scores: [opus-2020-01-08.eval.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-toi/opus-2020-01-08.eval.txt)
+## Benchmarks
+| testset               | BLEU  | chr-F |
+|-----------------------|-------|-------|
+| JW300.en.toi 	| 32.8 	| 0.598 |

Helsinki-NLP-opus-mt-en-toi/config.json ADDED Viewed

	@@ -0,0 +1,56 @@

+{
+  "_num_labels": 3,
+  "activation_dropout": 0.0,
+  "activation_function": "swish",
+  "add_bias_logits": false,
+  "add_final_layer_norm": false,
+  "architectures": [
+    "MarianMTModel"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 0,
+  "classif_dropout": 0.0,
+  "classifier_dropout": 0.0,
+  "d_model": 512,
+  "decoder_attention_heads": 8,
+  "decoder_ffn_dim": 2048,
+  "decoder_layerdrop": 0.0,
+  "decoder_layers": 6,
+  "decoder_start_token_id": 61050,
+  "decoder_vocab_size": 61051,
+  "dropout": 0.1,
+  "encoder_attention_heads": 8,
+  "encoder_ffn_dim": 2048,
+  "encoder_layerdrop": 0.0,
+  "encoder_layers": 6,
+  "eos_token_id": 0,
+  "forced_eos_token_id": 0,
+  "id2label": {
+    "0": "LABEL_0",
+    "1": "LABEL_1",
+    "2": "LABEL_2"
+  },
+  "init_std": 0.02,
+  "is_encoder_decoder": true,
+  "is_transformers_support_available": true,
+  "label2id": {
+    "LABEL_0": 0,
+    "LABEL_1": 1,
+    "LABEL_2": 2
+  },
+  "max_length": null,
+  "max_position_embeddings": 512,
+  "model_type": "marian",
+  "normalize_before": false,
+  "normalize_embedding": false,
+  "num_beams": null,
+  "num_hidden_layers": 6,
+  "pad_token_id": 61050,
+  "scale_embedding": true,
+  "share_encoder_decoder_embeddings": true,
+  "static_position_embeddings": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.53.3",
+  "use_cache": true,
+  "vocab_size": 61051
+}

Helsinki-NLP-opus-mt-en-toi/generation_config.json ADDED Viewed

	@@ -0,0 +1,16 @@

+{
+  "bad_words_ids": [
+    [
+      61050
+    ]
+  ],
+  "bos_token_id": 0,
+  "decoder_start_token_id": 61050,
+  "eos_token_id": 0,
+  "forced_eos_token_id": 0,
+  "max_length": 512,
+  "num_beams": 4,
+  "pad_token_id": 61050,
+  "renormalize_logits": true,
+  "transformers_version": "4.53.3"
+}

Helsinki-NLP-opus-mt-en-toi/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,5 @@

+{
+  "eos_token": "</s>",
+  "pad_token": "<pad>",
+  "unk_token": "<unk>"
+}

Helsinki-NLP-opus-mt-en-toi/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

Helsinki-NLP-opus-mt-en-toi/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,39 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "61050": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "extra_special_tokens": {},
+  "model_max_length": 512,
+  "pad_token": "<pad>",
+  "separate_vocabs": false,
+  "source_lang": "en",
+  "sp_model_kwargs": {},
+  "target_lang": "toi",
+  "tokenizer_class": "MarianTokenizer",
+  "unk_token": "<unk>"
+}

Helsinki-NLP-opus-mt-en-toi/vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff

Helsinki-NLP-opus-mt-en-tpi/README.md ADDED Viewed

	@@ -0,0 +1,25 @@

+---
+tags:
+- translation
+license: apache-2.0
+---
+### opus-mt-en-tpi
+* source languages: en
+* target languages: tpi
+*  OPUS readme: [en-tpi](https://github.com/Helsinki-NLP/OPUS-MT-train/blob/master/models/en-tpi/README.md)
+*  dataset: opus
+* model: transformer-align
+* pre-processing: normalization + SentencePiece
+* download original weights: [opus-2020-01-08.zip](https://object.pouta.csc.fi/OPUS-MT-models/en-tpi/opus-2020-01-08.zip)
+* test set translations: [opus-2020-01-08.test.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-tpi/opus-2020-01-08.test.txt)
+* test set scores: [opus-2020-01-08.eval.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-tpi/opus-2020-01-08.eval.txt)
+## Benchmarks
+| testset               | BLEU  | chr-F |
+|-----------------------|-------|-------|
+| JW300.en.tpi 	| 38.7 	| 0.568 |

Helsinki-NLP-opus-mt-en-tpi/config.json ADDED Viewed

	@@ -0,0 +1,56 @@

+{
+  "_num_labels": 3,
+  "activation_dropout": 0.0,
+  "activation_function": "swish",
+  "add_bias_logits": false,
+  "add_final_layer_norm": false,
+  "architectures": [
+    "MarianMTModel"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 0,
+  "classif_dropout": 0.0,
+  "classifier_dropout": 0.0,
+  "d_model": 512,
+  "decoder_attention_heads": 8,
+  "decoder_ffn_dim": 2048,
+  "decoder_layerdrop": 0.0,
+  "decoder_layers": 6,
+  "decoder_start_token_id": 49237,
+  "decoder_vocab_size": 49238,
+  "dropout": 0.1,
+  "encoder_attention_heads": 8,
+  "encoder_ffn_dim": 2048,
+  "encoder_layerdrop": 0.0,
+  "encoder_layers": 6,
+  "eos_token_id": 0,
+  "forced_eos_token_id": 0,
+  "id2label": {
+    "0": "LABEL_0",
+    "1": "LABEL_1",
+    "2": "LABEL_2"
+  },
+  "init_std": 0.02,
+  "is_encoder_decoder": true,
+  "is_transformers_support_available": true,
+  "label2id": {
+    "LABEL_0": 0,
+    "LABEL_1": 1,
+    "LABEL_2": 2
+  },
+  "max_length": null,
+  "max_position_embeddings": 512,
+  "model_type": "marian",
+  "normalize_before": false,
+  "normalize_embedding": false,
+  "num_beams": null,
+  "num_hidden_layers": 6,
+  "pad_token_id": 49237,
+  "scale_embedding": true,
+  "share_encoder_decoder_embeddings": true,
+  "static_position_embeddings": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.53.3",
+  "use_cache": true,
+  "vocab_size": 49238
+}

Helsinki-NLP-opus-mt-en-tpi/generation_config.json ADDED Viewed

	@@ -0,0 +1,16 @@

+{
+  "bad_words_ids": [
+    [
+      49237
+    ]
+  ],
+  "bos_token_id": 0,
+  "decoder_start_token_id": 49237,
+  "eos_token_id": 0,
+  "forced_eos_token_id": 0,
+  "max_length": 512,
+  "num_beams": 4,
+  "pad_token_id": 49237,
+  "renormalize_logits": true,
+  "transformers_version": "4.53.3"
+}

Helsinki-NLP-opus-mt-en-tpi/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,5 @@

+{
+  "eos_token": "</s>",
+  "pad_token": "<pad>",
+  "unk_token": "<unk>"
+}

Helsinki-NLP-opus-mt-en-tpi/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

Helsinki-NLP-opus-mt-en-tpi/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,39 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49237": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "extra_special_tokens": {},
+  "model_max_length": 512,
+  "pad_token": "<pad>",
+  "separate_vocabs": false,
+  "source_lang": "en",
+  "sp_model_kwargs": {},
+  "target_lang": "tpi",
+  "tokenizer_class": "MarianTokenizer",
+  "unk_token": "<unk>"
+}

Helsinki-NLP-opus-mt-en-tpi/vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff

Helsinki-NLP-opus-mt-en-trk/README.md ADDED Viewed

	@@ -0,0 +1,123 @@

+---
+language:
+- en
+- tt
+- cv
+- tk
+- tr
+- ba
+- trk
+tags:
+- translation
+license: apache-2.0
+---
+### eng-trk
+* source group: English
+* target group: Turkic languages
+*  OPUS readme: [eng-trk](https://github.com/Helsinki-NLP/Tatoeba-Challenge/tree/master/models/eng-trk/README.md)
+*  model: transformer
+* source language(s): eng
+* target language(s): aze_Latn bak chv crh crh_Latn kaz_Cyrl kaz_Latn kir_Cyrl kjh kum ota_Arab ota_Latn sah tat tat_Arab tat_Latn tuk tuk_Latn tur tyv uig_Arab uig_Cyrl uzb_Cyrl uzb_Latn
+* model: transformer
+* pre-processing: normalization + SentencePiece (spm32k,spm32k)
+* a sentence-initial language token is required in the form of `>>id<<` (id = valid target language ID)
+* download original weights: [opus2m-2020-08-01.zip](https://object.pouta.csc.fi/Tatoeba-MT-models/eng-trk/opus2m-2020-08-01.zip)
+* test set translations: [opus2m-2020-08-01.test.txt](https://object.pouta.csc.fi/Tatoeba-MT-models/eng-trk/opus2m-2020-08-01.test.txt)
+* test set scores: [opus2m-2020-08-01.eval.txt](https://object.pouta.csc.fi/Tatoeba-MT-models/eng-trk/opus2m-2020-08-01.eval.txt)
+## Benchmarks
+| testset               | BLEU  | chr-F |
+|-----------------------|-------|-------|
+| newsdev2016-entr-engtur.eng.tur 	| 10.1 	| 0.437 |
+| newstest2016-entr-engtur.eng.tur 	| 9.2 	| 0.410 |
+| newstest2017-entr-engtur.eng.tur 	| 9.0 	| 0.410 |
+| newstest2018-entr-engtur.eng.tur 	| 9.2 	| 0.413 |
+| Tatoeba-test.eng-aze.eng.aze 	| 26.8 	| 0.577 |
+| Tatoeba-test.eng-bak.eng.bak 	| 7.6 	| 0.308 |
+| Tatoeba-test.eng-chv.eng.chv 	| 4.3 	| 0.270 |
+| Tatoeba-test.eng-crh.eng.crh 	| 8.1 	| 0.330 |
+| Tatoeba-test.eng-kaz.eng.kaz 	| 11.1 	| 0.359 |
+| Tatoeba-test.eng-kir.eng.kir 	| 28.6 	| 0.524 |
+| Tatoeba-test.eng-kjh.eng.kjh 	| 1.0 	| 0.041 |
+| Tatoeba-test.eng-kum.eng.kum 	| 2.2 	| 0.075 |
+| Tatoeba-test.eng.multi 	| 19.9 	| 0.455 |
+| Tatoeba-test.eng-ota.eng.ota 	| 0.5 	| 0.065 |
+| Tatoeba-test.eng-sah.eng.sah 	| 0.7 	| 0.030 |
+| Tatoeba-test.eng-tat.eng.tat 	| 9.7 	| 0.316 |
+| Tatoeba-test.eng-tuk.eng.tuk 	| 5.9 	| 0.317 |
+| Tatoeba-test.eng-tur.eng.tur 	| 34.6 	| 0.623 |
+| Tatoeba-test.eng-tyv.eng.tyv 	| 5.4 	| 0.210 |
+| Tatoeba-test.eng-uig.eng.uig 	| 0.1 	| 0.155 |
+| Tatoeba-test.eng-uzb.eng.uzb 	| 3.4 	| 0.275 |
+### System Info:
+- hf_name: eng-trk
+- source_languages: eng
+- target_languages: trk
+- opus_readme_url: https://github.com/Helsinki-NLP/Tatoeba-Challenge/tree/master/models/eng-trk/README.md
+- original_repo: Tatoeba-Challenge
+- tags: ['translation']
+- languages: ['en', 'tt', 'cv', 'tk', 'tr', 'ba', 'trk']
+- src_constituents: {'eng'}
+- tgt_constituents: {'kir_Cyrl', 'tat_Latn', 'tat', 'chv', 'uzb_Cyrl', 'kaz_Latn', 'aze_Latn', 'crh', 'kjh', 'uzb_Latn', 'ota_Arab', 'tuk_Latn', 'tuk', 'tat_Arab', 'sah', 'tyv', 'tur', 'uig_Arab', 'crh_Latn', 'kaz_Cyrl', 'uig_Cyrl', 'kum', 'ota_Latn', 'bak'}
+- src_multilingual: False
+- tgt_multilingual: True
+- prepro:  normalization + SentencePiece (spm32k,spm32k)
+- url_model: https://object.pouta.csc.fi/Tatoeba-MT-models/eng-trk/opus2m-2020-08-01.zip
+- url_test_set: https://object.pouta.csc.fi/Tatoeba-MT-models/eng-trk/opus2m-2020-08-01.test.txt
+- src_alpha3: eng
+- tgt_alpha3: trk
+- short_pair: en-trk
+- chrF2_score: 0.455
+- bleu: 19.9
+- brevity_penalty: 1.0
+- ref_len: 57072.0
+- src_name: English
+- tgt_name: Turkic languages
+- train_date: 2020-08-01
+- src_alpha2: en
+- tgt_alpha2: trk
+- prefer_old: False
+- long_pair: eng-trk
+- helsinki_git_sha: 480fcbe0ee1bf4774bcbe6226ad9f58e63f6c535
+- transformers_git_sha: 2207e5d8cb224e954a7cba69fa4ac2309e9ff30b
+- port_machine: brutasse
+- port_time: 2020-08-21-14:41

Helsinki-NLP-opus-mt-en-trk/config.json ADDED Viewed

	@@ -0,0 +1,56 @@

+{
+  "activation_dropout": 0.0,
+  "activation_function": "swish",
+  "add_bias_logits": false,
+  "add_final_layer_norm": false,
+  "architectures": [
+    "MarianMTModel"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 0,
+  "classif_dropout": 0.0,
+  "classifier_dropout": 0.0,
+  "d_model": 512,
+  "decoder_attention_heads": 8,
+  "decoder_ffn_dim": 2048,
+  "decoder_layerdrop": 0.0,
+  "decoder_layers": 6,
+  "decoder_start_token_id": 61673,
+  "decoder_vocab_size": 61674,
+  "dropout": 0.1,
+  "encoder_attention_heads": 8,
+  "encoder_ffn_dim": 2048,
+  "encoder_layerdrop": 0.0,
+  "encoder_layers": 6,
+  "eos_token_id": 0,
+  "extra_pos_embeddings": 61674,
+  "forced_eos_token_id": 0,
+  "id2label": {
+    "0": "LABEL_0",
+    "1": "LABEL_1",
+    "2": "LABEL_2"
+  },
+  "init_std": 0.02,
+  "is_encoder_decoder": true,
+  "is_transformers_support_available": true,
+  "label2id": {
+    "LABEL_0": 0,
+    "LABEL_1": 1,
+    "LABEL_2": 2
+  },
+  "max_length": null,
+  "max_position_embeddings": 512,
+  "model_type": "marian",
+  "normalize_before": false,
+  "normalize_embedding": false,
+  "num_beams": null,
+  "num_hidden_layers": 6,
+  "pad_token_id": 61673,
+  "scale_embedding": true,
+  "share_encoder_decoder_embeddings": true,
+  "static_position_embeddings": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.53.3",
+  "use_cache": true,
+  "vocab_size": 61674
+}

Helsinki-NLP-opus-mt-en-trk/generation_config.json ADDED Viewed

	@@ -0,0 +1,16 @@

+{
+  "bad_words_ids": [
+    [
+      61673
+    ]
+  ],
+  "bos_token_id": 0,
+  "decoder_start_token_id": 61673,
+  "eos_token_id": 0,
+  "forced_eos_token_id": 0,
+  "max_length": 512,
+  "num_beams": 4,
+  "pad_token_id": 61673,
+  "renormalize_logits": true,
+  "transformers_version": "4.53.3"
+}

Helsinki-NLP-opus-mt-en-trk/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,5 @@

+{
+  "eos_token": "</s>",
+  "pad_token": "<pad>",
+  "unk_token": "<unk>"
+}

Helsinki-NLP-opus-mt-en-trk/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

Helsinki-NLP-opus-mt-en-trk/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,39 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "61673": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "extra_special_tokens": {},
+  "model_max_length": 512,
+  "pad_token": "<pad>",
+  "separate_vocabs": false,
+  "source_lang": "eng",
+  "sp_model_kwargs": {},
+  "target_lang": "trk",
+  "tokenizer_class": "MarianTokenizer",
+  "unk_token": "<unk>"
+}

Helsinki-NLP-opus-mt-en-trk/vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff

Helsinki-NLP-opus-mt-en-ts/README.md ADDED Viewed

	@@ -0,0 +1,25 @@

+---
+tags:
+- translation
+license: apache-2.0
+---
+### opus-mt-en-ts
+* source languages: en
+* target languages: ts
+*  OPUS readme: [en-ts](https://github.com/Helsinki-NLP/OPUS-MT-train/blob/master/models/en-ts/README.md)
+*  dataset: opus
+* model: transformer-align
+* pre-processing: normalization + SentencePiece
+* download original weights: [opus-2020-01-08.zip](https://object.pouta.csc.fi/OPUS-MT-models/en-ts/opus-2020-01-08.zip)
+* test set translations: [opus-2020-01-08.test.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-ts/opus-2020-01-08.test.txt)
+* test set scores: [opus-2020-01-08.eval.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-ts/opus-2020-01-08.eval.txt)
+## Benchmarks
+| testset               | BLEU  | chr-F |
+|-----------------------|-------|-------|
+| JW300.en.ts 	| 43.4 	| 0.639 |

Helsinki-NLP-opus-mt-en-ts/config.json ADDED Viewed

	@@ -0,0 +1,56 @@

+{
+  "_num_labels": 3,
+  "activation_dropout": 0.0,
+  "activation_function": "swish",
+  "add_bias_logits": false,
+  "add_final_layer_norm": false,
+  "architectures": [
+    "MarianMTModel"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 0,
+  "classif_dropout": 0.0,
+  "classifier_dropout": 0.0,
+  "d_model": 512,
+  "decoder_attention_heads": 8,
+  "decoder_ffn_dim": 2048,
+  "decoder_layerdrop": 0.0,
+  "decoder_layers": 6,
+  "decoder_start_token_id": 57468,
+  "decoder_vocab_size": 57469,
+  "dropout": 0.1,
+  "encoder_attention_heads": 8,
+  "encoder_ffn_dim": 2048,
+  "encoder_layerdrop": 0.0,
+  "encoder_layers": 6,
+  "eos_token_id": 0,
+  "forced_eos_token_id": 0,
+  "id2label": {
+    "0": "LABEL_0",
+    "1": "LABEL_1",
+    "2": "LABEL_2"
+  },
+  "init_std": 0.02,
+  "is_encoder_decoder": true,
+  "is_transformers_support_available": true,
+  "label2id": {
+    "LABEL_0": 0,
+    "LABEL_1": 1,
+    "LABEL_2": 2
+  },
+  "max_length": null,
+  "max_position_embeddings": 512,
+  "model_type": "marian",
+  "normalize_before": false,
+  "normalize_embedding": false,
+  "num_beams": null,
+  "num_hidden_layers": 6,
+  "pad_token_id": 57468,
+  "scale_embedding": true,
+  "share_encoder_decoder_embeddings": true,
+  "static_position_embeddings": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.53.3",
+  "use_cache": true,
+  "vocab_size": 57469
+}

Helsinki-NLP-opus-mt-en-ts/generation_config.json ADDED Viewed

	@@ -0,0 +1,16 @@

+{
+  "bad_words_ids": [
+    [
+      57468
+    ]
+  ],
+  "bos_token_id": 0,
+  "decoder_start_token_id": 57468,
+  "eos_token_id": 0,
+  "forced_eos_token_id": 0,
+  "max_length": 512,
+  "num_beams": 4,
+  "pad_token_id": 57468,
+  "renormalize_logits": true,
+  "transformers_version": "4.53.3"
+}

Helsinki-NLP-opus-mt-en-ts/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,5 @@

+{
+  "eos_token": "</s>",
+  "pad_token": "<pad>",
+  "unk_token": "<unk>"
+}

Helsinki-NLP-opus-mt-en-ts/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

Helsinki-NLP-opus-mt-en-ts/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,39 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "57468": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "extra_special_tokens": {},
+  "model_max_length": 512,
+  "pad_token": "<pad>",
+  "separate_vocabs": false,
+  "source_lang": "en",
+  "sp_model_kwargs": {},
+  "target_lang": "ts",
+  "tokenizer_class": "MarianTokenizer",
+  "unk_token": "<unk>"
+}

Helsinki-NLP-opus-mt-en-ts/vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff

Helsinki-NLP-opus-mt-en-tut/README.md ADDED Viewed

	@@ -0,0 +1,121 @@

+---
+language:
+- en
+- tut
+tags:
+- translation
+license: apache-2.0
+---
+### eng-tut
+* source group: English
+* target group: Altaic languages
+*  OPUS readme: [eng-tut](https://github.com/Helsinki-NLP/Tatoeba-Challenge/tree/master/models/eng-tut/README.md)
+*  model: transformer
+* source language(s): eng
+* target language(s): aze_Latn bak chv crh crh_Latn kaz_Cyrl kaz_Latn kir_Cyrl kjh kum mon nog ota_Arab ota_Latn sah tat tat_Arab tat_Latn tuk tuk_Latn tur tyv uig_Arab uig_Cyrl uzb_Cyrl uzb_Latn xal
+* model: transformer
+* pre-processing: normalization + SentencePiece (spm32k,spm32k)
+* a sentence-initial language token is required in the form of `>>id<<` (id = valid target language ID)
+* download original weights: [opus2m-2020-08-02.zip](https://object.pouta.csc.fi/Tatoeba-MT-models/eng-tut/opus2m-2020-08-02.zip)
+* test set translations: [opus2m-2020-08-02.test.txt](https://object.pouta.csc.fi/Tatoeba-MT-models/eng-tut/opus2m-2020-08-02.test.txt)
+* test set scores: [opus2m-2020-08-02.eval.txt](https://object.pouta.csc.fi/Tatoeba-MT-models/eng-tut/opus2m-2020-08-02.eval.txt)
+## Benchmarks
+| testset               | BLEU  | chr-F |
+|-----------------------|-------|-------|
+| newsdev2016-entr-engtur.eng.tur 	| 10.4 	| 0.438 |
+| newstest2016-entr-engtur.eng.tur 	| 9.1 	| 0.414 |
+| newstest2017-entr-engtur.eng.tur 	| 9.5 	| 0.414 |
+| newstest2018-entr-engtur.eng.tur 	| 9.5 	| 0.415 |
+| Tatoeba-test.eng-aze.eng.aze 	| 27.2 	| 0.580 |
+| Tatoeba-test.eng-bak.eng.bak 	| 5.8 	| 0.298 |
+| Tatoeba-test.eng-chv.eng.chv 	| 4.6 	| 0.301 |
+| Tatoeba-test.eng-crh.eng.crh 	| 6.5 	| 0.342 |
+| Tatoeba-test.eng-kaz.eng.kaz 	| 11.8 	| 0.360 |
+| Tatoeba-test.eng-kir.eng.kir 	| 24.6 	| 0.499 |
+| Tatoeba-test.eng-kjh.eng.kjh 	| 2.2 	| 0.052 |
+| Tatoeba-test.eng-kum.eng.kum 	| 8.0 	| 0.229 |
+| Tatoeba-test.eng-mon.eng.mon 	| 10.3 	| 0.362 |
+| Tatoeba-test.eng.multi 	| 19.5 	| 0.451 |
+| Tatoeba-test.eng-nog.eng.nog 	| 1.5 	| 0.117 |
+| Tatoeba-test.eng-ota.eng.ota 	| 0.2 	| 0.035 |
+| Tatoeba-test.eng-sah.eng.sah 	| 0.7 	| 0.080 |
+| Tatoeba-test.eng-tat.eng.tat 	| 10.8 	| 0.320 |
+| Tatoeba-test.eng-tuk.eng.tuk 	| 5.6 	| 0.323 |
+| Tatoeba-test.eng-tur.eng.tur 	| 34.2 	| 0.623 |
+| Tatoeba-test.eng-tyv.eng.tyv 	| 8.1 	| 0.192 |
+| Tatoeba-test.eng-uig.eng.uig 	| 0.1 	| 0.158 |
+| Tatoeba-test.eng-uzb.eng.uzb 	| 4.2 	| 0.298 |
+| Tatoeba-test.eng-xal.eng.xal 	| 0.1 	| 0.061 |
+### System Info:
+- hf_name: eng-tut
+- source_languages: eng
+- target_languages: tut
+- opus_readme_url: https://github.com/Helsinki-NLP/Tatoeba-Challenge/tree/master/models/eng-tut/README.md
+- original_repo: Tatoeba-Challenge
+- tags: ['translation']
+- languages: ['en', 'tut']
+- src_constituents: {'eng'}
+- tgt_constituents: set()
+- src_multilingual: False
+- tgt_multilingual: True
+- prepro:  normalization + SentencePiece (spm32k,spm32k)
+- url_model: https://object.pouta.csc.fi/Tatoeba-MT-models/eng-tut/opus2m-2020-08-02.zip
+- url_test_set: https://object.pouta.csc.fi/Tatoeba-MT-models/eng-tut/opus2m-2020-08-02.test.txt
+- src_alpha3: eng
+- tgt_alpha3: tut
+- short_pair: en-tut
+- chrF2_score: 0.451
+- bleu: 19.5
+- brevity_penalty: 1.0
+- ref_len: 57472.0
+- src_name: English
+- tgt_name: Altaic languages
+- train_date: 2020-08-02
+- src_alpha2: en
+- tgt_alpha2: tut
+- prefer_old: False
+- long_pair: eng-tut
+- helsinki_git_sha: 480fcbe0ee1bf4774bcbe6226ad9f58e63f6c535
+- transformers_git_sha: 2207e5d8cb224e954a7cba69fa4ac2309e9ff30b
+- port_machine: brutasse
+- port_time: 2020-08-21-14:41

Helsinki-NLP-opus-mt-en-tut/config.json ADDED Viewed

	@@ -0,0 +1,56 @@

+{
+  "activation_dropout": 0.0,
+  "activation_function": "swish",
+  "add_bias_logits": false,
+  "add_final_layer_norm": false,
+  "architectures": [
+    "MarianMTModel"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 0,
+  "classif_dropout": 0.0,
+  "classifier_dropout": 0.0,
+  "d_model": 512,
+  "decoder_attention_heads": 8,
+  "decoder_ffn_dim": 2048,
+  "decoder_layerdrop": 0.0,
+  "decoder_layers": 6,
+  "decoder_start_token_id": 61656,
+  "decoder_vocab_size": 61657,
+  "dropout": 0.1,
+  "encoder_attention_heads": 8,
+  "encoder_ffn_dim": 2048,
+  "encoder_layerdrop": 0.0,
+  "encoder_layers": 6,
+  "eos_token_id": 0,
+  "extra_pos_embeddings": 61657,
+  "forced_eos_token_id": 0,
+  "id2label": {
+    "0": "LABEL_0",
+    "1": "LABEL_1",
+    "2": "LABEL_2"
+  },
+  "init_std": 0.02,
+  "is_encoder_decoder": true,
+  "is_transformers_support_available": true,
+  "label2id": {
+    "LABEL_0": 0,
+    "LABEL_1": 1,
+    "LABEL_2": 2
+  },
+  "max_length": null,
+  "max_position_embeddings": 512,
+  "model_type": "marian",
+  "normalize_before": false,
+  "normalize_embedding": false,
+  "num_beams": null,
+  "num_hidden_layers": 6,
+  "pad_token_id": 61656,
+  "scale_embedding": true,
+  "share_encoder_decoder_embeddings": true,
+  "static_position_embeddings": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.53.3",
+  "use_cache": true,
+  "vocab_size": 61657
+}

Helsinki-NLP-opus-mt-en-tut/generation_config.json ADDED Viewed

	@@ -0,0 +1,16 @@

+{
+  "bad_words_ids": [
+    [
+      61656
+    ]
+  ],
+  "bos_token_id": 0,
+  "decoder_start_token_id": 61656,
+  "eos_token_id": 0,
+  "forced_eos_token_id": 0,
+  "max_length": 512,
+  "num_beams": 4,
+  "pad_token_id": 61656,
+  "renormalize_logits": true,
+  "transformers_version": "4.53.3"
+}

Helsinki-NLP-opus-mt-en-tut/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,5 @@

+{
+  "eos_token": "</s>",
+  "pad_token": "<pad>",
+  "unk_token": "<unk>"
+}

Helsinki-NLP-opus-mt-en-tut/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

Helsinki-NLP-opus-mt-en-tut/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,39 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "61656": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "extra_special_tokens": {},
+  "model_max_length": 512,
+  "pad_token": "<pad>",
+  "separate_vocabs": false,
+  "source_lang": "eng",
+  "sp_model_kwargs": {},
+  "target_lang": "tut",
+  "tokenizer_class": "MarianTokenizer",
+  "unk_token": "<unk>"
+}

Helsinki-NLP-opus-mt-en-tut/vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff

Helsinki-NLP-opus-mt-en-tvl/README.md ADDED Viewed

	@@ -0,0 +1,25 @@

+---
+tags:
+- translation
+license: apache-2.0
+---
+### opus-mt-en-tvl
+* source languages: en
+* target languages: tvl
+*  OPUS readme: [en-tvl](https://github.com/Helsinki-NLP/OPUS-MT-train/blob/master/models/en-tvl/README.md)
+*  dataset: opus
+* model: transformer-align
+* pre-processing: normalization + SentencePiece
+* download original weights: [opus-2020-01-20.zip](https://object.pouta.csc.fi/OPUS-MT-models/en-tvl/opus-2020-01-20.zip)
+* test set translations: [opus-2020-01-20.test.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-tvl/opus-2020-01-20.test.txt)
+* test set scores: [opus-2020-01-20.eval.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-tvl/opus-2020-01-20.eval.txt)
+## Benchmarks
+| testset               | BLEU  | chr-F |
+|-----------------------|-------|-------|
+| JW300.en.tvl 	| 46.9 	| 0.625 |

Helsinki-NLP-opus-mt-en-tvl/config.json ADDED Viewed

	@@ -0,0 +1,56 @@

+{
+  "_num_labels": 3,
+  "activation_dropout": 0.0,
+  "activation_function": "swish",
+  "add_bias_logits": false,
+  "add_final_layer_norm": false,
+  "architectures": [
+    "MarianMTModel"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 0,
+  "classif_dropout": 0.0,
+  "classifier_dropout": 0.0,
+  "d_model": 512,
+  "decoder_attention_heads": 8,
+  "decoder_ffn_dim": 2048,
+  "decoder_layerdrop": 0.0,
+  "decoder_layers": 6,
+  "decoder_start_token_id": 38379,
+  "decoder_vocab_size": 38380,
+  "dropout": 0.1,
+  "encoder_attention_heads": 8,
+  "encoder_ffn_dim": 2048,
+  "encoder_layerdrop": 0.0,
+  "encoder_layers": 6,
+  "eos_token_id": 0,
+  "forced_eos_token_id": 0,
+  "id2label": {
+    "0": "LABEL_0",
+    "1": "LABEL_1",
+    "2": "LABEL_2"
+  },
+  "init_std": 0.02,
+  "is_encoder_decoder": true,
+  "is_transformers_support_available": true,
+  "label2id": {
+    "LABEL_0": 0,
+    "LABEL_1": 1,
+    "LABEL_2": 2
+  },
+  "max_length": null,
+  "max_position_embeddings": 512,
+  "model_type": "marian",
+  "normalize_before": false,
+  "normalize_embedding": false,
+  "num_beams": null,
+  "num_hidden_layers": 6,
+  "pad_token_id": 38379,
+  "scale_embedding": true,
+  "share_encoder_decoder_embeddings": true,
+  "static_position_embeddings": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.53.3",
+  "use_cache": true,
+  "vocab_size": 38380
+}

Helsinki-NLP-opus-mt-en-tvl/generation_config.json ADDED Viewed

	@@ -0,0 +1,16 @@

+{
+  "bad_words_ids": [
+    [
+      38379
+    ]
+  ],
+  "bos_token_id": 0,
+  "decoder_start_token_id": 38379,
+  "eos_token_id": 0,
+  "forced_eos_token_id": 0,
+  "max_length": 512,
+  "num_beams": 4,
+  "pad_token_id": 38379,
+  "renormalize_logits": true,
+  "transformers_version": "4.53.3"
+}

Helsinki-NLP-opus-mt-en-tvl/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,5 @@

+{
+  "eos_token": "</s>",
+  "pad_token": "<pad>",
+  "unk_token": "<unk>"
+}

Helsinki-NLP-opus-mt-en-tvl/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

Helsinki-NLP-opus-mt-en-tvl/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,39 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "38379": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "extra_special_tokens": {},
+  "model_max_length": 512,
+  "pad_token": "<pad>",
+  "separate_vocabs": false,
+  "source_lang": "en",
+  "sp_model_kwargs": {},
+  "target_lang": "tvl",
+  "tokenizer_class": "MarianTokenizer",
+  "unk_token": "<unk>"
+}

Helsinki-NLP-opus-mt-en-tvl/vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff

Helsinki-NLP-opus-mt-en-tw/README.md ADDED Viewed

	@@ -0,0 +1,25 @@

+---
+tags:
+- translation
+license: apache-2.0
+---
+### opus-mt-en-tw
+* source languages: en
+* target languages: tw
+* OPUS readme: [en-tw](https://github.com/Helsinki-NLP/OPUS-MT-train/blob/master/models/en-tw/README.md)
+* dataset: opus
+* model: transformer-align
+* pre-processing: normalization + SentencePiece
+* download original weights: [opus-2020-01-08.zip](https://object.pouta.csc.fi/OPUS-MT-models/en-tw/opus-2020-01-08.zip)
+* test set translations: [opus-2020-01-08.test.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-tw/opus-2020-01-08.test.txt)
+* test set scores: [opus-2020-01-08.eval.txt](https://object.pouta.csc.fi/OPUS-MT-models/en-tw/opus-2020-01-08.eval.txt)
+## Benchmarks
+| testset               | BLEU  | chr-F |
+|-----------------------|-------|-------|
+| JW300.en.tw           | 38.2  | 0.577 |

Helsinki-NLP-opus-mt-en-tw/config.json ADDED Viewed

	@@ -0,0 +1,56 @@

+{
+  "_num_labels": 3,
+  "activation_dropout": 0.0,
+  "activation_function": "swish",
+  "add_bias_logits": false,
+  "add_final_layer_norm": false,
+  "architectures": [
+    "MarianMTModel"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 0,
+  "classif_dropout": 0.0,
+  "classifier_dropout": 0.0,
+  "d_model": 512,
+  "decoder_attention_heads": 8,
+  "decoder_ffn_dim": 2048,
+  "decoder_layerdrop": 0.0,
+  "decoder_layers": 6,
+  "decoder_start_token_id": 56999,
+  "decoder_vocab_size": 57000,
+  "dropout": 0.1,
+  "encoder_attention_heads": 8,
+  "encoder_ffn_dim": 2048,
+  "encoder_layerdrop": 0.0,
+  "encoder_layers": 6,
+  "eos_token_id": 0,
+  "forced_eos_token_id": 0,
+  "id2label": {
+    "0": "LABEL_0",
+    "1": "LABEL_1",
+    "2": "LABEL_2"
+  },
+  "init_std": 0.02,
+  "is_encoder_decoder": true,
+  "is_transformers_support_available": true,
+  "label2id": {
+    "LABEL_0": 0,
+    "LABEL_1": 1,
+    "LABEL_2": 2
+  },
+  "max_length": null,
+  "max_position_embeddings": 512,
+  "model_type": "marian",
+  "normalize_before": false,
+  "normalize_embedding": false,
+  "num_beams": null,
+  "num_hidden_layers": 6,
+  "pad_token_id": 56999,
+  "scale_embedding": true,
+  "share_encoder_decoder_embeddings": true,
+  "static_position_embeddings": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.53.3",
+  "use_cache": true,
+  "vocab_size": 57000
+}

Helsinki-NLP-opus-mt-en-tw/generation_config.json ADDED Viewed

	@@ -0,0 +1,16 @@

+{
+  "bad_words_ids": [
+    [
+      56999
+    ]
+  ],
+  "bos_token_id": 0,
+  "decoder_start_token_id": 56999,
+  "eos_token_id": 0,
+  "forced_eos_token_id": 0,
+  "max_length": 512,
+  "num_beams": 4,
+  "pad_token_id": 56999,
+  "renormalize_logits": true,
+  "transformers_version": "4.53.3"
+}

Helsinki-NLP-opus-mt-en-tw/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,5 @@

+{
+  "eos_token": "</s>",
+  "pad_token": "<pad>",
+  "unk_token": "<unk>"
+}

Helsinki-NLP-opus-mt-en-tw/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff