thaile1809 committed on Mar 20, 2025

Commit

cd8eb51

verified ·

1 Parent(s): 1d5a9ff

Add files using upload-large-folder tool

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

packages/de_en/model/model.bin +3 -0
packages/en_ga/sentencepiece.model +3 -0
packages/en_hi/sentencepiece.model +3 -0
packages/en_hi/stanza/en/tokenize/ewt.pt +3 -0
packages/en_it/README.md +6 -0
packages/en_it/metadata.json +8 -0
packages/en_it/model/shared_vocabulary.txt +0 -0
packages/en_it/stanza/resources.json +0 -0
packages/en_sv/README.md +28 -0
packages/en_sv/metadata.json +8 -0
packages/en_sv/model/shared_vocabulary.txt +0 -0
packages/en_sv/stanza/resources.json +0 -0
packages/hi_en/sentencepiece.model +3 -0
packages/it_en/README.md +6 -0
packages/it_en/metadata.json +8 -0
packages/it_en/model/shared_vocabulary.txt +0 -0
packages/it_en/stanza/resources.json +0 -0
packages/translate-az_en-1_5/model/shared_vocabulary.txt +0 -0
packages/translate-az_en-1_5/stanza/resources.json +0 -0
packages/translate-en_az-1_5/README.md +30 -0
packages/translate-en_az-1_5/metadata.json +8 -0
packages/translate-en_az-1_5/model/shared_vocabulary.txt +0 -0
packages/translate-en_az-1_5/stanza/resources.json +0 -0
packages/translate-en_eo-1_5/README.md +20 -0
packages/translate-en_eo-1_5/metadata.json +8 -0
packages/translate-en_eo-1_5/model/shared_vocabulary.txt +0 -0
packages/translate-en_eo-1_5/stanza/resources.json +0 -0
packages/translate-en_fi-1_9/README.md +9 -0
packages/translate-en_fi-1_9/metadata.json +8 -0
packages/translate-en_gl-1_9/README.md +9 -0
packages/translate-en_gl-1_9/metadata.json +8 -0
packages/translate-en_gl-1_9/model/config.json +10 -0
packages/translate-en_gl-1_9/model/shared_vocabulary.json +0 -0
packages/translate-en_gl-1_9/stanza/resources.json +0 -0
packages/translate-en_id-1_9/README.md +9 -0
packages/translate-en_id-1_9/metadata.json +8 -0
packages/translate-en_id-1_9/model/config.json +10 -0
packages/translate-en_id-1_9/model/shared_vocabulary.json +0 -0
packages/translate-en_id-1_9/stanza/resources.json +0 -0
packages/translate-en_lt-1_9/README.md +9 -0
packages/translate-en_lt-1_9/metadata.json +1 -0
packages/translate-en_lt-1_9/model/config.json +9 -0
packages/translate-en_lt-1_9/model/shared_vocabulary.json +0 -0
packages/translate-en_lt-1_9/stanza/resources.json +0 -0
packages/translate-en_ms-1_9/README.md +9 -0
packages/translate-en_ms-1_9/metadata.json +1 -0
packages/translate-en_ms-1_9/model/config.json +9 -0
packages/translate-en_ms-1_9/model/shared_vocabulary.json +0 -0
packages/translate-en_ms-1_9/stanza/resources.json +0 -0
packages/translate-en_nl-1_8/sentencepiece.model +3 -0

packages/de_en/model/model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:15e178b71fbc11b1903fc108f809ebb0a26a0e47b870fc2ea8e78c536c38488b
+size 94392546

packages/en_ga/sentencepiece.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:836b2909e490da47fa8a0cd01c1af17c7532a41f465e3a1ee27fc8035171b3ee
+size 809857

packages/en_hi/sentencepiece.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ade76ff21f0b3eac117be85e21cce21ff2453eccc150e84cfd53eb2068a30954
+size 857173

packages/en_hi/stanza/en/tokenize/ewt.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7bc5a1523f4e60107640ea44d2cb27c787bc339ba0c93be6c7dbf744d5635cd6
+size 630886

packages/en_it/README.md ADDED Viewed

	@@ -0,0 +1,6 @@

+# English-Italian
+Trained on data from Opus (EUbookshop, OpenSubtitles, ParaCrawl, WikiMatrix).
+Includes pretrained models from [Stanza](https://github.com/stanfordnlp/stanza/blob/master/LICENSE).

packages/en_it/metadata.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "package_version": "1.0",
+    "argos_version": "1.0",
+    "from_code": "en",
+    "from_name": "English",
+    "to_code": "it",
+    "to_name": "Italian"
+}

packages/en_it/model/shared_vocabulary.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

packages/en_it/stanza/resources.json ADDED Viewed

The diff for this file is too large to render. See raw diff

packages/en_sv/README.md ADDED Viewed

	@@ -0,0 +1,28 @@

+# English-Swedish
+Data compiled by [Opus](https://opus.nlpl.eu/).
+Dictionary data from Wiktionary using [Wiktextract](https://github.com/tatuylonen/wiktextract).
+Includes pretrained models from [Stanza](https://github.com/stanfordnlp/stanza/).
+Credits:
+Holger Schwenk, Guillaume Wenzek, Sergey Edunov, Edouard Grave, Armand Joulin and Angela Fan, CCMatrix: Mining Billions of High-Quality Parallel Sentences on the WEB
+Angela Fan, Shruti Bhosale, Holger Schwenk, Zhiyi Ma, Ahmed El-Kishky, Siddharth Goyal, Mandeep Baines, Onur Celebi, Guillaume Wenzek, Vishrav Chaudhary, Naman Goyal, Tom Birch, Vitaliy Liptchinsky, Sergey Edunov, Edouard Grave, Michael Auli, and Armand Joulin. Beyond English-Centric Multilingual Machine Translation
+Holger Schwenk, Vishrav Chaudhary, Shuo Sun, Hongyu Gong and Paco Guzman, WikiMatrix: Mining 135M Parallel Sentences in 1620 Language Pairs from Wikipedia, arXiv, July 11 2019.
+@inproceedings{elkishky_ccaligned_2020,
+author = {El-Kishky, Ahmed and Chaudhary, Vishrav and Guzmán, Francisco and Koehn, Philipp},
+booktitle = {Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP 2020)},
+month = {November},
+title = {{CCAligned}: A Massive Collection of Cross-lingual Web-Document Pairs},
+year = {2020},
+address = "Online",
+publisher = "Association for Computational Linguistics",
+url = "https://www.aclweb.org/anthology/2020.emnlp-main.480",
+doi = "10.18653/v1/2020.emnlp-main.480",
+pages = "5960--5969"
+}

packages/en_sv/metadata.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "package_version": "1.5",
+    "argos_version": "1.5",
+    "from_code": "en",
+    "from_name": "English",
+    "to_code": "sv",
+    "to_name": "Swedish"
+}

packages/en_sv/model/shared_vocabulary.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

packages/en_sv/stanza/resources.json ADDED Viewed

The diff for this file is too large to render. See raw diff

packages/hi_en/sentencepiece.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:688499a9e74a4dfb401c8b1b82c01414ebe0fcb24bf45beb881f4bcab4f4aa9c
+size 857779

packages/it_en/README.md ADDED Viewed

	@@ -0,0 +1,6 @@

+# Italian-English
+Trained on data from Opus (EUbookshop, OpenSubtitles, ParaCrawl, WikiMatrix).
+Includes pretrained models from [Stanza](https://github.com/stanfordnlp/stanza/blob/master/LICENSE).

packages/it_en/metadata.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "package_version": "1.0",
+    "argos_version": "1.0",
+    "from_code": "it",
+    "from_name": "Italian",
+    "to_code": "en",
+    "to_name": "English"
+}

packages/it_en/model/shared_vocabulary.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

packages/it_en/stanza/resources.json ADDED Viewed

The diff for this file is too large to render. See raw diff

packages/translate-az_en-1_5/model/shared_vocabulary.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

packages/translate-az_en-1_5/stanza/resources.json ADDED Viewed

The diff for this file is too large to render. See raw diff

packages/translate-en_az-1_5/README.md ADDED Viewed

	@@ -0,0 +1,30 @@

+# English-Azerbaijani
+Data compiled by [Opus](https://opus.nlpl.eu/).
+Dictionary data from Wiktionary using [Wiktextract](https://github.com/tatuylonen/wiktextract).
+Includes pretrained models from [Stanza](https://github.com/stanfordnlp/stanza/).
+Credits:
+El-Kishky, Ahmed and Chaudhary, Vishrav and Guzmán, Francisco and Koehn, Philipp - Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP 2020)
+Holger Schwenk, Guillaume Wenzek, Sergey Edunov, Edouard Grave, Armand Joulin and Angela Fan, CCMatrix: Mining Billions of High-Quality Parallel Sentences on the WEB
+gourmet project at https://gourmet-project.eu
+J. Tiedemann, 2012, Parallel Data, Tools and Interfaces in OPUS. In Proceedings of the 8th International Conference on Language Resources and Evaluation (LREC 2012)
+J. Tiedemann, 2012, Parallel Data, Tools and Interfaces in OPUS. In Proceedings of the 8th International Conference on Language Resources and Evaluation (LREC 2012)
+Reimers, Nils and Gurevych, Iryna - Making Monolingual Sentence Embeddings Multilingual using Knowledge Distillation
+Holger Schwenk, Vishrav Chaudhary, Shuo Sun, Hongyu Gong and Paco Guzman, WikiMatrix: Mining 135M Parallel Sentences in 1620 Language Pairs from Wikipedia, arXiv, July 11 2019.
+J. Tiedemann, 2012, Parallel Data, Tools and Interfaces in OPUS. In Proceedings of the 8th International Conference on Language Resources and Evaluation (LREC 2012)
+J. Tiedemann, 2012, Parallel Data, Tools and Interfaces in OPUS. In Proceedings of the 8th International Conference on Language Resources and Evaluation (LREC 2012)
+El-Kishky, Ahmed and Renduchintala, Adi and Cross, James and Guzmán, Francisco and Koehn, Philipp - XLEnt: Mining Cross-lingual Entities with Lexical-Semantic-Phonetic Word Alignment

packages/translate-en_az-1_5/metadata.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "package_version": "1.5",
+    "argos_version": "1.5",
+    "from_code": "en",
+    "from_name": "English",
+    "to_code": "az",
+    "to_name": "Azerbaijani"
+}

packages/translate-en_az-1_5/model/shared_vocabulary.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

packages/translate-en_az-1_5/stanza/resources.json ADDED Viewed

The diff for this file is too large to render. See raw diff

packages/translate-en_eo-1_5/README.md ADDED Viewed

	@@ -0,0 +1,20 @@

+# English-Esperanto
+Data compiled by [Opus](https://opus.nlpl.eu/).
+Dictionary data from Wiktionary using [Wiktextract](https://github.com/tatuylonen/wiktextract).
+Includes pretrained models from [Stanza](https://github.com/stanfordnlp/stanza/).
+Credits:
+J. Tiedemann, 2012, Parallel Data, Tools and Interfaces in OPUS. In Proceedings of the 8th International Conference on Language Resources and Evaluation (LREC 2012)
+P. Lison and J. Tiedemann, 2016, OpenSubtitles2016: Extracting Large Parallel Corpora from Movie and TV Subtitles. In Proceedings of the 10th International Conference on Language Resources and Evaluation (LREC 2016)
+Holger Schwenk, Vishrav Chaudhary, Shuo Sun, Hongyu Gong and Paco Guzman, WikiMatrix: Mining 135M Parallel Sentences in 1620 Language Pairs from Wikipedia, arXiv, July 11 2019.
+El-Kishky, Ahmed and Renduchintala, Adi and Cross, James and Guzmán, Francisco and Koehn, Philipp - XLEnt: Mining Cross-lingual Entities with Lexical-Semantic-Phonetic Word Alignment
+Holger Schwenk, Guillaume Wenzek, Sergey Edunov, Edouard Grave, Armand Joulin and Angela Fan, CCMatrix: Mining Billions of High-Quality Parallel Sentences on the WEB

packages/translate-en_eo-1_5/metadata.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "package_version": "1.5",
+    "argos_version": "1.5",
+    "from_code": "en",
+    "from_name": "English",
+    "to_code": "eo",
+    "to_name": "Esperanto"
+}

packages/translate-en_eo-1_5/model/shared_vocabulary.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

packages/translate-en_eo-1_5/stanza/resources.json ADDED Viewed

The diff for this file is too large to render. See raw diff

packages/translate-en_fi-1_9/README.md ADDED Viewed

	@@ -0,0 +1,9 @@

+# English - Finnish version 1.0
+Authors: Jörg Tiedemann and Santhosh Thottingal
+Title: "OPUS-MT — Building open translation services for the World"
+Book Title: Proceedings of the 22nd Annual Conference of the European Association for Machine Translation (EAMT)
+Year: 2020
+Location: Lisbon, Portugal
+The original OPUS model from which this packaged model is derived is licensed CC-BY 4.0

packages/translate-en_fi-1_9/metadata.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "package_version": "1.9",
+    "argos_version": "1.9.0",
+    "from_code": "en",
+    "from_name": "English",
+    "to_code": "fi",
+    "to_name": "Finnish"
+}

packages/translate-en_gl-1_9/README.md ADDED Viewed

	@@ -0,0 +1,9 @@

+# English-Galician
+Data compiled by [Opus](https://opus.nlpl.eu/).
+Dictionary data from Wiktionary using [Wiktextract](https://github.com/tatuylonen/wiktextract).
+Includes pretrained models from [Stanza](https://github.com/stanfordnlp/stanza/).
+Credits:

packages/translate-en_gl-1_9/metadata.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "package_version": "1.9",
+    "argos_version": "1.5",
+    "from_code": "en",
+    "from_name": "English",
+    "to_code": "gl",
+    "to_name": "Galician"
+}

packages/translate-en_gl-1_9/model/config.json ADDED Viewed

	@@ -0,0 +1,10 @@

+{
+  "add_source_bos": false,
+  "add_source_eos": false,
+  "bos_token": "<s>",
+  "decoder_start_token": "<s>",
+  "eos_token": "</s>",
+  "layer_norm_epsilon": null,
+  "multi_query_attention": false,
+  "unk_token": "<unk>"
+}

packages/translate-en_gl-1_9/model/shared_vocabulary.json ADDED Viewed

The diff for this file is too large to render. See raw diff

packages/translate-en_gl-1_9/stanza/resources.json ADDED Viewed

The diff for this file is too large to render. See raw diff

packages/translate-en_id-1_9/README.md ADDED Viewed

	@@ -0,0 +1,9 @@

+# English - Indonesian version 1.0
+Authors: Jörg Tiedemann and Santhosh Thottingal
+Title: "OPUS-MT — Building open translation services for the World"
+Book Title: Proceedings of the 22nd Annual Conference of the European Association for Machine Translation (EAMT)
+Year: 2020
+Location: Lisbon, Portugal
+The original OPUS model from which this packaged model is derived is licensed CC-BY 4.0

packages/translate-en_id-1_9/metadata.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "package_version": "1.9",
+    "argos_version": "1.9.0",
+    "from_code": "en",
+    "from_name": "English",
+    "to_code": "id",
+    "to_name": "Indonesian"
+}

packages/translate-en_id-1_9/model/config.json ADDED Viewed

	@@ -0,0 +1,10 @@

+{
+  "add_source_bos": false,
+  "add_source_eos": true,
+  "bos_token": "<s>",
+  "decoder_start_token": "<s>",
+  "eos_token": "</s>",
+  "layer_norm_epsilon": null,
+  "multi_query_attention": false,
+  "unk_token": "<unk>"
+}

packages/translate-en_id-1_9/model/shared_vocabulary.json ADDED Viewed

The diff for this file is too large to render. See raw diff

packages/translate-en_id-1_9/stanza/resources.json ADDED Viewed

The diff for this file is too large to render. See raw diff

packages/translate-en_lt-1_9/README.md ADDED Viewed

	@@ -0,0 +1,9 @@

+# English - Lithuanian version 1.9
+Authors: Jörg Tiedemann and Santhosh Thottingal
+Title: "OPUS-MT — Building open translation services for the World"
+Book Title: Proceedings of the 22nd Annual Conference of the European Association for Machine Translation (EAMT)
+Year: 2020
+Location: Lisbon, Portugal
+The original OPUS model from which this packaged model is derived is licensed CC-BY 4.0

packages/translate-en_lt-1_9/metadata.json ADDED Viewed

	@@ -0,0 +1 @@


+{"package_version": "1.9", "argos_version": "1.9.0", "from_code": "en", "from_name": "English", "to_code": "lt", "to_name": "Lithuanian"}

packages/translate-en_lt-1_9/model/config.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+  "add_source_bos": false,
+  "add_source_eos": true,
+  "bos_token": "<s>",
+  "decoder_start_token": "<s>",
+  "eos_token": "</s>",
+  "layer_norm_epsilon": null,
+  "unk_token": "<unk>"
+}

packages/translate-en_lt-1_9/model/shared_vocabulary.json ADDED Viewed

The diff for this file is too large to render. See raw diff

packages/translate-en_lt-1_9/stanza/resources.json ADDED Viewed

The diff for this file is too large to render. See raw diff

packages/translate-en_ms-1_9/README.md ADDED Viewed

	@@ -0,0 +1,9 @@

+# English - Malay version 1.9
+Authors: Jörg Tiedemann and Santhosh Thottingal
+Title: "OPUS-MT — Building open translation services for the World"
+Book Title: Proceedings of the 22nd Annual Conference of the European Association for Machine Translation (EAMT)
+Year: 2020
+Location: Lisbon, Portugal
+The original OPUS model from which this packaged model is derived is licensed CC-BY 4.0

packages/translate-en_ms-1_9/metadata.json ADDED Viewed

	@@ -0,0 +1 @@


+{"package_version": "1.9", "argos_version": "1.9.0", "from_code": "en", "from_name": "English", "to_code": "ms", "to_name": "Malay"}

packages/translate-en_ms-1_9/model/config.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+    "add_source_bos": true,
+    "add_source_eos": true,
+    "bos_token": ">>msa<<",
+    "decoder_start_token": "<s>",
+    "eos_token": "</s>",
+    "layer_norm_epsilon": null,
+    "unk_token": "<unk>"
+}

packages/translate-en_ms-1_9/model/shared_vocabulary.json ADDED Viewed

The diff for this file is too large to render. See raw diff

packages/translate-en_ms-1_9/stanza/resources.json ADDED Viewed

The diff for this file is too large to render. See raw diff

packages/translate-en_nl-1_8/sentencepiece.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d05781fde84b8793191528a7a176e1b17f96265388f602379ee6ae3139f59141
+size 789525