thaile1809 committed on
Commit
0cb8280
·
verified ·
1 Parent(s): 97b7260

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. packages/en_es/README.md +6 -0
  2. packages/ja_en/README.md +38 -0
  3. packages/ja_en/metadata.json +8 -0
  4. packages/ja_en/model/shared_vocabulary.txt +0 -0
  5. packages/ja_en/stanza/resources.json +0 -0
  6. packages/translate-bn_en-1_9/README.md +9 -0
  7. packages/translate-bn_en-1_9/metadata.json +1 -0
  8. packages/translate-bn_en-1_9/model/config.json +9 -0
  9. packages/translate-bn_en-1_9/model/shared_vocabulary.json +0 -0
  10. packages/translate-bn_en-1_9/stanza/resources.json +0 -0
  11. packages/translate-el_en-1_9/README.md +9 -0
  12. packages/translate-el_en-1_9/metadata.json +1 -0
  13. packages/translate-el_en-1_9/model/config.json +9 -0
  14. packages/translate-el_en-1_9/model/shared_vocabulary.json +0 -0
  15. packages/translate-el_en-1_9/stanza/resources.json +0 -0
  16. packages/translate-en_ca-1_9/README.md +10 -0
  17. packages/translate-en_ca-1_9/metadata.json +1 -0
  18. packages/translate-en_ca-1_9/model/config.json +9 -0
  19. packages/translate-en_ca-1_9/model/shared_vocabulary.json +0 -0
  20. packages/translate-en_ca-1_9/stanza/resources.json +0 -0
  21. packages/translate-en_cs-1_9_6/README.md +9 -0
  22. packages/translate-en_cs-1_9_6/metadata.json +8 -0
  23. packages/translate-en_cs-1_9_6/model/config.json +10 -0
  24. packages/translate-en_cs-1_9_6/model/shared_vocabulary.json +0 -0
  25. packages/translate-en_cs-1_9_6/stanza/resources.json +0 -0
  26. packages/translate-en_el-1_9/README.md +9 -0
  27. packages/translate-en_el-1_9/metadata.json +1 -0
  28. packages/translate-en_el-1_9/model/config.json +9 -0
  29. packages/translate-en_el-1_9/model/shared_vocabulary.json +0 -0
  30. packages/translate-en_el-1_9/stanza/resources.json +0 -0
  31. packages/translate-en_eu-1_9/README.md +9 -0
  32. packages/translate-en_eu-1_9/metadata.json +8 -0
  33. packages/translate-en_eu-1_9/model/config.json +10 -0
  34. packages/translate-en_eu-1_9/model/shared_vocabulary.json +0 -0
  35. packages/translate-en_eu-1_9/stanza/resources.json +0 -0
  36. packages/translate-en_fr-1_9/README.md +9 -0
  37. packages/translate-en_fr-1_9/metadata.json +1 -0
  38. packages/translate-en_fr-1_9/model/config.json +9 -0
  39. packages/translate-en_fr-1_9/model/shared_vocabulary.json +0 -0
  40. packages/translate-en_fr-1_9/stanza/resources.json +0 -0
  41. packages/translate-en_hu-1_9/README.md +9 -0
  42. packages/translate-en_hu-1_9/metadata.json +8 -0
  43. packages/translate-en_hu-1_9/model/config.json +10 -0
  44. packages/translate-en_hu-1_9/model/shared_vocabulary.json +0 -0
  45. packages/translate-en_hu-1_9/stanza/resources.json +0 -0
  46. packages/translate-en_lv-1_9/README.md +9 -0
  47. packages/translate-en_lv-1_9/metadata.json +1 -0
  48. packages/translate-en_lv-1_9/model/config.json +9 -0
  49. packages/translate-en_lv-1_9/model/shared_vocabulary.json +0 -0
  50. packages/translate-en_lv-1_9/stanza/resources.json +0 -0
packages/en_es/README.md ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ # English-Spanish
2
+
3
+ Trained on the [OpenSubtitles](http://opus.nlpl.eu/OpenSubtitles.php), [ParaCrawl](http://opus.nlpl.eu/ParaCrawl.php), and [UNPC](http://opus.nlpl.eu/UNPC.php) parallel corpora compiled by [Opus](http://opus.nlpl.eu/index.php).
4
+
5
+ Includes pretrained models from [Stanza](https://github.com/stanfordnlp/stanza/blob/master/LICENSE).
6
+
packages/ja_en/README.md ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Japanese-English
2
+
3
+ Data compiled by [Opus](https://opus.nlpl.eu/).
4
+
5
+ Dictionary data from Wiktionary using [Wiktextract](https://github.com/tatuylonen/wiktextract).
6
+
7
+ Includes pretrained models from [Stanza](https://github.com/stanfordnlp/stanza/).
8
+
9
+ Credits:
10
+ @inproceedings{elkishky_ccaligned_2020,
11
+ author = {El-Kishky, Ahmed and Chaudhary, Vishrav and Guzmán, Francisco and Koehn, Philipp},
12
+ booktitle = {Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP 2020)},
13
+ month = {November},
14
+ title = {{CCAligned}: A Massive Collection of Cross-lingual Web-Document Pairs},
15
+ year = {2020},
16
+ address = "Online",
17
+ publisher = "Association for Computational Linguistics",
18
+ url = "https://www.aclweb.org/anthology/2020.emnlp-main.480",
19
+ doi = "10.18653/v1/2020.emnlp-main.480",
20
+ pages = "5960--5969"
21
+ }
22
+
23
+ Holger Schwenk, Vishrav Chaudhary, Shuo Sun, Hongyu Gong and Paco Guzman, WikiMatrix: Mining 135M Parallel Sentences in 1620 Language Pairs from Wikipedia, arXiv, July 11 2019.
24
+
25
+ @inproceedings{elkishky_ccaligned_2020,
26
+ author = {El-Kishky, Ahmed and Chaudhary, Vishrav and Guzmán, Francisco and Koehn, Philipp},
27
+ booktitle = {Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP 2020)},
28
+ month = {November},
29
+ title = {{CCAligned}: A Massive Collection of Cross-lingual Web-Document Pairs},
30
+ year = {2020},
31
+ address = "Online",
32
+ publisher = "Association for Computational Linguistics",
33
+ url = "https://www.aclweb.org/anthology/2020.emnlp-main.480",
34
+ doi = "10.18653/v1/2020.emnlp-main.480",
35
+ pages = "5960--5969"
36
+ }
37
+
38
+ P. Lison and J. Tiedemann, 2016, OpenSubtitles2016: Extracting Large Parallel Corpora from Movie and TV Subtitles. In Proceedings of the 10th International Conference on Language Resources and Evaluation (LREC 2016)
packages/ja_en/metadata.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "package_version": "1.1",
3
+ "argos_version": "1.1",
4
+ "from_code": "ja",
5
+ "from_name": "Japanese",
6
+ "to_code": "en",
7
+ "to_name": "English"
8
+ }
packages/ja_en/model/shared_vocabulary.txt ADDED
The diff for this file is too large to render. See raw diff
 
packages/ja_en/stanza/resources.json ADDED
The diff for this file is too large to render. See raw diff
 
packages/translate-bn_en-1_9/README.md ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ # Bengali - English version 1.9
2
+
3
+ Authors: Jörg Tiedemann and Santhosh Thottingal
4
+ Title: "OPUS-MT — Building open translation services for the World"
5
+ Book Title: Proceedings of the 22nd Annual Conference of the European Association for Machine Translation (EAMT)
6
+ Year: 2020
7
+ Location: Lisbon, Portugal
8
+
9
+ The original OPUS model from which this packaged model is derived is licensed CC-BY 4.0
packages/translate-bn_en-1_9/metadata.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"package_version": "1.9", "argos_version": "1.9.0", "from_code": "bn", "from_name": "Bengali", "to_code": "en", "to_name": "English"}
packages/translate-bn_en-1_9/model/config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_source_bos": false,
3
+ "add_source_eos": true,
4
+ "bos_token": "<s>",
5
+ "decoder_start_token": "<s>",
6
+ "eos_token": "</s>",
7
+ "layer_norm_epsilon": null,
8
+ "unk_token": "<unk>"
9
+ }
packages/translate-bn_en-1_9/model/shared_vocabulary.json ADDED
The diff for this file is too large to render. See raw diff
 
packages/translate-bn_en-1_9/stanza/resources.json ADDED
The diff for this file is too large to render. See raw diff
 
packages/translate-el_en-1_9/README.md ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ # Greek - English version 1.9
2
+
3
+ Authors: Jörg Tiedemann and Santhosh Thottingal
4
+ Title: "OPUS-MT — Building open translation services for the World"
5
+ Book Title: Proceedings of the 22nd Annual Conference of the European Association for Machine Translation (EAMT)
6
+ Year: 2020
7
+ Location: Lisbon, Portugal
8
+
9
+ The original OPUS model from which this packaged model is derived is licensed CC-BY 4.0
packages/translate-el_en-1_9/metadata.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"package_version": "1.9", "argos_version": "1.9.0", "from_code": "el", "from_name": "Greek", "to_code": "en", "to_name": "English"}
packages/translate-el_en-1_9/model/config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_source_bos": false,
3
+ "add_source_eos": true,
4
+ "bos_token": "<s>",
5
+ "decoder_start_token": "<s>",
6
+ "eos_token": "</s>",
7
+ "layer_norm_epsilon": null,
8
+ "unk_token": "<unk>"
9
+ }
packages/translate-el_en-1_9/model/shared_vocabulary.json ADDED
The diff for this file is too large to render. See raw diff
 
packages/translate-el_en-1_9/stanza/resources.json ADDED
The diff for this file is too large to render. See raw diff
 
packages/translate-en_ca-1_9/README.md ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ # English - Catalan version 1.9
2
+
3
+ Authors: Jörg Tiedemann and Santhosh Thottingal
4
+ Title: "OPUS-MT — Building open translation services for the World"
5
+ Book Title: Proceedings of the 22nd Annual Conference of the European Association for Machine Translation (EAMT)
6
+ Year: 2020
7
+ Location: Lisbon, Portugal
8
+
9
+ The original OPUS model from which this packaged model is derived is licensed CC-BY 4.0
10
+
packages/translate-en_ca-1_9/metadata.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"package_version": "1.9", "argos_version": "1.9.0", "from_code": "en", "from_name": "English", "to_code": "ca", "to_name": "Catalan"}
packages/translate-en_ca-1_9/model/config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_source_bos": false,
3
+ "add_source_eos": true,
4
+ "bos_token": "<s>",
5
+ "decoder_start_token": "<s>",
6
+ "eos_token": "</s>",
7
+ "layer_norm_epsilon": null,
8
+ "unk_token": "<unk>"
9
+ }
packages/translate-en_ca-1_9/model/shared_vocabulary.json ADDED
The diff for this file is too large to render. See raw diff
 
packages/translate-en_ca-1_9/stanza/resources.json ADDED
The diff for this file is too large to render. See raw diff
 
packages/translate-en_cs-1_9_6/README.md ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ # English - Czech version 1.9.6
2
+
3
+ Authors: Jörg Tiedemann and Santhosh Thottingal
4
+ Title: "OPUS-MT — Building open translation services for the World"
5
+ Book Title: Proceedings of the 22nd Annual Conference of the European Association for Machine Translation (EAMT)
6
+ Year: 2020
7
+ Location: Lisbon, Portugal
8
+
9
+ The original OPUS model from which this packaged model is derived is licensed CC-BY 4.0
packages/translate-en_cs-1_9_6/metadata.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "package_version": "1.9.6",
3
+ "argos_version": "1.9.0",
4
+ "from_code": "en",
5
+ "from_name": "English",
6
+ "to_code": "cs",
7
+ "to_name": "Czech"
8
+ }
packages/translate-en_cs-1_9_6/model/config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_source_bos": false,
3
+ "add_source_eos": true,
4
+ "bos_token": "<s>",
5
+ "decoder_start_token": "<s>",
6
+ "eos_token": "</s>",
7
+ "layer_norm_epsilon": null,
8
+ "multi_query_attention": false,
9
+ "unk_token": "<unk>"
10
+ }
packages/translate-en_cs-1_9_6/model/shared_vocabulary.json ADDED
The diff for this file is too large to render. See raw diff
 
packages/translate-en_cs-1_9_6/stanza/resources.json ADDED
The diff for this file is too large to render. See raw diff
 
packages/translate-en_el-1_9/README.md ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ # English - Greek version 1.9
2
+
3
+ Authors: Jörg Tiedemann and Santhosh Thottingal
4
+ Title: "OPUS-MT — Building open translation services for the World"
5
+ Book Title: Proceedings of the 22nd Annual Conference of the European Association for Machine Translation (EAMT)
6
+ Year: 2020
7
+ Location: Lisbon, Portugal
8
+
9
+ The original OPUS model from which this packaged model is derived is licensed CC-BY 4.0
packages/translate-en_el-1_9/metadata.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"package_version": "1.9", "argos_version": "1.9.0", "from_code": "en", "from_name": "English", "to_code": "el", "to_name": "Greek"}
packages/translate-en_el-1_9/model/config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_source_bos": false,
3
+ "add_source_eos": true,
4
+ "bos_token": "<s>",
5
+ "decoder_start_token": "<s>",
6
+ "eos_token": "</s>",
7
+ "layer_norm_epsilon": null,
8
+ "unk_token": "<unk>"
9
+ }
packages/translate-en_el-1_9/model/shared_vocabulary.json ADDED
The diff for this file is too large to render. See raw diff
 
packages/translate-en_el-1_9/stanza/resources.json ADDED
The diff for this file is too large to render. See raw diff
 
packages/translate-en_eu-1_9/README.md ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ # English-Basque
2
+
3
+ Data compiled by [Opus](https://opus.nlpl.eu/).
4
+
5
+ Dictionary data from Wiktionary using [Wiktextract](https://github.com/tatuylonen/wiktextract).
6
+
7
+ Includes pretrained models from [Stanza](https://github.com/stanfordnlp/stanza/).
8
+
9
+ Credits:
packages/translate-en_eu-1_9/metadata.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "package_version": "1.9",
3
+ "argos_version": "1.5",
4
+ "from_code": "en",
5
+ "from_name": "English",
6
+ "to_code": "eu",
7
+ "to_name": "Basque"
8
+ }
packages/translate-en_eu-1_9/model/config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_source_bos": false,
3
+ "add_source_eos": false,
4
+ "bos_token": "<s>",
5
+ "decoder_start_token": "<s>",
6
+ "eos_token": "</s>",
7
+ "layer_norm_epsilon": null,
8
+ "multi_query_attention": false,
9
+ "unk_token": "<unk>"
10
+ }
packages/translate-en_eu-1_9/model/shared_vocabulary.json ADDED
The diff for this file is too large to render. See raw diff
 
packages/translate-en_eu-1_9/stanza/resources.json ADDED
The diff for this file is too large to render. See raw diff
 
packages/translate-en_fr-1_9/README.md ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ # English - French version 1.9
2
+
3
+ Authors: Jörg Tiedemann and Santhosh Thottingal
4
+ Title: "OPUS-MT — Building open translation services for the World"
5
+ Book Title: Proceedings of the 22nd Annual Conference of the European Association for Machine Translation (EAMT)
6
+ Year: 2020
7
+ Location: Lisbon, Portugal
8
+
9
+ The original OPUS model from which this packaged model is derived is licensed CC-BY 4.0
packages/translate-en_fr-1_9/metadata.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"package_version": "1.9", "argos_version": "1.9.0", "from_code": "en", "from_name": "English", "to_code": "fr", "to_name": "French"}
packages/translate-en_fr-1_9/model/config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_source_bos": false,
3
+ "add_source_eos": true,
4
+ "bos_token": "<s>",
5
+ "decoder_start_token": "<s>",
6
+ "eos_token": "</s>",
7
+ "layer_norm_epsilon": null,
8
+ "unk_token": "<unk>"
9
+ }
packages/translate-en_fr-1_9/model/shared_vocabulary.json ADDED
The diff for this file is too large to render. See raw diff
 
packages/translate-en_fr-1_9/stanza/resources.json ADDED
The diff for this file is too large to render. See raw diff
 
packages/translate-en_hu-1_9/README.md ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ # English - Hungarian version 1.9
2
+
3
+ Authors: Jörg Tiedemann and Santhosh Thottingal
4
+ Title: "OPUS-MT — Building open translation services for the World"
5
+ Book Title: Proceedings of the 22nd Annual Conference of the European Association for Machine Translation (EAMT)
6
+ Year: 2020
7
+ Location: Lisbon, Portugal
8
+
9
+ The original OPUS model from which this packaged model is derived is licensed CC-BY 4.0
packages/translate-en_hu-1_9/metadata.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "package_version": "1.9",
3
+ "argos_version": "1.9.0",
4
+ "from_code": "en",
5
+ "from_name": "English",
6
+ "to_code": "hu",
7
+ "to_name": "Hungarian"
8
+ }
packages/translate-en_hu-1_9/model/config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_source_bos": false,
3
+ "add_source_eos": true,
4
+ "bos_token": "<s>",
5
+ "decoder_start_token": "<s>",
6
+ "eos_token": "</s>",
7
+ "layer_norm_epsilon": null,
8
+ "multi_query_attention": false,
9
+ "unk_token": "<unk>"
10
+ }
packages/translate-en_hu-1_9/model/shared_vocabulary.json ADDED
The diff for this file is too large to render. See raw diff
 
packages/translate-en_hu-1_9/stanza/resources.json ADDED
The diff for this file is too large to render. See raw diff
 
packages/translate-en_lv-1_9/README.md ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ # English - Latvian version 1.9
2
+
3
+ Authors: Jörg Tiedemann and Santhosh Thottingal
4
+ Title: "OPUS-MT — Building open translation services for the World"
5
+ Book Title: Proceedings of the 22nd Annual Conference of the European Association for Machine Translation (EAMT)
6
+ Year: 2020
7
+ Location: Lisbon, Portugal
8
+
9
+ The original OPUS model from which this packaged model is derived is licensed CC-BY 4.0
packages/translate-en_lv-1_9/metadata.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"package_version": "1.9", "argos_version": "1.9.0", "from_code": "en", "from_name": "English", "to_code": "lv", "to_name": "Latvian"}
packages/translate-en_lv-1_9/model/config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_source_bos": true,
3
+ "add_source_eos": true,
4
+ "bos_token": ">>lav<<",
5
+ "decoder_start_token": "<s>",
6
+ "eos_token": "</s>",
7
+ "layer_norm_epsilon": null,
8
+ "unk_token": "<unk>"
9
+ }
packages/translate-en_lv-1_9/model/shared_vocabulary.json ADDED
The diff for this file is too large to render. See raw diff
 
packages/translate-en_lv-1_9/stanza/resources.json ADDED
The diff for this file is too large to render. See raw diff