remove comments and replace original paths with generic paths
Browse files- bpe-es-gl_emb.yaml +33 -36
bpe-es-gl_emb.yaml
CHANGED
|
@@ -8,60 +8,60 @@ overwrite: True
|
|
| 8 |
# Corpus opts:
|
| 9 |
data:
|
| 10 |
europarl:
|
| 11 |
-
path_src:
|
| 12 |
-
path_tgt:
|
| 13 |
transforms: [bpe, filtertoolong]
|
| 14 |
-
weight: 120
|
| 15 |
opensub:
|
| 16 |
-
path_src:
|
| 17 |
-
path_tgt:
|
| 18 |
transforms: [bpe, filtertoolong]
|
| 19 |
-
weight: 180
|
| 20 |
dgt:
|
| 21 |
-
path_src:
|
| 22 |
-
path_tgt:
|
| 23 |
transforms: [bpe, filtertoolong]
|
| 24 |
-
weight: 18
|
| 25 |
cluvi:
|
| 26 |
-
path_src:
|
| 27 |
-
path_tgt:
|
| 28 |
transforms: [bpe, filtertoolong]
|
| 29 |
-
weight: 40
|
| 30 |
opensub-es-gl:
|
| 31 |
-
path_src:
|
| 32 |
-
path_tgt:
|
| 33 |
transforms: [bpe, filtertoolong]
|
| 34 |
-
weight: 25
|
| 35 |
ted2020:
|
| 36 |
-
path_src:
|
| 37 |
-
path_tgt:
|
| 38 |
transforms: [bpe, filtertoolong]
|
| 39 |
-
weight: 10
|
| 40 |
corgaback:
|
| 41 |
-
path_src:
|
| 42 |
-
path_tgt:
|
| 43 |
transforms: [bpe, filtertoolong]
|
| 44 |
-
weight: 13
|
| 45 |
ccmatrix:
|
| 46 |
-
path_src:
|
| 47 |
-
path_tgt:
|
| 48 |
transforms: [bpe, filtertoolong]
|
| 49 |
-
weight: 180
|
| 50 |
resto:
|
| 51 |
-
path_src:
|
| 52 |
-
path_tgt:
|
| 53 |
transforms: [bpe, filtertoolong]
|
| 54 |
-
weight: 120
|
| 55 |
opensub_2018:
|
| 56 |
-
path_src:
|
| 57 |
-
path_tgt:
|
| 58 |
transforms: [bpe, filtertoolong]
|
| 59 |
-
weight: 25
|
| 60 |
|
| 61 |
|
| 62 |
valid:
|
| 63 |
-
path_src:
|
| 64 |
-
path_tgt:
|
| 65 |
transforms: [bpe, filtertoolong]
|
| 66 |
|
| 67 |
### Transform related opts:
|
|
@@ -70,8 +70,6 @@ src_subword_model: ./bpe/es.code
|
|
| 70 |
tgt_subword_model: ./bpe/gl.code
|
| 71 |
src_subword_vocab: ./run/vocab/es-gl/bpe.vocab.src
|
| 72 |
tgt_subword_vocab: ./run/vocab/es-gl/bpe.vocab.tgt
|
| 73 |
-
#src_subword_model: ../sentencepiece/en-gl/en.sp.model
|
| 74 |
-
#tgt_subword_model: ../sentencepiece/en-gl/gl.sp.model
|
| 75 |
src_subword_type: bpe
|
| 76 |
tgt_subword_type: bpe
|
| 77 |
|
|
@@ -88,7 +86,7 @@ tgt_embeddings: ../embeddings/gl.emb.txt
|
|
| 88 |
embeddings_type: "word2vec"
|
| 89 |
|
| 90 |
# word_vec_size needs to match the dimensionality of the pretrained embeddings
|
| 91 |
-
word_vec_size:
|
| 92 |
|
| 93 |
|
| 94 |
#### Filter
|
|
@@ -146,7 +144,6 @@ enc_layers: 6
|
|
| 146 |
dec_layers: 6
|
| 147 |
heads: 8
|
| 148 |
rnn_size: 512
|
| 149 |
-
word_vec_size: 512
|
| 150 |
transformer_ff: 2048
|
| 151 |
dropout_steps: [0]
|
| 152 |
dropout: [0.1]
|
|
|
|
| 8 |
# Corpus opts:
|
| 9 |
data:
|
| 10 |
europarl:
|
| 11 |
+
path_src: corpora/europarl/partitions/es_train.txt
|
| 12 |
+
path_tgt: corpora/europarl_translit/partitions/gl_train.txt
|
| 13 |
transforms: [bpe, filtertoolong]
|
| 14 |
+
weight: 120
|
| 15 |
opensub:
|
| 16 |
+
path_src: corpora/opensub/partitions/es_train.txt
|
| 17 |
+
path_tgt: corpora/opensub_translit/partitions/gl_train.txt
|
| 18 |
transforms: [bpe, filtertoolong]
|
| 19 |
+
weight: 180
|
| 20 |
dgt:
|
| 21 |
+
path_src: corpora/dgt/partitions/es_train.txt
|
| 22 |
+
path_tgt: corpora/dgt_translit/partitions/gl_train.txt
|
| 23 |
transforms: [bpe, filtertoolong]
|
| 24 |
+
weight: 18
|
| 25 |
cluvi:
|
| 26 |
+
path_src: corpora/cluvi/partitions/es_train.txt
|
| 27 |
+
path_tgt: corpora/cluvi/partitions/gl_train.txt
|
| 28 |
transforms: [bpe, filtertoolong]
|
| 29 |
+
weight: 40
|
| 30 |
opensub-es-gl:
|
| 31 |
+
path_src: corpora/opensub-es-gl/partitions/es_train.txt
|
| 32 |
+
path_tgt: corpora/opensub-es-gl/partitions/gl_train.txt
|
| 33 |
transforms: [bpe, filtertoolong]
|
| 34 |
+
weight: 25
|
| 35 |
ted2020:
|
| 36 |
+
path_src: corpora/ted2020/partitions/es_train.txt
|
| 37 |
+
path_tgt: corpora/ted2020/partitions/gl_train.txt
|
| 38 |
transforms: [bpe, filtertoolong]
|
| 39 |
+
weight: 10
|
| 40 |
corgaback:
|
| 41 |
+
path_src: corpora/corgaback/partitions/es_train.txt
|
| 42 |
+
path_tgt: corpora/corgaback/partitions/gl_train.txt
|
| 43 |
transforms: [bpe, filtertoolong]
|
| 44 |
+
weight: 13
|
| 45 |
ccmatrix:
|
| 46 |
+
path_src: corpora/ccmatrix/es.txt
|
| 47 |
+
path_tgt: corpora/ccmatrix/gl.txt
|
| 48 |
transforms: [bpe, filtertoolong]
|
| 49 |
+
weight: 180
|
| 50 |
resto:
|
| 51 |
+
path_src: corpora/resto/es.txt
|
| 52 |
+
path_tgt: corpora/resto/gl.txt
|
| 53 |
transforms: [bpe, filtertoolong]
|
| 54 |
+
weight: 120
|
| 55 |
opensub_2018:
|
| 56 |
+
path_src: corpora/opensub_2018/es.txt
|
| 57 |
+
path_tgt: corpora/opensub_2018/gl.txt
|
| 58 |
transforms: [bpe, filtertoolong]
|
| 59 |
+
weight: 25
|
| 60 |
|
| 61 |
|
| 62 |
valid:
|
| 63 |
+
path_src: corpora/partitions/all-es_valid.txt
|
| 64 |
+
path_tgt: corpora/partitions_translit/all-gl_valid.txt
|
| 65 |
transforms: [bpe, filtertoolong]
|
| 66 |
|
| 67 |
### Transform related opts:
|
|
|
|
| 70 |
tgt_subword_model: ./bpe/gl.code
|
| 71 |
src_subword_vocab: ./run/vocab/es-gl/bpe.vocab.src
|
| 72 |
tgt_subword_vocab: ./run/vocab/es-gl/bpe.vocab.tgt
|
|
|
|
|
|
|
| 73 |
src_subword_type: bpe
|
| 74 |
tgt_subword_type: bpe
|
| 75 |
|
|
|
|
| 86 |
embeddings_type: "word2vec"
|
| 87 |
|
| 88 |
# word_vec_size needs to match the dimensionality of the pretrained embeddings
|
| 89 |
+
word_vec_size: 512
|
| 90 |
|
| 91 |
|
| 92 |
#### Filter
|
|
|
|
| 144 |
dec_layers: 6
|
| 145 |
heads: 8
|
| 146 |
rnn_size: 512
|
|
|
|
| 147 |
transformer_ff: 2048
|
| 148 |
dropout_steps: [0]
|
| 149 |
dropout: [0.1]
|