Update eole-config.yaml
Show huggingface datasets rather than local files
eole-config.yaml  +15 -14
```diff
@@ -8,8 +8,8 @@ tensorboard: true
 tensorboard_log_dir: tensorboard
 
 ### Vocab
-src_vocab:
-tgt_vocab:
+src_vocab: fa.eole.vocab
+tgt_vocab: en.eole.vocab
 src_vocab_size: 32000
 tgt_vocab_size: 32000
 vocab_size_multiple: 8
@@ -18,26 +18,29 @@ n_sample: 0
 
 data:
     corpus_1:
-        path_src:
-        path_tgt:
+        path_src: hf://quickmt/quickmt-train.fa-en/fa
+        path_tgt: hf://quickmt/quickmt-train.fa-en/en
+        path_sco: hf://quickmt/quickmt-train.fa-en/sco
         weight: 2
     corpus_2:
-        path_src: /
-        path_tgt: /
+        path_src: hf://quickmt/newscrawl2024-en-backtranslated-fa/fa
+        path_tgt: hf://quickmt/newscrawl2024-en-backtranslated-fa/en
+        path_sco: hf://quickmt/newscrawl2024-en-backtranslated-fa/sco
         weight: 1
     corpus_3:
-        path_src: /
-        path_tgt: /
+        path_src: hf://quickmt/madlad400-en-backtranslated-fa/fa
+        path_tgt: hf://quickmt/madlad400-en-backtranslated-fa/en
+        path_sco: hf://quickmt/madlad400-en-backtranslated-fa/sco
         weight: 2
     valid:
-        path_src:
-        path_tgt:
+        path_src: dev.fa
+        path_tgt: dev.en
 
 transforms: [sentencepiece, filtertoolong]
 transforms_configs:
     sentencepiece:
-        src_subword_model: ""
-        tgt_subword_model: ""
+        src_subword_model: "fa.spm.model"
+        tgt_subword_model: "en.spm.model"
     filtertoolong:
         src_seq_length: 256
         tgt_seq_length: 256
@@ -55,7 +58,6 @@ training:
     gpu_ranks: [0]
 
     # Batching 120,000 tokens
-    # For RTX 5090, 15000 batch size, accum_count 8
     batch_type: "tokens"
     batch_size: 6000
     valid_batch_size: 2048
@@ -66,7 +68,6 @@ training:
     # Optimizer & Compute
     compute_dtype: "fp16"
    optim: "adamw"
-    #use_amp: True
     learning_rate: 3.0
     warmup_steps: 5000
     decay_method: "noam"
```
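Note that only the three training corpora move to the Hugging Face Hub; the vocabs (fa.eole.vocab, en.eole.vocab), the SentencePiece models (fa.spm.model, en.spm.model) and the dev set (dev.fa, dev.en) remain local files. Before committing to a long run it can be worth checking that the Hub repos resolve and stream. A minimal sketch with the `datasets` library, assuming each repo exposes a `train` split and that the `fa`/`en`/`sco` suffixes in the `hf://` paths name dataset columns (both inferred from the config, not guaranteed by it):

```python
# Sanity check (not part of the commit): peek at the first row of each Hub corpus
# referenced in eole-config.yaml without downloading the full dataset.
from datasets import load_dataset

repos = [
    "quickmt/quickmt-train.fa-en",
    "quickmt/newscrawl2024-en-backtranslated-fa",
    "quickmt/madlad400-en-backtranslated-fa",
]

for repo in repos:
    # streaming=True reads rows lazily, so this stays cheap even for large corpora
    ds = load_dataset(repo, split="train", streaming=True)
    first = next(iter(ds))
    print(f"{repo}: columns={sorted(first)}")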