Upload 4 files
Browse files
trans_fairseq/scripts/run1.sh
CHANGED
|
@@ -37,6 +37,7 @@ num_workers=8
|
|
| 37 |
|
| 38 |
# specified param
|
| 39 |
# Global Batch=卡数*max-tokens*梯度累计,对于训练数据较大的语种(train-set几十M),global batch在 100k tokens以上较好
|
|
|
|
| 40 |
arch=my_transformer_vaswani_wmt_en_de_big
|
| 41 |
use_rmsnorm=0
|
| 42 |
use_llama_ffn=0
|
|
|
|
| 37 |
|
| 38 |
# specified param
|
| 39 |
# Global Batch=卡数*max-tokens*梯度累计,对于训练数据较大的语种(train-set几十M),global batch在 100k tokens以上较好
|
| 40 |
+ conf_name=my_transformer_big
|
| 41 |
arch=my_transformer_vaswani_wmt_en_de_big
|
| 42 |
use_rmsnorm=0
|
| 43 |
use_llama_ffn=0
|
trans_fairseq/scripts/run2.sh
CHANGED
|
@@ -37,6 +37,7 @@ num_workers=8
|
|
| 37 |
|
| 38 |
# specified param
|
| 39 |
# Global Batch=卡数*max-tokens*梯度累计,对于训练数据较大的语种(train-set几十M),global batch在 100k tokens以上较好
|
|
|
|
| 40 |
arch=my_transformer_vaswani_wmt_en_de_big
|
| 41 |
use_rmsnorm=0
|
| 42 |
use_llama_ffn=1
|
|
|
|
| 37 |
|
| 38 |
# specified param
|
| 39 |
# Global Batch=卡数*max-tokens*梯度累计,对于训练数据较大的语种(train-set几十M),global batch在 100k tokens以上较好
|
| 40 |
+ conf_name=my_transformer_big
|
| 41 |
arch=my_transformer_vaswani_wmt_en_de_big
|
| 42 |
use_rmsnorm=0
|
| 43 |
use_llama_ffn=1
|
trans_fairseq/scripts/run3.sh
CHANGED
|
@@ -37,6 +37,7 @@ num_workers=8
|
|
| 37 |
|
| 38 |
# specified param
|
| 39 |
# Global Batch=卡数*max-tokens*梯度累计,对于训练数据较大的语种(train-set几十M),global batch在 100k tokens以上较好
|
|
|
|
| 40 |
arch=my_transformer_vaswani_wmt_en_de_big
|
| 41 |
use_rmsnorm=1
|
| 42 |
use_llama_ffn=0
|
|
|
|
| 37 |
|
| 38 |
# specified param
|
| 39 |
# Global Batch=卡数*max-tokens*梯度累计,对于训练数据较大的语种(train-set几十M),global batch在 100k tokens以上较好
|
| 40 |
+ conf_name=my_transformer_big
|
| 41 |
arch=my_transformer_vaswani_wmt_en_de_big
|
| 42 |
use_rmsnorm=1
|
| 43 |
use_llama_ffn=0
|
trans_fairseq/scripts/run4.sh
CHANGED
|
@@ -37,6 +37,7 @@ num_workers=8
|
|
| 37 |
|
| 38 |
# specified param
|
| 39 |
# Global Batch=卡数*max-tokens*梯度累计,对于训练数据较大的语种(train-set几十M),global batch在 100k tokens以上较好
|
|
|
|
| 40 |
arch=my_transformer_vaswani_wmt_en_de_big
|
| 41 |
use_rmsnorm=1
|
| 42 |
use_llama_ffn=1
|
|
|
|
| 37 |
|
| 38 |
# specified param
|
| 39 |
# Global Batch=卡数*max-tokens*梯度累计,对于训练数据较大的语种(train-set几十M),global batch在 100k tokens以上较好
|
| 40 |
+ conf_name=my_transformer_big
|
| 41 |
arch=my_transformer_vaswani_wmt_en_de_big
|
| 42 |
use_rmsnorm=1
|
| 43 |
use_llama_ffn=1
|