sleepyhead111 committed on
Commit
2bc19e9
·
verified ·
1 Parent(s): f094e3c

Upload 4 files

Browse files
trans_fairseq/scripts/run1.sh CHANGED
@@ -37,6 +37,7 @@ num_workers=8
37
 
38
  # specified param
39
  # Global Batch=卡数*max-tokens*梯度累计,对于训练数据较大的语种(train-set几十M),global batch在 100k tokens以上较好
 
40
  arch=my_transformer_vaswani_wmt_en_de_big
41
  use_rmsnorm=0
42
  use_llama_ffn=0
 
37
 
38
  # specified param
39
  # Global Batch=卡数*max-tokens*梯度累计,对于训练数据较大的语种(train-set几十M),global batch在 100k tokens以上较好
40
+ conf_name=my_transformer_big
41
  arch=my_transformer_vaswani_wmt_en_de_big
42
  use_rmsnorm=0
43
  use_llama_ffn=0
trans_fairseq/scripts/run2.sh CHANGED
@@ -37,6 +37,7 @@ num_workers=8
37
 
38
  # specified param
39
  # Global Batch=卡数*max-tokens*梯度累计,对于训练数据较大的语种(train-set几十M),global batch在 100k tokens以上较好
 
40
  arch=my_transformer_vaswani_wmt_en_de_big
41
  use_rmsnorm=0
42
  use_llama_ffn=1
 
37
 
38
  # specified param
39
  # Global Batch=卡数*max-tokens*梯度累计,对于训练数据较大的语种(train-set几十M),global batch在 100k tokens以上较好
40
+ conf_name=my_transformer_big
41
  arch=my_transformer_vaswani_wmt_en_de_big
42
  use_rmsnorm=0
43
  use_llama_ffn=1
trans_fairseq/scripts/run3.sh CHANGED
@@ -37,6 +37,7 @@ num_workers=8
37
 
38
  # specified param
39
  # Global Batch=卡数*max-tokens*梯度累计,对于训练数据较大的语种(train-set几十M),global batch在 100k tokens以上较好
 
40
  arch=my_transformer_vaswani_wmt_en_de_big
41
  use_rmsnorm=1
42
  use_llama_ffn=0
 
37
 
38
  # specified param
39
  # Global Batch=卡数*max-tokens*梯度累计,对于训练数据较大的语种(train-set几十M),global batch在 100k tokens以上较好
40
+ conf_name=my_transformer_big
41
  arch=my_transformer_vaswani_wmt_en_de_big
42
  use_rmsnorm=1
43
  use_llama_ffn=0
trans_fairseq/scripts/run4.sh CHANGED
@@ -37,6 +37,7 @@ num_workers=8
37
 
38
  # specified param
39
  # Global Batch=卡数*max-tokens*梯度累计,对于训练数据较大的语种(train-set几十M),global batch在 100k tokens以上较好
 
40
  arch=my_transformer_vaswani_wmt_en_de_big
41
  use_rmsnorm=1
42
  use_llama_ffn=1
 
37
 
38
  # specified param
39
  # Global Batch=卡数*max-tokens*梯度累计,对于训练数据较大的语种(train-set几十M),global batch在 100k tokens以上较好
40
+ conf_name=my_transformer_big
41
  arch=my_transformer_vaswani_wmt_en_de_big
42
  use_rmsnorm=1
43
  use_llama_ffn=1