sleepyhead111 committed on
Commit
a8b45d3
·
verified ·
1 Parent(s): 55184cc

Upload trans_fairseq/scripts/run.sh with huggingface_hub

Browse files
Files changed (1) hide show
  1. trans_fairseq/scripts/run.sh +19 -18
trans_fairseq/scripts/run.sh CHANGED
@@ -1,8 +1,8 @@
1
  #! /usr/bin/bash
2
  set -eux
3
 
4
- train_device=1,2
5
- eval_device=2
6
  # xzq-fairseq
7
  root_dir=$(dirname "$PWD")
8
 
@@ -18,7 +18,7 @@ trainable_data_dir=$data_dir/trainable_data
18
  user_dir=$root_dir/my_dir
19
 
20
  ## eval&decode param
21
- decode_max_tokens=1024
22
  beam=5
23
  nbest=1
24
  lenpen=1.0
@@ -28,44 +28,44 @@ criterion=label_smoothed_cross_entropy
28
  label_smoothing=0.1
29
  seed=42
30
  max_epoch=40
31
- keep_last_epochs=5
32
- keep_best_checkpoints=3
33
  patience=5
34
  num_workers=8
35
 
36
  # specified param
37
- conf_name=transformer_base
38
  # Global Batch=卡数*max-tokens*梯度累计,对于训练数据较大的语种(train-set几十M),global batch在 100k tokens以上较好
39
- if [ $conf_name == "transformer_base" ]; then
40
- arch=transformer
41
  use_rmsnorm=0
42
  use_llama_ffn=0
43
  use_rope=0
44
  activation_fn=relu
45
- encoder_ffn_embed_dim=2048
46
  share_all_embeddings=1
47
  share_decoder_input_output_embed=1
48
  learing_rate=1e-3
49
  warmup=4000
50
  max_tokens=8192
51
  weight_decay=0.0
52
- dropout=0.1
53
- gradient_accumulation_steps=3
54
- elif [ $conf_name == "my_transformer_base" ]; then
55
- arch=my_transformer
56
  use_rmsnorm=1
57
  use_llama_ffn=1
58
  use_rope=1
59
  activation_fn=swish
60
- encoder_ffn_embed_dim=1366
61
  share_all_embeddings=1
62
  share_decoder_input_output_embed=1
63
  learing_rate=1e-3
64
  warmup=4000
65
  max_tokens=8192
66
  weight_decay=0.0
67
- dropout=0.1
68
- gradient_accumulation_steps=3
69
  else
70
  echo "unknown conf_name=$conf_name"
71
  exit
@@ -101,7 +101,6 @@ cmd="fairseq-train $trainable_data_dir \
101
  --dropout $dropout \
102
  --criterion $criterion --label-smoothing $label_smoothing \
103
  --max-epoch $max_epoch \
104
- --keep-last-epochs $keep_last_epochs \
105
  --max-tokens $max_tokens \
106
  --update-freq $gradient_accumulation_steps \
107
  --user-dir $user_dir \
@@ -109,10 +108,12 @@ cmd="fairseq-train $trainable_data_dir \
109
  --encoder-ffn-embed-dim $encoder_ffn_embed_dim \
110
  --seed $seed \
111
  --num-workers $num_workers \
 
 
112
  --keep-best-checkpoints $keep_best_checkpoints \
113
  --patience $patience \
114
  --no-progress-bar \
115
- --log-interval 50 \
116
  --task "translation" \
117
  --ddp-backend no_c10d \
118
  --save-dir $model_dir \
 
1
  #! /usr/bin/bash
2
  set -eux
3
 
4
+ train_device=0,1,2,3,4,5,6,7
5
+ eval_device=0
6
  # xzq-fairseq
7
  root_dir=$(dirname "$PWD")
8
 
 
18
  user_dir=$root_dir/my_dir
19
 
20
  ## eval&decode param
21
+ decode_max_tokens=4096
22
  beam=5
23
  nbest=1
24
  lenpen=1.0
 
28
  label_smoothing=0.1
29
  seed=42
30
  max_epoch=40
31
+ keep_last_epochs=1
32
+ keep_best_checkpoints=5
33
  patience=5
34
  num_workers=8
35
 
36
  # specified param
37
+ conf_name=transformer_big
38
  # Global Batch=卡数*max-tokens*梯度累计,对于训练数据较大的语种(train-set几十M),global batch在 100k tokens以上较好
39
+ if [ $conf_name == "transformer_big" ]; then
40
+ arch=transformer_vaswani_wmt_en_de_big
41
  use_rmsnorm=0
42
  use_llama_ffn=0
43
  use_rope=0
44
  activation_fn=relu
45
+ encoder_ffn_embed_dim=4096
46
  share_all_embeddings=1
47
  share_decoder_input_output_embed=1
48
  learing_rate=1e-3
49
  warmup=4000
50
  max_tokens=8192
51
  weight_decay=0.0
52
+ dropout=0.3
53
+ gradient_accumulation_steps=4
54
+ elif [ $conf_name == "my_transformer_big" ]; then
55
+ arch=my_transformer_vaswani_wmt_en_de_big
56
  use_rmsnorm=1
57
  use_llama_ffn=1
58
  use_rope=1
59
  activation_fn=swish
60
+ encoder_ffn_embed_dim=2732
61
  share_all_embeddings=1
62
  share_decoder_input_output_embed=1
63
  learing_rate=1e-3
64
  warmup=4000
65
  max_tokens=8192
66
  weight_decay=0.0
67
+ dropout=0.3
68
+ gradient_accumulation_steps=4
69
  else
70
  echo "unknown conf_name=$conf_name"
71
  exit
 
101
  --dropout $dropout \
102
  --criterion $criterion --label-smoothing $label_smoothing \
103
  --max-epoch $max_epoch \
 
104
  --max-tokens $max_tokens \
105
  --update-freq $gradient_accumulation_steps \
106
  --user-dir $user_dir \
 
108
  --encoder-ffn-embed-dim $encoder_ffn_embed_dim \
109
  --seed $seed \
110
  --num-workers $num_workers \
111
+ --no-epoch-checkpoints \
112
+ --keep-last-epochs $keep_last_epochs \
113
  --keep-best-checkpoints $keep_best_checkpoints \
114
  --patience $patience \
115
  --no-progress-bar \
116
+ --log-interval 100 \
117
  --task "translation" \
118
  --ddp-backend no_c10d \
119
  --save-dir $model_dir \