sleepyhead111 committed on
Commit
44fb854
·
verified ·
1 Parent(s): af51894

Delete trans_fairseq/scripts/run23.sh

Browse files
Files changed (1) hide show
  1. trans_fairseq/scripts/run23.sh +0 -179
trans_fairseq/scripts/run23.sh DELETED
@@ -1,179 +0,0 @@
1
- #! /usr/bin/bash
2
- set -eux
3
-
4
- ##### FFN + RMSNorm
5
-
6
- train_device=0,1,2,3,4,5,6,7
7
- eval_device=0
8
- # xzq-fairseq
9
- root_dir=$(dirname "$PWD")
10
-
11
- src_lang=${1:-"de"}
12
- tgt_lang=${2:-"en"}
13
-
14
- data_name=wmt23
15
- pair_lang=${src_lang}-${tgt_lang}
16
- task_name=${src_lang}2${tgt_lang}
17
- data_dir=$root_dir/data/$pair_lang/$data_name
18
- raw_data_dir=$data_dir/raw
19
- trainable_data_dir=$data_dir/trainable_data
20
- user_dir=$root_dir/my_dir
21
-
22
- ## eval&decode param
23
- decode_max_tokens=4096
24
- beam=5
25
- nbest=1
26
- lenpen=1.0
27
-
28
- ## common param
29
- criterion=label_smoothed_cross_entropy
30
- label_smoothing=0.1
31
- seed=42
32
- max_epoch=40
33
- keep_last_epochs=1
34
- keep_best_checkpoints=5
35
- patience=5
36
- num_workers=8
37
-
38
- # specified param
39
- # Global Batch=卡数*max-tokens*梯度累计,对于训练数据较大的语种(train-set几十M),global batch在 100k tokens以上较好
40
- arch=my_transformer_vaswani_wmt_en_de_big
41
- use_rmsnorm=1
42
- use_llama_ffn=1
43
- use_rope=0
44
- activation_fn=swish
45
- encoder_ffn_embed_dim=2732
46
- share_all_embeddings=1
47
- share_decoder_input_output_embed=1
48
- learing_rate=1e-3
49
- warmup=4000
50
- max_tokens=8192
51
- weight_decay=0.0
52
- dropout=0.3
53
- gradient_accumulation_steps=4
54
-
# BUG FIX: conf_name is used below but never assigned anywhere in this script;
# under `set -u` that aborts with a cryptic "unbound variable" error. Require
# it explicitly (from the environment) with a clear message instead.
: "${conf_name:?conf_name must be set in the environment (names the experiment dir)}"

model_dir=$root_dir/exps/$task_name/${conf_name}_${data_name}
# Append ablation-component suffixes to the experiment directory name.
if [ "$use_rmsnorm" -eq 1 ]; then
    model_dir=${model_dir}_norm
fi
if [ "$use_llama_ffn" -eq 1 ]; then
    model_dir=${model_dir}_ffn
    activation_fn=swish
fi
if [ "$use_rope" -eq 1 ] && [ "$use_rmsnorm" -eq 1 ] && [ "$use_llama_ffn" -eq 1 ]; then
    model_dir=$root_dir/exps/$task_name/${conf_name}_${data_name}_all_nosinpos
elif [ "$use_rope" -eq 1 ]; then
    model_dir=${model_dir}_att_nosinpos
fi
mkdir -p "$model_dir"
# Keep a copy of this script next to the checkpoints for reproducibility.
cp "${BASH_SOURCE[0]}" "$model_dir"
# Number of training GPUs = count of comma-separated ids in train_device.
# (Replaces the former backtick `echo | awk` pipeline with a builtin split.)
IFS=',' read -r -a _train_devs <<< "$train_device"
gpu_num=${#_train_devs[@]}
export CUDA_VISIBLE_DEVICES=$train_device

# Build the fairseq-train invocation as a single string so that optional
# flags can be appended conditionally below; it is executed later via eval.
cmd="fairseq-train $trainable_data_dir \
    --distributed-world-size $gpu_num -s $src_lang -t $tgt_lang \
    --arch $arch \
    --fp16 \
    --optimizer adam --clip-norm 0.0 \
    --lr-scheduler inverse_sqrt --warmup-init-lr 1e-07 --warmup-updates $warmup \
    --lr $learing_rate --adam-betas '(0.9, 0.98)' \
    --weight-decay $weight_decay \
    --dropout $dropout \
    --criterion $criterion --label-smoothing $label_smoothing \
    --max-epoch $max_epoch \
    --max-tokens $max_tokens \
    --update-freq $gradient_accumulation_steps \
    --user-dir $user_dir \
    --activation-fn $activation_fn \
    --encoder-ffn-embed-dim $encoder_ffn_embed_dim \
    --seed $seed \
    --num-workers $num_workers \
    --no-epoch-checkpoints \
    --keep-last-epochs $keep_last_epochs \
    --keep-best-checkpoints $keep_best_checkpoints \
    --patience $patience \
    --no-progress-bar \
    --log-interval 100 \
    --task translation \
    --ddp-backend no_c10d \
    --save-dir $model_dir \
    --tensorboard-logdir $model_dir"

# Conditionally appended flags.
if [ "$share_all_embeddings" -eq 1 ]; then
    cmd=${cmd}" --share-all-embeddings "
fi
if [ "$share_decoder_input_output_embed" -eq 1 ]; then
    cmd=${cmd}" --share-decoder-input-output-embed "
fi
# max_update defaults to 0 (disabled) when not provided by the environment.
if [ "${max_update:=0}" -ne 0 ]; then
    cmd=${cmd}" --max-update $max_update"
fi
if [ "$use_rmsnorm" -eq 1 ]; then
    cmd=${cmd}" --use-rmsnorm "
fi
if [ "$use_llama_ffn" -eq 1 ]; then
    cmd=${cmd}" --use-llama-ffn "
fi
if [ "$use_rope" -eq 1 ]; then
    # RoPE replaces sinusoidal positions, so also drop positional embeddings.
    cmd=${cmd}" --use-rope --no-token-positional-embeddings "
fi
# Launch training in the background, appending all output to train.log.
cur_time=$(date +"%Y-%m-%d %H:%M:%S")
echo "=============$cur_time===================" >> "$model_dir/train.log"
cmd="nohup ${cmd} >> $model_dir/train.log 2>&1 &"

# eval is required here because cmd was assembled as a string with embedded
# quoting (e.g. the adam-betas tuple) and redirections.
eval "$cmd"

# Block until the background training job finishes before decoding.
wait
### decode
checkpoint_path=$model_dir/checkpoint_best.pt
save_dir=$model_dir/decode_result

mkdir -p "$save_dir"
# Keep a copy of this script with the decoding results for reproducibility.
cp "${BASH_SOURCE[0]}" "$save_dir"

# Map fairseq split name -> human-readable test-set name.
declare -A gen_subset_dict
gen_subset_dict=([test]=flores [test1]=wmt22 [test2]=wmt23)
for gen_subset in "${!gen_subset_dict[@]}"
do
    set_name=${gen_subset_dict[$gen_subset]}   # hoisted repeated lookup
    decode_file=$save_dir/decode_${set_name}_beam${beam}_lenpen${lenpen}.$tgt_lang
    pure_file=$save_dir/pure_decode_${set_name}_beam${beam}_lenpen${lenpen}.$tgt_lang

    CUDA_VISIBLE_DEVICES=$eval_device fairseq-generate \
        "$trainable_data_dir" \
        -s "$src_lang" -t "$tgt_lang" \
        --user-dir "$user_dir" \
        --gen-subset "$gen_subset" \
        --path "$checkpoint_path" \
        --max-tokens "$decode_max_tokens" \
        --beam "$beam" \
        --nbest "$nbest" \
        --lenpen "$lenpen" \
        --seed "$seed" \
        --remove-bpe | tee "$decode_file"

    ### eval
    # Extract hypothesis lines (H-*), restore sentence order, drop the id and
    # score columns, then detokenize with the Moses detokenizer.
    grep ^H "$decode_file" | LC_ALL=C sort -V | cut -f3- | perl "$root_dir/mosesdecoder/scripts/tokenizer/detokenizer.perl" -l "$tgt_lang" > "$pure_file"

    eval_file=$model_dir/eval_${set_name}.log
    cur_time=$(date +"%Y-%m-%d %H:%M:%S")
    echo "=============$cur_time===================" >> "$eval_file"
    echo "$checkpoint_path" >> "$eval_file"
    tail -n1 "$decode_file" >> "$eval_file" # multi-bleu summary line from fairseq-generate
    # Score against the raw (detokenized) source/reference files.
    src_file=$raw_data_dir/test.${set_name}.$src_lang
    ref_file=$raw_data_dir/test.${set_name}.$tgt_lang
    # NOTE(review): sacrebleu_file is assigned but never written to — sacrebleu
    # output only goes to eval_file. Kept for now; confirm whether the intent
    # was `sacrebleu ... | tee "$sacrebleu_file" >> "$eval_file"`.
    sacrebleu_file=$save_dir/sacrebleu.${set_name}.beam${beam}_lenpen${lenpen}
    comet22_file=$save_dir/comet22.${set_name}.beam${beam}_lenpen${lenpen}
    sacrebleu "$ref_file" -i "$pure_file" -w 2 >> "$eval_file"
    comet-score -s "$src_file" -t "$pure_file" -r "$ref_file" --model "$root_dir/wmt22-comet-da/checkpoints/model.ckpt" | tee "$comet22_file"
    echo "Comet22 Score" >> "$eval_file"
    tail -n1 "$comet22_file" >> "$eval_file" # keep only the averaged COMET score

    echo -e "decode finished! \n decode tokenized file in $decode_file \n detokenized file in $pure_file \n sacrebleu file in $eval_file"
done