Delete trans_fairseq/scripts/run23.sh
Browse files- trans_fairseq/scripts/run23.sh +0 -179
trans_fairseq/scripts/run23.sh
DELETED
|
@@ -1,179 +0,0 @@
|
|
| 1 |
-
#! /usr/bin/bash
set -eux

##### Ablation run: FFN + RMSNorm #####

# GPUs used for training / evaluation.
train_device="0,1,2,3,4,5,6,7"
eval_device="0"
# xzq-fairseq
root_dir=$(dirname "$PWD")

# Language pair, taken from positional args (defaults: de -> en).
src_lang="${1:-de}"
tgt_lang="${2:-en}"

# Dataset layout under $root_dir/data/<pair>/<dataset>.
data_name="wmt23"
pair_lang="${src_lang}-${tgt_lang}"
task_name="${src_lang}2${tgt_lang}"
data_dir="$root_dir/data/$pair_lang/$data_name"
raw_data_dir="$data_dir/raw"
trainable_data_dir="$data_dir/trainable_data"
user_dir="$root_dir/my_dir"

## eval & decode parameters
decode_max_tokens=4096
beam=5
nbest=1
lenpen=1.0

## common training parameters
criterion="label_smoothed_cross_entropy"
label_smoothing=0.1
seed=42
max_epoch=40
keep_last_epochs=1
keep_best_checkpoints=5
patience=5
num_workers=8

# run-specific parameters
# Global batch = #GPUs * max-tokens * gradient accumulation; for languages
# with large training sets (tens of millions of sentences) a global batch of
# 100k+ tokens works well.
arch="my_transformer_vaswani_wmt_en_de_big"
use_rmsnorm=1
use_llama_ffn=1
use_rope=0
activation_fn="swish"
encoder_ffn_embed_dim=2732
share_all_embeddings=1
share_decoder_input_output_embed=1
learing_rate=1e-3   # [sic] misspelled name kept as-is; referenced as $learing_rate below
warmup=4000
max_tokens=8192
weight_decay=0.0
dropout=0.3
gradient_accumulation_steps=4
|
| 54 |
-
|
| 55 |
-
# Experiment directory: <config-name>_<dataset> plus one suffix per enabled
# ablation component.
# BUG FIX: `conf_name` was referenced but never assigned anywhere in this
# script; under `set -eux` (which includes -u / nounset) the script aborted
# here with "conf_name: unbound variable". Default it to the run name while
# still honoring an externally exported value.
conf_name=${conf_name:-run23}
model_dir=$root_dir/exps/$task_name/${conf_name}_${data_name}
# Adjust the directory name according to the enabled ablation components.
if [ "$use_rmsnorm" -eq 1 ]; then
    model_dir=${model_dir}_norm
fi
if [ "$use_llama_ffn" -eq 1 ]; then
    model_dir=${model_dir}_ffn
    activation_fn=swish   # LLaMA-style FFN is paired with swish activation
fi
if [ "$use_rope" -eq 1 ] && [ "$use_rmsnorm" -eq 1 ] && [ "$use_llama_ffn" -eq 1 ]; then
    # All three components enabled: rebuild the name from scratch with a
    # dedicated "_all_nosinpos" tag (intentionally drops _norm/_ffn suffixes).
    model_dir=$root_dir/exps/$task_name/${conf_name}_${data_name}_all_nosinpos
elif [ "$use_rope" -eq 1 ]; then
    model_dir=${model_dir}_att_nosinpos
fi
mkdir -p "$model_dir"
# Snapshot this script next to the checkpoints for reproducibility.
cp "${BASH_SOURCE[0]}" "$model_dir"
|
| 71 |
-
|
| 72 |
-
# Number of training GPUs = number of comma-separated device ids.
gpu_num=$(awk -F',' '{print NF}' <<<"$train_device")
export CUDA_VISIBLE_DEVICES=$train_device

# Assemble the fairseq-train invocation as a single string; optional flags
# are appended below and the whole command is launched through eval.
cmd="fairseq-train $trainable_data_dir"
cmd+=" --distributed-world-size $gpu_num -s $src_lang -t $tgt_lang"
cmd+=" --arch $arch"
cmd+=" --fp16"
cmd+=" --optimizer adam --clip-norm 0.0"
cmd+=" --lr-scheduler inverse_sqrt --warmup-init-lr 1e-07 --warmup-updates $warmup"
cmd+=" --lr $learing_rate --adam-betas '(0.9, 0.98)'"
cmd+=" --weight-decay $weight_decay"
cmd+=" --dropout $dropout"
cmd+=" --criterion $criterion --label-smoothing $label_smoothing"
cmd+=" --max-epoch $max_epoch"
cmd+=" --max-tokens $max_tokens"
cmd+=" --update-freq $gradient_accumulation_steps"
cmd+=" --user-dir $user_dir"
cmd+=" --activation-fn $activation_fn"
cmd+=" --encoder-ffn-embed-dim $encoder_ffn_embed_dim"
cmd+=" --seed $seed"
cmd+=" --num-workers $num_workers"
cmd+=" --no-epoch-checkpoints"
cmd+=" --keep-last-epochs $keep_last_epochs"
cmd+=" --keep-best-checkpoints $keep_best_checkpoints"
cmd+=" --patience $patience"
cmd+=" --no-progress-bar"
cmd+=" --log-interval 100"
cmd+=" --task translation"
cmd+=" --ddp-backend no_c10d"
cmd+=" --save-dir $model_dir"
cmd+=" --tensorboard-logdir $model_dir"

# Optional flags, controlled by the 0/1 switches configured above.
if [ "$share_all_embeddings" -eq 1 ]; then
  cmd+=" --share-all-embeddings "
fi
if [ "$share_decoder_input_output_embed" -eq 1 ]; then
  cmd+=" --share-decoder-input-output-embed "
fi
# ${max_update:=0} also assigns 0 when max_update is unset (kept on purpose).
if [ "${max_update:=0}" -ne 0 ]; then
  cmd+=" --max-update $max_update"
fi
if [ "$use_rmsnorm" -eq 1 ]; then
  cmd+=" --use-rmsnorm "
fi
if [ "$use_llama_ffn" -eq 1 ]; then
  cmd+=" --use-llama-ffn "
fi
if [ "$use_rope" -eq 1 ]; then
  # RoPE replaces the sinusoidal positional embeddings entirely.
  cmd+=" --use-rope --no-token-positional-embeddings "
fi

# Launch training in the background, appending all output to train.log.
cur_time=$(date +"%Y-%m-%d %H:%M:%S")
echo "=============$cur_time===================" >> $model_dir/train.log
cmd="nohup ${cmd} >> $model_dir/train.log 2>&1 &"

eval $cmd
|
| 129 |
-
|
| 130 |
-
# Block until the background training job launched above has finished.
wait

### decode
# Decode each held-out test set with the best checkpoint.
checkpoint_path=$model_dir/checkpoint_best.pt
save_dir=$model_dir/decode_result

mkdir -p $save_dir
# Snapshot this script alongside the decoding outputs for reproducibility.
cp ${BASH_SOURCE[0]} $save_dir

# Map fairseq split names to human-readable test-set names.
declare -A gen_subset_dict
gen_subset_dict=([test]=flores [test1]=wmt22 [test2]=wmt23)
for gen_subset in ${!gen_subset_dict[*]}
do
decode_file=$save_dir/decode_${gen_subset_dict[$gen_subset]}_beam${beam}_lenpen${lenpen}.$tgt_lang
pure_file=$save_dir/pure_decode_${gen_subset_dict[$gen_subset]}_beam${beam}_lenpen${lenpen}.$tgt_lang

CUDA_VISIBLE_DEVICES=$eval_device fairseq-generate \
$trainable_data_dir \
-s $src_lang -t $tgt_lang \
--user-dir $user_dir \
--gen-subset $gen_subset \
--path $checkpoint_path \
--max-tokens $decode_max_tokens \
--beam $beam \
--nbest $nbest \
--lenpen $lenpen \
--seed $seed \
--remove-bpe | tee $decode_file

### eval
# purify file: keep only hypothesis lines (^H), order them by sentence id
# (sort -V), drop the score columns (cut -f3-), then detokenize with the
# Moses detokenizer.
grep ^H $decode_file | LC_ALL=C sort -V | cut -f3- | perl $root_dir/mosesdecoder/scripts/tokenizer/detokenizer.perl -l $tgt_lang > $pure_file

# Append a timestamped evaluation record for this test set.
eval_file=$model_dir/eval_${gen_subset_dict[$gen_subset]}.log
cur_time=`date +"%Y-%m-%d %H:%M:%S"`
echo "=============$cur_time===================" >> $eval_file
echo $checkpoint_path >> $eval_file
tail -n1 $decode_file >> $eval_file # multi-bleu
# get score
src_file=$raw_data_dir/test.${gen_subset_dict[$gen_subset]}.$src_lang
ref_file=$raw_data_dir/test.${gen_subset_dict[$gen_subset]}.$tgt_lang
# NOTE(review): sacrebleu_file is assigned but never written to below — the
# sacrebleu output goes into $eval_file instead; confirm whether intended.
sacrebleu_file=$save_dir/sacrebleu.${gen_subset_dict[$gen_subset]}.beam${beam}_lenpen${lenpen}
comet22_file=$save_dir/comet22.${gen_subset_dict[$gen_subset]}.beam${beam}_lenpen${lenpen}
sacrebleu $ref_file -i $pure_file -w 2 >> $eval_file
comet-score -s $src_file -t $pure_file -r $ref_file --model $root_dir/wmt22-comet-da/checkpoints/model.ckpt | tee $comet22_file
echo "Comet22 Score" >> $eval_file
tail -n1 $comet22_file >> $eval_file # keep only the averaged COMET score line

echo -e "decode finished! \n decode tokenized file in $decode_file \n detokenized file in $pure_file \n sacrebleu file in $eval_file"
done
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|