File size: 2,510 Bytes
12aef23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#!/usr/bin/env bash
# Decode held-out test sets with a trained fairseq NMT model, then score the
# hypotheses with sacreBLEU and COMET-22 (see the loop below).
# `pipefail` is required: without it a failing `fairseq-generate` is masked
# by the `| tee` stage and `set -e` never fires.
set -euxo pipefail

# Number of GPUs for COMET evaluation; 0 means run on CPU.
comet_eval_gpus=8
# Repository root, i.e. the parent of the current working directory
# (expected layout: xzq-fairseq/<this-dir>).
root_dir=$(dirname "$PWD")
# Language pair
src_lang=en
tgt_lang=de
# Data-filtering threshold; only used to name the experiment directory.
threshold=0.7

task_name=${src_lang}2${tgt_lang}
raw_data_dir=$root_dir/data/test/raw/$task_name
trainable_data_dir=$root_dir/data/test/trainable_data/$task_name

## eval & decode parameters
decode_max_tokens=2048
beam=5
nbest=1
lenpen=1.0

# Directory holding the trained model checkpoints.
model_dir=$root_dir/exps/${task_name}/${threshold}/transformer_big_wmt23

### decode
checkpoint_path=$model_dir/checkpoint_best.pt
save_dir=$model_dir/decode_result

# Quote all path expansions so the script survives spaces in $root_dir.
mkdir -p "$save_dir"
# Snapshot this script next to the results for reproducibility.
cp "${BASH_SOURCE[0]}" "$save_dir"

# Map fairseq subset names (--gen-subset) to human-readable test-set names.
declare -A gen_subset_dict
gen_subset_dict=([test]=flores [test1]=wmt22 [test2]=wmt23)
for gen_subset in "${!gen_subset_dict[@]}"
do
    testset=${gen_subset_dict[$gen_subset]}
    decode_file=$save_dir/decode_${testset}_beam${beam}_lenpen${lenpen}.$tgt_lang
    pure_file=$save_dir/pure_decode_${testset}_beam${beam}_lenpen${lenpen}.$tgt_lang

    # Decode one subset; raw fairseq output (S-/T-/H- lines) is kept in $decode_file.
    CUDA_VISIBLE_DEVICES=0 fairseq-generate "$trainable_data_dir" -s "$src_lang" -t "$tgt_lang" \
        --gen-subset "$gen_subset" \
        --path "$checkpoint_path" \
        --max-tokens "$decode_max_tokens" \
        --beam "$beam" \
        --nbest "$nbest" \
        --lenpen "$lenpen" \
        --seed 42 \
        --remove-bpe | tee "$decode_file"

    ### eval
    # Extract hypothesis lines (H-*), restore the original sentence order,
    # drop the id/score columns, and detokenize into plain text.
    grep ^H "$decode_file" | LC_ALL=C sort -V | cut -f3- | perl "$root_dir/mosesdecoder/scripts/tokenizer/detokenizer.perl" -l "$tgt_lang" > "$pure_file"

    eval_file=$model_dir/eval_${testset}.log
    cur_time=$(date +"%Y-%m-%d %H:%M:%S")
    echo "=============$cur_time===================" >> "$eval_file"
    echo "$checkpoint_path" >> "$eval_file"
    tail -n1 "$decode_file" >> "$eval_file"    # fairseq's own multi-bleu summary line
    # get score
    src_file=$raw_data_dir/test.${task_name}.${testset}.$src_lang
    ref_file=$raw_data_dir/test.${task_name}.${testset}.$tgt_lang
    comet22_file=$save_dir/comet22.${testset}.beam${beam}_lenpen${lenpen}
    # BUG FIX: `--tokenize zh` was applied unconditionally, which is wrong for
    # non-Chinese targets (here de) and inflates/deflates BLEU. Use the Chinese
    # tokenizer only when the target really is zh; otherwise let sacreBLEU pick
    # its default (13a).
    if [ "$tgt_lang" = "zh" ]; then
        sacrebleu "$ref_file" -i "$pure_file" -w 2 --tokenize zh >> "$eval_file"
    else
        sacrebleu "$ref_file" -i "$pure_file" -w 2 >> "$eval_file"
    fi
    # BUG FIX: $comet_eval_gpus was declared at the top ("0 means CPU") but
    # never used; pass it through to comet-score.
    comet-score -s "$src_file" -t "$pure_file" -r "$ref_file" \
        --gpus "$comet_eval_gpus" \
        --model "$root_dir/wmt22-comet-da/checkpoints/model.ckpt" | tee "$comet22_file"
    echo "Comet22 Score" >> "$eval_file"
    tail -n1 "$comet22_file" >> "$eval_file"    # keep only the system-level COMET score
done