File size: 3,091 Bytes
981b783
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91

# trap 'ssh wyt@${infer_address%%:*} "killall pt_main_thread"; exit' SIGINT

# Positional arguments:
#   $1 device       - GPU index; also used as the full host:port server address
#   $2 deploy_flag  - "true" to (re)deploy the vLLM server before inference
#   $3 step         - checkpoint step, used in log/output naming
#   $4 model_path   - path of the merged checkpoint to serve
#   $5 language     - language pair, e.g. "en-zh"
#   $6 level        - robustness level selecting data_dir / output_dir
#   $7..            - document ids to translate
device=$1
deploy_flag=$2
step=$3
model_path=$4
language=$5
level=$6

# All six leading arguments are used unconditionally below, so require all of
# them up front (the original script only validated the first three).
if [ -z "$device" ] || [ -z "$deploy_flag" ] || [ -z "$step" ] \
    || [ -z "$model_path" ] || [ -z "$language" ] || [ -z "$level" ]; then
    echo "Usage: $0 <device> <deploy_flag> <step> <model_path> <language> <level> [doc_id...]" >&2
    exit 1
fi

# Split "src-tgt" into its halves, e.g. "en-zh" -> src=en, tgt=zh.
src_lang=${language%-*}
tgt_lang=${language#*-}

# The device argument doubles as the host:port address of the inference server.
address=${device}

work_dir=/data/wyt/codes/DocDPO/inference_monolang/ted_en_zh_balanced_paritial
data_dir=/data/wyt/codes/DocDPO/data/ted_robust/level_${level}
output_dir=/data/wyt/codes/DocDPO/sft/checkpoints_multilang/ted_base_balanced_en_zhdefr_320/results_robust/level_${level}

# Optionally deploy the vLLM server: locally when the address is 127.0.0.1,
# otherwise over ssh on the remote host.
if [ "$deploy_flag" = "true" ]; then
    if [ "${address%%:*}" = "127.0.0.1" ]; then
        source ~/.zshrc
        # Serve from the "vllm" env, then switch back to the inference env
        # used by the rest of this script.
        conda activate vllm
        CUDA_VISIBLE_DEVICES=${device} nohup vllm serve "${model_path}" \
            --host 0.0.0.0 --port "${address##*:}" \
            --served-model-name "qwen" \
            --enable-prefix-caching --gpu_memory_utilization 0.9 \
            > "vllm_${step}.log" 2>&1 &
        conda activate optima-vllm
    else
        # FIX: the inner quotes around qwen must be escaped; previously
        # "qwen" terminated the outer double-quoted string, so the remote
        # command was only correctly assembled by accident.
        ssh -n "wyt@${address%%:*}" "source ~/.zshrc && conda activate optima-vllm && CUDA_VISIBLE_DEVICES=${device} nohup vllm serve ${model_path} --host 0.0.0.0 --port ${address##*:} --served-model-name \"qwen\" --enable-prefix-caching > /dev/null 2>&1 &"
    fi
fi

# Give the server a moment to come up, then poll its API until it answers.
echo "Waiting for LLM deployment in 20 seconds..."
# sleep 20

echo "Testing API of ${address}..."
while true; do
    # Test the command's exit status directly rather than inspecting $?
    # afterwards (fragile: any statement in between clobbers it).
    if python test_api.py "$address"; then
        echo "API connected successfully!"
        break
    fi
    echo "API connection failed. Retrying in 5 seconds..."
    sleep 5
done

# Remember where we started so we can return after inference.
cur_path=$(pwd)
# FIX: abort if the cd fails — otherwise infer.py would silently run from
# the wrong directory.
cd "$work_dir" || exit 1

# Remaining positional arguments (from $7 on) are the document ids to process.
doc_ids=("${@:7}")
echo "Document IDs to process: ${doc_ids[*]}"

for i in "${doc_ids[@]}"; do
    out_file="$output_dir/${src_lang}-${tgt_lang}_${step}/IWSLT17.TED.tst2017.${src_lang}-${tgt_lang}.${src_lang}.$i.${tgt_lang}"
    # Skip documents already translated by a previous run (resume support).
    if [ ! -f "$out_file" ]; then
        echo "IWSLT17.TED.tst2017.${src_lang}-${tgt_lang}.${src_lang}.$i.${tgt_lang}"
        python -u infer.py \
            --src_file "$data_dir/${src_lang}-${tgt_lang}/IWSLT17.TED.tst2017.${src_lang}-${tgt_lang}.${src_lang}.$i" \
            --output_path "$output_dir/${src_lang}-${tgt_lang}_${step}" \
            --window_size 10 \
            --infer_address "$address" \
            --schedule_address "$address" \
            --language "${src_lang}-${tgt_lang}" \
            --infer_temperature 0.7 \
            --schedule_temperature 0.7 \
            --translate_style base
    fi
done

cd "$cur_path" || exit 1

# ssh wyt@${infer_address%%:*} "killall pt_main_thread"