#!/usr/bin/env bash
# Deploy a vLLM server (locally or over ssh) and run document-level MT
# inference over the IWSLT17 TED robustness test shards.
#
# Usage: $0 <device> <deploy_flag> <step> <model_path> <language>
#   device      GPU index; also selects the serving port (801<device>)
#   deploy_flag "true" to (re)deploy the vLLM server, anything else to skip
#   step        checkpoint step, used in output dirs and log names
#   model_path  path to the merged checkpoint to serve
#   language    language pair "src-tgt", e.g. "en-zh"

# Optional cleanup of the remote server on Ctrl-C:
# trap 'ssh wyt@${address%%:*} "killall pt_main_thread"; exit' SIGINT

device=$1
deploy_flag=$2
step=$3
model_path=$4
language=$5

# All five positional arguments are required ($4/$5 were previously unchecked
# and the usage string had lost its argument list).
if [ -z "$device" ] || [ -z "$deploy_flag" ] || [ -z "$step" ] \
    || [ -z "$model_path" ] || [ -z "$language" ]; then
  echo "Usage: $0 <device> <deploy_flag> <step> <model_path> <language>" >&2
  exit 1
fi

# Split "en-zh" -> src=en, tgt=zh.
src_lang=${language%-*}
tgt_lang=${language#*-}

# Serving endpoint; port is 801<device> on the chosen host.
# address=127.0.0.1:801${device}
# address=10.249.42.182:801${device}
address=10.249.45.139:801${device}

work_dir=/data/wyt/codes/DocDPO/inference_monolang/ted_en_zh_balanced_paritial
# data_dir=/data/wyt/codes/DocDPO/data/2017-01-ted-test
data_dir=/data/wyt/codes/DocDPO/data/ted_robust/level_3
output_dir=/data/wyt/codes/DocDPO/sft/checkpoints_multilang/ted_base_balanced_en_zhdefr_320/results_robust/level_3

if [ "$deploy_flag" = "true" ]; then
  host=${address%%:*}
  port=${address##*:}
  if [ "$host" = "127.0.0.1" ]; then
    # Local deployment: launch vLLM in the background on the selected GPU.
    source ~/.zshrc
    conda activate vllm
    CUDA_VISIBLE_DEVICES=${device} nohup vllm serve "$model_path" \
      --host 0.0.0.0 --port "$port" --served-model-name qwen \
      --enable-prefix-caching --gpu_memory_utilization 0.9 \
      > "vllm_${step}.log" 2>&1 &
    conda activate optima-vllm
  else
    # Remote deployment over ssh (-n: don't consume local stdin).
    # Inner quotes are escaped so the remote command string stays intact
    # (previously the bare "qwen" quotes closed the outer string).
    ssh -n "wyt@${host}" "source ~/.zshrc && conda activate optima-vllm && CUDA_VISIBLE_DEVICES=${device} nohup vllm serve ${model_path} --host 0.0.0.0 --port ${port} --served-model-name \"qwen\" --enable-prefix-caching > /dev/null 2>&1 &"
  fi
fi

echo "Waiting for LLM deployment in 20 seconds..."
# sleep 20

# Poll the server until it answers; test_api.py exits 0 on success.
echo "Testing API of ${address}..."
until python test_api.py "$address"; do
  echo "API connection failed. Retrying in 5 seconds..."
  sleep 5
done
echo "API connected successfully!"

cur_path=$(pwd)
cd "$work_dir" || exit 1

# Translate shards 6..11; skip shards whose output already exists.
for i in {6..11}; do
# for i in {0..11}; do
  out_name=IWSLT17.TED.tst2017.${src_lang}-${tgt_lang}.${src_lang}.$i.${tgt_lang}
  if [ ! -f "$output_dir/${src_lang}-${tgt_lang}_${step}/$out_name" ]; then
    echo "$out_name"
    python -u infer.py \
      --src_file "$data_dir/${src_lang}-${tgt_lang}/IWSLT17.TED.tst2017.${src_lang}-${tgt_lang}.${src_lang}.$i" \
      --output_path "$output_dir/${src_lang}-${tgt_lang}_${step}" \
      --window_size 10 \
      --infer_address "$address" \
      --schedule_address "$address" \
      --language "${src_lang}-${tgt_lang}" \
      --infer_temperature 0.7 \
      --schedule_temperature 0.7 \
      --translate_style base
  fi
done

cd "$cur_path" || exit 1

# Optional teardown of the remote server:
# ssh wyt@${address%%:*} "killall pt_main_thread"