lmt-arr / scripts /inference.sh
sleepyhead111's picture
Upload folder using huggingface_hub
746b7a6 verified
raw
history blame
5.27 kB
#!/bin/bash
#
# Decode the test sets with `swift infer` for every configured language pair
# and score the hypotheses with src/mt_scoring.py.
#
# Usage: inference.sh PREDICT_MODEL_DIR
#
# -e: stop on the first failing command; -u: treat unset variables as errors;
# -x: trace every command; pipefail: a pipeline fails when ANY stage fails, so
# a crashed `swift infer ... | tee log` is no longer hidden by tee's exit 0.
set -eux -o pipefail

# Repository root = parent of the directory holding this script.
ROOT_DIR=$(dirname -- "$(dirname -- "$(readlink -f -- "$0")")")

# Point the Hugging Face and ModelScope caches at the repository-local cache
# directory.
export HF_HOME="$ROOT_DIR/cache/"
export MODELSCOPE_CACHE="$ROOT_DIR/cache/"
# Offline switches for the `evaluate` and `datasets` libraries.
export HF_EVALUATE_OFFLINE=1
export HF_DATASETS_OFFLINE=1

# NOTE(review): config_file is not referenced by any visible command below.
config_file="$ROOT_DIR/configs/accelerate_config.yaml"
# Presumably the number of worker processes picked up by swift -- confirm.
export NPROC_PER_NODE=8

# model: directory of the model to decode with (first positional argument).
# Defaults to the empty string so that `set -u` does not trip when omitted.
predict_model_dir=${1:-}

# eval: metric checkpoints handed to the scoring script.
comet_model="$ROOT_DIR/model_card/wmt22-comet-da/checkpoints/model.ckpt"
# Variable name kept as-is because later lines of the file refer to it.
xcome_model="$ROOT_DIR/model_card/XCOMET-XXL/checkpoints/model.ckpt"

# Comma-separated accumulators, one entry appended per decoded language pair.
lang_pair_strs=""
src_file_strs=""
ref_file_strs=""
hypo_file_strs=""
# Refuse to run without a model directory: with an empty value the output
# directory below would resolve to /decode_result/<pair>, and the mkdir/rm
# in the loop would then operate on the filesystem root.
: "${predict_model_dir:?usage: $0 PREDICT_MODEL_DIR}"

# Decode every language paired with Chinese, in both translation directions.
# Alternative language sets used previously:
# for lang in en ja ru de ug; do
# for lang in en ja ko ru de fr it pt es; do
for lang in en de ru bn hi th jv sw si km; do
  for src in "$lang" zh; do
    if [ "$src" = "zh" ]; then  # zh -> $lang
      src_lang=zh
      tgt_lang=$lang
    else  # $lang -> zh
      src_lang=$lang
      tgt_lang=zh
    fi
    lp=${src_lang}2${tgt_lang}

    # FLORES-200 source/reference texts; they are only passed to the scorer.
    src_file="$ROOT_DIR/data_arr/flores200/zh-${lang}/test.zh-$lang.$src_lang"
    ref_file="$ROOT_DIR/data_arr/flores200/zh-${lang}/test.zh-$lang.$tgt_lang"
    # test_file=$ROOT_DIR/data_arr/sft_100k_ugbomn/test.$lp.jsonl
    test_file="$ROOT_DIR/data_arr/test/test.$lp.jsonl"

    output_dir="$predict_model_dir/decode_result/$lp"
    hypo_file="$output_dir/hypo.$lp.txt"
    mkdir -p -- "$output_dir"
    # WARNING: earlier results for this pair are wiped before decoding.
    # `:?` aborts instead of expanding to `/*` if the variable is ever empty.
    rm -rf -- "${output_dir:?}"/*
    # Keep a copy of this script next to the results it produced.
    cp -- "$0" "$output_dir"

    # Beam-search decoding (5 beams); console output is mirrored to train.log.
    swift infer \
      --infer_backend pt \
      --val_dataset "$test_file" \
      --load_from_cache_file True \
      --dataset_shuffle False \
      --val_dataset_shuffle False \
      --model "$predict_model_dir" \
      --torch_dtype bfloat16 \
      --max_new_tokens 1024 \
      --max_batch_size 16 \
      --num_beams 5 \
      --max_length 1024 \
      --dataset_num_proc 8 \
      --temperature 0 \
      --result_path "$output_dir/generated_predictions.jsonl" \
      | tee "$output_dir/train.log"

    # Pull the `.response` field of every prediction record into a plain-text
    # hypothesis file.
    jq -r '.response' "$output_dir/generated_predictions.jsonl" > "$hypo_file"

    # Append to the comma-separated lists; `${v:+$v,}` emits the separator
    # only when the list already has an entry.
    lang_pair_strs=${lang_pair_strs:+$lang_pair_strs,}$lp
    src_file_strs=${src_file_strs:+$src_file_strs,}$src_file
    ref_file_strs=${ref_file_strs:+$ref_file_strs,}$ref_file
    hypo_file_strs=${hypo_file_strs:+$hypo_file_strs,}$hypo_file
  done
done
# Score all decoded language pairs in one call. Each list argument is a
# comma-separated string with one entry per language pair, in matching order.
# Every expansion is quoted so that paths containing whitespace stay intact.
# Alternative metric set:
# metric="bleu,comet_22,xcomet_xxl"
metric="bleu,comet_22"
python "$ROOT_DIR/src/mt_scoring.py" \
  --metric "$metric" \
  --comet_22_path "$comet_model" \
  --xcomet_xxl_path "$xcome_model" \
  --lang_pair "$lang_pair_strs" \
  --src_file "$src_file_strs" \
  --ref_file "$ref_file_strs" \
  --hypo_file "$hypo_file_strs" \
  --record_file "result_mt.xlsx"
# lang_pair_strs=""
# src_file_strs=""
# ref_file_strs=""
# hypo_file_strs=""
# #
# # for lang in ja ko ru de fr it pt es;do
# # for lang in ja ru de ug; do
# for lang in mn_cn; do
# for src in $lang en ;do
# if [ $src = "en" ]; then # en -> $lang
# src_lang=en
# tgt_lang=$lang
# else # $lang -> en
# src_lang=$lang
# tgt_lang=en
# fi
# lp=${src_lang}2${tgt_lang}
# src_file=$ROOT_DIR/data_arr/flores200/en-${lang}/test.en-$lang.$src_lang
# ref_file=$ROOT_DIR/data_arr/flores200/en-${lang}/test.en-$lang.$tgt_lang
# # test_file=$ROOT_DIR/data_arr/sft_100k_ugbomn/test.$lp.jsonl
# test_file=/mnt/nvme1/luoyingfeng/llm-mt/data_arr/merge_0701/train1/test/test.$lp.jsonl
# output_dir=$predict_model_dir/decode_result/$lp
# mkdir -p $output_dir
# #############################!!!!!
# rm -rf $output_dir/*
# ########################
# cp $0 $output_dir
# # --load_args False \
# swift infer \
# --infer_backend pt \
# --val_dataset $test_file \
# --load_from_cache_file True \
# --dataset_shuffle False \
# --val_dataset_shuffle False \
# --model $predict_model_dir \
# --torch_dtype bfloat16 \
# --max_new_tokens 1024 \
# --max_batch_size 8 \
# --num_beams 5 \
# --max_length 1024 \
# --dataset_num_proc 8 \
# --temperature 0 \
# --result_path $output_dir/generated_predictions.jsonl | tee $output_dir/train.log
# jq -r '.response' $output_dir/generated_predictions.jsonl > $output_dir/hypo.$lp.txt
# hypo_file=$output_dir/hypo.$lp.txt
# lang_pair_strs=${lang_pair_strs:+$lang_pair_strs,}$lp
# src_file_strs=${src_file_strs:+$src_file_strs,}$src_file
# ref_file_strs=${ref_file_strs:+$ref_file_strs,}$ref_file
# hypo_file_strs=${hypo_file_strs:+$hypo_file_strs,}$hypo_file
# done
# done
# # metric="bleu,comet_22,xcomet_xxl"
# metric="bleu,comet_22"
# python $ROOT_DIR/src/mt_scoring.py \
# --metric $metric \
# --comet_22_path $comet_model \
# --xcomet_xxl_path $xcome_model \
# --lang_pair $lang_pair_strs \
# --src_file $src_file_strs \
# --ref_file $ref_file_strs \
# --hypo_file $hypo_file_strs \
# --record_file "result_mt.xlsx"