Spaces:
Runtime error
Runtime error
File size: 3,845 Bytes
af11ce4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 |
#!/bin/bash

# Example recipe: evaluate TTS models with the objective metrics reported in
# the ZipVoice-Dialog paper.

# Make the project root (two directories up) importable by python.
# This runs before nounset is enabled, so an unset PYTHONPATH expands to "".
export PYTHONPATH="../../:${PYTHONPATH:-}"

# Strict mode:
#   -e           abort on the first failing command
#   -u           treat expansion of an unset variable as an error
#   -o pipefail  a pipeline fails if any of its commands fails
set -euo pipefail

# Each stage below runs only when stage <= N <= stop_stage.
stage=1
stop_stage=6

# Directory that receives the downloaded test sets and evaluation models.
download_dir=download/

# To fetch through the HF mirror instead, uncomment the next line:
# export HF_ENDPOINT=https://hf-mirror.com
# Stage 1: download the dialog test-set archive and unpack it in place.
# Reads:  stage, stop_stage, download_dir
# Writes: hf_repo, file; creates ${download_dir} and extracts the archive there.
if [ "${stage}" -le 1 ] && [ "${stop_stage}" -ge 1 ]; then
  echo "Stage 1: Download test sets (test-dialog)"

  hf_repo=k2-fsa/TTS_eval_datasets
  mkdir -p "${download_dir}/"

  file=dialog_testset.tar.gz
  echo "Downloading ${file}..."
  # The repo is addressed as a dataset repository, and only ${file} is fetched.
  huggingface-cli download \
    --repo-type dataset \
    --local-dir "${download_dir}/" \
    "${hf_repo}" \
    "${file}"

  echo "Extracting ${file}..."
  # All expansions are quoted so that a download_dir containing whitespace or
  # glob characters is passed through as a single argument.
  tar -xzf "${download_dir}/${file}" -C "${download_dir}/"
fi
# Stage 2: download the models used by the evaluation stages.
# Reads:  stage, stop_stage, download_dir
# Writes: hf_repo; populates ${download_dir}/tts_eval_models.
if [ "${stage}" -le 2 ] && [ "${stop_stage}" -ge 2 ]; then
  echo "Stage 2: Download all required evaluation models"

  # hf_repo must be assigned in this stage. Otherwise starting the script at
  # stage=2 aborts under `set -u` (unbound variable), and running after stage 1
  # would reuse the *dataset* repo id without --repo-type dataset.
  # NOTE(review): repo id assumed to be the k2-fsa evaluation-model repo that
  # matches the tts_eval_models target directory - confirm.
  hf_repo=k2-fsa/TTS_eval_models

  # -p also creates ${download_dir} itself when it does not exist yet.
  mkdir -p "${download_dir}/tts_eval_models"

  # No --repo-type is given, so the CLI's default repo type is used.
  huggingface-cli download \
    --local-dir "${download_dir}/tts_eval_models" \
    "${hf_repo}"
fi
# Stage 3: run dialog inference on every test set.
# Reads:  stage, stop_stage, download_dir
# Writes: testset, test_tsv; results are written under results/<testset>.
if [ "${stage}" -le 3 ] && [ "${stop_stage}" -ge 3 ]; then
  echo "Stage 3: Inference with the pre-trained ZipVoice model from huggingface"

  for testset in test_dialog_en test_dialog_zh; do
    # Map the test-set name to its TSV list inside the extracted archive.
    case "${testset}" in
      test_dialog_en)
        test_tsv="${download_dir}/dialog_testset/en/test.tsv"
        ;;
      test_dialog_zh)
        test_tsv="${download_dir}/dialog_testset/zh/test.tsv"
        ;;
      *)
        # Guard for names added to the loop list without a mapping here.
        echo "Error: unknown testset ${testset}" >&2
        exit 1
        ;;
    esac

    echo "Inference on testset ${testset}..."
    # Paths are quoted so whitespace in download_dir cannot split arguments.
    python3 -m zipvoice.bin.infer_zipvoice_dialog \
      --model-name zipvoice_dialog \
      --test-list "${test_tsv}" \
      --res-dir "results/${testset}"
  done
fi
# Stage 4: run the evaluation modules on the English dialog results.
# Reads:  stage, stop_stage, download_dir
# Writes: model_path, wav_path, test_tsv
if [ "${stage}" -le 4 ] && [ "${stop_stage}" -ge 4 ]; then
  echo "Stage 4: Evaluation on test-dialog-en"

  model_path="${download_dir}/tts_eval_models"
  wav_path=results/test_dialog_en
  test_tsv="${download_dir}/dialog_testset/en/test.tsv"

  # All path arguments are quoted so whitespace in download_dir cannot split
  # them into several words.

  # Speaker-similarity module.
  python3 -m zipvoice.eval.speaker_similarity.cpsim \
    --wav-path "${wav_path}" \
    --test-list "${test_tsv}" \
    --model-dir "${model_path}"

  # WER module, default mode.
  python3 -m zipvoice.eval.wer.dialog \
    --wav-path "${wav_path}" \
    --test-list "${test_tsv}" \
    --model-dir "${model_path}" \
    --lang en

  # cpWER mode: will only compute WER and cpWER
  # for speech less than 30s
  python3 -m zipvoice.eval.wer.dialog \
    --wav-path "${wav_path}" \
    --test-list "${test_tsv}" \
    --model-dir "${model_path}" \
    --lang en \
    --cpwer

  # MOS module (takes no test list, only the wav directory).
  python3 -m zipvoice.eval.mos.utmos \
    --wav-path "${wav_path}" \
    --model-dir "${model_path}"
fi
# Stage 5: run the evaluation modules on the Chinese dialog results.
# Reads:  stage, stop_stage, download_dir
# Writes: model_path, wav_path, test_tsv
if [ "${stage}" -le 5 ] && [ "${stop_stage}" -ge 5 ]; then
  echo "Stage 5: Evaluation on test-dialog-zh"

  model_path="${download_dir}/tts_eval_models"
  wav_path=results/test_dialog_zh
  test_tsv="${download_dir}/dialog_testset/zh/test.tsv"

  # All path arguments are quoted so whitespace in download_dir cannot split
  # them into several words.

  # Speaker-similarity module.
  python3 -m zipvoice.eval.speaker_similarity.cpsim \
    --wav-path "${wav_path}" \
    --test-list "${test_tsv}" \
    --model-dir "${model_path}"

  # WER module.
  python3 -m zipvoice.eval.wer.dialog \
    --wav-path "${wav_path}" \
    --test-list "${test_tsv}" \
    --model-dir "${model_path}" \
    --lang zh

  # MOS module (takes no test list, only the wav directory).
  python3 -m zipvoice.eval.mos.utmos \
    --wav-path "${wav_path}" \
    --model-dir "${model_path}"
fi