Spaces:
Runtime error
Runtime error
File size: 4,645 Bytes
af11ce4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 |
#!/bin/bash
# Example pipeline for evaluating TTS models with the objective metrics
# reported in the ZipVoice paper.
#
# Only the stages whose number lies within [stage, stop_stage] are executed;
# edit the two variables below to run a subset of the pipeline.

# Add project root to PYTHONPATH. The ":+" expansion appends the previous
# value only when there is one, so an unset or empty PYTHONPATH does not leave
# a trailing ":" (an empty search-path entry), and the line stays valid even
# if it is ever moved below `set -u`.
export PYTHONPATH=../../${PYTHONPATH:+:${PYTHONPATH}}

# Strict mode:
#   -e           exit as soon as a command fails
#   -u           treat expansion of an undefined variable as an error
#   -o pipefail  a pipeline fails if any command in it fails
# Command tracing (-x) is NOT enabled here; add `set -x` when debugging.
set -e
set -u
set -o pipefail

# First and last stage to run (inclusive).
stage=1
stop_stage=7

# Directory that receives the downloaded test sets and evaluation models.
download_dir=download/

# Uncomment this line to use HF mirror
# export HF_ENDPOINT=https://hf-mirror.com
# Stage 1: fetch the two test-set archives from the Hugging Face dataset repo
# into ${download_dir} and unpack each one in place.
if [[ ${stage} -le 1 && ${stop_stage} -ge 1 ]]; then
  echo "Stage 1: Download test sets (LibriSpeech-PC and Seed-TTS)"
  hf_repo=k2-fsa/TTS_eval_datasets
  mkdir -p "${download_dir}/"

  archives=(librispeech_pc_testset.tar.gz seedtts_testset.tar.gz)
  for file in "${archives[@]}"; do
    echo "Downloading ${file}..."
    huggingface-cli download --repo-type dataset --local-dir "${download_dir}/" "${hf_repo}" "${file}"

    echo "Extracting ${file}..."
    tar -xzf "${download_dir}/${file}" -C "${download_dir}/"
  done
fi
# Stage 2: download the whole k2-fsa/TTS_eval_models repo into
# ${download_dir}/tts_eval_models (the directory the evaluation stages pass
# as --model-dir).
if [[ ${stage} -le 2 && ${stop_stage} -ge 2 ]]; then
  echo "Stage 2: Download all required evaluation models"
  hf_repo=k2-fsa/TTS_eval_models
  eval_models_dir="${download_dir}/tts_eval_models"
  mkdir -p "${eval_models_dir}"
  huggingface-cli download --local-dir "${eval_models_dir}" "${hf_repo}"
fi
# Stage 3: run zipvoice.bin.infer_zipvoice with the "zipvoice" model on each
# test set, passing that set's test.tsv as --test-list and
# results/<testset> as --res-dir.
if [[ ${stage} -le 3 && ${stop_stage} -ge 3 ]]; then
  echo "Stage 3: Inference with the pre-trained ZipVoice model from huggingface"
  for testset in librispeech_pc seedtts_en seedtts_zh; do
    # Map the test-set name to its TSV list under ${download_dir}.
    case "${testset}" in
      librispeech_pc)
        test_tsv="${download_dir}/librispeech_pc_testset/test.tsv"
        ;;
      seedtts_en)
        test_tsv="${download_dir}/seedtts_testset/en/test.tsv"
        ;;
      seedtts_zh)
        test_tsv="${download_dir}/seedtts_testset/zh/test.tsv"
        ;;
      *)
        # Guards against a name being added to the loop without a mapping.
        echo "Error: unknown testset ${testset}" >&2
        exit 1
        ;;
    esac

    echo "Inference on testset ${testset}..."
    python3 -m zipvoice.bin.infer_zipvoice \
      --model-name zipvoice \
      --test-list "${test_tsv}" \
      --res-dir "results/${testset}"
  done
fi
# Stage 4: evaluate the LibriSpeech-PC outputs in results/librispeech_pc with
# the speaker-similarity, WER (zipvoice.eval.wer.hubert) and MOS
# (zipvoice.eval.mos.utmos) modules.
if [[ ${stage} -le 4 && ${stop_stage} -ge 4 ]]; then
  echo "Stage 4: Evaluation on LibriSpeech-PC"

  model_path="${download_dir}/tts_eval_models"
  wav_path="results/librispeech_pc"
  test_tsv="${download_dir}/librispeech_pc_testset/test.tsv"
  # The WER module is given the LibriSpeech-style transcripts instead of
  # test.tsv.
  transcript_tsv="${download_dir}/librispeech_pc_testset/transcript.tsv"

  sim_args=(--wav-path "${wav_path}" --test-list "${test_tsv}" --model-dir "${model_path}")
  wer_args=(--wav-path "${wav_path}" --test-list "${transcript_tsv}" --model-dir "${model_path}")
  mos_args=(--wav-path "${wav_path}" --model-dir "${model_path}")

  python3 -m zipvoice.eval.speaker_similarity.sim "${sim_args[@]}"
  python3 -m zipvoice.eval.wer.hubert "${wer_args[@]}"
  python3 -m zipvoice.eval.mos.utmos "${mos_args[@]}"
fi
# Stage 5: evaluate the Seed-TTS English outputs in results/seedtts_en with
# the speaker-similarity, WER (zipvoice.eval.wer.seedtts, --lang en) and MOS
# (zipvoice.eval.mos.utmos) modules.
if [[ ${stage} -le 5 && ${stop_stage} -ge 5 ]]; then
  echo "Stage 5: Evaluation on Seed-TTS test en"

  model_path="${download_dir}/tts_eval_models"
  wav_path="results/seedtts_en"
  test_tsv="${download_dir}/seedtts_testset/en/test.tsv"

  # Leading options shared by the similarity and WER invocations.
  list_args=(--wav-path "${wav_path}" --test-list "${test_tsv}" --model-dir "${model_path}")

  python3 -m zipvoice.eval.speaker_similarity.sim "${list_args[@]}"
  python3 -m zipvoice.eval.wer.seedtts "${list_args[@]}" --lang en
  python3 -m zipvoice.eval.mos.utmos --wav-path "${wav_path}" --model-dir "${model_path}"
fi
# Stage 6: evaluate the Seed-TTS Chinese outputs in results/seedtts_zh with
# the speaker-similarity, WER (zipvoice.eval.wer.seedtts, --lang zh) and MOS
# (zipvoice.eval.mos.utmos) modules.
if [[ ${stage} -le 6 && ${stop_stage} -ge 6 ]]; then
  echo "Stage 6: Evaluation on Seed-TTS test zh"

  model_path="${download_dir}/tts_eval_models"
  wav_path="results/seedtts_zh"
  test_tsv="${download_dir}/seedtts_testset/zh/test.tsv"

  python3 -m zipvoice.eval.speaker_similarity.sim \
    --wav-path "${wav_path}" \
    --test-list "${test_tsv}" \
    --model-dir "${model_path}"

  python3 -m zipvoice.eval.wer.seedtts \
    --wav-path "${wav_path}" \
    --test-list "${test_tsv}" \
    --model-dir "${model_path}" \
    --lang zh

  python3 -m zipvoice.eval.mos.utmos \
    --wav-path "${wav_path}" \
    --model-dir "${model_path}"
fi