|
|
import os |
|
|
from paddlespeech.cli.tts.infer import TTSExecutor |
|
|
|
|
|
""" |
|
|
PaddleSpeech |
|
|
|
|
|
声码器说明:这里预置了三种声码器【PWGan】【WaveRnn】【HifiGan】,三种声码器的效果和生成时间有比较大的差距,请根据自己的需要进行选择。不过这里只选择了 PWGan 和 HifiGan 两种,因为 WaveRnn 太慢了 |
|
|
|
|
|
| 声码器 | 音频质量 | 生成速度 | |
|
|
| :----: | :----: | :----: | |
|
|
| PWGan | 中等 | 中等 | |
|
|
| WaveRnn | 高 | 非常慢(耐心等待) | |
|
|
| HifiGan | 低 | 快 | |
|
|
|
|
|
这些PaddleSpeech中的样例主要按数据集分类,我们主要使用的TTS数据集有: |
|
|
|
|
|
CSMSC (普通话单发音人) |
|
|
AISHELL3 (普通话多发音人) |
|
|
LJSpeech (英文单发音人) |
|
|
VCTK (英文多发音人) |
|
|
|
|
|
PaddleSpeech 的 TTS 模型具有以下映射关系: |
|
|
|
|
|
tts0 - Tacotron2 |
|
|
tts1 - TransformerTTS |
|
|
tts2 - SpeedySpeech |
|
|
tts3 - FastSpeech2 |
|
|
voc0 - WaveFlow |
|
|
voc1 - Parallel WaveGAN |
|
|
voc2 - MelGAN |
|
|
voc3 - MultiBand MelGAN |
|
|
voc4 - Style MelGAN |
|
|
voc5 - HiFiGAN |
|
|
vc0 - Tacotron2 Voice Clone with GE2E |
|
|
vc1 - FastSpeech2 Voice Clone with GE2E |
|
|
|
|
|
以下是 PaddleSpeech 提供的可以被命令行和 python API 使用的预训练模型列表: |
|
|
|
|
|
- 声学模型 |
|
|
| 模型 | 语言 | |
|
|
| :--- | :---: | |
|
|
| speedyspeech_csmsc | zh | |
|
|
| fastspeech2_csmsc | zh | |
|
|
| fastspeech2_ljspeech | en | |
|
|
| fastspeech2_aishell3 | zh | |
|
|
| fastspeech2_vctk | en | |
|
|
| fastspeech2_cnndecoder_csmsc | zh | |
|
|
| fastspeech2_mix | mix | |
|
|
| tacotron2_csmsc | zh | |
|
|
| tacotron2_ljspeech | en | |
|
|
| fastspeech2_male | zh | |
|
|
| fastspeech2_male | en | |
|
|
| fastspeech2_male | mix | |
|
|
| fastspeech2_canton | canton | |
|
|
|
|
|
- 声码器 |
|
|
| 模型 | 语言 | |
|
|
| :--- | :---: | |
|
|
| pwgan_csmsc | zh | |
|
|
| pwgan_ljspeech | en | |
|
|
| pwgan_aishell3 | zh | |
|
|
| pwgan_vctk | en | |
|
|
| mb_melgan_csmsc | zh | |
|
|
| style_melgan_csmsc | zh | |
|
|
| hifigan_csmsc | zh | |
|
|
| hifigan_ljspeech | en | |
|
|
| hifigan_aishell3 | zh | |
|
|
| hifigan_vctk | en | |
|
|
| wavernn_csmsc | zh | |
|
|
| pwgan_male | zh | |
|
|
| hifigan_male | zh | |
|
|
""" |
|
|
|
|
|
|
|
|
class _InvalidTTSOption(ValueError, AssertionError):
    """Raised when an unsupported model, vocoder or language is requested.

    It also derives from ``AssertionError`` so that callers which caught the
    failures of the former ``assert`` based checks keep working.
    """


class PaddleTTS:
    """Thin wrapper that synthesises speech with PaddleSpeech TTS models."""

    def __init__(self) -> None:
        pass

    @staticmethod
    def _resolve_models(am, voc, lang, spk_id):
        """Map lower-cased short names to the model tags used for ``lang``.

        Returns an ``(am, voc, spk_id)`` tuple; raises ``_InvalidTTSOption``
        for unsupported combinations.
        """
        # The acoustic model family is checked for every language, even for
        # those (mix, canton) that override it afterwards.
        if am not in ('tacotron2', 'fastspeech2'):
            raise _InvalidTTSOption("am must be 'tacotron2' or 'fastspeech2'")

        if lang == 'mix':
            return 'fastspeech2_mix', voc + '_csmsc', spk_id
        if lang == 'en':
            return am + '_ljspeech', voc + '_ljspeech', spk_id
        if lang == 'zh':
            if voc not in ('wavernn', 'pwgan', 'hifigan', 'style_melgan', 'mb_melgan'):
                raise _InvalidTTSOption(
                    "voc must be 'wavernn' or 'pwgan' or 'hifigan' or 'style_melgan' or 'mb_melgan'"
                )
            return am + '_csmsc', voc + '_csmsc', spk_id
        if lang == 'canton':
            # Cantonese uses a fixed model pair and a fixed speaker id.
            return 'fastspeech2_canton', 'pwgan_aishell3', 10

        # Previously an unknown language fell through with un-suffixed model
        # names; reject it up front instead.
        raise _InvalidTTSOption(
            f"lang must be 'zh', 'en', 'mix' or 'canton', got {lang!r}"
        )

    def predict(self, text, am, voc, spk_id=174, lang='zh', male=False, save_path='output.wav'):
        """Synthesise ``text`` into a wav file and return the result.

        Args:
            text: The text to synthesise.
            am: Acoustic model family, case-insensitive: ``'tacotron2'`` or
                ``'fastspeech2'``. Not used when ``male`` is true; replaced by
                a fixed model for ``lang='mix'`` and ``lang='canton'``.
            voc: Vocoder short name, case-insensitive (e.g. ``'pwgan'``,
                ``'hifigan'``). With ``male=True`` only ``'pwgan'`` and
                ``'hifigan'`` are accepted.
            spk_id: Speaker id forwarded to the model; forced to ``10`` for
                ``lang='canton'`` and not forwarded when ``male`` is true.
            lang: One of ``'zh'``, ``'en'``, ``'mix'`` or ``'canton'``.
            male: When true, use ``fastspeech2_male`` with ``<voc>_male``.
            save_path: Where the generated audio is written.

        Returns:
            ``save_path`` when the ``paddlespeech`` command line run succeeds;
            otherwise whatever the ``TTSExecutor`` call returns.

        Raises:
            ValueError: For an unsupported ``am``, ``voc`` or ``lang``. The
                exception is also an ``AssertionError`` for compatibility.
        """
        # Local import so this method does not rely on the module header.
        import subprocess

        use_onnx = True
        voc = voc.lower()
        am = am.lower()

        if male:
            if voc not in ("pwgan", "hifigan"):
                raise _InvalidTTSOption("male voc must be 'pwgan' or 'hifigan'")
            self.tts = TTSExecutor()
            return self.tts(
                text=text,
                output=save_path,
                am='fastspeech2_male',
                voc=voc + '_male',
                lang=lang,
                use_onnx=use_onnx,
            )

        # Validate before building the executor so bad arguments fail fast.
        am, voc, spk_id = self._resolve_models(am, voc, lang, spk_id)
        self.tts = TTSExecutor()
        print("am:", am, "voc:", voc, "lang:", lang, "male:", male, "spk_id:", spk_id)

        # An argv list (no shell) keeps quotes, spaces or shell metacharacters
        # in ``text`` / ``save_path`` from being interpreted by a shell.
        cmd = [
            'paddlespeech', 'tts',
            '--am', am,
            '--voc', voc,
            '--input', str(text),
            '--output', str(save_path),
            '--lang', str(lang),
            '--spk_id', str(spk_id),
            '--use_onnx', str(use_onnx),
        ]
        try:
            # check=True turns a non-zero exit status into an exception, so
            # the Python API fallback below is actually reachable.
            subprocess.run(cmd, check=True)
        except (OSError, subprocess.CalledProcessError):
            # Executable missing or the CLI run failed: use the Python API.
            return self.tts(
                text=text,
                output=save_path,
                am=am,
                voc=voc,
                lang=lang,
                spk_id=spk_id,
                use_onnx=use_onnx,
            )
        return save_path
|
|
|
|
|
if __name__ == "__main__":
    # Demo run: synthesise a short English sentence into output.wav.
    demo_options = {
        'spk_id': 174,
        'lang': 'en',
        'male': False,
        'save_path': 'output.wav',
    }
    tts = PaddleTTS()
    tts.predict("Hello world", 'FastSpeech2', 'PWGan', **demo_options)