# TestTranslator / scripts / run_kokoro_sample.py
# Author: yujuanqin
# Commit e4406a3: update scripts and test_data
import os
from time import perf_counter, time

import soundfile as sf
from kokoro_onnx import Kokoro
from misaki import en, espeak, zh
def run_en(
    models_dir: str = "/Users/jeqin/work/code/TestTranslator/scripts/kokoro_models/",
    output_prefix: str = "audio",
) -> None:
    """Synthesize the English sample sentences with Kokoro and save them as WAV.

    Each sentence is phonemized with Misaki's English G2P (with an espeak-ng
    fallback), synthesized with the "af_heart" voice, and written to
    ``{output_prefix}{index}.wav`` relative to the current working directory.
    The time spent in the synthesis call is printed for every sentence.

    Args:
        models_dir: Directory holding ``kokoro-v1.0.onnx`` and
            ``voices-v1.0.bin``. The default is the location the script was
            originally written against.
        output_prefix: Prefix of the generated WAV file names. Pass a distinct
            value (e.g. ``"en_audio"``) to avoid overwriting the files written
            by ``run_zh``, which uses the same default prefix.
    """
    # Misaki G2P with espeak-ng fallback
    fallback = espeak.EspeakFallback(british=False)
    g2p = en.G2P(trf=False, british=False, fallback=fallback)

    # os.path.join works whether or not models_dir ends with a separator.
    kokoro = Kokoro(
        os.path.join(models_dir, "kokoro-v1.0.onnx"),
        os.path.join(models_dir, "voices-v1.0.bin"),
    )

    texts = [
        "[Misaki](/misˈɑki/) is a G2P engine designed for [Kokoro](/kˈOkəɹO/) models.",
        "For example, the geology and terrain along the railway line.",
        " When choosing solid-state drives, we sometimes see reviews or videos discussing whether a particular solid-state drive has a caching scheme or an uncaching scheme in the performance testing section.",
    ]

    for index, text in enumerate(texts):
        # Phonemize (not included in the timed section)
        phonemes, _ = g2p(text)

        # Time only the synthesis call. perf_counter() is a monotonic clock
        # intended for measuring durations, unlike wall-clock time().
        start = perf_counter()
        samples, sample_rate = kokoro.create(phonemes, "af_heart", is_phonemes=True)
        time_cost = perf_counter() - start
        print(f"time cost: {time_cost} for text: {text}")

        # Save
        output_path = f"{output_prefix}{index}.wav"
        sf.write(output_path, samples, sample_rate)
        print(f"Created {output_path}")
def run_zh(
    models_dir: str = "/Users/jeqin/work/code/Translator/python_server/moyoyo_asr_models/kokoro",
    output_prefix: str = "audio",
) -> None:
    """Synthesize the Chinese sample sentences with Kokoro and save them as WAV.

    Each sentence is phonemized with Misaki's ``ZHG2P``, synthesized with the
    "zf_xiaoyi" voice at speed 1.0, and written to
    ``{output_prefix}{index}.wav`` relative to the current working directory.
    The time spent in the synthesis call is printed for every sentence.

    Args:
        models_dir: Directory holding ``kokoro-quant.onnx``,
            ``voices-v1.0.bin`` and ``zh_config.json``. The default is the
            location the script was originally written against.
        output_prefix: Prefix of the generated WAV file names. Pass a distinct
            value (e.g. ``"zh_audio"``) to avoid overwriting the files written
            by ``run_en``, which uses the same default prefix.
    """
    # Misaki Chinese G2P; no espeak fallback is configured here.
    g2p = zh.ZHG2P()

    # os.path.join works whether or not models_dir ends with a separator.
    kokoro = Kokoro(
        os.path.join(models_dir, "kokoro-quant.onnx"),
        os.path.join(models_dir, "voices-v1.0.bin"),
        vocab_config=os.path.join(models_dir, "zh_config.json"),
    )

    texts = [
        "千里之行,始于足下。",
        "我想听你唱首歌",
        "窗前明月光,疑是地上霜。举头望明月,低头思故乡。",
    ]

    for index, text in enumerate(texts):
        # Phonemize (not included in the timed section)
        phonemes, _ = g2p(text)

        # Time only the synthesis call. perf_counter() is a monotonic clock
        # intended for measuring durations, unlike wall-clock time().
        start = perf_counter()
        samples, sample_rate = kokoro.create(
            phonemes, "zf_xiaoyi", is_phonemes=True, speed=1.0
        )
        time_cost = perf_counter() - start
        print(f"time cost: {time_cost} for text: {text}")

        # Save
        output_path = f"{output_prefix}{index}.wav"
        sf.write(output_path, samples, sample_rate)
        print(f"Created {output_path}")
# Script entry point: running this file directly synthesizes the Chinese
# samples only; call run_en() manually for the English ones.
if __name__ == "__main__":
    run_zh()