# File size: 2,407 Bytes
# e4406a3
from pathlib import Path
from time import perf_counter, time
from typing import Iterable, Optional

import soundfile as sf
from kokoro_onnx import Kokoro
from misaki import en, espeak, zh
def run_en(
    models_dir: str = "/Users/jeqin/work/code/TestTranslator/scripts/kokoro_models/",
    texts: Optional[Iterable[str]] = None,
    voice: str = "af_heart",
    output_prefix: str = "audio",
) -> None:
    """Synthesize English sample sentences with Kokoro and save them as WAV files.

    Each text is phonemized with the Misaki English G2P (configured with an
    espeak fallback), synthesized from those phonemes, timed, and written to
    ``{output_prefix}{index}.wav``.

    Args:
        models_dir: Directory holding ``kokoro-v1.0.onnx`` and
            ``voices-v1.0.bin``. A trailing slash is optional.
        texts: Sentences to synthesize. Defaults to the built-in samples.
        voice: Voice name passed to ``Kokoro.create``.
        output_prefix: Path prefix of the generated files; the text index and
            ``.wav`` are appended to it.
    """
    if texts is None:
        texts = [
            "[Misaki](/misˈɑki/) is a G2P engine designed for [Kokoro](/kˈOkəɹO/) models.",
            "For example, the geology and terrain along the railway line.",
            " When choosing solid-state drives, we sometimes see reviews or videos discussing whether a particular solid-state drive has a caching scheme or an uncaching scheme in the performance testing section.",
        ]

    # Misaki G2P with the espeak fallback object supplied by the file's imports.
    fallback = espeak.EspeakFallback(british=False)
    g2p = en.G2P(trf=False, british=False, fallback=fallback)

    # Join with pathlib so the result does not depend on a trailing slash.
    model_root = Path(models_dir)
    kokoro = Kokoro(
        str(model_root / "kokoro-v1.0.onnx"),
        str(model_root / "voices-v1.0.bin"),
    )

    for index, text in enumerate(texts):
        phonemes, _ = g2p(text)

        # Time only the synthesis call; perf_counter is monotonic, so the
        # measured interval cannot be skewed by system clock adjustments.
        start = perf_counter()
        samples, sample_rate = kokoro.create(phonemes, voice, is_phonemes=True)
        time_cost = perf_counter() - start
        print(f"time cost: {time_cost} for text: {text}")

        output_path = f"{output_prefix}{index}.wav"
        sf.write(output_path, samples, sample_rate)
        print(f"Created {output_path}")
def run_zh(
    models_dir: str = "/Users/jeqin/work/code/Translator/python_server/moyoyo_asr_models/kokoro",
    texts: Optional[Iterable[str]] = None,
    voice: str = "zf_xiaoyi",
    speed: float = 1.0,
    output_prefix: str = "audio",
) -> None:
    """Synthesize Chinese sample sentences with Kokoro and save them as WAV files.

    Each text is phonemized with the Misaki Chinese G2P, synthesized from those
    phonemes, timed, and written to ``{output_prefix}{index}.wav``.

    Args:
        models_dir: Directory holding ``kokoro-quant.onnx``,
            ``voices-v1.0.bin`` and ``zh_config.json``.
        texts: Sentences to synthesize. Defaults to the built-in samples.
        voice: Voice name passed to ``Kokoro.create``.
        speed: Speed value passed to ``Kokoro.create``.
        output_prefix: Path prefix of the generated files; the text index and
            ``.wav`` are appended to it.
    """
    if texts is None:
        texts = [
            "千里之行,始于足下。",
            "我想听你唱首歌",
            "窗前明月光,疑是地上霜。举头望明月,低头思故乡。",
        ]

    # Misaki Chinese G2P; no fallback object is passed here.
    g2p = zh.ZHG2P()

    # Join with pathlib so the result does not depend on a trailing slash.
    model_root = Path(models_dir)
    kokoro = Kokoro(
        str(model_root / "kokoro-quant.onnx"),
        str(model_root / "voices-v1.0.bin"),
        vocab_config=str(model_root / "zh_config.json"),
    )

    for index, text in enumerate(texts):
        phonemes, _ = g2p(text)

        # Time only the synthesis call; perf_counter is monotonic, so the
        # measured interval cannot be skewed by system clock adjustments.
        start = perf_counter()
        samples, sample_rate = kokoro.create(
            phonemes, voice, is_phonemes=True, speed=speed
        )
        time_cost = perf_counter() - start
        print(f"time cost: {time_cost} for text: {text}")

        output_path = f"{output_prefix}{index}.wav"
        sf.write(output_path, samples, sample_rate)
        print(f"Created {output_path}")
if __name__ == '__main__':
    # Script entry point: runs the Chinese synthesis demo.
    run_zh()