|
|
import os
from time import perf_counter, time

import soundfile as sf
from kokoro_onnx import Kokoro
from misaki import en, espeak, zh
|
|
|
|
|
|
|
|
|
|
|
def run_en(
    models="/Users/jeqin/work/code/TestTranslator/scripts/kokoro_models/",
    texts=None,
    voice="af_heart",
    output_prefix="audio",
):
    """Synthesize English sample sentences with Kokoro and write them as WAV files.

    Each text is converted to phonemes with the misaki English G2P (using an
    espeak fallback), synthesized with ``kokoro.create``, and written to
    ``f"{output_prefix}{index}.wav"`` in the current working directory.
    The elapsed time of the ``kokoro.create`` call alone (G2P and file
    writing excluded) is printed for every text.

    Args:
        models: Directory containing ``kokoro-v1.0.onnx`` and
            ``voices-v1.0.bin``. A trailing slash is optional.
        texts: Iterable of strings to synthesize. ``None`` selects the
            built-in sample sentences.
        voice: Voice name passed to ``kokoro.create``.
        output_prefix: Prefix of the generated WAV file names. Use a
            different prefix than ``run_zh`` to avoid overwriting its files.
    """
    if texts is None:
        texts = [
            "[Misaki](/misˈɑki/) is a G2P engine designed for [Kokoro](/kˈOkəɹO/) models.",
            "For example, the geology and terrain along the railway line.",
            " When choosing solid-state drives, we sometimes see reviews or videos discussing whether a particular solid-state drive has a caching scheme or an uncaching scheme in the performance testing section.",
        ]

    fallback = espeak.EspeakFallback(british=False)
    g2p = en.G2P(trf=False, british=False, fallback=fallback)

    # os.path.join tolerates a directory given with or without a trailing slash.
    kokoro = Kokoro(
        os.path.join(models, "kokoro-v1.0.onnx"),
        os.path.join(models, "voices-v1.0.bin"),
    )

    for index, text in enumerate(texts):
        phonemes, _ = g2p(text)

        # perf_counter is the clock intended for measuring short durations;
        # only the synthesis call is timed.
        start = perf_counter()
        samples, sample_rate = kokoro.create(phonemes, voice, is_phonemes=True)
        time_cost = perf_counter() - start
        print(f"time cost: {time_cost} for text: {text}")

        output_path = f"{output_prefix}{index}.wav"
        sf.write(output_path, samples, sample_rate)
        print(f"Created {output_path}")
|
|
|
|
|
def run_zh(
    models="/Users/jeqin/work/code/Translator/python_server/moyoyo_asr_models/kokoro",
    texts=None,
    voice="zf_xiaoyi",
    speed=1.0,
    output_prefix="audio",
):
    """Synthesize Chinese sample sentences with Kokoro and write them as WAV files.

    Each text is converted to phonemes with ``zh.ZHG2P``, synthesized with
    ``kokoro.create``, and written to ``f"{output_prefix}{index}.wav"`` in
    the current working directory. The elapsed time of the ``kokoro.create``
    call alone (G2P and file writing excluded) is printed for every text.

    Args:
        models: Directory containing ``kokoro-quant.onnx``,
            ``voices-v1.0.bin`` and ``zh_config.json``. A trailing slash is
            optional.
        texts: Iterable of strings to synthesize. ``None`` selects the
            built-in sample sentences.
        voice: Voice name passed to ``kokoro.create``.
        speed: Value forwarded as the ``speed`` argument of ``kokoro.create``.
        output_prefix: Prefix of the generated WAV file names. Use a
            different prefix than ``run_en`` to avoid overwriting its files.
    """
    if texts is None:
        texts = [
            "千里之行,始于足下。",
            "我想听你唱首歌",
            "窗前明月光,疑是地上霜。举头望明月,低头思故乡。",
        ]

    g2p = zh.ZHG2P()

    # os.path.join tolerates a directory given with or without a trailing slash.
    kokoro = Kokoro(
        os.path.join(models, "kokoro-quant.onnx"),
        os.path.join(models, "voices-v1.0.bin"),
        vocab_config=os.path.join(models, "zh_config.json"),
    )

    for index, text in enumerate(texts):
        phonemes, _ = g2p(text)

        # perf_counter is the clock intended for measuring short durations;
        # only the synthesis call is timed.
        start = perf_counter()
        samples, sample_rate = kokoro.create(
            phonemes, voice, is_phonemes=True, speed=speed
        )
        time_cost = perf_counter() - start
        print(f"time cost: {time_cost} for text: {text}")

        output_path = f"{output_prefix}{index}.wav"
        sf.write(output_path, samples, sample_rate)
        print(f"Created {output_path}")
|
|
|
|
|
# Script entry point: when executed directly, only the Chinese demo is run.
if __name__ == "__main__":
    run_zh()