# Source: TestTranslator / scripts / run_kokoro.py
# Last commit e4406a3 by yujuanqin: "update scripts and test_data"
from pathlib import Path
from time import time
import os
import soundfile as sf
from misaki import zh
import onnxruntime
from kokoro_onnx import Kokoro
# providers = onnxruntime.get_available_providers()
# print(f"Available onnx runtime providers: {providers}")
def create_session(model_path):
    """Build an ONNX Runtime inference session for the model at *model_path*.

    Only the second entry reported by ``onnxruntime.get_available_providers()``
    is requested (slice ``[1:2]``), so the first-listed provider is skipped.
    When fewer than two providers are reported, the list handed to
    ``InferenceSession`` is empty.

    Args:
        model_path: Path of the ``.onnx`` model file; passed unchanged to
            ``onnxruntime.InferenceSession``.

    Returns:
        The created ``onnxruntime.InferenceSession``.
    """
    # See list of providers https://github.com/microsoft/onnxruntime/issues/22101#issuecomment-2357667377
    providers = onnxruntime.get_available_providers()
    # Keep just the second reported provider (an empty list if there is none).
    providers = providers[1:2]
    # NOTE: despite the label, this prints the providers selected above, not
    # the full list of available ones.
    print(f"Available onnx runtime providers: {providers}")

    # See session options https://onnxruntime.ai/docs/performance/tune-performance/threading.html#thread-management
    sess_options = onnxruntime.SessionOptions()
    # os.cpu_count() returns None when the count cannot be determined; fall
    # back to 1 so the floor division below cannot raise TypeError.
    cpu_count = (os.cpu_count() or 1) // 2
    print(f"Setting threads to CPU cores count: {cpu_count}")
    # NOTE: the assignment below is disabled, so the value above is only
    # reported and the session options are left at their defaults.
    # sess_options.intra_op_num_threads = cpu_count
    session = onnxruntime.InferenceSession(
        model_path, providers=providers, sess_options=sess_options
    )
    return session
# Locations of the Kokoro model assets (absolute paths on the author's machine).
model_folder = Path("/Users/jeqin/work/code/Translator/python_server/moyoyo_asr_models/kokoro")
model_path = str(model_folder.joinpath("kokoro-quant.onnx"))
voice_model_path = str(model_folder.joinpath("voices-v1.0.bin"))
vocab_config = str(model_folder.joinpath("zh_config.json"))

# Sample sentences to synthesize, and the voice used for all of them.
texts = [
    "千里之行,始于足下。",
    "我想听你唱首歌",
    "窗前明月光,疑是地上霜。举头望明月,低头思故乡。",
]
voice = "zf_xiaoyi"

# Build the inference session, wrap it in a Kokoro model, and create the
# converter whose first return value is fed to the model as phonemes.
session = create_session(model_path)
model = Kokoro.from_session(session, voice_model_path, vocab_config=vocab_config)
g2p = zh.ZHG2P()

# Synthesize every sentence five times. The output file name depends only on
# the sentence index, so each pass overwrites the files of the previous one.
for _run in range(5):
    for index, text in enumerate(texts):
        phonemes, _ = g2p(text)

        # Only the model.create() call is timed; phoneme conversion and the
        # file write are outside the measured interval.
        started_at = time()
        samples, sample_rate = model.create(
            phonemes, voice=voice, speed=1.0, is_phonemes=True
        )
        elapsed = time() - started_at
        print(f"time cost: {elapsed} for text: {text}")

        wav_name = f"audio_{index}.wav"
        sf.write(wav_name, samples, sample_rate)
        print(f"Created {wav_name}")