|
|
import os
from pathlib import Path
from time import perf_counter, time

import onnxruntime
import soundfile as sf
from kokoro_onnx import Kokoro
from misaki import zh
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def create_session(model_path):
    """Create an ONNX Runtime inference session for the model at ``model_path``.

    Args:
        model_path: Path of the ONNX model file; passed unchanged to
            ``onnxruntime.InferenceSession``.

    Returns:
        The ``onnxruntime.InferenceSession`` built with the selected
        execution provider and the configured session options.
    """
    available = onnxruntime.get_available_providers()
    # Only the second entry of the available-provider list is used.
    # NOTE(review): presumably this skips an accelerator provider listed
    # first in favour of the next one -- confirm which provider is intended.
    # With fewer than two providers the slice is empty, so fall back to the
    # full list instead of handing the session an empty provider list.
    providers = available[1:2] or available
    print(f"Available onnx runtime providers: {providers}")

    sess_options = onnxruntime.SessionOptions()
    # Use half of the reported CPUs. os.cpu_count() may return None, and
    # halving a single CPU yields 0, so clamp the result to at least 1.
    cpu_count = max(1, (os.cpu_count() or 1) // 2)
    print(f"Setting threads to CPU cores count: {cpu_count}")
    # Actually apply the thread count announced above; previously the value
    # was computed and logged but never set on the session options.
    sess_options.intra_op_num_threads = cpu_count

    session = onnxruntime.InferenceSession(
        model_path, providers=providers, sess_options=sess_options
    )
    return session
|
|
|
|
|
# Files needed by the benchmark: the ONNX model, the voice bank and the
# vocabulary config, all located in one model folder.
model_folder = Path("/Users/jeqin/work/code/Translator/python_server/moyoyo_asr_models/kokoro")
model_path = str(model_folder / "kokoro-quant.onnx")
voice_model_path = str(model_folder / "voices-v1.0.bin")
vocab_config = str(model_folder / "zh_config.json")

# Sentences synthesized on every pass of the benchmark.
texts = [
    "千里之行,始于足下。",
    "我想听你唱首歌",
    "窗前明月光,疑是地上霜。举头望明月,低头思故乡。",
]
voice = "zf_xiaoyi"

session = create_session(model_path)
model = Kokoro.from_session(session, voice_model_path, vocab_config=vocab_config)
g2p = zh.ZHG2P()

# Run the whole text list five times so several timings are printed per
# sentence. Each pass overwrites the audio_<index>.wav files of the last one.
for i in range(5):
    for index, text in enumerate(texts):
        # Grapheme-to-phoneme conversion happens outside the timed region,
        # so only model.create() is measured.
        phonemes, _ = g2p(text)

        # perf_counter() is monotonic and intended for interval timing;
        # wall-clock time() can jump if the system clock is adjusted.
        start = perf_counter()
        samples, sample_rate = model.create(phonemes, voice=voice, speed=1.0, is_phonemes=True)
        end = perf_counter()

        time_cost = end - start
        print(f"time cost: {time_cost} for text: {text}")

        sf.write(f"audio_{index}.wav", samples, sample_rate)
        print(f"Created audio_{index}.wav")