"""Benchmark Kokoro ONNX text-to-speech on a few Mandarin sentences.

The script loads a quantized Kokoro model through an explicit ONNX Runtime
session, converts each sample sentence to phonemes with misaki's Chinese G2P,
synthesizes audio several times, prints how long each synthesis call took and
writes the result to ``audio_<index>.wav`` in the current working directory.
"""

from pathlib import Path
from time import time
import os

import soundfile as sf
from misaki import zh
import onnxruntime
from kokoro_onnx import Kokoro


def create_session(model_path, intra_op_num_threads=None):
    """Create an ONNX Runtime inference session for the model at *model_path*.

    Args:
        model_path: Path of the ``.onnx`` model file, as a string.
        intra_op_num_threads: Optional intra-op thread count to set on the
            session options. When ``None`` (the default) the session options
            are left untouched, so ONNX Runtime's own threading defaults apply.

    Returns:
        The ``onnxruntime.InferenceSession`` built with the selected providers.
    """
    # See list of providers https://github.com/microsoft/onnxruntime/issues/22101#issuecomment-2357667377
    available = onnxruntime.get_available_providers()
    # The session uses the second entry of the available list. When there is
    # no second entry the slice is empty, so fall back to the whole list
    # instead of handing ONNX Runtime an empty provider list.
    providers = available[1:2] or available
    print(f"Available onnx runtime providers: {available}")
    print(f"Selected onnx runtime providers: {providers}")

    # See session options https://onnxruntime.ai/docs/performance/tune-performance/threading.html#thread-management
    sess_options = onnxruntime.SessionOptions()
    if intra_op_num_threads is None:
        # os.cpu_count() may return None, and halving a single core gives 0;
        # guard both so the informational message can never raise.
        half_cores = max(1, (os.cpu_count() or 1) // 2)
        print(
            "Keeping default onnxruntime threading "
            f"(half of the CPU cores would be {half_cores})"
        )
    else:
        sess_options.intra_op_num_threads = intra_op_num_threads
        print(f"Setting intra-op threads to: {intra_op_num_threads}")

    return onnxruntime.InferenceSession(
        model_path, providers=providers, sess_options=sess_options
    )


# Locations of the model, the voice bank and the Chinese vocabulary config.
model_folder = Path("/Users/jeqin/work/code/Translator/python_server/moyoyo_asr_models/kokoro")
model_path = str(model_folder / "kokoro-quant.onnx")
voice_model_path = str(model_folder / "voices-v1.0.bin")
vocab_config = str(model_folder / "zh_config.json")

# Sentences synthesized in every benchmark round.
texts = [
    "千里之行,始于足下。",
    "我想听你唱首歌",
    "窗前明月光,疑是地上霜。举头望明月,低头思故乡。",
]
voice = "zf_xiaoyi"

# Number of times the whole list of sentences is synthesized.
BENCHMARK_ROUNDS = 5


def main():
    """Run the benchmark: synthesize every sentence in each round and time it."""
    session = create_session(model_path)
    model = Kokoro.from_session(session, voice_model_path, vocab_config=vocab_config)
    g2p = zh.ZHG2P()

    for _ in range(BENCHMARK_ROUNDS):
        for index, text in enumerate(texts):
            # Phonemization happens before the timer starts, so only the
            # model.create() call is measured.
            phonemes, _unused = g2p(text)
            start = time()
            samples, sample_rate = model.create(
                phonemes, voice=voice, speed=1.0, is_phonemes=True
            )
            end = time()
            time_cost = end - start
            print(f"time cost: {time_cost} for text: {text}")
            # The file name depends only on the sentence index, so each round
            # overwrites the files written by the previous one.
            sf.write(f"audio_{index}.wav", samples, sample_rate)
            print(f"Created audio_{index}.wav")


if __name__ == "__main__":
    main()