"""Small benchmark/demo script: phonemize text with Misaki, synthesize with Kokoro.

Each ``run_*`` function loads a Kokoro ONNX model, converts a few sample
sentences to phonemes, times the synthesis call and writes one WAV file per
sentence into the current working directory.
"""
import os
from time import perf_counter, time  # noqa: F401  (`time` kept from the original import set)

import soundfile as sf
from kokoro_onnx import Kokoro
from misaki import en, espeak, zh

# Default model locations (machine-specific; override through the function arguments).
EN_MODELS_DIR = "/Users/jeqin/work/code/TestTranslator/scripts/kokoro_models/"
ZH_MODELS_DIR = "/Users/jeqin/work/code/Translator/python_server/moyoyo_asr_models/kokoro"


def _synthesize_texts(kokoro, g2p, texts, voice, **create_kwargs):
    """Phonemize, synthesize and save every entry of ``texts``.

    Args:
        kokoro: Loaded ``Kokoro`` instance used for synthesis.
        g2p: Callable returning ``(phonemes, tokens)`` for a text.
        texts: Sentences to synthesize.
        voice: Voice name passed to ``kokoro.create``.
        **create_kwargs: Extra keyword arguments forwarded to ``kokoro.create``.

    Output files are named ``audio{index}.wav`` in the current working
    directory, so consecutive runs (English then Chinese, for example)
    overwrite each other's files.
    """
    for index, text in enumerate(texts):
        # Phonemization happens outside the timed window: only synthesis is measured.
        phonemes, _ = g2p(text)

        start = perf_counter()
        samples, sample_rate = kokoro.create(
            phonemes, voice, is_phonemes=True, **create_kwargs
        )
        time_cost = perf_counter() - start
        print(f"time cost: {time_cost} for text: {text}")

        sf.write(f"audio{index}.wav", samples, sample_rate)
        print(f"Created audio{index}.wav")


def run_en(models=EN_MODELS_DIR):
    """Synthesize the English sample sentences with the ``af_heart`` voice.

    Args:
        models: Directory containing ``kokoro-v1.0.onnx`` and ``voices-v1.0.bin``.
    """
    # Misaki G2P with espeak-ng fallback.
    fallback = espeak.EspeakFallback(british=False)
    g2p = en.G2P(trf=False, british=False, fallback=fallback)

    kokoro = Kokoro(
        os.path.join(models, "kokoro-v1.0.onnx"),
        os.path.join(models, "voices-v1.0.bin"),
    )

    texts = [
        "[Misaki](/misˈɑki/) is a G2P engine designed for [Kokoro](/kˈOkəɹO/) models.",
        "For example, the geology and terrain along the railway line.",
        # The leading space is part of the original sample text.
        " When choosing solid-state drives, we sometimes see reviews or videos "
        "discussing whether a particular solid-state drive has a caching scheme "
        "or an uncaching scheme in the performance testing section.",
    ]
    _synthesize_texts(kokoro, g2p, texts, "af_heart")


def run_zh(models=ZH_MODELS_DIR):
    """Synthesize the Chinese sample sentences with the ``zf_xiaoyi`` voice.

    Args:
        models: Directory containing ``kokoro-quant.onnx``, ``voices-v1.0.bin``
            and ``zh_config.json``.
    """
    # Misaki Chinese G2P (no espeak fallback is configured here).
    g2p = zh.ZHG2P()

    kokoro = Kokoro(
        os.path.join(models, "kokoro-quant.onnx"),
        os.path.join(models, "voices-v1.0.bin"),
        vocab_config=os.path.join(models, "zh_config.json"),
    )

    texts = [
        "千里之行,始于足下。",
        "我想听你唱首歌",
        "窗前明月光,疑是地上霜。举头望明月,低头思故乡。",
    ]
    _synthesize_texts(kokoro, g2p, texts, "zf_xiaoyi", speed=1.0)


if __name__ == '__main__':
    run_zh()