|
|
import sys |
|
|
import re |
|
|
import asyncio |
|
|
from kokoro import KPipeline |
|
|
import base_model |
|
|
import utils |
|
|
|
|
|
class StreamingEngine(base_model.BaseEngine):
    """Engine that wraps a kokoro ``KPipeline`` and yields audio piece by piece.

    Attributes:
        default_voice: Voice id used when a requested voice has no entry in
            ``voice_mapping``.
        voice_mapping: Maps lower-case alias names to the voice ids that are
            passed to the pipeline.
        tts: The ``KPipeline`` instance; only set once ``load_model`` succeeds.
        sample_rate: Hard-coded to 24000 by ``load_model``.
    """

    def __init__(self, name):
        """Set up the voice tables, then delegate to the base initializer.

        Args:
            name: Engine name, forwarded unchanged to ``BaseEngine.__init__``.
        """
        self.default_voice = "af_heart"
        # NOTE(review): the keys look like OpenAI-style voice aliases and the
        # values like Kokoro voice ids -- confirm against the API layer.
        self.voice_mapping = {
            "alloy": "af_heart",
            "echo": "af_bella",
            "fable": "af_nicole",
            "onyx": "af_aoede",
            "nova": "af_aoede",
            "shimmer": "af_aoede",
        }

        # Kept after the attribute setup, as in the original ordering.
        # NOTE(review): presumably the base initializer may rely on these
        # attributes already existing -- confirm before reordering.
        super().__init__(name)

    def load_model(self):
        """Create the Kokoro pipeline and record the sample rate.

        Raises:
            RuntimeError: If constructing ``KPipeline`` fails; the original
                exception is chained as the cause.
        """
        try:
            # NOTE(review): 'a' is presumably Kokoro's American-English
            # language code -- confirm against the kokoro documentation.
            self.tts = KPipeline(lang_code='a')
        except Exception as e:
            print(f"Error initializing model {self.name}: {e}")
            raise RuntimeError(f"Failed to load model {self.name}") from e
        else:
            # NOTE(review): assumed to be the pipeline's output rate in Hz.
            self.sample_rate = 24000
            print(f"Model Loaded. Rate: {self.sample_rate}")

    def get_style_safe(self, voice_name: str) -> str:
        """Resolve a requested voice name to a voice id, never raising.

        The name is lower-cased and stripped before the lookup in
        ``voice_mapping``. Unknown names, as well as ``None`` or any other
        non-string value, resolve to ``default_voice``.

        Args:
            voice_name: Requested voice alias.

        Returns:
            The mapped voice id, or ``default_voice`` when there is no match.
        """
        if isinstance(voice_name, str):
            lookup_key = voice_name.lower().strip()
            resolved = self.voice_mapping.get(lookup_key, self.default_voice)
        else:
            # A missing or non-string voice must not crash the lookup; fall
            # back to the default exactly like an unknown alias does.
            resolved = self.default_voice
        print(f"Found voice {resolved}")
        return resolved

    def preprocess_text(self, text) -> list:
        """Wrap the input text in a single-element list.

        Args:
            text: Text to synthesize.

        Returns:
            ``[text]``, or an empty list when ``text`` is falsy
            (``None``, ``""``, ...).
        """
        return [text] if text else []

    def generate(self, chunks: str, voice_name: str, speed: float):
        """Run the pipeline and yield audio for each result it produces.

        This is a generator: nothing is synthesized until it is iterated, and
        ``load_model`` must have succeeded first so that ``self.tts`` exists.

        Args:
            chunks: Text input, forwarded unchanged to the pipeline.
                NOTE(review): annotated ``str``, yet ``preprocess_text``
                returns a list -- confirm which form callers pass.
            voice_name: Voice id passed as the pipeline's ``voice`` argument.
            speed: Passed as the pipeline's ``speed`` argument.

        Yields:
            The result of ``audio.numpy()`` for each triple the pipeline
            yields.
        """
        pipeline_results = self.tts(chunks, voice=voice_name, speed=speed)
        # Each result unpacks into three items; only the last one (the audio)
        # is used here.
        for _gs, _ps, audio in pipeline_results:
            yield audio.numpy()