# File size: 1,969 Bytes
import sys
import re
import asyncio
from kokoro import KPipeline
import base_model
import utils
class StreamingEngine(base_model.BaseEngine):
    """Text-to-speech engine that wraps a Kokoro ``KPipeline``.

    Voice names from the ``voice_mapping`` table (alloy, echo, fable, onyx,
    nova, shimmer) are translated to Kokoro voice ids; anything else resolves
    to ``default_voice``.
    """

    def __init__(self, name):
        """Set up the voice configuration, then run the base initializer.

        Args:
            name: Engine name, forwarded unchanged to the base class.
        """
        # Voice configuration is assigned before super().__init__() so it is
        # already available if the base initializer triggers load_model().
        # NOTE(review): presumably BaseEngine.__init__ calls load_model();
        # confirm against base_model.
        self.default_voice = "af_heart"
        self.voice_mapping = {
            "alloy": "af_heart",
            "echo": "af_bella",
            "fable": "af_nicole",
            "onyx": "af_aoede",
            "nova": "af_aoede",
            "shimmer": "af_aoede",
        }
        super().__init__(name)

    def load_model(self):
        """Create the Kokoro pipeline and record the output sample rate.

        Sets ``self.tts`` and ``self.sample_rate`` (24000).

        Raises:
            RuntimeError: If the pipeline cannot be constructed. The original
                exception is chained as ``__cause__``.
        """
        try:
            self.tts = KPipeline(lang_code='a')
            self.sample_rate = 24000
            print(f"Model Loaded. Rate: {self.sample_rate}")
        except Exception as e:
            # Raise instead of exiting the process so the caller decides how
            # to react. The name is read defensively: if loading fails before
            # the base class has assigned ``self.name``, a plain attribute
            # access would raise AttributeError and hide the real failure.
            name = getattr(self, "name", type(self).__name__)
            print(f"Error initializing model {name}: {e}")
            raise RuntimeError(f"Failed to load model {name}") from e

    def get_style_safe(self, voice_name: str):
        """Resolve a requested voice name to a Kokoro voice id.

        The name is lower-cased and stripped, then looked up in
        ``self.voice_mapping``. Names that are not keys of that table
        (including ids such as ``"af_bella"`` that only appear as values)
        resolve to ``self.default_voice``, and so does ``None`` or any other
        non-string input.

        Args:
            voice_name: Requested voice name; case and surrounding whitespace
                are ignored.

        Returns:
            The mapped Kokoro voice id, or ``self.default_voice``.
        """
        # Map -> Fallback. Non-string input (e.g. None when the caller omitted
        # the voice) goes straight to the fallback instead of raising on
        # ``.lower()``.
        if isinstance(voice_name, str):
            clean_name = voice_name.lower().strip()
            target_name = self.voice_mapping.get(clean_name, self.default_voice)
        else:
            target_name = self.default_voice
        print(f"Found voice {target_name}")
        return target_name

    def preprocess_text(self, text):
        """Wrap the input text in a list of chunks.

        Args:
            text: Text to synthesize.

        Returns:
            An empty list when ``text`` is falsy (``None`` or ``""``),
            otherwise a one-element list containing ``text`` unchanged.
        """
        if not text:
            return []
        return [text]

    def generate(self, chunks: str, voice_name: str, speed: float):
        """Synthesize audio, yielding one array per pipeline segment.

        This is a generator: nothing is synthesized until it is iterated.

        Args:
            chunks: Text handed directly to the pipeline.
            voice_name: Voice id passed to the pipeline as ``voice``.
            speed: Speaking-rate value passed to the pipeline as ``speed``.

        Yields:
            ``audio.numpy()`` for each segment the pipeline produces.
            Segments whose audio is ``None`` are skipped.
        """
        generator = self.tts(chunks, voice=voice_name, speed=speed)
        for _graphemes, _phonemes, audio in generator:
            # A segment without audio would otherwise fail on ``.numpy()``
            # and abort the whole stream.
            if audio is None:
                continue
            yield audio.numpy()