Update tts_core.py
Browse files
- tts_core.py (+8 -6)
tts_core.py
CHANGED
|
@@ -5,7 +5,11 @@ import os
|
|
| 5 |
import numpy as np
|
| 6 |
|
| 7 |
class KokoroTTS:
|
|
|
|
|
|
|
|
|
|
| 8 |
def __init__(self):
|
|
|
|
| 9 |
self.lang_code = 'a'
|
| 10 |
self.pipeline = KPipeline(lang_code=self.lang_code)
|
| 11 |
|
|
@@ -36,11 +40,11 @@ class KokoroTTS:
|
|
| 36 |
"am_santa": "Male Santa"
|
| 37 |
}
|
| 38 |
|
| 39 |
-
# Default
|
| 40 |
-
|
| 41 |
self.output_dir = "/tmp/outputs"
|
| 42 |
-
os.makedirs(self.output_dir, exist_ok=True)
|
| 43 |
-
|
| 44 |
def generate_speech(self, text, voice=None, speed=1.0, output_file=None, return_audio=True):
|
| 45 |
"""Generate speech from text"""
|
| 46 |
voice = voice or self.default_voice
|
|
@@ -53,11 +57,9 @@ class KokoroTTS:
|
|
| 53 |
split_pattern=r'\n+'
|
| 54 |
)
|
| 55 |
|
| 56 |
-
# Process audio segments
|
| 57 |
audio_chunks = [audio for _, _, audio in generator]
|
| 58 |
final_audio = np.concatenate(audio_chunks) if audio_chunks else np.zeros(1000, dtype=np.float32)
|
| 59 |
|
| 60 |
-
# Save output
|
| 61 |
output_file = output_file or f"{self.output_dir}/tts_{voice}_{hash(text) % 10000}.wav"
|
| 62 |
sf.write(output_file, final_audio, 24000)
|
| 63 |
|
|
|
|
| 5 |
import numpy as np
|
| 6 |
|
| 7 |
class KokoroTTS:
|
| 8 |
+
"""
|
| 9 |
+
A CPU-optimized wrapper for Kokoro82m TTS model
|
| 10 |
+
"""
|
| 11 |
def __init__(self):
|
| 12 |
+
# Initialize with American English
|
| 13 |
self.lang_code = 'a'
|
| 14 |
self.pipeline = KPipeline(lang_code=self.lang_code)
|
| 15 |
|
|
|
|
| 40 |
"am_santa": "Male Santa"
|
| 41 |
}
|
| 42 |
|
| 43 |
+
# Default configuration
|
| 44 |
+
self.default_voice = "af_heart"
|
| 45 |
self.output_dir = "/tmp/outputs"
|
| 46 |
+
os.makedirs(self.output_dir, exist_ok=True)
|
| 47 |
+
|
| 48 |
def generate_speech(self, text, voice=None, speed=1.0, output_file=None, return_audio=True):
|
| 49 |
"""Generate speech from text"""
|
| 50 |
voice = voice or self.default_voice
|
|
|
|
| 57 |
split_pattern=r'\n+'
|
| 58 |
)
|
| 59 |
|
|
|
|
| 60 |
audio_chunks = [audio for _, _, audio in generator]
|
| 61 |
final_audio = np.concatenate(audio_chunks) if audio_chunks else np.zeros(1000, dtype=np.float32)
|
| 62 |
|
|
|
|
| 63 |
output_file = output_file or f"{self.output_dir}/tts_{voice}_{hash(text) % 10000}.wav"
|
| 64 |
sf.write(output_file, final_audio, 24000)
|
| 65 |
|