Spaces:
Sleeping
Sleeping
File size: 4,912 Bytes
4545835 206c828 4545835 206c828 4545835 206c828 e602908 206c828 e602908 206c828 4545835 206c828 4545835 206c828 4545835 206c828 4545835 206c828 4545835 206c828 4545835 206c828 4545835 206c828 4545835 206c828 4545835 206c828 4545835 206c828 4545835 206c828 4545835 206c828 4545835 206c828 4545835 206c828 4545835 206c828 4545835 206c828 f0481ea 206c828 4545835 206c828 4545835 206c828 4545835 206c828 4545835 206c828 4545835 206c828 4545835 206c828 4545835 206c828 4545835 206c828 4545835 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 | import marimo
# Generator version stamp (presumably the marimo release that wrote this file).
__generated_with = "0.20.4"
# Notebook application object; each cell below is registered on it via @app.cell.
app = marimo.App(width="medium")
@app.cell
def _():
    # Import marimo under the alias `mo` and return it so that cells which
    # declare an `mo` parameter can use it.
    import marimo as mo
    return (mo,)
# Intro cell: renders the notebook title, a short description of the demo,
# and the two testing goals as markdown.
@app.cell
def _(mo):
    mo.md("""
# Kokoro TTS Demo
This notebook demonstrates how to use the kokoro library directly for text-to-speech synthesis.
The Kokoro-82M model supports multiple languages and voices.
**Goals:**
1. Test various SoPs / pilot calls to find the best match for PF/PM (language, voice, speed).
2. Test configurations for a voice coach.
""")
    return
@app.cell
def _():
    # Dependencies shared with the synthesis cell further down.
    from pathlib import Path
    import numpy as np
    import soundfile as sf
    from kokoro import KPipeline

    # Kokoro outputs audio at 24 kHz; this rate is used when writing the WAV.
    SAMPLE_RATE = 24000

    # Start from the notebook's own directory when `__file__` is among the
    # names reported by dir(); otherwise fall back to the working directory.
    if "__file__" in dir():
        notebook_path = Path(__file__).parent
    else:
        notebook_path = Path.cwd()

    # A notebook kept in a "notebooks" folder treats that folder's parent as
    # the project root; any other location is used as the root directly.
    project_root = (
        notebook_path.parent if notebook_path.name == "notebooks" else notebook_path
    )

    # Generated audio goes under <project_root>/output/tts (created on demand).
    output_dir = project_root.joinpath("output", "tts")
    output_dir.mkdir(parents=True, exist_ok=True)
    return KPipeline, SAMPLE_RATE, np, output_dir, sf
# Section heading cell for the language / voice / speed controls.
@app.cell
def _(mo):
    mo.md("""
## Configuration
""")
    return
@app.cell
def _(mo):
    # Language selector. Keys are the labels shown in the dropdown; values are
    # the one-letter codes exposed through `lang_code.value`.
    _languages = {"American English": "a", "British English": "b"}
    lang_code = mo.ui.dropdown(
        label="Language",
        options=_languages,
        value="American English",
    )
    return (lang_code,)
@app.cell
def _(lang_code, mo):
    # The voice list and its preselected entry both depend on the chosen
    # language code: "a" gets the American voices, anything else the British.
    if lang_code.value == "a":
        voice_options = {
            "Female, warm (default)": "af_heart",
            "Female, bright": "af_sky",
            "Male, neutral": "am_adam",
            "Male, deep": "am_michael",
        }
        _default_voice = "Female, warm (default)"
    else:
        voice_options = {
            "British Female": "bf_emma",
            "British Male": "bm_george",
        }
        _default_voice = "British Female"

    voice = mo.ui.dropdown(
        label="Voice",
        options=voice_options,
        value=_default_voice,
    )

    # Speed slider from 0.5 to 2.0 in steps of 0.1, starting at 1.0.
    speed = mo.ui.slider(
        label="Speed",
        start=0.5,
        stop=2.0,
        step=0.1,
        value=1.0,
        show_value=True,
    )
    return speed, voice
@app.cell
def _(lang_code, mo, speed, voice):
    # Lay the three controls out in one horizontal row, packed to the start.
    _controls = [lang_code, voice, speed]
    mo.hstack(_controls, justify="start")
    return
# Section heading cell for the text entry area.
@app.cell
def _(mo):
    mo.md("""
## Text Input
""")
    return
@app.cell
def _(mo):
    # Full-width, four-row text box pre-filled with a sample phrase; the widget
    # itself is this cell's displayed output.
    _sample_text = "Hi! This is a demo of the Kokoro TTS system for the meetSofi voice-coach. GO AROUND. Flaps 15"
    text_input = mo.ui.text_area(
        label="Text to synthesize",
        value=_sample_text,
        full_width=True,
        rows=4,
    )
    text_input
    return (text_input,)
# Section heading cell for the synthesis button and its results.
@app.cell
def _(mo):
    mo.md("""
## Generate Speech
""")
    return
@app.cell
def _(mo):
    # Run button that gates synthesis; the widget is this cell's displayed output.
    _button_settings = {"label": "🎵 Generate Audio", "kind": "success"}
    generate_button = mo.ui.run_button(**_button_settings)
    generate_button
    return (generate_button,)
@app.cell
def _(
    KPipeline,
    SAMPLE_RATE,
    generate_button,
    lang_code,
    mo,
    np,
    output_dir,
    sf,
    speed,
    text_input,
    voice,
):
    # When the run button has been clicked: synthesize the entered text with
    # the selected language, voice and speed, write the result to a WAV file in
    # `output_dir`, and display a status message as this cell's output.
    #
    # `output_file` is returned as None unless the WAV was written successfully,
    # so a consumer of `output_file` never picks up a stale file left on disk by
    # an earlier run after a failed attempt.
    output_file = None
    status_message = None
    if generate_button.value:
        try:
            # Initialize Kokoro pipeline for the selected language code
            pipeline = KPipeline(lang_code=lang_code.value)
            # Each item yielded by the pipeline unpacks into three values; only
            # the third (the audio chunk) is needed here.
            audio_chunks = [
                _audio
                for _gs, _ps, _audio in pipeline(
                    text_input.value, voice=voice.value, speed=speed.value
                )
            ]
            if not audio_chunks:
                raise RuntimeError("Kokoro pipeline produced no audio output.")
            # Combine and save
            combined = np.concatenate(audio_chunks)
            _target = output_dir / "kokoro_direct_output.wav"
            sf.write(str(_target), combined, SAMPLE_RATE)
            # Publish the path only once the write has succeeded.
            output_file = _target
            status_message = mo.md(
                f"""
✅ **Audio generated successfully!**
- Output file: `{output_file}`
- Language: {lang_code.selected_key}
- Voice: {voice.value}
- Speed: {speed.value}x
- Chunks: {len(audio_chunks)}
- Samples: {len(combined)}
"""
            )
        except Exception as e:
            # UI boundary: report any failure in the notebook instead of
            # aborting the cell; the message is also echoed to the console.
            print(e)
            status_message = mo.md(f"❌ **Error:** {e}")
    (
        status_message
        if status_message is not None
        else mo.md("_Click 'Generate Audio' button above to create speech_")
    )
    return (output_file,)
@app.cell
def _(mo, output_file):
    # Show an audio player only when a path was produced and the file is
    # actually on disk; otherwise the cell displays nothing.
    _audio_player = (
        mo.audio(src=str(output_file))
        if output_file and output_file.exists()
        else None
    )
    _audio_player
    return
# Script entry point: run the notebook app when this file is executed directly.
if __name__ == "__main__":
    app.run()
|