TTS-kokoro-test / app.py
garcejan's picture
Update app.py
f0481ea verified
import marimo
# Version of marimo recorded when this notebook file was generated.
__generated_with = "0.20.4"
# Application object; each `@app.cell` function below registers a cell on it.
app = marimo.App(width="medium")
@app.cell
def _():
    # Import marimo under the alias `mo` and return it so that other cells
    # can receive it as a parameter.
    import marimo as mo

    return (mo,)
# Markdown cell: notebook title, a short description of the Kokoro-82M demo,
# and the goals of the experiment.
@app.cell
def _(mo):
    mo.md("""
    # Kokoro TTS Demo
    This notebook demonstrates how to use the kokoro library directly for text-to-speech synthesis.
    The Kokoro-82M model supports multiple languages and voices.
    **Goals:**
    1. Test various SoPs / pilot calls to find the best match for PF/PM (language, voice, speed).
    2. Test configurations for a voice coach.
    """)
    return
@app.cell
def _():
    # Shared setup: third-party imports, the output sample rate, and the
    # directory that generated audio files are written to.
    from pathlib import Path

    import numpy as np
    import soundfile as sf
    from kokoro import KPipeline

    # Kokoro outputs audio at 24 kHz
    SAMPLE_RATE = 24000

    # Locate the notebook's directory, falling back to the working directory
    # when `__file__` is not defined. `globals()` is consulted instead of a
    # bare `dir()`: without arguments `dir()` lists only the current *local*
    # scope, so inside a function it never contains the module-level
    # `__file__` and the fallback would always be taken.
    notebook_path = Path(__file__).parent if "__file__" in globals() else Path.cwd()

    # A notebook stored in a "notebooks" folder writes next to that folder
    # rather than inside it.
    project_root = (
        notebook_path.parent if notebook_path.name == "notebooks" else notebook_path
    )

    # Setup output directory (created on first run, reused afterwards).
    output_dir = project_root / "output" / "tts"
    output_dir.mkdir(parents=True, exist_ok=True)
    return KPipeline, SAMPLE_RATE, np, output_dir, sf
# Markdown cell: heading for the configuration widgets that follow.
@app.cell
def _(mo):
    mo.md("""
    ## Configuration
    """)
    return
@app.cell
def _(mo):
    # Language selector. Each entry maps the label shown to the user to the
    # one-letter code that the synthesis cell passes to
    # `KPipeline(lang_code=...)`.
    _language_codes = {
        "American English": "a",
        "British English": "b",
    }
    lang_code = mo.ui.dropdown(
        label="Language",
        options=_language_codes,
        value="American English",
    )
    return (lang_code,)
@app.cell
def _(lang_code, mo):
    # Voice and speed controls. The voice list and its preselected entry
    # depend on the chosen language: code "a" gets the American voices, any
    # other code gets the British ones.
    if lang_code.value == "a":
        voice_options = {
            "Female, warm (default)": "af_heart",
            "Female, bright": "af_sky",
            "Male, neutral": "am_adam",
            "Male, deep": "am_michael",
        }
        _default_voice = "Female, warm (default)"
    else:
        voice_options = {
            "British Female": "bf_emma",
            "British Male": "bm_george",
        }
        _default_voice = "British Female"

    voice = mo.ui.dropdown(
        label="Voice",
        options=voice_options,
        value=_default_voice,
    )

    # Playback-speed multiplier from 0.5x to 2.0x in steps of 0.1.
    speed = mo.ui.slider(
        label="Speed",
        start=0.5,
        stop=2.0,
        step=0.1,
        value=1.0,
        show_value=True,
    )
    return speed, voice
@app.cell
def _(lang_code, mo, speed, voice):
    # Show the three configuration widgets in one row, aligned to the start.
    _controls = [lang_code, voice, speed]
    mo.hstack(_controls, justify="start")
    return
# Markdown cell: heading for the text-entry area.
@app.cell
def _(mo):
    mo.md("""
    ## Text Input
    """)
    return
@app.cell
def _(mo):
    # Text box holding the text to synthesize, pre-filled with a sample
    # sentence. The trailing bare expression makes the widget this cell's
    # displayed output.
    _default_text = "Hi! This is a demo of the Kokoro TTS system for the meetSofi voice-coach. GO AROUND. Flaps 15"
    text_input = mo.ui.text_area(
        label="Text to synthesize",
        value=_default_text,
        full_width=True,
        rows=4,
    )
    text_input
    return (text_input,)
# Markdown cell: heading for the synthesis controls and results.
@app.cell
def _(mo):
    mo.md("""
    ## Generate Speech
    """)
    return
@app.cell
def _(mo):
    # Button whose value the synthesis cell checks before doing any work.
    # The trailing bare expression makes the button this cell's displayed
    # output.
    generate_button = mo.ui.run_button(
        kind="success",
        label="🎵 Generate Audio",
    )
    generate_button
    return (generate_button,)
@app.cell
def _(
    KPipeline,
    SAMPLE_RATE,
    generate_button,
    lang_code,
    mo,
    np,
    output_dir,
    sf,
    speed,
    text_input,
    voice,
):
    # Synthesize the entered text with Kokoro when the button's value is
    # truthy, write the result to a WAV file, and show a status message.
    # `output_file` is returned for the player cell and is None unless a file
    # was written successfully during this run.
    output_file = None
    status_message = None

    if generate_button.value:
        try:
            # Reject blank input up front so the pipeline is not built for
            # text that cannot produce audio.
            if not text_input.value.strip():
                raise ValueError("Please enter some text to synthesize.")

            # Initialize Kokoro pipeline
            pipeline = KPipeline(lang_code=lang_code.value)

            # Collect audio chunks: each item yielded by the pipeline is
            # unpacked as a 3-tuple and only its last element is kept.
            audio_chunks = [
                _audio
                for _gs, _ps, _audio in pipeline(
                    text_input.value, voice=voice.value, speed=speed.value
                )
            ]
            if not audio_chunks:
                raise RuntimeError("Kokoro pipeline produced no audio output.")

            # Combine and save
            combined = np.concatenate(audio_chunks)
            output_file = output_dir / "kokoro_direct_output.wav"
            sf.write(str(output_file), combined, SAMPLE_RATE)

            status_message = mo.md(
                f"""
                ✅ **Audio generated successfully!**
                - Output file: `{output_file}`
                - Language: {lang_code.selected_key}
                - Voice: {voice.value}
                - Speed: {speed.value}x
                - Chunks: {len(audio_chunks)}
                - Samples: {len(combined)}
                """
            )
        except Exception as e:
            # The output filename is fixed, so after a failed write it may
            # still point at audio from an earlier run. Clear it so the
            # player cell does not present stale audio next to the error.
            output_file = None
            print(e)
            status_message = mo.md(f"❌ **Error:** {e}")

    # Cell output: the status of this run, or a hint when nothing has run yet.
    (
        status_message
        if status_message is not None
        else mo.md("_Click 'Generate Audio' button above to create speech_")
    )
    return (output_file,)
@app.cell
def _(mo, output_file):
    # Display audio player if file was generated; otherwise the cell's
    # output is None.
    _audio_player = (
        mo.audio(src=str(output_file))
        if output_file and output_file.exists()
        else None
    )
    _audio_player
    return
# Run the app when this file is executed directly as a script.
if __name__ == "__main__":
    app.run()