import marimo

__generated_with = "0.20.4"
app = marimo.App(width="medium")


@app.cell
def _():
    # Expose the marimo API to every other cell under the conventional name `mo`.
    import marimo as mo

    return (mo,)


@app.cell
def _(mo):
    mo.md("""
    # Kokoro TTS Demo

    This notebook demonstrates how to use the kokoro library directly for text-to-speech synthesis.
    The Kokoro-82M model supports multiple languages and voices.

    **Goals:**

    1. Test various SoPs / pilot calls to find the best match for PF/PM (language, voice, speed).
    2. Test configurations for a voice coach.
    """)
    return


@app.cell
def _():
    # Shared imports, the output sample rate, and the directory that receives
    # generated WAV files.
    from pathlib import Path

    import numpy as np
    import soundfile as sf
    from kokoro import KPipeline

    # Kokoro outputs audio at 24 kHz
    SAMPLE_RATE = 24000

    # Setup output directory
    # `globals()` is checked rather than argument-less `dir()`: `dir()` lists
    # the *current local* scope, so it only sees `__file__` when this body
    # happens to execute at top level. Falls back to the working directory
    # when no `__file__` is defined.
    notebook_path = Path(__file__).parent if "__file__" in globals() else Path.cwd()
    # When the notebook lives in a "notebooks" folder, write next to that
    # folder instead of inside it.
    if notebook_path.name == "notebooks":
        project_root = notebook_path.parent
    else:
        project_root = notebook_path
    output_dir = project_root / "output" / "tts"
    output_dir.mkdir(parents=True, exist_ok=True)
    return KPipeline, SAMPLE_RATE, np, output_dir, sf


@app.cell
def _(mo):
    mo.md("""
    ## Configuration
    """)
    return


@app.cell
def _(mo):
    # Interactive controls for TTS configuration
    # Keys are the labels shown to the user; values are the language codes
    # later passed to KPipeline(lang_code=...).
    lang_code = mo.ui.dropdown(
        options={
            "American English": "a",
            "British English": "b",
        },
        value="American English",
        label="Language",
    )
    return (lang_code,)


@app.cell
def _(lang_code, mo):
    # The voice list depends on the selected language, so this cell takes
    # `lang_code` as an input and rebuilds the dropdown from its current value.
    voice_options = (
        {
            "Female, warm (default)": "af_heart",
            "Female, bright": "af_sky",
            "Male, neutral": "am_adam",
            "Male, deep": "am_michael",
        }
        if lang_code.value == "a"
        else {
            "British Female": "bf_emma",
            "British Male": "bm_george",
        }
    )

    # The initial selection must be a key of the option set chosen above.
    voice = mo.ui.dropdown(
        options=voice_options,
        value="Female, warm (default)" if lang_code.value == "a" else "British Female",
        label="Voice",
    )

    speed = mo.ui.slider(
        start=0.5,
        stop=2.0,
        step=0.1,
        value=1.0,
        label="Speed",
        show_value=True,
    )
    return speed, voice


@app.cell
def _(lang_code, mo, speed, voice):
    # Lay the three controls out in a single left-aligned row.
    mo.hstack([lang_code, voice, speed], justify="start")
    return


@app.cell
def _(mo):
    mo.md("""
    ## Text Input
    """)
    return
@app.cell
def _(mo):
    # Free-text input holding the sentence(s) to synthesize.
    text_input = mo.ui.text_area(
        value="Hi! This is a demo of the Kokoro TTS system for the meetSofi voice-coach. GO AROUND. Flaps 15",
        label="Text to synthesize",
        rows=4,
        full_width=True,
    )
    text_input
    return (text_input,)


@app.cell
def _(mo):
    mo.md("""
    ## Generate Speech
    """)
    return


@app.cell
def _(mo):
    # Synthesis is triggered explicitly through this button; the synthesis
    # cell checks `generate_button.value` before doing any work.
    generate_button = mo.ui.run_button(
        label="🎵 Generate Audio",
        kind="success",
    )
    generate_button
    return (generate_button,)


@app.cell
def _(
    KPipeline,
    SAMPLE_RATE,
    generate_button,
    lang_code,
    mo,
    np,
    output_dir,
    sf,
    speed,
    text_input,
    voice,
):
    # Run Kokoro on the current text/voice/speed selection, write the result
    # to a WAV file, and show a status summary.
    #
    # `output_file` is the value handed to the audio-player cell. It stays
    # None unless every step below succeeds, so a failed run can never point
    # the player at a stale file left over from an earlier run.
    output_file = None
    status_message = None

    if generate_button.value:
        try:
            # Initialize Kokoro pipeline
            pipeline = KPipeline(lang_code=lang_code.value)

            # Collect audio chunks; each item yielded by the pipeline unpacks
            # into three values, of which only the last (the audio) is kept.
            audio_chunks = [
                _audio
                for _gs, _ps, _audio in pipeline(
                    text_input.value, voice=voice.value, speed=speed.value
                )
            ]

            if not audio_chunks:
                raise RuntimeError("Kokoro pipeline produced no audio output.")

            # Combine and save
            combined = np.concatenate(audio_chunks)
            _target_path = output_dir / "kokoro_direct_output.wav"
            sf.write(str(_target_path), combined, SAMPLE_RATE)

            status_message = mo.md(
                f"""
                ✅ **Audio generated successfully!**

                - Output file: `{_target_path}`
                - Language: {lang_code.selected_key}
                - Voice: {voice.value}
                - Speed: {speed.value}x
                - Chunks: {len(audio_chunks)}
                - Samples: {len(combined)}
                """
            )
            # Publish the path only as the very last step of the success path.
            output_file = _target_path

        except Exception as e:
            # Notebook-level boundary: report the failure in the UI instead
            # of letting the cell crash.
            print(e)
            status_message = mo.md(f"❌ **Error:** {str(e)}")

    (
        status_message
        if status_message
        else mo.md("_Click 'Generate Audio' button above to create speech_")
    )
    return (output_file,)


@app.cell
def _(mo, output_file):
    # Display audio player if file was generated
    _audio_player = None
    if output_file and output_file.exists():
        _audio_player = mo.audio(src=str(output_file))
    _audio_player
    return


if __name__ == "__main__":
    app.run()