Spaces:
Sleeping
Sleeping
| import marimo | |
# Version string recorded in the notebook file (presumably the marimo release
# that generated it — confirm before editing by hand).
__generated_with = "0.20.4"

# Notebook application object; the cells below belong to it.
app = marimo.App(width="medium")
def _():
    # Import marimo under the `mo` alias and hand it to the cells that take
    # `mo` as an argument.
    import marimo as mo

    return (mo,)
def _(mo):
    # Title cell: passes the notebook heading, a short description and the
    # list of goals to `mo.md`.
    mo.md("""
    # Kokoro TTS Demo
    This notebook demonstrates how to use the kokoro library directly for text-to-speech synthesis.
    The Kokoro-82M model supports multiple languages and voices.
    **Goals:**
    1. Test various SoPs / pilot calls to find the best match for PF/PM (language, voice, speed).
    2. Test configurations for a voice coach.
    """)
    return
def _():
    # Setup cell: imports the audio/TTS dependencies, fixes the output sample
    # rate, and creates the directory that generated audio is written to.
    #
    # Returns (KPipeline, SAMPLE_RATE, np, output_dir, sf) for the other cells.
    from pathlib import Path

    import numpy as np
    import soundfile as sf
    from kokoro import KPipeline

    # Kokoro outputs audio at 24 kHz
    SAMPLE_RATE = 24000

    # Locate the notebook's own directory, falling back to the working
    # directory when no `__file__` is defined. The lookup uses `globals()`:
    # an argument-less `dir()` lists the names of the *current local scope*,
    # so inside a function body it never contains the module-level `__file__`
    # and the fallback would always be taken.
    if "__file__" in globals():
        notebook_path = Path(__file__).parent
    else:
        notebook_path = Path.cwd()

    # A notebook stored in a "notebooks" folder writes next to that folder;
    # otherwise output goes under the notebook's own directory.
    project_root = (
        notebook_path.parent if notebook_path.name == "notebooks" else notebook_path
    )

    output_dir = project_root / "output" / "tts"
    output_dir.mkdir(parents=True, exist_ok=True)
    return KPipeline, SAMPLE_RATE, np, output_dir, sf
def _(mo):
    # Section heading shown above the language / voice / speed controls.
    mo.md("""
    ## Configuration
    """)
    return
def _(mo):
    # Language selector. Each display label maps to the single-letter code
    # that the synthesis cell passes on as `KPipeline(lang_code=...)`.
    _languages = {
        "American English": "a",
        "British English": "b",
    }
    lang_code = mo.ui.dropdown(
        options=_languages,
        value="American English",
        label="Language",
    )
    return (lang_code,)
def _(lang_code, mo):
    # Voice and speed controls. The voice list and its pre-selected entry
    # follow the language currently chosen in `lang_code`.
    if lang_code.value == "a":
        voice_options = {
            "Female, warm (default)": "af_heart",
            "Female, bright": "af_sky",
            "Male, neutral": "am_adam",
            "Male, deep": "am_michael",
        }
        _default_voice = "Female, warm (default)"
    else:
        voice_options = {
            "British Female": "bf_emma",
            "British Male": "bm_george",
        }
        _default_voice = "British Female"

    voice = mo.ui.dropdown(
        options=voice_options,
        value=_default_voice,
        label="Voice",
    )

    # Speed slider from 0.5 to 2.0 in steps of 0.1, starting at 1.0.
    speed = mo.ui.slider(
        start=0.5,
        stop=2.0,
        step=0.1,
        value=1.0,
        label="Speed",
        show_value=True,
    )
    return speed, voice
def _(lang_code, mo, speed, voice):
    # Place the three controls side by side, in the order language, voice,
    # speed, with `justify="start"`.
    _controls = [lang_code, voice, speed]
    mo.hstack(_controls, justify="start")
    return
def _(mo):
    # Section heading shown above the text area.
    mo.md("""
    ## Text Input
    """)
    return
def _(mo):
    # Text area holding the sentence(s) to synthesize, pre-filled with a
    # sample phrase.
    _sample_text = "Hi! This is a demo of the Kokoro TTS system for the meetSofi voice-coach. GO AROUND. Flaps 15"
    text_input = mo.ui.text_area(
        value=_sample_text,
        label="Text to synthesize",
        rows=4,
        full_width=True,
    )
    # Bare expression kept on purpose so the widget is the cell's displayed value.
    text_input
    return (text_input,)
def _(mo):
    # Section heading shown above the generate button.
    mo.md("""
    ## Generate Speech
    """)
    return
def _(mo):
    # Button that triggers synthesis; the synthesis cell checks its `.value`.
    _button_config = {
        "label": "🎵 Generate Audio",
        "kind": "success",
    }
    generate_button = mo.ui.run_button(**_button_config)
    # Bare expression kept on purpose so the button is the cell's displayed value.
    generate_button
    return (generate_button,)
def _(
    KPipeline,
    SAMPLE_RATE,
    generate_button,
    lang_code,
    mo,
    np,
    output_dir,
    sf,
    speed,
    text_input,
    voice,
):
    # Synthesis cell. When the generate button's value is truthy, runs the
    # Kokoro pipeline on the current text with the selected language, voice
    # and speed, joins the audio chunks and writes them to
    # `output_dir / "kokoro_direct_output.wav"` at SAMPLE_RATE.
    #
    # Returns a 1-tuple holding the written file's path, or `(None,)` when the
    # button has not been pressed or synthesis/writing failed.
    output_file = None
    status_message = None

    if generate_button.value:
        try:
            # Initialize Kokoro pipeline
            pipeline = KPipeline(lang_code=lang_code.value)

            # Collect audio chunks; each item yielded by the pipeline unpacks
            # into three values, of which only the last (the audio) is kept.
            audio_chunks = [
                _audio
                for _gs, _ps, _audio in pipeline(
                    text_input.value, voice=voice.value, speed=speed.value
                )
            ]
            if not audio_chunks:
                raise RuntimeError("Kokoro pipeline produced no audio output.")

            # Combine and save
            combined = np.concatenate(audio_chunks)
            _target = output_dir / "kokoro_direct_output.wav"
            sf.write(str(_target), combined, SAMPLE_RATE)

            status_message = mo.md(
                f"""
                ✅ **Audio generated successfully!**
                - Output file: `{_target}`
                - Language: {lang_code.selected_key}
                - Voice: {voice.value}
                - Speed: {speed.value}x
                - Chunks: {len(audio_chunks)}
                - Samples: {len(combined)}
                """
            )
            # Publish the path only once the write has succeeded. Otherwise a
            # failed write would still hand the path to the player cell, which
            # could then show a stale or partial file next to the error.
            output_file = _target
        except Exception as e:
            # Notebook UI boundary: report the failure in the cell output
            # instead of letting it propagate.
            print(e)
            status_message = mo.md(f"❌ **Error:** {e}")

    # Bare expression kept on purpose so the status is the cell's displayed value.
    (
        status_message
        if status_message
        else mo.md("_Click 'Generate Audio' button above to create speech_")
    )
    return (output_file,)
def _(mo, output_file):
    # Show an audio player only when a file path was produced and that file
    # is actually present on disk; otherwise the cell displays nothing.
    _audio_player = (
        mo.audio(src=str(output_file))
        if output_file and output_file.exists()
        else None
    )
    # Bare expression kept on purpose so the player is the cell's displayed value.
    _audio_player
    return
# Run the notebook application when this file is executed directly.
if __name__ == "__main__":
    app.run()