Spaces:

NCEE-Build-Lab
/

TTS-kokoro-test

Sleeping

File size: 4,912 Bytes

import marimo

# Presumably the marimo version that generated/last saved this file — confirm
# before editing by hand.
__generated_with = "0.20.4"
# Notebook application object; every cell below registers itself through the
# `@app.cell` decorator.
app = marimo.App(width="medium")


@app.cell
def _():
    # Import marimo under the short alias `mo` and hand it to the other cells.
    import marimo as mo

    return (mo,)


@app.cell
def _(mo):
    # Notebook title, short description, and goals, rendered as Markdown.
    mo.md("""
    # Kokoro TTS Demo
    
    This notebook demonstrates how to use the kokoro library directly for text-to-speech synthesis.
    The Kokoro-82M model supports multiple languages and voices.
    
    
    **Goals:**
    1. Test various SoPs / pilot calls to find the best match for PF/PM (language, voice, speed).
    2. Test configurations for a voice coach.
    """)
    return


@app.cell
def _():
    # Shared setup: third-party imports, the output sample rate, and the
    # directory that generated WAV files are written to.
    from pathlib import Path
    import numpy as np
    import soundfile as sf
    from kokoro import KPipeline

    # Kokoro outputs audio at 24 kHz
    SAMPLE_RATE = 24000

    # Locate the notebook's directory. `dir()` without arguments only lists the
    # *current local* scope, so it misses `__file__` whenever this body is not
    # executed at module level; `globals()` finds it in either case.
    notebook_path = Path(__file__).parent if "__file__" in globals() else Path.cwd()

    # A notebook stored in a "notebooks" folder writes to its parent directory;
    # otherwise the notebook's own directory is used as the project root.
    project_root = (
        notebook_path.parent if notebook_path.name == "notebooks" else notebook_path
    )

    # Create <project_root>/output/tts up front so later writes cannot fail on
    # a missing directory.
    output_dir = project_root / "output" / "tts"
    output_dir.mkdir(parents=True, exist_ok=True)
    return KPipeline, SAMPLE_RATE, np, output_dir, sf


@app.cell
def _(mo):
    # Section heading for the configuration controls.
    mo.md("""
    ## Configuration
    """)
    return


@app.cell
def _(mo):
    # Language selector. Keys are the labels shown to the user; the mapped
    # single-letter codes are what `lang_code.value` yields.
    _language_codes = {
        "American English": "a",
        "British English": "b",
    }
    lang_code = mo.ui.dropdown(
        label="Language",
        options=_language_codes,
        value="American English",
    )
    return (lang_code,)


@app.cell
def _(lang_code, mo):
    # Voice and speed controls. The voice menu and its initial selection both
    # follow the currently selected language code.
    if lang_code.value == "a":
        _voice_choices = {
            "Female, warm (default)": "af_heart",
            "Female, bright": "af_sky",
            "Male, neutral": "am_adam",
            "Male, deep": "am_michael",
        }
        _initial_voice = "Female, warm (default)"
    else:
        _voice_choices = {
            "British Female": "bf_emma",
            "British Male": "bm_george",
        }
        _initial_voice = "British Female"

    voice = mo.ui.dropdown(
        label="Voice",
        options=_voice_choices,
        value=_initial_voice,
    )

    # Speed multiplier from 0.5 to 2.0 in steps of 0.1, starting at 1.0.
    speed = mo.ui.slider(
        label="Speed",
        start=0.5,
        stop=2.0,
        step=0.1,
        value=1.0,
        show_value=True,
    )
    return speed, voice


@app.cell
def _(lang_code, mo, speed, voice):
    # Lay the three controls out side by side, packed to the start of the row.
    _controls = [lang_code, voice, speed]
    mo.hstack(_controls, justify="start")
    return


@app.cell
def _(mo):
    # Section heading for the text entry area.
    mo.md("""
    ## Text Input
    """)
    return


@app.cell
def _(mo):
    # Full-width, four-row text box holding the text to synthesize,
    # pre-filled with a sample sentence.
    _sample_text = "Hi! This is a demo of the Kokoro TTS system for the meetSofi voice-coach. GO AROUND. Flaps 15"
    text_input = mo.ui.text_area(
        label="Text to synthesize",
        value=_sample_text,
        full_width=True,
        rows=4,
    )
    text_input
    return (text_input,)


@app.cell
def _(mo):
    # Section heading for the synthesis controls and results.
    mo.md("""
    ## Generate Speech
    """)
    return


@app.cell
def _(mo):
    # Button whose value gates the synthesis cell.
    generate_button = mo.ui.run_button(
        kind="success",
        label="🎵 Generate Audio",
    )
    generate_button
    return (generate_button,)


@app.cell
def _(
    KPipeline,
    SAMPLE_RATE,
    generate_button,
    lang_code,
    mo,
    np,
    output_dir,
    sf,
    speed,
    text_input,
    voice,
):
    # When the generate button has been clicked, synthesize the entered text
    # with the selected language/voice/speed, write the audio to a WAV file in
    # `output_dir`, and display a status message. `output_file` is the written
    # path on success and None otherwise.
    output_file = None
    status_message = None

    if generate_button.value:
        try:
            # Reject blank input before constructing the pipeline at all.
            if not text_input.value.strip():
                raise ValueError("No text to synthesize.")

            # Initialize Kokoro pipeline
            pipeline = KPipeline(lang_code=lang_code.value)

            # Each item yielded by the pipeline unpacks into three values;
            # only the third (the audio) is kept.
            audio_chunks = [
                _audio
                for _gs, _ps, _audio in pipeline(
                    text_input.value, voice=voice.value, speed=speed.value
                )
            ]

            if not audio_chunks:
                raise RuntimeError("Kokoro pipeline produced no audio output.")

            # Combine and save
            combined = np.concatenate(audio_chunks)
            _wav_path = output_dir / "kokoro_direct_output.wav"
            sf.write(str(_wav_path), combined, SAMPLE_RATE)
            # Publish the path only after the write succeeded, so a failed
            # write can never expose a stale file left over from an earlier run.
            output_file = _wav_path

            status_message = mo.md(
                f"""
            ✅ **Audio generated successfully!**

            - Output file: `{output_file}`
            - Language: {lang_code.selected_key}
            - Voice: {voice.value}
            - Speed: {speed.value}x
            - Chunks: {len(audio_chunks)}
            - Samples: {len(combined)}
            """
            )
        except Exception as e:
            # UI boundary: report any failure in the notebook instead of
            # letting the cell crash.
            print(e)
            status_message = mo.md(f"❌ **Error:** {str(e)}")

    # Cell output: the status message if one was produced, otherwise a hint.
    # Compared against None explicitly rather than relying on the truthiness
    # of a UI object.
    (
        status_message
        if status_message is not None
        else mo.md("_Click 'Generate Audio' button above to create speech_")
    )
    return (output_file,)


@app.cell
def _(mo, output_file):
    # Show an audio player only when a file path was produced and that file
    # is actually present on disk; otherwise the cell has no output.
    _has_audio = bool(output_file) and output_file.exists()
    _audio_player = mo.audio(src=str(output_file)) if _has_audio else None
    _audio_player
    return


# Run the notebook app when this file is executed directly as a script.
if __name__ == "__main__":
    app.run()