Spaces:

Aranwer
/

TTS

Runtime error

TTS

File size: 5,037 Bytes

cc74df0
3915525
 
3681b2d
3915525
cc74df0
 
3915525
 
cc74df0
20c432f
 
 
 
 
 
 
 
 
 
 
 
3915525
cc74df0
20c432f
 
 
3915525
3681b2d
 
 
cc74df0
3915525
20c432f
 
 
 
cc74df0
20c432f
cc74df0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3915525
3681b2d
3915525
3681b2d
cc74df0
 
 
3681b2d
20c432f
cc74df0
 
 
3681b2d
 
cc74df0
 
 
 
 
 
20c432f
 
 
cc74df0
20c432f
 
cc74df0
 
20c432f
cc74df0
20c432f
 
cc74df0
20c432f
 
cc74df0
20c432f
cc74df0
 
20c432f
cc74df0
 
 
3681b2d
20c432f
cc74df0
20c432f
 
 
cc74df0
 
 
 
 
 
 
 
 
 
 
 
 
3681b2d
 
20c432f
3681b2d
 
 
 
cc74df0
3681b2d
cc74df0
3681b2d
 
20c432f
 
 
3681b2d
20c432f
3681b2d
 
20c432f
3681b2d
3915525
 
20c432f
 
 
 
 
3681b2d

import gradio as gr 
from TTS.api import TTS
import tempfile
import os

# Load the Coqui TTS model a single time when the module is imported so
# every request reuses the same instance.
model_name = "tts_models/en/vctk/vits"
tts = TTS(model_name)

# Human-readable names shown in the UI, keyed by the model's speaker id.
speaker_labels = {
    "p225": "Male, Young Adult",
    "p226": "Female, Middle-Aged",
    "p227": "Male, Mature Storyteller",
    "p228": "Female, Young Adult",
    "p229": "Male, Elderly Narrator",
    "p230": "Female, Warm Storyteller",
    "p231": "Male, Deep Voice",
    "p232": "Female, Clear Articulation",
    "p233": "Male, Authoritative",
    "p234": "Female, Gentle Storyteller",
}

# Offer only the model speakers that have a label above, preserving the
# order in which the model reports them.
available_speakers = [
    voice_id for voice_id in tts.speakers if voice_id in speaker_labels
]

def text_to_speech(text, speaker_name, speed, pitch):
    """Synthesize ``text`` to a temporary WAV file and return its path.

    Args:
        text: Text to narrate. ``None``, empty, or whitespace-only input is
            rejected with a user-facing error.
        speaker_name: Speaker id forwarded to the TTS model.
        speed: Speaking-rate value forwarded to ``tts.tts_to_file``.
        pitch: Pitch shift in semitones. ``0`` (or ``None``) skips the
            shift; otherwise the shift is applied with ``sox`` when that
            package can be imported.

    Returns:
        Path of the generated ``.wav`` file. The file is intentionally not
        deleted here so that the caller (the Gradio audio component) can
        read it.

    Raises:
        gr.Error: On any failure; the original exception is chained as the
            cause and any partially written temporary file is removed.
    """
    output_path = None
    try:
        if not text or not text.strip():
            raise ValueError("Please enter some text")

        # Only reserve a unique path here and close the handle immediately:
        # writing to the path while the handle is still open fails on
        # platforms that lock open files (e.g. Windows).
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
            output_path = f.name

        tts.tts_to_file(
            text=text,
            speaker=speaker_name,
            file_path=output_path,
            speed=speed
        )

        # Adjust pitch using sox if needed
        if pitch:
            try:
                import sox
            except ImportError:
                # Pitch shifting is best-effort: keep the unshifted audio.
                print("Sox not installed; skipping pitch adjustment.")
            else:
                adjusted_file = output_path + "_adjusted.wav"
                try:
                    tfm = sox.Transformer()
                    tfm.pitch(pitch)
                    tfm.build_file(output_path, adjusted_file)
                    os.replace(adjusted_file, output_path)
                finally:
                    # After a successful replace this path no longer
                    # exists; on failure, drop the partial intermediate.
                    if os.path.exists(adjusted_file):
                        os.remove(adjusted_file)

        return output_path

    except Exception as e:
        # The temp file was created with delete=False, so remove it here to
        # avoid leaking one file per failed request.
        if output_path is not None:
            try:
                os.remove(output_path)
            except OSError:
                pass
        raise gr.Error(f"Error generating speech: {str(e)}") from e

def create_download_link(audio_file):
    """Return a Gradio update that shows or hides the download button.

    The button is made visible and pointed at ``audio_file`` only when that
    value is non-empty and names an existing path; otherwise it is hidden.
    """
    file_is_ready = bool(audio_file) and os.path.exists(audio_file)
    if not file_is_ready:
        return gr.update(visible=False)
    return gr.update(visible=True, value=audio_file)

# Build the Gradio UI: inputs on the left, generated audio on the right.
with gr.Blocks(title="Storytelling TTS App") as app:
    gr.Markdown("# 🎙️ Professional Storytelling Text-to-Speech")
    gr.Markdown("Convert your text into narrated audio using expressive voices. Ideal for audiobooks, storytelling, and podcast narration.")

    # gr.Dropdown has no `format_func` parameter; display names are given
    # as (label, value) tuples so the callback still receives the raw
    # speaker id.
    speaker_choices = [
        (speaker_labels[speaker_id], speaker_id)
        for speaker_id in available_speakers
    ]
    # Prefer "p227" but fall back to the first usable speaker if the
    # loaded model does not expose it.
    if "p227" in available_speakers:
        default_speaker = "p227"
    else:
        default_speaker = available_speakers[0] if available_speakers else None

    with gr.Row():
        with gr.Column():
            text_input = gr.Textbox(
                label="Enter your story text",
                lines=8,
                placeholder="Once upon a time..."
            )

            speaker = gr.Dropdown(
                choices=speaker_choices,
                label="Narrator Voice",
                value=default_speaker
            )

            with gr.Accordion("🎛️ Voice Adjustment", open=True):
                speed = gr.Slider(
                    minimum=0.5, maximum=2.0,
                    value=1.0, step=0.1,
                    label="Speaking Rate",
                    info="1.0 = normal speed"
                )
                pitch = gr.Slider(
                    minimum=-5.0, maximum=5.0,
                    value=0.0, step=0.5,
                    label="Pitch Shift (in semitones)",
                    info="0 = normal, positive = higher pitch"
                )

            generate_btn = gr.Button("🎧 Generate Narration", variant="primary")

        with gr.Column():
            audio_output = gr.Audio(
                label="Generated Narration",
                type="filepath",
                elem_classes=["output-audio"]
            )
            # Hidden until a narration has been generated.
            download_button = gr.DownloadButton(
                label="Download Audio", visible=False
            )

    with gr.Accordion("🎤 Preview Narrator Voices (Samples Coming Soon)", open=False):
        gr.Markdown("Previews will be available here once sample audios are added.")
        # Placeholder players for the first three voices; they stay hidden
        # until sample files are supplied.
        for speaker_id in available_speakers[:3]:
            gr.Audio(
                value=None,
                label=speaker_labels[speaker_id],
                visible=False  # Set to True and provide file path or URL to enable
            )

    # Synthesize first, then reveal the download button for the result.
    generate_btn.click(
        fn=text_to_speech,
        inputs=[text_input, speaker, speed, pitch],
        outputs=audio_output
    ).then(
        fn=create_download_link,
        inputs=audio_output,
        outputs=download_button
    )

    gr.Examples(
        examples=[
            ["The old man sat by the fireplace, his eyes twinkling with memories of adventures past.", "p227", 0.9, 0.0],
            ["In a quiet village nestled between the mountains, a young girl discovered a secret that would change everything.", "p234", 1.0, 0.5],
            ["The detective examined the clue carefully, knowing this small piece of evidence could crack the entire case wide open.", "p231", 1.1, -1.0]
        ],
        inputs=[text_input, speaker, speed, pitch],
        outputs=audio_output,
        fn=text_to_speech,
        cache_examples=False
    )

if __name__ == "__main__":
    # Probe for the optional pitch-shifting dependency up front so the
    # operator gets an install hint before the first request.
    try:
        import sox  # noqa: F401  (imported only to test availability)
    except ImportError:
        print("Consider installing sox for pitch adjustment: pip install sox")

    # Start the Gradio server for the UI defined above.
    app.launch()