File size: 5,037 Bytes
cc74df0
3915525
 
3681b2d
3915525
cc74df0
 
3915525
 
cc74df0
20c432f
 
 
 
 
 
 
 
 
 
 
 
3915525
cc74df0
20c432f
 
 
3915525
3681b2d
 
 
cc74df0
3915525
20c432f
 
 
 
cc74df0
20c432f
cc74df0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3915525
3681b2d
3915525
3681b2d
cc74df0
 
 
3681b2d
20c432f
cc74df0
 
 
3681b2d
 
cc74df0
 
 
 
 
 
20c432f
 
 
cc74df0
20c432f
 
cc74df0
 
20c432f
cc74df0
20c432f
 
cc74df0
20c432f
 
cc74df0
20c432f
cc74df0
 
20c432f
cc74df0
 
 
3681b2d
20c432f
cc74df0
20c432f
 
 
cc74df0
 
 
 
 
 
 
 
 
 
 
 
 
3681b2d
 
20c432f
3681b2d
 
 
 
cc74df0
3681b2d
cc74df0
3681b2d
 
20c432f
 
 
3681b2d
20c432f
3681b2d
 
20c432f
3681b2d
3915525
 
20c432f
 
 
 
 
3681b2d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
import gradio as gr 
from TTS.api import TTS
import tempfile
import os

# Load the Coqui TTS model once at import time; every request reuses it.
model_name = "tts_models/en/vctk/vits"
tts = TTS(model_name)

# Human-readable descriptions for the speaker IDs this app exposes.
# NOTE(review): the descriptions are hand-written labels; whether they match
# the actual voice characteristics of each speaker should be confirmed by ear.
speaker_labels = {
    "p225": "Male, Young Adult",
    "p226": "Female, Middle-Aged",
    "p227": "Male, Mature Storyteller",
    "p228": "Female, Young Adult",
    "p229": "Male, Elderly Narrator",
    "p230": "Female, Warm Storyteller",
    "p231": "Male, Deep Voice",
    "p232": "Female, Clear Articulation",
    "p233": "Male, Authoritative",
    "p234": "Female, Gentle Storyteller",
}

# Keep only the model's speakers that have a label above, in the model's order.
available_speakers = list(filter(speaker_labels.__contains__, tts.speakers))

def _remove_quietly(path):
    """Delete *path* if it exists, ignoring filesystem errors."""
    try:
        os.remove(path)
    except OSError:
        # Best-effort cleanup on an error path; the original failure is what
        # the caller needs to see, not a secondary unlink problem.
        pass


def _shift_pitch(wav_path, semitones):
    """Pitch-shift *wav_path* in place by *semitones* using pysox, if installed."""
    try:
        import sox
    except ImportError:
        print("Sox not installed; skipping pitch adjustment.")
        return

    adjusted_file = wav_path + "_adjusted.wav"
    try:
        tfm = sox.Transformer()
        tfm.pitch(semitones)
        tfm.build_file(wav_path, adjusted_file)
        # Swap the shifted audio over the original so callers keep one path.
        os.replace(adjusted_file, wav_path)
    except Exception:
        # Do not leave the intermediate file behind when sox fails.
        _remove_quietly(adjusted_file)
        raise


def text_to_speech(text, speaker_name, speed, pitch):
    """Synthesize *text* with the chosen speaker and return the WAV file path.

    Args:
        text: Text to narrate; must contain at least one non-whitespace
            character.
        speaker_name: Speaker ID forwarded to ``tts.tts_to_file``.
        speed: Speaking-rate value forwarded to ``tts.tts_to_file``.
        pitch: Pitch shift in semitones. A falsy value (``0`` / ``None``)
            skips the sox post-processing step.

    Returns:
        Path to a temporary ``.wav`` file containing the narration. The file
        is deliberately left on disk so the UI can play and download it.

    Raises:
        gr.Error: If the text is empty, or synthesis / pitch shifting fails.
    """
    output_path = None
    try:
        if not text or not text.strip():
            raise ValueError("Please enter some text")

        # Reserve a unique path, then close the handle *before* the TTS engine
        # writes to it: writing to a still-open NamedTemporaryFile fails on
        # platforms that lock open files.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
            output_path = f.name

        tts.tts_to_file(
            text=text,
            speaker=speaker_name,
            file_path=output_path,
            speed=speed,
        )

        if pitch:
            _shift_pitch(output_path, pitch)

        return output_path

    except Exception as e:
        # The temp file is useless after a failure; remove it so repeated
        # errors do not pile up files in the temp directory.
        if output_path is not None:
            _remove_quietly(output_path)
        raise gr.Error(f"Error generating speech: {str(e)}") from e

def create_download_link(audio_file):
    """Return a Gradio update that shows the download button for *audio_file*.

    The button is made visible and pointed at the file only when a path was
    given and that path exists on disk; otherwise the button is hidden.
    """
    file_is_ready = bool(audio_file) and os.path.exists(audio_file)
    if not file_is_ready:
        return gr.update(visible=False)
    return gr.update(visible=True, value=audio_file)

# Preselect "p227" when the model offers it; otherwise fall back to the first
# available speaker (or no selection at all if the filtered list is empty).
default_speaker = "p227" if "p227" in available_speakers else next(iter(available_speakers), None)

# Build the UI: text + voice controls on the left, audio output on the right.
with gr.Blocks(title="Storytelling TTS App") as app:
    gr.Markdown("# 🎙️ Professional Storytelling Text-to-Speech")
    gr.Markdown("Convert your text into narrated audio using expressive voices. Ideal for audiobooks, storytelling, and podcast narration.")

    with gr.Row():
        with gr.Column():
            text_input = gr.Textbox(
                label="Enter your story text",
                lines=8,
                placeholder="Once upon a time..."
            )

            # gr.Dropdown has no `format_func` argument; display names are
            # supplied as (label, value) pairs so the callback still receives
            # the raw speaker ID.
            speaker = gr.Dropdown(
                choices=[(speaker_labels[spk], spk) for spk in available_speakers],
                label="Narrator Voice",
                value=default_speaker
            )

            with gr.Accordion("🎛️ Voice Adjustment", open=True):
                speed = gr.Slider(
                    minimum=0.5, maximum=2.0,
                    value=1.0, step=0.1,
                    label="Speaking Rate",
                    info="1.0 = normal speed"
                )
                pitch = gr.Slider(
                    minimum=-5.0, maximum=5.0,
                    value=0.0, step=0.5,
                    label="Pitch Shift (in semitones)",
                    info="0 = normal, positive = higher pitch"
                )

            generate_btn = gr.Button("🎧 Generate Narration", variant="primary")

        with gr.Column():
            audio_output = gr.Audio(
                label="Generated Narration",
                type="filepath",
                elem_classes=["output-audio"]
            )
            download_button = gr.DownloadButton(
                label="Download Audio", visible=False
            )

    # Placeholder section: hidden audio slots for the first three speakers.
    with gr.Accordion("🎤 Preview Narrator Voices (Samples Coming Soon)", open=False):
        gr.Markdown("Previews will be available here once sample audios are added.")
        for speaker_id in available_speakers[:3]:
            gr.Audio(
                value=None,
                label=speaker_labels[speaker_id],
                visible=False  # Set to True and provide file path or URL to enable
            )

    # Generate the narration first, then reveal the download button for it.
    generate_btn.click(
        fn=text_to_speech,
        inputs=[text_input, speaker, speed, pitch],
        outputs=audio_output
    ).then(
        fn=create_download_link,
        inputs=audio_output,
        outputs=download_button
    )

    # Example rows are (text, speaker ID, speed, pitch); results are not
    # pre-computed because caching is disabled.
    gr.Examples(
        examples=[
            ["The old man sat by the fireplace, his eyes twinkling with memories of adventures past.", "p227", 0.9, 0.0],
            ["In a quiet village nestled between the mountains, a young girl discovered a secret that would change everything.", "p234", 1.0, 0.5],
            ["The detective examined the clue carefully, knowing this small piece of evidence could crack the entire case wide open.", "p231", 1.1, -1.0]
        ],
        inputs=[text_input, speaker, speed, pitch],
        outputs=audio_output,
        fn=text_to_speech,
        cache_examples=False
    )

if __name__ == "__main__":
    # Probe for the optional pysox package up front so the operator sees the
    # hint at startup rather than only when a pitch shift is first requested.
    try:
        __import__("sox")
    except ImportError:
        print("Consider installing sox for pitch adjustment: pip install sox")

    # Start the Gradio server with default settings.
    app.launch()