File size: 5,206 Bytes
0ea333c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import gradio as gr
from TTS.api import TTS
from pydub import AudioSegment
import tempfile
import os

# Instantiate the shared TTS engine once at import time: the English LJSpeech
# Tacotron2-DDC model, with the progress bar disabled and GPU use turned off.
_MODEL_NAME = "tts_models/en/ljspeech/tacotron2-DDC"
tts = TTS(model_name=_MODEL_NAME, progress_bar=False, gpu=False)

def _remove_quietly(path):
    """Delete *path* if it is set, ignoring filesystem errors."""
    if path:
        try:
            os.remove(path)
        except OSError:
            # Best-effort cleanup only: a leftover temp file must never turn a
            # finished (or already failed) request into a new error.
            pass


def text_to_speech_hf(text, rate, pitch):
    """Synthesize *text* to an MP3 file and return its path.

    Args:
        text: Text to speak. ``None``, empty, or whitespace-only input is
            rejected with a message instead of being synthesized.
        rate: Speech-rate adjustment in percent. ``0`` keeps the default
            speed, ``50`` requests 1.5x and ``-50`` requests 0.5x. ``None``
            is treated as ``0``.
        pitch: Accepted for interface compatibility but ignored; pitch
            adjustment is not directly supported.

    Returns:
        A ``(mp3_path, message)`` tuple. On success ``mp3_path`` is the path
        of the generated file and ``message`` is ``None``. On failure
        ``mp3_path`` is ``None`` and ``message`` describes the problem.
    """
    if not text or not text.strip():
        return None, "Please enter text to convert."

    wav_path = None
    mp3_path = None
    try:
        # Unique files per call: fixed names such as /tmp/output.wav would be
        # overwritten when two requests are processed at the same time.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as wav_file:
            wav_path = wav_file.name
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as mp3_file:
            mp3_path = mp3_file.name

        # Map the percentage adjustment onto a speed multiplier.
        # NOTE(review): whether `speed` is honored depends on the installed
        # TTS version and the loaded model - confirm.
        speed = 1.0 + ((rate or 0) / 100.0)

        synth_kwargs = {"text": text, "file_path": wav_path, "speed": speed}
        # Single-speaker models expose no speaker list, so indexing
        # `tts.speakers[0]` unconditionally would raise. Only pass a speaker
        # when the model actually offers one.
        speakers = getattr(tts, "speakers", None)
        if speakers:
            synth_kwargs["speaker"] = speakers[0]
        tts.tts_to_file(**synth_kwargs)

        # Convert WAV to MP3.
        AudioSegment.from_wav(wav_path).export(mp3_path, format="mp3")
    except Exception as e:
        # Top-level boundary for the UI callback: report the failure as a
        # message rather than raising, and drop the unusable MP3 placeholder.
        _remove_quietly(mp3_path)
        return None, f"TTS generation failed: {e}"
    finally:
        # The WAV is only an intermediate artifact. The MP3 is returned to the
        # caller and therefore has to outlive this function.
        _remove_quietly(wav_path)

    return mp3_path, None

# Build the Gradio interface: a header row with two promotional panels, then
# the text-to-speech form wired to text_to_speech_hf, and finally start the app.
with gr.Blocks(analytics_enabled=False) as demo:
    gr.Markdown("# 🎙️ Hugging Face TTS Text-to-Speech")

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("## Text-to-Speech with Hugging Face TTS")
            gr.Markdown("""
                Convert text to speech using Hugging Face TTS model.
                Adjust speech rate: 0 is default, positive values increase speed, negative values decrease.
            """)

            # External links open in a new tab; rel="noopener noreferrer"
            # keeps the opened page from getting a handle on this one.
            gr.HTML("""
            <div style="margin: 20px 0; padding: 15px; border: 1px solid #4CAF50; border-radius: 10px; background-color: #f1f8e9;">
                <p style="margin-top: 0;"><b>Looking for more features?</b></p>
                <p>You can upgrade to advanced versions that include:</p>
                <ul>
                    <li><b>Subtitle Support</b>: Input SRT format or TXT</li>
                    <li><b>File Upload</b>: Easily upload text files</li>
                    <li><b>MP3 Output</b>: Generate audio in multiple formats</li>
                </ul>
                <div style="text-align: center; margin-top: 15px;">
                    <a href="https://text-to-speech.wingetgui.com/" target="_blank" rel="noopener noreferrer"
                       style="display: inline-block; 
                              background: linear-gradient(45deg, #4CAF50, #8BC34A); 
                              color: white; 
                              padding: 12px 30px; 
                              text-decoration: none; 
                              border-radius: 30px; 
                              font-weight: bold; 
                              font-size: 16px;">Try New Version ➔</a>
                </div>
            </div>
            """)

        with gr.Column(scale=1):
            gr.HTML("""
            <div style="height: 100%; background-color: #f0f8ff; padding: 15px; border-radius: 10px;">
                <h2 style="color: #1e90ff; margin-top: 0;">Turn Your Text Into Professional Videos!</h2>
                <ul style="list-style-type: none; padding-left: 0;">
                    <li>✅ <b>40+ languages and 300+ voices supported</b></li>
                    <li>✅ <b>Custom backgrounds, music, and visual effects</b></li>
                    <li>✅ <b>Create engaging video content from simple text</b></li>
                    <li>✅ <b>Perfect for educators, content creators, and marketers</b></li>
                </ul>
                <div style="text-align: center; margin-top: 20px;">
                    <span style="font-size: 96px;">🎬</span>
                    <div style="margin-top: 15px;">
                        <a href="https://text2video.wingetgui.com/" target="_blank" rel="noopener noreferrer"
                           style="display: inline-block; 
                                  background: linear-gradient(45deg, #2196F3, #21CBF3); 
                                  color: white; 
                                  padding: 12px 30px; 
                                  text-decoration: none; 
                                  border-radius: 30px; 
                                  font-weight: bold; 
                                  font-size: 16px;">Try Text-to-Video ➔</a>
                    </div>
                </div>
            </div>
            """)

    with gr.Row():
        with gr.Column():
            text_input = gr.Textbox(label="Input Text", lines=5)
            rate_slider = gr.Slider(minimum=-50, maximum=50, value=0, label="Speech Rate Adjustment (%)", step=1)
            # NOTE: the pitch value is passed to text_to_speech_hf, which does
            # not use it, so this control currently has no audible effect.
            pitch_slider = gr.Slider(minimum=-20, maximum=20, value=0, label="Pitch Adjustment (Hz)", step=1)

            generate_btn = gr.Button("Generate Speech", variant="primary")

            audio_output = gr.Audio(label="Generated Audio", type="filepath")
            # Left visible (the default). The callback returns only a value
            # for this component, so a component created with visible=False
            # would never display the validation or error message.
            warning_md = gr.Markdown(label="Warning")

            # Outputs map positionally onto the (audio path, message) pair
            # returned by text_to_speech_hf.
            generate_btn.click(
                fn=text_to_speech_hf,
                inputs=[text_input, rate_slider, pitch_slider],
                outputs=[audio_output, warning_md]
            )

    gr.Markdown("Experience the power of Hugging Face TTS for text-to-speech conversion, and explore our advanced Text-to-Video Converter for more creative possibilities!")

# Enable request queuing, then start the server with the API page hidden.
demo.queue()
demo.launch(show_api=False)