import gradio as gr
from TTS.api import TTS
from pydub import AudioSegment
import tempfile
import os

# Load the Coqui TTS model once at import time so every request reuses it.
tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False, gpu=False)

# Static UI copy. The triple-quoted payloads start at column 0 on purpose so
# the text handed to Gradio is exactly what the app has always rendered.
_INTRO_MARKDOWN = (
    " Convert text to speech using Hugging Face TTS model."
    " Adjust speech rate: 0 is default, positive values increase speed,"
    " negative values decrease. "
)

_UPGRADE_PROMO_HTML = """

Looking for more features?

You can upgrade to advanced versions that include:

Subtitle Support: Input SRT format or TXT
File Upload: Easily upload text files
MP3 Output: Generate audio in multiple formats

Try New Version ➔

"""

_VIDEO_PROMO_HTML = """

Turn Your Text Into Professional Videos!

✅ 40+ languages and 300+ voices supported
✅ Custom backgrounds, music, and visual effects
✅ Create engaging video content from simple text
✅ Perfect for educators, content creators, and marketers

🎬

Try Text-to-Video ➔

"""


def _new_temp_path(suffix):
    """Create an empty, uniquely named temp file and return its path."""
    fd, path = tempfile.mkstemp(suffix=suffix)
    # Only the name is needed; the TTS/pydub writers reopen the path themselves.
    os.close(fd)
    return path


def _remove_quietly(path):
    """Delete *path* if it exists; cleanup is best-effort and never raises."""
    if path is None:
        return
    try:
        os.remove(path)
    except OSError:
        # Already gone or not removable: nothing useful to do about a temp file.
        pass


def text_to_speech_hf(text, rate, pitch):
    """Synthesize *text* to an MP3 file.

    Args:
        text: Text to speak. ``None``, empty, or whitespace-only input is
            rejected with a message instead of being synthesized.
        rate: Speech-rate adjustment in percent (the UI offers -50..+50);
            mapped to a speed factor of ``1.0 + rate / 100``.
        pitch: Accepted for interface compatibility with the UI slider but
            currently unused; pitch adjustment is not supported here.

    Returns:
        A ``(mp3_path, message)`` tuple: ``(path, None)`` on success, or
        ``(None, message)`` when the input is empty or synthesis fails.
    """
    if not text or not text.strip():
        return None, "Please enter text to convert."

    wav_path = None
    mp3_path = None
    try:
        # Unique files per request: fixed /tmp names let queued or concurrent
        # requests overwrite each other's audio.
        wav_path = _new_temp_path(".wav")
        mp3_path = _new_temp_path(".mp3")

        # Adjust rate (speed). Note: pitch adjustment not directly supported.
        speed = 1.0 + (rate / 100.0)  # rate -50 to +50
        # NOTE(review): whether `speed` changes the audio depends on the
        # installed TTS version and model -- confirm for tacotron2-DDC.
        synth_kwargs = {"text": text, "file_path": wav_path, "speed": speed}

        # Single-speaker models expose no speaker list and reject a `speaker`
        # argument, so only select a speaker when the model offers any.
        speakers = getattr(tts, "speakers", None)
        if speakers:
            synth_kwargs["speaker"] = speakers[0]

        tts.tts_to_file(**synth_kwargs)

        # Convert WAV to MP3.
        AudioSegment.from_wav(wav_path).export(mp3_path, format="mp3")
    except Exception as e:
        # UI boundary: report the failure to the user instead of crashing.
        _remove_quietly(mp3_path)
        return None, f"TTS generation failed: {e}"
    finally:
        # The intermediate WAV is never served, so always clean it up.
        _remove_quietly(wav_path)

    return mp3_path, None


def _generate_for_ui(text, rate, pitch):
    """Run text_to_speech_hf and shape its result for the Gradio outputs.

    The warning component starts hidden, and returning a bare string would
    set its value without ever showing it. Toggle visibility explicitly so
    the message appears on failure and disappears again on success.
    """
    audio_path, message = text_to_speech_hf(text, rate, pitch)
    return audio_path, gr.update(value=message or "", visible=bool(message))


with gr.Blocks(analytics_enabled=False) as demo:
    gr.Markdown("# 🎙️ Hugging Face TTS Text-to-Speech")

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("## Text-to-Speech with Hugging Face TTS")
            gr.Markdown(_INTRO_MARKDOWN)
            gr.HTML(_UPGRADE_PROMO_HTML)
        with gr.Column(scale=1):
            gr.HTML(_VIDEO_PROMO_HTML)

    with gr.Row():
        with gr.Column():
            text_input = gr.Textbox(label="Input Text", lines=5)
            rate_slider = gr.Slider(minimum=-50, maximum=50, value=0, label="Speech Rate Adjustment (%)", step=1)
            pitch_slider = gr.Slider(minimum=-20, maximum=20, value=0, label="Pitch Adjustment (Hz)", step=1)
            generate_btn = gr.Button("Generate Speech", variant="primary")
            audio_output = gr.Audio(label="Generated Audio", type="filepath")
            warning_md = gr.Markdown(label="Warning", visible=False)

    generate_btn.click(
        fn=_generate_for_ui,
        inputs=[text_input, rate_slider, pitch_slider],
        outputs=[audio_output, warning_md]
    )

    gr.Markdown("Experience the power of Hugging Face TTS for text-to-speech conversion, and explore our advanced Text-to-Video Converter for more creative possibilities!")

demo.queue()
demo.launch(show_api=False)