import gradio as gr from TTS.api import TTS from pydub import AudioSegment import tempfile import os # Load Hugging Face TTS model tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False, gpu=False) def text_to_speech_hf(text, rate, pitch): if not text.strip(): return None, "Please enter text to convert." try: # Temporary WAV file tmp_wav = "/tmp/output.wav" tmp_mp3 = "/tmp/output.mp3" # Adjust rate (speed). Note: pitch adjustment not directly supported speed = 1.0 + (rate / 100.0) # rate -50 to +50 tts.tts_to_file(text=text, file_path=tmp_wav, speaker=tts.speakers[0], speed=speed) # Convert WAV to MP3 audio = AudioSegment.from_wav(tmp_wav) audio.export(tmp_mp3, format="mp3") return tmp_mp3, None except Exception as e: return None, f"TTS generation failed: {e}" with gr.Blocks(analytics_enabled=False) as demo: gr.Markdown("# 🎙️ Hugging Face TTS Text-to-Speech") with gr.Row(): with gr.Column(scale=1): gr.Markdown("## Text-to-Speech with Hugging Face TTS") gr.Markdown(""" Convert text to speech using Hugging Face TTS model. Adjust speech rate: 0 is default, positive values increase speed, negative values decrease. """) gr.HTML("""
Looking for more features?
You can upgrade to advanced versions that include: