"""Gradio app that narrates text with the multi-speaker ``tts_models/en/vctk/vits`` model.

The UI lets the user pick one of the labelled speakers, adjust speaking rate
and pitch, listen to the generated narration, and download it as a WAV file.
"""

import os
import tempfile

import gradio as gr
from TTS.api import TTS

# Initialize TTS
model_name = "tts_models/en/vctk/vits"
tts = TTS(model_name)

# Custom speaker labels (speaker id -> human-readable description shown in the UI)
speaker_labels = {
    "p225": "Male, Young Adult",
    "p226": "Female, Middle-Aged",
    "p227": "Male, Mature Storyteller",
    "p228": "Female, Young Adult",
    "p229": "Male, Elderly Narrator",
    "p230": "Female, Warm Storyteller",
    "p231": "Male, Deep Voice",
    "p232": "Female, Clear Articulation",
    "p233": "Male, Authoritative",
    "p234": "Female, Gentle Storyteller",
}

# Filter available speakers: keep only the model's speakers that have a label.
available_speakers = [spk for spk in (tts.speakers or []) if spk in speaker_labels]

# Preferred default narrator; fall back to the first labelled speaker (or no
# selection at all) when the model does not expose "p227".
_PREFERRED_SPEAKER = "p227"
if _PREFERRED_SPEAKER in available_speakers:
    _default_speaker = _PREFERRED_SPEAKER
elif available_speakers:
    _default_speaker = available_speakers[0]
else:
    _default_speaker = None


def _remove_quietly(path):
    """Delete *path* if it exists, ignoring filesystem errors (best-effort cleanup)."""
    try:
        os.remove(path)
    except OSError:
        pass


def _shift_pitch(wav_path, semitones):
    """Pitch-shift the WAV at *wav_path* in place by *semitones* using sox.

    If the ``sox`` Python package is not installed the file is left untouched
    and a notice is printed. Any other failure propagates to the caller after
    the intermediate file has been removed.
    """
    try:
        import sox
    except ImportError:
        print("Sox not installed; skipping pitch adjustment.")
        return

    adjusted_file = wav_path + "_adjusted.wav"
    tfm = sox.Transformer()
    tfm.pitch(semitones)
    try:
        tfm.build_file(wav_path, adjusted_file)
        # Swap the shifted audio into the path the caller already knows about.
        os.replace(adjusted_file, wav_path)
    except Exception:
        _remove_quietly(adjusted_file)
        raise


def text_to_speech(text, speaker_name, speed, pitch):
    """Synthesize *text* to a temporary WAV file and return its path.

    Args:
        text: Text to narrate; must contain non-whitespace characters.
        speaker_name: Speaker id passed to the model (e.g. ``"p227"``).
        speed: Speaking-rate value forwarded to ``tts.tts_to_file``.
        pitch: Pitch shift in semitones; ``0`` (or ``None``) skips the sox step.

    Returns:
        Path of the generated ``.wav`` file. The file is not deleted here
        because the UI components read it after this function returns.

    Raises:
        gr.Error: If the text is empty or synthesis / pitch shifting fails.
    """
    output_path = None
    try:
        if not text or not text.strip():
            raise ValueError("Please enter some text")

        # Reserve a unique path and close the handle before synthesis writes
        # to it; an open handle blocks a second writer on some platforms.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
            output_path = f.name

        # NOTE(review): whether this model honours `speed` is not visible
        # from this file - confirm against the TTS API.
        tts.tts_to_file(
            text=text,
            speaker=speaker_name,
            file_path=output_path,
            speed=speed,
        )

        # Adjust pitch using sox if needed
        if pitch:
            _shift_pitch(output_path, pitch)

        return output_path
    except Exception as e:
        # Do not leave a partial or unused temp file behind on failure.
        if output_path is not None:
            _remove_quietly(output_path)
        raise gr.Error(f"Error generating speech: {str(e)}") from e


def create_download_link(audio_file):
    """Return an update that shows the download button only for an existing file.

    Args:
        audio_file: Path of the generated audio, or a falsy value if none.

    Returns:
        ``gr.update(visible=True, value=audio_file)`` when the file exists on
        disk, otherwise ``gr.update(visible=False)``.
    """
    if audio_file and os.path.exists(audio_file):
        return gr.update(visible=True, value=audio_file)
    return gr.update(visible=False)


with gr.Blocks(title="Storytelling TTS App") as app:
    gr.Markdown("# 🎙️ Professional Storytelling Text-to-Speech")
    gr.Markdown("Convert your text into narrated audio using expressive voices. Ideal for audiobooks, storytelling, and podcast narration.")

    with gr.Row():
        with gr.Column():
            text_input = gr.Textbox(
                label="Enter your story text",
                lines=8,
                placeholder="Once upon a time...",
            )
            # Dropdown has no `format_func`; (label, value) tuples display the
            # friendly label while handlers still receive the speaker id.
            speaker = gr.Dropdown(
                choices=[(speaker_labels[spk], spk) for spk in available_speakers],
                label="Narrator Voice",
                value=_default_speaker,
            )

            with gr.Accordion("🎛️ Voice Adjustment", open=True):
                speed = gr.Slider(
                    minimum=0.5,
                    maximum=2.0,
                    value=1.0,
                    step=0.1,
                    label="Speaking Rate",
                    info="1.0 = normal speed",
                )
                pitch = gr.Slider(
                    minimum=-5.0,
                    maximum=5.0,
                    value=0.0,
                    step=0.5,
                    label="Pitch Shift (in semitones)",
                    info="0 = normal, positive = higher pitch",
                )

            generate_btn = gr.Button("🎧 Generate Narration", variant="primary")

        with gr.Column():
            audio_output = gr.Audio(
                label="Generated Narration",
                type="filepath",
                elem_classes=["output-audio"],
            )
            download_button = gr.DownloadButton(
                label="Download Audio",
                visible=False,
            )

    with gr.Accordion("🎤 Preview Narrator Voices (Samples Coming Soon)", open=False):
        gr.Markdown("Previews will be available here once sample audios are added.")
        for speaker_id in available_speakers[:3]:
            gr.Audio(
                value=None,
                label=speaker_labels[speaker_id],
                visible=False,  # Set to True and provide file path or URL to enable
            )

    # Generate the narration first, then reveal the download button for it.
    generate_btn.click(
        fn=text_to_speech,
        inputs=[text_input, speaker, speed, pitch],
        outputs=audio_output,
    ).then(
        fn=create_download_link,
        inputs=audio_output,
        outputs=download_button,
    )

    gr.Examples(
        examples=[
            ["The old man sat by the fireplace, his eyes twinkling with memories of adventures past.", "p227", 0.9, 0.0],
            ["In a quiet village nestled between the mountains, a young girl discovered a secret that would change everything.", "p234", 1.0, 0.5],
            ["The detective examined the clue carefully, knowing this small piece of evidence could crack the entire case wide open.", "p231", 1.1, -1.0],
        ],
        inputs=[text_input, speaker, speed, pitch],
        outputs=audio_output,
        fn=text_to_speech,
        cache_examples=False,
    )

if __name__ == "__main__":
    # Pitch shifting is optional; tell the operator up front if it is unavailable.
    try:
        import sox
    except ImportError:
        print("Consider installing sox for pitch adjustment: pip install sox")
    app.launch()