Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import torch | |
| from TTS.api import TTS | |
| import os | |
| import tempfile | |
# Set before any model is constructed.
# NOTE(review): presumably read by Coqui TTS to skip its interactive
# terms-of-service prompt -- confirm against the TTS documentation.
os.environ["COQUI_TOS_AGREED"] = "1"

# Use the GPU when PyTorch reports one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
# Initialize TTS model
def load_tts_model():
    """Instantiate the ``xtts_v2`` multilingual model and move it to ``device``."""
    model = TTS("tts_models/multilingual/multi-dataset/xtts_v2")
    return model.to(device)


# One shared model instance, created at import time and reused by every
# request handler below.
tts = load_tts_model()
# Celebrity voices (example list, you may want to expand or modify this).
# Maps the label shown in the UI to the reference recording passed to the
# model as ``speaker_wav``.
# NOTE(review): the paths below look like placeholders -- confirm that real
# sample files exist at these locations before deploying.
celebrity_voices = {
    "Morgan Freeman": "path/to/morgan_freeman_sample.wav",
    "Scarlett Johansson": "path/to/scarlett_johansson_sample.wav",
    "David Attenborough": "path/to/david_attenborough_sample.wav",
}
def tts_generate(text, voice, language):
    """Synthesize ``text`` in one of the preset celebrity voices.

    Args:
        text: The text to speak.
        voice: A key of ``celebrity_voices`` selecting the reference recording.
        language: Language code forwarded to the model (e.g. ``"en"``).

    Returns:
        Path of the generated ``.wav`` file. The file is created with
        ``delete=False``, so it stays on disk after this function returns.

    Raises:
        gr.Error: If ``text`` is empty or ``voice`` is not a known voice.
    """
    # Reject bad input up front so the user gets a readable message instead
    # of a bare KeyError (no voice selected) or a failure inside the model.
    if not text or not text.strip():
        raise gr.Error("Please enter some text to speak.")
    if voice not in celebrity_voices:
        raise gr.Error("Please select a celebrity voice.")

    # Only reserve a unique file name here; the handle is closed before the
    # model writes to that path.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
        temp_audio_path = temp_audio.name

    try:
        tts.tts_to_file(
            text=text,
            speaker_wav=celebrity_voices[voice],
            language=language,
            file_path=temp_audio_path,
        )
    except Exception:
        # Synthesis failed: do not leave the empty/partial file behind.
        try:
            os.remove(temp_audio_path)
        except OSError:
            pass
        raise
    return temp_audio_path
def clone_voice(text, audio_file, language):
    """Synthesize ``text`` in the voice of a user-supplied reference recording.

    Args:
        text: The text to speak.
        audio_file: Path of the reference recording (the UI component that
            feeds this function is declared with ``type="filepath"``).
        language: Language code forwarded to the model (e.g. ``"en"``).

    Returns:
        Path of the generated ``.wav`` file. The file is created with
        ``delete=False``, so it stays on disk after this function returns.

    Raises:
        gr.Error: If ``text`` is empty or no reference recording was given.
    """
    # Reject bad input up front so the user gets a readable message instead
    # of a failure from inside the model.
    if not text or not text.strip():
        raise gr.Error("Please enter some text to speak.")
    if not audio_file:
        raise gr.Error("Please provide a voice reference audio file.")

    # Only reserve a unique file name here; the handle is closed before the
    # model writes to that path.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
        temp_audio_path = temp_audio.name

    try:
        tts.tts_to_file(
            text=text,
            speaker_wav=audio_file,
            language=language,
            file_path=temp_audio_path,
        )
    except Exception:
        # Synthesis failed: do not leave the empty/partial file behind.
        try:
            os.remove(temp_audio_path)
        except OSError:
            pass
        raise
    return temp_audio_path
# Placeholder function for Talking Image tab
def talking_image_placeholder():
    """Return the fixed notice that the Talking Image feature is not implemented."""
    notice = "Talking Image functionality not implemented yet."
    return notice
# Define Gradio interface
# NOTE(review): the nesting below is reconstructed from the statement order;
# confirm the row/tab layout against the deployed app.
supported_languages = ("en", "es", "fr", "de", "it")

with gr.Blocks() as demo:
    gr.Markdown("# Advanced Voice Synthesis")

    with gr.Tabs():
        # Tab 1: speak text in one of the preset voices.
        with gr.TabItem("TTS"):
            with gr.Row():
                tts_text = gr.Textbox(label="Text to speak")
                tts_voice = gr.Dropdown(
                    choices=list(celebrity_voices.keys()),
                    label="Celebrity Voice",
                )
                tts_language = gr.Dropdown(
                    choices=list(supported_languages),
                    label="Language",
                    value="en",
                )
            tts_generate_btn = gr.Button("Generate")
            tts_output = gr.Audio(label="Generated Audio")
            tts_generate_btn.click(
                fn=tts_generate,
                inputs=[tts_text, tts_voice, tts_language],
                outputs=tts_output,
            )

        # Tab 2: static notice only; no handler is wired up yet.
        with gr.TabItem("Talking Image"):
            gr.Markdown("Talking Image functionality coming soon!")

        # Tab 3: speak text in the voice of an uploaded reference recording.
        with gr.TabItem("Clone Voice"):
            with gr.Row():
                clone_text = gr.Textbox(label="Text to speak")
                clone_audio = gr.Audio(
                    label="Voice reference audio file",
                    type="filepath",
                )
                clone_language = gr.Dropdown(
                    choices=list(supported_languages),
                    label="Language",
                    value="en",
                )
            clone_generate_btn = gr.Button("Generate")
            clone_output = gr.Audio(label="Generated Audio")
            clone_generate_btn.click(
                fn=clone_voice,
                inputs=[clone_text, clone_audio, clone_language],
                outputs=clone_output,
            )

# Launch the interface
demo.launch()
# Clean up temporary files (this will run after the Gradio server is closed)
def remove_generated_wavs(directory=None):
    """Delete leftover ``tmp*.wav`` files and return the paths that were removed.

    The synthesis handlers in this file create their output with
    ``tempfile.NamedTemporaryFile(delete=False, suffix=".wav")`` and no
    ``dir`` argument, so the files are written to ``tempfile.gettempdir()``
    and not to the current working directory. Scanning the working directory
    would miss them and could delete unrelated files there.

    Args:
        directory: Directory to scan. Defaults to ``tempfile.gettempdir()``.

    Returns:
        List of the file paths that were deleted.
    """
    if directory is None:
        directory = tempfile.gettempdir()

    removed = []
    for name in os.listdir(directory):
        # "tmp" is the default NamedTemporaryFile prefix; ".wav" is the
        # suffix the synthesis handlers request.
        if not (name.startswith("tmp") and name.endswith(".wav")):
            continue
        path = os.path.join(directory, name)
        if not os.path.isfile(path):
            continue
        try:
            os.remove(path)
        except OSError:
            # Best effort: a file that is locked or already gone must not
            # stop the remaining files from being cleaned up.
            continue
        removed.append(path)
    return removed


# NOTE(review): the temp directory may be shared with other processes; any
# file matching tmp*.wav there is removed, not only files made by this app.
remove_generated_wavs()