| | import spaces |
| | import gradio as gr |
| | from f5_tts.infer.utils_infer import remove_silence_for_generated_wav |
| | from f5_tts.api import F5TTS |
| | import tempfile |
| | import os |
| | import requests |
| | import gdown |
| | import zipfile |
| | from pathlib import Path |
| |
|
| | |
# Single module-level F5TTS instance, constructed at import time and reused
# by every run_tts call.
f5tts = F5TTS()
| |
|
@spaces.GPU
def run_tts(ref_audio, ref_text, gen_text, remove_silence=False):
    """Synthesize ``gen_text`` using the reference clip as the voice prompt.

    Args:
        ref_audio: Reference audio, forwarded to ``F5TTS.infer`` as
            ``ref_file``.
        ref_text: Transcript of the reference audio, forwarded as-is.
        gen_text: Text to be spoken.
        remove_silence: Forwarded to ``F5TTS.infer``.

    Returns:
        Path of the temporary ``.wav`` file passed to ``F5TTS.infer`` as
        ``file_wave``.

    Raises:
        Whatever ``F5TTS.infer`` raises; the temporary file is removed first.
    """
    # mkstemp creates the file atomically. The deprecated mktemp only returns
    # a name, which another process could claim before it is written.
    fd, output_wav_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)

    try:
        f5tts.infer(
            ref_file=ref_audio,
            ref_text=ref_text,
            gen_text=gen_text,
            file_wave=output_wav_path,
            remove_silence=remove_silence,
        )
    except Exception:
        # Do not leave an empty temp file behind when inference fails.
        try:
            os.remove(output_wav_path)
        except OSError:
            pass
        raise

    return output_wav_path
| |
|
def download_voice(voice_url, voice_name, progress=gr.Progress()):
    """Download a zipped voice and unpack it into ``downloaded_voices/<voice_name>``.

    Args:
        voice_url: HTTP(S) link to a ZIP archive hosted on ``huggingface.co``
            (or a subdomain of it) or on ``drive.google.com``.
        voice_name: Folder name for the voice. Must be a single path
            component (no separators, not ``.`` or ``..``).
        progress: Gradio progress tracker used to report download and
            extraction status.

    Returns:
        A human-readable status string: either a success summary listing the
        extracted files or an error message. Failures are reported in the
        returned string rather than raised.
    """
    # Function-scope import so the block does not depend on a file-level
    # import being added.
    from urllib.parse import urlparse

    voice_url = (voice_url or "").strip()
    voice_name = (voice_name or "").strip()
    if not voice_url or not voice_name:
        return "Please provide both URL and voice name."

    # voice_name becomes a directory name, so refuse anything that could
    # resolve outside the voices folder.
    if voice_name in (".", "..") or any(ch in voice_name for ch in ("/", "\\", "\0")):
        return "Invalid voice name. Use a simple folder name without path separators."

    # Decide the provider from the parsed hostname. A substring test would
    # also accept URLs such as "https://evil.example/?x=huggingface.co".
    try:
        parsed = urlparse(voice_url)
        host = (parsed.hostname or "").lower()
    except ValueError:
        return "Unsupported URL. Only Hugging Face and Google Drive links are supported."

    is_huggingface = host == "huggingface.co" or host.endswith(".huggingface.co")
    is_google_drive = host == "drive.google.com"

    if parsed.scheme not in ("http", "https") or not (is_huggingface or is_google_drive):
        return "Unsupported URL. Only Hugging Face and Google Drive links are supported."

    base_path = "downloaded_voices"
    voice_dir = os.path.join(base_path, voice_name)
    zip_path = os.path.join(voice_dir, f"{voice_name}.zip")
    chunk_size = 1024 * 1024  # 1 MiB keeps the number of progress updates reasonable

    try:
        os.makedirs(voice_dir, exist_ok=True)

        if is_huggingface:
            progress(0, desc="Downloading from Hugging Face...")
            # The timeout stops a stalled connection from hanging the request
            # forever; the context manager releases the connection.
            with requests.get(voice_url, stream=True, timeout=30) as response:
                response.raise_for_status()
                total_size = int(response.headers.get("content-length", 0))
                downloaded = 0

                with open(zip_path, "wb") as f:
                    for chunk in response.iter_content(chunk_size=chunk_size):
                        if not chunk:
                            continue
                        f.write(chunk)
                        downloaded += len(chunk)
                        if total_size > 0:
                            # The download occupies 0-80% of the bar so the
                            # extraction step below never moves it backward.
                            fraction = min(downloaded / total_size, 1.0)
                            progress(
                                0.8 * fraction,
                                desc=f"Downloading: {downloaded//1024}KB/{total_size//1024}KB",
                            )
        else:
            progress(0, desc="Downloading from Google Drive...")
            result = gdown.download(url=voice_url, output=zip_path, quiet=False, fuzzy=True)
            if result is None:
                raise RuntimeError("Google Drive download returned no file.")

        progress(0.8, desc="Extracting files...")
        with zipfile.ZipFile(zip_path, "r") as zip_ref:
            zip_ref.extractall(voice_dir)

        # The archive is no longer needed once its contents are extracted.
        os.remove(zip_path)

        files = os.listdir(voice_dir)
        if not files:
            return "Voice directory is empty after extraction. Download may have failed."

        file_list = "\n".join(f" - {file}" for file in files)
        return f"✅ Voice '{voice_name}' successfully downloaded!\n📁 Location: {voice_dir}\n📋 Files:\n{file_list}"

    except Exception as e:
        # Best-effort cleanup: drop a partial archive, then the voice folder
        # if it is empty. os.rmdir refuses non-empty directories, so files
        # from an earlier successful download are never removed.
        for cleanup, target in ((os.remove, zip_path), (os.rmdir, voice_dir)):
            try:
                cleanup(target)
            except OSError:
                pass
        return f"❌ Error downloading voice: {e}"
| |
|
def list_available_voices():
    """Return a Markdown summary of every voice folder under ``downloaded_voices``.

    Each sub-directory is rendered as one entry with its name, its path and a
    comma-separated list of the files it contains. A fixed message is
    returned when the folder is missing or holds no sub-directories.
    """
    root = "downloaded_voices"
    if not os.path.exists(root):
        return "No voices downloaded yet."

    # Pair each entry name with its full path; only directories count as voices.
    candidates = [(name, os.path.join(root, name)) for name in os.listdir(root)]
    entries = [
        f"🎤 **{name}**\n📍 Path: {folder}\n📋 Files: {', '.join(os.listdir(folder))}\n"
        for name, folder in candidates
        if os.path.isdir(folder)
    ]

    if entries:
        return "\n".join(entries)
    return "No voices found in the downloaded_voices directory."
| |
|
def load_voice_audio(voice_name, audio_file):
    """Resolve an audio file inside a downloaded voice folder.

    Args:
        voice_name: Name of a folder under ``downloaded_voices``; may be
            ``None`` or empty when nothing is selected.
        audio_file: File name inside that folder; may be ``None`` or empty
            when nothing is selected.

    Returns:
        A ``(path, message)`` tuple. ``path`` is the audio file path on
        success and ``None`` otherwise; ``message`` describes the outcome.
    """
    # An unselected dropdown yields None, which os.path.join would reject
    # with a TypeError, so report it as a normal status instead.
    if not voice_name or not audio_file:
        return None, "Please select a voice and an audio file."

    base_path = "downloaded_voices"
    voice_path = os.path.join(base_path, voice_name)
    if not os.path.isdir(voice_path):
        return None, f"Voice '{voice_name}' not found."

    audio_path = os.path.join(voice_path, audio_file)
    # isfile rather than exists: a sub-directory is not a usable audio file.
    if not os.path.isfile(audio_path):
        return None, f"Audio file '{audio_file}' not found in voice '{voice_name}' directory."

    return audio_path, f"✅ Loaded audio: {audio_file} from voice '{voice_name}'"
| |
|
| | |
# Gradio UI: three tabs for direct generation, downloading voices, and
# generating from a previously downloaded voice.
with gr.Blocks(title="🗣️ F5-TTS Demo with Voice Download") as demo:
    gr.Markdown("# 🗣️ F5-TTS Demo with Voice Management")
    gr.Markdown("Upload a reference voice, give reference and generation text, and hear it in the same voice! Plus, download pre-made voices from Hugging Face or Google Drive.")

    with gr.Tabs():
        with gr.TabItem("🔊 Generate Speech"):
            with gr.Row():
                with gr.Column():
                    ref_audio = gr.Audio(label="Reference Audio", type="filepath")
                    ref_text = gr.Textbox(
                        label="Reference Text",
                        placeholder="some call me nature, others call me mother nature.",
                        lines=3,
                    )
                    gen_text = gr.Textbox(
                        label="Generation Text",
                        placeholder="I don't really care what you call me...",
                        lines=5,
                    )
                    remove_silence = gr.Checkbox(label="Remove Silence from Output?", value=False)
                    generate_btn = gr.Button("Generate Speech", variant="primary")

                with gr.Column():
                    output_audio = gr.Audio(label="Generated Speech")
                    # NOTE(review): no event handler in this block writes to
                    # this image, so it stays empty.
                    spectrogram = gr.Image(label="Spectrogram (if available)")

            generate_btn.click(
                fn=run_tts,
                inputs=[ref_audio, ref_text, gen_text, remove_silence],
                outputs=[output_audio],
            )

        with gr.TabItem("📥 Download Voices"):
            gr.Markdown("## 📥 Download Pre-made Voices")
            gr.Markdown("Download voices from Hugging Face or Google Drive. The voice should be in ZIP format containing audio files and metadata.")

            with gr.Row():
                with gr.Column():
                    voice_url = gr.Textbox(
                        label="Voice URL (Hugging Face or Google Drive)",
                        placeholder="https://huggingface.co/Chouio/Adam/resolve/main/AdamDefinitive.zip",
                        lines=2,
                    )
                    voice_name = gr.Textbox(
                        label="Voice Name (for folder)",
                        placeholder="my_voice",
                    )
                    download_btn = gr.Button("Download Voice", variant="primary")
                    download_status = gr.Textbox(label="Status", interactive=False)

                with gr.Column():
                    gr.Markdown("### 📋 Available Voices")
                    refresh_btn = gr.Button("Refresh List")
                    voices_list = gr.Markdown(label="Available Voices", value="No voices downloaded yet.")

            download_btn.click(
                fn=download_voice,
                inputs=[voice_url, voice_name],
                outputs=[download_status],
            )

            refresh_btn.click(
                fn=list_available_voices,
                outputs=[voices_list],
            )

        with gr.TabItem("🎭 Use Downloaded Voice"):
            gr.Markdown("## 🎭 Use Downloaded Voice for TTS")
            gr.Markdown("Select a downloaded voice and use its audio files for reference.")

            with gr.Row():
                with gr.Column():
                    available_voices = gr.Dropdown(label="Select Voice", choices=[])
                    refresh_voices_btn = gr.Button("Refresh Voices")

                    voice_audio_files = gr.Dropdown(label="Select Audio File", choices=[])
                    load_audio_btn = gr.Button("Load Selected Audio")
                    # Dedicated status box: the load message must not land in
                    # the reference-text field, whose content is sent to
                    # run_tts as the reference transcript.
                    load_status = gr.Textbox(label="Status", interactive=False)

                    ref_text_downloaded = gr.Textbox(
                        label="Reference Text",
                        placeholder="Reference text will be auto-filled or you can enter manually",
                        lines=3,
                    )

                    gen_text_downloaded = gr.Textbox(
                        label="Generation Text",
                        placeholder="Enter text to generate in this voice...",
                        lines=5,
                    )

                    remove_silence_downloaded = gr.Checkbox(label="Remove Silence from Output?", value=False)
                    generate_from_voice_btn = gr.Button("Generate with This Voice", variant="primary")

                with gr.Column():
                    # type="filepath" so run_tts receives a path for ref_file,
                    # matching the reference-audio input on the first tab.
                    loaded_audio = gr.Audio(label="Loaded Reference Audio", type="filepath")
                    output_audio_downloaded = gr.Audio(label="Generated Speech")

            def refresh_voice_list():
                """Return a dropdown update listing the downloaded voice folders."""
                base_path = "downloaded_voices"
                if not os.path.exists(base_path):
                    return gr.update(choices=[])

                voices = sorted(
                    item
                    for item in os.listdir(base_path)
                    if os.path.isdir(os.path.join(base_path, item))
                )
                # Returning a bare list would set the dropdown's value;
                # gr.update replaces its choices instead.
                return gr.update(choices=voices)

            refresh_voices_btn.click(
                fn=refresh_voice_list,
                outputs=[available_voices],
            )

            def update_audio_files(voice_name):
                """Return a dropdown update listing audio files of the chosen voice."""
                if not voice_name:
                    return gr.update(choices=[], value=None)

                voice_path = os.path.join("downloaded_voices", voice_name)
                if not os.path.exists(voice_path):
                    return gr.update(choices=[], value=None)

                audio_files = sorted(
                    file
                    for file in os.listdir(voice_path)
                    if file.lower().endswith(('.wav', '.mp3', '.flac', '.ogg'))
                )
                # Clear the selection so a file name from the previously
                # selected voice cannot linger.
                return gr.update(choices=audio_files, value=None)

            available_voices.change(
                fn=update_audio_files,
                inputs=[available_voices],
                outputs=[voice_audio_files],
            )

            load_audio_btn.click(
                fn=load_voice_audio,
                inputs=[available_voices, voice_audio_files],
                outputs=[loaded_audio, load_status],
            )

            generate_from_voice_btn.click(
                fn=run_tts,
                inputs=[loaded_audio, ref_text_downloaded, gen_text_downloaded, remove_silence_downloaded],
                outputs=[output_audio_downloaded],
            )
| |
|
# Launch the Gradio app only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    demo.launch()