E2-F5-TTS

Runtime error

App Files Files Community

Chouio committed on Jan 7

Commit

b744140

verified ·

1 Parent(s): 43f167f

Update app.py

Browse files

Files changed (1) hide show

app.py +279 -17

app.py CHANGED Viewed

@@ -1,25 +1,287 @@
 import requests
 import zipfile
-import os
-def descargar_modelo_rvc(url, destino="ckpts"):
-    """Download a ZIP from ``url`` into ``destino``, extract it there, and return a folder path.
-
-    Returns the path of the first sub-directory that ``os.listdir(destino)``
-    reports after extraction, or ``None`` when ``destino`` holds no directories.
-    """
-    os.makedirs(destino, exist_ok=True)
-    zip_path = os.path.join(destino, "modelo_rvc.zip")
-    # Download the ZIP
-    # NOTE(review): r.content is read in one go and the HTTP status is never checked.
-    with requests.get(url, stream=True) as r:
-        with open(zip_path, "wb") as f:
-            f.write(r.content)
-    # Extract the ZIP
-    with zipfile.ZipFile(zip_path, "r") as zip_ref:
-        zip_ref.extractall(destino)
-    # Detect the extracted folder (any directory already present in destino also counts)
-    contenido = os.listdir(destino)
-    carpetas = [c for c in contenido if os.path.isdir(os.path.join(destino, c))]
-    if not carpetas:
-        return None
-    return os.path.join(destino, carpetas[0])

+import spaces
+import gradio as gr
+from f5_tts.infer.utils_infer import remove_silence_for_generated_wav
+from f5_tts.api import F5TTS
+import tempfile
+import os
 import requests
+import gdown
 import zipfile
+from pathlib import Path
+# Create the F5TTS engine once at import time; run_tts() reuses this single instance.
+f5tts = F5TTS()
+@spaces.GPU
+def run_tts(ref_audio, ref_text, gen_text, remove_silence=False):
+    """Run F5-TTS inference and return the path of the generated WAV file.
+
+    Args:
+        ref_audio: Reference audio, forwarded to ``f5tts.infer`` as ``ref_file``.
+        ref_text: Reference text, forwarded as ``ref_text`` (presumably the
+            transcript of ``ref_audio`` -- confirm against the F5TTS API).
+        gen_text: Text to synthesize, forwarded as ``gen_text``.
+        remove_silence: Forwarded unchanged to ``f5tts.infer``.
+
+    Returns:
+        The temporary ``.wav`` path handed to ``f5tts.infer`` as ``file_wave``.
+    """
+    # mkstemp creates the file atomically. tempfile.mktemp is deprecated because
+    # another process can claim the returned name before it is opened.
+    fd, output_wav_path = tempfile.mkstemp(suffix=".wav")
+    os.close(fd)  # only the path is needed; infer() is given the path to write to
+    f5tts.infer(
+        ref_file=ref_audio,
+        ref_text=ref_text,
+        gen_text=gen_text,
+        file_wave=output_wav_path,
+        remove_silence=remove_silence,
+    )
+    return output_wav_path
+def download_voice(voice_url, voice_name, progress=gr.Progress()):
+    """Download a voice ZIP from Hugging Face or Google Drive and unpack it.
+
+    The archive is saved to ``downloaded_voices/<voice_name>/<voice_name>.zip``,
+    extracted into that folder, and then deleted.
+
+    Args:
+        voice_url: Link to the ZIP; its host must be ``huggingface.co`` (or a
+            subdomain of it) or ``drive.google.com``.
+        voice_name: Plain folder name to store the voice under.
+        progress: Gradio progress callback used to report download status.
+
+    Returns:
+        A human-readable status string. Failures are reported in the string
+        rather than raised, so the UI can always display the result.
+    """
+    from urllib.parse import urlparse  # local import keeps this block self-contained
+
+    voice_url = (voice_url or "").strip()
+    voice_name = (voice_name or "").strip()
+    if not voice_url or not voice_name:
+        return "Please provide both URL and voice name."
+    # The name becomes a folder under downloaded_voices; refuse anything that
+    # could point outside of it.
+    if voice_name in (".", "..") or any(sep in voice_name for sep in ("/", "\\")):
+        return "Invalid voice name. Use a plain folder name without path separators."
+    # Match on the parsed hostname: a substring test would also accept
+    # URLs such as https://evil.example/?huggingface.co
+    host = (urlparse(voice_url).hostname or "").lower()
+    is_huggingface = host == "huggingface.co" or host.endswith(".huggingface.co")
+    is_google_drive = host == "drive.google.com"
+    if not (is_huggingface or is_google_drive):
+        return "Unsupported URL. Only Hugging Face and Google Drive links are supported."
+    base_path = "downloaded_voices"
+    voice_dir = os.path.join(base_path, voice_name)
+    os.makedirs(voice_dir, exist_ok=True)
+    zip_path = os.path.join(voice_dir, f"{voice_name}.zip")
+    try:
+        if is_huggingface:
+            progress(0, desc="Downloading from Hugging Face...")
+            # The context manager releases the connection; the timeout stops a
+            # stalled server from hanging the request forever.
+            with requests.get(voice_url, stream=True, timeout=30) as response:
+                response.raise_for_status()
+                total_size = int(response.headers.get('content-length', 0))
+                downloaded = 0
+                with open(zip_path, 'wb') as f:
+                    for chunk in response.iter_content(chunk_size=8192):
+                        if not chunk:
+                            continue
+                        f.write(chunk)
+                        downloaded += len(chunk)
+                        if total_size > 0:
+                            progress(downloaded / total_size, desc=f"Downloading: {downloaded//1024}KB/{total_size//1024}KB")
+        else:
+            progress(0, desc="Downloading from Google Drive...")
+            # gdown can signal failure by returning None instead of raising.
+            if gdown.download(url=voice_url, output=zip_path, quiet=False, fuzzy=True) is None:
+                raise RuntimeError("Google Drive download failed.")
+        progress(0.8, desc="Extracting files...")
+        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
+            zip_ref.extractall(voice_dir)
+        # The archive is no longer needed once its contents are unpacked.
+        if os.path.exists(zip_path):
+            os.remove(zip_path)
+        files = sorted(os.listdir(voice_dir))
+        if not files:
+            return "Voice directory is empty after extraction. Download may have failed."
+        file_list = "\n".join(f"  - {file}" for file in files)
+        return f"✅ Voice '{voice_name}' successfully downloaded!\n📁 Location: {voice_dir}\n📋 Files:\n{file_list}"
+    except Exception as e:
+        # Best-effort cleanup of the partial archive only; the folder itself
+        # is kept because it might contain other files.
+        try:
+            if os.path.exists(zip_path):
+                os.remove(zip_path)
+        except OSError:
+            pass
+        return f"❌ Error downloading voice: {str(e)}"
+def list_available_voices():
+    """Return a Markdown summary of every voice folder in ``downloaded_voices``.
+
+    Each sub-directory contributes one entry showing its name, path and file
+    names. A fixed message is returned when the base folder is missing or
+    contains no sub-directories.
+    """
+    root = "downloaded_voices"
+    if not os.path.exists(root):
+        return "No voices downloaded yet."
+    # Pair each entry name with its full path so the path is computed once.
+    candidates = ((name, os.path.join(root, name)) for name in os.listdir(root))
+    entries = [
+        f"🎤 **{name}**\n📍 Path: {folder}\n📋 Files: {', '.join(os.listdir(folder))}\n"
+        for name, folder in candidates
+        if os.path.isdir(folder)
+    ]
+    if entries:
+        return "\n".join(entries)
+    return "No voices found in the downloaded_voices directory."
+def load_voice_audio(voice_name, audio_file):
+    """Resolve an audio file inside a downloaded voice folder.
+
+    Args:
+        voice_name: Folder name under ``downloaded_voices``.
+        audio_file: File name inside that folder.
+
+    Returns:
+        ``(audio_path, message)`` on success, or ``(None, message)`` when a
+        selection is missing or the folder or file does not exist.
+    """
+    # The click handler feeds this from two dropdowns; an unselected one sends
+    # None, which would make os.path.join raise TypeError.
+    if not voice_name or not audio_file:
+        return None, "Please select a voice and an audio file."
+    voice_path = os.path.join("downloaded_voices", voice_name)
+    if not os.path.exists(voice_path):
+        return None, f"Voice '{voice_name}' not found."
+    audio_path = os.path.join(voice_path, audio_file)
+    if not os.path.exists(audio_path):
+        return None, f"Audio file '{audio_file}' not found in voice '{voice_name}' directory."
+    return audio_path, f"✅ Loaded audio: {audio_file} from voice '{voice_name}'"
+# Build the three-tab Gradio UI: direct TTS, voice download, and TTS from a downloaded voice.
+with gr.Blocks(title="🗣️ F5-TTS Demo with Voice Download") as demo:
+    gr.Markdown("# 🗣️ F5-TTS Demo with Voice Management")
+    gr.Markdown("Upload a reference voice, give reference and generation text, and hear it in the same voice! Plus, download pre-made voices from Hugging Face or Google Drive.")
+    with gr.Tabs():
+        with gr.TabItem("🔊 Generate Speech"):
+            with gr.Row():
+                with gr.Column():
+                    ref_audio = gr.Audio(label="Reference Audio", type="filepath")
+                    ref_text = gr.Textbox(
+                        label="Reference Text",
+                        placeholder="some call me nature, others call me mother nature.",
+                        lines=3
+                    )
+                    gen_text = gr.Textbox(
+                        label="Generation Text",
+                        placeholder="I don't really care what you call me...",
+                        lines=5
+                    )
+                    remove_silence = gr.Checkbox(label="Remove Silence from Output?", value=False)
+                    generate_btn = gr.Button("Generate Speech", variant="primary")
+                with gr.Column():
+                    output_audio = gr.Audio(label="Generated Speech")
+                    spectrogram = gr.Image(label="Spectrogram (if available)")
+            generate_btn.click(
+                fn=run_tts,
+                inputs=[ref_audio, ref_text, gen_text, remove_silence],
+                outputs=[output_audio]
+            )
+        with gr.TabItem("📥 Download Voices"):
+            gr.Markdown("## 📥 Download Pre-made Voices")
+            gr.Markdown("Download voices from Hugging Face or Google Drive. The voice should be in ZIP format containing audio files and metadata.")
+            with gr.Row():
+                with gr.Column():
+                    voice_url = gr.Textbox(
+                        label="Voice URL (Hugging Face or Google Drive)",
+                        placeholder="https://huggingface.co/Chouio/Adam/resolve/main/AdamDefinitive.zip",
+                        lines=2
+                    )
+                    voice_name = gr.Textbox(
+                        label="Voice Name (for folder)",
+                        placeholder="my_voice"
+                    )
+                    download_btn = gr.Button("Download Voice", variant="primary")
+                    download_status = gr.Textbox(label="Status", interactive=False)
+                with gr.Column():
+                    gr.Markdown("### 📋 Available Voices")
+                    refresh_btn = gr.Button("Refresh List")
+                    voices_list = gr.Markdown(label="Available Voices", value="No voices downloaded yet.")
+            download_btn.click(
+                fn=download_voice,
+                inputs=[voice_url, voice_name],
+                outputs=[download_status]
+            )
+            refresh_btn.click(
+                fn=list_available_voices,
+                outputs=[voices_list]
+            )
+        with gr.TabItem("🎭 Use Downloaded Voice"):
+            gr.Markdown("## 🎭 Use Downloaded Voice for TTS")
+            gr.Markdown("Select a downloaded voice and use its audio files for reference.")
+            with gr.Row():
+                with gr.Column():
+                    # Voice selector
+                    available_voices = gr.Dropdown(label="Select Voice", choices=[])
+                    refresh_voices_btn = gr.Button("Refresh Voices")
+                    # Audio file selector
+                    voice_audio_files = gr.Dropdown(label="Select Audio File", choices=[])
+                    load_audio_btn = gr.Button("Load Selected Audio")
+                    # Separate status box so load messages never overwrite the reference text
+                    load_status = gr.Textbox(label="Load Status", interactive=False)
+                    # Reference text (entered manually)
+                    ref_text_downloaded = gr.Textbox(
+                        label="Reference Text",
+                        placeholder="Reference text will be auto-filled or you can enter manually",
+                        lines=3
+                    )
+                    # Generation text
+                    gen_text_downloaded = gr.Textbox(
+                        label="Generation Text",
+                        placeholder="Enter text to generate in this voice...",
+                        lines=5
+                    )
+                    remove_silence_downloaded = gr.Checkbox(label="Remove Silence from Output?", value=False)
+                    generate_from_voice_btn = gr.Button("Generate with This Voice", variant="primary")
+                with gr.Column():
+                    # type="filepath" so run_tts receives a path, as it does from the first tab
+                    loaded_audio = gr.Audio(label="Loaded Reference Audio", type="filepath")
+                    output_audio_downloaded = gr.Audio(label="Generated Speech")
+            def refresh_voice_list():
+                """Return a Dropdown update listing every folder under downloaded_voices."""
+                base_path = "downloaded_voices"
+                if not os.path.isdir(base_path):
+                    return gr.update(choices=[])
+                voices = sorted(
+                    item for item in os.listdir(base_path)
+                    if os.path.isdir(os.path.join(base_path, item))
+                )
+                # A bare list would be taken as the dropdown's value, not its choices.
+                return gr.update(choices=voices)
+            refresh_voices_btn.click(
+                fn=refresh_voice_list,
+                outputs=[available_voices]
+            )
+            def update_audio_files(voice_name):
+                """Return a Dropdown update with the audio files of the selected voice."""
+                audio_files = []
+                if voice_name:
+                    voice_path = os.path.join("downloaded_voices", voice_name)
+                    if os.path.isdir(voice_path):
+                        audio_files = sorted(
+                            file for file in os.listdir(voice_path)
+                            if file.lower().endswith(('.wav', '.mp3', '.flac', '.ogg'))
+                        )
+                # Reset the selection so a file name from the previous voice cannot linger.
+                return gr.update(choices=audio_files, value=audio_files[0] if audio_files else None)
+            available_voices.change(
+                fn=update_audio_files,
+                inputs=[available_voices],
+                outputs=[voice_audio_files]
+            )
+            # Load selected audio; the message goes to the status box, not the reference text
+            load_audio_btn.click(
+                fn=load_voice_audio,
+                inputs=[available_voices, voice_audio_files],
+                outputs=[loaded_audio, load_status]
+            )
+            # Generate speech using downloaded voice
+            generate_from_voice_btn.click(
+                fn=run_tts,
+                inputs=[loaded_audio, ref_text_downloaded, gen_text_downloaded, remove_silence_downloaded],
+                outputs=[output_audio_downloaded]
+            )
+# Launch the Gradio app only when this file is executed as a script, not on import.
+if __name__ == "__main__":
+    demo.launch()