Chouio committed on
Commit
b744140
·
verified ·
1 Parent(s): 43f167f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +279 -17
app.py CHANGED
@@ -1,25 +1,287 @@
 
 
 
 
 
 
1
  import requests
 
2
  import zipfile
3
- import os
 
 
 
4
 
5
def descargar_modelo_rvc(url, destino="ckpts"):
    """Download an RVC model ZIP, extract it and return the extracted folder.

    Args:
        url: HTTP(S) address of the ZIP archive.
        destino: Directory that receives the archive and its contents; it is
            created when missing.

    Returns:
        Path of a sub-directory of ``destino``. Directories that appeared
        during this extraction are preferred (first in sorted order); if the
        extraction created none, the first existing sub-directory in sorted
        order is returned. ``None`` when ``destino`` has no sub-directory.

    Raises:
        requests.HTTPError: The server answered with an error status.
        zipfile.BadZipFile: The downloaded payload is not a ZIP archive.
    """
    os.makedirs(destino, exist_ok=True)
    zip_path = os.path.join(destino, "modelo_rvc.zip")

    def _subcarpetas():
        # Names of the directories currently sitting directly under destino.
        return {
            c for c in os.listdir(destino)
            if os.path.isdir(os.path.join(destino, c))
        }

    # Download the ZIP. Write it chunk by chunk: reading ``r.content`` would
    # load the whole archive into memory and defeat ``stream=True``.
    with requests.get(url, stream=True) as r:
        # Fail early instead of saving an HTML error page as the "ZIP".
        r.raise_for_status()
        with open(zip_path, "wb") as f:
            for chunk in r.iter_content(chunk_size=1024 * 1024):
                f.write(chunk)

    # Remember what was already there so a folder left over from an earlier
    # download is not mistaken for the model extracted now.
    antes = _subcarpetas()

    # Extract the ZIP.
    with zipfile.ZipFile(zip_path, "r") as zip_ref:
        zip_ref.extractall(destino)

    # Detect the extracted folder. Sorting makes the choice deterministic,
    # since os.listdir returns entries in arbitrary order.
    despues = _subcarpetas()
    carpetas = sorted(despues - antes) or sorted(despues)

    if not carpetas:
        return None

    return os.path.join(destino, carpetas[0])
 
 
1
+ import spaces
2
+ import gradio as gr
3
+ from f5_tts.infer.utils_infer import remove_silence_for_generated_wav
4
+ from f5_tts.api import F5TTS
5
+ import tempfile
6
+ import os
7
  import requests
8
+ import gdown
9
  import zipfile
10
+ from pathlib import Path
11
+
12
# Initialize F5TTS once at import time; every request reuses this instance.
f5tts = F5TTS()


@spaces.GPU
def run_tts(ref_audio, ref_text, gen_text, remove_silence=False):
    """Generate speech with F5-TTS and return the path of the resulting WAV.

    Args:
        ref_audio: Reference audio, forwarded to ``f5tts.infer`` as
            ``ref_file`` (the UI supplies a file path).
        ref_text: Text associated with the reference audio, forwarded
            unchanged as ``ref_text``.
        gen_text: Text to synthesise, forwarded as ``gen_text``.
        remove_silence: Forwarded unchanged to ``f5tts.infer``.

    Returns:
        Path of a freshly created temporary ``.wav`` file that was passed to
        ``f5tts.infer`` as ``file_wave``. The file is not deleted here.

    Raises:
        gr.Error: No reference audio or no generation text was provided.
    """
    if not ref_audio:
        raise gr.Error("Please provide a reference audio file.")
    if not gen_text or not gen_text.strip():
        raise gr.Error("Please enter the text to generate.")

    # mkstemp creates the file atomically and hands back an open descriptor.
    # tempfile.mktemp only returns a name, which is deprecated because another
    # process can claim that name before it is used.
    fd, output_wav_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)

    # Only the output file matters here, so the return value of infer is
    # ignored. Presumably infer writes the audio to file_wave; verify
    # against the F5TTS API.
    f5tts.infer(
        ref_file=ref_audio,
        ref_text=ref_text,
        gen_text=gen_text,
        file_wave=output_wav_path,
        remove_silence=remove_silence,
    )
    return output_wav_path
26
 
27
def _stream_to_file(url, dest_path, progress):
    """Stream ``url`` into ``dest_path`` while reporting download progress.

    The reported fraction is scaled to the 0-0.8 range so that the later
    "Extracting files..." step (reported at 0.8) never moves the bar backwards.
    """
    # The context manager releases the connection even if writing fails; the
    # timeout keeps a stalled server from blocking the request forever.
    with requests.get(url, stream=True, timeout=30) as response:
        response.raise_for_status()
        total_size = int(response.headers.get('content-length', 0))
        downloaded = 0
        with open(dest_path, 'wb') as f:
            for chunk in response.iter_content(chunk_size=8192):
                if not chunk:
                    continue
                f.write(chunk)
                downloaded += len(chunk)
                if total_size > 0:
                    fraction = min(downloaded / total_size, 1.0)
                    progress(
                        0.8 * fraction,
                        desc=f"Downloading: {downloaded//1024}KB/{total_size//1024}KB",
                    )


def download_voice(voice_url, voice_name, progress=gr.Progress()):
    """Download a voice ZIP from Hugging Face or Google Drive and unpack it.

    The archive is saved to ``downloaded_voices/<voice_name>/<voice_name>.zip``,
    extracted into that folder and then deleted.

    Args:
        voice_url: Link to the ZIP archive. Only the hosts ``huggingface.co``
            (including sub-domains) and ``drive.google.com`` are accepted.
        voice_name: Name of the folder created under ``downloaded_voices``.
            Must be a single path component.
        progress: Gradio progress tracker, called with a fraction and ``desc``.

    Returns:
        A human-readable status message. Failures are reported through the
        message as well; this function does not raise for download errors.
    """
    # Local import: the file's import block does not provide urlparse.
    from urllib.parse import urlparse

    if not voice_url or not voice_name:
        return "Please provide both URL and voice name."

    voice_url = voice_url.strip()
    voice_name = voice_name.strip()
    if not voice_url or not voice_name:
        return "Please provide both URL and voice name."

    # voice_name becomes a directory name. Refuse separators and dot segments
    # so user input cannot point outside downloaded_voices.
    if voice_name in (".", "..") or "/" in voice_name or "\\" in voice_name:
        return "Invalid voice name. Use a plain folder name without path separators."

    # Determine download type from the real host. A substring test would also
    # accept e.g. "https://example.com/?x=huggingface.co".
    host = (urlparse(voice_url).hostname or "").lower()
    is_huggingface = host == "huggingface.co" or host.endswith(".huggingface.co")
    is_google_drive = host == "drive.google.com"

    if not (is_huggingface or is_google_drive):
        return "Unsupported URL. Only Hugging Face and Google Drive links are supported."

    # Create voice directory
    base_path = "downloaded_voices"
    voice_dir = os.path.join(base_path, voice_name)
    os.makedirs(voice_dir, exist_ok=True)
    zip_path = os.path.join(voice_dir, f"{voice_name}.zip")

    try:
        try:
            if is_huggingface:
                progress(0, desc="Downloading from Hugging Face...")
                _stream_to_file(voice_url, zip_path, progress)
            else:
                progress(0, desc="Downloading from Google Drive...")
                gdown.download(url=voice_url, output=zip_path, quiet=False, fuzzy=True)
                # Check the file explicitly so a failed download is reported
                # as such rather than as a confusing ZIP error below.
                if not os.path.exists(zip_path):
                    raise FileNotFoundError("Google Drive download did not produce a file.")

            # Extract ZIP file
            progress(0.8, desc="Extracting files...")
            with zipfile.ZipFile(zip_path, 'r') as zip_ref:
                zip_ref.extractall(voice_dir)
        finally:
            # The archive is only a transport artifact: remove it on success
            # and on failure alike. Other files in the directory are left
            # alone because they may belong to an earlier download.
            try:
                os.remove(zip_path)
            except OSError:
                pass

        # Check if the voice was properly extracted
        files = os.listdir(voice_dir)
        if not files:
            return "Voice directory is empty after extraction. Download may have failed."

        # List downloaded files
        file_list = "\n".join(f" - {file}" for file in files)
        return f"✅ Voice '{voice_name}' successfully downloaded!\n📁 Location: {voice_dir}\n📋 Files:\n{file_list}"

    except Exception as e:
        # UI boundary: turn any failure into a status message for the user.
        return f"❌ Error downloading voice: {str(e)}"
97
 
98
def list_available_voices():
    """Describe every downloaded voice folder as one Markdown-style string.

    Returns:
        A notice when ``downloaded_voices`` is missing or holds no
        sub-directory; otherwise one entry per sub-directory (name, path and
        comma-separated file names), with entries joined by newlines.
    """
    root = "downloaded_voices"
    if not os.path.exists(root):
        return "No voices downloaded yet."

    entries = []
    for name in os.listdir(root):
        folder = os.path.join(root, name)
        # Loose files directly under the root are not voices.
        if not os.path.isdir(folder):
            continue
        contents = ", ".join(os.listdir(folder))
        entries.append(f"🎤 **{name}**\n📍 Path: {folder}\n📋 Files: {contents}\n")

    if entries:
        return "\n".join(entries)
    return "No voices found in the downloaded_voices directory."
115
 
116
def load_voice_audio(voice_name, audio_file):
    """Resolve an audio file inside a downloaded voice folder.

    Args:
        voice_name: Folder name under ``downloaded_voices``.
        audio_file: File name inside that folder.

    Returns:
        ``(audio_path, message)``. ``audio_path`` is ``None`` when nothing is
        selected, when either name is not a plain path component, or when the
        folder or file does not exist; ``message`` explains the outcome.
    """
    # Nothing selected yet: os.path.join would raise TypeError on None.
    if not voice_name or not audio_file:
        return None, "Please select a voice and an audio file."

    def is_plain(name):
        # A single path component: no separators and no dot segments, so the
        # joined path cannot leave the voice folder.
        return name not in (".", "..") and "/" not in name and "\\" not in name

    base_path = "downloaded_voices"
    voice_path = os.path.join(base_path, voice_name)

    if not is_plain(voice_name) or not os.path.exists(voice_path):
        return None, f"Voice '{voice_name}' not found."

    audio_path = os.path.join(voice_path, audio_file)
    if not is_plain(audio_file) or not os.path.exists(audio_path):
        return None, f"Audio file '{audio_file}' not found in voice '{voice_name}' directory."

    return audio_path, f"✅ Loaded audio: {audio_file} from voice '{voice_name}'"
129
 
130
# Create Gradio interface with tabs:
#   1. generate speech from an uploaded reference clip,
#   2. download voice archives,
#   3. generate speech from a clip inside a downloaded voice folder.
with gr.Blocks(title="🗣️ F5-TTS Demo with Voice Download") as demo:
    gr.Markdown("# 🗣️ F5-TTS Demo with Voice Management")
    gr.Markdown("Upload a reference voice, give reference and generation text, and hear it in the same voice! Plus, download pre-made voices from Hugging Face or Google Drive.")

    with gr.Tabs():
        with gr.TabItem("🔊 Generate Speech"):
            with gr.Row():
                with gr.Column():
                    ref_audio = gr.Audio(label="Reference Audio", type="filepath")
                    ref_text = gr.Textbox(
                        label="Reference Text",
                        placeholder="some call me nature, others call me mother nature.",
                        lines=3,
                    )
                    gen_text = gr.Textbox(
                        label="Generation Text",
                        placeholder="I don't really care what you call me...",
                        lines=5,
                    )
                    remove_silence = gr.Checkbox(label="Remove Silence from Output?", value=False)
                    generate_btn = gr.Button("Generate Speech", variant="primary")

                with gr.Column():
                    output_audio = gr.Audio(label="Generated Speech")
                    # NOTE(review): no callback in this file writes to this
                    # image, so it stays empty.
                    spectrogram = gr.Image(label="Spectrogram (if available)")

            generate_btn.click(
                fn=run_tts,
                inputs=[ref_audio, ref_text, gen_text, remove_silence],
                outputs=[output_audio],
            )

        with gr.TabItem("📥 Download Voices"):
            gr.Markdown("## 📥 Download Pre-made Voices")
            gr.Markdown("Download voices from Hugging Face or Google Drive. The voice should be in ZIP format containing audio files and metadata.")

            with gr.Row():
                with gr.Column():
                    voice_url = gr.Textbox(
                        label="Voice URL (Hugging Face or Google Drive)",
                        placeholder="https://huggingface.co/Chouio/Adam/resolve/main/AdamDefinitive.zip",
                        lines=2,
                    )
                    voice_name = gr.Textbox(
                        label="Voice Name (for folder)",
                        placeholder="my_voice",
                    )
                    download_btn = gr.Button("Download Voice", variant="primary")
                    download_status = gr.Textbox(label="Status", interactive=False)

                with gr.Column():
                    gr.Markdown("### 📋 Available Voices")
                    refresh_btn = gr.Button("Refresh List")
                    voices_list = gr.Markdown(label="Available Voices", value="No voices downloaded yet.")

            download_btn.click(
                fn=download_voice,
                inputs=[voice_url, voice_name],
                outputs=[download_status],
            )

            refresh_btn.click(
                fn=list_available_voices,
                outputs=[voices_list],
            )

        with gr.TabItem("🎭 Use Downloaded Voice"):
            gr.Markdown("## 🎭 Use Downloaded Voice for TTS")
            gr.Markdown("Select a downloaded voice and use its audio files for reference.")

            with gr.Row():
                with gr.Column():
                    # Voice selector
                    available_voices = gr.Dropdown(label="Select Voice", choices=[])
                    refresh_voices_btn = gr.Button("Refresh Voices")

                    # Audio file selector
                    voice_audio_files = gr.Dropdown(label="Select Audio File", choices=[])
                    load_audio_btn = gr.Button("Load Selected Audio")

                    # Reference text, entered manually by the user
                    ref_text_downloaded = gr.Textbox(
                        label="Reference Text",
                        placeholder="Reference text will be auto-filled or you can enter manually",
                        lines=3,
                    )

                    # Generation text
                    gen_text_downloaded = gr.Textbox(
                        label="Generation Text",
                        placeholder="Enter text to generate in this voice...",
                        lines=5,
                    )

                    remove_silence_downloaded = gr.Checkbox(label="Remove Silence from Output?", value=False)
                    generate_from_voice_btn = gr.Button("Generate with This Voice", variant="primary")

                with gr.Column():
                    # type="filepath" matches the first tab: run_tts forwards
                    # this value as a reference file, not as a numpy array.
                    loaded_audio = gr.Audio(label="Loaded Reference Audio", type="filepath")
                    # Dedicated target for load_voice_audio's message. Writing
                    # it into the reference-text box would feed the status
                    # line to the TTS model as reference text.
                    load_status = gr.Textbox(label="Status", interactive=False)
                    output_audio_downloaded = gr.Audio(label="Generated Speech")

            def refresh_voice_list():
                """Refresh the voice dropdown with the folders under downloaded_voices."""
                base_path = "downloaded_voices"
                if not os.path.exists(base_path):
                    return gr.update(choices=[], value=None)

                voices = sorted(
                    item for item in os.listdir(base_path)
                    if os.path.isdir(os.path.join(base_path, item))
                )
                # Returning a bare list would set the dropdown's *value*;
                # gr.update(choices=...) replaces the selectable options.
                return gr.update(choices=voices)

            refresh_voices_btn.click(
                fn=refresh_voice_list,
                outputs=[available_voices],
            )

            def update_audio_files(voice_name):
                """Refresh the audio-file dropdown for the selected voice."""
                if not voice_name:
                    return gr.update(choices=[], value=None)

                voice_path = os.path.join("downloaded_voices", voice_name)
                if not os.path.exists(voice_path):
                    return gr.update(choices=[], value=None)

                audio_files = sorted(
                    file for file in os.listdir(voice_path)
                    if file.lower().endswith(('.wav', '.mp3', '.flac', '.ogg'))
                )
                # Clear the old selection: it belonged to the previous voice.
                return gr.update(choices=audio_files, value=None)

            # Update audio files when voice is selected
            available_voices.change(
                fn=update_audio_files,
                inputs=[available_voices],
                outputs=[voice_audio_files],
            )

            # Load selected audio
            load_audio_btn.click(
                fn=load_voice_audio,
                inputs=[available_voices, voice_audio_files],
                outputs=[loaded_audio, load_status],
            )

            # Generate speech using downloaded voice
            generate_from_voice_btn.click(
                fn=run_tts,
                inputs=[loaded_audio, ref_text_downloaded, gen_text_downloaded, remove_silence_downloaded],
                outputs=[output_audio_downloaded],
            )

if __name__ == "__main__":
    demo.launch()