import spaces
import gradio as gr
from f5_tts.infer.utils_infer import remove_silence_for_generated_wav
from f5_tts.api import F5TTS
import tempfile
import os
import requests
import gdown
import zipfile
from pathlib import Path
# Initialize F5TTS
# Module-level instance created once at import time with default arguments;
# run_tts() below calls its infer() method.
f5tts = F5TTS()
@spaces.GPU
def run_tts(ref_audio, ref_text, gen_text, remove_silence=False):
    """Synthesize ``gen_text`` using ``ref_audio`` as the reference voice.

    Args:
        ref_audio: Reference audio, forwarded to ``f5tts.infer`` as ``ref_file``.
        ref_text: Reference text, forwarded to ``f5tts.infer`` as ``ref_text``.
        gen_text: Text to generate, forwarded to ``f5tts.infer`` as ``gen_text``.
        remove_silence: Forwarded unchanged to ``f5tts.infer``.

    Returns:
        Path of the temporary ``.wav`` file handed to ``f5tts.infer`` as
        ``file_wave``.
    """
    # tempfile.mktemp() is deprecated: it only returns a name, so another
    # process can claim the path before we write to it. NamedTemporaryFile
    # creates the file atomically; delete=False keeps it for the caller.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        output_wav_path = tmp.name

    # The return value of infer() is not needed here; the audio is read back
    # from the file path returned below.
    f5tts.infer(
        ref_file=ref_audio,
        ref_text=ref_text,
        gen_text=gen_text,
        file_wave=output_wav_path,
        remove_silence=remove_silence,
    )
    return output_wav_path
def download_voice(voice_url, voice_name, progress=gr.Progress()):
    """Download a voice ZIP from Hugging Face or Google Drive and unpack it.

    The archive is saved as ``downloaded_voices/<voice_name>/<voice_name>.zip``,
    extracted into that folder, and then deleted.

    Args:
        voice_url: ``http``/``https`` URL whose host is ``huggingface.co``
            (or a subdomain of it) or ``drive.google.com``.
        voice_name: Folder name to create under ``downloaded_voices``; must be
            a single path component.
        progress: Gradio progress tracker, called with a fraction and ``desc``.

    Returns:
        A human-readable status string. Failures are reported in the returned
        string rather than raised.
    """
    # Function-scope import so this block does not depend on the file header.
    from urllib.parse import urlparse

    voice_url = (voice_url or "").strip()
    voice_name = (voice_name or "").strip()
    if not voice_url or not voice_name:
        return "Please provide both URL and voice name."

    # The name becomes a directory under base_path, so it must not be able to
    # climb out of that directory or nest deeper inside it.
    if voice_name in (".", "..") or any(sep in voice_name for sep in ("/", "\\")):
        return "Invalid voice name. Use a plain folder name without path separators."

    # Decide the download type from the parsed hostname. A substring test on
    # the whole URL would also accept "https://example.com/?x=huggingface.co".
    try:
        parsed = urlparse(voice_url)
        host = (parsed.hostname or "").lower()
    except ValueError:
        host = ""
        parsed = None
    is_huggingface = host == "huggingface.co" or host.endswith(".huggingface.co")
    is_google_drive = host == "drive.google.com"
    scheme_ok = parsed is not None and parsed.scheme in ("http", "https")
    if not scheme_ok or not (is_huggingface or is_google_drive):
        return "Unsupported URL. Only Hugging Face and Google Drive links are supported."

    base_path = "downloaded_voices"
    voice_dir = os.path.join(base_path, voice_name)
    zip_path = os.path.join(voice_dir, f"{voice_name}.zip")

    try:
        os.makedirs(voice_dir, exist_ok=True)
        try:
            if is_huggingface:
                progress(0, desc="Downloading from Hugging Face...")
                # timeout: without it a stalled server blocks this worker
                # forever. The context manager releases the streamed
                # connection even if writing fails part-way.
                with requests.get(voice_url, stream=True, timeout=30) as response:
                    response.raise_for_status()
                    total_size = int(response.headers.get('content-length', 0))
                    downloaded = 0
                    with open(zip_path, 'wb') as f:
                        for chunk in response.iter_content(chunk_size=8192):
                            if not chunk:
                                continue
                            f.write(chunk)
                            downloaded += len(chunk)
                            if total_size > 0:
                                # Scale to 0..0.8 so the bar does not jump
                                # backwards when extraction reports 0.8.
                                progress(
                                    0.8 * downloaded / total_size,
                                    desc=f"Downloading: {downloaded//1024}KB/{total_size//1024}KB",
                                )
            else:
                progress(0, desc="Downloading from Google Drive...")
                gdown.download(url=voice_url, output=zip_path, quiet=False, fuzzy=True)
                if not os.path.isfile(zip_path):
                    raise RuntimeError("Google Drive download did not produce a file.")

            # Extract ZIP file
            progress(0.8, desc="Extracting files...")
            with zipfile.ZipFile(zip_path, 'r') as zip_ref:
                zip_ref.extractall(voice_dir)
        finally:
            # Best-effort removal of the archive on success and on failure;
            # a leftover ZIP must not mask the real outcome.
            try:
                if os.path.exists(zip_path):
                    os.remove(zip_path)
            except OSError:
                pass

        # Check that extraction actually produced something.
        files = sorted(os.listdir(voice_dir))
        if not files:
            return "Voice directory is empty after extraction. Download may have failed."

        file_list = "\n".join(f" - {name}" for name in files)
        # NOTE(review): the leading "π" glyphs look like mis-decoded emoji;
        # they are left as found because the original bytes are not recoverable here.
        return f"✅ Voice '{voice_name}' successfully downloaded!\nπ Location: {voice_dir}\nπ Files:\n{file_list}"
    except Exception as e:
        # Drop the folder only if it is empty: os.rmdir refuses to delete a
        # non-empty directory, so files from an earlier download survive.
        try:
            os.rmdir(voice_dir)
        except OSError:
            pass
        return f"β Error downloading voice: {e}"
def list_available_voices():
    """Return a Markdown summary of the voice folders under ``downloaded_voices``.

    Returns:
        One entry per sub-directory (name, path and contained file names),
        joined by newlines, or an explanatory message when the base directory
        is missing or holds no sub-directories.
    """
    base_path = "downloaded_voices"
    # isdir rather than exists: a plain file with this name would otherwise
    # make os.listdir raise.
    if not os.path.isdir(base_path):
        return "No voices downloaded yet."

    voices = []
    # os.listdir returns entries in arbitrary order; sort so the listing is
    # stable from one refresh to the next.
    for item in sorted(os.listdir(base_path)):
        item_path = os.path.join(base_path, item)
        if os.path.isdir(item_path):
            files = sorted(os.listdir(item_path))
            voices.append(f"π€ **{item}**\nπ Path: {item_path}\nπ Files: {', '.join(files)}\n")

    if not voices:
        return "No voices found in the downloaded_voices directory."
    return "\n".join(voices)
def load_voice_audio(voice_name, audio_file):
    """Resolve an audio file inside a downloaded voice folder.

    Args:
        voice_name: Name of a folder under ``downloaded_voices``.
        audio_file: File name inside that folder.

    Returns:
        ``(audio_path, message)``. ``audio_path`` is ``None`` whenever the
        selection is missing or the file cannot be found; ``message``
        describes the outcome.
    """
    base_path = "downloaded_voices"

    # An unset selection arrives as None/empty; os.path.join would raise
    # TypeError on None, so report it as a normal status instead.
    if not voice_name:
        return None, "Please select a voice first."

    voice_path = os.path.join(base_path, voice_name)
    if not os.path.exists(voice_path):
        return None, f"Voice '{voice_name}' not found."

    if not audio_file:
        return None, "Please select an audio file first."

    audio_path = os.path.join(voice_path, audio_file)
    # isfile rather than exists: a directory is not usable as reference audio.
    if not os.path.isfile(audio_path):
        return None, f"Audio file '{audio_file}' not found in voice '{voice_name}' directory."

    return audio_path, f"✅ Loaded audio: {audio_file} from voice '{voice_name}'"
# Create Gradio interface with tabs
# Three tabs: direct TTS from an uploaded reference, downloading voice
# archives, and TTS from a previously downloaded voice.
# NOTE(review): the Greek-letter glyphs in the titles/labels below look like
# mis-decoded emoji; they are left as found - confirm against the original file.
with gr.Blocks(title="π£οΈ F5-TTS Demo with Voice Download") as demo:
    gr.Markdown("# π£οΈ F5-TTS Demo with Voice Management")
    gr.Markdown("Upload a reference voice, give reference and generation text, and hear it in the same voice! Plus, download pre-made voices from Hugging Face or Google Drive.")

    with gr.Tabs():
        with gr.TabItem("π Generate Speech"):
            with gr.Row():
                with gr.Column():
                    ref_audio = gr.Audio(label="Reference Audio", type="filepath")
                    ref_text = gr.Textbox(
                        label="Reference Text",
                        placeholder="some call me nature, others call me mother nature.",
                        lines=3
                    )
                    gen_text = gr.Textbox(
                        label="Generation Text",
                        placeholder="I don't really care what you call me...",
                        lines=5
                    )
                    remove_silence = gr.Checkbox(label="Remove Silence from Output?", value=False)
                    generate_btn = gr.Button("Generate Speech", variant="primary")
                with gr.Column():
                    output_audio = gr.Audio(label="Generated Speech")
                    # Not wired to any handler in this file; kept so the layout is unchanged.
                    spectrogram = gr.Image(label="Spectrogram (if available)")

            generate_btn.click(
                fn=run_tts,
                inputs=[ref_audio, ref_text, gen_text, remove_silence],
                outputs=[output_audio]
            )

        with gr.TabItem("π₯ Download Voices"):
            gr.Markdown("## π₯ Download Pre-made Voices")
            gr.Markdown("Download voices from Hugging Face or Google Drive. The voice should be in ZIP format containing audio files and metadata.")
            with gr.Row():
                with gr.Column():
                    voice_url = gr.Textbox(
                        label="Voice URL (Hugging Face or Google Drive)",
                        placeholder="https://huggingface.co/Chouio/Adam/resolve/main/AdamDefinitive.zip",
                        lines=2
                    )
                    voice_name = gr.Textbox(
                        label="Voice Name (for folder)",
                        placeholder="my_voice"
                    )
                    download_btn = gr.Button("Download Voice", variant="primary")
                    download_status = gr.Textbox(label="Status", interactive=False)
                with gr.Column():
                    gr.Markdown("### π Available Voices")
                    refresh_btn = gr.Button("Refresh List")
                    voices_list = gr.Markdown(label="Available Voices", value="No voices downloaded yet.")

            download_btn.click(
                fn=download_voice,
                inputs=[voice_url, voice_name],
                outputs=[download_status]
            )
            refresh_btn.click(
                fn=list_available_voices,
                outputs=[voices_list]
            )

        with gr.TabItem("π Use Downloaded Voice"):
            gr.Markdown("## π Use Downloaded Voice for TTS")
            gr.Markdown("Select a downloaded voice and use its audio files for reference.")
            with gr.Row():
                with gr.Column():
                    # Voice selector
                    available_voices = gr.Dropdown(label="Select Voice", choices=[])
                    refresh_voices_btn = gr.Button("Refresh Voices")
                    # Audio file selector
                    voice_audio_files = gr.Dropdown(label="Select Audio File", choices=[])
                    load_audio_btn = gr.Button("Load Selected Audio")
                    # Dedicated status line for load_voice_audio's message, so
                    # the message is not written into the reference-text box
                    # and then passed to run_tts as the reference text.
                    load_status = gr.Textbox(label="Load Status", interactive=False)
                    # Reference text (entered manually)
                    ref_text_downloaded = gr.Textbox(
                        label="Reference Text",
                        placeholder="Reference text will be auto-filled or you can enter manually",
                        lines=3
                    )
                    # Generation text
                    gen_text_downloaded = gr.Textbox(
                        label="Generation Text",
                        placeholder="Enter text to generate in this voice...",
                        lines=5
                    )
                    remove_silence_downloaded = gr.Checkbox(label="Remove Silence from Output?", value=False)
                    generate_from_voice_btn = gr.Button("Generate with This Voice", variant="primary")
                with gr.Column():
                    # type="filepath" matches ref_audio in the first tab:
                    # run_tts forwards this value to f5tts.infer as ref_file.
                    loaded_audio = gr.Audio(label="Loaded Reference Audio", type="filepath")
                    output_audio_downloaded = gr.Audio(label="Generated Speech")

            def refresh_voice_list():
                """Return a Dropdown update listing every voice folder on disk."""
                base_path = "downloaded_voices"
                if not os.path.isdir(base_path):
                    voices = []
                else:
                    voices = sorted(
                        item for item in os.listdir(base_path)
                        if os.path.isdir(os.path.join(base_path, item))
                    )
                # A bare list returned to a Dropdown output sets its *value*;
                # the options have to be replaced through an update.
                return gr.update(choices=voices, value=None)

            refresh_voices_btn.click(
                fn=refresh_voice_list,
                outputs=[available_voices]
            )

            def update_audio_files(voice_name):
                """Return a Dropdown update listing the audio files of ``voice_name``."""
                audio_files = []
                if voice_name:
                    voice_path = os.path.join("downloaded_voices", voice_name)
                    if os.path.isdir(voice_path):
                        audio_files = sorted(
                            file for file in os.listdir(voice_path)
                            if file.lower().endswith(('.wav', '.mp3', '.flac', '.ogg'))
                        )
                # Same reason as refresh_voice_list: update choices, not value.
                return gr.update(choices=audio_files, value=None)

            # Update audio files when voice is selected
            available_voices.change(
                fn=update_audio_files,
                inputs=[available_voices],
                outputs=[voice_audio_files]
            )

            # Load selected audio; the status text goes to its own box.
            load_audio_btn.click(
                fn=load_voice_audio,
                inputs=[available_voices, voice_audio_files],
                outputs=[loaded_audio, load_status]
            )

            # Generate speech using downloaded voice
            generate_from_voice_btn.click(
                fn=run_tts,
                inputs=[loaded_audio, ref_text_downloaded, gen_text_downloaded, remove_silence_downloaded],
                outputs=[output_audio_downloaded]
            )
# Start the Gradio server only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    demo.launch()