E2-F5-TTS

Runtime error

App Files Files Community

E2-F5-TTS / app.py

Chouio

Update app.py

b744140 verified 2 months ago

raw

history blame contribute delete

11.8 kB

	import spaces
	import gradio as gr
	from f5_tts.infer.utils_infer import remove_silence_for_generated_wav
	from f5_tts.api import F5TTS
	import tempfile
	import os
	import requests
	import gdown
	import zipfile
	from pathlib import Path

	# Initialize F5TTS once at import time; run_tts() below calls
	# f5tts.infer() on this shared module-level instance for every request.
	f5tts = F5TTS()

	@spaces.GPU
	def run_tts(ref_audio, ref_text, gen_text, remove_silence=False):
	    """Synthesize ``gen_text`` in the voice of ``ref_audio`` and return a WAV path.

	    Args:
	        ref_audio: Reference audio, forwarded to ``f5tts.infer`` as ``ref_file``.
	        ref_text: Transcript of the reference audio.
	        gen_text: Text to synthesize.
	        remove_silence: Forwarded unchanged to ``f5tts.infer``.

	    Returns:
	        Path of the temporary ``.wav`` file handed to ``f5tts.infer`` as
	        ``file_wave``.
	    """
	    # tempfile.mktemp() is deprecated and race-prone: it only returns a name,
	    # so another process can claim it first. NamedTemporaryFile creates the
	    # file atomically; delete=False keeps it on disk after the handle closes.
	    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_wav:
	        output_wav_path = tmp_wav.name

	    # The values returned by infer() are not needed here; only the file
	    # written to ``file_wave`` is used by the caller.
	    f5tts.infer(
	        ref_file=ref_audio,
	        ref_text=ref_text,
	        gen_text=gen_text,
	        file_wave=output_wav_path,
	        remove_silence=remove_silence,
	    )
	    return output_wav_path

	def download_voice(voice_url, voice_name, progress=gr.Progress()):
	    """Download a voice ZIP archive and unpack it under ``downloaded_voices``.

	    Args:
	        voice_url: Link to a ZIP file hosted on Hugging Face or Google Drive.
	        voice_name: Folder name to create under ``downloaded_voices``. Must be
	            a plain name without path separators.
	        progress: Gradio progress tracker used to report download and
	            extraction status.

	    Returns:
	        A status string for display: either a success summary listing the
	        extracted files, or a message describing what went wrong.
	    """
	    # Local import keeps this block self-contained (urllib is stdlib).
	    from urllib.parse import urlparse

	    voice_url = (voice_url or "").strip()
	    voice_name = (voice_name or "").strip()
	    if not voice_url or not voice_name:
	        return "Please provide both URL and voice name."

	    # voice_name becomes a directory name; refuse anything that could escape
	    # the downloads folder (e.g. "../..").
	    if voice_name in {".", ".."} or "/" in voice_name or "\\" in voice_name:
	        return "Invalid voice name. Use a simple folder name without path separators."

	    # Match on the parsed hostname rather than a substring, so that a URL such
	    # as "https://evil.example/?huggingface.co" is not accepted.
	    host = (urlparse(voice_url).hostname or "").lower()
	    is_huggingface = host == "huggingface.co" or host.endswith(".huggingface.co")
	    is_google_drive = host == "drive.google.com"

	    if not (is_huggingface or is_google_drive):
	        return "Unsupported URL. Only Hugging Face and Google Drive links are supported."

	    # Create voice directory (also creates the base folder if missing).
	    base_path = "downloaded_voices"
	    voice_dir = os.path.join(base_path, voice_name)
	    os.makedirs(voice_dir, exist_ok=True)

	    zip_path = os.path.join(voice_dir, f"{voice_name}.zip")

	    try:
	        if is_huggingface:
	            progress(0, desc="Downloading from Hugging Face...")
	            # A timeout prevents a stalled server from hanging the worker;
	            # the context manager releases the connection when done.
	            with requests.get(voice_url, stream=True, timeout=30) as response:
	                response.raise_for_status()
	                total_size = int(response.headers.get("content-length", 0))

	                downloaded = 0
	                with open(zip_path, "wb") as f:
	                    for chunk in response.iter_content(chunk_size=8192):
	                        if not chunk:
	                            continue
	                        f.write(chunk)
	                        downloaded += len(chunk)
	                        if total_size > 0:
	                            # Scale to 0-0.8 so the bar does not jump backwards
	                            # when extraction reports 0.8 below.
	                            progress(
	                                0.8 * downloaded / total_size,
	                                desc=f"Downloading: {downloaded//1024}KB/{total_size//1024}KB",
	                            )
	        else:
	            progress(0, desc="Downloading from Google Drive...")
	            saved_to = gdown.download(url=voice_url, output=zip_path, quiet=False, fuzzy=True)
	            if saved_to is None:
	                raise RuntimeError("Google Drive download did not produce a file.")

	        # Extract ZIP file
	        progress(0.8, desc="Extracting files...")
	        with zipfile.ZipFile(zip_path, "r") as zip_ref:
	            zip_ref.extractall(voice_dir)

	        # Remove the archive first so it does not show up in the file list.
	        if os.path.exists(zip_path):
	            os.remove(zip_path)

	        files = sorted(os.listdir(voice_dir))
	        if not files:
	            return "Voice directory is empty after extraction. Download may have failed."

	        file_list = "\n".join([f" - {file}" for file in files])
	        return f"✅ Voice '{voice_name}' successfully downloaded!\n📁 Location: {voice_dir}\n📋 Files:\n{file_list}"

	    except Exception as e:
	        # Best-effort cleanup of the partial archive only; the directory is
	        # left in place because it might contain other files.
	        try:
	            if os.path.exists(zip_path):
	                os.remove(zip_path)
	        except OSError:
	            pass
	        return f"❌ Error downloading voice: {str(e)}"

	def list_available_voices():
	    """Return a human-readable summary of every downloaded voice.

	    Scans the ``downloaded_voices`` directory (relative to the current working
	    directory) and reports each sub-directory with its path and file names.

	    Returns:
	        A multi-line string with one entry per voice, or a short message when
	        the directory is missing or contains no voice folders.
	    """
	    base_path = "downloaded_voices"
	    # isdir (not exists) so a stray file with this name cannot reach listdir.
	    if not os.path.isdir(base_path):
	        return "No voices downloaded yet."

	    voices = []
	    # os.listdir returns entries in arbitrary order; sort for a stable listing.
	    for item in sorted(os.listdir(base_path)):
	        item_path = os.path.join(base_path, item)
	        if os.path.isdir(item_path):
	            files = sorted(os.listdir(item_path))
	            voices.append(f"🎤 {item}\n📍 Path: {item_path}\n📋 Files: {', '.join(files)}\n")

	    if not voices:
	        return "No voices found in the downloaded_voices directory."

	    return "\n".join(voices)

	def load_voice_audio(voice_name, audio_file):
	    """Resolve an audio file inside a downloaded voice folder.

	    Args:
	        voice_name: Name of a folder under ``downloaded_voices``.
	        audio_file: File name inside that folder.

	    Returns:
	        A ``(path, message)`` tuple. ``path`` is the audio file path on
	        success and ``None`` otherwise; ``message`` is a status string.
	    """
	    # Dropdowns deliver None when nothing is selected; os.path.join would
	    # raise TypeError on that, so report it as a normal status instead.
	    if not voice_name or not audio_file:
	        return None, "Please select both a voice and an audio file."

	    base_path = "downloaded_voices"
	    voice_path = os.path.join(base_path, voice_name)

	    if not os.path.isdir(voice_path):
	        return None, f"Voice '{voice_name}' not found."

	    audio_path = os.path.join(voice_path, audio_file)
	    # isfile (not exists) so a sub-directory is never returned as audio.
	    if not os.path.isfile(audio_path):
	        return None, f"Audio file '{audio_file}' not found in voice '{voice_name}' directory."

	    return audio_path, f"✅ Loaded audio: {audio_file} from voice '{voice_name}'"

	# Create Gradio interface with three tabs: direct generation, voice
	# download, and generation from a previously downloaded voice.
	with gr.Blocks(title="🗣️ F5-TTS Demo with Voice Download") as demo:
	    gr.Markdown("# 🗣️ F5-TTS Demo with Voice Management")
	    gr.Markdown("Upload a reference voice, give reference and generation text, and hear it in the same voice! Plus, download pre-made voices from Hugging Face or Google Drive.")

	    with gr.Tabs():
	        with gr.TabItem("🔊 Generate Speech"):
	            with gr.Row():
	                with gr.Column():
	                    ref_audio = gr.Audio(label="Reference Audio", type="filepath")
	                    ref_text = gr.Textbox(
	                        label="Reference Text",
	                        placeholder="some call me nature, others call me mother nature.",
	                        lines=3
	                    )
	                    gen_text = gr.Textbox(
	                        label="Generation Text",
	                        placeholder="I don't really care what you call me...",
	                        lines=5
	                    )
	                    remove_silence = gr.Checkbox(label="Remove Silence from Output?", value=False)
	                    generate_btn = gr.Button("Generate Speech", variant="primary")

	                with gr.Column():
	                    output_audio = gr.Audio(label="Generated Speech")
	                    spectrogram = gr.Image(label="Spectrogram (if available)")

	            generate_btn.click(
	                fn=run_tts,
	                inputs=[ref_audio, ref_text, gen_text, remove_silence],
	                outputs=[output_audio]
	            )

	        with gr.TabItem("📥 Download Voices"):
	            gr.Markdown("## 📥 Download Pre-made Voices")
	            gr.Markdown("Download voices from Hugging Face or Google Drive. The voice should be in ZIP format containing audio files and metadata.")

	            with gr.Row():
	                with gr.Column():
	                    voice_url = gr.Textbox(
	                        label="Voice URL (Hugging Face or Google Drive)",
	                        placeholder="https://huggingface.co/Chouio/Adam/resolve/main/AdamDefinitive.zip",
	                        lines=2
	                    )
	                    voice_name = gr.Textbox(
	                        label="Voice Name (for folder)",
	                        placeholder="my_voice"
	                    )
	                    download_btn = gr.Button("Download Voice", variant="primary")
	                    download_status = gr.Textbox(label="Status", interactive=False)

	                with gr.Column():
	                    gr.Markdown("### 📋 Available Voices")
	                    refresh_btn = gr.Button("Refresh List")
	                    voices_list = gr.Markdown(label="Available Voices", value="No voices downloaded yet.")

	            download_btn.click(
	                fn=download_voice,
	                inputs=[voice_url, voice_name],
	                outputs=[download_status]
	            )

	            refresh_btn.click(
	                fn=list_available_voices,
	                outputs=[voices_list]
	            )

	        with gr.TabItem("🎭 Use Downloaded Voice"):
	            gr.Markdown("## 🎭 Use Downloaded Voice for TTS")
	            gr.Markdown("Select a downloaded voice and use its audio files for reference.")

	            with gr.Row():
	                with gr.Column():
	                    # Voice selector
	                    available_voices = gr.Dropdown(label="Select Voice", choices=[])
	                    refresh_voices_btn = gr.Button("Refresh Voices")

	                    # Audio file selector
	                    voice_audio_files = gr.Dropdown(label="Select Audio File", choices=[])
	                    load_audio_btn = gr.Button("Load Selected Audio")
	                    # Dedicated status field so the load message is not
	                    # written into the reference text used for synthesis.
	                    load_status = gr.Textbox(label="Load Status", interactive=False)

	                    # Reference text (entered manually)
	                    ref_text_downloaded = gr.Textbox(
	                        label="Reference Text",
	                        placeholder="Reference text will be auto-filled or you can enter manually",
	                        lines=3
	                    )

	                    # Generation text
	                    gen_text_downloaded = gr.Textbox(
	                        label="Generation Text",
	                        placeholder="Enter text to generate in this voice...",
	                        lines=5
	                    )

	                    remove_silence_downloaded = gr.Checkbox(label="Remove Silence from Output?", value=False)
	                    generate_from_voice_btn = gr.Button("Generate with This Voice", variant="primary")

	                with gr.Column():
	                    # type="filepath" so run_tts receives a path, matching the
	                    # ref_audio component on the first tab.
	                    loaded_audio = gr.Audio(label="Loaded Reference Audio", type="filepath")
	                    output_audio_downloaded = gr.Audio(label="Generated Speech")

	            def refresh_voice_list():
	                """Return a Dropdown update listing every downloaded voice folder."""
	                base_path = "downloaded_voices"
	                if not os.path.isdir(base_path):
	                    return gr.update(choices=[])

	                voices = sorted(
	                    item
	                    for item in os.listdir(base_path)
	                    if os.path.isdir(os.path.join(base_path, item))
	                )
	                # A bare list would be applied as the Dropdown's *value*;
	                # gr.update(choices=...) replaces the selectable options.
	                return gr.update(choices=voices)

	            refresh_voices_btn.click(
	                fn=refresh_voice_list,
	                outputs=[available_voices]
	            )

	            def update_audio_files(voice_name):
	                """Return a Dropdown update listing audio files of ``voice_name``."""
	                if not voice_name:
	                    return gr.update(choices=[], value=None)

	                voice_path = os.path.join("downloaded_voices", voice_name)
	                if not os.path.isdir(voice_path):
	                    return gr.update(choices=[], value=None)

	                audio_files = sorted(
	                    file
	                    for file in os.listdir(voice_path)
	                    if file.lower().endswith(('.wav', '.mp3', '.flac', '.ogg'))
	                )
	                # Clear the previous selection; it belonged to another voice.
	                return gr.update(choices=audio_files, value=None)

	            available_voices.change(
	                fn=update_audio_files,
	                inputs=[available_voices],
	                outputs=[voice_audio_files]
	            )

	            # Load selected audio; the status message goes to load_status.
	            load_audio_btn.click(
	                fn=load_voice_audio,
	                inputs=[available_voices, voice_audio_files],
	                outputs=[loaded_audio, load_status]
	            )

	            # Generate speech using downloaded voice
	            generate_from_voice_btn.click(
	                fn=run_tts,
	                inputs=[loaded_audio, ref_text_downloaded, gen_text_downloaded, remove_silence_downloaded],
	                outputs=[output_audio_downloaded]
	            )

	# Launch the Gradio app only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()