Spaces:

crackuser
/

voiceclone-dev

Running

App Files Files Community

voiceclone-dev / app.py

crackuser

Update app.py

986aa2a verified 8 months ago

raw

history blame

6.49 kB

	import gradio as gr
	import torch
	from TTS.api import TTS
	import os
	import tempfile
	import soundfile as sf

	# Mark the Coqui terms of service as agreed via the environment.
	# NOTE(review): presumably this keeps the XTTS download from stopping at an
	# interactive licence prompt -- confirm against the Coqui TTS docs.
	os.environ["COQUI_TOS_AGREED"] = "1"

	# Run on the GPU when torch reports one, otherwise fall back to the CPU.
	if torch.cuda.is_available():
	    device = "cuda"
	else:
	    device = "cpu"
	print(f"Using device: {device}")

	# Load the XTTS v2 model once at import time. Any failure is printed and
	# leaves `tts` set to None rather than aborting the import, so the rest of
	# the module can still be loaded.
	try:
	    tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
	    print("✅ XTTS v2 model loaded successfully!")
	except Exception as load_error:
	    print(f"❌ Error loading model: {load_error}")
	    tts = None

	# Longest text (in characters) accepted for a single synthesis request.
	_MAX_TEXT_CHARS = 500


	def _discard_file(path):
	    """Best-effort removal of a temporary file; never raises."""
	    if not path:
	        return
	    try:
	        os.remove(path)
	    except OSError:
	        # Cleanup must not mask the failure being reported to the user.
	        pass


	def clone_voice(text, reference_audio, language="en"):
	    """Synthesize *text* in the voice of *reference_audio* using XTTS v2.

	    All input validation happens before the model is touched, so a bad
	    request gets the same answer whether or not the model loaded.

	    Args:
	        text: Text to speak. Must be non-blank and at most 500 characters.
	        reference_audio: Reference recording, forwarded to the model as
	            ``speaker_wav``.
	        language: Language code forwarded to ``tts.tts_to_file``. Defaults
	            to ``"en"``, the value that was previously hard-coded, so
	            existing two-argument callers behave as before.

	    Returns:
	        A ``(audio_path, status_message)`` tuple. ``audio_path`` is the path
	        of the generated WAV file on success and ``None`` on any failure;
	        ``status_message`` is always a user-facing string.
	    """
	    if not text or not text.strip():
	        return None, "❌ Please enter some text to convert!"

	    if not reference_audio:
	        return None, "❌ Please upload a reference audio file!"

	    if len(text) > _MAX_TEXT_CHARS:
	        return None, f"❌ Text too long! Please keep it under {_MAX_TEXT_CHARS} characters."

	    if tts is None:
	        return None, "❌ TTS model not loaded properly!"

	    output_path = None
	    try:
	        # Reserve a unique .wav path; the handle is closed right away so the
	        # model can write to the path itself.
	        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
	            output_path = tmp_file.name

	        print(f"🎤 Cloning voice for text: {text[:50]}...")
	        tts.tts_to_file(
	            text=text,
	            speaker_wav=reference_audio,
	            language=language,
	            file_path=output_path,
	        )

	        # Only hand the file back when it exists and is non-empty.
	        if os.path.exists(output_path) and os.path.getsize(output_path) > 0:
	            preview = text[:100] + ("..." if len(text) > 100 else "")
	            # The file is deliberately kept: its path is the return value.
	            return output_path, f"✅ Voice cloning successful!\n🎵 Generated audio for: '{preview}'"

	        # Empty or missing output: do not leave the placeholder file behind.
	        _discard_file(output_path)
	        return None, "❌ Failed to generate audio file!"

	    except Exception as e:
	        # Boundary between the model and the UI: report the failure as a
	        # status message, and remove the temp file that would otherwise leak.
	        _discard_file(output_path)

	        error_msg = str(e)
	        print(f"❌ Voice cloning error: {error_msg}")

	        if "CUDA" in error_msg:
	            return None, "❌ GPU memory error! Try with shorter text or restart the space."
	        if "audio" in error_msg.lower():
	            return None, "❌ Audio processing error! Please upload a clear WAV or MP3 file."
	        return None, f"❌ Error: {error_msg}"

	# Create Gradio interface
	def create_interface():
	    """Assemble the Voice Cloning Studio page and return it without launching.

	    The page has a header, a two-column row (reference audio, text box and
	    clone button on one side; generated audio and status on the other), a
	    set of example texts, and a collapsed "How It Works" section. Both the
	    button click and submitting the text box call ``clone_voice``.

	    Returns:
	        The ``gr.Blocks`` object that was built.
	    """

	    def section_heading(label):
	        # Every section title uses the same inline-styled <h3> wrapper.
	        return f"<h3 style='color: #2E86AB;'>{label}</h3>"

	    page_header = """
	<div style="text-align: center; padding: 20px;">
	<h1 style="color: #2E86AB; margin-bottom: 10px;">🎭 AI Voice Cloning Studio</h1>
	<p style="color: #666; font-size: 18px;">Clone any voice with advanced AI technology</p>
	</div>
	"""

	    sample_texts = [
	        "Hello, this is a demonstration of AI voice cloning technology.",
	        "Welcome to the future of artificial intelligence and speech synthesis.",
	        "This voice was generated using advanced machine learning models.",
	        "Experience the power of AI-driven voice generation with natural speech patterns.",
	    ]

	    how_it_works = """
	### The Technology
	1. 🎤 Voice Upload: Upload 10+ seconds of clear speech
	2. 🧠 AI Analysis: XTTS v2 model analyzes voice characteristics
	3. 📝 Text Input: Enter the text you want to convert
	4. 🎵 Voice Synthesis: Generate speech that matches the uploaded voice

	### Tips for Best Results
	- Use high-quality, clear audio recordings
	- Ensure 10+ seconds of continuous speech
	- Avoid background noise and music
	- Single speaker only in reference audio

	### Supported Languages
	- English (primary)
	- Spanish, French, German, Italian, Portuguese
	- Chinese, Japanese, Korean
	"""

	    soft_theme = gr.themes.Soft(primary_hue="blue", secondary_hue="green")

	    with gr.Blocks(title="🎭 Voice Cloning Studio", theme=soft_theme) as studio:
	        gr.HTML(page_header)

	        with gr.Row():
	            # Left column: everything the user provides.
	            with gr.Column(scale=1):
	                gr.HTML(section_heading("📤 Upload Reference Voice"))
	                voice_sample = gr.Audio(
	                    label="Reference Audio (10+ seconds recommended)",
	                    type="filepath",
	                    sources=["upload"],
	                )

	                gr.HTML(section_heading("📝 Enter Text to Clone"))
	                script_box = gr.Textbox(
	                    label="Text to Convert",
	                    placeholder="Enter the text you want to speak in the cloned voice...",
	                    lines=4,
	                    max_lines=6,
	                )

	                run_button = gr.Button("🎤 Clone Voice", variant="primary", size="lg")

	            # Right column: everything the app produces.
	            with gr.Column(scale=1):
	                gr.HTML(section_heading("🎵 Cloned Voice Output"))
	                result_audio = gr.Audio(label="Generated Audio", type="filepath")
	                result_status = gr.Textbox(label="Status", lines=3, interactive=False)

	        # Clickable sample sentences that fill the text box.
	        gr.HTML(section_heading("💡 Example Texts"))
	        gr.Examples(
	            examples=sample_texts,
	            inputs=script_box,
	            label="Click to try these examples:",
	        )

	        with gr.Accordion("🔍 How It Works", open=False):
	            gr.Markdown(how_it_works)

	        # The button click and submitting the text box (Enter) run the same
	        # handler with the same wiring; register the click first, then submit.
	        for register in (run_button.click, script_box.submit):
	            register(
	                fn=clone_voice,
	                inputs=[script_box, voice_sample],
	                outputs=[result_audio, result_status],
	                show_progress=True,
	            )

	    return studio

	# Launch the app
	if __name__ == "__main__":
	    # Only start the server when executed as a script: listen on all
	    # interfaces at port 7860, with share disabled.
	    launch_settings = {
	        "server_name": "0.0.0.0",
	        "server_port": 7860,
	        "share": False,
	    }
	    demo = create_interface()
	    demo.launch(**launch_settings)