Spaces:

crackuser
/

voiceclone-dev

Running

App Files Files Community

voiceclone-dev / app.py

crackuser

Update app.py

71d678c verified 7 months ago

raw

history blame

5.65 kB

	import gradio as gr
	import torch
	import torchaudio
	import tempfile
	import os
	import warnings
	from contextlib import contextmanager

	warnings.filterwarnings("ignore")
	os.environ["COQUI_TOS_AGREED"] = "1"
	print("🚀 Starting Voice Cloning Studio...")

	@contextmanager
	def patch_torch_load():
	original_load = torch.load
	def patched_load(f, args, *kwargs):
	kwargs['weights_only'] = False
	return original_load(f, args, *kwargs)
	torch.load = patched_load
	try:
	yield
	finally:
	torch.load = original_load

	DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
	TTS_MODEL = None
	WHISPER_MODEL = None
	MODEL_STATUS = "Not Loaded"

	def load_xtts_manual():
	global TTS_MODEL, MODEL_STATUS
	if TTS_MODEL is not None:
	return True
	try:
	with patch_torch_load():
	from TTS.api import TTS
	print("📦 Loading XTTS...")
	TTS_MODEL = TTS(
	model_name="tts_models/multilingual/multi-dataset/xtts_v2",
	progress_bar=True,
	gpu=(DEVICE == "cuda")
	)
	MODEL_STATUS = "XTTS-v2 Ready"
	print("✅ XTTS loaded!")
	return True
	except Exception as e:
	print(f"❌ XTTS loading failed: {e}")
	MODEL_STATUS = f"Manual Failed: {str(e)}"
	return False

	def load_whisper():
	global WHISPER_MODEL
	if WHISPER_MODEL is not None:
	return True
	try:
	import whisper
	WHISPER_MODEL = whisper.load_model("base")
	print("✅ Whisper loaded!")
	return True
	except Exception as e:
	print(f"❌ Whisper failed: {e}")
	return False

	def voice_to_voice_clone(reference_audio, input_audio, language="en"):
	"""
	Main voice cloning function - this will be called by both UI and API
	"""
	try:
	print(f"🎭 Voice cloning request: {language}")
	print(f"📁 Reference: {reference_audio}")
	print(f"📁 Input: {input_audio}")

	if not reference_audio or not input_audio:
	return None, "❌ Please upload both reference and input audio files!"

	# Load XTTS model
	if not load_xtts_manual():
	return None, f"❌ XTTS loading failed!\nStatus: {MODEL_STATUS}"

	# Load Whisper for transcription
	load_whisper()

	# Extract text from input audio
	extracted_text = "Voice cloning demonstration."
	if WHISPER_MODEL:
	try:
	result = WHISPER_MODEL.transcribe(input_audio)
	text = result.get("text", "").strip()
	if text and len(text) > 3:
	extracted_text = text
	print(f"✅ Extracted: '{extracted_text[:100]}...'")
	except Exception as e:
	print(f"⚠️ Whisper error: {e}")

	# Generate cloned voice
	with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
	output_path = tmp_file.name

	print(f"🔄 Generating voice clone...")
	with patch_torch_load():
	TTS_MODEL.tts_to_file(
	text=extracted_text,
	speaker_wav=reference_audio,
	language=language,
	file_path=output_path
	)

	# Verify output
	if os.path.exists(output_path) and os.path.getsize(output_path) > 0:
	success_message = f"""✅ VOICE-TO-VOICE CLONING SUCCESS!
	📝 Content: '{extracted_text[:150]}...'
	🎭 Device: {DEVICE}
	🔧 Status: {MODEL_STATUS}
	📊 Output size: {os.path.getsize(output_path)} bytes
	"""
	print("✅ Voice cloning completed successfully!")
	return output_path, success_message
	else:
	return None, "❌ Generated audio file is empty!"

	except Exception as e:
	error_msg = f"❌ Voice cloning error: {str(e)}\nModel: {MODEL_STATUS}"
	print(error_msg)
	return None, error_msg

	# FIXED: Use gr.Interface instead of gr.Blocks for proper API exposure
	interface = gr.Interface(
	fn=voice_to_voice_clone,
	inputs=[
	gr.Audio(
	label="🎤 Reference Audio (Voice to Clone)",
	type="filepath",
	sources=["upload"]
	),
	gr.Audio(
	label="🎵 Input Audio (Content to Transform)",
	type="filepath",
	sources=["upload"]
	),
	gr.Dropdown(
	choices=[
	"en", "es", "fr", "de", "it", "pt", "pl", "tr", "ru", "nl",
	"cs", "ar", "zh", "ja", "ko", "hi", "uk", "vi", "ro", "el",
	"he", "fi", "hu", "sv", "ca", "id", "ms", "bg", "sk", "da",
	"no", "lt", "hr", "sr", "sl", "et", "lv", "fil", "bn", "ta",
	"te", "ur", "fa", "th"
	],
	value="en",
	label="🌍 Language"
	)
	],
	outputs=[
	gr.Audio(label="🎉 Cloned Voice Result"),
	gr.Textbox(label="📋 Status", lines=8)
	],
	title="🎭 REAL Voice Cloning Studio",
	description="Transform any voice into any other voice using XTTS-v2 and Whisper AI models. Upload reference audio and input audio to get started.",
	theme=gr.themes.Soft(),
	allow_flagging="never",
	api_name="voice_to_voice_clone" # CRITICAL: This creates the API endpoint
	)

	if __name__ == "__main__":
	print("🌐 Launching Voice Cloning Studio...")
	interface.launch(
	server_name="0.0.0.0",
	server_port=7860,
	share=False,
	show_api=True, # Shows API documentation
	debug=True
	)