Spaces:

crackuser
/

voiceclone-dev

Running

App Files Files Community

voiceclone-dev / app.py

crackuser

Update app.py

75fb8ef verified 8 months ago

raw

history blame

4.84 kB

	import gradio as gr
	import torch
	import torchaudio
	import tempfile
	import os
	import warnings
	from contextlib import contextmanager

	# Suppress every warning process-wide (no category, message or module filter
	# is given, so this applies to all of them).
	warnings.filterwarnings("ignore")
	# NOTE(review): presumably pre-accepts the Coqui model licence prompt so the
	# XTTS download never waits on interactive input -- confirm against the Coqui
	# TTS docs. It is set at import time, before TTS is imported lazily inside
	# load_xtts_manual().
	os.environ["COQUI_TOS_AGREED"] = "1"

	# Startup banner written to stdout when the module is imported.
	print("🚀 Starting Voice Cloning Studio...")

	@contextmanager
	def patch_torch_load():
	    """Temporarily force ``torch.load`` to run with ``weights_only=False``.

	    While the ``with`` body runs, the module attribute ``torch.load`` is
	    replaced by a wrapper that forwards every positional and keyword
	    argument unchanged, except that ``weights_only`` is always set to
	    ``False``. The original function is restored on exit, including when
	    the body raises.

	    NOTE(review): presumably needed because the XTTS checkpoint holds
	    pickled objects that the weights-only loader rejects -- confirm.

	    SECURITY: ``weights_only=False`` lets ``torch.load`` unpickle arbitrary
	    objects, which can execute code. Only wrap loads of trusted checkpoints.

	    The patch is a process-global monkey-patch, so any other code calling
	    ``torch.load`` while the context is active is affected too.
	    """
	    original_load = torch.load

	    def patched_load(f, *args, **kwargs):
	        # Deliberately override whatever the caller requested.
	        kwargs['weights_only'] = False
	        return original_load(f, *args, **kwargs)

	    torch.load = patched_load
	    try:
	        yield
	    finally:
	        # Always undo the patch, even if the wrapped code failed.
	        torch.load = original_load

	# Lazily populated model handles; both stay None until first use.
	TTS_MODEL = None
	WHISPER_MODEL = None
	# Human-readable loader state, echoed back in the UI status messages.
	MODEL_STATUS = "Not Loaded"

	# Run on the GPU whenever torch reports CUDA as available, else on the CPU.
	if torch.cuda.is_available():
	    DEVICE = "cuda"
	else:
	    DEVICE = "cpu"

	def load_xtts_manual():
	    """Load the XTTS-v2 model once and cache it in ``TTS_MODEL``.

	    The ``TTS`` package is imported lazily here, inside
	    ``patch_torch_load()``, so that any ``torch.load`` call made while the
	    model is constructed runs with ``weights_only=False``.

	    Side effects: sets the module globals ``TTS_MODEL`` and
	    ``MODEL_STATUS`` and prints progress messages.

	    Returns:
	        bool: ``True`` if the model is available (already cached or just
	        loaded); ``False`` if importing or constructing it raised, in
	        which case ``MODEL_STATUS`` carries the error text.
	    """
	    global TTS_MODEL, MODEL_STATUS
	    if TTS_MODEL is not None:
	        return True
	    try:
	        with patch_torch_load():
	            from TTS.api import TTS
	            print("📦 Loading XTTS...")
	            TTS_MODEL = TTS(
	                model_name="tts_models/multilingual/multi-dataset/xtts_v2",
	                progress_bar=True,
	                gpu=(DEVICE == "cuda")
	            )
	        MODEL_STATUS = "XTTS-v2 Ready"
	        print("✅ XTTS loaded!")
	        return True
	    except Exception as e:
	        # Boundary handler: report the failure to the caller instead of
	        # crashing the UI callback.
	        print(f"❌ XTTS loading failed: {e}")
	        MODEL_STATUS = f"Manual Failed: {str(e)}"
	        return False

	def load_whisper():
	    """Load the Whisper "base" model once and cache it in ``WHISPER_MODEL``.

	    ``whisper`` is imported lazily so the app can start even when the
	    package is missing; in that case the function just reports failure.

	    Returns:
	        bool: ``True`` if the model is available (already cached or just
	        loaded); ``False`` if importing or loading it raised.
	    """
	    global WHISPER_MODEL
	    if WHISPER_MODEL is not None:
	        return True
	    try:
	        import whisper
	        WHISPER_MODEL = whisper.load_model("base")
	        print("✅ Whisper loaded!")
	        return True
	    except Exception as e:
	        # Best-effort: the caller falls back to a fixed sentence when
	        # transcription is unavailable.
	        print(f"❌ Whisper failed: {e}")
	        return False

	def _discard_file(path):
	    """Remove *path* if it is set, ignoring filesystem errors."""
	    if not path:
	        return
	    try:
	        os.remove(path)
	    except OSError:
	        # Cleanup is best-effort; a leftover temp file must not mask the
	        # error that is being reported to the user.
	        pass


	def voice_to_voice_clone(reference_audio, input_audio, language="en"):
	    """Re-speak the content of ``input_audio`` in the voice of ``reference_audio``.

	    Steps: ensure XTTS is loaded, try to transcribe ``input_audio`` with
	    Whisper, then synthesise the transcript with ``TTS_MODEL.tts_to_file``
	    using ``reference_audio`` as ``speaker_wav``. When Whisper is
	    unavailable, fails, or yields a transcript of 3 characters or fewer,
	    the fixed sentence "Voice cloning demonstration." is spoken instead.

	    Args:
	        reference_audio: Value passed as ``speaker_wav``; the UI supplies a
	            file path. Falsy means "nothing uploaded".
	        input_audio: Value passed to ``WHISPER_MODEL.transcribe``; the UI
	            supplies a file path. Falsy means "nothing uploaded".
	        language: Language code forwarded to ``tts_to_file``.

	    Returns:
	        tuple: ``(output_path, status_message)`` on success, or
	        ``(None, error_message)`` on any failure. The function never raises.
	    """
	    output_path = None
	    try:
	        if not reference_audio or not input_audio:
	            return None, "❌ Please upload both reference and input audio files!"
	        if not load_xtts_manual():
	            return None, f"❌ XTTS loading failed!\nStatus: {MODEL_STATUS}"

	        # Whisper is optional: a load failure leaves WHISPER_MODEL as None.
	        load_whisper()
	        extracted_text = "Voice cloning demonstration."
	        if WHISPER_MODEL:
	            try:
	                result = WHISPER_MODEL.transcribe(input_audio)
	                text = result.get("text", "").strip()
	                if text and len(text) > 3:
	                    extracted_text = text
	                    print(f"✅ Extracted: '{extracted_text[:100]}...'")
	            except Exception as e:
	                # Keep the fallback sentence rather than aborting the clone.
	                print(f"⚠️ Whisper error: {e}")

	        # Reserve a path only; the handle is closed before TTS writes to it.
	        # delete=False because the caller needs the file after we return.
	        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
	            output_path = tmp_file.name

	        with patch_torch_load():
	            TTS_MODEL.tts_to_file(
	                text=extracted_text,
	                speaker_wav=reference_audio,
	                language=language,
	                file_path=output_path
	            )

	        if os.path.exists(output_path) and os.path.getsize(output_path) > 0:
	            # Built line by line so the message does not pick up whatever
	            # indentation the source file happens to use.
	            status = (
	                "✅ VOICE-TO-VOICE CLONING SUCCESS!\n"
	                "\n"
	                f"📝 Content: '{extracted_text[:150]}...'\n"
	                f"🎭 Device: {DEVICE}\n"
	                f"🔧 Status: {MODEL_STATUS}\n"
	            )
	            return output_path, status

	        # Nothing usable was written: do not leave the empty file behind.
	        _discard_file(output_path)
	        return None, "❌ Generated audio file is empty!"
	    except Exception as e:
	        # Synthesis failed part-way: remove the orphaned temp file, if any.
	        _discard_file(output_path)
	        return None, f"❌ Voice cloning error: {str(e)}\nModel: {MODEL_STATUS}"

	# Gradio Interface
	# Layout: a header, then one row with two columns -- inputs and the trigger
	# button on the left, the generated audio and a status box on the right.
	with gr.Blocks(title="Voice Cloning Studio") as demo:
	    gr.HTML("""
	    <div style="text-align: center; padding: 25px;">
	    <h1>🎭 REAL Voice Cloning Studio</h1>
	    <p>Status: Models load on first use</p>
	    </div>
	    """)

	    with gr.Row():
	        with gr.Column():
	            # type="filepath" so the callback receives paths, which is what
	            # voice_to_voice_clone hands to Whisper and XTTS.
	            reference_audio = gr.Audio(
	                label="🎤 Reference Audio (Voice to Clone)",
	                type="filepath",
	                sources=["upload", "microphone"]
	            )
	            input_audio = gr.Audio(
	                label="🎵 Input Audio (Content to Transform)",
	                type="filepath",
	                sources=["upload", "microphone"]
	            )
	            # (label, value) pairs: the value is the code passed as `language`.
	            language = gr.Dropdown(
	                choices=[
	                    ("English", "en"),
	                    ("Spanish", "es"),
	                    ("French", "fr"),
	                    ("German", "de")
	                ],
	                value="en",
	                label="Language"
	            )
	            clone_btn = gr.Button("Clone Voice", variant="primary", size="lg")
	        with gr.Column():
	            output_audio = gr.Audio(label="Cloned Voice Result")
	            status_output = gr.Textbox(
	                label="Status",
	                lines=12,
	                interactive=False
	            )

	    # Event wiring is registered inside the Blocks context. Input order matches
	    # the callback's parameters; outputs match its (path, message) return.
	    clone_btn.click(
	        fn=voice_to_voice_clone,
	        inputs=[reference_audio, input_audio, language],
	        outputs=[output_audio, status_output],
	        show_progress=True
	    )

	# Start the Gradio server only when this file is executed as a script, not
	# when it is imported.
	if __name__ == "__main__":
	    demo.launch()