# Hugging Face Space: xtts-space / app.py
# Author: immad84 — "update app.py"
# Commit: a279c11 (verified)
# app.py
import os
import tempfile
import torch
import gradio as gr
from TTS.api import TTS
# Patch torch.load for compatibility with older Coqui checkpoints
old_torch_load = torch.load
def patched_torch_load(*args, **kwargs):
kwargs["weights_only"] = False
return old_torch_load(*args, **kwargs)
torch.load = patched_torch_load
# Accept Coqui TOS
os.environ["COQUI_TOS_AGREED"] = "1"
# Ensure speakers folder exists
SPEAKER_DIR = "speakers"
os.makedirs(SPEAKER_DIR, exist_ok=True)
# Get device
device = "cuda" if torch.cuda.is_available() else "cpu"
# Model
MODEL = "tts_models/multilingual/multi-dataset/xtts_v2"
print("Loading model:", MODEL)
tts = TTS(MODEL).to(device)
# Supported languages
LANGS = [
"en", "es", "fr", "de", "it", "pt", "pl", "tr", "ru", "nl",
"cs", "ar", "zh-cn", "ja", "ko", "hu", "hi"
]
def generate_audio(text, language, speaker_file):
if not text or len(text.strip()) < 2:
return None
out_path = tempfile.mktemp(suffix=".wav")
# Resolve speaker path (either from upload or from speakers folder)
speaker_path = None
if speaker_file:
speaker_path = speaker_file
else:
# Default to first speaker file in folder if exists
files = [f for f in os.listdir(SPEAKER_DIR) if f.lower().endswith(".wav")]
if files:
speaker_path = os.path.join(SPEAKER_DIR, files[0])
if speaker_path:
tts.tts_to_file(
text=text,
speaker_wav=speaker_path,
language=language,
file_path=out_path
)
else:
return None
return out_path
demo = gr.Interface(
fn=generate_audio,
inputs=[
gr.Textbox(lines=3, label="Text"),
gr.Dropdown(LANGS, value="en", label="Language"),
gr.Audio(label="Upload speaker reference (optional)", type="filepath")
],
outputs=gr.Audio(type="filepath", label="Generated Speech"),
title="XTTS-v2 Voice Cloning",
description=f"Drop WAV files into `{SPEAKER_DIR}` folder for reusable speaker voices.",
allow_flagging="never",
)
if __name__ == "__main__":
demo.launch(server_name="0.0.0.0", server_port=7860)