Spaces:

AK97GAMERZ
/

tts-everyprep

Runtime error

App Files Files Community

tts-everyprep / app.py

AK97GAMERZ

Create app.py

ca90f9d verified 7 months ago

raw

history blame contribute delete

3.31 kB

	# app.py - Gradio Space serving text-to-speech with the Coqui XTTS v2 model.
	# The model is loaded once at startup, directly from its downloaded config and checkpoint files.

	import gradio as gr
	import torch
	import torchaudio
	import os

	# --- FIX for MeCab/unidic START ---
	# Download the Japanese dictionary (unidic) needed by the TTS library before
	# the TTS imports below run.
	# The download is started with the interpreter that is running this script
	# (sys.executable) instead of whatever `python` resolves to on PATH, and the
	# argument list avoids going through a shell.
	import subprocess
	import sys

	print("Fix: Triggering unidic download...")
	_unidic_result = subprocess.run(
	    [sys.executable or "python", "-m", "unidic", "download"],
	    check=False,  # best effort: a failed download must not stop startup
	)
	if _unidic_result.returncode == 0:
	    print("Fix: Unidic download command executed.")
	else:
	    # Report the failure instead of claiming success; startup continues as before.
	    print(f"Fix: Unidic download failed with exit code {_unidic_result.returncode}.")
	# --- FIX for MeCab/unidic END ---

	# --- Import necessary classes from the TTS library ---
	from TTS.tts.configs.xtts_config import XttsConfig
	from TTS.tts.models.xtts import Xtts
	from TTS.utils.manage import ModelManager
	from TTS.utils.generic_utils import get_user_data_dir

	# --- Configuration & Model Loading (Happens ONCE at startup) ---
	# Reference voice used when a request does not supply an existing speaker file.
	DEFAULT_SPEAKER_WAV = "tutor_voice.wav"
	device = "cpu"
	print(f"TTS Service: Using device: {device}")

	# NOTE(review): XTTS v2 appears to require accepting Coqui's model license before
	# download; in a non-interactive environment this is presumably done through the
	# COQUI_TOS_AGREED environment variable - confirm it is set for this deployment.
	print("TTS Service: Downloading model if not present...")
	model_name = "tts_models/multilingual/multi-dataset/xtts_v2"
	_manager = ModelManager()
	_manager.download_model(model_name)

	# The model directory is rebuilt by hand: the "tts" user data dir joined with
	# the model id, with every "/" replaced by "--".
	_model_dir_name = model_name.replace("/", "--")
	model_path = os.path.join(get_user_data_dir("tts"), _model_dir_name)
	print(f"TTS Service: Model downloaded to: {model_path}")

	print("TTS Service: Loading model config...")
	_config_file = os.path.join(model_path, "config.json")
	config = XttsConfig()
	config.load_json(_config_file)

	print("TTS Service: Initializing model...")
	model = Xtts.init_from_config(config)

	print("TTS Service: Loading model checkpoint...")
	_checkpoint_file = os.path.join(model_path, "model.pth")
	_vocab_file = os.path.join(model_path, "vocab.json")
	model.load_checkpoint(
	    config,
	    checkpoint_path=_checkpoint_file,
	    vocab_path=_vocab_file,
	    eval=True,
	    use_deepspeed=False,  # Important for CPU inference
	)
	model.to(device)
	print("TTS Service: Model loaded successfully.")


	# --- The Core API Function ---
	def synthesize(text_to_speak, speaker_wav_path):
	    """Synthesize English speech for ``text_to_speak`` and return a WAV file path.

	    Args:
	        text_to_speak: Text to convert to speech. Must contain non-whitespace
	            characters.
	        speaker_wav_path: Reference speaker audio. Accepted forms are a path
	            (``str`` or ``os.PathLike``), an object exposing the path as a
	            ``.name`` attribute, or ``None``. If it does not resolve to an
	            existing file, ``DEFAULT_SPEAKER_WAV`` is used instead.

	    Returns:
	        Path of a newly created ``.wav`` file, written with a 24000 Hz sample rate.

	    Raises:
	        gr.Error: If the text is empty, if the fallback speaker file is
	            missing, or if conditioning, inference or saving fails.
	    """
	    import tempfile

	    if not isinstance(text_to_speak, str) or not text_to_speak.strip():
	        raise gr.Error("No text provided to synthesize.")

	    # The file input may arrive as None (nothing uploaded) or as a file-like
	    # object rather than a plain path; os.path.exists() raises TypeError on
	    # those, so normalise to a path first.
	    if isinstance(speaker_wav_path, (str, os.PathLike)):
	        speaker_path = speaker_wav_path
	    else:
	        speaker_path = getattr(speaker_wav_path, "name", None)

	    if not isinstance(speaker_path, (str, os.PathLike)) or not os.path.exists(speaker_path):
	        speaker_path = DEFAULT_SPEAKER_WAV
	    if not os.path.exists(speaker_path):
	        raise gr.Error("Default 'tutor_voice.wav' is missing!")

	    try:
	        print(f"TTS Service: Synthesizing text: '{text_to_speak[:40]}...'")
	        gpt_cond_latent, speaker_embedding = model.get_conditioning_latents(
	            audio_path=speaker_path
	        )

	        print("TTS Service: Performing inference...")
	        out = model.inference(
	            text_to_speak,
	            "en",
	            gpt_cond_latent,
	            speaker_embedding,
	            temperature=0.7,
	        )

	        # One output file per request, so overlapping requests cannot
	        # overwrite each other's audio. delete=False keeps the file on disk
	        # for the caller after the handle is closed.
	        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
	            output_wav_path = tmp_file.name
	        torchaudio.save(output_wav_path, torch.tensor(out["wav"]).unsqueeze(0), 24000)

	        print(f"TTS Service: Audio saved to '{output_wav_path}'")
	        return output_wav_path

	    except Exception as e:
	        print(f"An error occurred during synthesis: {e}")
	        # Chain the cause so the original traceback stays visible in the logs.
	        raise gr.Error(f"Failed to synthesize audio. Error: {e}") from e


	# --- Build the Gradio API Interface ---
	with gr.Blocks() as app:
	    gr.Markdown("# EveryPrep XII - Custom TTS Voice Service")

	    # Components are created in the same order as before:
	    # text input, speaker file input, then audio output.
	    text_input = gr.Textbox(
	        label="Text to Synthesize",
	        value="This is a test of the stable TTS service.",
	    )
	    speaker_input = gr.File(
	        label="Speaker WAV (Optional)",
	        value=DEFAULT_SPEAKER_WAV,
	    )
	    audio_output = gr.Audio(label="Synthesized Audio")

	    # The interface wires synthesize() to the components above and exposes it
	    # under the API name "synthesize".
	    gr.Interface(
	        fn=synthesize,
	        inputs=[text_input, speaker_input],
	        outputs=audio_output,
	        title="TTS API Test Interface",
	        api_name="synthesize",
	    )

	# --- Launch the App ---
	app.launch()