# Hugging Face Spaces page header (scrape artifact): "Spaces" — build status: "Runtime error".
# FINAL, ROBUST TTS Space: app.py
# This version uses a stable model loading method to avoid all previous errors.
import os
import subprocess
import sys

import gradio as gr
import torch
import torchaudio

# --- FIX for MeCab/unidic START ---
# Download the Japanese dictionary needed by the TTS library's MeCab tokenizer.
# Use the *current* interpreter via subprocess instead of os.system('python ...'):
# inside a Space/virtualenv, 'python' may not be on PATH or may be a different
# interpreter than the one running this app. check=False keeps this best-effort,
# matching the original os.system behavior (which also ignored failures).
print("Fix: Triggering unidic download...")
subprocess.run([sys.executable, "-m", "unidic", "download"], check=False)
print("Fix: Unidic download command executed.")
# --- FIX for MeCab/unidic END ---

# --- Import necessary classes from the TTS library ---
# NOTE: these imports must stay *after* the unidic download above — importing
# TTS pulls in the MeCab tokenizer, which needs the dictionary present.
from TTS.tts.configs.xtts_config import XttsConfig
from TTS.tts.models.xtts import Xtts
from TTS.utils.manage import ModelManager
from TTS.utils.generic_utils import get_user_data_dir

# --- Configuration & Model Loading (happens ONCE at startup) ---
DEFAULT_SPEAKER_WAV = "tutor_voice.wav"  # fallback reference voice bundled with the Space
device = "cpu"  # CPU-only host; see use_deepspeed=False below
print(f"TTS Service: Using device: {device}")

print("TTS Service: Downloading model if not present...")
model_name = "tts_models/multilingual/multi-dataset/xtts_v2"
ModelManager().download_model(model_name)
# ModelManager stores models under the user data dir with '/' -> '--' in the name.
model_path = os.path.join(get_user_data_dir("tts"), model_name.replace("/", "--"))
print(f"TTS Service: Model downloaded to: {model_path}")

print("TTS Service: Loading model config...")
config = XttsConfig()
config.load_json(os.path.join(model_path, "config.json"))

print("TTS Service: Initializing model...")
model = Xtts.init_from_config(config)

print("TTS Service: Loading model checkpoint...")
model.load_checkpoint(
    config,
    checkpoint_path=os.path.join(model_path, "model.pth"),
    vocab_path=os.path.join(model_path, "vocab.json"),
    eval=True,
    use_deepspeed=False,  # Important for CPU inference (DeepSpeed requires a GPU)
)
model.to(device)
print("TTS Service: Model loaded successfully.")
| # --- The Core API Function --- | |
def synthesize(text_to_speak, speaker_wav_path):
    """Synthesize `text_to_speak` in the voice of `speaker_wav_path` using XTTS.

    Args:
        text_to_speak: English text to render as speech.
        speaker_wav_path: Path to a reference WAV for voice cloning; may be
            None or missing (the optional gr.File input), in which case
            DEFAULT_SPEAKER_WAV is used.

    Returns:
        Path to the generated 'output.wav' (24 kHz mono).

    Raises:
        gr.Error: if no speaker WAV is available or synthesis fails.
    """
    # BUGFIX: gr.File is optional, so speaker_wav_path can be None;
    # os.path.exists(None) raises TypeError. Guard on falsiness first.
    if not speaker_wav_path or not os.path.exists(speaker_wav_path):
        speaker_wav_path = DEFAULT_SPEAKER_WAV
    if not os.path.exists(speaker_wav_path):
        raise gr.Error("Default 'tutor_voice.wav' is missing!")
    output_wav_path = "output.wav"
    try:
        print(f"TTS Service: Synthesizing text: '{text_to_speak[:40]}...'")
        # Extract the voice-cloning conditioning from the reference audio.
        gpt_cond_latent, speaker_embedding = model.get_conditioning_latents(audio_path=speaker_wav_path)
        print("TTS Service: Performing inference...")
        out = model.inference(
            text_to_speak, "en", gpt_cond_latent, speaker_embedding, temperature=0.7,
        )
        # XTTS returns a 1-D float waveform at 24 kHz; add a channel dim for torchaudio.
        torchaudio.save(output_wav_path, torch.tensor(out["wav"]).unsqueeze(0), 24000)
        print(f"TTS Service: Audio saved to '{output_wav_path}'")
        return output_wav_path
    except Exception as e:
        # Surface the failure to the Gradio UI/API caller instead of returning None.
        print(f"An error occurred during synthesis: {e}")
        raise gr.Error(f"Failed to synthesize audio. Error: {e}")
# --- Build the Gradio API Interface ---
# NOTE: the input/output components are instantiated inline inside the
# gr.Interface(...) call on purpose — Gradio renders components as a side
# effect of construction within a Blocks context, so hoisting them into
# separate statements would render them an extra time.
with gr.Blocks() as app:
    gr.Markdown("# EveryPrep XII - Custom TTS Voice Service")
    gr.Interface(
        fn=synthesize,
        inputs=[
            gr.Textbox(label="Text to Synthesize", value="This is a test of the stable TTS service."),
            # Optional: when no file is uploaded, synthesize() falls back to DEFAULT_SPEAKER_WAV.
            gr.File(label="Speaker WAV (Optional)", value=DEFAULT_SPEAKER_WAV)
        ],
        outputs=gr.Audio(label="Synthesized Audio"),
        title="TTS API Test Interface",
        api_name="synthesize"  # NOTE(review): exposes /synthesize via the API — confirm this kwarg is supported by the installed gradio version
    )
# --- Launch the App ---
app.launch()  # blocks the main thread serving the web UI and API