File size: 2,294 Bytes
2b5d245
 
d02ad9c
44f3114
 
 
2b5d245
 
 
 
 
 
 
 
 
44f3114
2b5d245
 
 
44f3114
 
 
 
2b5d245
 
 
 
 
 
 
 
44f3114
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2b5d245
44f3114
 
 
 
 
 
2b5d245
44f3114
 
 
2b5d245
 
 
 
44f3114
 
2b5d245
44f3114
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# FINAL TTS Space: app.py
# This version includes the fix for the MeCab/unidic dependency issue.
import os
import subprocess
import sys

import gradio as gr
import torch

# --- FIX for MeCab/unidic START ---
# Download the Japanese dictionary (unidic) that the TTS library needs, before
# TTS is imported below.
# NOTE: this is module-level code, so it runs every time this file is executed,
# not only when the Space is built.
print("Fix: Triggering unidic download...")
try:
    # Run the download with the interpreter that is running this app
    # (sys.executable) instead of whichever "python" happens to be first on
    # PATH, and pass an argument list so no shell is involved.
    _unidic_exit_code = subprocess.run(
        [sys.executable or "python", "-m", "unidic", "download"],
        check=False,
    ).returncode
except OSError as err:
    # Best effort: the interpreter could not be started at all. Keep going so
    # the TTS import below can report the real problem, if there is one.
    print(f"Fix: Could not start the unidic download command: {err}")
else:
    if _unidic_exit_code == 0:
        print("Fix: Unidic download command executed.")
    else:
        # Still best effort, but do not claim success when the command failed.
        print(f"Fix: Unidic download command failed with exit code {_unidic_exit_code}.")
# --- FIX for MeCab/unidic END ---

from TTS.api import TTS

# --- Standard Application Code ---

# Reference voice that synthesize() falls back to when the supplied speaker
# file does not exist.
DEFAULT_SPEAKER_WAV = "tutor_voice.wav"

# Use the GPU when torch reports that CUDA is available, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"TTS Service: Using device: {device}")

print("TTS Service: Loading model...")
try:
    # Create the XTTS v2 model first, then move it to the selected device.
    tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2")
    tts = tts.to(device)
    print("TTS Service: Model loaded successfully.")
except Exception as load_error:
    print(f"FATAL: Could not load TTS model. Error: {load_error}")
    # Nothing below can work without the model, so log the failure and
    # re-raise; the app stops here and the logs show the error.
    raise load_error

def synthesize(text_to_speak, speaker_wav_path):
    """Synthesize English speech for the given text using a reference voice.

    Args:
        text_to_speak: The text to convert to speech. Must contain at least
            one non-whitespace character.
        speaker_wav_path: The reference voice recording. May be a filesystem
            path (``str`` or ``os.PathLike``), an object that exposes its path
            as a ``.name`` attribute, or ``None`` when no file was supplied.
            When it is missing or does not exist on disk,
            ``DEFAULT_SPEAKER_WAV`` is used instead.

    Returns:
        The path of the WAV file the audio was written to ("output.wav").

    Raises:
        gr.Error: If the text is empty, or if neither the supplied speaker
            file nor the default speaker file exists.
    """
    if not text_to_speak or not text_to_speak.strip():
        raise gr.Error("Please enter some text to synthesize.")

    # The speaker input is optional, so normalise whatever arrived into a
    # plain path string (or None) before touching the filesystem;
    # os.path.exists(None) would raise TypeError.
    if isinstance(speaker_wav_path, (str, os.PathLike)):
        speaker_path = os.fspath(speaker_wav_path)
    else:
        # NOTE(review): presumably covers upload wrapper objects that carry
        # their path in `.name`; anything else (including None) becomes None.
        speaker_path = getattr(speaker_wav_path, "name", None)

    if not speaker_path or not os.path.exists(speaker_path):
        print(f"Warning: Speaker file not found at '{speaker_path}'. Using default.")
        speaker_path = DEFAULT_SPEAKER_WAV

    if not os.path.exists(speaker_path):
        raise gr.Error("The default 'tutor_voice.wav' file is missing! Please upload it.")

    # Only the first 30 characters are logged to keep the log line short.
    print(f"TTS Service: Synthesizing text: '{text_to_speak[:30]}...'")
    output_wav_path = "output.wav"

    tts.tts_to_file(
        text=text_to_speak,
        file_path=output_wav_path,
        speaker_wav=speaker_path,
        language="en",
    )

    print(f"TTS Service: Audio saved to '{output_wav_path}'")
    return output_wav_path

# Build the page: a Markdown heading followed by an Interface wired to
# synthesize(). The components are created in the same order as before
# (text input, speaker file, audio output) and then handed to the Interface.
with gr.Blocks() as app:
    gr.Markdown("# EveryPrep XII - TTS Voice Service (v2 - Fixed)")

    text_input = gr.Textbox(
        label="Text to Synthesize",
        value="This is a test of the fixed TTS service.",
    )
    # The file input is pre-filled with the default reference voice.
    speaker_input = gr.File(
        label="Speaker WAV (Optional)",
        value=DEFAULT_SPEAKER_WAV,
    )
    audio_output = gr.Audio(label="Synthesized Audio")

    gr.Interface(
        fn=synthesize,
        inputs=[text_input, speaker_input],
        outputs=audio_output,
        title="TTS API Test Interface",
        # NOTE(review): presumably the name under which the function is
        # exposed to API clients - confirm against the Gradio version in use.
        api_name="synthesize",
    )

# Start the Gradio server with default settings.
app.launch()