Nymbo committed on
Commit
9610d29
·
verified ·
1 Parent(s): f54b39e

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +180 -0
app.py ADDED
@@ -0,0 +1,180 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import io
3
+ import tempfile
4
+ import numpy as np
5
+
6
+ # Optional imports for Soprano TTS (lazy load)
7
+ try:
8
+ import torch # type: ignore
9
+ except Exception: # pragma: no cover
10
+ torch = None # type: ignore
11
+ try:
12
+ from soprano import SopranoTTS # type: ignore
13
+ except Exception: # pragma: no cover
14
+ SopranoTTS = None # type: ignore
15
+ try:
16
+ from scipy.io.wavfile import write as wav_write # type: ignore
17
+ except Exception: # pragma: no cover
18
+ wav_write = None # type: ignore
19
+
20
# Module-level cache for the lazily created Soprano model. `_init_soprano()`
# fills in "device" and "model" and flips "initialized" to True on first use.
_SOPRANO_STATE = {"initialized": False, "device": "cpu", "model": None}

# Sample rate (Hz) reported to the Gradio audio player and written into the
# downloadable WAV file.
SAMPLE_RATE = 32000
23
+
24
+
25
def _init_soprano() -> None:
    """Create the Soprano model on first call and cache it in ``_SOPRANO_STATE``.

    Later calls return immediately once the cache is marked initialized.

    Raises:
        gr.Error: If the ``soprano`` package could not be imported, or if
            PyTorch is unavailable or reports no CUDA device.
    """
    already_loaded = _SOPRANO_STATE["initialized"]
    if already_loaded:
        return

    if SopranoTTS is None:
        raise gr.Error("Soprano is not installed. Please run: pip install soprano-tts --no-deps && pip install transformers unidecode")

    # `torch` is None when its import failed; otherwise ask it about CUDA.
    if not (torch and torch.cuda.is_available()):
        raise gr.Error(
            "Soprano requires a CUDA GPU. PyTorch CUDA not detected. "
            "Please install CUDA-enabled PyTorch: pip install torch --index-url https://download.pytorch.org/whl/cu121"
        )

    device = "cuda"
    print(f"Using device: {device}")

    # Use 'transformers' backend for compatibility (lmdeploy requires ray which isn't on Windows)
    tts_model = SopranoTTS(backend="transformers", device=device)

    # Publish the model first and flip the flag last.
    _SOPRANO_STATE["device"] = device
    _SOPRANO_STATE["model"] = tts_model
    _SOPRANO_STATE["initialized"] = True
47
+
48
+
49
def soprano_tts(
    text: str,
    temperature: float,
    top_p: float,
    repetition_penalty: float,
) -> tuple[int, np.ndarray] | None:
    """Generate speech from text using Soprano.

    Args:
        text: Text to synthesize; must contain non-whitespace characters.
        temperature: Sampling temperature, forwarded to ``model.infer``.
        top_p: Nucleus-sampling threshold, forwarded to ``model.infer``.
        repetition_penalty: Repetition penalty, forwarded to ``model.infer``.

    Returns:
        A ``(SAMPLE_RATE, samples)`` tuple where ``samples`` is the model
        output converted to a NumPy array.

    Raises:
        gr.Error: If ``text`` is empty, if model initialization fails, or if
            inference fails (the underlying message is truncated to 200
            characters).
    """
    if not text or not text.strip():
        raise gr.Error("Please enter text to synthesize.")

    _init_soprano()
    model = _SOPRANO_STATE["model"]

    try:
        audio = model.infer(
            text,
            temperature=temperature,
            top_p=top_p,
            repetition_penalty=repetition_penalty,
        )
        # Model returns a tensor; convert to numpy. detach() first so the
        # conversion also works if the tensor is attached to an autograd graph.
        audio_np = audio.detach().cpu().numpy()
        return (SAMPLE_RATE, audio_np)
    except gr.Error:
        raise
    except Exception as e:
        # Chain explicitly so the original traceback is kept as the cause.
        raise gr.Error(f"Error during speech generation: {str(e)[:200]}...") from e
76
+
77
+
78
+ def save_audio(audio_np: np.ndarray | None) -> str | None:
79
+ """Save audio to a temporary WAV file for download."""
80
+ if audio_np is None or len(audio_np) == 0:
81
+ return None
82
+ if wav_write is None:
83
+ raise gr.Error("scipy is not installed. Please run: pip install scipy")
84
+ import os
85
+ fd, path = tempfile.mkstemp(suffix=".wav")
86
+ os.close(fd)
87
+ wav_write(path, SAMPLE_RATE, audio_np)
88
+ return path
89
+
90
+
91
+ # --- Gradio UI ---
92
# Build the UI: sampling controls, a text box, a generate button, an audio
# player, and a separate download button backed by a temporary WAV file.
with gr.Blocks() as demo:
    # Holds the most recently generated samples so the download button can
    # write them to disk without regenerating.
    state_audio = gr.State(None)

    gr.HTML("<h1 style='text-align: center;'>Soprano-TTS</h1><p style='text-align: center;'>Powered by Soprano-80M | 32kHz High-Fidelity Audio</p>")

    gr.Markdown(
        "**Usage tips:**\n"
        "- Soprano works best when each sentence is between 2 and 15 seconds long.\n"
        "- Convert numbers and special characters to phonetic form for best results (e.g., `1+1` → `one plus one`).\n"
        "- If results are unsatisfactory, regenerate or adjust sampling settings.\n"
        "- Avoid improper grammar such as missing contractions or multiple spaces."
    )

    # NOTE(review): indentation was lost in the reviewed copy; the three
    # sliders are assumed to be the children of this Row — confirm against
    # the original file.
    with gr.Row(variant="panel"):
        temperature = gr.Slider(
            minimum=0.0,
            maximum=1.0,
            value=0.3,
            step=0.05,
            label="Temperature",
            info="Controls randomness. Lower = more deterministic.",
        )
        top_p = gr.Slider(
            minimum=0.0,
            maximum=1.0,
            value=0.95,
            step=0.01,
            label="Top-P",
            info="Nucleus sampling threshold.",
        )
        repetition_penalty = gr.Slider(
            minimum=1.0,
            maximum=2.0,
            value=1.2,
            step=0.05,
            label="Repetition Penalty",
            info="Penalizes repeated tokens.",
        )

    text_input = gr.Textbox(
        label="Input Text",
        placeholder="Enter the text you want to convert to speech here...",
        value="Soprano is an extremely lightweight text to speech model designed to produce highly realistic speech at unprecedented speed.",
        lines=5,
    )

    generate_btn = gr.Button(
        "Generate Speech",
        variant="primary",
    )

    audio_output = gr.Audio(
        label="Generated Speech",
        autoplay=True,
    )

    download_btn = gr.Button("Download Audio")
    download_file = gr.File(label="Download")

    # Shared input list for both generation triggers (button click and Enter).
    generate_inputs = [text_input, temperature, top_p, repetition_penalty]

    def generate_and_store(text, temperature, top_p, repetition_penalty):
        """Run TTS and return (audio for the player, samples for the state)."""
        result = soprano_tts(text, temperature, top_p, repetition_penalty)
        if result:
            return result, result[1]  # Return audio tuple and numpy array for state
        return None, None

    # Clicking the button generates audio and caches the samples.
    generate_btn.click(
        fn=generate_and_store,
        inputs=generate_inputs,
        outputs=[audio_output, state_audio],
        api_name="generate_speech",
    )

    # Submitting the text box triggers the same generation path.
    text_input.submit(
        fn=generate_and_store,
        inputs=generate_inputs,
        outputs=[audio_output, state_audio],
        api_name="generate_speech_enter",
    )

    # Write the cached samples to a temp WAV and expose it as a file download.
    download_btn.click(
        fn=save_audio,
        inputs=[state_audio],
        outputs=[download_file],
    )
178
+
179
if __name__ == "__main__":
    # Only start the server when run as a script: call queue() on the app,
    # then launch it with debug=True and the "Nymbo/Nymbo_Theme" theme.
    demo.queue().launch(debug=True, theme="Nymbo/Nymbo_Theme")