WasabiDrop committed on
Commit
4fa8fb6
·
verified ·
1 Parent(s): 5d3aab0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +226 -226
app.py CHANGED
@@ -1,227 +1,227 @@
1
- import gradio as gr
2
- import requests
3
- import os
4
- import time
5
- import json
6
- from dotenv import load_dotenv
7
-
8
# --- Configuration & Constants ---
load_dotenv()  # load variables from a local .env file, if present, before reading them

# REPLICATE_API_TOKENS holds one or more Replicate tokens separated by commas.
REPLICATE_API_TOKENS_STR = os.getenv("REPLICATE_API_TOKENS")
# Blank entries (trailing comma, "a,,b", whitespace-only value) are dropped so
# an empty string is never used as a bearer token.
REPLICATE_API_KEYS = [
    token.strip()
    for token in (REPLICATE_API_TOKENS_STR or "").split(",")
    if token.strip()
]
if not REPLICATE_API_KEYS:
    print("WARNING: REPLICATE_API_TOKENS not found. App will not function.")

MODEL_ENDPOINT = "https://api.replicate.com/v1/models/minimax/speech-02-hd/predictions"

# VOICE_ID_MAP maps the display name shown in the voice dropdown to the
# voice_id value sent to the API. The fallbacks below stay in effect whenever
# voices.json cannot be used.
VOICE_ID_MAP = {}
VOICE_ID_PRETTY_NAMES = []
DEFAULT_VOICE_PRETTY_NAME = None
try:
    with open("voices.json", "r", encoding="utf-8") as f:
        loaded_voices = json.load(f)
except FileNotFoundError:
    print("ERROR: voices.json not found. Please create it.")
except json.JSONDecodeError:
    print("ERROR: voices.json is not valid JSON.")
else:
    # Valid JSON that is not an object (e.g. a list) has no keys to offer as
    # voice names; report it instead of crashing at import time.
    if isinstance(loaded_voices, dict):
        VOICE_ID_MAP = loaded_voices
    else:
        print("ERROR: voices.json must contain a JSON object mapping voice names to voice IDs.")
    if not VOICE_ID_MAP:
        print("WARNING: voices.json is empty or could not be loaded.")
    VOICE_ID_PRETTY_NAMES = list(VOICE_ID_MAP)
    # Prefer "Friendly Person" as the preselected voice, else the first entry.
    if "Friendly Person" in VOICE_ID_MAP:
        DEFAULT_VOICE_PRETTY_NAME = "Friendly Person"
    elif VOICE_ID_PRETTY_NAMES:
        DEFAULT_VOICE_PRETTY_NAME = VOICE_ID_PRETTY_NAMES[0]

# Choices offered by the UI dropdowns.
EMOTIONS = ["auto", "neutral", "happy", "sad", "angry", "fearful", "disgusted", "surprised"]
SAMPLE_RATES = [8000, 16000, 22050, 24000, 32000, 44100]
BITRATES = [32000, 64000, 128000, 256000]
CHANNELS = ["mono", "stereo"]
LANGUAGE_BOOST_OPTIONS = ["None", "English", "Chinese", "Japanese", "Korean"]

# Index into REPLICATE_API_KEYS of the key get_next_api_key() hands out next.
current_key_index = 0
# A prediction is polled at most MAX_POLLING_ATTEMPTS times, sleeping
# POLL_INTERVAL seconds before each poll.
MAX_POLLING_ATTEMPTS = 60
POLL_INTERVAL = 3
46
-
47
def get_next_api_key():
    """Return the next Replicate API key in round-robin order.

    Reads and advances the module-level ``current_key_index`` so that
    successive calls walk through ``REPLICATE_API_KEYS`` and wrap back to the
    first key after the last one.

    Returns:
        The selected key, or ``None`` when ``REPLICATE_API_KEYS`` is empty.
    """
    global current_key_index
    pool_size = len(REPLICATE_API_KEYS)
    if pool_size == 0:
        return None
    selected = REPLICATE_API_KEYS[current_key_index]
    current_key_index = (current_key_index + 1) % pool_size
    return selected
54
-
55
def _predict_with_key(api_key, payload):
    """Create one prediction with ``api_key`` and poll it until it settles.

    Args:
        api_key: Replicate token sent as the bearer credential.
        payload: JSON body posted to ``MODEL_ENDPOINT``.

    Returns:
        ``(audio_url, error_message)``. On success ``audio_url`` is the
        prediction's ``output`` value and ``error_message`` is ``""``; on any
        failure ``audio_url`` is ``None`` and ``error_message`` describes it.
    """
    key_hint = api_key[-4:]  # only the tail of the key is ever logged
    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}

    try:
        response = requests.post(MODEL_ENDPOINT, json=payload, headers=headers, timeout=30)
        response.raise_for_status()
        result = response.json()
        current_status = result.get("status")
        print(f"Initial API Response (Key ...{key_hint}): Status '{current_status}'")

        # "urls" may be absent or null; either way there is nothing to poll.
        prediction_url = (result.get("urls") or {}).get("get")
        logs_from_initial_call = result.get("logs")

        polling_attempts = 0
        while current_status in ("starting", "processing") and prediction_url:
            if polling_attempts >= MAX_POLLING_ATTEMPTS:
                print(f"Polling timed out for key ...{key_hint}.")
                result["error"] = "Polling timed out."  # For local log
                current_status = "failed_polling_timeout"
                break
            polling_attempts += 1
            time.sleep(POLL_INTERVAL)
            poll_response = requests.get(prediction_url, headers=headers, timeout=30)
            poll_response.raise_for_status()
            result = poll_response.json()
            current_status = result.get("status")
    except requests.exceptions.HTTPError as e:
        # e.response may be None, so read both fields defensively.
        status_code = getattr(e.response, "status_code", "unknown")
        error_text = getattr(e.response, "text", "Unknown HTTP Error")
        return None, f"HTTP error for key ...{key_hint}: {status_code} - {error_text}"
    except requests.exceptions.RequestException as e:
        return None, f"Request exception for key ...{key_hint}: {e}"

    if current_status != "succeeded":
        # Covers "failed", "failed_polling_timeout", or other unexpected states.
        error_detail = result.get("error") or f"Unknown error or unexpected status '{current_status}'"
        return None, (
            f"Prediction failed/timed out for key ...{key_hint}. "
            f"Status: {current_status}. Error: {error_detail}"
        )

    audio_url = result.get("output")
    if not audio_url:
        return None, f"API succeeded (Key ...{key_hint}) but no output URL. Resp: {result}"

    success_logs = result.get('logs', logs_from_initial_call if logs_from_initial_call else 'N/A')
    print(f"Success with key ...{key_hint}. Logs: {success_logs}")
    return audio_url, ""


def generate_speech(
    text, pitch, speed, volume, bitrate, channel, emotion,
    voice_id_pretty_name, custom_voice_id, sample_rate,
    language_boost, english_normalization
):
    """Synthesize ``text`` through the Replicate endpoint, rotating API keys.

    Each configured key is tried at most once per call, starting from the
    current position of the round-robin pool; the first key that yields an
    output wins.

    Args:
        text: Text to synthesize; must contain non-whitespace characters.
        pitch, volume, bitrate, sample_rate: Sent as integers.
        speed: Sent as a float.
        channel, emotion: Sent unchanged.
        voice_id_pretty_name: Key into ``VOICE_ID_MAP``; used only when
            ``custom_voice_id`` is blank.
        custom_voice_id: Explicit voice ID; overrides the dropdown when set.
        language_boost: Sent only when it is not ``"None"`` (case-insensitive).
        english_normalization: Sent as a boolean.

    Returns:
        The ``output`` value of the successful prediction, or ``None`` when
        ``text`` is blank (a warning is shown instead).

    Raises:
        gr.Error: No API key is configured, no voice can be resolved, or every
            key failed. Gradio shows the message to the user.
    """
    # A missing value (None) is treated like an empty box rather than crashing.
    text = text or ""
    custom_voice_id = (custom_voice_id or "").strip()

    if not text.strip():
        gr.Warning("Text input cannot be empty.")
        return None  # Must return a value for the audio output

    # gr.Error is an exception class: it only reaches the user when raised.
    if not REPLICATE_API_KEYS:
        raise gr.Error("No Replicate API Tokens configured. Please set REPLICATE_API_TOKENS in secrets.")

    if not VOICE_ID_MAP and not custom_voice_id:
        raise gr.Error("Voice ID configuration is missing (voices.json empty/invalid) and no custom voice ID provided.")

    if custom_voice_id:
        actual_voice_id_to_use = custom_voice_id
    elif voice_id_pretty_name and voice_id_pretty_name in VOICE_ID_MAP:
        actual_voice_id_to_use = VOICE_ID_MAP[voice_id_pretty_name]
    else:
        raise gr.Error(f"Selected voice '{voice_id_pretty_name}' not found in mappings and no custom ID provided.")

    payload = {
        "input": {
            "text": text, "pitch": int(pitch), "speed": float(speed), "volume": int(volume),
            "bitrate": int(bitrate), "channel": channel, "emotion": emotion,
            "voice_id": actual_voice_id_to_use, "sample_rate": int(sample_rate),
            "english_normalization": bool(english_normalization)
        }
    }
    if language_boost and language_boost.lower() != "none":
        payload["input"]["language_boost"] = language_boost

    last_error_message_for_key = ""
    for _ in range(len(REPLICATE_API_KEYS)):
        api_key = get_next_api_key()
        if not api_key:
            # A blank pool entry must not stop the remaining keys from being tried.
            last_error_message_for_key = "Skipped an empty API key entry."
            print(last_error_message_for_key)
            continue

        print(f"Attempting API call with key ending: ...{api_key[-4:]}. Voice ID: {actual_voice_id_to_use}")
        audio_url, error_message = _predict_with_key(api_key, payload)
        if audio_url:
            gr.Info("Success! Audio generated.")
            return audio_url

        last_error_message_for_key = error_message
        print(last_error_message_for_key)  # then try the next key

    # If all keys failed
    final_error_message = "All API keys failed or an unrecoverable error occurred."
    if last_error_message_for_key:  # add context from the last attempt
        final_error_message += f" Last attempt error: {last_error_message_for_key}"
    raise gr.Error(final_error_message)
166
-
167
-
168
# --- Gradio UI ---
# NOTE(review): the nesting below is reconstructed from a listing that had lost
# its indentation; confirm container membership against the running layout.
with gr.Blocks(theme=gr.themes.Soft()) as app:
    gr.Markdown("# Glue Up Academy Narrator")
    gr.Markdown("Enter text and adjust parameters to generate speech.")

    with gr.Row():
        # Left column: text, voice choice and tuning parameters.
        with gr.Column(scale=2):
            text_input = gr.Textbox(
                label="Text to Synthesize",
                lines=5,
                placeholder="Enter your text here...\n💡Insert '<#0.5#>' to add a 0.5s pause. Adjust duration.",
            )

            with gr.Accordion("Voice Selection", open=True):
                voice_id_dropdown = gr.Dropdown(
                    label="Choose a Voice ID",
                    choices=VOICE_ID_PRETTY_NAMES,
                    value=DEFAULT_VOICE_PRETTY_NAME,
                )
                custom_voice_id_input = gr.Textbox(
                    label="Custom Voice ID (Optional)",
                    placeholder="e.g., my_cloned_voice_v2",
                    info="If filled, this will override dropdown.",
                )
                gr.Markdown("[Minimax Voices](https://www.minimax.io/audio/voices) for more options.")
                gr.Markdown("For voice cloning, reach out to Raffy")

            with gr.Accordion("Advanced Speech Parameters", open=False):
                speed_slider = gr.Slider(
                    label="Speed", minimum=0.5, maximum=2, step=0.1, value=1.0,
                )
                volume_slider = gr.Slider(
                    label="Volume", minimum=0, maximum=10, step=1, value=1,
                )
                pitch_slider = gr.Slider(
                    label="Pitch", minimum=-12, maximum=12, step=1, value=0,
                )
                english_norm_checkbox = gr.Checkbox(
                    label="English Normalization",
                    value=False,
                    info="Improves number reading.",
                )

            with gr.Accordion("Audio Format & Emotion", open=False):
                emotion_dropdown = gr.Dropdown(
                    label="Emotion", choices=EMOTIONS, value="auto",
                )
                sample_rate_dropdown = gr.Dropdown(
                    label="Sample Rate (Hz)", choices=SAMPLE_RATES, value=32000, type="value",
                )
                bitrate_dropdown = gr.Dropdown(
                    label="Bitrate (bps)", choices=BITRATES, value=128000, type="value",
                )
                channel_dropdown = gr.Dropdown(
                    label="Channels", choices=CHANNELS, value="mono",
                )
                language_boost_dropdown = gr.Dropdown(
                    label="Language Boost", choices=LANGUAGE_BOOST_OPTIONS, value="None",
                )

        # Right column: trigger button and the resulting audio player.
        with gr.Column(scale=1):
            generate_button = gr.Button("Generate Speech", variant="primary")
            audio_output = gr.Audio(label="Generated Speech", type="filepath")

    # Order must match the positional parameters of generate_speech().
    speech_inputs = [
        text_input,
        pitch_slider,
        speed_slider,
        volume_slider,
        bitrate_dropdown,
        channel_dropdown,
        emotion_dropdown,
        voice_id_dropdown,
        custom_voice_id_input,
        sample_rate_dropdown,
        language_boost_dropdown,
        english_norm_checkbox,
    ]
    # The audio player is the only output; status is reported through toasts.
    generate_button.click(fn=generate_speech, inputs=speech_inputs, outputs=[audio_output])
221
-
222
if __name__ == "__main__":
    # Startup diagnostics only: the app is launched even when a check fails.
    startup_checks = (
        (REPLICATE_API_KEYS, "FATAL: REPLICATE_API_TOKENS are not set."),
        (VOICE_ID_MAP, "WARNING: Voice ID map is empty (voices.json issue?)."),
    )
    for configured, message in startup_checks:
        if not configured:
            print(message)
    app.launch(debug=True)
 
1
+ import gradio as gr
2
+ import requests
3
+ import os
4
+ import time
5
+ import json
6
+ from dotenv import load_dotenv
7
+
8
# --- Configuration & Constants ---
load_dotenv()  # load variables from a local .env file, if present, before reading them

# REPLICATE_API_TOKENS holds one or more Replicate tokens separated by commas.
REPLICATE_API_TOKENS_STR = os.getenv("REPLICATE_API_TOKENS")
# Blank entries (trailing comma, "a,,b", whitespace-only value) are dropped so
# an empty string is never used as a bearer token.
REPLICATE_API_KEYS = [
    token.strip()
    for token in (REPLICATE_API_TOKENS_STR or "").split(",")
    if token.strip()
]
if not REPLICATE_API_KEYS:
    print("WARNING: REPLICATE_API_TOKENS not found. App will not function.")

MODEL_ENDPOINT = "https://api.replicate.com/v1/models/minimax/speech-02-hd/predictions"

# VOICE_ID_MAP maps the display name shown in the voice dropdown to the
# voice_id value sent to the API. The fallbacks below stay in effect whenever
# voices.json cannot be used.
VOICE_ID_MAP = {}
VOICE_ID_PRETTY_NAMES = []
DEFAULT_VOICE_PRETTY_NAME = None
try:
    with open("voices.json", "r", encoding="utf-8") as f:
        loaded_voices = json.load(f)
except FileNotFoundError:
    print("ERROR: voices.json not found. Please create it.")
except json.JSONDecodeError:
    print("ERROR: voices.json is not valid JSON.")
else:
    # Valid JSON that is not an object (e.g. a list) has no keys to offer as
    # voice names; report it instead of crashing at import time.
    if isinstance(loaded_voices, dict):
        VOICE_ID_MAP = loaded_voices
    else:
        print("ERROR: voices.json must contain a JSON object mapping voice names to voice IDs.")
    if not VOICE_ID_MAP:
        print("WARNING: voices.json is empty or could not be loaded.")
    VOICE_ID_PRETTY_NAMES = list(VOICE_ID_MAP)
    # Prefer "Friendly Person" as the preselected voice, else the first entry.
    if "Friendly Person" in VOICE_ID_MAP:
        DEFAULT_VOICE_PRETTY_NAME = "Friendly Person"
    elif VOICE_ID_PRETTY_NAMES:
        DEFAULT_VOICE_PRETTY_NAME = VOICE_ID_PRETTY_NAMES[0]

# Choices offered by the UI dropdowns.
EMOTIONS = ["auto", "neutral", "happy", "sad", "angry", "fearful", "disgusted", "surprised"]
SAMPLE_RATES = [8000, 16000, 22050, 24000, 32000, 44100]
BITRATES = [32000, 64000, 128000, 256000]
CHANNELS = ["mono", "stereo"]
LANGUAGE_BOOST_OPTIONS = ["None", "English", "Chinese", "Japanese", "Korean"]

# Index into REPLICATE_API_KEYS of the key get_next_api_key() hands out next.
current_key_index = 0
# A prediction is polled at most MAX_POLLING_ATTEMPTS times, sleeping
# POLL_INTERVAL seconds before each poll.
MAX_POLLING_ATTEMPTS = 60
POLL_INTERVAL = 3
46
+
47
def get_next_api_key():
    """Return the next Replicate API key in round-robin order.

    Reads and advances the module-level ``current_key_index`` so that
    successive calls walk through ``REPLICATE_API_KEYS`` and wrap back to the
    first key after the last one.

    Returns:
        The selected key, or ``None`` when ``REPLICATE_API_KEYS`` is empty.
    """
    global current_key_index
    pool_size = len(REPLICATE_API_KEYS)
    if pool_size == 0:
        return None
    selected = REPLICATE_API_KEYS[current_key_index]
    current_key_index = (current_key_index + 1) % pool_size
    return selected
54
+
55
def _predict_with_key(api_key, payload):
    """Create one prediction with ``api_key`` and poll it until it settles.

    Args:
        api_key: Replicate token sent as the bearer credential.
        payload: JSON body posted to ``MODEL_ENDPOINT``.

    Returns:
        ``(audio_url, error_message)``. On success ``audio_url`` is the
        prediction's ``output`` value and ``error_message`` is ``""``; on any
        failure ``audio_url`` is ``None`` and ``error_message`` describes it.
    """
    key_hint = api_key[-4:]  # only the tail of the key is ever logged
    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}

    try:
        response = requests.post(MODEL_ENDPOINT, json=payload, headers=headers, timeout=30)
        response.raise_for_status()
        result = response.json()
        current_status = result.get("status")
        print(f"Initial API Response (Key ...{key_hint}): Status '{current_status}'")

        # "urls" may be absent or null; either way there is nothing to poll.
        prediction_url = (result.get("urls") or {}).get("get")
        logs_from_initial_call = result.get("logs")

        polling_attempts = 0
        while current_status in ("starting", "processing") and prediction_url:
            if polling_attempts >= MAX_POLLING_ATTEMPTS:
                print(f"Polling timed out for key ...{key_hint}.")
                result["error"] = "Polling timed out."  # For local log
                current_status = "failed_polling_timeout"
                break
            polling_attempts += 1
            time.sleep(POLL_INTERVAL)
            poll_response = requests.get(prediction_url, headers=headers, timeout=30)
            poll_response.raise_for_status()
            result = poll_response.json()
            current_status = result.get("status")
    except requests.exceptions.HTTPError as e:
        # e.response may be None, so read both fields defensively.
        status_code = getattr(e.response, "status_code", "unknown")
        error_text = getattr(e.response, "text", "Unknown HTTP Error")
        return None, f"HTTP error for key ...{key_hint}: {status_code} - {error_text}"
    except requests.exceptions.RequestException as e:
        return None, f"Request exception for key ...{key_hint}: {e}"

    if current_status != "succeeded":
        # Covers "failed", "failed_polling_timeout", or other unexpected states.
        error_detail = result.get("error") or f"Unknown error or unexpected status '{current_status}'"
        return None, (
            f"Prediction failed/timed out for key ...{key_hint}. "
            f"Status: {current_status}. Error: {error_detail}"
        )

    audio_url = result.get("output")
    if not audio_url:
        return None, f"API succeeded (Key ...{key_hint}) but no output URL. Resp: {result}"

    success_logs = result.get('logs', logs_from_initial_call if logs_from_initial_call else 'N/A')
    print(f"Success with key ...{key_hint}. Logs: {success_logs}")
    return audio_url, ""


def generate_speech(
    text, pitch, speed, volume, bitrate, channel, emotion,
    voice_id_pretty_name, custom_voice_id, sample_rate,
    language_boost, english_normalization
):
    """Synthesize ``text`` through the Replicate endpoint, rotating API keys.

    Each configured key is tried at most once per call, starting from the
    current position of the round-robin pool; the first key that yields an
    output wins.

    Args:
        text: Text to synthesize; must contain non-whitespace characters.
        pitch, volume, bitrate, sample_rate: Sent as integers.
        speed: Sent as a float.
        channel, emotion: Sent unchanged.
        voice_id_pretty_name: Key into ``VOICE_ID_MAP``; used only when
            ``custom_voice_id`` is blank.
        custom_voice_id: Explicit voice ID; overrides the dropdown when set.
        language_boost: Sent only when it is not ``"None"`` (case-insensitive).
        english_normalization: Sent as a boolean.

    Returns:
        The ``output`` value of the successful prediction, or ``None`` when
        ``text`` is blank (a warning is shown instead).

    Raises:
        gr.Error: No API key is configured, no voice can be resolved, or every
            key failed. Gradio shows the message to the user.
    """
    # A missing value (None) is treated like an empty box rather than crashing.
    text = text or ""
    custom_voice_id = (custom_voice_id or "").strip()

    if not text.strip():
        gr.Warning("Text input cannot be empty.")
        return None  # Must return a value for the audio output

    # gr.Error is an exception class: it only reaches the user when raised.
    if not REPLICATE_API_KEYS:
        raise gr.Error("No Replicate API Tokens configured. Please set REPLICATE_API_TOKENS in secrets.")

    if not VOICE_ID_MAP and not custom_voice_id:
        raise gr.Error("Voice ID configuration is missing (voices.json empty/invalid) and no custom voice ID provided.")

    if custom_voice_id:
        actual_voice_id_to_use = custom_voice_id
    elif voice_id_pretty_name and voice_id_pretty_name in VOICE_ID_MAP:
        actual_voice_id_to_use = VOICE_ID_MAP[voice_id_pretty_name]
    else:
        raise gr.Error(f"Selected voice '{voice_id_pretty_name}' not found in mappings and no custom ID provided.")

    payload = {
        "input": {
            "text": text, "pitch": int(pitch), "speed": float(speed), "volume": int(volume),
            "bitrate": int(bitrate), "channel": channel, "emotion": emotion,
            "voice_id": actual_voice_id_to_use, "sample_rate": int(sample_rate),
            "english_normalization": bool(english_normalization)
        }
    }
    if language_boost and language_boost.lower() != "none":
        payload["input"]["language_boost"] = language_boost

    last_error_message_for_key = ""
    for _ in range(len(REPLICATE_API_KEYS)):
        api_key = get_next_api_key()
        if not api_key:
            # A blank pool entry must not stop the remaining keys from being tried.
            last_error_message_for_key = "Skipped an empty API key entry."
            print(last_error_message_for_key)
            continue

        print(f"Attempting API call with key ending: ...{api_key[-4:]}. Voice ID: {actual_voice_id_to_use}")
        audio_url, error_message = _predict_with_key(api_key, payload)
        if audio_url:
            gr.Info("Success! Audio generated.")
            return audio_url

        last_error_message_for_key = error_message
        print(last_error_message_for_key)  # then try the next key

    # If all keys failed
    final_error_message = "All API keys failed or an unrecoverable error occurred."
    if last_error_message_for_key:  # add context from the last attempt
        final_error_message += f" Last attempt error: {last_error_message_for_key}"
    raise gr.Error(final_error_message)
166
+
167
+
168
# --- Gradio UI ---
# NOTE(review): the nesting below is reconstructed from a listing that had lost
# its indentation; confirm container membership against the running layout.
with gr.Blocks(theme=gr.themes.Soft()) as app:
    gr.Markdown("# Glue Up Academy Narrator")
    gr.Markdown("Enter text and adjust parameters to generate speech.")

    with gr.Row():
        # Left column: text, voice choice and tuning parameters.
        with gr.Column(scale=2):
            text_input = gr.Textbox(
                label="Text to Synthesize",
                lines=5,
                placeholder="Enter your text here...\n💡Insert '<#0.5#>' to add a 0.5s pause. Adjust duration.",
            )

            with gr.Accordion("Voice Selection", open=True):
                voice_id_dropdown = gr.Dropdown(
                    label="Choose a Voice ID",
                    choices=VOICE_ID_PRETTY_NAMES,
                    value=DEFAULT_VOICE_PRETTY_NAME,
                )
                custom_voice_id_input = gr.Textbox(
                    label="Custom Voice ID (Optional)",
                    placeholder="e.g., my_cloned_voice_v2",
                    info="If filled, this will override dropdown.",
                )
                # The link to the Minimax voice catalogue
                # (https://www.minimax.io/audio/voices) is disabled and not rendered.
                gr.Markdown("For voice cloning, reach out to Raffy")

            with gr.Accordion("Advanced Speech Parameters", open=False):
                speed_slider = gr.Slider(
                    label="Speed", minimum=0.5, maximum=2, step=0.1, value=1.0,
                )
                volume_slider = gr.Slider(
                    label="Volume", minimum=0, maximum=10, step=1, value=1,
                )
                pitch_slider = gr.Slider(
                    label="Pitch", minimum=-12, maximum=12, step=1, value=0,
                )
                english_norm_checkbox = gr.Checkbox(
                    label="English Normalization",
                    value=False,
                    info="Improves number reading.",
                )

            with gr.Accordion("Audio Format & Emotion", open=False):
                emotion_dropdown = gr.Dropdown(
                    label="Emotion", choices=EMOTIONS, value="auto",
                )
                sample_rate_dropdown = gr.Dropdown(
                    label="Sample Rate (Hz)", choices=SAMPLE_RATES, value=32000, type="value",
                )
                bitrate_dropdown = gr.Dropdown(
                    label="Bitrate (bps)", choices=BITRATES, value=128000, type="value",
                )
                channel_dropdown = gr.Dropdown(
                    label="Channels", choices=CHANNELS, value="mono",
                )
                language_boost_dropdown = gr.Dropdown(
                    label="Language Boost", choices=LANGUAGE_BOOST_OPTIONS, value="None",
                )

        # Right column: trigger button and the resulting audio player.
        with gr.Column(scale=1):
            generate_button = gr.Button("Generate Speech", variant="primary")
            audio_output = gr.Audio(label="Generated Speech", type="filepath")

    # Order must match the positional parameters of generate_speech().
    speech_inputs = [
        text_input,
        pitch_slider,
        speed_slider,
        volume_slider,
        bitrate_dropdown,
        channel_dropdown,
        emotion_dropdown,
        voice_id_dropdown,
        custom_voice_id_input,
        sample_rate_dropdown,
        language_boost_dropdown,
        english_norm_checkbox,
    ]
    # The audio player is the only output; status is reported through toasts.
    generate_button.click(fn=generate_speech, inputs=speech_inputs, outputs=[audio_output])
221
+
222
if __name__ == "__main__":
    # Startup diagnostics only: the app is launched even when a check fails.
    startup_checks = (
        (REPLICATE_API_KEYS, "FATAL: REPLICATE_API_TOKENS are not set."),
        (VOICE_ID_MAP, "WARNING: Voice ID map is empty (voices.json issue?)."),
    )
    for configured, message in startup_checks:
        if not configured:
            print(message)
    app.launch(debug=True)