# Scraped from a Hugging Face Spaces file view (Space status: Sleeping).
# File size: 10,634 Bytes
# Revisions: 4fa8fb6 a0a4728
import gradio as gr
import requests
import os
import time
import json
from dotenv import load_dotenv
# --- Configuration & Constants ---
load_dotenv()

# REPLICATE_API_TOKENS holds one or more API tokens separated by commas.
REPLICATE_API_TOKENS_STR = os.getenv("REPLICATE_API_TOKENS")
if not REPLICATE_API_TOKENS_STR:
    print("WARNING: REPLICATE_API_TOKENS not found. App will not function.")
    REPLICATE_API_KEYS = []
else:
    # Drop blank entries (e.g. from a trailing comma or ",,"): an empty string
    # in the pool is not a usable credential and would only waste an attempt.
    REPLICATE_API_KEYS = [
        token
        for token in (raw.strip() for raw in REPLICATE_API_TOKENS_STR.split(","))
        if token
    ]
    if not REPLICATE_API_KEYS:
        print("WARNING: REPLICATE_API_TOKENS contains no usable tokens. App will not function.")

MODEL_ENDPOINT = "https://api.replicate.com/v1/models/minimax/speech-02-hd/predictions"

# Maps the display name shown in the voice dropdown to the voice ID sent to the API.
# The three names below always end up defined, whatever happens while loading.
VOICE_ID_MAP = {}
VOICE_ID_PRETTY_NAMES = []
DEFAULT_VOICE_PRETTY_NAME = None
try:
    with open("voices.json", "r", encoding="utf-8") as f:
        _loaded_voices = json.load(f)
except FileNotFoundError:
    print("ERROR: voices.json not found. Please create it.")
except json.JSONDecodeError:
    print("ERROR: voices.json is not valid JSON.")
else:
    if _loaded_voices and not isinstance(_loaded_voices, dict):
        # Valid JSON of the wrong shape (list, string, number) has no .keys();
        # report it instead of crashing at import time.
        print("ERROR: voices.json must contain a JSON object mapping voice names to voice IDs.")
    elif _loaded_voices:
        VOICE_ID_MAP = _loaded_voices
    if not VOICE_ID_MAP:
        print("WARNING: voices.json is empty or could not be loaded.")
    VOICE_ID_PRETTY_NAMES = list(VOICE_ID_MAP.keys())
    # Prefer "Friendly Person" when present, otherwise fall back to the first voice.
    if "Friendly Person" in VOICE_ID_PRETTY_NAMES:
        DEFAULT_VOICE_PRETTY_NAME = "Friendly Person"
    elif VOICE_ID_PRETTY_NAMES:
        DEFAULT_VOICE_PRETTY_NAME = VOICE_ID_PRETTY_NAMES[0]

# Choices offered by the UI dropdowns.
EMOTIONS = ["auto", "neutral", "happy", "sad", "angry", "fearful", "disgusted", "surprised"]
SAMPLE_RATES = [8000, 16000, 22050, 24000, 32000, 44100]
BITRATES = [32000, 64000, 128000, 256000]
CHANNELS = ["mono", "stereo"]
LANGUAGE_BOOST_OPTIONS = ["None", "English", "Chinese", "Japanese", "Korean"]

# Round-robin cursor into REPLICATE_API_KEYS (advanced by get_next_api_key).
current_key_index = 0

# Polling budget: at most MAX_POLLING_ATTEMPTS status checks,
# sleeping POLL_INTERVAL seconds before each one.
MAX_POLLING_ATTEMPTS = 60
POLL_INTERVAL = 3
def get_next_api_key():
    """Return the next API key in round-robin order.

    Reads the key at the module-level ``current_key_index`` and then advances
    that index, wrapping back to zero after the last entry of
    ``REPLICATE_API_KEYS``.

    Returns:
        The selected key, or ``None`` when ``REPLICATE_API_KEYS`` is empty.
    """
    global current_key_index
    if REPLICATE_API_KEYS:
        position = current_key_index
        selected = REPLICATE_API_KEYS[position]
        current_key_index = (position + 1) % len(REPLICATE_API_KEYS)
        return selected
    return None
def generate_speech(
    text, pitch, speed, volume, bitrate, channel, emotion,
    voice_id_pretty_name, custom_voice_id, sample_rate,
    language_boost, english_normalization
):
    """Create a speech prediction at MODEL_ENDPOINT and return its output.

    The request is tried with each configured API key in turn (round-robin via
    ``get_next_api_key``). For every key the prediction is created with a POST
    and, while its status is "starting" or "processing", polled through the
    ``urls.get`` link from the response until it leaves those states or the
    polling budget (``MAX_POLLING_ATTEMPTS`` x ``POLL_INTERVAL`` seconds) runs out.

    Args:
        text: Text to synthesize; sent unchanged as ``input.text``.
        pitch, volume, bitrate, sample_rate: Cast to ``int`` before sending.
        speed: Cast to ``float`` before sending.
        channel, emotion: Sent as given.
        voice_id_pretty_name: Key into ``VOICE_ID_MAP``; used only when
            ``custom_voice_id`` is blank.
        custom_voice_id: Raw voice ID; when non-blank it overrides the dropdown.
        language_boost: Added to the payload unless empty or "none"
            (case-insensitive).
        english_normalization: Cast to ``bool`` before sending.

    Returns:
        The ``output`` field of the succeeded prediction (used as the audio
        URL), or ``None`` when ``text`` is blank (a warning toast is shown).

    Raises:
        gr.Error: When no API keys or no usable voice ID are configured, or
            when every key failed. ``gr.Error`` is an exception class, so it
            has to be raised for Gradio to display it; merely constructing it
            shows nothing to the user.
    """
    # Tolerate None for the two text inputs instead of failing on .strip().
    text = text or ""
    custom_voice_id = (custom_voice_id or "").strip()

    if not text.strip():
        gr.Warning("Text input cannot be empty.")
        return None  # Must return a value for the audio output
    if not REPLICATE_API_KEYS:
        raise gr.Error("No Replicate API Tokens configured. Please set REPLICATE_API_TOKENS in secrets.")
    if not VOICE_ID_MAP and not custom_voice_id:
        raise gr.Error("Voice ID configuration is missing (voices.json empty/invalid) and no custom voice ID provided.")

    # A custom voice ID takes precedence over the dropdown selection.
    if custom_voice_id:
        actual_voice_id_to_use = custom_voice_id
    elif voice_id_pretty_name and voice_id_pretty_name in VOICE_ID_MAP:
        actual_voice_id_to_use = VOICE_ID_MAP[voice_id_pretty_name]
    else:
        raise gr.Error(f"Selected voice '{voice_id_pretty_name}' not found in mappings and no custom ID provided.")

    payload = {
        "input": {
            "text": text, "pitch": int(pitch), "speed": float(speed), "volume": int(volume),
            "bitrate": int(bitrate), "channel": channel, "emotion": emotion,
            "voice_id": actual_voice_id_to_use, "sample_rate": int(sample_rate),
            "english_normalization": bool(english_normalization)
        }
    }
    if language_boost and language_boost.lower() != "none":
        payload["input"]["language_boost"] = language_boost

    last_error_message_for_key = ""
    # One attempt per configured key; the first success returns immediately.
    for _ in range(len(REPLICATE_API_KEYS)):
        api_key = get_next_api_key()
        if not api_key:
            # A blank pool entry is unusable; move on instead of aborting the
            # whole request while other keys may still work.
            last_error_message_for_key = "Skipped an empty API key entry."
            print(last_error_message_for_key)
            continue

        key_tail = api_key[-4:]  # only the last four characters are ever logged
        headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
        print(f"Attempting API call with key ending: ...{key_tail}. Voice ID: {actual_voice_id_to_use}")
        try:
            response = requests.post(MODEL_ENDPOINT, json=payload, headers=headers, timeout=30)
            response.raise_for_status()
            result = response.json()
            current_status = result.get("status")
            print(f"Initial API Response (Key ...{key_tail}): Status '{current_status}'")
            prediction_url = result.get("urls", {}).get("get")
            logs_from_initial_call = result.get("logs")

            polling_attempts = 0
            while current_status in ["starting", "processing"] and prediction_url:
                if polling_attempts >= MAX_POLLING_ATTEMPTS:
                    last_error_message_for_key = f"Polling timed out for key ...{key_tail}."
                    print(last_error_message_for_key)
                    result["error"] = "Polling timed out."  # For local log
                    current_status = "failed_polling_timeout"
                    break
                polling_attempts += 1
                time.sleep(POLL_INTERVAL)
                poll_response = requests.get(prediction_url, headers=headers, timeout=30)
                poll_response.raise_for_status()
                result = poll_response.json()
                current_status = result.get("status")

            if current_status == "succeeded":
                audio_url = result.get("output")
                if audio_url:
                    success_logs = result.get('logs', logs_from_initial_call if logs_from_initial_call else 'N/A')
                    print(f"Success with key ...{key_tail}. Logs: {success_logs}")
                    gr.Info("Success! Audio generated.")
                    return audio_url
                last_error_message_for_key = f"API succeeded (Key ...{key_tail}) but no output URL. Resp: {result}"
                print(last_error_message_for_key)
            else:  # Covers "failed", "failed_polling_timeout", or other unexpected states
                error_detail = result.get("error", f"Unknown error or unexpected status '{current_status}'")
                last_error_message_for_key = f"Prediction failed/timed out for key ...{key_tail}. Status: {current_status}. Error: {error_detail}"
                print(last_error_message_for_key)
        except requests.exceptions.HTTPError as e:
            # e.response may be None, and an error Response is falsy, so read
            # both fields with getattr rather than a truthiness check.
            http_response = e.response
            status_code = getattr(http_response, "status_code", "unknown")
            error_text = getattr(http_response, "text", "Unknown HTTP Error")
            last_error_message_for_key = f"HTTP error for key ...{key_tail}: {status_code} - {error_text}"
            print(last_error_message_for_key)
        except requests.exceptions.RequestException as e:
            last_error_message_for_key = f"Request exception for key ...{key_tail}: {e}"
            print(last_error_message_for_key)
        # Falling out of the try/except without returning means: try the next key.

    # If all keys failed
    final_error_message = "All API keys failed or an unrecoverable error occurred."
    if last_error_message_for_key:  # Provide a bit more context from the last attempt if available
        final_error_message += f" Last attempt error: {last_error_message_for_key}"
    raise gr.Error(final_error_message)
# --- Gradio UI ---
# Layout: a wide left column with the text box and three parameter accordions,
# and a narrow right column with the trigger button and the audio player.
with gr.Blocks(theme=gr.themes.Soft()) as app:
    gr.Markdown("# Glue Up Academy Narrator")
    gr.Markdown("Enter text and adjust parameters to generate speech.")

    with gr.Row():
        with gr.Column(scale=2):
            text_input = gr.Textbox(
                label="Text to Synthesize",
                lines=5,
                placeholder="Enter your text here...\n💡Insert '<#0.5#>' to add a 0.5s pause. Adjust duration.",
            )

            with gr.Accordion("Voice Selection", open=True):
                voice_id_dropdown = gr.Dropdown(
                    label="Choose a Voice ID",
                    choices=VOICE_ID_PRETTY_NAMES,
                    value=DEFAULT_VOICE_PRETTY_NAME,
                )
                custom_voice_id_input = gr.Textbox(
                    label="Custom Voice ID (Optional)",
                    placeholder="e.g., my_cloned_voice_v2",
                    info="If filled, this will override dropdown.",
                )
                #gr.Markdown("[Minimax Voices](https://www.minimax.io/audio/voices) for more options.")
                gr.Markdown("For voice cloning, reach out to Raffy")

            with gr.Accordion("Advanced Speech Parameters", open=False):
                speed_slider = gr.Slider(
                    label="Speed", minimum=0.5, maximum=2, step=0.1, value=1.0,
                )
                volume_slider = gr.Slider(
                    label="Volume", minimum=0, maximum=10, step=1, value=1,
                )
                pitch_slider = gr.Slider(
                    label="Pitch", minimum=-12, maximum=12, step=1, value=0,
                )
                english_norm_checkbox = gr.Checkbox(
                    label="English Normalization",
                    value=False,
                    info="Improves number reading.",
                )

            with gr.Accordion("Audio Format & Emotion", open=False):
                emotion_dropdown = gr.Dropdown(
                    label="Emotion", choices=EMOTIONS, value="auto",
                )
                sample_rate_dropdown = gr.Dropdown(
                    label="Sample Rate (Hz)", choices=SAMPLE_RATES, value=32000, type="value",
                )
                bitrate_dropdown = gr.Dropdown(
                    label="Bitrate (bps)", choices=BITRATES, value=128000, type="value",
                )
                channel_dropdown = gr.Dropdown(
                    label="Channels", choices=CHANNELS, value="mono",
                )
                language_boost_dropdown = gr.Dropdown(
                    label="Language Boost", choices=LANGUAGE_BOOST_OPTIONS, value="None",
                )

        with gr.Column(scale=1):
            generate_button = gr.Button("Generate Speech", variant="primary")
            # The audio player is the only output; status is reported through toasts.
            audio_output = gr.Audio(label="Generated Speech", type="filepath")

    # Order must match the positional parameters of generate_speech.
    speech_inputs = [
        text_input, pitch_slider, speed_slider, volume_slider,
        bitrate_dropdown, channel_dropdown, emotion_dropdown,
        voice_id_dropdown, custom_voice_id_input, sample_rate_dropdown,
        language_boost_dropdown, english_norm_checkbox,
    ]
    generate_button.click(
        fn=generate_speech,
        inputs=speech_inputs,
        outputs=[audio_output],
    )
if __name__ == "__main__":
    # Startup diagnostics only: the app is still launched so the UI can show
    # the configuration problem when a request is made.
    if not REPLICATE_API_KEYS:
        print("FATAL: REPLICATE_API_TOKENS are not set.")
    if not VOICE_ID_MAP:
        print("WARNING: Voice ID map is empty (voices.json issue?).")
    app.launch(debug=True)