Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import requests | |
| import os | |
| import time | |
| import json | |
| from dotenv import load_dotenv | |
# --- Configuration & Constants ---
# Populate the environment from a local .env file first, so the os.getenv
# lookups that follow can see values defined there.
load_dotenv()
def parse_api_tokens(raw):
    """Split a comma-separated token string into a list of usable API tokens.

    Whitespace around each entry is stripped and blank entries are dropped.
    Blank entries arise from a trailing comma, doubled commas, or a
    whitespace-only value; keeping them would put an empty key into the
    rotation pool.

    Args:
        raw: The raw comma-separated string, or None when the variable is unset.

    Returns:
        A list of non-empty token strings in their original order.
    """
    if not raw:
        return []
    stripped = (part.strip() for part in raw.split(","))
    return [token for token in stripped if token]


REPLICATE_API_TOKENS_STR = os.getenv("REPLICATE_API_TOKENS")
REPLICATE_API_KEYS = parse_api_tokens(REPLICATE_API_TOKENS_STR)
if not REPLICATE_API_KEYS:
    # Also reached when the variable is set but holds no usable token.
    print("WARNING: REPLICATE_API_TOKENS not found. App will not function.")

# Prediction-creation endpoint that generate_speech POSTs to.
MODEL_ENDPOINT = "https://api.replicate.com/v1/models/minimax/speech-02-hd/predictions"
def load_voice_map(path="voices.json"):
    """Load the display-name -> voice-ID mapping from a JSON file.

    Every failure mode is reported on stdout and degrades to an empty
    mapping, so importing this module never crashes because of the voices
    file.

    Args:
        path: Location of the JSON file. Defaults to ``voices.json`` in the
            current working directory.

    Returns:
        The parsed mapping, or an empty dict when the file is missing,
        unreadable, not valid JSON, or does not contain a JSON object.
    """
    try:
        with open(path, "r", encoding="utf-8") as f:
            voice_map = json.load(f)
    except FileNotFoundError:
        print("ERROR: voices.json not found. Please create it.")
        return {}
    except (json.JSONDecodeError, UnicodeDecodeError):
        print("ERROR: voices.json is not valid JSON.")
        return {}
    except OSError as exc:
        # Permission problems, path is a directory, etc.
        print(f"ERROR: voices.json could not be read: {exc}")
        return {}

    # A top-level list/string/number is valid JSON but has no .keys(); treat
    # it as a configuration error instead of failing later.
    if not isinstance(voice_map, dict):
        print("ERROR: voices.json must contain a JSON object mapping names to voice IDs.")
        return {}
    if not voice_map:
        print("WARNING: voices.json is empty or could not be loaded.")
    return voice_map


VOICE_ID_MAP = load_voice_map()
VOICE_ID_PRETTY_NAMES = list(VOICE_ID_MAP)

# Prefer "Friendly Person" as the preselected voice; otherwise fall back to the
# first configured voice, or None when no voices are available.
if "Friendly Person" in VOICE_ID_MAP:
    DEFAULT_VOICE_PRETTY_NAME = "Friendly Person"
elif VOICE_ID_PRETTY_NAMES:
    DEFAULT_VOICE_PRETTY_NAME = VOICE_ID_PRETTY_NAMES[0]
else:
    DEFAULT_VOICE_PRETTY_NAME = None
# Choice lists backing the dropdowns in the UI.
EMOTIONS = [
    "auto",
    "neutral",
    "happy",
    "sad",
    "angry",
    "fearful",
    "disgusted",
    "surprised",
]
SAMPLE_RATES = [8000, 16000, 22050, 24000, 32000, 44100]  # Hz
BITRATES = [32000, 64000, 128000, 256000]  # bits per second
CHANNELS = ["mono", "stereo"]
# "None" is a UI-only choice: generate_speech omits language_boost for it.
LANGUAGE_BOOST_OPTIONS = [
    "None",
    "English",
    "Chinese",
    "Japanese",
    "Korean",
]

# Round-robin cursor into REPLICATE_API_KEYS, advanced by get_next_api_key().
current_key_index = 0

# Polling budget per prediction: at most MAX_POLLING_ATTEMPTS status checks,
# sleeping POLL_INTERVAL seconds before each one.
MAX_POLLING_ATTEMPTS = 60
POLL_INTERVAL = 3
def get_next_api_key():
    """Return the next API key in round-robin order.

    Reads the key at the module-level ``current_key_index`` cursor, then
    advances the cursor by one, wrapping back to the start of
    ``REPLICATE_API_KEYS`` after the last entry.

    Returns:
        The selected key, or None when no keys are configured.
    """
    global current_key_index

    if REPLICATE_API_KEYS:
        position = current_key_index
        selected = REPLICATE_API_KEYS[position]
        current_key_index = (position + 1) % len(REPLICATE_API_KEYS)
        return selected
    return None
def _resolve_voice_id(voice_id_pretty_name, custom_voice_id):
    """Pick the voice ID to send: a custom ID wins, else the dropdown mapping."""
    custom = (custom_voice_id or "").strip()
    if custom:
        return custom
    if not VOICE_ID_MAP:
        raise gr.Error("Voice ID configuration is missing (voices.json empty/invalid) and no custom voice ID provided.")
    if voice_id_pretty_name and voice_id_pretty_name in VOICE_ID_MAP:
        return VOICE_ID_MAP[voice_id_pretty_name]
    raise gr.Error(f"Selected voice '{voice_id_pretty_name}' not found in mappings and no custom ID provided.")


def _run_prediction(api_key, payload):
    """Create one prediction with ``api_key`` and poll it until it settles.

    Returns:
        ``(audio_url, None)`` on success, or ``(None, error_message)`` when
        the request, the polling, or the prediction itself failed.
    """
    key_tag = f"...{api_key[-4:]}"  # only the key suffix is ever logged
    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}

    try:
        response = requests.post(MODEL_ENDPOINT, json=payload, headers=headers, timeout=30)
        response.raise_for_status()
        result = response.json()
        status = result.get("status")
        print(f"Initial API Response (Key {key_tag}): Status '{status}'")

        prediction_url = result.get("urls", {}).get("get")
        initial_logs = result.get("logs")

        attempts = 0
        while status in ("starting", "processing") and prediction_url:
            if attempts >= MAX_POLLING_ATTEMPTS:
                print(f"Polling timed out for key {key_tag}.")
                result["error"] = "Polling timed out."  # For local log
                status = "failed_polling_timeout"
                break
            attempts += 1
            time.sleep(POLL_INTERVAL)
            poll_response = requests.get(prediction_url, headers=headers, timeout=30)
            poll_response.raise_for_status()
            result = poll_response.json()
            status = result.get("status")
    except requests.exceptions.HTTPError as exc:
        # exc.response may be None, so guard both attributes, not just .text.
        http_response = exc.response
        if http_response is not None:
            status_code, error_text = http_response.status_code, http_response.text
        else:
            status_code, error_text = "N/A", "Unknown HTTP Error"
        return None, f"HTTP error for key {key_tag}: {status_code} - {error_text}"
    except requests.exceptions.RequestException as exc:
        return None, f"Request exception for key {key_tag}: {exc}"
    except ValueError as exc:
        # Older requests releases raise a plain ValueError for a non-JSON body.
        return None, f"Invalid JSON response for key {key_tag}: {exc}"

    if status == "succeeded":
        audio_url = result.get("output")
        if audio_url:
            success_logs = result.get("logs", initial_logs if initial_logs else "N/A")
            print(f"Success with key {key_tag}. Logs: {success_logs}")
            return audio_url, None
        return None, f"API succeeded (Key {key_tag}) but no output URL. Resp: {result}"

    # Covers "failed", "failed_polling_timeout", or other unexpected states.
    error_detail = result.get("error", f"Unknown error or unexpected status '{status}'")
    return None, f"Prediction failed/timed out for key {key_tag}. Status: {status}. Error: {error_detail}"


def generate_speech(
    text, pitch, speed, volume, bitrate, channel, emotion,
    voice_id_pretty_name, custom_voice_id, sample_rate,
    language_boost, english_normalization
):
    """Synthesize speech for ``text`` and return the URL of the generated audio.

    Each configured API key is tried at most once, in round-robin order,
    until one prediction succeeds.

    Args:
        text: Text to synthesize; blank text is rejected with a warning.
        pitch, speed, volume: Slider values, coerced to int/float/int.
        bitrate, sample_rate: Dropdown values, coerced to int.
        channel, emotion: Dropdown strings forwarded unchanged.
        voice_id_pretty_name: Display name looked up in ``VOICE_ID_MAP``.
        custom_voice_id: Optional raw voice ID; overrides the dropdown when set.
        language_boost: Language name; omitted from the request when "None".
        english_normalization: Checkbox value, coerced to bool.

    Returns:
        The audio URL on success, or None when ``text`` is blank.

    Raises:
        gr.Error: When no API keys or no usable voice ID are configured, or
            when every key failed. ``gr.Error`` is an exception class, so it
            must be raised for Gradio to display it; constructing it without
            raising shows nothing.
    """
    if not (text or "").strip():
        gr.Warning("Text input cannot be empty.")
        return None  # Must return a value for the audio output

    if not REPLICATE_API_KEYS:
        raise gr.Error("No Replicate API Tokens configured. Please set REPLICATE_API_TOKENS in secrets.")

    actual_voice_id_to_use = _resolve_voice_id(voice_id_pretty_name, custom_voice_id)

    payload = {
        "input": {
            "text": text,
            "pitch": int(pitch),
            "speed": float(speed),
            "volume": int(volume),
            "bitrate": int(bitrate),
            "channel": channel,
            "emotion": emotion,
            "voice_id": actual_voice_id_to_use,
            "sample_rate": int(sample_rate),
            "english_normalization": bool(english_normalization),
        }
    }
    if language_boost and language_boost.lower() != "none":
        payload["input"]["language_boost"] = language_boost

    last_error_message_for_key = ""
    for _ in range(len(REPLICATE_API_KEYS)):
        api_key = get_next_api_key()
        if not api_key:
            # A blank entry in the pool must not abort the whole run; move on
            # to the next key instead.
            last_error_message_for_key = "Skipped an empty API key entry in the cycling pool."
            print(last_error_message_for_key)
            continue

        print(f"Attempting API call with key ending: ...{api_key[-4:]}. Voice ID: {actual_voice_id_to_use}")
        audio_url, error_message = _run_prediction(api_key, payload)
        if audio_url:
            gr.Info("Success! Audio generated.")
            return audio_url

        last_error_message_for_key = error_message
        print(last_error_message_for_key)  # then try the next key

    # If all keys failed
    final_error_message = "All API keys failed or an unrecoverable error occurred."
    if last_error_message_for_key:  # Add context from the last attempt if available
        final_error_message += f" Last attempt error: {last_error_message_for_key}"
    raise gr.Error(final_error_message)
# --- Gradio UI ---
# NOTE(review): the nesting below was reconstructed from a flattened copy of the
# file — confirm it matches the intended layout.
with gr.Blocks(theme=gr.themes.Soft()) as app:
    gr.Markdown("# Glue Up Academy Narrator")
    gr.Markdown("Enter text and adjust parameters to generate speech.")

    with gr.Row():
        # Wider column: the text box and every synthesis parameter.
        with gr.Column(scale=2):
            text_input = gr.Textbox(
                lines=5,
                label="Text to Synthesize",
                placeholder="Enter your text here...\n💡Insert '<#0.5#>' to add a 0.5s pause. Adjust duration.",
            )

            with gr.Accordion("Voice Selection", open=True):
                voice_id_dropdown = gr.Dropdown(
                    choices=VOICE_ID_PRETTY_NAMES,
                    value=DEFAULT_VOICE_PRETTY_NAME,
                    label="Choose a Voice ID",
                )
                custom_voice_id_input = gr.Textbox(
                    label="Custom Voice ID (Optional)",
                    info="If filled, this will override dropdown.",
                    placeholder="e.g., my_cloned_voice_v2",
                )
                #gr.Markdown("[Minimax Voices](https://www.minimax.io/audio/voices) for more options.")
                gr.Markdown("For voice cloning, reach out to Raffy")

            with gr.Accordion("Advanced Speech Parameters", open=False):
                speed_slider = gr.Slider(
                    minimum=0.5,
                    maximum=2,
                    step=0.1,
                    value=1.0,
                    label="Speed",
                )
                volume_slider = gr.Slider(
                    minimum=0,
                    maximum=10,
                    step=1,
                    value=1,
                    label="Volume",
                )
                pitch_slider = gr.Slider(
                    minimum=-12,
                    maximum=12,
                    step=1,
                    value=0,
                    label="Pitch",
                )
                english_norm_checkbox = gr.Checkbox(
                    value=False,
                    label="English Normalization",
                    info="Improves number reading.",
                )

            with gr.Accordion("Audio Format & Emotion", open=False):
                emotion_dropdown = gr.Dropdown(
                    choices=EMOTIONS,
                    value="auto",
                    label="Emotion",
                )
                sample_rate_dropdown = gr.Dropdown(
                    choices=SAMPLE_RATES,
                    value=32000,
                    type="value",
                    label="Sample Rate (Hz)",
                )
                bitrate_dropdown = gr.Dropdown(
                    choices=BITRATES,
                    value=128000,
                    type="value",
                    label="Bitrate (bps)",
                )
                channel_dropdown = gr.Dropdown(
                    choices=CHANNELS,
                    value="mono",
                    label="Channels",
                )
                language_boost_dropdown = gr.Dropdown(
                    choices=LANGUAGE_BOOST_OPTIONS,
                    value="None",
                    label="Language Boost",
                )

        # Narrower column: the trigger button and the audio player.
        with gr.Column(scale=1):
            generate_button = gr.Button("Generate Speech", variant="primary")
            audio_output = gr.Audio(label="Generated Speech", type="filepath")

    # The input order must match generate_speech's positional parameters.
    generate_button.click(
        fn=generate_speech,
        inputs=[
            text_input,
            pitch_slider,
            speed_slider,
            volume_slider,
            bitrate_dropdown,
            channel_dropdown,
            emotion_dropdown,
            voice_id_dropdown,
            custom_voice_id_input,
            sample_rate_dropdown,
            language_boost_dropdown,
            english_norm_checkbox,
        ],
        outputs=[audio_output],
    )
if __name__ == "__main__":
    # Report configuration gaps on the console, then start the app regardless:
    # neither condition prevents the launch.
    startup_checks = (
        (REPLICATE_API_KEYS, "FATAL: REPLICATE_API_TOKENS are not set."),
        (VOICE_ID_MAP, "WARNING: Voice ID map is empty (voices.json issue?)."),
    )
    for configured, message in startup_checks:
        if not configured:
            print(message)

    app.launch(debug=True)