Spaces:

TGPro1
/

Unlimited-TTS

Sleeping

Unlimited-TTS / app.py

TGPro1

Better error diagnostics

6ba9170 about 1 month ago

9.03 kB

	import gradio as gr
	import requests
	import random
	import tempfile
	import os
	import base64
	import time
	import threading
	from datetime import datetime, timedelta, timezone

	# --- Configuration ---
	# Chat-completions endpoint that every generation request is POSTed to.
	API_URL = "https://gen.pollinations.ai/v1/chat/completions"
	# Fallback key read from the environment; used when the user supplies no key.
	SYSTEM_API_KEY = os.environ.get("POLLINATIONS_TOKEN")

	# Voice names offered in the UI dropdown.
	VOICES = [
	    "alloy",
	    "echo",
	    "fable",
	    "onyx",
	    "nova",
	    "shimmer",
	    "coral",
	    "verse",
	    "ballad",
	    "ash",
	    "sage",
	    "amuch",
	    "dan",
	]

	# --- Rate Limiting Globals ---
	# Maximum prompt length (characters) accepted from free-tier users.
	MAX_CHAR_LIMIT = 50_000
	# Minimum gap in seconds between two free-tier generations; 0 means no wait.
	COOLDOWN_SECONDS = 0
	# time.time() stamp of the last successful free-tier generation (0 = none yet).
	LAST_REQUEST_TIME = 0

	# Serializes free-tier requests so only one of them runs at a time.
	free_tier_lock = threading.Lock()

	def generate_audio_api(prompt: str, voice: str, emotion: str, seed: int, api_key: str) -> bytes:
	    """
	    Request synthesized speech from the chat-completions endpoint at API_URL.

	    Args:
	        prompt: Text sent as the user message (the text to be spoken).
	        voice: Voice name placed in the payload's ``audio.voice`` field.
	        emotion: Free-form style description embedded in the system message.
	        seed: Value sent as the payload's ``seed`` field.
	        api_key: Bearer token; when empty or None no Authorization header is sent.

	    Returns:
	        The bytes obtained by base64-decoding ``choices[0].message.audio.data``
	        from the JSON response (the request asks for the "mp3" format).

	    Raises:
	        gr.Error: On a network failure, a non-200 status code, or a response
	            body that does not contain decodable audio data.
	    """
	    print(f"DEBUG: Generating audio | Voice: {voice} | Emotion: {emotion}")

	    headers = {"Content-Type": "application/json"}
	    # Inject the key only if one was provided
	    if api_key:
	        headers["Authorization"] = f"Bearer {api_key}"

	    # The system message tells the model to read the user text verbatim
	    # in the requested emotional style.
	    system_instruction = (
	        "Only repeat what I say. "
	        f"Now say with proper emphasis in a \"{emotion}\" emotion this statement."
	    )

	    payload = {
	        "model": "openai-audio",
	        "modalities": ["text", "audio"],
	        "audio": {"voice": voice, "format": "mp3"},
	        "messages": [
	            {"role": "system", "content": system_instruction},
	            {"role": "user", "content": prompt},
	        ],
	        "seed": seed,
	    }

	    # Only the HTTP call itself is treated as a potential network failure.
	    try:
	        response = requests.post(API_URL, headers=headers, json=payload, timeout=60)
	    except requests.exceptions.RequestException as e:
	        raise gr.Error(f"Network error: {str(e)}") from e

	    status = response.status_code
	    if status in (402, 429):
	        raise gr.Error("⚠️ Server Limit: The Free Tier is busy. Please wait or use a Premium Key.")
	    if status == 401:
	        raise gr.Error("401 Unauthorized: The provided API Key is invalid.")
	    if status != 200:
	        raise gr.Error(f"API Error {status}: {response.text}")

	    try:
	        data = response.json()
	        audio_b64 = data['choices'][0]['message']['audio']['data']
	        return base64.b64decode(audio_b64)
	    except (KeyError, IndexError, TypeError) as e:
	        # Missing key, empty "choices" list, or a null where an object/string
	        # was expected: the response carries no usable audio.
	        raise gr.Error("API returned empty audio data.") from e
	    except ValueError as e:
	        # Body was not valid JSON, or the audio field was not valid base64.
	        raise gr.Error("API returned a malformed response.") from e


	def text_to_speech_app(prompt: str, voice: str, emotion: str, use_random_seed: bool, specific_seed: int, api_key_input: str):
	    """
	    Gradio click handler: validate the inputs, generate speech, save it to a file.

	    Requests with a user-supplied key longer than 5 characters are treated as
	    paid and call the API directly. All other requests are serialized through
	    free_tier_lock and wait out COOLDOWN_SECONDS since the last free generation.

	    Args:
	        prompt: Text to convert to speech.
	        voice: Voice name forwarded to generate_audio_api.
	        emotion: Emotion/style description forwarded to generate_audio_api.
	        use_random_seed: When true, a random 32-bit seed is drawn.
	        specific_seed: Seed used when use_random_seed is false.
	        api_key_input: Optional user key; SYSTEM_API_KEY is used when it is blank.

	    Returns:
	        A ``(path, status)`` tuple. On success ``path`` is the name of a
	        temporary ``.mp3`` file (created with delete=False, so it is not
	        removed here) and ``status`` is a success message. On failure
	        ``path`` is None and ``status`` holds the error text.

	    Raises:
	        gr.Error: If the prompt is empty, a free-tier prompt exceeds
	            MAX_CHAR_LIMIT, or the specific seed is not a whole number.
	    """
	    global LAST_REQUEST_TIME

	    # 1. User Status Check
	    # Coerce None to "" so .strip() cannot raise when a field arrives unset.
	    user_provided_key = (api_key_input or "").strip()
	    prompt = prompt or ""
	    # Use Client Key if present, otherwise fall back to System Key
	    active_key = user_provided_key if user_provided_key else SYSTEM_API_KEY
	    is_paid_user = len(user_provided_key) > 5

	    # Check if we have ANY key
	    if not active_key:
	        return None, "❌ Erreur: Secret 'POLLINATIONS_TOKEN' manquant dans les paramètres du Space (Hugging Face Settings -> Secrets)."

	    # 2. Validation
	    if not prompt.strip():
	        raise gr.Error("Prompt cannot be empty.")

	    # RULE: Character Limit applies ONLY to Free Tier
	    if not is_paid_user and len(prompt) > MAX_CHAR_LIMIT:
	        raise gr.Error(f"Free Tier Limit: {MAX_CHAR_LIMIT} chars. Enter an API Key for unlimited length.")

	    # Resolve the seed up front so a blank/invalid seed gives a clear message
	    # instead of a raw int() conversion error in the status box.
	    if use_random_seed:
	        seed = random.randint(0, 2**32 - 1)
	    else:
	        try:
	            seed = int(specific_seed)
	        except (TypeError, ValueError, OverflowError):
	            raise gr.Error("Specific Seed must be a whole number when 'Use Random Seed' is off.") from None

	    # 3. Execution Logic (The Traffic Split)
	    try:
	        if is_paid_user:
	            # === FAST LANE (Paid) ===
	            # Bypasses the lock entirely. No waiting.
	            print("LOG: Premium Request Processing...")
	            audio_bytes = generate_audio_api(prompt, voice, emotion, seed, active_key)
	            status_msg = "Generated! (Premium Speed ⚡)"
	        else:
	            # === SLOW LANE (Free) ===
	            # Free requests run one at a time, in lock-acquisition order.
	            print("LOG: Free Request - Attempting to acquire lock...")
	            with free_tier_lock:
	                # Holding the lock, wait out whatever remains of the cooldown
	                elapsed_time = time.time() - LAST_REQUEST_TIME
	                if elapsed_time < COOLDOWN_SECONDS:
	                    wait_time = COOLDOWN_SECONDS - elapsed_time
	                    print(f"Queueing User: Sleeping for {wait_time:.1f}s...")
	                    time.sleep(wait_time)

	                # Generate inside the lock so no other free request interleaves
	                audio_bytes = generate_audio_api(prompt, voice, emotion, seed, active_key)

	                # Only a successful generation restarts the cooldown timer
	                LAST_REQUEST_TIME = time.time()
	            status_msg = "Generated! (Free Tier 🐢)"

	        # 4. Save and Return
	        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio_file:
	            temp_audio_file.write(audio_bytes)
	        return temp_audio_file.name, status_msg

	    except Exception as e:
	        # Top-level boundary: report the failure in the status box instead of
	        # crashing the handler, but leave a trace in the server log.
	        print(f"ERROR: Generation failed: {e!r}")
	        return None, str(e)


	def toggle_seed_input(use_random_seed):
	    """Return a Gradio update that shows the seed field only when random seeding is off.

	    The update also sets the field's value to 12345 each time it is produced.
	    """
	    show_seed_field = not use_random_seed
	    return gr.update(value=12345, visible=show_seed_field)


	# --- RESTORED ORIGINAL UI ---
	# Build the Gradio interface; the resulting Blocks object is bound to `app`.
	with gr.Blocks() as app:
	gr.Markdown("# Advanced OpenAI Text-To-Speech Unlimited")
	# Introductory help text rendered as Markdown.
	# NOTE(review): the text below advertises a 50s cooldown, while COOLDOWN_SECONDS
	# in this file is 0 — confirm the wording is still intended.
	gr.Markdown(
	"""
	Enter text, choose a voice and emotion, and generate audio.

	🚀 Skip the Queue & Run Locally:
	To avoid the 50s cooldown and generate instantly:
	1. Duplicate this Space to run it privately.
	2. Get your own API key from [Pollinations.ai](https://enter.pollinations.ai/).
	3. Paste it into the `POLLINATIONS_TOKEN` secret in your duplicated space.

	Commercial Use:
	For a Lifetime Commercial Use License, please refer to our terms here:
	[LICENSE TERMS](https://huggingface.co/spaces/NihalGazi/Text-To-Speech-Unlimited/raw/main/COMMERCIAL_LICENSE_TERMS.txt)

	Like & follow for more AI projects:
	• Instagram: [@nihal_gazi_io](https://www.instagram.com/nihal_gazi_io/)
	• X.com: [@NihalGazi_](https://x.com/NihalGazi_?t=f9UtAv005GppiIIXFEWMSQ&s=09)
	• Discord: nihal_gazi_io
	"""
	)

	with gr.Row():
	# First column (scale=2): API key, prompt, emotion and voice inputs.
	with gr.Column(scale=2):
	# Masked input (type="password") for an optional user-supplied key.
	api_key_input = gr.Textbox(
	label="Pollinations API Key (Optional)",
	placeholder="sk_...",
	type="password",
	info="Enter your key to REMOVE the 50s cooldown and generate instantly."
	)
	# The label shows MAX_CHAR_LIMIT; the limit itself is enforced in text_to_speech_app.
	prompt_input = gr.Textbox(
	label=f"Prompt (Max {MAX_CHAR_LIMIT} chars)",
	placeholder="Enter the text you want to convert to speech...",
	max_lines=3
	)
	emotion_input = gr.Textbox(label="Emotion Style", placeholder="e.g., happy, sad, excited, calm...")
	voice_dropdown = gr.Dropdown(label="Voice", choices=VOICES, value="alloy")
	# Second column (scale=1): seed controls.
	with gr.Column(scale=1):
	random_seed_checkbox = gr.Checkbox(label="Use Random Seed", value=True)
	# Created hidden (visible=False); toggle_seed_input controls its visibility.
	seed_input = gr.Number(label="Specific Seed", value=12345, visible=False, precision=0)

	submit_button = gr.Button("Generate Audio", variant="primary")

	# Output widgets: the audio file path and the status text returned by the handler.
	with gr.Row():
	audio_output = gr.Audio(label="Generated Audio", type="filepath")
	status_output = gr.Textbox(label="Status")

	# Update the seed field whenever the checkbox value changes.
	random_seed_checkbox.change(fn=toggle_seed_input, inputs=[random_seed_checkbox], outputs=[seed_input])

	# Wire the button to text_to_speech_app; the inputs list follows the handler's
	# parameter order, and the event is registered under api_name "generate".
	submit_button.click(
	fn=text_to_speech_app,
	inputs=[prompt_input, voice_dropdown, emotion_input, random_seed_checkbox, seed_input, api_key_input],
	outputs=[audio_output, status_output],
	# Concurrency must be HIGH to allow Premium users to bypass sleeping Free users
	concurrency_limit=20,
	api_name="generate"
	)

	# gr.Examples(
	# examples=[
	# ["Hello there! This is a test of the text-to-speech system.", "alloy", "neutral", False, 12345, ""],
	# ["Surely you wouldn't want that. [laughs]", "shimmer", "sarcastic and mocking", True, 12345, ""],
	# ["[sobbing] I am feeling... [sighs] a bit down today [cry]", "onyx", "sad and depressed, with stammering", True, 662437, ""],
	# ["This technology is absolutely amazing!", "nova", "excited and joyful", True, 12345, ""],
	# ],
	# inputs=[prompt_input, voice_dropdown, emotion_input, random_seed_checkbox, seed_input, api_key_input],
	# outputs=[audio_output, status_output],
	# fn=text_to_speech_app,
	# cache_examples=False,
	# )

	if __name__ == "__main__":
	    # Enable the request queue first, then start the server bound to 0.0.0.0
	    # with show_error turned on.
	    queued_app = app.queue()
	    queued_app.launch(show_error=True, server_name="0.0.0.0")