| import gradio as gr |
| import requests |
| import random |
| import tempfile |
| import os |
| import base64 |
| import time |
| import threading |
| from datetime import datetime, timedelta, timezone |
|
|
| |
# Chat-completions endpoint that generate_audio_api() posts to.
API_URL = "https://gen.pollinations.ai/v1/chat/completions"

# Key used when the visitor does not supply their own; None if the
# POLLINATIONS_TOKEN environment variable is not set.
SYSTEM_API_KEY = os.environ.get("POLLINATIONS_TOKEN")

# Choices offered by the voice dropdown.
VOICES = [
    "alloy",
    "echo",
    "fable",
    "onyx",
    "nova",
    "shimmer",
]

# Free-tier restrictions enforced by text_to_speech_app().
MAX_CHAR_LIMIT = 200       # longest prompt accepted without a user key
COOLDOWN_SECONDS = 50      # minimum gap between two free-tier API calls
LAST_REQUEST_TIME = 0      # time.time() of the latest free-tier call (0 = none yet)

# Serializes free-tier requests so the cooldown is applied one caller at a time.
free_tier_lock = threading.Lock()
|
|
def generate_audio_api(prompt: str, voice: str, emotion: str, seed: int, api_key: str) -> bytes:
    """Request synthesized speech from the chat-completions API at ``API_URL``.

    The prompt is sent as the user message, preceded by a system message that
    asks the model to repeat it verbatim in the requested emotion.

    Args:
        prompt: Text to be spoken.
        voice: Voice name placed in the payload's ``audio.voice`` field.
        emotion: Free-form emotion description embedded in the system message.
        seed: Seed value forwarded in the payload.
        api_key: Bearer token; the Authorization header is omitted when falsy.

    Returns:
        The base64-decoded MP3 bytes taken from the first choice of the response.

    Raises:
        gr.Error: On a network failure, a non-200 status, a body that is not
            JSON, or a response that carries no decodable audio.
    """
    print(f"DEBUG: Generating audio | Voice: {voice} | Emotion: {emotion}")

    headers = {"Content-Type": "application/json"}
    if api_key:
        headers["Authorization"] = f"Bearer {api_key}"

    system_instruction = (
        "Only repeat what I say. "
        f"Now say with proper emphasis in a \"{emotion}\" emotion this statement."
    )

    payload = {
        "model": "openai-audio",
        "modalities": ["text", "audio"],
        "audio": {"voice": voice, "format": "mp3"},
        "messages": [
            {"role": "system", "content": system_instruction},
            {"role": "user", "content": prompt},
        ],
        "seed": seed,
    }

    # Only the HTTP call itself can raise a transport-level error, so keep the
    # try body to that single statement.
    try:
        response = requests.post(API_URL, headers=headers, json=payload, timeout=60)
    except requests.exceptions.RequestException as e:
        raise gr.Error(f"Network error: {str(e)}") from e

    status = response.status_code
    if status != 200:
        if status in (402, 429):
            raise gr.Error("⚠️ Server Limit: The Free Tier is busy. Please wait or use a Premium Key.")
        if status == 401:
            raise gr.Error("401 Unauthorized: The provided API Key is invalid.")
        raise gr.Error(f"API Error {status}: {response.text}")

    # A 200 response with a non-JSON body is a malformed reply, not a network
    # problem; every JSON decode error raised by requests is a ValueError.
    try:
        data = response.json()
    except ValueError as e:
        raise gr.Error("API returned a response that is not valid JSON.") from e

    # The audio can be absent in several shapes: a missing key (KeyError), an
    # empty "choices" list (IndexError), a null intermediate value (TypeError)
    # or invalid base64 text (binascii.Error, a ValueError subclass).
    try:
        audio_b64 = data['choices'][0]['message']['audio']['data']
        audio_bytes = base64.b64decode(audio_b64)
    except (KeyError, IndexError, TypeError, ValueError) as e:
        raise gr.Error("API returned empty audio data.") from e

    if not audio_bytes:
        # An empty payload would otherwise be written out as a zero-byte MP3.
        raise gr.Error("API returned empty audio data.")
    return audio_bytes
|
|
|
|
def text_to_speech_app(prompt: str, voice: str, emotion: str, use_random_seed: bool, specific_seed: int, api_key_input: str):
    """Gradio click handler: validate the form, generate speech, save it as MP3.

    A request whose user key is longer than 5 characters is treated as premium
    and sent straight to the API with that key. Every other request is free
    tier: it uses ``SYSTEM_API_KEY``, is limited to ``MAX_CHAR_LIMIT``
    characters, and is serialized through ``free_tier_lock`` with at least
    ``COOLDOWN_SECONDS`` between consecutive API calls.

    Args:
        prompt: Text to convert to speech.
        voice: Voice name passed through to ``generate_audio_api``.
        emotion: Emotion description passed through to ``generate_audio_api``.
        use_random_seed: When true, a random 32-bit seed is drawn.
        specific_seed: Seed to use when ``use_random_seed`` is false.
        api_key_input: Optional user-supplied API key.

    Returns:
        ``(path, status)`` where ``path`` is the temporary ``.mp3`` file that
        was written, or ``(None, message)`` when generation fails.

    Raises:
        gr.Error: If the prompt is empty, or a free-tier prompt exceeds
            ``MAX_CHAR_LIMIT``.
    """
    global LAST_REQUEST_TIME

    # Treat a missing (None) field like an empty one instead of crashing on .strip().
    prompt = prompt or ""
    user_provided_key = (api_key_input or "").strip()

    is_paid_user = len(user_provided_key) > 5
    # Input too short to count as a premium key is ignored altogether, so a
    # free-tier request is never sent out with a junk bearer token.
    active_key = user_provided_key if is_paid_user else SYSTEM_API_KEY

    if not prompt.strip():
        raise gr.Error("Prompt cannot be empty.")

    if not is_paid_user and len(prompt) > MAX_CHAR_LIMIT:
        raise gr.Error(f"Free Tier Limit: {MAX_CHAR_LIMIT} chars. Enter an API Key for unlimited length.")

    try:
        if use_random_seed:
            seed = random.randint(0, 2**32 - 1)
        else:
            try:
                seed = int(specific_seed)
            except (TypeError, ValueError, OverflowError):
                # A blank or non-numeric seed gets a readable status rather
                # than the raw int() error text.
                return None, "Invalid seed: enter a whole number or enable 'Use Random Seed'."

        if is_paid_user:
            print("LOG: Premium Request Processing...")
            audio_bytes = generate_audio_api(prompt, voice, emotion, seed, active_key)
            status_msg = "Generated! (Premium Speed ⚡)"
        else:
            print("LOG: Free Request - Attempting to acquire lock...")
            with free_tier_lock:
                elapsed_time = time.time() - LAST_REQUEST_TIME
                if elapsed_time < COOLDOWN_SECONDS:
                    wait_time = COOLDOWN_SECONDS - elapsed_time
                    print(f"Queueing User: Sleeping for {wait_time:.1f}s...")
                    time.sleep(wait_time)

                try:
                    audio_bytes = generate_audio_api(prompt, voice, emotion, seed, active_key)
                finally:
                    # Start the cooldown even when the call failed: a failed
                    # attempt still reached the API, and a rate-limit reply is
                    # exactly when the next caller should back off.
                    LAST_REQUEST_TIME = time.time()
            status_msg = "Generated! (Free Tier 🐢)"

        # delete=False keeps the file on disk after closing so the returned
        # path is still readable by the caller.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio_file:
            temp_audio_file.write(audio_bytes)
        return temp_audio_file.name, status_msg

    except Exception as e:
        # Top-level boundary: report the failure in the status box.
        # NOTE(review): gr.Error appears to keep its text in `.message` while
        # str() may add quotes — confirm against the installed Gradio version.
        return None, str(getattr(e, "message", e))
|
|
|
|
def toggle_seed_input(use_random_seed):
    """Build the update for the seed field when the random-seed box changes.

    The field is shown only while random seeding is switched off, and its
    value is set back to 12345 on every toggle.
    """
    show_seed_field = not use_random_seed
    return gr.update(visible=show_seed_field, value=12345)
|
|
|
|
| |
# ---------------------------------------------------------------------------
# UI definition. Components are created in the order they appear on the page.
# ---------------------------------------------------------------------------
with gr.Blocks() as app:
    gr.Markdown("# Advanced OpenAI Text-To-Speech Unlimited")
    gr.Markdown(
        """
        Enter text, choose a voice and emotion, and generate audio.

        **🚀 Skip the Queue & Run Locally:**
        To avoid the **50s cooldown** and generate instantly:
        1. **Duplicate this Space** to run it privately.
        2. Get your own API key from [Pollinations.ai](https://enter.pollinations.ai/).
        3. Paste it into the `POLLINATIONS_TOKEN` secret in your duplicated space.

        **Commercial Use:**
        For a **Lifetime Commercial Use License**, please refer to our terms here:
        [LICENSE TERMS](https://huggingface.co/spaces/NihalGazi/Text-To-Speech-Unlimited/raw/main/COMMERCIAL_LICENSE_TERMS.txt)

        **Like & follow** for more AI projects:
        • Instagram: [@nihal_gazi_io](https://www.instagram.com/nihal_gazi_io/)
        • X.com: [@NihalGazi_](https://x.com/NihalGazi_?t=f9UtAv005GppiIIXFEWMSQ&s=09)
        • Discord: nihal_gazi_io
        """
    )

    with gr.Row():
        # Wide column: key, prompt, emotion and voice.
        with gr.Column(scale=2):
            api_key_input = gr.Textbox(
                label="Pollinations API Key (Optional)",
                placeholder="sk_...",
                type="password",
                info="Enter your key to REMOVE the 50s cooldown and generate instantly.",
            )
            prompt_input = gr.Textbox(
                label=f"Prompt (Max {MAX_CHAR_LIMIT} chars)",
                placeholder="Enter the text you want to convert to speech...",
                max_lines=3,
            )
            emotion_input = gr.Textbox(
                label="Emotion Style",
                placeholder="e.g., happy, sad, excited, calm...",
            )
            voice_dropdown = gr.Dropdown(label="Voice", choices=VOICES, value="alloy")
        # Narrow column: seed controls.
        with gr.Column(scale=1):
            random_seed_checkbox = gr.Checkbox(label="Use Random Seed", value=True)
            seed_input = gr.Number(
                label="Specific Seed",
                value=12345,
                visible=False,
                precision=0,
            )

    submit_button = gr.Button("Generate Audio", variant="primary")

    with gr.Row():
        audio_output = gr.Audio(label="Generated Audio", type="filepath")
        status_output = gr.Textbox(label="Status")

    # The same component lists feed both the button and the examples table;
    # their order matches the parameters of text_to_speech_app().
    handler_inputs = [
        prompt_input,
        voice_dropdown,
        emotion_input,
        random_seed_checkbox,
        seed_input,
        api_key_input,
    ]
    handler_outputs = [audio_output, status_output]

    # Show or hide the seed field whenever the checkbox flips.
    random_seed_checkbox.change(
        fn=toggle_seed_input,
        inputs=[random_seed_checkbox],
        outputs=[seed_input],
    )

    submit_button.click(
        fn=text_to_speech_app,
        inputs=handler_inputs,
        outputs=handler_outputs,
        concurrency_limit=20,
    )

    example_rows = [
        ["Hello there! This is a test of the text-to-speech system.", "alloy", "neutral", False, 12345, ""],
        ["Surely *you* wouldn't want *that*. [laughs]", "shimmer", "sarcastic and mocking", True, 12345, ""],
        ["[sobbing] I am feeling... [sighs] a bit down today [cry]", "onyx", "sad and depressed, with stammering", True, 662437, ""],
        ["This technology is absolutely amazing!", "nova", "excited and joyful", True, 12345, ""],
    ]
    gr.Examples(
        examples=example_rows,
        inputs=handler_inputs,
        outputs=handler_outputs,
        fn=text_to_speech_app,
        cache_examples=False,
    )


if __name__ == "__main__":
    app.launch()