| import os |
| import gradio as gr |
| from groq import Groq |
| import tempfile |
| import requests |
| import json |
| import base64 |
| from pathlib import Path |
| import time |
| import sys |
| import platform |
|
|
| |
# Voice names offered in the dropdown for the "Groq (PlayAI)" provider.
# Every identifier is a speaker name followed by the "-PlayAI" suffix.
groq_voices = [
    f"{speaker}-PlayAI"
    for speaker in (
        "Arista", "Atlas", "Basil", "Briggs", "Calum", "Celeste", "Cheyenne",
        "Chip", "Cillian", "Deedee", "Fritz", "Gail", "Indigo", "Mamaw",
        "Mason", "Mikail", "Mitch", "Quinn", "Thunder",
    )
]

# Voice names offered in the dropdown for the "OpenAI TTS" provider.
openai_voices = [
    "alloy",
    "ash",
    "coral",
    "sage",
    "echo",
    "fable",
    "onyx",
    "nova",
    "shimmer",
]

# Voice names offered in the dropdown for the "Edge TTS (Free)" provider.
# Every identifier has the form "en-US-<Name>Neural".
edge_voices = [
    f"en-US-{speaker}Neural"
    for speaker in (
        "Aria", "Ana", "Christopher", "Eric", "Guy",
        "Jenny", "Michelle", "Roger", "Steffan",
    )
]
|
|
def check_internet_connection():
    """Probe for connectivity by opening a TCP connection to 8.8.8.8:53.

    Returns:
        bool: True if the connection could be opened within 3 seconds,
        False if the attempt failed with an ``OSError`` (timeouts included).
    """
    import socket

    try:
        # The context manager closes the probe socket immediately, so repeated
        # checks do not accumulate open descriptors.
        with socket.create_connection(("8.8.8.8", 53), timeout=3):
            return True
    except OSError:
        return False
|
|
def is_huggingface_space():
    """Return True when running on Hugging Face Spaces.

    Detection is based solely on the ``SPACE_ID`` environment variable being
    set (to any value, including an empty string).
    """
    return "SPACE_ID" in os.environ
|
|
def chunk_text(text, max_length=4000):
    """Split *text* into chunks of at most *max_length* characters.

    Text that already fits is returned unchanged as a single chunk. Longer
    text is split on whitespace (so runs of whitespace, including newlines,
    collapse to single spaces) and the words are packed greedily into
    space-joined chunks. A single word longer than *max_length* is cut into
    *max_length*-sized pieces so that no chunk ever exceeds the limit.

    Args:
        text: The text to split.
        max_length: Maximum number of characters allowed per chunk.

    Returns:
        list[str]: The chunks, in their original order. Text that is longer
        than *max_length* but consists only of whitespace yields ``[]``.

    Raises:
        ValueError: If the text needs splitting and *max_length* is below 1.
    """
    if len(text) <= max_length:
        return [text]
    if max_length < 1:
        raise ValueError("max_length must be at least 1")

    chunks = []
    current_words = []
    current_length = 0  # exact length of " ".join(current_words)

    def flush():
        """Emit the words collected so far as one chunk and start a new one."""
        nonlocal current_words, current_length
        if current_words:
            chunks.append(" ".join(current_words))
        current_words = []
        current_length = 0

    for word in text.split():
        # A word that can never fit is hard-split into full-size pieces; the
        # remainder (1..max_length characters) is packed like a normal word.
        while len(word) > max_length:
            flush()
            chunks.append(word[:max_length])
            word = word[max_length:]

        # The joining space only costs a character when the chunk is non-empty.
        separator = 1 if current_words else 0
        if current_length + separator + len(word) > max_length:
            flush()
            separator = 0
        current_words.append(word)
        current_length += separator + len(word)

    flush()
    return chunks
|
|
def try_groq_tts(api_key, text, voice):
    """Synthesize *text* with Groq's ``playai-tts`` model in one request.

    Only text that ``chunk_text(text, 3500)`` keeps as a single chunk is sent.
    Anything longer is rejected with a warning message; there is no
    multi-chunk synthesis and no retry.

    Args:
        api_key: Groq API key used to build the client.
        text: Text to convert to speech.
        voice: Voice name passed to the API.

    Returns:
        tuple: ``(path_to_wav, message)`` on success, ``(None, message)``
        when the request was not made or failed.
    """
    # NOTE(review): the leading "β"/"π" glyphs in the status strings look like
    # mis-decoded emoji; they are kept as found — confirm against the original.
    temp_path = None
    try:
        if not check_internet_connection():
            return None, "β No internet connection available for Groq TTS"

        client = Groq(api_key=api_key)

        if len(chunk_text(text, 3500)) != 1:
            return None, f"β οΈ Text too long for single request ({len(text)} chars). Try shorter text or use Edge TTS for longer content."

        response = client.audio.speech.create(
            model="playai-tts",
            voice=voice,
            input=text,
            response_format="wav",
        )

        # Reserve the path and close our handle before the client writes to it
        # by name, so the file is never open twice at once.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
            temp_path = temp_file.name
        response.write_to_file(temp_path)
        return temp_path, "β Speech generated successfully with Groq PlayAI!"

    except Exception as e:
        # Do not leave a half-written temp file behind on failure.
        if temp_path is not None and os.path.exists(temp_path):
            try:
                os.remove(temp_path)
            except OSError:
                pass

        error_msg = str(e)
        if "rate_limit_exceeded" in error_msg or "429" in error_msg:
            return None, "π Groq rate limit reached. Try again in a moment or use Edge TTS..."
        return None, f"β Groq error: {error_msg}"
|
|
def try_openai_tts(api_key, text, voice):
    """Synthesize up to the first 4,000 characters of *text* with OpenAI ``tts-1``.

    Args:
        api_key: OpenAI API key used to build the client.
        text: Text to convert to speech; anything past 4,000 characters is
            dropped before the request.
        voice: Voice name passed to the API.

    Returns:
        tuple: ``(path_to_wav, message)`` on success, ``(None, message)``
        when the library is missing, there is no connectivity, or the
        request failed.
    """
    # NOTE(review): the leading "β" glyphs in the status strings look like
    # mis-decoded emoji; they are kept as found — confirm against the original.
    temp_path = None
    try:
        if not check_internet_connection():
            return None, "β No internet connection available for OpenAI TTS"

        import openai
        client = openai.OpenAI(api_key=api_key)

        response = client.audio.speech.create(
            model="tts-1",
            voice=voice,
            input=text[:4000],
            # Request WAV explicitly so the payload matches the ".wav" suffix
            # of the file it is stored in (the API defaults to MP3).
            response_format="wav",
        )

        # Reserve the path and close our handle before the client writes to it.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
            temp_path = temp_file.name
        response.stream_to_file(temp_path)
        return temp_path, "β Speech generated successfully with OpenAI TTS!"

    except ImportError:
        return None, "β OpenAI library not installed. Install with: pip install openai"
    except Exception as e:
        # Do not leave a half-written temp file behind on failure.
        if temp_path is not None and os.path.exists(temp_path):
            try:
                os.remove(temp_path)
            except OSError:
                pass
        return None, f"β OpenAI error: {str(e)}"
|
|
def try_edge_tts(text, voice="en-US-JennyNeural"):
    """Synthesize up to the first 10,000 characters of *text* with edge-tts.

    Args:
        text: Text to convert to speech; anything past 10,000 characters is
            dropped before synthesis.
        voice: Edge voice name (defaults to ``"en-US-JennyNeural"``).

    Returns:
        tuple: ``(path_to_mp3, message)`` on success, ``(None, message)``
        when the library is missing, there is no connectivity, or synthesis
        failed or produced no audio.
    """
    # NOTE(review): the leading "β" glyphs in the status strings look like
    # mis-decoded emoji; they are kept as found — confirm against the original.
    temp_path = None
    try:
        if not check_internet_connection():
            return None, "β No internet connection available for Edge TTS"

        import asyncio
        import edge_tts

        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_file:
            temp_path = temp_file.name

        async def generate_edge_speech():
            """Stream the synthesized audio chunks into the temp file."""
            communicate = edge_tts.Communicate(text[:10000], voice)
            with open(temp_path, "wb") as audio_out:
                async for chunk in communicate.stream():
                    if chunk["type"] == "audio":
                        audio_out.write(chunk["data"])

        # asyncio.run() creates a fresh event loop and closes it afterwards,
        # so nothing is leaked when this runs in a thread without a loop.
        asyncio.run(generate_edge_speech())

        # An empty file is not playable audio; report it as a failure.
        if os.path.getsize(temp_path) == 0:
            raise RuntimeError("No audio was received")

        return temp_path, "β Speech generated successfully with Microsoft Edge TTS (Free & High Quality)!"

    except ImportError:
        return None, "β Edge TTS not installed. Install with: pip install edge-tts"
    except Exception as e:
        # Do not leave an empty or partial temp file behind on failure.
        if temp_path is not None and os.path.exists(temp_path):
            try:
                os.remove(temp_path)
            except OSError:
                pass
        return None, f"β Edge TTS error: {str(e)}"
|
|
def _pick_english_voice(voices):
    """Return the id of the preferred English pyttsx3 voice, or None.

    The last voice whose id or name contains an English keyword wins, except
    that the scan stops at the first match whose name also contains a
    female-voice keyword.
    """
    english_keywords = ['english', 'en-us', 'en_us', 'zira', 'hazel', 'eva']
    female_keywords = ['zira', 'hazel', 'eva', 'female']

    chosen = None
    for voice in voices or []:
        voice_id = voice.id.lower()
        voice_name = voice.name.lower() if hasattr(voice, 'name') else ""

        if any(keyword in voice_id or keyword in voice_name for keyword in english_keywords):
            chosen = voice.id
            if any(keyword in voice_name for keyword in female_keywords):
                break
    return chosen


def try_pyttsx3_fallback(text):
    """Synthesize the first 2,000 characters of *text* offline with pyttsx3.

    Not attempted on Hugging Face Spaces. If pyttsx3 raises or produces an
    empty file, ``try_alternative_offline_tts`` is tried with the same
    truncated text.

    Args:
        text: Text to convert to speech.

    Returns:
        tuple: ``(path_to_wav, message)`` on success, otherwise
        ``(None, message)`` or whatever the alternative offline TTS returns.
    """
    # NOTE(review): the leading "β" glyphs in the status strings look like
    # mis-decoded emoji; they are kept as found — confirm against the original.
    if is_huggingface_space():
        return None, "β Offline TTS not available in Hugging Face Spaces. Use Edge TTS (Free) instead!"

    limited_text = text[:2000]
    temp_path = None

    try:
        import pyttsx3

        engine = pyttsx3.init()

        english_voice = _pick_english_voice(engine.getProperty('voices'))
        if english_voice:
            engine.setProperty('voice', english_voice)

        engine.setProperty('rate', 180)
        engine.setProperty('volume', 0.9)

        # Reserve a path; pyttsx3 writes to it by name once our handle is closed.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
            temp_path = temp_file.name

        engine.save_to_file(limited_text, temp_path)
        engine.runAndWait()

        if os.path.exists(temp_path) and os.path.getsize(temp_path) > 0:
            return temp_path, "β Speech generated with offline TTS! (No internet required)"

    except ImportError:
        return None, "β pyttsx3 not installed. Use Edge TTS (Free) instead!"
    except Exception:
        # Any pyttsx3 failure is treated as "engine unusable here"; the
        # system-command fallback below gets a chance instead.
        pass

    # pyttsx3 produced nothing usable: drop its temp file before falling back,
    # since the fallback allocates its own.
    if temp_path is not None and os.path.exists(temp_path):
        try:
            os.remove(temp_path)
        except OSError:
            pass
    return try_alternative_offline_tts(limited_text)
|
|
def try_alternative_offline_tts(text):
    """Synthesize *text* offline using a speech tool provided by the OS.

    Tries SAPI through ``win32com`` on Windows, ``say`` on macOS, and
    ``espeak`` then ``text2wave`` on Linux. Not attempted on Hugging Face
    Spaces. The command-line tools receive the text on standard input rather
    than as an argument, so text beginning with ``-`` cannot be mistaken for
    a command-line option.

    Args:
        text: Text to convert to speech.

    Returns:
        tuple: ``(path_to_wav, message)`` on success, ``(None, message)``
        when no tool produced a non-empty file or an error occurred.
    """
    # NOTE(review): the leading "β" glyphs in the status strings look like
    # mis-decoded emoji; they are kept as found — confirm against the original.
    if is_huggingface_space():
        return None, "β System TTS not available in Hugging Face Spaces. Use Edge TTS (Free) instead!"

    import subprocess

    tool_errors = (subprocess.CalledProcessError, subprocess.TimeoutExpired, FileNotFoundError)
    temp_path = None

    def has_audio():
        """True when the temp file exists and is non-empty."""
        return os.path.exists(temp_path) and os.path.getsize(temp_path) > 0

    def discard_temp_file():
        """Remove the temp file, ignoring filesystem errors."""
        if temp_path is not None and os.path.exists(temp_path):
            try:
                os.remove(temp_path)
            except OSError:
                pass

    try:
        system = platform.system().lower()

        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
            temp_path = temp_file.name

        if system == "windows":
            try:
                import win32com.client
            except ImportError:
                pass
            else:
                speaker = win32com.client.Dispatch("SAPI.SpVoice")
                file_stream = win32com.client.Dispatch("SAPI.SpFileStream")
                file_stream.Open(temp_path, 3)
                try:
                    speaker.AudioOutputStream = file_stream
                    speaker.Speak(text)
                finally:
                    # Always release the stream, even if Speak() raises.
                    file_stream.Close()

                if has_audio():
                    return temp_path, "β Speech generated with Windows SAPI TTS (Offline)!"

        elif system == "darwin":
            try:
                # "-f -" makes `say` read the text from standard input.
                subprocess.run(['say', '-o', temp_path, '--data-format=LEF32@22050', '-f', '-'],
                               input=text, text=True, check=True, timeout=30)
                if has_audio():
                    return temp_path, "β Speech generated with macOS TTS (Offline)!"
            except tool_errors:
                pass

        elif system == "linux":
            try:
                # "--stdin" makes espeak read the text from standard input.
                subprocess.run(['espeak', '--stdin', '-w', temp_path],
                               input=text, text=True, check=True, timeout=30)
                if has_audio():
                    return temp_path, "β Speech generated with espeak TTS (Offline)!"
            except tool_errors:
                try:
                    subprocess.run(['text2wave', '-o', temp_path],
                                   input=text, text=True, check=True, timeout=30)
                    if has_audio():
                        return temp_path, "β Speech generated with Festival TTS (Offline)!"
                except tool_errors:
                    pass

        # Nothing produced audio: do not leave the reserved temp file behind.
        discard_temp_file()
        return None, f"β No offline TTS available for {system}. Use Edge TTS (Free) instead!"

    except Exception as e:
        discard_temp_file()
        return None, f"β Alternative offline TTS error: {str(e)}"
|
|
| |
def generate_speech(groq_api_key, openai_api_key, text, voice_provider, voice):
    """Generate speech for *text*, trying providers until one succeeds.

    Providers are attempted in this order, each only when the selected
    *voice_provider* includes it:

    1. Groq PlayAI - for "Groq (PlayAI)" or "Auto (Try All)", when a Groq
       key is given.
    2. OpenAI TTS - for "OpenAI TTS" or "Auto (Try All)", when an OpenAI key
       is given.
    3. Edge TTS - for every provider except "Offline TTS" (it is the fallback
       for the two keyed providers).
    4. Offline TTS - only for "Offline TTS", and never on Hugging Face Spaces.

    A *voice* that does not belong to the provider being tried is replaced by
    that provider's default voice.

    Args:
        groq_api_key: Groq API key, or empty to skip Groq.
        openai_api_key: OpenAI API key, or empty to skip OpenAI.
        text: Text to convert (at most 10,000 characters).
        voice_provider: Provider label chosen in the UI.
        voice: Voice name chosen in the UI.

    Returns:
        tuple: ``(audio_path, status_text)`` on success, ``(None, status_text)``
        when input is rejected or every attempted provider failed.
    """
    # NOTE(review): the leading "β"/"π" glyphs in the status strings look like
    # mis-decoded emoji; they are kept as found — confirm against the original.
    if not text or not text.strip():
        return None, "β οΈ Please enter some text to generate speech."

    if len(text) > 10000:
        return None, "π« Text input exceeds 10,000 character limit."

    has_internet = check_internet_connection()
    is_hf_space = is_huggingface_space()

    internet_status = "π Internet: Connected" if has_internet else "π΄ Internet: Offline"
    env_status = " (Hugging Face Spaces)" if is_hf_space else " (Local Environment)"

    status_msg = f"π Generating speech...\n{internet_status}{env_status}\n"

    # Offline TTS is the one provider that does not need connectivity, so the
    # no-internet bail-out must not apply to it.
    offline_requested = voice_provider == "Offline TTS"
    if not has_internet and not offline_requested:
        return None, status_msg + "β No internet connection available. All TTS providers require internet connection in this environment."

    if groq_api_key and voice_provider in ("Groq (PlayAI)", "Auto (Try All)"):
        status_msg += "π― Using Groq PlayAI...\n"
        groq_voice = voice if voice in groq_voices else "Fritz-PlayAI"
        audio_file, message = try_groq_tts(groq_api_key, text, groq_voice)
        if audio_file:
            return audio_file, status_msg + message
        status_msg += f"β Groq failed: {message}\n"

    if openai_api_key and voice_provider in ("OpenAI TTS", "Auto (Try All)"):
        status_msg += "π Trying OpenAI TTS...\n"
        openai_voice = voice if voice in openai_voices else "alloy"
        audio_file, message = try_openai_tts(openai_api_key, text, openai_voice)
        if audio_file:
            return audio_file, status_msg + message
        status_msg += f"β OpenAI failed: {message}\n"

    if voice_provider in ("Edge TTS (Free)", "Auto (Try All)", "Groq (PlayAI)", "OpenAI TTS"):
        status_msg += "π Using Edge TTS (Free & High Quality)...\n"
        edge_voice = voice if voice in edge_voices else "en-US-JennyNeural"
        audio_file, message = try_edge_tts(text, edge_voice)
        if audio_file:
            return audio_file, status_msg + message
        status_msg += f"β Edge TTS failed: {message}\n"

    if offline_requested:
        if is_hf_space:
            return None, status_msg + "β Offline TTS is not available in Hugging Face Spaces. Please use 'Edge TTS (Free)' instead!"
        status_msg += "π Using offline TTS (works without internet)...\n"
        audio_file, message = try_pyttsx3_fallback(text)
        if audio_file:
            return audio_file, status_msg + message
        status_msg += f"β Offline TTS failed: {message}\n"

    if is_hf_space:
        return None, status_msg + "β All online TTS providers failed. Please check your API keys or try again later. Edge TTS (Free) is recommended for HF Spaces!"
    return None, status_msg + "β All TTS providers failed. Please check your setup or try shorter text."
|
|
def update_voice_options(provider):
    """Build the voice dropdown matching the selected TTS *provider*.

    Known providers map to their voice list and default voice. Any other
    value (including "Auto (Try All)") gets the Groq voices together with a
    label explaining that Auto mode picks the best match.
    """
    # NOTE(review): the leading "π" glyph in the label looks like a
    # mis-decoded emoji; it is kept as found — confirm against the original.
    voices_by_provider = {
        "Groq (PlayAI)": (groq_voices, "Fritz-PlayAI"),
        "OpenAI TTS": (openai_voices, "alloy"),
        "Edge TTS (Free)": (edge_voices, "en-US-JennyNeural"),
        "Offline TTS": (["Default System Voice"], "Default System Voice"),
    }

    entry = voices_by_provider.get(provider)
    if entry is None:
        return gr.Dropdown(
            choices=groq_voices,
            value="Fritz-PlayAI",
            visible=True,
            label="π Voice (Auto mode will try best match)",
        )

    voice_choices, default_voice = entry
    return gr.Dropdown(choices=voice_choices, value=default_voice, visible=True)
|
|
| |
# Stylesheet passed to gr.Blocks(css=...): dark glass-style theme for the app.
custom_css = """
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap');
* {
    box-sizing: border-box;
}
:root {
    --primary-gradient: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    --secondary-gradient: linear-gradient(135deg, #f093fb 0%, #f5576c 100%);
    --accent-gradient: linear-gradient(135deg, #4facfe 0%, #00f2fe 100%);
    --dark-bg: linear-gradient(135deg, #0c0c0c 0%, #1a1a2e 50%, #16213e 100%);
    --glass-bg: rgba(255, 255, 255, 0.08);
    --glass-border: rgba(255, 255, 255, 0.15);
    --text-primary: #ffffff;
    --text-secondary: rgba(255, 255, 255, 0.7);
    --shadow-primary: 0 8px 32px rgba(0, 0, 0, 0.4);
    --shadow-hover: 0 12px 48px rgba(0, 0, 0, 0.6);
    --border-radius: 16px;
}

body, .gradio-container {
    background: var(--dark-bg) !important;
    font-family: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif !important;
    color: var(--text-primary) !important;
    overflow-x: hidden;
}

.gradio-container {
    max-width: 1400px !important;
    margin: 40px auto !important;
    padding: 0 20px !important;
    min-height: 100vh;
}

.gradio-container > div {
    background: var(--glass-bg) !important;
    backdrop-filter: blur(20px) !important;
    border: 1px solid var(--glass-border) !important;
    border-radius: var(--border-radius) !important;
    padding: 40px !important;
    box-shadow: var(--shadow-primary) !important;
    position: relative;
    overflow: hidden;
}

h1 {
    text-align: center !important;
    font-size: clamp(2.5rem, 5vw, 4rem) !important;
    font-weight: 700 !important;
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
    -webkit-background-clip: text !important;
    -webkit-text-fill-color: transparent !important;
    background-clip: text !important;
    margin: 0 0 50px 0 !important;
    animation: glow 3s ease-in-out infinite alternate;
}

@keyframes glow {
    0% { filter: drop-shadow(0 0 20px rgba(102, 126, 234, 0.3)); }
    100% { filter: drop-shadow(0 0 30px rgba(118, 75, 162, 0.5)); }
}

#generate-btn {
    background: linear-gradient(135deg, #00f2fe 0%, #4facfe 100%) !important;
    border: 2px solid #4facfe !important;
    color: #ffffff !important;
    font-weight: bold !important;
    box-shadow: 0 0 12px rgba(0, 242, 254, 0.5) !important;
    transition: all 0.3s ease !important;
    margin-top: 26px !important;
}

#generate-btn:hover {
    background: linear-gradient(135deg, #4facfe 0%, #00f2fe 100%) !important;
    transform: scale(1.03) !important;
    box-shadow: 0 0 18px rgba(0, 242, 254, 0.7) !important;
}

.gr-textbox, .gr-dropdown {
    background: var(--glass-bg) !important;
    border: 2px solid transparent !important;
    border-radius: 12px !important;
    position: relative;
    transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1) !important;
    backdrop-filter: blur(10px) !important;
}

.gr-textbox textarea, .gr-textbox input, .gr-dropdown select {
    background: transparent !important;
    color: var(--text-primary) !important;
    border: none !important;
    font-size: 16px !important;
    font-weight: 400 !important;
    padding: 16px !important;
}

.gr-input-label, .gr-output-label {
    color: var(--text-primary) !important;
    font-weight: 600 !important;
    font-size: 14px !important;
    text-transform: uppercase;
    letter-spacing: 0.5px;
    margin-bottom: 12px !important;
    background: var(--accent-gradient);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    background-clip: text;
}

.gr-button {
    background: var(--primary-gradient) !important;
    border: none !important;
    color: white !important;
    font-weight: 600 !important;
    font-size: 16px !important;
    padding: 16px 32px !important;
    border-radius: 12px !important;
    transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1) !important;
    text-transform: uppercase;
    letter-spacing: 0.5px;
    box-shadow: 0 4px 20px rgba(102, 126, 234, 0.3) !important;
    margin-top: 20px !important;
}

.gr-button:hover {
    transform: translateY(-2px) !important;
    box-shadow: var(--shadow-hover) !important;
    filter: brightness(1.1);
}

.gr-audio {
    background: var(--glass-bg) !important;
    border: 1px solid var(--glass-border) !important;
    border-radius: 12px !important;
    padding: 20px !important;
    backdrop-filter: blur(10px) !important;
    transition: all 0.3s ease !important;
}

.info-box {
    background: rgba(0, 242, 254, 0.1) !important;
    border: 1px solid rgba(0, 242, 254, 0.3) !important;
    border-radius: 12px !important;
    padding: 20px !important;
    margin: 20px 0 !important;
    backdrop-filter: blur(10px) !important;
}

.info-box,
.info-box * {
    color: white !important;
}

.warning-box,
.warning-box * {
    color: white !important;
}

.no-api-box {
    background: rgba(46, 204, 113, 0.1) !important;
    border: 1px solid rgba(46, 204, 113, 0.3) !important;
    border-radius: 12px !important;
    padding: 20px !important;
    margin: 20px 0 !important;
    backdrop-filter: blur(10px) !important;
}

.no-api-box,
.no-api-box * {
    color: white !important;
}

.warning-box {
    background: rgba(255, 193, 7, 0.1) !important;
    border: 1px solid rgba(255, 193, 7, 0.3) !important;
    border-radius: 12px !important;
    padding: 15px !important;
    margin: 15px 0 !important;
    backdrop-filter: blur(10px) !important;
}

.hf-space-box {
    background: rgba(255, 87, 34, 0.1) !important;
    border: 1px solid rgba(255, 87, 34, 0.3) !important;
    border-radius: 12px !important;
    padding: 20px !important;
    margin: 20px 0 !important;
    backdrop-filter: blur(10px) !important;
}

.hf-space-box,
.hf-space-box * {
    color: white !important;
}
"""
|
|
| |
def get_custom_html():
    """Return the header banner HTML for the current environment.

    One variant is returned when ``is_huggingface_space()`` is true (it
    recommends Edge TTS and notes that offline TTS is unavailable); the other
    is for local runs and advertises offline TTS as well.
    """
    # NOTE(review): glyphs such as "β", "π" and "β’" in the markup look like
    # mis-decoded emoji/bullets; kept as found — confirm against the original.
    if not is_huggingface_space():
        return """
        <div style="text-align: center; margin-bottom: 20px;">
            <div style="font-size: 3rem;">ποΈ</div>
            <h1>Advanced Multi-Provider TTS Studio</h1>
            <p style="color: rgba(255, 255, 255, 0.7); font-size: 18px; margin: 0; font-weight: 300;">
                Transform your text into natural-sounding speech with multiple AI-powered voice providers
            </p>

            <div class="hf-space-box">
                <h3>π» Running Locally</h3>
                <p><strong>All TTS options available including offline TTS!</strong></p>
                <p>β Edge TTS (Free) β’ β Offline TTS β’ β Premium APIs β’ β Full feature set</p>
            </div>

            <div class="no-api-box">
                <h3>π No API Key? Multiple Free Options!</h3>
                <p><strong>Select "Edge TTS (Free)" for online high-quality TTS or "Offline TTS" for no internet!</strong></p>
                <p>β Edge TTS: High quality voices with internet β’ β Offline TTS: Basic quality, works anywhere</p>
            </div>

            <div class="info-box">
                <p><strong>π Multiple Providers Available:</strong> Groq PlayAI, OpenAI TTS, Microsoft Edge TTS (Free), Offline TTS</p>
                <p><strong>π‘οΈ Smart Fallback System:</strong> If one provider fails, automatically tries the next available</p>
                <p><strong>π‘ Recommended:</strong> Use "Auto (Try All)" for maximum reliability or specific providers based on your needs</p>
            </div>
        </div>
        """

    return """
        <div style="text-align: center; margin-bottom: 20px;">
            <div style="font-size: 3rem;">ποΈ</div>
            <h1>Advanced Multi-Provider TTS Studio</h1>
            <p style="color: rgba(255, 255, 255, 0.7); font-size: 18px; margin: 0; font-weight: 300;">
                Transform your text into natural-sounding speech with multiple AI-powered voice providers
            </p>

            <div class="hf-space-box">
                <h3>π Running on Hugging Face Spaces</h3>
                <p><strong>Recommended: Use "Edge TTS (Free)" for best results!</strong></p>
                <p>β High-quality voices β’ β No API key needed β’ β Up to 10,000 characters β’ β Multiple voice options</p>
                <p>β οΈ Offline TTS is not available in cloud environments</p>
            </div>

            <div class="no-api-box">
                <h3>π No API Key? No Problem!</h3>
                <p><strong>Select "Edge TTS (Free)" for high-quality TTS without any API keys!</strong></p>
                <p>β Microsoft's premium voices β’ β Completely free β’ β Works great in HF Spaces</p>
            </div>

            <div class="info-box">
                <p><strong>π Multiple Providers Available:</strong> Groq PlayAI, OpenAI TTS, Microsoft Edge TTS (Free)</p>
                <p><strong>π‘οΈ Smart Fallback System:</strong> If one provider fails, automatically tries the next available</p>
                <p><strong>π‘ Recommended:</strong> Use "Auto (Try All)" for maximum reliability or "Edge TTS (Free)" for consistent quality</p>
            </div>
        </div>
        """
|
|
| |
# Gradio UI definition: header, API-key inputs, provider/voice/text controls,
# outputs, event wiring and a provider comparison footer.
# NOTE(review): glyphs such as "β", "π" and "β‘" in labels and markup look
# like mis-decoded emoji; kept as found — confirm against the original file.
with gr.Blocks(css=custom_css, theme="ocean") as demo:
    # Header banner (content depends on the environment).
    gr.HTML(get_custom_html())

    # Optional API keys for the two keyed providers, side by side.
    with gr.Row():
        with gr.Column():
            groq_api_key = gr.Textbox(
                label="π Groq API Key (Optional)",
                placeholder="Optional: Paste your Groq API key here for PlayAI voices...",
                type="password",
                lines=1,
            )
        with gr.Column():
            openai_api_key = gr.Textbox(
                label="π OpenAI API Key (Optional)",
                placeholder="Optional: OpenAI API key for premium TTS voices...",
                type="password",
                lines=1,
            )

    # Quick-start hint, worded differently for Spaces and for local runs.
    gr.HTML(
        """
        <div class="warning-box">
            <h3>β‘ Quick Start for HF Spaces Users</h3>
            <p><strong>Best Option:</strong> Select "Edge TTS (Free)" below - no API key needed!</p>
            <p>π‘ <strong>Pro tip:</strong> Edge TTS provides Microsoft's premium voice quality completely free</p>
        </div>
        """
        if is_huggingface_space()
        else """
        <div class="warning-box">
            <h3>β‘ Quick Start Options</h3>
            <p><strong>Free Options:</strong> "Edge TTS (Free)" (internet required) or "Offline TTS" (no internet needed)</p>
            <p><strong>Premium Options:</strong> Add your Groq or OpenAI API keys above for additional voice options</p>
        </div>
        """
    )

    with gr.Row():
        # Left column: inputs.
        with gr.Column(scale=2):
            # "Offline TTS" is only offered outside Hugging Face Spaces.
            provider_choices = ["Auto (Try All)", "Edge TTS (Free)", "Groq (PlayAI)", "OpenAI TTS"] + (
                [] if is_huggingface_space() else ["Offline TTS"]
            )

            voice_provider = gr.Dropdown(
                choices=provider_choices,
                value="Edge TTS (Free)" if is_huggingface_space() else "Auto (Try All)",
                label="π― TTS Provider",
                info=(
                    "Edge TTS (Free) is recommended for HF Spaces!"
                    if is_huggingface_space()
                    else "Auto tries all providers in order"
                ),
            )

            # Starts with the Edge voices; replaced when the provider changes.
            voice = gr.Dropdown(
                choices=edge_voices,
                value="en-US-JennyNeural",
                label="π Voice Selection",
                info="Available voices update based on selected provider",
                visible=True,
            )

            text_input = gr.Textbox(
                label="π Text to Convert to Speech",
                placeholder="Enter your text here... (up to 10,000 characters)",
                lines=8,
                max_lines=15,
                info="π‘ Longer texts work better with Edge TTS (Free)",
            )

            generate_btn = gr.Button(
                "π΅ Generate Speech",
                variant="primary",
                elem_id="generate-btn",
                size="lg",
            )

        # Right column: outputs.
        with gr.Column(scale=1):
            audio_output = gr.Audio(
                label="π Generated Speech",
                type="filepath",
                interactive=False,
            )

            status_output = gr.Textbox(
                label="π Status & Information",
                lines=8,
                interactive=False,
                info="Real-time status updates and results",
            )

    # Swap the voice list whenever a different provider is selected.
    voice_provider.change(
        fn=update_voice_options,
        inputs=[voice_provider],
        outputs=[voice],
    )

    # Run synthesis and show the resulting audio file plus the status log.
    generate_btn.click(
        fn=generate_speech,
        inputs=[groq_api_key, openai_api_key, text_input, voice_provider, voice],
        outputs=[audio_output, status_output],
    )

    # Static footer comparing the providers.
    gr.HTML("""
    <div style="text-align: center; margin-top: 40px; padding: 20px; background: rgba(255, 255, 255, 0.05); border-radius: 12px; backdrop-filter: blur(10px);">
        <h3 style="color: rgba(255, 255, 255, 0.9); margin-bottom: 15px;">π― Provider Comparison</h3>
        <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(300px, 1fr)); gap: 15px; text-align: left;">
            <div style="background: rgba(0, 242, 254, 0.1); padding: 15px; border-radius: 8px; border: 1px solid rgba(0, 242, 254, 0.3);">
                <h4 style="color: #00f2fe; margin: 0 0 10px 0;">π Edge TTS (Free)</h4>
                <p style="color: rgba(255, 255, 255, 0.8); margin: 0; font-size: 14px;">β No API key required<br>β High quality Microsoft voices<br>β Up to 10,000 characters<br>β Perfect for HF Spaces</p>
            </div>
            <div style="background: rgba(102, 126, 234, 0.1); padding: 15px; border-radius: 8px; border: 1px solid rgba(102, 126, 234, 0.3);">
                <h4 style="color: #667eea; margin: 0 0 10px 0;">π Groq PlayAI</h4>
                <p style="color: rgba(255, 255, 255, 0.8); margin: 0; font-size: 14px;">π Requires API key<br>πͺ Premium entertainment voices<br>β‘ Fast generation<br>π ~3,500 char limit</p>
            </div>
            <div style="background: rgba(118, 75, 162, 0.1); padding: 15px; border-radius: 8px; border: 1px solid rgba(118, 75, 162, 0.3);">
                <h4 style="color: #764ba2; margin: 0 0 10px 0;">π€ OpenAI TTS</h4>
                <p style="color: rgba(255, 255, 255, 0.8); margin: 0; font-size: 14px;">π Requires API key<br>π― Professional quality<br>π° Pay per use<br>π 4,000 char limit</p>
            </div>
        </div>
        <div style="margin-top: 20px; padding: 15px; background: rgba(46, 204, 113, 0.1); border-radius: 8px; border: 1px solid rgba(46, 204, 113, 0.3);">
            <p style="color: rgba(255, 255, 255, 0.9); margin: 0; font-size: 16px;"><strong>π‘ Pro Tip:</strong> Use "Auto (Try All)" to automatically find the best available provider, or stick with "Edge TTS (Free)" for consistent, high-quality results without any setup!</p>
        </div>
    </div>
    """)
|
|
| |
# Script entry point: launch the app with settings chosen per environment.
if __name__ == "__main__":
    if not is_huggingface_space():
        # Local run: default host/port, with a Gradio share link requested.
        demo.launch(share=True)
    else:
        # Hugging Face Spaces: listen on all interfaces on port 7860, without
        # a share link.
        demo.launch(
            server_name="0.0.0.0",
            server_port=7860,
            share=False,
            show_error=True,
            enable_monitoring=False,
            favicon_path=None,
            # NOTE(review): presumably forwarded to the underlying web app to
            # turn off its docs/redoc routes — confirm against Gradio docs.
            app_kwargs={
                "docs_url": None,
                "redoc_url": None,
            },
        )