# TTS3 / app.py
# Hugging Face Space file uploaded by shukdev3 (commit aac6e76 "Create app.py", verified), 31.8 kB.
import os
import gradio as gr
from groq import Groq
import tempfile
import requests
import json
import base64
from pathlib import Path
import time
import sys
import platform
# English voice catalogues, one list per provider. The lists feed the voice
# dropdown and are also used to validate a requested voice before a call.

# Groq-hosted PlayAI voices: every identifier is "<Name>-PlayAI".
groq_voices = [
    f"{name}-PlayAI"
    for name in (
        "Arista", "Atlas", "Basil", "Briggs",
        "Calum", "Celeste", "Cheyenne", "Chip",
        "Cillian", "Deedee", "Fritz", "Gail",
        "Indigo", "Mamaw", "Mason", "Mikail",
        "Mitch", "Quinn", "Thunder",
    )
]

# OpenAI voices (only usable when the user supplies an OpenAI API key).
openai_voices = [
    "alloy",
    "ash",
    "coral",
    "sage",
    "echo",
    "fable",
    "onyx",
    "nova",
    "shimmer",
]

# Edge TTS voices (free, no API key required): "en-US-<Name>Neural".
edge_voices = [
    f"en-US-{name}Neural"
    for name in (
        "Aria",
        "Ana",
        "Christopher",
        "Eric",
        "Guy",
        "Jenny",
        "Michelle",
        "Roger",
        "Steffan",
    )
]
def check_internet_connection(host="8.8.8.8", port=53, timeout=3):
    """Report whether an outbound TCP connection can be opened.

    Args:
        host: Address to probe. Defaults to 8.8.8.8.
        port: TCP port to probe. Defaults to 53.
        timeout: Seconds to wait for the connection attempt.

    Returns:
        True when the connection succeeds, False when it fails with OSError
        (which covers timeouts, refused connections and unreachable networks).
    """
    # Local import: the module header does not import socket.
    import socket

    try:
        # create_connection returns a connected socket. Using it as a context
        # manager closes it right away, so repeated probes do not accumulate
        # open descriptors.
        with socket.create_connection((host, port), timeout=timeout):
            return True
    except OSError:
        return False
def is_huggingface_space():
    """Tell whether the process is running inside a Hugging Face Space.

    The check is simply whether the SPACE_ID environment variable is set;
    its value (even an empty string) is not inspected.
    """
    return "SPACE_ID" in os.environ
def chunk_text(text, max_length=4000):
    """Split text into chunks of at most ``max_length`` characters.

    Text that already fits is returned unchanged as a single-element list.
    Longer text is split on whitespace and the words are re-joined with single
    spaces, packing as many words as fit into each chunk. A single word longer
    than ``max_length`` is cut into ``max_length``-sized pieces so that no
    chunk ever exceeds the limit.

    Args:
        text: The text to split.
        max_length: Maximum number of characters per chunk (must be >= 1).

    Returns:
        A list of chunk strings. Whitespace-only text longer than
        ``max_length`` yields an empty list because it contains no words.

    Raises:
        ValueError: If ``max_length`` is smaller than 1.
    """
    if max_length < 1:
        raise ValueError("max_length must be a positive integer")
    if len(text) <= max_length:
        return [text]

    chunks = []
    current_words = []
    current_length = 0  # always equals len(" ".join(current_words))

    for word in text.split():
        # A word that cannot fit in any chunk is hard-split into pieces.
        pieces = [word[i:i + max_length] for i in range(0, len(word), max_length)]
        for piece in pieces:
            # The joining space only costs a character when the chunk
            # already holds a word.
            separator = 1 if current_words else 0
            if current_length + separator + len(piece) <= max_length:
                current_words.append(piece)
                current_length += separator + len(piece)
            else:
                chunks.append(" ".join(current_words))
                current_words = [piece]
                current_length = len(piece)

    if current_words:
        chunks.append(" ".join(current_words))
    return chunks
def try_groq_tts(api_key, text, voice):
    """Generate speech with Groq's PlayAI model and save it as a WAV file.

    Args:
        api_key: Groq API key used to build the client.
        text: Text to synthesise. Text that chunk_text cannot fit in a single
            3,500-character chunk is rejected rather than split.
        voice: PlayAI voice identifier passed through to the API.

    Returns:
        A ``(path, message)`` tuple. ``path`` is the generated WAV file on
        success and None on failure; ``message`` is a user-facing status line.
        Exceptions are never propagated; they are turned into a message.
    """
    temp_path = None
    try:
        if not check_internet_connection():
            return None, "❌ No internet connection available for Groq TTS"

        client = Groq(api_key=api_key)

        # Only a single request is supported; 3500 leaves some buffer.
        chunks = chunk_text(text, 3500)
        if len(chunks) != 1:
            return None, f"⚠️ Text too long for single request ({len(text)} chars). Try shorter text or use Edge TTS for longer content."

        response = client.audio.speech.create(
            model="playai-tts",
            voice=voice,
            input=text,
            response_format="wav"
        )

        # Reserve a unique path, then close our handle before the SDK opens
        # the file itself: a second open of a still-open temp file fails on
        # Windows.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
            temp_path = temp_file.name
        response.write_to_file(temp_path)
        return temp_path, "βœ… Speech generated successfully with Groq PlayAI!"
    except Exception as e:
        # Do not leave a partial or empty temp file behind on failure.
        if temp_path and os.path.exists(temp_path):
            try:
                os.remove(temp_path)
            except OSError:
                pass
        error_msg = str(e)
        if "rate_limit_exceeded" in error_msg or "429" in error_msg:
            return None, "πŸ”„ Groq rate limit reached. Try again in a moment or use Edge TTS..."
        return None, f"❌ Groq error: {error_msg}"
def try_openai_tts(api_key, text, voice):
    """Generate speech with OpenAI's ``tts-1`` model and save it as a WAV file.

    Args:
        api_key: OpenAI API key used to build the client.
        text: Text to synthesise; only the first 4,000 characters are sent.
        voice: OpenAI voice name passed through to the API.

    Returns:
        A ``(path, message)`` tuple. ``path`` is the generated WAV file on
        success and None on failure; ``message`` is a user-facing status line.
        Exceptions are never propagated; they are turned into a message.
    """
    temp_path = None
    try:
        if not check_internet_connection():
            return None, "❌ No internet connection available for OpenAI TTS"

        # Imported lazily so the app still works without the openai package.
        import openai

        client = openai.OpenAI(api_key=api_key)
        response = client.audio.speech.create(
            model="tts-1",
            voice=voice,
            input=text[:4000],  # OpenAI limit
            # Request WAV explicitly so the data matches the ".wav" file
            # name; without this the API returns its default format.
            response_format="wav"
        )

        # Reserve a unique path, then close our handle before the SDK opens
        # the file itself: a second open of a still-open temp file fails on
        # Windows.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
            temp_path = temp_file.name
        response.stream_to_file(temp_path)
        return temp_path, "βœ… Speech generated successfully with OpenAI TTS!"
    except ImportError:
        return None, "❌ OpenAI library not installed. Install with: pip install openai"
    except Exception as e:
        # Do not leave a partial or empty temp file behind on failure.
        if temp_path and os.path.exists(temp_path):
            try:
                os.remove(temp_path)
            except OSError:
                pass
        return None, f"❌ OpenAI error: {str(e)}"
def try_edge_tts(text, voice="en-US-JennyNeural"):
    """Generate speech with Microsoft Edge TTS and save it as an MP3 file.

    Args:
        text: Text to synthesise; only the first 10,000 characters are sent.
        voice: Edge TTS voice name. Defaults to "en-US-JennyNeural".

    Returns:
        A ``(path, message)`` tuple. ``path`` is the generated MP3 file on
        success and None on failure; ``message`` is a user-facing status line.
        Exceptions are never propagated; they are turned into a message.
    """
    temp_path = None
    try:
        if not check_internet_connection():
            return None, "❌ No internet connection available for Edge TTS"

        # Imported lazily so the app still works without edge-tts installed.
        import asyncio
        import edge_tts

        async def generate_edge_speech():
            """Stream the audio into a temp file; return the bytes written."""
            nonlocal temp_path
            communicate = edge_tts.Communicate(text[:10000], voice)  # Edge TTS can handle longer text
            bytes_written = 0
            with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_file:
                temp_path = temp_file.name
                async for chunk in communicate.stream():
                    if chunk["type"] == "audio":
                        temp_file.write(chunk["data"])
                        bytes_written += len(chunk["data"])
            return bytes_written

        # asyncio.run creates a fresh event loop and closes it afterwards, so
        # no loop is left open in the calling thread.
        if asyncio.run(generate_edge_speech()) == 0:
            # A stream without audio chunks would otherwise be reported as
            # success with an empty, unplayable file.
            raise RuntimeError("no audio data was returned")

        return temp_path, "βœ… Speech generated successfully with Microsoft Edge TTS (Free & High Quality)!"
    except ImportError:
        return None, "❌ Edge TTS not installed. Install with: pip install edge-tts"
    except Exception as e:
        # Do not leave a partial or empty temp file behind on failure.
        if temp_path and os.path.exists(temp_path):
            try:
                os.remove(temp_path)
            except OSError:
                pass
        return None, f"❌ Edge TTS error: {str(e)}"
def try_pyttsx3_fallback(text):
    """Generate speech offline with pyttsx3 (local environments only).

    The text is limited to its first 2,000 characters. If pyttsx3 raises or
    produces no audio, the call is delegated to try_alternative_offline_tts.

    Args:
        text: Text to synthesise.

    Returns:
        A ``(path, message)`` tuple. ``path`` is the generated WAV file on
        success and None on failure; ``message`` is a user-facing status line.
    """
    if is_huggingface_space():
        return None, "❌ Offline TTS not available in Hugging Face Spaces. Use Edge TTS (Free) instead!"

    limited_text = text[:2000]  # Limit for stability
    temp_path = None

    def discard_temp_file():
        """Remove the temp file so a failed attempt leaves nothing behind."""
        if temp_path and os.path.exists(temp_path):
            try:
                os.remove(temp_path)
            except OSError:
                pass

    try:
        import pyttsx3

        engine = pyttsx3.init()

        # Pick an English voice: the scan stops at the first one that looks
        # female, otherwise the last English voice seen is used.
        english_voice = None
        for candidate in engine.getProperty('voices') or []:
            # Some drivers report None for id/name; treat that as empty text
            # instead of aborting voice selection.
            voice_id = (candidate.id or "").lower()
            voice_name = (getattr(candidate, 'name', None) or "").lower()
            if any(keyword in voice_id or keyword in voice_name
                   for keyword in ['english', 'en-us', 'en_us', 'zira', 'hazel', 'eva']):
                english_voice = candidate.id
                if any(female_keyword in voice_name
                       for female_keyword in ['zira', 'hazel', 'eva', 'female']):
                    break  # Prefer female voices
        if english_voice:
            engine.setProperty('voice', english_voice)

        engine.setProperty('rate', 180)   # Speed of speech (words per minute)
        engine.setProperty('volume', 0.9)  # Volume level (0.0 to 1.0)

        # Reserve a path and close the handle so the engine can write to it.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
            temp_path = temp_file.name

        engine.save_to_file(limited_text, temp_path)
        engine.runAndWait()

        if os.path.exists(temp_path) and os.path.getsize(temp_path) > 0:
            return temp_path, "βœ… Speech generated with offline TTS! (No internet required)"
    except ImportError:
        discard_temp_file()
        return None, "❌ pyttsx3 not installed. Use Edge TTS (Free) instead!"
    except Exception:
        # Deliberate best effort: any pyttsx3 failure falls through to the
        # system-command fallback below.
        pass

    # pyttsx3 failed or produced no audio: drop its temp file (the fallback
    # creates its own) and try the system TTS instead.
    discard_temp_file()
    return try_alternative_offline_tts(limited_text)
def _offline_audio_ready(path):
    """Return True when ``path`` exists and contains at least one byte."""
    return os.path.exists(path) and os.path.getsize(path) > 0


def _speak_via_command(command, text, output_path):
    """Run a TTS command that reads its text from stdin; True if audio was written."""
    import subprocess

    try:
        # The text goes through stdin rather than argv so that user text
        # starting with "-" cannot be parsed as a command-line option.
        subprocess.run(command, input=text, text=True, check=True, timeout=30)
    except (subprocess.CalledProcessError, subprocess.TimeoutExpired, FileNotFoundError):
        return False
    return _offline_audio_ready(output_path)


def _speak_via_sapi(text, output_path):
    """Render ``text`` to ``output_path`` with Windows SAPI; True if audio was written."""
    try:
        import win32com.client
    except ImportError:
        return False

    speaker = win32com.client.Dispatch("SAPI.SpVoice")
    file_stream = win32com.client.Dispatch("SAPI.SpFileStream")
    file_stream.Open(output_path, 3)  # 3 = SSFMCreateForWrite
    try:
        speaker.AudioOutputStream = file_stream
        speaker.Speak(text)
    finally:
        # Close the stream even when Speak raises so the file is released.
        file_stream.Close()
    return _offline_audio_ready(output_path)


def try_alternative_offline_tts(text):
    """Generate speech offline with the operating system's own TTS tooling.

    Windows uses SAPI through win32com, macOS uses the ``say`` command, and
    Linux tries ``espeak`` and then Festival's ``text2wave``. Only intended
    for local environments.

    Args:
        text: Text to synthesise.

    Returns:
        A ``(path, message)`` tuple. ``path`` is the generated WAV file on
        success and None on failure; ``message`` is a user-facing status line.
    """
    if is_huggingface_space():
        return None, "❌ System TTS not available in Hugging Face Spaces. Use Edge TTS (Free) instead!"

    temp_path = None
    try:
        system = platform.system().lower()

        # Reserve an output path; the handle is closed so external tools can
        # write to the file.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
            temp_path = temp_file.name

        if system == "windows":
            if _speak_via_sapi(text, temp_path):
                return temp_path, "βœ… Speech generated with Windows SAPI TTS (Offline)!"
        elif system == "darwin":  # macOS
            # With no message argument, `say` reads the text from stdin.
            if _speak_via_command(['say', '-o', temp_path, '--data-format=LEF32@22050'],
                                  text, temp_path):
                return temp_path, "βœ… Speech generated with macOS TTS (Offline)!"
        elif system == "linux":
            # Try espeak first, then Festival as backup.
            if _speak_via_command(['espeak', '--stdin', '-w', temp_path], text, temp_path):
                return temp_path, "βœ… Speech generated with espeak TTS (Offline)!"
            if _speak_via_command(['text2wave', '-o', temp_path], text, temp_path):
                return temp_path, "βœ… Speech generated with Festival TTS (Offline)!"

        message = f"❌ No offline TTS available for {system}. Use Edge TTS (Free) instead!"
    except Exception as e:
        message = f"❌ Alternative offline TTS error: {str(e)}"

    # Every path that reaches this point has failed: remove the unused temp
    # file so failed attempts do not pile up on disk.
    if temp_path and os.path.exists(temp_path):
        try:
            os.remove(temp_path)
        except OSError:
            pass
    return None, message
# Main function with multiple provider fallback
def generate_speech(groq_api_key, openai_api_key, text, voice_provider, voice):
    """Generate speech for ``text``, falling back across providers.

    Providers are attempted in this order, each only when it applies:
    Groq (provider is "Groq (PlayAI)" or "Auto (Try All)" and a Groq key is
    given), OpenAI (provider is "OpenAI TTS" or "Auto (Try All)" and an OpenAI
    key is given), Edge TTS (every provider except "Offline TTS"), and offline
    TTS (only when "Offline TTS" is selected outside Hugging Face Spaces).

    Args:
        groq_api_key: Groq API key, or empty/None to skip Groq.
        openai_api_key: OpenAI API key, or empty/None to skip OpenAI.
        text: Text to synthesise (at most 10,000 characters).
        voice_provider: Provider label selected in the UI.
        voice: Voice selected in the UI. When it is not valid for the provider
            being tried, that provider's default voice is used instead.

    Returns:
        A ``(path, status)`` tuple. ``path`` is the audio file on success and
        None on failure; ``status`` is the accumulated status log for the UI.
    """
    # Whitespace-only input has nothing to speak, so it is treated as empty.
    if not text or not text.strip():
        return None, "⚠️ Please enter some text to generate speech."
    if len(text) > 10000:
        return None, "🚫 Text input exceeds 10,000 character limit."

    # Check internet connection and environment
    has_internet = check_internet_connection()
    is_hf_space = is_huggingface_space()
    internet_status = "🌐 Internet: Connected" if has_internet else "πŸ“΄ Internet: Offline"
    env_status = " (Hugging Face Spaces)" if is_hf_space else " (Local Environment)"

    status_msg = f"πŸ”„ Generating speech...\n{internet_status}{env_status}\n"

    # Offline TTS is the one path that works without a connection, so it must
    # not be blocked by the no-internet guard when running locally.
    offline_usable = voice_provider == "Offline TTS" and not is_hf_space
    if not has_internet and not offline_usable:
        return None, status_msg + "❌ No internet connection available. All TTS providers require internet connection in this environment."

    # Groq is part of the "Auto (Try All)" chain as well as an explicit choice.
    if voice_provider in ("Groq (PlayAI)", "Auto (Try All)") and groq_api_key:
        status_msg += "🎯 Using Groq PlayAI...\n"
        # The dropdown may still hold another provider's voice in Auto mode.
        groq_voice = voice if voice in groq_voices else "Fritz-PlayAI"
        audio_file, message = try_groq_tts(groq_api_key, text, groq_voice)
        if audio_file:
            return audio_file, status_msg + message
        status_msg += f"❌ Groq failed: {message}\n"

    # Try OpenAI if available
    if openai_api_key and voice_provider in ["OpenAI TTS", "Auto (Try All)"]:
        status_msg += "πŸ”„ Trying OpenAI TTS...\n"
        openai_voice = voice if voice in openai_voices else "alloy"
        audio_file, message = try_openai_tts(openai_api_key, text, openai_voice)
        if audio_file:
            return audio_file, status_msg + message
        status_msg += f"❌ OpenAI failed: {message}\n"

    # Edge TTS (free) is the main fallback for every online provider choice.
    if voice_provider in ["Edge TTS (Free)", "Auto (Try All)", "Groq (PlayAI)", "OpenAI TTS"]:
        status_msg += "πŸ”„ Using Edge TTS (Free & High Quality)...\n"
        edge_voice = voice if voice in edge_voices else "en-US-JennyNeural"
        audio_file, message = try_edge_tts(text, edge_voice)
        if audio_file:
            return audio_file, status_msg + message
        status_msg += f"❌ Edge TTS failed: {message}\n"

    # Only try offline TTS if specifically requested and not in HF Spaces
    if voice_provider == "Offline TTS":
        if is_hf_space:
            return None, status_msg + "❌ Offline TTS is not available in Hugging Face Spaces. Please use 'Edge TTS (Free)' instead!"
        status_msg += "πŸ”„ Using offline TTS (works without internet)...\n"
        audio_file, message = try_pyttsx3_fallback(text)
        if audio_file:
            return audio_file, status_msg + message
        status_msg += f"❌ Offline TTS failed: {message}\n"

    # Final fallback message
    if is_hf_space:
        return None, status_msg + "❌ All online TTS providers failed. Please check your API keys or try again later. Edge TTS (Free) is recommended for HF Spaces!"
    return None, status_msg + "❌ All TTS providers failed. Please check your setup or try shorter text."
def update_voice_options(provider):
    """Return the voice dropdown update that matches the selected provider.

    Args:
        provider: Provider label chosen in the UI.

    Returns:
        A ``gr.Dropdown`` carrying the choices, default value and label for
        that provider. Any label without a dedicated entry (i.e. Auto mode)
        gets the Groq voices.
    """
    # The label is set on every branch: Auto mode changes it, so the other
    # providers must restore the normal label instead of inheriting Auto's.
    label = "🎭 Voice Selection"
    if provider == "Groq (PlayAI)":
        choices, default = groq_voices, "Fritz-PlayAI"
    elif provider == "OpenAI TTS":
        choices, default = openai_voices, "alloy"
    elif provider == "Edge TTS (Free)":
        choices, default = edge_voices, "en-US-JennyNeural"
    elif provider == "Offline TTS":
        choices, default = ["Default System Voice"], "Default System Voice"
    else:  # Auto
        choices, default = groq_voices, "Fritz-PlayAI"
        label = "🎭 Voice (Auto mode will try best match)"
    return gr.Dropdown(choices=choices, value=default, visible=True, label=label)
# Custom CSS passed to gr.Blocks(css=...).
# The stylesheet defines the colour/gradient variables in :root, styles the
# Gradio container, the h1 heading, the #generate-btn button and the
# .gr-textbox / .gr-dropdown / .gr-button / .gr-audio classes, and provides
# the .info-box, .warning-box, .no-api-box and .hf-space-box panels used by
# the HTML snippets in this file.
custom_css = """
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap');
* {
box-sizing: border-box;
}
:root {
--primary-gradient: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
--secondary-gradient: linear-gradient(135deg, #f093fb 0%, #f5576c 100%);
--accent-gradient: linear-gradient(135deg, #4facfe 0%, #00f2fe 100%);
--dark-bg: linear-gradient(135deg, #0c0c0c 0%, #1a1a2e 50%, #16213e 100%);
--glass-bg: rgba(255, 255, 255, 0.08);
--glass-border: rgba(255, 255, 255, 0.15);
--text-primary: #ffffff;
--text-secondary: rgba(255, 255, 255, 0.7);
--shadow-primary: 0 8px 32px rgba(0, 0, 0, 0.4);
--shadow-hover: 0 12px 48px rgba(0, 0, 0, 0.6);
--border-radius: 16px;
}
body, .gradio-container {
background: var(--dark-bg) !important;
font-family: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif !important;
color: var(--text-primary) !important;
overflow-x: hidden;
}
.gradio-container {
max-width: 1400px !important;
margin: 40px auto !important;
padding: 0 20px !important;
min-height: 100vh;
}
.gradio-container > div {
background: var(--glass-bg) !important;
backdrop-filter: blur(20px) !important;
border: 1px solid var(--glass-border) !important;
border-radius: var(--border-radius) !important;
padding: 40px !important;
box-shadow: var(--shadow-primary) !important;
position: relative;
overflow: hidden;
}
h1 {
text-align: center !important;
font-size: clamp(2.5rem, 5vw, 4rem) !important;
font-weight: 700 !important;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
-webkit-background-clip: text !important;
-webkit-text-fill-color: transparent !important;
background-clip: text !important;
margin: 0 0 50px 0 !important;
animation: glow 3s ease-in-out infinite alternate;
}
@keyframes glow {
0% { filter: drop-shadow(0 0 20px rgba(102, 126, 234, 0.3)); }
100% { filter: drop-shadow(0 0 30px rgba(118, 75, 162, 0.5)); }
}
#generate-btn {
background: linear-gradient(135deg, #00f2fe 0%, #4facfe 100%) !important;
border: 2px solid #4facfe !important;
color: #ffffff !important;
font-weight: bold !important;
box-shadow: 0 0 12px rgba(0, 242, 254, 0.5) !important;
transition: all 0.3s ease !important;
margin-top: 26px !important;
}
#generate-btn:hover {
background: linear-gradient(135deg, #4facfe 0%, #00f2fe 100%) !important;
transform: scale(1.03) !important;
box-shadow: 0 0 18px rgba(0, 242, 254, 0.7) !important;
}
.gr-textbox, .gr-dropdown {
background: var(--glass-bg) !important;
border: 2px solid transparent !important;
border-radius: 12px !important;
position: relative;
transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1) !important;
backdrop-filter: blur(10px) !important;
}
.gr-textbox textarea, .gr-textbox input, .gr-dropdown select {
background: transparent !important;
color: var(--text-primary) !important;
border: none !important;
font-size: 16px !important;
font-weight: 400 !important;
padding: 16px !important;
}
.gr-input-label, .gr-output-label {
color: var(--text-primary) !important;
font-weight: 600 !important;
font-size: 14px !important;
text-transform: uppercase;
letter-spacing: 0.5px;
margin-bottom: 12px !important;
background: var(--accent-gradient);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
background-clip: text;
}
.gr-button {
background: var(--primary-gradient) !important;
border: none !important;
color: white !important;
font-weight: 600 !important;
font-size: 16px !important;
padding: 16px 32px !important;
border-radius: 12px !important;
transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1) !important;
text-transform: uppercase;
letter-spacing: 0.5px;
box-shadow: 0 4px 20px rgba(102, 126, 234, 0.3) !important;
margin-top: 20px !important;
}
.gr-button:hover {
transform: translateY(-2px) !important;
box-shadow: var(--shadow-hover) !important;
filter: brightness(1.1);
}
.gr-audio {
background: var(--glass-bg) !important;
border: 1px solid var(--glass-border) !important;
border-radius: 12px !important;
padding: 20px !important;
backdrop-filter: blur(10px) !important;
transition: all 0.3s ease !important;
}
.info-box {
background: rgba(0, 242, 254, 0.1) !important;
border: 1px solid rgba(0, 242, 254, 0.3) !important;
border-radius: 12px !important;
padding: 20px !important;
margin: 20px 0 !important;
backdrop-filter: blur(10px) !important;
}
.info-box,
.info-box * {
color: white !important;
}
.warning-box,
.warning-box * {
color: white !important;
}
.no-api-box {
background: rgba(46, 204, 113, 0.1) !important;
border: 1px solid rgba(46, 204, 113, 0.3) !important;
border-radius: 12px !important;
padding: 20px !important;
margin: 20px 0 !important;
backdrop-filter: blur(10px) !important;
}
.no-api-box,
.no-api-box * {
color: white !important;
}
.warning-box {
background: rgba(255, 193, 7, 0.1) !important;
border: 1px solid rgba(255, 193, 7, 0.3) !important;
border-radius: 12px !important;
padding: 15px !important;
margin: 15px 0 !important;
backdrop-filter: blur(10px) !important;
}
.hf-space-box {
background: rgba(255, 87, 34, 0.1) !important;
border: 1px solid rgba(255, 87, 34, 0.3) !important;
border-radius: 12px !important;
padding: 20px !important;
margin: 20px 0 !important;
backdrop-filter: blur(10px) !important;
}
.hf-space-box,
.hf-space-box * {
color: white !important;
}
"""
# Updated HTML for Hugging Face Spaces
def get_custom_html():
    """Build the header HTML shown at the top of the page.

    Two variants exist: one for Hugging Face Spaces, which steers users to
    Edge TTS and notes that offline TTS is unavailable, and one for local
    runs, which advertises every provider including offline TTS.

    Returns:
        The HTML markup string for the current environment.
    """
    # The markup lines stay flush-left inside the literals so the emitted
    # HTML text is exactly what the strings contain.
    spaces_header = """
<div style="text-align: center; margin-bottom: 20px;">
<div style="font-size: 3rem;">πŸŽ™οΈ</div>
<h1>Advanced Multi-Provider TTS Studio</h1>
<p style="color: rgba(255, 255, 255, 0.7); font-size: 18px; margin: 0; font-weight: 300;">
Transform your text into natural-sounding speech with multiple AI-powered voice providers
</p>
<div class="hf-space-box">
<h3>πŸš€ Running on Hugging Face Spaces</h3>
<p><strong>Recommended: Use "Edge TTS (Free)" for best results!</strong></p>
<p>βœ… High-quality voices β€’ βœ… No API key needed β€’ βœ… Up to 10,000 characters β€’ βœ… Multiple voice options</p>
<p>⚠️ Offline TTS is not available in cloud environments</p>
</div>
<div class="no-api-box">
<h3>πŸ†“ No API Key? No Problem!</h3>
<p><strong>Select "Edge TTS (Free)" for high-quality TTS without any API keys!</strong></p>
<p>βœ… Microsoft's premium voices β€’ βœ… Completely free β€’ βœ… Works great in HF Spaces</p>
</div>
<div class="info-box">
<p><strong>πŸ”„ Multiple Providers Available:</strong> Groq PlayAI, OpenAI TTS, Microsoft Edge TTS (Free)</p>
<p><strong>πŸ›‘οΈ Smart Fallback System:</strong> If one provider fails, automatically tries the next available</p>
<p><strong>πŸ’‘ Recommended:</strong> Use "Auto (Try All)" for maximum reliability or "Edge TTS (Free)" for consistent quality</p>
</div>
</div>
"""
    local_header = """
<div style="text-align: center; margin-bottom: 20px;">
<div style="font-size: 3rem;">πŸŽ™οΈ</div>
<h1>Advanced Multi-Provider TTS Studio</h1>
<p style="color: rgba(255, 255, 255, 0.7); font-size: 18px; margin: 0; font-weight: 300;">
Transform your text into natural-sounding speech with multiple AI-powered voice providers
</p>
<div class="hf-space-box">
<h3>πŸ’» Running Locally</h3>
<p><strong>All TTS options available including offline TTS!</strong></p>
<p>βœ… Edge TTS (Free) β€’ βœ… Offline TTS β€’ βœ… Premium APIs β€’ βœ… Full feature set</p>
</div>
<div class="no-api-box">
<h3>πŸ†“ No API Key? Multiple Free Options!</h3>
<p><strong>Select "Edge TTS (Free)" for online high-quality TTS or "Offline TTS" for no internet!</strong></p>
<p>βœ… Edge TTS: High quality voices with internet β€’ βœ… Offline TTS: Basic quality, works anywhere</p>
</div>
<div class="info-box">
<p><strong>πŸ”„ Multiple Providers Available:</strong> Groq PlayAI, OpenAI TTS, Microsoft Edge TTS (Free), Offline TTS</p>
<p><strong>πŸ›‘οΈ Smart Fallback System:</strong> If one provider fails, automatically tries the next available</p>
<p><strong>πŸ’‘ Recommended:</strong> Use "Auto (Try All)" for maximum reliability or specific providers based on your needs</p>
</div>
</div>
"""
    return spaces_header if is_huggingface_space() else local_header
# Gradio interface
# Build the whole page inside one Blocks context. Components are created in
# the same order as they appear on the page.
with gr.Blocks(css=custom_css, theme="ocean") as demo:
    # Environment flag reused by the layout decisions below.
    _running_on_hf = is_huggingface_space()

    gr.HTML(get_custom_html())

    # API keys: two optional password fields side by side.
    with gr.Row():
        with gr.Column():
            groq_api_key = gr.Textbox(
                label="πŸ” Groq API Key (Optional)",
                placeholder="Optional: Paste your Groq API key here for PlayAI voices...",
                type="password",
                lines=1,
            )
        with gr.Column():
            openai_api_key = gr.Textbox(
                label="πŸ” OpenAI API Key (Optional)",
                placeholder="Optional: OpenAI API key for premium TTS voices...",
                type="password",
                lines=1,
            )

    # Quick-start hint for users without API keys; the wording depends on
    # whether the app runs in a Space or locally.
    _quick_start_html = (
        """
<div class="warning-box">
<h3>⚑ Quick Start for HF Spaces Users</h3>
<p><strong>Best Option:</strong> Select "Edge TTS (Free)" below - no API key needed!</p>
<p>πŸ’‘ <strong>Pro tip:</strong> Edge TTS provides Microsoft's premium voice quality completely free</p>
</div>
"""
        if _running_on_hf
        else """
<div class="warning-box">
<h3>⚑ Quick Start Options</h3>
<p><strong>Free Options:</strong> "Edge TTS (Free)" (internet required) or "Offline TTS" (no internet needed)</p>
<p><strong>Premium Options:</strong> Add your Groq or OpenAI API keys above for additional voice options</p>
</div>
"""
    )
    gr.HTML(_quick_start_html)

    # Main interface: inputs on the left (wider), results on the right.
    with gr.Row():
        with gr.Column(scale=2):
            # Offline TTS is only offered outside Hugging Face Spaces.
            provider_choices = ["Auto (Try All)", "Edge TTS (Free)", "Groq (PlayAI)", "OpenAI TTS"] + (
                [] if _running_on_hf else ["Offline TTS"]
            )
            voice_provider = gr.Dropdown(
                choices=provider_choices,
                value="Edge TTS (Free)" if _running_on_hf else "Auto (Try All)",
                label="🎯 TTS Provider",
                info=(
                    "Edge TTS (Free) is recommended for HF Spaces!"
                    if _running_on_hf
                    else "Auto tries all providers in order"
                ),
            )

            # The voice list starts with the Edge voices and is swapped by
            # the provider change handler registered below.
            voice = gr.Dropdown(
                choices=edge_voices,
                value="en-US-JennyNeural",
                label="🎭 Voice Selection",
                info="Available voices update based on selected provider",
                visible=True,
            )

            text_input = gr.Textbox(
                label="πŸ“ Text to Convert to Speech",
                placeholder="Enter your text here... (up to 10,000 characters)",
                lines=8,
                max_lines=15,
                info="πŸ’‘ Longer texts work better with Edge TTS (Free)",
            )

            generate_btn = gr.Button(
                "🎡 Generate Speech",
                variant="primary",
                elem_id="generate-btn",
                size="lg",
            )

        with gr.Column(scale=1):
            audio_output = gr.Audio(
                label="πŸ”Š Generated Speech",
                type="filepath",
                interactive=False,
            )

            status_output = gr.Textbox(
                label="πŸ“Š Status & Information",
                lines=8,
                interactive=False,
                info="Real-time status updates and results",
            )

    # Event wiring: refresh the voice list on provider change, and run the
    # synthesis when the button is clicked.
    voice_provider.change(
        fn=update_voice_options,
        inputs=[voice_provider],
        outputs=[voice],
    )
    generate_btn.click(
        fn=generate_speech,
        inputs=[groq_api_key, openai_api_key, text_input, voice_provider, voice],
        outputs=[audio_output, status_output],
    )

    # Footer: provider comparison cards and a closing tip.
    gr.HTML("""
<div style="text-align: center; margin-top: 40px; padding: 20px; background: rgba(255, 255, 255, 0.05); border-radius: 12px; backdrop-filter: blur(10px);">
<h3 style="color: rgba(255, 255, 255, 0.9); margin-bottom: 15px;">🎯 Provider Comparison</h3>
<div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(300px, 1fr)); gap: 15px; text-align: left;">
<div style="background: rgba(0, 242, 254, 0.1); padding: 15px; border-radius: 8px; border: 1px solid rgba(0, 242, 254, 0.3);">
<h4 style="color: #00f2fe; margin: 0 0 10px 0;">πŸ†“ Edge TTS (Free)</h4>
<p style="color: rgba(255, 255, 255, 0.8); margin: 0; font-size: 14px;">βœ… No API key required<br>βœ… High quality Microsoft voices<br>βœ… Up to 10,000 characters<br>βœ… Perfect for HF Spaces</p>
</div>
<div style="background: rgba(102, 126, 234, 0.1); padding: 15px; border-radius: 8px; border: 1px solid rgba(102, 126, 234, 0.3);">
<h4 style="color: #667eea; margin: 0 0 10px 0;">🎭 Groq PlayAI</h4>
<p style="color: rgba(255, 255, 255, 0.8); margin: 0; font-size: 14px;">πŸ”‘ Requires API key<br>πŸŽͺ Premium entertainment voices<br>⚑ Fast generation<br>πŸ“ ~3,500 char limit</p>
</div>
<div style="background: rgba(118, 75, 162, 0.1); padding: 15px; border-radius: 8px; border: 1px solid rgba(118, 75, 162, 0.3);">
<h4 style="color: #764ba2; margin: 0 0 10px 0;">πŸ€– OpenAI TTS</h4>
<p style="color: rgba(255, 255, 255, 0.8); margin: 0; font-size: 14px;">πŸ”‘ Requires API key<br>🎯 Professional quality<br>πŸ’° Pay per use<br>πŸ“ 4,000 char limit</p>
</div>
</div>
<div style="margin-top: 20px; padding: 15px; background: rgba(46, 204, 113, 0.1); border-radius: 8px; border: 1px solid rgba(46, 204, 113, 0.3);">
<p style="color: rgba(255, 255, 255, 0.9); margin: 0; font-size: 16px;"><strong>πŸ’‘ Pro Tip:</strong> Use "Auto (Try All)" to automatically find the best available provider, or stick with "Edge TTS (Free)" for consistent, high-quality results without any setup!</p>
</div>
</div>
""")
# Launch configuration
if __name__ == "__main__":
    # Pick the launch options for the current environment, then start the
    # app with a single launch call.
    if is_huggingface_space():
        # Hugging Face Spaces: bind to all interfaces on port 7860.
        launch_options = {
            "server_name": "0.0.0.0",
            "server_port": 7860,
            "share": False,  # HF Spaces handles sharing
            "show_error": True,
            "enable_monitoring": False,  # Disable monitoring in HF Spaces
            "favicon_path": None,
            "app_kwargs": {
                "docs_url": None,  # Disable docs endpoint
                "redoc_url": None,  # Disable redoc endpoint
            },
        }
    else:
        # Local development: request a public share link.
        launch_options = {"share": True}

    demo.launch(**launch_options)