Spaces:

shukdev3
/

TTS3

Sleeping

App Files Files Community

TTS3 / app.py

shukdev3

Create app.py

aac6e76 verified 19 days ago

Raw

History Blame Contribute Delete

31.8 kB

	import os
	import gradio as gr
	from groq import Groq
	import tempfile
	import requests
	import json
	import base64
	from pathlib import Path
	import time
	import sys
	import platform

	# Voice catalogues offered by each provider (English voices only).

	# Groq PlayAI voices: every identifier follows the "<Name>-PlayAI" pattern.
	groq_voices = [
	    f"{name}-PlayAI"
	    for name in (
	        "Arista", "Atlas", "Basil", "Briggs",
	        "Calum", "Celeste", "Cheyenne", "Chip",
	        "Cillian", "Deedee", "Fritz", "Gail",
	        "Indigo", "Mamaw", "Mason", "Mikail",
	        "Mitch", "Quinn", "Thunder",
	    )
	]

	# OpenAI voices (only usable when the user supplies an OpenAI API key).
	openai_voices = "alloy ash coral sage echo fable onyx nova shimmer".split()

	# Edge TTS voices (free, no API key required): "en-US-<Name>Neural".
	edge_voices = [
	    f"en-US-{name}Neural"
	    for name in (
	        "Aria", "Ana", "Christopher", "Eric", "Guy",
	        "Jenny", "Michelle", "Roger", "Steffan",
	    )
	]

	def check_internet_connection(host="8.8.8.8", port=53, timeout=3):
	    """Report whether a TCP connection to a probe endpoint can be opened.

	    Args:
	        host: Address to probe; defaults to 8.8.8.8.
	        port: TCP port to probe; defaults to 53.
	        timeout: Seconds to wait for the connection before giving up.

	    Returns:
	        True when the connection was established, False when the attempt
	        raised OSError (unreachable, refused, timed out, ...).
	    """
	    import socket

	    try:
	        # create_connection returns an open socket; the context manager closes
	        # it so repeated probes do not leak file descriptors.
	        with socket.create_connection((host, port), timeout=timeout):
	            return True
	    except OSError:
	        return False

	def is_huggingface_space():
	    """Return True when the SPACE_ID environment variable is set.

	    The app treats the presence of SPACE_ID as the signal that it is running
	    on Hugging Face Spaces.
	    """
	    return "SPACE_ID" in os.environ

	def chunk_text(text, max_length=4000):
	    """Split text into chunks of at most ``max_length`` characters.

	    Text that already fits is returned unchanged as a single chunk. Longer
	    text is split on whitespace and the words are re-joined with single
	    spaces, packing as many words as fit into each chunk. A single word
	    longer than ``max_length`` is hard-split so that no chunk ever exceeds
	    the limit.

	    Args:
	        text: The text to split.
	        max_length: Maximum number of characters per chunk (must be > 0).

	    Returns:
	        A list of chunks, each no longer than ``max_length``.

	    Raises:
	        ValueError: If ``max_length`` is not positive.
	    """
	    if max_length <= 0:
	        raise ValueError("max_length must be a positive integer")

	    if len(text) <= max_length:
	        return [text]

	    chunks = []
	    current_words = []
	    current_length = 0  # always equals len(" ".join(current_words))

	    for word in text.split():
	        # A word that can never fit is cut into max_length-sized pieces.
	        while len(word) > max_length:
	            if current_words:
	                chunks.append(" ".join(current_words))
	                current_words, current_length = [], 0
	            chunks.append(word[:max_length])
	            word = word[max_length:]
	        if not word:
	            continue

	        # The joining space only costs a character when the chunk already
	        # holds a word; charging it for the first word under-fills chunks.
	        separator = 1 if current_words else 0
	        if current_length + separator + len(word) <= max_length:
	            current_words.append(word)
	            current_length += separator + len(word)
	        else:
	            chunks.append(" ".join(current_words))
	            current_words = [word]
	            current_length = len(word)

	    if current_words:
	        chunks.append(" ".join(current_words))

	    return chunks

	def try_groq_tts(api_key, text, voice):
	    """Generate speech with Groq's PlayAI model for text that fits one request.

	    Text that ``chunk_text`` splits into anything other than a single chunk
	    (3,500 character budget) is rejected with a hint instead of being
	    synthesized; there is no chunk stitching and no retry.

	    Args:
	        api_key: Groq API key.
	        text: Text to synthesize.
	        voice: PlayAI voice identifier.

	    Returns:
	        ``(path, message)`` where ``path`` is the generated WAV file, or
	        ``(None, message)`` describing why no audio was produced.
	    """
	    temp_path = None
	    try:
	        if not check_internet_connection():
	            return None, "❌ No internet connection available for Groq TTS"

	        client = Groq(api_key=api_key)

	        # Leave some buffer below the request size limit.
	        if len(chunk_text(text, 3500)) != 1:
	            return None, f"⚠️ Text too long for single request ({len(text)} chars). Try shorter text or use Edge TTS for longer content."

	        response = client.audio.speech.create(
	            model="playai-tts",
	            voice=voice,
	            input=text,
	            response_format="wav",
	        )

	        # Reserve a path and close our handle before the SDK writes to it by
	        # name; writing to a file that is still open fails on some platforms.
	        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
	            temp_path = temp_file.name
	        response.write_to_file(temp_path)
	        return temp_path, "✅ Speech generated successfully with Groq PlayAI!"

	    except Exception as e:
	        # Do not leave an orphaned temp file behind when generation fails.
	        if temp_path and os.path.exists(temp_path):
	            try:
	                os.remove(temp_path)
	            except OSError:
	                pass

	        error_msg = str(e)
	        if "rate_limit_exceeded" in error_msg or "429" in error_msg:
	            return None, "🔄 Groq rate limit reached. Try again in a moment or use Edge TTS..."
	        return None, f"❌ Groq error: {error_msg}"

	def try_openai_tts(api_key, text, voice):
	    """Generate speech with OpenAI's ``tts-1`` model.

	    Only the first 4,000 characters of ``text`` are sent.

	    Args:
	        api_key: OpenAI API key.
	        text: Text to synthesize.
	        voice: OpenAI voice name.

	    Returns:
	        ``(path, message)`` where ``path`` is the generated WAV file, or
	        ``(None, message)`` describing why no audio was produced.
	    """
	    temp_path = None
	    try:
	        if not check_internet_connection():
	            return None, "❌ No internet connection available for OpenAI TTS"

	        import openai
	        client = openai.OpenAI(api_key=api_key)

	        response = client.audio.speech.create(
	            model="tts-1",
	            voice=voice,
	            input=text[:4000],  # stay under the OpenAI input limit
	            # Request WAV explicitly so the content matches the ".wav"
	            # suffix; otherwise the API's default format is returned.
	            response_format="wav",
	        )

	        # Close our handle before the SDK writes to the path by name.
	        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
	            temp_path = temp_file.name
	        response.stream_to_file(temp_path)
	        return temp_path, "✅ Speech generated successfully with OpenAI TTS!"

	    except ImportError:
	        return None, "❌ OpenAI library not installed. Install with: pip install openai"
	    except Exception as e:
	        # Do not leave an orphaned temp file behind when generation fails.
	        if temp_path and os.path.exists(temp_path):
	            try:
	                os.remove(temp_path)
	            except OSError:
	                pass
	        return None, f"❌ OpenAI error: {str(e)}"

	def try_edge_tts(text, voice="en-US-JennyNeural"):
	    """Generate speech with Microsoft Edge TTS (no API key needed).

	    Only the first 10,000 characters of ``text`` are sent.

	    Args:
	        text: Text to synthesize.
	        voice: Edge TTS voice name.

	    Returns:
	        ``(path, message)`` where ``path`` is the generated MP3 file, or
	        ``(None, message)`` describing why no audio was produced.
	    """
	    temp_path = None
	    try:
	        if not check_internet_connection():
	            return None, "❌ No internet connection available for Edge TTS"

	        import edge_tts
	        import asyncio

	        async def generate_edge_speech(output_path):
	            """Stream the synthesized audio chunks into ``output_path``."""
	            communicate = edge_tts.Communicate(text[:10000], voice)
	            with open(output_path, "wb") as audio_out:
	                async for chunk in communicate.stream():
	                    if chunk["type"] == "audio":
	                        audio_out.write(chunk["data"])

	        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_file:
	            temp_path = temp_file.name

	        # asyncio.run creates a fresh event loop and closes it afterwards, so
	        # no loop is leaked per call. It must not be called from a thread that
	        # already has a running loop (the previous run_until_complete approach
	        # had the same restriction).
	        asyncio.run(generate_edge_speech(temp_path))

	        # An empty file is not playable; report it as a failure instead.
	        if os.path.getsize(temp_path) == 0:
	            os.remove(temp_path)
	            return None, "❌ Edge TTS error: no audio data was received"

	        return temp_path, "✅ Speech generated successfully with Microsoft Edge TTS (Free & High Quality)!"

	    except ImportError:
	        return None, "❌ Edge TTS not installed. Install with: pip install edge-tts"
	    except Exception as e:
	        # Do not leave a partial temp file behind when generation fails.
	        if temp_path and os.path.exists(temp_path):
	            try:
	                os.remove(temp_path)
	            except OSError:
	                pass
	        return None, f"❌ Edge TTS error: {str(e)}"

	def try_pyttsx3_fallback(text):
	    """Synthesize speech offline with pyttsx3 (local environments only).

	    Only the first 2,000 characters of ``text`` are spoken. When pyttsx3
	    raises or produces no audio, the work is handed to
	    ``try_alternative_offline_tts``.

	    Args:
	        text: Text to synthesize.

	    Returns:
	        ``(path, message)`` where ``path`` is the generated WAV file, or
	        ``(None, message)`` describing why no audio was produced.
	    """
	    if is_huggingface_space():
	        return None, "❌ Offline TTS not available in Hugging Face Spaces. Use Edge TTS (Free) instead!"

	    limited_text = text[:2000]  # limit for stability
	    temp_path = None

	    try:
	        import pyttsx3

	        engine = pyttsx3.init()

	        # Pick an English voice: the scan stops at the first English voice
	        # whose name looks female, otherwise the last English voice seen wins.
	        voices = engine.getProperty('voices')
	        if voices:
	            english_voice = None
	            for voice in voices:
	                voice_id = voice.id.lower()
	                voice_name = voice.name.lower() if hasattr(voice, 'name') else ""

	                if any(keyword in voice_id or keyword in voice_name
	                       for keyword in ['english', 'en-us', 'en_us', 'zira', 'hazel', 'eva']):
	                    english_voice = voice.id
	                    if any(female_keyword in voice_name
	                           for female_keyword in ['zira', 'hazel', 'eva', 'female']):
	                        break  # prefer female voices

	            if english_voice:
	                engine.setProperty('voice', english_voice)

	        engine.setProperty('rate', 180)    # speed of speech (words per minute)
	        engine.setProperty('volume', 0.9)  # volume level (0.0 to 1.0)

	        # Reserve an output path for the engine to write to.
	        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
	            temp_path = temp_file.name

	        engine.save_to_file(limited_text, temp_path)
	        engine.runAndWait()

	        if os.path.exists(temp_path) and os.path.getsize(temp_path) > 0:
	            return temp_path, "✅ Speech generated with offline TTS! (No internet required)"

	    except ImportError:
	        return None, "❌ pyttsx3 not installed. Use Edge TTS (Free) instead!"
	    except Exception:
	        # Deliberate best effort: any pyttsx3 failure falls through to the
	        # system-command fallback below.
	        pass

	    # pyttsx3 produced nothing usable: drop the empty temp file so it is not
	    # leaked, then try the alternative offline method.
	    if temp_path and os.path.exists(temp_path):
	        try:
	            os.remove(temp_path)
	        except OSError:
	            pass
	    return try_alternative_offline_tts(limited_text)

	def try_alternative_offline_tts(text):
	    """Synthesize speech offline with OS facilities (local environments only).

	    Uses SAPI via ``win32com`` on Windows, the ``say`` command on macOS, and
	    ``espeak`` then Festival's ``text2wave`` on Linux.

	    The text is fed to the command-line tools on standard input rather than
	    as an argument, so user text that begins with ``-`` cannot be parsed as a
	    command-line option.

	    Args:
	        text: Text to synthesize.

	    Returns:
	        ``(path, message)`` where ``path`` is the generated WAV file, or
	        ``(None, message)`` describing why no audio was produced.
	    """
	    if is_huggingface_space():
	        return None, "❌ System TTS not available in Hugging Face Spaces. Use Edge TTS (Free) instead!"

	    import subprocess

	    temp_path = None

	    def remove_temp_file():
	        """Delete the reserved output file so failed attempts leak nothing."""
	        if temp_path and os.path.exists(temp_path):
	            try:
	                os.remove(temp_path)
	            except OSError:
	                pass

	    def has_audio():
	        """True when the output file exists and is non-empty."""
	        return os.path.exists(temp_path) and os.path.getsize(temp_path) > 0

	    tool_errors = (subprocess.CalledProcessError, subprocess.TimeoutExpired, FileNotFoundError)

	    try:
	        system = platform.system().lower()

	        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
	            temp_path = temp_file.name

	        if system == "windows":
	            # Windows SAPI TTS
	            try:
	                import win32com.client
	                speaker = win32com.client.Dispatch("SAPI.SpVoice")

	                # Route the voice output into a file stream.
	                # NOTE(review): mode 3 is presumably "create for write" - confirm.
	                file_stream = win32com.client.Dispatch("SAPI.SpFileStream")
	                file_stream.Open(temp_path, 3)
	                speaker.AudioOutputStream = file_stream
	                speaker.Speak(text)
	                file_stream.Close()

	                if has_audio():
	                    return temp_path, "✅ Speech generated with Windows SAPI TTS (Offline)!"
	            except ImportError:
	                pass

	        elif system == "darwin":  # macOS
	            try:
	                # With no message argument, `say` reads the text from stdin.
	                subprocess.run(['say', '-o', temp_path, '--data-format=LEF32@22050'],
	                               input=text, text=True, check=True, timeout=30)
	                if has_audio():
	                    return temp_path, "✅ Speech generated with macOS TTS (Offline)!"
	            except tool_errors:
	                pass

	        elif system == "linux":
	            try:
	                # Try espeak first; --stdin makes it read the text from stdin.
	                subprocess.run(['espeak', '--stdin', '-w', temp_path],
	                               input=text, text=True, check=True, timeout=30)
	                if has_audio():
	                    return temp_path, "✅ Speech generated with espeak TTS (Offline)!"
	            except tool_errors:
	                try:
	                    # Try festival as backup
	                    subprocess.run(['text2wave', '-o', temp_path],
	                                   input=text, text=True, check=True, timeout=30)
	                    if has_audio():
	                        return temp_path, "✅ Speech generated with Festival TTS (Offline)!"
	                except tool_errors:
	                    pass

	        remove_temp_file()
	        return None, f"❌ No offline TTS available for {system}. Use Edge TTS (Free) instead!"

	    except Exception as e:
	        remove_temp_file()
	        return None, f"❌ Alternative offline TTS error: {str(e)}"

	# Main function with multiple provider fallback
	def generate_speech(groq_api_key, openai_api_key, text, voice_provider, voice):
	    """Generate speech for ``text``, falling back across providers.

	    Providers are attempted in this order, each only when the selection (and
	    API key, where one is needed) allows it: Groq PlayAI, OpenAI TTS, Edge
	    TTS, and finally offline TTS when "Offline TTS" was explicitly chosen.
	    A voice that does not belong to the provider being tried is replaced by
	    that provider's default voice.

	    Args:
	        groq_api_key: Groq API key, or empty to skip Groq.
	        openai_api_key: OpenAI API key, or empty to skip OpenAI.
	        text: Text to synthesize (at most 10,000 characters).
	        voice_provider: Provider name selected in the UI.
	        voice: Voice selected in the UI.

	    Returns:
	        ``(audio_path, status_message)``; ``audio_path`` is None when no
	        provider produced audio.
	    """
	    # Whitespace-only input has nothing to speak either.
	    if not text or not text.strip():
	        return None, "⚠️ Please enter some text to generate speech."

	    if len(text) > 10000:
	        return None, "🚫 Text input exceeds 10,000 character limit."

	    # Check internet connection and environment
	    has_internet = check_internet_connection()
	    is_hf_space = is_huggingface_space()

	    internet_status = "🌐 Internet: Connected" if has_internet else "📴 Internet: Offline"
	    env_status = " (Hugging Face Spaces)" if is_hf_space else " (Local Environment)"

	    status_msg = f"🔄 Generating speech...\n{internet_status}{env_status}\n"

	    # Without internet only a local "Offline TTS" request can still succeed;
	    # returning unconditionally here would make offline TTS unreachable in
	    # exactly the situation it exists for.
	    wants_offline = voice_provider == "Offline TTS"
	    if not has_internet and (is_hf_space or not wants_offline):
	        return None, status_msg + "❌ No internet connection available. All TTS providers require internet connection in this environment."

	    # Groq is tried when selected directly and also in Auto mode.
	    if groq_api_key and voice_provider in ("Groq (PlayAI)", "Auto (Try All)"):
	        status_msg += "🎯 Using Groq PlayAI...\n"
	        groq_voice = voice if voice in groq_voices else "Fritz-PlayAI"
	        audio_file, message = try_groq_tts(groq_api_key, text, groq_voice)
	        if audio_file:
	            return audio_file, status_msg + message
	        status_msg += f"❌ Groq failed: {message}\n"

	    # Try OpenAI if available
	    if openai_api_key and voice_provider in ["OpenAI TTS", "Auto (Try All)"]:
	        status_msg += "🔄 Trying OpenAI TTS...\n"
	        openai_voice = voice if voice in openai_voices else "alloy"
	        audio_file, message = try_openai_tts(openai_api_key, text, openai_voice)
	        if audio_file:
	            return audio_file, status_msg + message
	        status_msg += f"❌ OpenAI failed: {message}\n"

	    # Edge TTS (free) is the fallback for every online provider choice.
	    if voice_provider in ["Edge TTS (Free)", "Auto (Try All)", "Groq (PlayAI)", "OpenAI TTS"]:
	        status_msg += "🔄 Using Edge TTS (Free & High Quality)...\n"
	        edge_voice = voice if voice in edge_voices else "en-US-JennyNeural"
	        audio_file, message = try_edge_tts(text, edge_voice)
	        if audio_file:
	            return audio_file, status_msg + message
	        status_msg += f"❌ Edge TTS failed: {message}\n"

	    # Only try offline TTS if specifically requested and not in HF Spaces
	    if wants_offline:
	        if is_hf_space:
	            return None, status_msg + "❌ Offline TTS is not available in Hugging Face Spaces. Please use 'Edge TTS (Free)' instead!"
	        status_msg += "🔄 Using offline TTS (works without internet)...\n"
	        audio_file, message = try_pyttsx3_fallback(text)
	        if audio_file:
	            return audio_file, status_msg + message
	        status_msg += f"❌ Offline TTS failed: {message}\n"

	    # Final fallback message
	    if is_hf_space:
	        return None, status_msg + "❌ All online TTS providers failed. Please check your API keys or try again later. Edge TTS (Free) is recommended for HF Spaces!"
	    return None, status_msg + "❌ All TTS providers failed. Please check your setup or try shorter text."

	def update_voice_options(provider):
	    """Build the voice dropdown update that matches the selected provider.

	    Args:
	        provider: Provider name chosen in the provider dropdown.

	    Returns:
	        A ``gr.Dropdown`` carrying the provider's voices, its default voice
	        and the label to display.
	    """
	    default_label = "🎭 Voice Selection"

	    if provider == "Groq (PlayAI)":
	        choices, value, label = groq_voices, "Fritz-PlayAI", default_label
	    elif provider == "OpenAI TTS":
	        choices, value, label = openai_voices, "alloy", default_label
	    elif provider == "Edge TTS (Free)":
	        choices, value, label = edge_voices, "en-US-JennyNeural", default_label
	    elif provider == "Offline TTS":
	        choices, value, label = ["Default System Voice"], "Default System Voice", default_label
	    else:  # Auto
	        choices, value, label = groq_voices, "Fritz-PlayAI", "🎭 Voice (Auto mode will try best match)"

	    # The label is passed on every branch: properties omitted from an update
	    # keep their previous value, so the Auto-mode label would otherwise stick
	    # after the user switches to another provider.
	    return gr.Dropdown(choices=choices, value=value, visible=True, label=label)

	# Custom CSS passed to gr.Blocks(css=...): a dark, glass-style look.
	# It defines the colour/shadow variables under :root, styles the page
	# container, headings, the #generate-btn button and the .gr-* selectors, and
	# provides the .info-box / .warning-box / .no-api-box / .hf-space-box classes
	# used by the HTML snippets elsewhere in this file.
	custom_css = """
	@import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap');
	* {
	box-sizing: border-box;
	}
	:root {
	--primary-gradient: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
	--secondary-gradient: linear-gradient(135deg, #f093fb 0%, #f5576c 100%);
	--accent-gradient: linear-gradient(135deg, #4facfe 0%, #00f2fe 100%);
	--dark-bg: linear-gradient(135deg, #0c0c0c 0%, #1a1a2e 50%, #16213e 100%);
	--glass-bg: rgba(255, 255, 255, 0.08);
	--glass-border: rgba(255, 255, 255, 0.15);
	--text-primary: #ffffff;
	--text-secondary: rgba(255, 255, 255, 0.7);
	--shadow-primary: 0 8px 32px rgba(0, 0, 0, 0.4);
	--shadow-hover: 0 12px 48px rgba(0, 0, 0, 0.6);
	--border-radius: 16px;
	}

	body, .gradio-container {
	background: var(--dark-bg) !important;
	font-family: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif !important;
	color: var(--text-primary) !important;
	overflow-x: hidden;
	}

	.gradio-container {
	max-width: 1400px !important;
	margin: 40px auto !important;
	padding: 0 20px !important;
	min-height: 100vh;
	}

	.gradio-container > div {
	background: var(--glass-bg) !important;
	backdrop-filter: blur(20px) !important;
	border: 1px solid var(--glass-border) !important;
	border-radius: var(--border-radius) !important;
	padding: 40px !important;
	box-shadow: var(--shadow-primary) !important;
	position: relative;
	overflow: hidden;
	}

	h1 {
	text-align: center !important;
	font-size: clamp(2.5rem, 5vw, 4rem) !important;
	font-weight: 700 !important;
	background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
	-webkit-background-clip: text !important;
	-webkit-text-fill-color: transparent !important;
	background-clip: text !important;
	margin: 0 0 50px 0 !important;
	animation: glow 3s ease-in-out infinite alternate;
	}

	@keyframes glow {
	0% { filter: drop-shadow(0 0 20px rgba(102, 126, 234, 0.3)); }
	100% { filter: drop-shadow(0 0 30px rgba(118, 75, 162, 0.5)); }
	}

	#generate-btn {
	background: linear-gradient(135deg, #00f2fe 0%, #4facfe 100%) !important;
	border: 2px solid #4facfe !important;
	color: #ffffff !important;
	font-weight: bold !important;
	box-shadow: 0 0 12px rgba(0, 242, 254, 0.5) !important;
	transition: all 0.3s ease !important;
	margin-top: 26px !important;
	}

	#generate-btn:hover {
	background: linear-gradient(135deg, #4facfe 0%, #00f2fe 100%) !important;
	transform: scale(1.03) !important;
	box-shadow: 0 0 18px rgba(0, 242, 254, 0.7) !important;
	}

	.gr-textbox, .gr-dropdown {
	background: var(--glass-bg) !important;
	border: 2px solid transparent !important;
	border-radius: 12px !important;
	position: relative;
	transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1) !important;
	backdrop-filter: blur(10px) !important;
	}

	.gr-textbox textarea, .gr-textbox input, .gr-dropdown select {
	background: transparent !important;
	color: var(--text-primary) !important;
	border: none !important;
	font-size: 16px !important;
	font-weight: 400 !important;
	padding: 16px !important;
	}

	.gr-input-label, .gr-output-label {
	color: var(--text-primary) !important;
	font-weight: 600 !important;
	font-size: 14px !important;
	text-transform: uppercase;
	letter-spacing: 0.5px;
	margin-bottom: 12px !important;
	background: var(--accent-gradient);
	-webkit-background-clip: text;
	-webkit-text-fill-color: transparent;
	background-clip: text;
	}

	.gr-button {
	background: var(--primary-gradient) !important;
	border: none !important;
	color: white !important;
	font-weight: 600 !important;
	font-size: 16px !important;
	padding: 16px 32px !important;
	border-radius: 12px !important;
	transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1) !important;
	text-transform: uppercase;
	letter-spacing: 0.5px;
	box-shadow: 0 4px 20px rgba(102, 126, 234, 0.3) !important;
	margin-top: 20px !important;
	}

	.gr-button:hover {
	transform: translateY(-2px) !important;
	box-shadow: var(--shadow-hover) !important;
	filter: brightness(1.1);
	}

	.gr-audio {
	background: var(--glass-bg) !important;
	border: 1px solid var(--glass-border) !important;
	border-radius: 12px !important;
	padding: 20px !important;
	backdrop-filter: blur(10px) !important;
	transition: all 0.3s ease !important;
	}

	.info-box {
	background: rgba(0, 242, 254, 0.1) !important;
	border: 1px solid rgba(0, 242, 254, 0.3) !important;
	border-radius: 12px !important;
	padding: 20px !important;
	margin: 20px 0 !important;
	backdrop-filter: blur(10px) !important;
	}

	.info-box,
	.info-box * {
	color: white !important;
	}

	.warning-box,
	.warning-box * {
	color: white !important;
	}

	.no-api-box {
	background: rgba(46, 204, 113, 0.1) !important;
	border: 1px solid rgba(46, 204, 113, 0.3) !important;
	border-radius: 12px !important;
	padding: 20px !important;
	margin: 20px 0 !important;
	backdrop-filter: blur(10px) !important;
	}

	.no-api-box,
	.no-api-box * {
	color: white !important;
	}

	.warning-box {
	background: rgba(255, 193, 7, 0.1) !important;
	border: 1px solid rgba(255, 193, 7, 0.3) !important;
	border-radius: 12px !important;
	padding: 15px !important;
	margin: 15px 0 !important;
	backdrop-filter: blur(10px) !important;
	}

	.hf-space-box {
	background: rgba(255, 87, 34, 0.1) !important;
	border: 1px solid rgba(255, 87, 34, 0.3) !important;
	border-radius: 12px !important;
	padding: 20px !important;
	margin: 20px 0 !important;
	backdrop-filter: blur(10px) !important;
	}

	.hf-space-box,
	.hf-space-box * {
	color: white !important;
	}
	"""

	# Updated HTML for Hugging Face Spaces
	def get_custom_html():
	    """Return the header HTML shown at the top of the page.

	    The local variant advertises offline TTS; the Hugging Face Spaces variant
	    recommends Edge TTS and notes that offline TTS is unavailable there.
	    """
	    if not is_huggingface_space():
	        # Local environment banner.
	        return """
	<div style="text-align: center; margin-bottom: 20px;">
	<div style="font-size: 3rem;">🎙️</div>
	<h1>Advanced Multi-Provider TTS Studio</h1>
	<p style="color: rgba(255, 255, 255, 0.7); font-size: 18px; margin: 0; font-weight: 300;">
	Transform your text into natural-sounding speech with multiple AI-powered voice providers
	</p>

	<div class="hf-space-box">
	<h3>💻 Running Locally</h3>
	<p><strong>All TTS options available including offline TTS!</strong></p>
	<p>✅ Edge TTS (Free) • ✅ Offline TTS • ✅ Premium APIs • ✅ Full feature set</p>
	</div>

	<div class="no-api-box">
	<h3>🆓 No API Key? Multiple Free Options!</h3>
	<p><strong>Select "Edge TTS (Free)" for online high-quality TTS or "Offline TTS" for no internet!</strong></p>
	<p>✅ Edge TTS: High quality voices with internet • ✅ Offline TTS: Basic quality, works anywhere</p>
	</div>

	<div class="info-box">
	<p><strong>🔄 Multiple Providers Available:</strong> Groq PlayAI, OpenAI TTS, Microsoft Edge TTS (Free), Offline TTS</p>
	<p><strong>🛡️ Smart Fallback System:</strong> If one provider fails, automatically tries the next available</p>
	<p><strong>💡 Recommended:</strong> Use "Auto (Try All)" for maximum reliability or specific providers based on your needs</p>
	</div>
	</div>
	"""

	    # Hugging Face Spaces banner.
	    return """
	<div style="text-align: center; margin-bottom: 20px;">
	<div style="font-size: 3rem;">🎙️</div>
	<h1>Advanced Multi-Provider TTS Studio</h1>
	<p style="color: rgba(255, 255, 255, 0.7); font-size: 18px; margin: 0; font-weight: 300;">
	Transform your text into natural-sounding speech with multiple AI-powered voice providers
	</p>

	<div class="hf-space-box">
	<h3>🚀 Running on Hugging Face Spaces</h3>
	<p><strong>Recommended: Use "Edge TTS (Free)" for best results!</strong></p>
	<p>✅ High-quality voices • ✅ No API key needed • ✅ Up to 10,000 characters • ✅ Multiple voice options</p>
	<p>⚠️ Offline TTS is not available in cloud environments</p>
	</div>

	<div class="no-api-box">
	<h3>🆓 No API Key? No Problem!</h3>
	<p><strong>Select "Edge TTS (Free)" for high-quality TTS without any API keys!</strong></p>
	<p>✅ Microsoft's premium voices • ✅ Completely free • ✅ Works great in HF Spaces</p>
	</div>

	<div class="info-box">
	<p><strong>🔄 Multiple Providers Available:</strong> Groq PlayAI, OpenAI TTS, Microsoft Edge TTS (Free)</p>
	<p><strong>🛡️ Smart Fallback System:</strong> If one provider fails, automatically tries the next available</p>
	<p><strong>💡 Recommended:</strong> Use "Auto (Try All)" for maximum reliability or "Edge TTS (Free)" for consistent quality</p>
	</div>
	</div>
	"""

	# Gradio interface
	with gr.Blocks(css=custom_css, theme="ocean") as demo:
	    # Page header (differs between Hugging Face Spaces and local runs).
	    gr.HTML(get_custom_html())

	    # Optional API keys, side by side.
	    with gr.Row():
	        with gr.Column():
	            groq_api_key = gr.Textbox(
	                type="password",
	                lines=1,
	                label="🔐 Groq API Key (Optional)",
	                placeholder="Optional: Paste your Groq API key here for PlayAI voices...",
	            )
	        with gr.Column():
	            openai_api_key = gr.Textbox(
	                type="password",
	                lines=1,
	                label="🔐 OpenAI API Key (Optional)",
	                placeholder="Optional: OpenAI API key for premium TTS voices...",
	            )

	    # Quick-start hint for users without API keys.
	    if not is_huggingface_space():
	        gr.HTML("""
	<div class="warning-box">
	<h3>⚡ Quick Start Options</h3>
	<p><strong>Free Options:</strong> "Edge TTS (Free)" (internet required) or "Offline TTS" (no internet needed)</p>
	<p><strong>Premium Options:</strong> Add your Groq or OpenAI API keys above for additional voice options</p>
	</div>
	""")
	    else:
	        gr.HTML("""
	<div class="warning-box">
	<h3>⚡ Quick Start for HF Spaces Users</h3>
	<p><strong>Best Option:</strong> Select "Edge TTS (Free)" below - no API key needed!</p>
	<p>💡 <strong>Pro tip:</strong> Edge TTS provides Microsoft's premium voice quality completely free</p>
	</div>
	""")

	    # Main interface: inputs on the left, results on the right.
	    with gr.Row():
	        with gr.Column(scale=2):
	            # "Offline TTS" is only offered outside Hugging Face Spaces.
	            provider_choices = [
	                "Auto (Try All)", "Edge TTS (Free)", "Groq (PlayAI)", "OpenAI TTS",
	            ] + ([] if is_huggingface_space() else ["Offline TTS"])

	            voice_provider = gr.Dropdown(
	                label="🎯 TTS Provider",
	                choices=provider_choices,
	                value="Edge TTS (Free)" if is_huggingface_space() else "Auto (Try All)",
	                info="Edge TTS (Free) is recommended for HF Spaces!" if is_huggingface_space() else "Auto tries all providers in order",
	            )

	            # Starts with the Edge voices; refreshed when the provider changes.
	            voice = gr.Dropdown(
	                label="🎭 Voice Selection",
	                choices=edge_voices,
	                value="en-US-JennyNeural",
	                info="Available voices update based on selected provider",
	                visible=True,
	            )

	            text_input = gr.Textbox(
	                label="📝 Text to Convert to Speech",
	                placeholder="Enter your text here... (up to 10,000 characters)",
	                info="💡 Longer texts work better with Edge TTS (Free)",
	                lines=8,
	                max_lines=15,
	            )

	            generate_btn = gr.Button(
	                "🎵 Generate Speech",
	                elem_id="generate-btn",
	                variant="primary",
	                size="lg",
	            )

	        with gr.Column(scale=1):
	            audio_output = gr.Audio(
	                label="🔊 Generated Speech",
	                type="filepath",
	                interactive=False,
	            )

	            status_output = gr.Textbox(
	                label="📊 Status & Information",
	                info="Real-time status updates and results",
	                lines=8,
	                interactive=False,
	            )

	    # Refresh the voice list whenever another provider is picked.
	    voice_provider.change(fn=update_voice_options, inputs=[voice_provider], outputs=[voice])

	    # Run the synthesis when the button is pressed.
	    generate_btn.click(
	        fn=generate_speech,
	        inputs=[groq_api_key, openai_api_key, text_input, voice_provider, voice],
	        outputs=[audio_output, status_output],
	    )

	    # Footer: provider comparison cards and a closing tip.
	    gr.HTML("""
	<div style="text-align: center; margin-top: 40px; padding: 20px; background: rgba(255, 255, 255, 0.05); border-radius: 12px; backdrop-filter: blur(10px);">
	<h3 style="color: rgba(255, 255, 255, 0.9); margin-bottom: 15px;">🎯 Provider Comparison</h3>
	<div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(300px, 1fr)); gap: 15px; text-align: left;">
	<div style="background: rgba(0, 242, 254, 0.1); padding: 15px; border-radius: 8px; border: 1px solid rgba(0, 242, 254, 0.3);">
	<h4 style="color: #00f2fe; margin: 0 0 10px 0;">🆓 Edge TTS (Free)</h4>
	<p style="color: rgba(255, 255, 255, 0.8); margin: 0; font-size: 14px;">✅ No API key required<br>✅ High quality Microsoft voices<br>✅ Up to 10,000 characters<br>✅ Perfect for HF Spaces</p>
	</div>
	<div style="background: rgba(102, 126, 234, 0.1); padding: 15px; border-radius: 8px; border: 1px solid rgba(102, 126, 234, 0.3);">
	<h4 style="color: #667eea; margin: 0 0 10px 0;">🎭 Groq PlayAI</h4>
	<p style="color: rgba(255, 255, 255, 0.8); margin: 0; font-size: 14px;">🔑 Requires API key<br>🎪 Premium entertainment voices<br>⚡ Fast generation<br>📝 ~3,500 char limit</p>
	</div>
	<div style="background: rgba(118, 75, 162, 0.1); padding: 15px; border-radius: 8px; border: 1px solid rgba(118, 75, 162, 0.3);">
	<h4 style="color: #764ba2; margin: 0 0 10px 0;">🤖 OpenAI TTS</h4>
	<p style="color: rgba(255, 255, 255, 0.8); margin: 0; font-size: 14px;">🔑 Requires API key<br>🎯 Professional quality<br>💰 Pay per use<br>📝 4,000 char limit</p>
	</div>
	</div>
	<div style="margin-top: 20px; padding: 15px; background: rgba(46, 204, 113, 0.1); border-radius: 8px; border: 1px solid rgba(46, 204, 113, 0.3);">
	<p style="color: rgba(255, 255, 255, 0.9); margin: 0; font-size: 16px;"><strong>💡 Pro Tip:</strong> Use "Auto (Try All)" to automatically find the best available provider, or stick with "Edge TTS (Free)" for consistent, high-quality results without any setup!</p>
	</div>
	</div>
	""")

	# Launch configuration
	if __name__ == "__main__":
	    if not is_huggingface_space():
	        # Local development: launch with a share link.
	        demo.launch(share=True)
	    else:
	        # Hugging Face Spaces: bind to all interfaces on port 7860.
	        demo.launch(
	            server_name="0.0.0.0",
	            server_port=7860,
	            share=False,  # HF Spaces handles sharing
	            show_error=True,
	            enable_monitoring=False,  # disable monitoring in HF Spaces
	            favicon_path=None,
	            # Disable the docs and redoc endpoints.
	            app_kwargs={"docs_url": None, "redoc_url": None},
	        )