import os import gradio as gr from groq import Groq import tempfile import requests import json import base64 from pathlib import Path import time import sys import platform # Available English voices for different providers groq_voices = [ "Arista-PlayAI", "Atlas-PlayAI", "Basil-PlayAI", "Briggs-PlayAI", "Calum-PlayAI", "Celeste-PlayAI", "Cheyenne-PlayAI", "Chip-PlayAI", "Cillian-PlayAI", "Deedee-PlayAI", "Fritz-PlayAI", "Gail-PlayAI", "Indigo-PlayAI", "Mamaw-PlayAI", "Mason-PlayAI", "Mikail-PlayAI", "Mitch-PlayAI", "Quinn-PlayAI", "Thunder-PlayAI" ] # OpenAI voices (if user has OpenAI API key) openai_voices = ["alloy", "ash", "coral", "sage", "echo", "fable", "onyx", "nova", "shimmer"] # Edge TTS voices (free, no API key required) edge_voices = [ "en-US-AriaNeural", "en-US-AnaNeural", "en-US-ChristopherNeural", "en-US-EricNeural", "en-US-GuyNeural", "en-US-JennyNeural", "en-US-MichelleNeural", "en-US-RogerNeural", "en-US-SteffanNeural" ] def check_internet_connection(): """Check if internet connection is available""" try: import socket socket.create_connection(("8.8.8.8", 53), timeout=3) return True except OSError: return False def is_huggingface_space(): """Check if running on Hugging Face Spaces""" return os.environ.get("SPACE_ID") is not None def chunk_text(text, max_length=4000): """Split text into chunks to avoid rate limits""" if len(text) <= max_length: return [text] chunks = [] words = text.split() current_chunk = [] current_length = 0 for word in words: if current_length + len(word) + 1 <= max_length: current_chunk.append(word) current_length += len(word) + 1 else: if current_chunk: chunks.append(" ".join(current_chunk)) current_chunk = [word] current_length = len(word) if current_chunk: chunks.append(" ".join(current_chunk)) return chunks def try_groq_tts(api_key, text, voice): """Try Groq TTS with chunking and retry logic""" try: if not check_internet_connection(): return None, "❌ No internet connection available for Groq TTS" client = Groq(api_key=api_key) # Check if text needs chunking chunks = chunk_text(text, 3500) # Leave some buffer if len(chunks) == 1: # Single chunk - direct call response = client.audio.speech.create( model="playai-tts", voice=voice, input=text, response_format="wav" ) with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file: response.write_to_file(temp_file.name) return temp_file.name, "✅ Speech generated successfully with Groq PlayAI!" else: # Multiple chunks - process separately and inform user return None, f"⚠️ Text too long for single request ({len(text)} chars). Try shorter text or use Edge TTS for longer content." except Exception as e: error_msg = str(e) if "rate_limit_exceeded" in error_msg or "429" in error_msg: return None, "🔄 Groq rate limit reached. Try again in a moment or use Edge TTS..." else: return None, f"❌ Groq error: {error_msg}" def try_openai_tts(api_key, text, voice): """Try OpenAI TTS as fallback""" try: if not check_internet_connection(): return None, "❌ No internet connection available for OpenAI TTS" import openai client = openai.OpenAI(api_key=api_key) response = client.audio.speech.create( model="tts-1", voice=voice, input=text[:4000] # OpenAI limit ) with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file: response.stream_to_file(temp_file.name) return temp_file.name, "✅ Speech generated successfully with OpenAI TTS!" except ImportError: return None, "❌ OpenAI library not installed. Install with: pip install openai" except Exception as e: return None, f"❌ OpenAI error: {str(e)}" def try_edge_tts(text, voice="en-US-JennyNeural"): """Try Microsoft Edge TTS as free fallback""" try: if not check_internet_connection(): return None, "❌ No internet connection available for Edge TTS" import edge_tts import asyncio async def generate_edge_speech(): communicate = edge_tts.Communicate(text[:10000], voice) # Edge TTS can handle longer text with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_file: async for chunk in communicate.stream(): if chunk["type"] == "audio": temp_file.write(chunk["data"]) return temp_file.name # Run async function try: loop = asyncio.get_event_loop() except RuntimeError: loop = asyncio.new_event_loop() asyncio.set_event_loop(loop) audio_file = loop.run_until_complete(generate_edge_speech()) return audio_file, "✅ Speech generated successfully with Microsoft Edge TTS (Free & High Quality)!" except ImportError: return None, "❌ Edge TTS not installed. Install with: pip install edge-tts" except Exception as e: return None, f"❌ Edge TTS error: {str(e)}" def try_pyttsx3_fallback(text): """Ultimate fallback using pyttsx3 (offline) - Only works in local environments""" if is_huggingface_space(): return None, "❌ Offline TTS not available in Hugging Face Spaces. Use Edge TTS (Free) instead!" try: import pyttsx3 # Initialize the engine engine = pyttsx3.init() # Get available voices and set a good one voices = engine.getProperty('voices') if voices: # Try to find a female English voice first, then any English voice english_voice = None for voice in voices: voice_id = voice.id.lower() voice_name = voice.name.lower() if hasattr(voice, 'name') else "" # Look for English voices if any(keyword in voice_id or keyword in voice_name for keyword in ['english', 'en-us', 'en_us', 'zira', 'hazel', 'eva']): english_voice = voice.id if any(female_keyword in voice_name for female_keyword in ['zira', 'hazel', 'eva', 'female']): break # Prefer female voices if english_voice: engine.setProperty('voice', english_voice) # Set properties for better quality engine.setProperty('rate', 180) # Speed of speech (words per minute) engine.setProperty('volume', 0.9) # Volume level (0.0 to 1.0) # Create temporary file with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file: temp_path = temp_file.name # Limit text length for stability limited_text = text[:2000] # Limit for stability # Use the save_to_file method correctly engine.save_to_file(limited_text, temp_path) engine.runAndWait() # Check if file was created and has content if os.path.exists(temp_path) and os.path.getsize(temp_path) > 0: return temp_path, "✅ Speech generated with offline TTS! (No internet required)" else: # If save_to_file didn't work, try alternative method return try_alternative_offline_tts(limited_text) except ImportError: return None, "❌ pyttsx3 not installed. Use Edge TTS (Free) instead!" except Exception as e: # Try alternative offline method if pyttsx3 fails return try_alternative_offline_tts(text[:2000]) def try_alternative_offline_tts(text): """Alternative offline TTS using system commands - Only for local environments""" if is_huggingface_space(): return None, "❌ System TTS not available in Hugging Face Spaces. Use Edge TTS (Free) instead!" try: system = platform.system().lower() with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file: temp_path = temp_file.name if system == "windows": # Windows SAPI TTS try: import win32com.client speaker = win32com.client.Dispatch("SAPI.SpVoice") # Save to file file_stream = win32com.client.Dispatch("SAPI.SpFileStream") file_stream.Open(temp_path, 3) speaker.AudioOutputStream = file_stream speaker.Speak(text) file_stream.Close() if os.path.exists(temp_path) and os.path.getsize(temp_path) > 0: return temp_path, "✅ Speech generated with Windows SAPI TTS (Offline)!" except ImportError: pass elif system == "darwin": # macOS # Use macOS 'say' command import subprocess try: subprocess.run(['say', '-o', temp_path, '--data-format=LEF32@22050', text], check=True, timeout=30) if os.path.exists(temp_path) and os.path.getsize(temp_path) > 0: return temp_path, "✅ Speech generated with macOS TTS (Offline)!" except (subprocess.CalledProcessError, subprocess.TimeoutExpired, FileNotFoundError): pass elif system == "linux": # Try espeak or festival on Linux import subprocess try: # Try espeak first subprocess.run(['espeak', '-w', temp_path, text], check=True, timeout=30) if os.path.exists(temp_path) and os.path.getsize(temp_path) > 0: return temp_path, "✅ Speech generated with espeak TTS (Offline)!" except (subprocess.CalledProcessError, subprocess.TimeoutExpired, FileNotFoundError): try: # Try festival as backup subprocess.run(['text2wave', '-o', temp_path], input=text, text=True, check=True, timeout=30) if os.path.exists(temp_path) and os.path.getsize(temp_path) > 0: return temp_path, "✅ Speech generated with Festival TTS (Offline)!" except (subprocess.CalledProcessError, subprocess.TimeoutExpired, FileNotFoundError): pass return None, f"❌ No offline TTS available for {system}. Use Edge TTS (Free) instead!" except Exception as e: return None, f"❌ Alternative offline TTS error: {str(e)}" # Main function with multiple provider fallback def generate_speech(groq_api_key, openai_api_key, text, voice_provider, voice): if not text: return None, "⚠️ Please enter some text to generate speech." if len(text) > 10000: return None, "🚫 Text input exceeds 10,000 character limit." # Check internet connection and environment has_internet = check_internet_connection() is_hf_space = is_huggingface_space() internet_status = "🌐 Internet: Connected" if has_internet else "📴 Internet: Offline" env_status = " (Hugging Face Spaces)" if is_hf_space else " (Local Environment)" # Status message status_msg = f"🔄 Generating speech...\n{internet_status}{env_status}\n" # If no internet, inform user about limitations if not has_internet: return None, status_msg + "❌ No internet connection available. All TTS providers require internet connection in this environment." # Try providers in order based on selection if voice_provider == "Groq (PlayAI)" and groq_api_key: status_msg += "🎯 Using Groq PlayAI...\n" audio_file, message = try_groq_tts(groq_api_key, text, voice) if audio_file: return audio_file, status_msg + message else: status_msg += f"❌ Groq failed: {message}\n" # Try OpenAI if available if openai_api_key and voice_provider in ["OpenAI TTS", "Auto (Try All)"]: status_msg += "🔄 Trying OpenAI TTS...\n" openai_voice = voice if voice in openai_voices else "alloy" audio_file, message = try_openai_tts(openai_api_key, text, openai_voice) if audio_file: return audio_file, status_msg + message else: status_msg += f"❌ OpenAI failed: {message}\n" # Try Edge TTS (Free) - This should be the main fallback for HF Spaces if voice_provider in ["Edge TTS (Free)", "Auto (Try All)", "Groq (PlayAI)", "OpenAI TTS"]: status_msg += "🔄 Using Edge TTS (Free & High Quality)...\n" edge_voice = voice if voice in edge_voices else "en-US-JennyNeural" audio_file, message = try_edge_tts(text, edge_voice) if audio_file: return audio_file, status_msg + message else: status_msg += f"❌ Edge TTS failed: {message}\n" # Only try offline TTS if specifically requested and not in HF Spaces if voice_provider == "Offline TTS": if is_hf_space: return None, status_msg + "❌ Offline TTS is not available in Hugging Face Spaces. Please use 'Edge TTS (Free)' instead!" else: status_msg += "🔄 Using offline TTS (works without internet)...\n" audio_file, message = try_pyttsx3_fallback(text) if audio_file: return audio_file, status_msg + message else: status_msg += f"❌ Offline TTS failed: {message}\n" # Final fallback message if is_hf_space: return None, status_msg + "❌ All online TTS providers failed. Please check your API keys or try again later. Edge TTS (Free) is recommended for HF Spaces!" else: return None, status_msg + "❌ All TTS providers failed. Please check your setup or try shorter text." def update_voice_options(provider): """Update voice dropdown based on selected provider""" if provider == "Groq (PlayAI)": return gr.Dropdown(choices=groq_voices, value="Fritz-PlayAI", visible=True) elif provider == "OpenAI TTS": return gr.Dropdown(choices=openai_voices, value="alloy", visible=True) elif provider == "Edge TTS (Free)": return gr.Dropdown(choices=edge_voices, value="en-US-JennyNeural", visible=True) elif provider == "Offline TTS": return gr.Dropdown(choices=["Default System Voice"], value="Default System Voice", visible=True) else: # Auto return gr.Dropdown(choices=groq_voices, value="Fritz-PlayAI", visible=True, label="🎭 Voice (Auto mode will try best match)") # Custom CSS (keeping your original beautiful design) custom_css = """ @import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap'); * { box-sizing: border-box; } :root { --primary-gradient: linear-gradient(135deg, #667eea 0%, #764ba2 100%); --secondary-gradient: linear-gradient(135deg, #f093fb 0%, #f5576c 100%); --accent-gradient: linear-gradient(135deg, #4facfe 0%, #00f2fe 100%); --dark-bg: linear-gradient(135deg, #0c0c0c 0%, #1a1a2e 50%, #16213e 100%); --glass-bg: rgba(255, 255, 255, 0.08); --glass-border: rgba(255, 255, 255, 0.15); --text-primary: #ffffff; --text-secondary: rgba(255, 255, 255, 0.7); --shadow-primary: 0 8px 32px rgba(0, 0, 0, 0.4); --shadow-hover: 0 12px 48px rgba(0, 0, 0, 0.6); --border-radius: 16px; } body, .gradio-container { background: var(--dark-bg) !important; font-family: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif !important; color: var(--text-primary) !important; overflow-x: hidden; } .gradio-container { max-width: 1400px !important; margin: 40px auto !important; padding: 0 20px !important; min-height: 100vh; } .gradio-container > div { background: var(--glass-bg) !important; backdrop-filter: blur(20px) !important; border: 1px solid var(--glass-border) !important; border-radius: var(--border-radius) !important; padding: 40px !important; box-shadow: var(--shadow-primary) !important; position: relative; overflow: hidden; } h1 { text-align: center !important; font-size: clamp(2.5rem, 5vw, 4rem) !important; font-weight: 700 !important; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important; -webkit-background-clip: text !important; -webkit-text-fill-color: transparent !important; background-clip: text !important; margin: 0 0 50px 0 !important; animation: glow 3s ease-in-out infinite alternate; } @keyframes glow { 0% { filter: drop-shadow(0 0 20px rgba(102, 126, 234, 0.3)); } 100% { filter: drop-shadow(0 0 30px rgba(118, 75, 162, 0.5)); } } #generate-btn { background: linear-gradient(135deg, #00f2fe 0%, #4facfe 100%) !important; border: 2px solid #4facfe !important; color: #ffffff !important; font-weight: bold !important; box-shadow: 0 0 12px rgba(0, 242, 254, 0.5) !important; transition: all 0.3s ease !important; margin-top: 26px !important; } #generate-btn:hover { background: linear-gradient(135deg, #4facfe 0%, #00f2fe 100%) !important; transform: scale(1.03) !important; box-shadow: 0 0 18px rgba(0, 242, 254, 0.7) !important; } .gr-textbox, .gr-dropdown { background: var(--glass-bg) !important; border: 2px solid transparent !important; border-radius: 12px !important; position: relative; transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1) !important; backdrop-filter: blur(10px) !important; } .gr-textbox textarea, .gr-textbox input, .gr-dropdown select { background: transparent !important; color: var(--text-primary) !important; border: none !important; font-size: 16px !important; font-weight: 400 !important; padding: 16px !important; } .gr-input-label, .gr-output-label { color: var(--text-primary) !important; font-weight: 600 !important; font-size: 14px !important; text-transform: uppercase; letter-spacing: 0.5px; margin-bottom: 12px !important; background: var(--accent-gradient); -webkit-background-clip: text; -webkit-text-fill-color: transparent; background-clip: text; } .gr-button { background: var(--primary-gradient) !important; border: none !important; color: white !important; font-weight: 600 !important; font-size: 16px !important; padding: 16px 32px !important; border-radius: 12px !important; transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1) !important; text-transform: uppercase; letter-spacing: 0.5px; box-shadow: 0 4px 20px rgba(102, 126, 234, 0.3) !important; margin-top: 20px !important; } .gr-button:hover { transform: translateY(-2px) !important; box-shadow: var(--shadow-hover) !important; filter: brightness(1.1); } .gr-audio { background: var(--glass-bg) !important; border: 1px solid var(--glass-border) !important; border-radius: 12px !important; padding: 20px !important; backdrop-filter: blur(10px) !important; transition: all 0.3s ease !important; } .info-box { background: rgba(0, 242, 254, 0.1) !important; border: 1px solid rgba(0, 242, 254, 0.3) !important; border-radius: 12px !important; padding: 20px !important; margin: 20px 0 !important; backdrop-filter: blur(10px) !important; } .info-box, .info-box * { color: white !important; } .warning-box, .warning-box * { color: white !important; } .no-api-box { background: rgba(46, 204, 113, 0.1) !important; border: 1px solid rgba(46, 204, 113, 0.3) !important; border-radius: 12px !important; padding: 20px !important; margin: 20px 0 !important; backdrop-filter: blur(10px) !important; } .no-api-box, .no-api-box * { color: white !important; } .warning-box { background: rgba(255, 193, 7, 0.1) !important; border: 1px solid rgba(255, 193, 7, 0.3) !important; border-radius: 12px !important; padding: 15px !important; margin: 15px 0 !important; backdrop-filter: blur(10px) !important; } .hf-space-box { background: rgba(255, 87, 34, 0.1) !important; border: 1px solid rgba(255, 87, 34, 0.3) !important; border-radius: 12px !important; padding: 20px !important; margin: 20px 0 !important; backdrop-filter: blur(10px) !important; } .hf-space-box, .hf-space-box * { color: white !important; } """ # Updated HTML for Hugging Face Spaces def get_custom_html(): is_hf = is_huggingface_space() if is_hf: return """
Transform your text into natural-sounding speech with multiple AI-powered voice providers
Recommended: Use "Edge TTS (Free)" for best results!
✅ High-quality voices • ✅ No API key needed • ✅ Up to 10,000 characters • ✅ Multiple voice options
⚠️ Offline TTS is not available in cloud environments
Select "Edge TTS (Free)" for high-quality TTS without any API keys!
✅ Microsoft's premium voices • ✅ Completely free • ✅ Works great in HF Spaces
🔄 Multiple Providers Available: Groq PlayAI, OpenAI TTS, Microsoft Edge TTS (Free)
🛡️ Smart Fallback System: If one provider fails, automatically tries the next available
💡 Recommended: Use "Auto (Try All)" for maximum reliability or "Edge TTS (Free)" for consistent quality
Transform your text into natural-sounding speech with multiple AI-powered voice providers
All TTS options available including offline TTS!
✅ Edge TTS (Free) • ✅ Offline TTS • ✅ Premium APIs • ✅ Full feature set
Select "Edge TTS (Free)" for online high-quality TTS or "Offline TTS" for no internet!
✅ Edge TTS: High quality voices with internet • ✅ Offline TTS: Basic quality, works anywhere
🔄 Multiple Providers Available: Groq PlayAI, OpenAI TTS, Microsoft Edge TTS (Free), Offline TTS
🛡️ Smart Fallback System: If one provider fails, automatically tries the next available
💡 Recommended: Use "Auto (Try All)" for maximum reliability or specific providers based on your needs
Best Option: Select "Edge TTS (Free)" below - no API key needed!
💡 Pro tip: Edge TTS provides Microsoft's premium voice quality completely free
Free Options: "Edge TTS (Free)" (internet required) or "Offline TTS" (no internet needed)
Premium Options: Add your Groq or OpenAI API keys above for additional voice options
✅ No API key required
✅ High quality Microsoft voices
✅ Up to 10,000 characters
✅ Perfect for HF Spaces
🔑 Requires API key
🎪 Premium entertainment voices
⚡ Fast generation
📝 ~3,500 char limit
🔑 Requires API key
🎯 Professional quality
💰 Pay per use
📝 4,000 char limit
💡 Pro Tip: Use "Auto (Try All)" to automatically find the best available provider, or stick with "Edge TTS (Free)" for consistent, high-quality results without any setup!