Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import torch | |
| import numpy as np | |
| import tempfile | |
| import time | |
| import warnings | |
| warnings.filterwarnings("ignore") | |
# HTML with inline CSS for white background and black text.
# The <style> sheet forces a white page with dark text, styles textareas,
# buttons, cards and the two status boxes (.status-success / .status-info),
# and the <body> carries the gradient header banner. The whole string is
# handed to gr.HTML when the interface is built.
html_with_css = """
<!DOCTYPE html>
<html>
<head>
<style>
body, .gradio-container {
background: white !important;
color: #333333 !important;
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
margin: 0;
padding: 20px;
}
.header {
text-align: center;
padding: 2rem;
background: linear-gradient(135deg, #4F46E5 0%, #7C3AED 100%);
border-radius: 16px;
margin-bottom: 2rem;
color: white;
}
.header h1 {
font-size: 2.5em;
margin: 0 0 0.5rem 0;
font-weight: 700;
}
/* BLACK TEXT ON WHITE - MOST IMPORTANT */
textarea {
background: white !important;
border: 2px solid #4F46E5 !important;
border-radius: 12px !important;
color: #000000 !important; /* Pure black text */
padding: 1rem !important;
font-size: 16px !important;
width: 100% !important;
min-height: 120px !important;
font-family: monospace !important;
}
textarea::placeholder {
color: #666666 !important;
}
button {
padding: 0.75rem 1.5rem !important;
border-radius: 10px !important;
font-weight: 600 !important;
margin: 0.5rem !important;
cursor: pointer !important;
}
.primary-btn {
background: linear-gradient(135deg, #4F46E5 0%, #7C3AED 100%) !important;
border: none !important;
color: white !important;
}
.secondary-btn {
background: white !important;
border: 2px solid #D1D5DB !important;
color: #374151 !important;
}
.card {
background: white;
border: 1px solid #E5E7EB;
border-radius: 12px;
padding: 1.5rem;
margin-bottom: 1rem;
}
.status-success {
background: #DCFCE7;
border: 1px solid #86EFAC;
border-left: 4px solid #10B981;
color: #065F46;
padding: 1rem;
border-radius: 8px;
margin: 1rem 0;
}
.status-info {
background: #DBEAFE;
border: 1px solid #93C5FD;
border-left: 4px solid #3B82F6;
color: #1E40AF;
padding: 1rem;
border-radius: 8px;
margin: 1rem 0;
}
</style>
</head>
<body>
<div class="header">
<h1>π΅ Text-to-Speech</h1>
<p>Convert text to speech with smaller AI model</p>
</div>
</body>
</html>
"""
# Startup banner; runs once when the module is imported.
print("π Starting TTS System...")
# Try to load a SMALLER TTS model that fits in free tier
def _load_coqui_backend():
    """Import Coqui TTS lazily and build the multilingual XTTS v2 model."""
    from TTS.api import TTS

    return TTS(
        model_name="tts_models/multilingual/multi-dataset/xtts_v2",
        progress_bar=False,
    )


def _load_speecht5_backend():
    """Load the SpeechT5 processor, acoustic model and HiFi-GAN vocoder on CPU."""
    from transformers import (
        SpeechT5ForTextToSpeech,
        SpeechT5HifiGan,
        SpeechT5Processor,
    )

    processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
    model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
    vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
    # Use CPU to save memory
    return {
        "processor": processor,
        "model": model.to("cpu"),
        "vocoder": vocoder.to("cpu"),
    }


def _load_bark_backend():
    """Load the Bark (small) processor and model on CPU."""
    from transformers import AutoProcessor, BarkModel

    processor = AutoProcessor.from_pretrained("suno/bark-small")
    model = BarkModel.from_pretrained("suno/bark-small")
    return {"processor": processor, "model": model.to("cpu")}


def load_small_tts_model():
    """Load the first TTS backend that initialises successfully.

    Backends are tried in this order: Coqui XTTS v2, Microsoft SpeechT5,
    Suno Bark (small). Every attempt is isolated, so *any* failure in one
    backend (package not installed, download error, out of memory, ...)
    moves on to the next backend instead of aborting the whole search.

    Returns:
        tuple: ``(model_type, model)`` where ``model_type`` is

        * ``"coqui"``    - ``model`` is the Coqui ``TTS`` object,
        * ``"speecht5"`` - ``model`` is a dict with the keys
          ``"processor"``, ``"model"`` and ``"vocoder"``,
        * ``"bark"``     - ``model`` is a dict with the keys
          ``"processor"`` and ``"model"``,
        * ``"gtts"``     - ``model`` is ``None``; no local model could be
          loaded and callers should use the gTTS fallback.

    This function never raises; failures are reported on stdout.
    """
    print("π₯ Loading smaller TTS model...")

    # Option 1: Try Coqui TTS
    try:
        coqui_model = _load_coqui_backend()
    except ImportError:
        print(" Coqui TTS not available")
    except Exception as e:
        # Previously only ImportError was handled here, so a failed download
        # or initialisation skipped SpeechT5 and Bark entirely.
        print(f" Coqui TTS failed: {e}")
    else:
        print("β Loaded Coqui XTTS model")
        return ("coqui", coqui_model)

    # Option 2: Try SpeechT5 (smaller than VibeVoice)
    try:
        speecht5_parts = _load_speecht5_backend()
    except Exception as e:
        print(f" SpeechT5 failed: {e}")
    else:
        print("β Loaded SpeechT5 model (CPU)")
        return ("speecht5", speecht5_parts)

    # Option 3: Try Bark (small and fast)
    try:
        bark_parts = _load_bark_backend()
    except Exception as e:
        print(f" Bark failed: {e}")
    else:
        print("β Loaded Bark model (CPU)")
        return ("bark", bark_parts)

    print("β οΈ No small TTS model loaded, using gTTS fallback")
    return ("gtts", None)
# Load model
# Runs once at import time. `model_type` is the backend tag that
# generate_with_model branches on and that the "System Information" panel
# displays; `tts_model` is the matching backend object (None for "gtts").
model_type, tts_model = load_small_tts_model()
def _reserve_temp_wav():
    """Create an empty temporary .wav file and return its path, handle closed."""
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as handle:
        return handle.name


def _discard_file(path):
    """Best-effort removal of a partially written output file."""
    import os

    try:
        os.remove(path)
    except OSError:
        pass


def _write_wav(samples, sample_rate):
    """Write *samples* as a float32 WAV to a fresh temp file and return its path."""
    import scipy.io.wavfile

    path = _reserve_temp_wav()
    try:
        scipy.io.wavfile.write(
            path, sample_rate, np.asarray(samples).astype(np.float32)
        )
    except Exception:
        _discard_file(path)
        raise
    return path


def _synthesize_coqui(engine, text):
    """Run a Coqui TTS engine and return ``(wav_path, 24000)``."""
    extra = {}
    # Multi-lingual / multi-speaker Coqui models (XTTS among them) reject a
    # bare tts_to_file(text, file_path) call, so pick defaults.
    # NOTE(review): assumes the TTS API exposes is_multi_lingual / languages /
    # is_multi_speaker / speakers as in Coqui TTS 0.22 - confirm for the
    # pinned version.
    if getattr(engine, "is_multi_lingual", False):
        languages = getattr(engine, "languages", None) or []
        extra["language"] = (
            "en" if (not languages or "en" in languages) else languages[0]
        )
    if getattr(engine, "is_multi_speaker", False):
        speakers = getattr(engine, "speakers", None) or []
        if speakers:
            extra["speaker"] = speakers[0]

    path = _reserve_temp_wav()
    try:
        engine.tts_to_file(text=text, file_path=path, **extra)
    except Exception:
        _discard_file(path)
        raise
    return path, 24000


def _synthesize_speecht5(parts, text):
    """Run SpeechT5 + HiFi-GAN and return ``(wav_path, 16000)``."""
    processor = parts["processor"]
    model = parts["model"]
    vocoder = parts["vocoder"]

    inputs = processor(text=text, return_tensors="pt")

    # generate_speech needs a speaker x-vector. Use one supplied by the loader
    # if present, otherwise fall back to an all-zero vector.
    # NOTE(review): a zero vector yields a generic voice; a real x-vector
    # (e.g. from the CMU Arctic set) should sound better - confirm quality.
    speaker_embeddings = parts.get("speaker_embeddings")
    if speaker_embeddings is None:
        speaker_embeddings = torch.zeros((1, model.config.speaker_embedding_dim))

    with torch.no_grad():
        speech = model.generate_speech(
            inputs["input_ids"], speaker_embeddings, vocoder=vocoder
        )
    return _write_wav(speech.numpy(), 16000), 16000


def _synthesize_bark(parts, text):
    """Run Bark and return ``(wav_path, sample_rate)``."""
    processor = parts["processor"]
    model = parts["model"]

    inputs = processor(text, return_tensors="pt")
    with torch.no_grad():
        audio_array = model.generate(**inputs)
    samples = audio_array.cpu().numpy().squeeze()

    # Ask the model for its output rate instead of hard-coding it; 24000 stays
    # as the fallback so behaviour is unchanged when the attribute is missing.
    sample_rate = getattr(model.generation_config, "sample_rate", 24000)
    return _write_wav(samples, sample_rate), sample_rate


def generate_with_model(text, speed=1.0):
    """Generate speech using the loaded model.

    Dispatches on the module-level ``model_type`` / ``tts_model`` pair.

    Args:
        text: Text to synthesize. Empty or whitespace-only text is rejected.
        speed: Accepted for interface compatibility. None of the backends
            here applies it, so it currently has no effect on the audio.

    Returns:
        tuple: ``(wav_path, sample_rate)`` on success, or ``(None, None)``
        when the text is empty, no local model is loaded, or synthesis
        fails. Errors are printed, never raised. The returned file is a
        temporary file that is not deleted automatically.
    """
    try:
        if not text or not text.strip():
            return None, None
        print(f"π Generating: {text[:50]}...")

        if not tts_model:
            return None, None

        if model_type == "coqui":
            return _synthesize_coqui(tts_model, text)
        if model_type == "speecht5":
            return _synthesize_speecht5(tts_model, text)
        if model_type == "bark":
            return _synthesize_bark(tts_model, text)
        return None, None
    except Exception as e:
        print(f"β Model generation error: {e}")
        return None, None
def generate_with_gtts(text):
    """Fallback to gTTS (requires internet but works well).

    Args:
        text: English text to synthesize.

    Returns:
        tuple: ``(mp3_path, "gTTS")`` on success, ``(None, None)`` on any
        failure (gTTS not installed, no network, invalid text). Errors are
        printed, never raised. On success the temporary .mp3 is left on
        disk for the caller; on failure it is removed again.
    """
    import os

    mp3_path = None
    try:
        from gtts import gTTS

        tts = gTTS(text=text, lang='en', slow=False)
        # Reserve the path, then close the handle before gTTS writes to it.
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
            mp3_path = f.name
        tts.save(mp3_path)
        return mp3_path, "gTTS"
    except Exception as e:
        print(f"β gTTS error: {e}")
        # Do not leave an empty/partial file behind for every failed request.
        if mp3_path is not None:
            try:
                os.remove(mp3_path)
            except OSError:
                pass
        return None, None
def create_basic_audio(text):
    """Synthesize a short placeholder tone from *text* as a last resort.

    The clip lasts 50 ms per character (capped at five seconds) at 24 kHz.
    Each of the first 20 characters contributes one sine partial whose
    frequency is derived from its code point and whose amplitude shrinks
    with its position; the mix is then shaped by an attack/decay envelope.

    Args:
        text: Source text; only its length and first 20 characters matter.

    Returns:
        tuple: ``(wav_path, "Basic")`` where ``wav_path`` is a temporary
        float32 WAV file that is not deleted automatically.
    """
    import scipy.io.wavfile

    sample_rate = 24000
    fundamental = 220
    two_pi = 2 * np.pi

    seconds = min(len(text) * 0.05, 5)
    timeline = np.linspace(0, seconds, int(sample_rate * seconds))

    # One partial per character, each quieter than the one before it.
    mix = np.zeros_like(timeline)
    for rank, symbol in enumerate(text[:20], start=1):
        pitch = fundamental + (ord(symbol) % 300)
        mix = mix + (0.3 / rank) * np.sin(two_pi * pitch * timeline)

    # Rising (1 - e^-8t) attack multiplied by an e^-2t decay.
    shaped = mix * (np.exp(-2 * timeline) * (1 - np.exp(-8 * timeline)))

    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as out:
        scipy.io.wavfile.write(out.name, sample_rate, shaped.astype(np.float32))
        return out.name, "Basic"
# Create the interface
with gr.Blocks() as demo:
    # Inject the stylesheet and header banner.
    gr.HTML(html_with_css)

    # Two-column layout: text input on the left, audio result on the right.
    with gr.Row():
        with gr.Column(scale=2):
            gr.Markdown("### π Enter Text")
            text_input = gr.Textbox(
                label="",
                placeholder="Type your text here... (Black text on white background)",
                lines=5,
            )
            with gr.Row():
                speed = gr.Slider(
                    minimum=0.5,
                    maximum=2.0,
                    value=1.0,
                    step=0.1,
                    label="Speed",
                )
            with gr.Row():
                generate_btn = gr.Button("β¨ Generate Speech", variant="primary")
                clear_btn = gr.Button("Clear", variant="secondary")

        with gr.Column(scale=1):
            gr.Markdown("### π§ Audio Output")
            audio_output = gr.Audio(type="filepath", label="")
            status = gr.HTML("""
            <div class="status-info">
            <strong>Ready</strong><br>
            Enter text and click Generate Speech
            </div>
            """)

    # Show which backend was selected when the module loaded.
    gr.Markdown("### βΉοΈ System Information")
    backend_labels = {
        "coqui": "β **Model**: Coqui XTTS (Multilingual)",
        "speecht5": "β **Model**: Microsoft SpeechT5",
        "bark": "β **Model**: Suno Bark",
        "gtts": "β οΈ **Model**: gTTS (Fallback - requires internet)",
    }
    gr.Markdown(
        backend_labels.get(model_type, "β οΈ **Model**: Basic audio generation")
    )

    # Clickable sample sentences that fill the text box.
    gr.Markdown("### π‘ Examples")
    gr.Examples(
        examples=[
            ["Hello! Welcome to the text-to-speech system."],
            ["This is a demonstration of AI speech synthesis."],
            ["The quick brown fox jumps over the lazy dog."],
            ["Artificial intelligence is transforming technology."],
        ],
        inputs=text_input,
        label="Click to try:",
    )

    def process_text(text, speed_val):
        """Turn *text* into audio, trying the model, then gTTS, then a tone.

        Returns the ``(audio_path, status_html)`` pair bound to the audio
        player and the status box.
        """
        if not (text and text.strip()):
            return None, """
            <div class="status-info">
            <strong>β οΈ Please enter text</strong><br>
            Type something in the text box above
            </div>
            """

        print(f"Processing: {text[:50]}...")

        # Preferred path: the locally loaded model.
        source = "AI Model"
        audio_file, _ = generate_with_model(text, speed_val)

        # Fallback chain: gTTS first, then the synthetic tone.
        if audio_file is None:
            audio_file, source = generate_with_gtts(text)
        if audio_file is None:
            audio_file, source = create_basic_audio(text)

        if not audio_file:
            return None, """
            <div class="status-info">
            <strong>β Failed to generate</strong><br>
            Please try different text
            </div>
            """

        message = f"""
            <div class="status-success">
            <strong>β Speech Generated!</strong><br>
            Source: {source} β’ Characters: {len(text)}<br>
            Speed: {speed_val}x
            </div>
            """
        return audio_file, message

    def clear_all():
        """Reset the text box, the audio player and the status box."""
        cleared_notice = """
        <div class="status-info">
        <strong>Cleared</strong><br>
        Ready for new text input
        </div>
        """
        return "", None, cleared_notice

    # Connect buttons
    generate_btn.click(
        fn=process_text,
        inputs=[text_input, speed],
        outputs=[audio_output, status],
    )
    clear_btn.click(
        fn=clear_all,
        inputs=[],
        outputs=[text_input, audio_output, status],
    )
# Launch the app
if __name__ == "__main__":
    # Listen on every interface at port 7860, surface handler errors in the
    # UI, and keep Gradio's own startup output quiet.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "show_error": True,
        "quiet": True,
    }
    demo.launch(**launch_options)