Spaces:

TGPro1
/

STTR

Sleeping

App Files Community

STTR committed on Jan 4

Commit

2ceddcf

1 Parent(s): 40e1a06

Add beautiful custom theme and CSS

Browse files

Files changed (1) hide show

app.py +130 -59

app.py CHANGED Viewed

@@ -20,7 +20,6 @@ print(f"🖥️ Device: {device}")
 # Load Models
 # ============================================================
-# SeamlessM4T v2 Large for STT
 print("📥 Loading SeamlessM4T v2 Large...")
 STT_MODEL = "facebook/seamless-m4t-v2-large"
 stt_processor = AutoProcessor.from_pretrained(STT_MODEL)
@@ -28,7 +27,6 @@ stt_model = SeamlessM4Tv2ForSpeechToText.from_pretrained(STT_MODEL)
 stt_model = stt_model.to(device).eval()
 print("✅ SeamlessM4T v2 Large loaded!")
-# NLLB-200 for Translation
 print("📥 Loading NLLB-200...")
 NLLB_MODEL = "facebook/nllb-200-distilled-600M"
 nllb_tokenizer = AutoTokenizer.from_pretrained(NLLB_MODEL)
@@ -36,8 +34,6 @@ nllb_model = AutoModelForSeq2SeqLM.from_pretrained(NLLB_MODEL)
 nllb_model = nllb_model.to(device).eval()
 print("✅ NLLB-200 loaded!")
-print("🎉 All models ready!")
 # ============================================================
 # Language Codes
 # ============================================================
@@ -54,9 +50,6 @@ NLLB_LANGS = {
     "🇯🇵 Japanese": "jpn_Jpan",
     "🇰🇷 Korean": "kor_Hang",
     "🇷🇺 Russian": "rus_Cyrl",
-    "🇹🇷 Turkish": "tur_Latn",
-    "🇳🇱 Dutch": "nld_Latn",
-    "🇮🇳 Hindi": "hin_Deva",
 }
 STT_LANGS = {
@@ -74,7 +67,6 @@ STT_LANGS = {
     "🇷🇺 Russian": "rus",
 }
-# Fish Audio API
 FISH_AUDIO_API_KEY = os.environ.get('FISH_AUDIO_API_KEY', '')
 # ============================================================
@@ -132,17 +124,21 @@ def translate_audio(audio, source_lang, target_lang, enable_voice_clone):
         translation = nllb_tokenizer.decode(outputs[0], skip_special_tokens=True)
-        # 3. TTS with Fish Audio
         tts_audio = None
         if FISH_AUDIO_API_KEY:
             tts_audio = generate_tts(translation, enable_voice_clone, audio if enable_voice_clone else None)
         result_text = f"""
-### 🎤 {source_lang}
-{transcript}
-### 🌍 {target_lang}
-{translation}
 """
         return tts_audio, result_text
@@ -159,7 +155,6 @@ def generate_tts(text, clone_voice=False, reference_audio=None):
         headers = {'Authorization': f'Bearer {FISH_AUDIO_API_KEY}'}
         if clone_voice and reference_audio:
-            # Voice cloning
             import tempfile
             import scipy.io.wavfile as wavfile
@@ -188,7 +183,6 @@ def generate_tts(text, clone_voice=False, reference_audio=None):
             os.remove(audio_path)
         else:
-            # Standard TTS
             payload = {
                 'text': text,
                 'format': 'mp3',
@@ -213,54 +207,132 @@ def generate_tts(text, clone_voice=False, reference_audio=None):
         return None
 # ============================================================
-# Gradio Interface
 # ============================================================
-with gr.Blocks(theme=gr.themes.Soft(), title="Instant Translat") as demo:
     gr.Markdown("""
-    # 🌍 Instant Translat - AI Voice Translation
-    **Real-time voice translation powered by Meta AI**
-    - 🎤 **STT**: SeamlessM4T v2 Large (101 languages)
-    - 🌍 **Translation**: NLLB-200 (200 languages + Darija)
-    - 🔊 **TTS**: Fish Audio S1 (Natural voice)
-    - 🎭 **Voice Cloning**: Your voice in any language
     """)
     with gr.Row():
         with gr.Column(scale=1):
             audio_input = gr.Audio(
-                label="🎤 Record Your Voice",
                 type="numpy",
-                sources=["microphone"]
-            )
-            source_lang = gr.Dropdown(
-                choices=list(NLLB_LANGS.keys()),
-                value="🇲🇦 Moroccan Arabic (Darija)",
-                label="🗣️ Source Language"
             )
-            target_lang = gr.Dropdown(
-                choices=list(NLLB_LANGS.keys()),
-                value="🇬🇧 English",
-                label="🎯 Target Language"
-            )
             voice_clone = gr.Checkbox(
-                label="🎭 Clone Voice (Use your voice for translation)",
-                value=True
             )
             translate_btn = gr.Button(
-                "🌍 Translate",
                 variant="primary",
-                size="lg"
             )
         with gr.Column(scale=1):
-            audio_output = gr.Audio(label="🔊 Translation Audio")
-            text_output = gr.Markdown(label="📝 Translation Text")
     translate_btn.click(
         translate_audio,
@@ -269,25 +341,24 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Instant Translat") as demo:
     )
     gr.Markdown("""
-    ## 🎯 How to Use
-    1. **Select Languages**: Choose your source and target languages
-    2. **Record**: Click the microphone and speak clearly
-    3. **Translate**: Click the translate button
-    4. **Listen**: Hear the translation in natural voice (or your cloned voice!)
-    ## 🌍 Supported Languages
-    - 🇲🇦 **Moroccan Darija** (Moroccan Arabic)
-    - 🇸🇦 Arabic (MSA)
-    - 🇫🇷 French
-    - 🇬🇧 English
-    - 🇪🇸 Spanish
-    - 🇩🇪 German
-    - And 190+ more languages!
-    ## 🔒 Privacy
-    - No data is stored
-    - Real-time processing
-    - Secure API calls
     """)
 if __name__ == "__main__":

 # Load Models
 # ============================================================
 print("📥 Loading SeamlessM4T v2 Large...")
 STT_MODEL = "facebook/seamless-m4t-v2-large"
 stt_processor = AutoProcessor.from_pretrained(STT_MODEL)
 stt_model = stt_model.to(device).eval()
 print("✅ SeamlessM4T v2 Large loaded!")
 print("📥 Loading NLLB-200...")
 NLLB_MODEL = "facebook/nllb-200-distilled-600M"
 nllb_tokenizer = AutoTokenizer.from_pretrained(NLLB_MODEL)
 nllb_model = nllb_model.to(device).eval()
 print("✅ NLLB-200 loaded!")
 # ============================================================
 # Language Codes
 # ============================================================
     "🇯🇵 Japanese": "jpn_Jpan",
     "🇰🇷 Korean": "kor_Hang",
     "🇷🇺 Russian": "rus_Cyrl",
 }
 STT_LANGS = {
     "🇷🇺 Russian": "rus",
 }
 FISH_AUDIO_API_KEY = os.environ.get('FISH_AUDIO_API_KEY', '')
 # ============================================================
         translation = nllb_tokenizer.decode(outputs[0], skip_special_tokens=True)
+        # 3. TTS
         tts_audio = None
         if FISH_AUDIO_API_KEY:
             tts_audio = generate_tts(translation, enable_voice_clone, audio if enable_voice_clone else None)
         result_text = f"""
+<div style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); padding: 20px; border-radius: 12px; margin: 10px 0;">
+    <h3 style="color: white; margin: 0 0 10px 0;">🎤 {source_lang}</h3>
+    <p style="color: white; font-size: 1.1em; margin: 0;">{transcript}</p>
+</div>
+<div style="background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%); padding: 20px; border-radius: 12px; margin: 10px 0;">
+    <h3 style="color: white; margin: 0 0 10px 0;">🌍 {target_lang}</h3>
+    <p style="color: white; font-size: 1.1em; margin: 0;">{translation}</p>
+</div>
 """
         return tts_audio, result_text
         headers = {'Authorization': f'Bearer {FISH_AUDIO_API_KEY}'}
         if clone_voice and reference_audio:
             import tempfile
             import scipy.io.wavfile as wavfile
             os.remove(audio_path)
         else:
             payload = {
                 'text': text,
                 'format': 'mp3',
         return None
 # ============================================================
+# Custom CSS
+# ============================================================
+custom_css = """
+/* Modern Gradient Background */
+.gradio-container {
+    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
+    font-family: 'Inter', sans-serif;
+}
+/* Card Style */
+.contain {
+    background: rgba(255, 255, 255, 0.95) !important;
+    border-radius: 20px !important;
+    box-shadow: 0 20px 60px rgba(0, 0, 0, 0.3) !important;
+    padding: 30px !important;
+    backdrop-filter: blur(10px) !important;
+}
+/* Buttons */
+.primary {
+    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
+    border: none !important;
+    border-radius: 12px !important;
+    padding: 15px 30px !important;
+    font-weight: 600 !important;
+    font-size: 1.1em !important;
+    transition: all 0.3s ease !important;
+}
+.primary:hover {
+    transform: translateY(-2px) !important;
+    box-shadow: 0 10px 25px rgba(102, 126, 234, 0.4) !important;
+}
+/* Input Fields */
+.input-audio, .dropdown {
+    border-radius: 12px !important;
+    border: 2px solid #e0e0e0 !important;
+}
+/* Headers */
+h1, h2, h3 {
+    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+    -webkit-background-clip: text;
+    -webkit-text-fill-color: transparent;
+    font-weight: 700;
+}
+/* Markdown Content */
+.markdown-text {
+    line-height: 1.8;
+}
+"""
+# ============================================================
+# Gradio Interface with Custom Theme
 # ============================================================
+theme = gr.themes.Soft(
+    primary_hue="purple",
+    secondary_hue="pink",
+    neutral_hue="slate",
+    font=gr.themes.GoogleFont("Inter"),
+).set(
+    button_primary_background_fill="*primary_500",
+    button_primary_background_fill_hover="*primary_600",
+    button_primary_text_color="white",
+)
+with gr.Blocks(theme=theme, css=custom_css, title="Instant Translat") as demo:
     gr.Markdown("""
+    # 🌍 Instant Translat
+    ### AI-Powered Voice Translation in 200+ Languages
+    Translate your voice instantly with cutting-edge AI. Supports Moroccan Darija and 200+ languages!
     """)
     with gr.Row():
         with gr.Column(scale=1):
+            gr.Markdown("### 🎤 Input")
             audio_input = gr.Audio(
+                label="Record Your Voice",
                 type="numpy",
+                sources=["microphone"],
+                elem_classes="input-audio"
             )
+            with gr.Row():
+                source_lang = gr.Dropdown(
+                    choices=list(NLLB_LANGS.keys()),
+                    value="🇲🇦 Moroccan Arabic (Darija)",
+                    label="🗣️ From",
+                    elem_classes="dropdown"
+                )
+                target_lang = gr.Dropdown(
+                    choices=list(NLLB_LANGS.keys()),
+                    value="🇬🇧 English",
+                    label="🎯 To",
+                    elem_classes="dropdown"
+                )
             voice_clone = gr.Checkbox(
+                label="🎭 Clone My Voice",
+                value=True,
+                info="Hear translation in your own voice"
             )
             translate_btn = gr.Button(
+                "🌍 Translate Now",
                 variant="primary",
+                size="lg",
+                elem_classes="primary"
             )
         with gr.Column(scale=1):
+            gr.Markdown("### 🔊 Output")
+            audio_output = gr.Audio(
+                label="Translation Audio",
+                type="filepath"
+            )
+            text_output = gr.HTML(label="Translation Text")
     translate_btn.click(
         translate_audio,
     )
     gr.Markdown("""
+    ---
+    ## ✨ Features
+    - 🎤 **Speech Recognition** - Powered by Meta's SeamlessM4T v2 Large
+    - 🌍 **Translation** - 200+ languages with NLLB-200
+    - 🔊 **Natural Voice** - Fish Audio S1 TTS
+    - 🎭 **Voice Cloning** - Hear translation in your voice
+    ## 🌍 Popular Languages
+    🇲🇦 Moroccan Darija • 🇸🇦 Arabic • 🇫🇷 French • 🇬🇧 English • 🇪🇸 Spanish • 🇩🇪 German • 🇮🇹 Italian • 🇵🇹 Portuguese • 🇨🇳 Chinese • 🇯🇵 Japanese • 🇰🇷 Korean • 🇷🇺 Russian
+    ---
+    <div style="text-align: center; padding: 20px;">
+        <p style="color: #666;">Made with ❤️ using Meta AI • Powered by HuggingFace</p>
+    </div>
     """)
 if __name__ == "__main__":