ZoyaRabail committed on
Commit c7cbc7f · verified · 1 Parent(s): c940c0a

Update app.py

Files changed (1):
  1. app.py +132 -40
app.py CHANGED
@@ -1,47 +1,139 @@
  import gradio as gr

- # Create HTML links that look like buttons
- html_template = """
- <div style="text-align: center; margin: 20px;">
-     <a href="{url}" style="
-         display: inline-block;
-         padding: 12px 24px;
-         background-color: #4CAF50;
-         color: white;
-         text-decoration: none;
-         border-radius: 5px;
-         font-weight: bold;
-         font-size: 16px;
-         margin: 10px;
-     ">{text}</a>
- </div>
- """
-
- with gr.Blocks(title="AI Multi-Tool Hub", theme=gr.themes.Soft()) as demo:
-     gr.Markdown("# 🤖 AI Multi-Tool Hub")
-     gr.Markdown("Select a tool to use:")
-
      with gr.Row():
-         with gr.Column():
-             gr.Markdown("### 🎙️ Speech to Text")
-             gr.Markdown("Convert audio to text using Whisper")
-             gr.HTML(html_template.format(url="/speech_to_text", text="Open Speech to Text"))
-
-         with gr.Column():
-             gr.Markdown("### 🌐 Translation")
-             gr.Markdown("Translate text between languages")
-             gr.HTML(html_template.format(url="/translation", text="Open Translation Tool"))
-
      with gr.Row():
-         with gr.Column():
-             gr.Markdown("### 😊 Emotion Detection")
-             gr.Markdown("Detect emotions and tone in text")
-             gr.HTML(html_template.format(url="/emotions", text="Open Emotion Detection"))
-
-         with gr.Column():
-             gr.Markdown("### 🔊 Text to Speech")
-             gr.Markdown("Convert text to speech")
-             gr.HTML(html_template.format(url="/text_to_speech", text="Open Text to Speech"))

  if __name__ == "__main__":
      demo.launch()
 
+ import os
  import gradio as gr
+ import asyncio
+ import tempfile
+ import edge_tts
+ import requests
+ from langdetect import detect, LangDetectException
+ from transformers import pipeline, M2M100ForConditionalGeneration, M2M100Tokenizer
+
+ # ----------------------------
+ # 1. SPEECH TO TEXT (Whisper)
+ # ----------------------------
+ stt_pipeline = pipeline("automatic-speech-recognition", model="openai/whisper-small")
+
+ def transcribe(audio):
+     if audio is None:
+         return None
+     result = stt_pipeline(audio)
+     return result["text"]
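+ # (Editor's note, illustrative only) gr.Audio(type="filepath") hands
+ # transcribe() above a path string, so a call like transcribe("clip.wav")
+ # returns the recognized text; "clip.wav" is a hypothetical file name.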
+
+ # ----------------------------
+ # 2. TRANSLATION (M2M100)
+ # ----------------------------
+ m2m_model_name = "facebook/m2m100_418M"
+ m2m_tokenizer = M2M100Tokenizer.from_pretrained(m2m_model_name)
+ m2m_model = M2M100ForConditionalGeneration.from_pretrained(m2m_model_name)
+
+ LANG_UI_TO_CODE = {"English": "en", "Spanish": "es", "French": "fr"}
+
+ def translate_text(user_text, target_lang_ui):
+     if not user_text.strip():
+         return "⚠️ Please enter text."
+     target_code = LANG_UI_TO_CODE.get(target_lang_ui, "en")
+     try:
+         src_code = detect(user_text)
+     except LangDetectException:
+         src_code = "en"
+     if src_code == target_code:
+         return user_text
+     m2m_tokenizer.src_lang = src_code
+     encoded = m2m_tokenizer(user_text, return_tensors="pt")
+     generated = m2m_model.generate(**encoded, forced_bos_token_id=m2m_tokenizer.get_lang_id(target_code))
+     return m2m_tokenizer.decode(generated[0], skip_special_tokens=True)
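+ # (Editor's caveat) langdetect may return a code M2M100 does not support
+ # (e.g. "zh-cn"), in which case get_lang_id raises a KeyError; checking the
+ # detected code against m2m_tokenizer.lang_code_to_id before translating is
+ # one possible hardening, assuming that mapping attribute on the tokenizer.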
+
+ # ----------------------------
+ # 3. EMOTION DETECTION (Groq API)
+ # ----------------------------
+ GROQ_API_KEY = os.getenv("GROQ_API_KEY")
+ API_URL = "https://api.groq.ai/v1/text/analyze"
+
+ def detect_emotion_tone(text):
+     if not text.strip():
+         return "⚠ No text.", None
+     headers = {"Authorization": f"Bearer {GROQ_API_KEY}", "Content-Type": "application/json"}
+     payload = {"text": text, "features": ["emotion", "tone"]}
+     try:
+         r = requests.post(API_URL, headers=headers, json=payload, timeout=30)
+         r.raise_for_status()
+         result = r.json()
+         emotions = result.get("emotion", {})
+         tones = result.get("tone", {})
+         if not emotions:
+             return "neutral", "neutral"
+         dominant_emotion = max(emotions, key=emotions.get)
+         dominant_tone = max(tones, key=tones.get) if tones else "neutral"
+         return dominant_emotion, dominant_tone
+     except Exception:
+         return "neutral", "neutral"
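+ # (Editor's caveat) "api.groq.ai/v1/text/analyze" is not Groq's documented
+ # endpoint; the published Groq API is OpenAI-compatible and lives under
+ # api.groq.com. As written the request fails, the bare except catches it
+ # (within the timeout added above), and the function degrades to
+ # "neutral"/"neutral", so the rest of the pipeline still runs.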
+
+ # ----------------------------
+ # 4. TEXT TO SPEECH (Edge TTS)
+ # ----------------------------
+ async def text_to_speech(text, voice, rate, pitch):
+     if not text.strip():
+         return None
+     voice_short_name = voice.split(" - ")[0]
+     communicate = edge_tts.Communicate(text, voice_short_name, rate=f"{rate:+d}%", pitch=f"{pitch:+d}Hz")
+     with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
+         await communicate.save(tmp.name)
+     return tmp.name
+
+ def tts_sync(text, voice, rate, pitch):
+     return asyncio.run(text_to_speech(text, voice, rate, pitch))
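+ # (Editor's note) f"{rate:+d}%" and f"{pitch:+d}Hz" require ints (0 becomes
+ # "+0%" / "+0Hz"), which matches how full_pipeline calls this below; floats
+ # would raise a ValueError. tts_sync is a synchronous wrapper the UI never calls.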
+
+ # ----------------------------
+ # 5. PIPELINE FUNCTION
+ # ----------------------------
+ async def full_pipeline(audio, target_lang):
+     # Step 1: STT
+     text = transcribe(audio)
+     if not text:
+         return "⚠ No speech detected", "", "", None
+
+     # Step 2: Translate
+     translated = translate_text(text, target_lang)
+
+     # Step 3: Emotion Detection
+     emotion, tone = detect_emotion_tone(text)
+
+     # Step 4: TTS (apply emotion by picking voice tone)
+     voices = await edge_tts.list_voices()
+     # Simple emotion → voice mapping
+     if emotion == "happy":
+         voice_choice = [v for v in voices if "en-US-AriaNeural" in v["ShortName"]]
+     elif emotion == "sad":
+         voice_choice = [v for v in voices if "en-US-JennyNeural" in v["ShortName"]]
+     elif emotion == "angry":
+         voice_choice = [v for v in voices if "en-US-GuyNeural" in v["ShortName"]]
+     else:
+         voice_choice = [voices[0]]
+     if not voice_choice:
+         # Fallback: avoid an IndexError if the mapped voice is unavailable
+         voice_choice = [voices[0]]
+     voice_final = f"{voice_choice[0]['ShortName']} - {voice_choice[0]['Locale']}"
+
+     audio_out = await text_to_speech(translated, voice_final, 0, 0)
+
+     return text, translated, f"{emotion} / {tone}", audio_out
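+ # (Editor's note) edge_tts.list_voices() yields dicts whose keys include
+ # "ShortName" and "Locale" (e.g. {"ShortName": "en-US-AriaNeural",
+ # "Locale": "en-US", ...}), which is what voice_final is formatted from.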
+
+ # ----------------------------
+ # 6. GRADIO UI
+ # ----------------------------
+ with gr.Blocks() as demo:
+     gr.Markdown("# 🌍 Speech Translator with Emotions")
+
      with gr.Row():
+         audio_in = gr.Audio(sources=["microphone"], type="filepath", label="🎤 Record Speech")
+         target_lang = gr.Dropdown(choices=["English", "Spanish", "French"], value="English", label="Translate to")
+
      with gr.Row():
+         stt_out = gr.Textbox(label="📝 Recognized Speech", lines=2)
+         trans_out = gr.Textbox(label="🌐 Translated Text", lines=2)
+
+     with gr.Row():
+         emotion_out = gr.Textbox(label="😊 Detected Emotion & Tone")
+         audio_out = gr.Audio(label="🔊 Final Speech", type="filepath")
+
+     run_btn = gr.Button("🚀 Run Pipeline")
+     run_btn.click(fn=full_pipeline, inputs=[audio_in, target_lang], outputs=[stt_out, trans_out, emotion_out, audio_out])
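+     # (Editor's note) Gradio event handlers may be async, so the async
+     # full_pipeline can be wired into run_btn.click directly, without an
+     # explicit asyncio.run wrapper.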

  if __name__ == "__main__":
      demo.launch()