# check / app.py
# ZoyaRabail's picture
# Update app.py
# 4fa37f8 verified
import os
import gradio as gr
import asyncio
import tempfile
import edge_tts
import requests
from langdetect import detect, LangDetectException
from transformers import pipeline, M2M100ForConditionalGeneration, M2M100Tokenizer
# ----------------------------
# 1. SPEECH TO TEXT (Whisper)
# ----------------------------
# Loaded once at import time; whisper-small keeps download size and memory
# modest at some cost in transcription accuracy.
stt_pipeline = pipeline("automatic-speech-recognition", model="openai/whisper-small")
def transcribe(audio):
    """Run Whisper speech-to-text on a recorded audio file.

    Args:
        audio: Filesystem path of the recording, or None when nothing
            was recorded.

    Returns:
        The transcript string, or None for missing audio.
    """
    if audio is None:
        return None
    return stt_pipeline(audio)["text"]
# ----------------------------
# 2. TRANSLATION (M2M100)
# ----------------------------
# Many-to-many multilingual model; tokenizer and model are loaded once at
# import time and shared by all requests.
m2m_model_name = "facebook/m2m100_418M"
m2m_tokenizer = M2M100Tokenizer.from_pretrained(m2m_model_name)
m2m_model = M2M100ForConditionalGeneration.from_pretrained(m2m_model_name)
# Maps the Gradio dropdown labels to M2M100 language codes.
LANG_UI_TO_CODE = {"English": "en", "Spanish": "es", "French": "fr"}
def translate_text(user_text, target_lang_ui):
    """Translate *user_text* into the language chosen in the UI.

    Args:
        user_text: Source text in any language (auto-detected).
        target_lang_ui: UI label ("English", "Spanish", "French");
            unknown labels fall back to English.

    Returns:
        The translated string, the original text when source and target
        languages already match, or "" for blank input.
    """
    if not user_text.strip():
        return ""
    target_code = LANG_UI_TO_CODE.get(target_lang_ui, "en")
    try:
        # langdetect may return region-tagged codes ("zh-cn", "zh-tw");
        # M2M100 only understands the bare language part.
        src_code = detect(user_text).split("-")[0]
    except LangDetectException:
        src_code = "en"
    if src_code == target_code:
        return user_text
    # Guard against languages langdetect knows but M2M100 does not —
    # get_lang_id would raise KeyError for an unsupported code.
    if src_code not in m2m_tokenizer.lang_code_to_id:
        src_code = "en"
    m2m_tokenizer.src_lang = src_code
    encoded = m2m_tokenizer(user_text, return_tensors="pt")
    generated = m2m_model.generate(
        **encoded,
        forced_bos_token_id=m2m_tokenizer.get_lang_id(target_code),
    )
    return m2m_tokenizer.decode(generated[0], skip_special_tokens=True)
# ----------------------------
# 3. EMOTION DETECTION (Groq API)
# ----------------------------
# Read from the environment (e.g. a Space secret); may be None, in which
# case the API call below fails and the code falls back to "neutral".
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
# NOTE(review): this does not match Groq's documented base URL
# (https://api.groq.com/openai/v1/...), so every request presumably fails
# and detect_emotion_tone() always returns "neutral" — confirm the endpoint.
API_URL = "https://api.groq.ai/v1/text/analyze"
def detect_emotion_tone(text):
    """Classify the dominant emotion of *text* via the Groq chat API.

    Args:
        text: Text to classify.

    Returns:
        One of "happy", "sad", "angry" or "neutral". Any failure (blank
        input, missing API key, network error, unexpected payload)
        degrades to "neutral" so the pipeline never crashes here.
    """
    if not text.strip():
        return "neutral"
    # Read the key at call time so a key set after module import still works;
    # bail out early instead of firing a request guaranteed to 401.
    api_key = os.getenv("GROQ_API_KEY")
    if not api_key:
        return "neutral"
    # The original endpoint (api.groq.ai/v1/text/analyze) is not part of
    # Groq's API; use the documented OpenAI-compatible chat endpoint.
    url = "https://api.groq.com/openai/v1/chat/completions"
    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
    payload = {
        "model": "llama-3.1-8b-instant",
        "messages": [
            {
                "role": "user",
                "content": (
                    "Classify the dominant emotion of the following text. "
                    "Answer with exactly one word out of: "
                    "happy, sad, angry, neutral.\n\n" + text
                ),
            }
        ],
        "temperature": 0,
    }
    try:
        # Timeout keeps a stalled API from hanging the whole pipeline.
        r = requests.post(url, headers=headers, json=payload, timeout=10)
        r.raise_for_status()
        answer = r.json()["choices"][0]["message"]["content"].strip().lower()
    except (requests.RequestException, KeyError, IndexError, ValueError):
        return "neutral"
    return answer if answer in {"happy", "sad", "angry"} else "neutral"
# ----------------------------
# 4. TEXT TO SPEECH (Edge TTS)
# ----------------------------
async def text_to_speech(text, voice, rate, pitch):
    """Synthesize *text* to an MP3 file with Microsoft Edge TTS.

    Args:
        text: Text to speak; blank input yields None.
        voice: "ShortName - Locale" string (only the ShortName is used).
        rate: Speaking-rate delta in percent (int-like).
        pitch: Pitch delta in Hz (int-like).

    Returns:
        Path of the generated .mp3 file, or None for blank text.
    """
    if not text.strip():
        return None
    voice_short_name = voice.split(" - ")[0]
    # int() tolerates float slider values (f"{0.0:+d}" would raise);
    # :+d produces the signed "+10%" / "-5Hz" form edge-tts expects.
    communicate = edge_tts.Communicate(
        text,
        voice_short_name,
        rate=f"{int(rate):+d}%",
        pitch=f"{int(pitch):+d}Hz",
    )
    # Create the temp file and close our handle BEFORE edge-tts writes to
    # it by name — holding a NamedTemporaryFile open while reopening it by
    # path fails on Windows. Caller (Gradio) consumes the file afterwards,
    # so it is intentionally not deleted here.
    fd, path = tempfile.mkstemp(suffix=".mp3")
    os.close(fd)
    await communicate.save(path)
    return path
def tts_sync(text, voice, rate, pitch):
    """Blocking wrapper around the async text_to_speech coroutine."""
    coro = text_to_speech(text, voice, rate, pitch)
    return asyncio.run(coro)
# ----------------------------
# 5. PIPELINE FUNCTION
# ----------------------------
async def full_pipeline(audio, target_lang):
    """Record -> transcribe -> translate -> emotion-aware TTS.

    Args:
        audio: Path of the recorded audio, or None when nothing recorded.
        target_lang: UI label of the target language.

    Returns:
        Path of the synthesized .mp3, or None when transcription yields
        no text.
    """
    # Step 1: STT
    text = transcribe(audio)
    if not text:
        return None
    # Step 2: Translate
    translated = translate_text(text, target_lang)
    # Step 3: emotion is detected on the ORIGINAL text, not the translation.
    emotion = detect_emotion_tone(text)
    # Step 4: pick a voice for the detected emotion.
    # NOTE(review): these are all en-US voices even when the target language
    # is Spanish/French — consider locale-matched voices per target language.
    emotion_voice = {
        "happy": "en-US-AriaNeural",
        "sad": "en-US-JennyNeural",
        "angry": "en-US-GuyNeural",
    }
    voices = await edge_tts.list_voices()
    wanted = emotion_voice.get(emotion)
    matches = [v for v in voices if wanted and wanted in v["ShortName"]]
    # Fall back to the first available voice instead of crashing with an
    # IndexError when the wanted voice is absent from the catalogue.
    chosen = matches[0] if matches else voices[0]
    voice_final = f"{chosen['ShortName']} - {chosen['Locale']}"
    # Step 5: Generate final audio
    return await text_to_speech(translated, voice_final, 0, 0)
# ----------------------------
# 6. GRADIO UI
# ----------------------------
with gr.Blocks() as demo:
    gr.Markdown("# 🎀 Speech Translator with Emotions")
    with gr.Row():
        audio_in = gr.Audio(sources=["microphone"], type="filepath", label="Record Speech")
        target_lang = gr.Dropdown(
            choices=["English", "Spanish", "French"],
            value="English",
            label="Target Language",
        )
    final_speech = gr.Audio(label="πŸ”Š Final Speech", type="filepath")
    run_btn = gr.Button("πŸš€ Translate & Speak")
    # Gradio awaits async callbacks natively, so the coroutine function
    # can be wired in directly.
    run_btn.click(fn=full_pipeline, inputs=[audio_in, target_lang], outputs=[final_speech])

if __name__ == "__main__":
    demo.launch()