Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -29,7 +29,7 @@ LANG_UI_TO_CODE = {"English": "en", "Spanish": "es", "French": "fr"}
|
|
| 29 |
|
| 30 |
def translate_text(user_text, target_lang_ui):
|
| 31 |
if not user_text.strip():
|
| 32 |
-
return "
|
| 33 |
target_code = LANG_UI_TO_CODE.get(target_lang_ui, "en")
|
| 34 |
try:
|
| 35 |
src_code = detect(user_text)
|
|
@@ -50,22 +50,19 @@ API_URL = "https://api.groq.ai/v1/text/analyze"
|
|
| 50 |
|
| 51 |
def detect_emotion_tone(text):
|
| 52 |
if not text.strip():
|
| 53 |
-
return "
|
| 54 |
headers = {"Authorization": f"Bearer {GROQ_API_KEY}", "Content-Type": "application/json"}
|
| 55 |
-
payload = {"text": text, "features": ["emotion"
|
| 56 |
try:
|
| 57 |
r = requests.post(API_URL, headers=headers, json=payload)
|
| 58 |
r.raise_for_status()
|
| 59 |
result = r.json()
|
| 60 |
emotions = result.get("emotion", {})
|
| 61 |
-
tones = result.get("tone", {})
|
| 62 |
if not emotions:
|
| 63 |
-
return "neutral"
|
| 64 |
-
|
| 65 |
-
dominant_tone = max(tones, key=tones.get) if tones else "neutral"
|
| 66 |
-
return dominant_emotion, dominant_tone
|
| 67 |
except Exception:
|
| 68 |
-
return "neutral"
|
| 69 |
|
| 70 |
# ----------------------------
|
| 71 |
# 4. TEXT TO SPEECH (Edge TTS)
|
|
@@ -89,17 +86,16 @@ async def full_pipeline(audio, target_lang):
|
|
| 89 |
# Step 1: STT
|
| 90 |
text = transcribe(audio)
|
| 91 |
if not text:
|
| 92 |
-
return
|
| 93 |
|
| 94 |
# Step 2: Translate
|
| 95 |
translated = translate_text(text, target_lang)
|
| 96 |
|
| 97 |
# Step 3: Emotion Detection
|
| 98 |
-
emotion
|
| 99 |
|
| 100 |
-
# Step 4:
|
| 101 |
voices = await edge_tts.list_voices()
|
| 102 |
-
# Simple emotion β voice mapping
|
| 103 |
if emotion == "happy":
|
| 104 |
voice_choice = [v for v in voices if "en-US-AriaNeural" in v["ShortName"]]
|
| 105 |
elif emotion == "sad":
|
|
@@ -110,30 +106,24 @@ async def full_pipeline(audio, target_lang):
|
|
| 110 |
voice_choice = [voices[0]]
|
| 111 |
voice_final = f"{voice_choice[0]['ShortName']} - {voice_choice[0]['Locale']}"
|
| 112 |
|
|
|
|
| 113 |
audio_out = await text_to_speech(translated, voice_final, 0, 0)
|
| 114 |
-
|
| 115 |
-
return text, translated, f"{emotion} / {tone}", audio_out
|
| 116 |
|
| 117 |
# ----------------------------
|
| 118 |
# 6. GRADIO UI
|
| 119 |
# ----------------------------
|
| 120 |
with gr.Blocks() as demo:
|
| 121 |
-
gr.Markdown("#
|
| 122 |
|
| 123 |
with gr.Row():
|
| 124 |
-
audio_in = gr.Audio(sources=["microphone"], type="filepath", label="
|
| 125 |
-
target_lang = gr.Dropdown(choices=["English", "Spanish", "French"], value="English", label="
|
| 126 |
|
| 127 |
-
|
| 128 |
-
stt_out = gr.Textbox(label="π Recognized Speech", lines=2)
|
| 129 |
-
trans_out = gr.Textbox(label="π Translated Text", lines=2)
|
| 130 |
-
|
| 131 |
-
with gr.Row():
|
| 132 |
-
emotion_out = gr.Textbox(label="π Detected Emotion & Tone")
|
| 133 |
-
audio_out = gr.Audio(label="π Final Speech", type="filepath")
|
| 134 |
|
| 135 |
-
run_btn = gr.Button("π
|
| 136 |
-
run_btn.click(fn=full_pipeline, inputs=[audio_in, target_lang], outputs=[
|
| 137 |
|
| 138 |
if __name__ == "__main__":
|
| 139 |
-
demo.launch()
|
|
|
|
| 29 |
|
| 30 |
def translate_text(user_text, target_lang_ui):
|
| 31 |
if not user_text.strip():
|
| 32 |
+
return ""
|
| 33 |
target_code = LANG_UI_TO_CODE.get(target_lang_ui, "en")
|
| 34 |
try:
|
| 35 |
src_code = detect(user_text)
|
|
|
|
| 50 |
|
| 51 |
def detect_emotion_tone(text):
    """Return the dominant emotion label for *text* via the analyze API.

    NOTE: despite the name, only the emotion is returned (the tone lookup
    was removed in this revision); callers receive a single string.

    Args:
        text: The text to analyze.

    Returns:
        The emotion key with the highest reported score, or "neutral" when
        the input is blank, the API reports no emotions, or the request
        fails for any reason.
    """
    if not text.strip():
        return "neutral"
    headers = {"Authorization": f"Bearer {GROQ_API_KEY}", "Content-Type": "application/json"}
    payload = {"text": text, "features": ["emotion"]}
    try:
        # Bounded timeout so a stalled API call cannot hang the pipeline
        # (requests.post blocks forever without one).
        r = requests.post(API_URL, headers=headers, json=payload, timeout=10)
        r.raise_for_status()
        result = r.json()
        emotions = result.get("emotion", {})
        if not emotions:
            return "neutral"
        # Dominant emotion = the key with the highest score.
        return max(emotions, key=emotions.get)
    except Exception:
        # Best-effort: any network/HTTP/parse failure degrades to "neutral"
        # rather than breaking the speech pipeline.
        return "neutral"
|
| 66 |
|
| 67 |
# ----------------------------
|
| 68 |
# 4. TEXT TO SPEECH (Edge TTS)
|
|
|
|
| 86 |
# Step 1: STT
|
| 87 |
text = transcribe(audio)
|
| 88 |
if not text:
|
| 89 |
+
return None
|
| 90 |
|
| 91 |
# Step 2: Translate
|
| 92 |
translated = translate_text(text, target_lang)
|
| 93 |
|
| 94 |
# Step 3: Emotion Detection
|
| 95 |
+
emotion = detect_emotion_tone(text)
|
| 96 |
|
| 97 |
+
# Step 4: Pick voice based on emotion
|
| 98 |
voices = await edge_tts.list_voices()
|
|
|
|
| 99 |
if emotion == "happy":
|
| 100 |
voice_choice = [v for v in voices if "en-US-AriaNeural" in v["ShortName"]]
|
| 101 |
elif emotion == "sad":
|
|
|
|
| 106 |
voice_choice = [voices[0]]
|
| 107 |
voice_final = f"{voice_choice[0]['ShortName']} - {voice_choice[0]['Locale']}"
|
| 108 |
|
| 109 |
+
# Step 5: Generate final audio
|
| 110 |
audio_out = await text_to_speech(translated, voice_final, 0, 0)
|
| 111 |
+
return audio_out
|
|
|
|
| 112 |
|
| 113 |
# ----------------------------
|
| 114 |
# 6. GRADIO UI
|
| 115 |
# ----------------------------
|
| 116 |
with gr.Blocks() as demo:
    # App title banner.
    gr.Markdown("# π€ Speech Translator with Emotions")

    with gr.Row():
        # Microphone input and target-language selector, side by side.
        audio_in = gr.Audio(
            sources=["microphone"],
            type="filepath",
            label="Record Speech",
        )
        target_lang = gr.Dropdown(
            choices=["English", "Spanish", "French"],
            value="English",
            label="Target Language",
        )

    # The synthesized translated speech is played back here.
    final_speech = gr.Audio(label="π Final Speech", type="filepath")

    # Wire the button to the full STT -> translate -> emotion -> TTS pipeline.
    run_btn = gr.Button("π Translate & Speak")
    run_btn.click(
        fn=full_pipeline,
        inputs=[audio_in, target_lang],
        outputs=[final_speech],
    )

if __name__ == "__main__":
    demo.launch()
|