ZoyaRabail committed on
Commit c7cbc7f · verified · 1 Parent(s): c940c0a

Update app.py

Files changed (1):
  1. app.py +132 -40
app.py CHANGED
@@ -1,47 +1,139 @@
  import gradio as gr

- # Create HTML links that look like buttons
- html_template = """
- <div style="text-align: center; margin: 20px;">
-     <a href="{url}" style="
-         display: inline-block;
-         padding: 12px 24px;
-         background-color: #4CAF50;
-         color: white;
-         text-decoration: none;
-         border-radius: 5px;
-         font-weight: bold;
-         font-size: 16px;
-         margin: 10px;
-     ">{text}</a>
- </div>
- """
-
- with gr.Blocks(title="AI Multi-Tool Hub", theme=gr.themes.Soft()) as demo:
-     gr.Markdown("# 🤖 AI Multi-Tool Hub")
-     gr.Markdown("Select a tool to use:")
-
      with gr.Row():
-         with gr.Column():
-             gr.Markdown("### 🎙️ Speech to Text")
-             gr.Markdown("Convert audio to text using Whisper")
-             gr.HTML(html_template.format(url="/speech_to_text", text="Open Speech to Text"))
-
-         with gr.Column():
-             gr.Markdown("### 🌐 Translation")
-             gr.Markdown("Translate text between languages")
-             gr.HTML(html_template.format(url="/translation", text="Open Translation Tool"))
-
      with gr.Row():
-         with gr.Column():
-             gr.Markdown("### 😊 Emotion Detection")
-             gr.Markdown("Detect emotions and tone in text")
-             gr.HTML(html_template.format(url="/emotions", text="Open Emotion Detection"))
-
-         with gr.Column():
-             gr.Markdown("### 🔊 Text to Speech")
-             gr.Markdown("Convert text to speech")
-             gr.HTML(html_template.format(url="/text_to_speech", text="Open Text to Speech"))

  if __name__ == "__main__":
      demo.launch()
 
+ import os
  import gradio as gr
+ import asyncio
+ import tempfile
+ import edge_tts
+ import requests
+ from langdetect import detect, LangDetectException
+ from transformers import pipeline, M2M100ForConditionalGeneration, M2M100Tokenizer
+
+ # ----------------------------
+ # 1. SPEECH TO TEXT (Whisper)
+ # ----------------------------
+ stt_pipeline = pipeline("automatic-speech-recognition", model="openai/whisper-small")
+
+ def transcribe(audio):
+     if audio is None:
+         return None
+     result = stt_pipeline(audio)
+     return result["text"]
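+ # (Editor's note, illustrative only) gr.Audio(type="filepath") hands
+ # transcribe() above a path string, so a call like transcribe("clip.wav")
+ # returns the recognized text; "clip.wav" is a hypothetical file name.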
+
+ # ----------------------------
+ # 2. TRANSLATION (M2M100)
+ # ----------------------------
+ m2m_model_name = "facebook/m2m100_418M"
+ m2m_tokenizer = M2M100Tokenizer.from_pretrained(m2m_model_name)
+ m2m_model = M2M100ForConditionalGeneration.from_pretrained(m2m_model_name)
+
+ LANG_UI_TO_CODE = {"English": "en", "Spanish": "es", "French": "fr"}
+
+ def translate_text(user_text, target_lang_ui):
+     if not user_text.strip():
+         return "⚠️ Please enter text."
+     target_code = LANG_UI_TO_CODE.get(target_lang_ui, "en")
+     try:
+         src_code = detect(user_text)
+     except LangDetectException:
+         src_code = "en"
+     if src_code == target_code:
+         return user_text
+     m2m_tokenizer.src_lang = src_code
+     encoded = m2m_tokenizer(user_text, return_tensors="pt")
+     generated = m2m_model.generate(**encoded, forced_bos_token_id=m2m_tokenizer.get_lang_id(target_code))
+     return m2m_tokenizer.decode(generated[0], skip_special_tokens=True)
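+ # (Editor's caveat) langdetect may return a code M2M100 does not support
+ # (e.g. "zh-cn"), in which case get_lang_id raises a KeyError; checking the
+ # detected code against m2m_tokenizer.lang_code_to_id before translating is
+ # one possible hardening, assuming that mapping attribute on the tokenizer.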
+
+ # ----------------------------
+ # 3. EMOTION DETECTION (Groq API)
+ # ----------------------------
+ GROQ_API_KEY = os.getenv("GROQ_API_KEY")
+ API_URL = "https://api.groq.ai/v1/text/analyze"
+
+ def detect_emotion_tone(text):
+     if not text.strip():
+         return "⚠ No text.", None
+     headers = {"Authorization": f"Bearer {GROQ_API_KEY}", "Content-Type": "application/json"}
+     payload = {"text": text, "features": ["emotion", "tone"]}
+     try:
+         r = requests.post(API_URL, headers=headers, json=payload, timeout=30)
+         r.raise_for_status()
+         result = r.json()
+         emotions = result.get("emotion", {})
+         tones = result.get("tone", {})
+         if not emotions:
+             return "neutral", "neutral"
+         dominant_emotion = max(emotions, key=emotions.get)
+         dominant_tone = max(tones, key=tones.get) if tones else "neutral"
+         return dominant_emotion, dominant_tone
+     except Exception:
+         return "neutral", "neutral"
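+ # (Editor's caveat) "api.groq.ai/v1/text/analyze" is not Groq's documented
+ # endpoint; the published Groq API is OpenAI-compatible and lives under
+ # api.groq.com. As written the request fails, the bare except catches it
+ # (within the timeout added above), and the function degrades to
+ # "neutral"/"neutral", so the rest of the pipeline still runs.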
+
+ # ----------------------------
+ # 4. TEXT TO SPEECH (Edge TTS)
+ # ----------------------------
+ async def text_to_speech(text, voice, rate, pitch):
+     if not text.strip():
+         return None
+     voice_short_name = voice.split(" - ")[0]
+     communicate = edge_tts.Communicate(text, voice_short_name, rate=f"{rate:+d}%", pitch=f"{pitch:+d}Hz")
+     with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
+         await communicate.save(tmp.name)
+     return tmp.name
+
+ def tts_sync(text, voice, rate, pitch):
+     return asyncio.run(text_to_speech(text, voice, rate, pitch))
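+ # (Editor's note) f"{rate:+d}%" and f"{pitch:+d}Hz" require ints (0 becomes
+ # "+0%" / "+0Hz"), which matches how full_pipeline calls this below; floats
+ # would raise a ValueError. tts_sync is a synchronous wrapper the UI never calls.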
+
+ # ----------------------------
+ # 5. PIPELINE FUNCTION
+ # ----------------------------
+ async def full_pipeline(audio, target_lang):
+     # Step 1: STT
+     text = transcribe(audio)
+     if not text:
+         return "⚠ No speech detected", "", "", None
+
+     # Step 2: Translate
+     translated = translate_text(text, target_lang)
+
+     # Step 3: Emotion Detection
+     emotion, tone = detect_emotion_tone(text)
+
+     # Step 4: TTS (apply emotion by picking voice tone)
+     voices = await edge_tts.list_voices()
+     # Simple emotion → voice mapping
+     if emotion == "happy":
+         voice_choice = [v for v in voices if "en-US-AriaNeural" in v["ShortName"]]
+     elif emotion == "sad":
+         voice_choice = [v for v in voices if "en-US-JennyNeural" in v["ShortName"]]
+     elif emotion == "angry":
+         voice_choice = [v for v in voices if "en-US-GuyNeural" in v["ShortName"]]
+     else:
+         voice_choice = [voices[0]]
+     if not voice_choice:
+         # Fallback: avoid an IndexError if the mapped voice is unavailable
+         voice_choice = [voices[0]]
+     voice_final = f"{voice_choice[0]['ShortName']} - {voice_choice[0]['Locale']}"
+
+     audio_out = await text_to_speech(translated, voice_final, 0, 0)
+
+     return text, translated, f"{emotion} / {tone}", audio_out
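+ # (Editor's note) edge_tts.list_voices() yields dicts whose keys include
+ # "ShortName" and "Locale" (e.g. {"ShortName": "en-US-AriaNeural",
+ # "Locale": "en-US", ...}), which is what voice_final is formatted from.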
+
+ # ----------------------------
+ # 6. GRADIO UI
+ # ----------------------------
+ with gr.Blocks() as demo:
+     gr.Markdown("# 🌍 Speech Translator with Emotions")
+
      with gr.Row():
+         audio_in = gr.Audio(sources=["microphone"], type="filepath", label="🎤 Record Speech")
+         target_lang = gr.Dropdown(choices=["English", "Spanish", "French"], value="English", label="Translate to")
+
      with gr.Row():
+         stt_out = gr.Textbox(label="📝 Recognized Speech", lines=2)
+         trans_out = gr.Textbox(label="🌐 Translated Text", lines=2)
+
+     with gr.Row():
+         emotion_out = gr.Textbox(label="😊 Detected Emotion & Tone")
+         audio_out = gr.Audio(label="🔊 Final Speech", type="filepath")
+
+     run_btn = gr.Button("🚀 Run Pipeline")
+     run_btn.click(fn=full_pipeline, inputs=[audio_in, target_lang], outputs=[stt_out, trans_out, emotion_out, audio_out])
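+     # (Editor's note) Gradio event handlers may be async, so the async
+     # full_pipeline can be wired into run_btn.click directly, without an
+     # explicit asyncio.run wrapper.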

  if __name__ == "__main__":
      demo.launch()