KavyaBansal committed on
Commit
292a85f
·
verified ·
1 Parent(s): a465989

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +241 -0
app.py ADDED
@@ -0,0 +1,241 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ from transformers import WhisperProcessor, WhisperForConditionalGeneration, pipeline
4
+ import librosa
5
+ from gtts import gTTS
6
+ import numpy as np
7
+ import tempfile
8
+ import os
9
+
10
# Device configuration: run inference on GPU when CUDA is available,
# otherwise fall back to CPU. Used by model .to(...) calls below.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {DEVICE}")
13
+
14
class EmotionAwareTranscriber:
    """Speech-to-text pipeline with emotion detection and styled spoken replies.

    Combines three stages:
      1. OpenAI Whisper (via transformers) transcribes the input audio.
      2. A DistilRoBERTa text classifier labels the transcript's emotion.
      3. A canned template, chosen by (emotion, response style), is rendered
         to speech with gTTS.
    """

    def __init__(self, model_size="base"):
        """Load the Whisper model/processor and the emotion classifier.

        model_size: Whisper checkpoint suffix ("tiny", "base", "small", ...);
        downloads weights on first use, so construction can be slow.
        """
        print("Initializing models...")

        # Initialize Whisper (processor handles feature extraction + decoding).
        self.processor = WhisperProcessor.from_pretrained(f"openai/whisper-{model_size}")
        self.model = WhisperForConditionalGeneration.from_pretrained(f"openai/whisper-{model_size}").to(DEVICE)

        # Initialize emotion classifier. top_k=1 makes the pipeline return a
        # ranked list per input, hence the [0][0] indexing in detect_emotion.
        self.emotion_classifier = pipeline(
            "text-classification",
            model="j-hartmann/emotion-english-distilroberta-base",
            top_k=1
        )

        # Response templates, keyed by emotion then by response style. Each
        # leaf is a list (np.random.choice picks one) even when it has a
        # single entry, so more variants can be added without code changes.
        # NOTE(review): 'tired' is not a label this classifier emits; it is
        # only reachable via the keyword check in detect_emotion.
        self.response_templates = {
            'happy': {
                'motivational': ["Your joy is absolutely contagious! This kind of positive energy is what makes life worth living..."],
                'calm': ["I can feel the warmth of your happiness radiating through your words..."],
                'energetic': ["OH MY GOODNESS THIS IS INCREDIBLE NEWS!!! LET'S CELEBRATE!!!"],
                'angry': ["How can you be happy when there's so much suffering in the world?"]
            },
            'sad': {
                'motivational': ["I hear the sadness in your voice, and I want you to know that this feeling won't last forever..."],
                'calm': ["I sense your heavy heart, and I'm here with you in this moment..."],
                'energetic': ["HEY! I know you're feeling DOWN right now, but LISTEN UP! YOU'VE GOT THIS!!"],
                'angry': ["Stop wallowing and do something productive!"]
            },
            'angry': {
                'motivational': ["That passion can fuel positive change! Let's channel this energy constructively..."],
                'calm': ["I sense your anger. Let's take a deep breath and find solutions..."],
                'energetic': ["YEAH! I FEEL THAT TOO! NOW LET'S DO SOMETHING ABOUT IT!!"],
                'angry': ["You think YOU'RE angry? The whole system is broken!"]
            },
            'disgust': {
                'motivational': ["I can tell you're feeling repulsed. Sometimes disgust protects us from harmful things..."],
                'calm': ["I sense your strong distaste. Let's remove ourselves from this situation..."],
                'energetic': ["EW EW EW!!! GROSS ALERT! LET'S GET AWAY FROM THAT RIGHT NOW!!"],
                'angry': ["This is ABSOLUTELY DISGUSTING and UNACCEPTABLE!"]
            },
            'fear': {
                'motivational': ["It's understandable to feel scared. Remember you've survived tough times before..."],
                'calm': ["Fear is a natural response. Let's assess the situation calmly..."],
                'energetic': ["DANGER ALERT! BUT WAIT - LET'S MAKE A SAFETY PLAN!!"],
                'angry': ["Stop being such a coward!"]
            },
            'neutral': {
                'motivational': ["I appreciate you sharing this with me. Every day brings new opportunities..."],
                'calm': ["Thank you for expressing yourself. I'm here to listen..."],
                'energetic': ["LET'S FIND SOMETHING EXCITING TO DO RIGHT NOW!!"],
                'angry': ["Is that all? How utterly boring."]
            },
            'surprise': {
                'motivational': ["Unexpected moments can be life's greatest gifts!..."],
                'calm': ["I sense your surprise. Let's observe what unfolds..."],
                'energetic': ["WHOA! NO WAY! THAT'S INCREDIBLE!!!"],
                'angry': ["Why are you surprised? You should have seen this coming!"]
            },
            'tired': {
                'motivational': ["Rest is revolutionary. Recharge and return stronger..."],
                'calm': ["Fatigue is natural. Honor your need for rest..."],
                'energetic': ["DON'T QUIT! PUSH THROUGH! YOU'RE ALMOST THERE!!"],
                'angry': ["Tired? That's pathetic! Winners never rest!"]
            }
        }

    def detect_emotion(self, text):
        """Return a lowercase emotion label for *text*.

        Keyword overrides for 'disgust' and 'tired' take precedence over the
        classifier's top label; any failure falls back to 'neutral'.
        """
        try:
            # top_k=1 yields a list-of-lists; take the single best prediction.
            result = self.emotion_classifier(text)[0][0]
            emotion = result['label'].lower()

            # Manual checks: the classifier under-reports disgust and has no
            # 'tired' class at all, so simple keyword matches override it.
            disgust_keywords = ['disgusting', 'gross', 'revolting']
            if any(kw in text.lower() for kw in disgust_keywords):
                return 'disgust'

            tired_keywords = ['exhausted', 'tired', 'sleepy']
            if any(kw in text.lower() for kw in tired_keywords):
                return 'tired'

            return emotion
        except Exception as e:
            # Emotion detection is best-effort; never let it break the app.
            print(f"Emotion detection error: {e}")
            return 'neutral'

    def generate_response(self, text, emotion, style):
        """Pick a canned reply for (emotion, style), with safe fallbacks.

        Unknown emotions degrade to 'neutral', unknown styles to
        'motivational'; any unexpected failure returns a generic reply.
        """
        try:
            if emotion not in self.response_templates:
                emotion = 'neutral'
            if style not in self.response_templates[emotion]:
                style = 'motivational'
            # Random pick so multi-entry template lists produce variety.
            return np.random.choice(self.response_templates[emotion][style])
        except Exception as e:
            print(f"Response generation error: {e}")
            return "I appreciate you sharing this with me."

    def text_to_speech(self, text, style="motivational"):
        """Render *text* to an mp3 via gTTS; return the temp file path.

        The style maps to gTTS voice parameters (accent/speed). Returns None
        on failure. The file is created with delete=False, so the caller is
        responsible for removing it (see process_audio_wrapper).
        """
        try:
            voice_params = {
                'motivational': {'lang': 'en', 'slow': False},
                'calm': {'lang': 'en', 'slow': True},
                'energetic': {'lang': 'en-uk', 'slow': False},
                'angry': {'lang': 'en-au', 'slow': False}
            }.get(style, {'lang': 'en'})

            # delete=False: the path must outlive this scope so Gradio can
            # stream the audio back to the client.
            with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as fp:
                tts = gTTS(text=text, **voice_params)
                tts.save(fp.name)
                return fp.name
        except Exception as e:
            # gTTS needs network access; degrade to text-only on failure.
            print(f"TTS error: {e}")
            return None

    def process_audio(self, audio_path, style):
        """Full pipeline: audio file -> transcript, emotion, reply, speech.

        Returns a dict with keys "transcription", "emotion", "response",
        "audio" (path to an mp3 or None). On any error a placeholder dict
        with the same keys is returned instead of raising.
        """
        try:
            # Transcribe: Whisper expects 16 kHz mono input.
            waveform, _ = librosa.load(audio_path, sr=16000)
            input_features = self.processor(waveform, sampling_rate=16000, return_tensors="pt").input_features.to(DEVICE)
            predicted_ids = self.model.generate(input_features, max_length=200)
            transcription = self.processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]

            # Detect emotion from the transcript text.
            emotion = self.detect_emotion(transcription)

            # Generate a styled text response.
            response = self.generate_response(transcription, emotion, style)

            # Convert the response to speech (may be None on TTS failure).
            audio_output = self.text_to_speech(response, style)

            return {
                "transcription": transcription,
                "emotion": emotion,
                "response": response,
                "audio": audio_output
            }
        except Exception as e:
            # Catch-all keeps the UI responsive even when a stage blows up.
            print(f"Processing error: {e}")
            return {
                "transcription": "Error processing audio",
                "emotion": "neutral",
                "response": "Sorry, something went wrong",
                "audio": None
            }
159
+
160
# Add installation code for Google Colab
def install_dependencies():
    """Install the pip packages and the ffmpeg binary the app needs.

    Intended for Google Colab (or any Debian-based environment where
    apt-get is available). Best-effort: failures are left to surface
    later when the missing dependency is actually used.
    """
    import shutil
    import subprocess
    import sys

    print("Installing required packages...")

    # Use the running interpreter's pip; a bare "pip" on PATH can belong
    # to a different Python environment than the one executing this app.
    subprocess.run(
        [sys.executable, "-m", "pip", "install",
         "gradio", "torch", "transformers", "librosa", "gtts", "numpy"]
    )

    # The audio stack needs the ffmpeg *binary*. The original probed
    # `import ffmpeg`, which tests for the unrelated ffmpeg-python
    # package, not the executable — check the PATH instead.
    if shutil.which("ffmpeg") is None:
        print("Installing ffmpeg...")
        subprocess.run(["apt-get", "update", "-qq"])
        subprocess.run(["apt-get", "install", "-y", "-qq", "ffmpeg"])

    print("Dependencies installed successfully.")
177
+
178
def process_audio_wrapper(audio_path, style):
    """Gradio callback: run the pipeline and unpack its result dict.

    Returns (transcription, EMOTION, response, audio_path_or_None) in the
    order the Gradio outputs are wired. Relies on the module-level
    ``transcriber`` created in the __main__ block.
    """
    result = transcriber.process_audio(audio_path, style)

    # Delete the previous response's temp mp3 so files don't accumulate.
    if process_audio_wrapper.last_audio:
        try:
            os.unlink(process_audio_wrapper.last_audio)
        except OSError:
            # Best effort: the file may already be gone or still in use.
            pass
    process_audio_wrapper.last_audio = result["audio"]

    return (
        result["transcription"],
        result["emotion"].upper(),
        result["response"],
        result["audio"] if result["audio"] else None
    )

# Track the last generated audio file on the function object itself.
# This must run *after* the def: the original assigned the attribute
# before the function existed, raising NameError at import time.
process_audio_wrapper.last_audio = None
198
+
199
# Main execution: install deps (Colab only), build the Gradio UI, launch.
if __name__ == "__main__":
    # Detect Google Colab by probing for its bootstrap module. Catch only
    # ImportError — a bare except would also swallow KeyboardInterrupt.
    try:
        import google.colab  # noqa: F401
        IN_COLAB = True
    except ImportError:
        IN_COLAB = False

    if IN_COLAB:
        install_dependencies()

    # Initialize transcriber after dependencies are installed (downloads
    # model weights on first run).
    transcriber = EmotionAwareTranscriber()

    # Gradio interface
    with gr.Blocks(title="Emotion-Aware Audio Transcriber") as demo:
        gr.Markdown("# 🎤 Emotion-Aware Audio Transcriber")
        gr.Markdown("Upload an audio file to get a transcription with emotional analysis and response")

        with gr.Row():
            audio_input = gr.Audio(label="Upload Audio", type="filepath")
            style_selector = gr.Radio(
                ["motivational", "calm", "energetic", "angry"],
                label="Response Style",
                value="motivational"
            )
        submit_btn = gr.Button("Process", variant="primary")

        with gr.Column():
            transcription_output = gr.Textbox(label="Transcription")
            emotion_output = gr.Textbox(label="Detected Emotion")
            response_output = gr.Textbox(label="Generated Response")
            audio_output = gr.Audio(label="Spoken Response")

        submit_btn.click(
            fn=process_audio_wrapper,
            inputs=[audio_input, style_selector],
            outputs=[transcription_output, emotion_output, response_output, audio_output]
        )

    # Launch with share=True to generate a public URL (required in Colab,
    # where localhost is not reachable from the browser).
    demo.launch(debug=True, share=True)