Devakumar868 committed on
Commit
e1df5c0
·
verified ·
1 Parent(s): e8d6200

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -13
app.py CHANGED
@@ -43,12 +43,12 @@ class ConversationalAI:
43
  device=self.device
44
  )
45
 
46
- # Load CORRECT audio emotion recognition model
47
  self.emotion_model = pipeline(
48
  "audio-classification",
49
- model="speechbrain/emotion-recognition-wav2vec2-IEMOCAP",
50
  device=self.device
51
- )[1]
52
 
53
  # Conversation history
54
  self.conversations = {}
@@ -88,13 +88,29 @@ class ConversationalAI:
88
  return f"Transcription error: {str(e)}"
89
 
90
  def recognize_emotion(self, audio_path):
91
- """Recognize emotion from audio using proper audio model"""
92
  try:
93
  if audio_path is None:
94
  return "neutral"
95
 
96
  result = self.emotion_model(audio_path)
97
- return result[0]["label"].lower()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
  except Exception as e:
99
  print(f"Emotion recognition error: {e}")
100
  return "neutral"
@@ -105,9 +121,25 @@ class ConversationalAI:
105
  if text.startswith("Transcription error") or not text.strip():
106
  return "I'm sorry, I couldn't understand what you said. Could you please try again?"
107
 
108
- # Build context-aware prompt
109
- emotion_prompt = f"[User seems {emotion}] " if emotion != "neutral" else ""
110
- prompt = f"{emotion_prompt}User: {text}\nMaya:"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
 
112
  # Tokenize with proper attention mask
113
  inputs = self.llm_tokenizer(
@@ -122,7 +154,7 @@ class ConversationalAI:
122
  outputs = self.llm_model.generate(
123
  input_ids=inputs.input_ids,
124
  attention_mask=inputs.attention_mask,
125
- max_new_tokens=80,
126
  temperature=0.7,
127
  do_sample=True,
128
  pad_token_id=self.llm_tokenizer.eos_token_id,
@@ -135,9 +167,9 @@ class ConversationalAI:
135
  skip_special_tokens=True
136
  ).strip()
137
 
138
- # Clean up response
139
- if not response:
140
- response = "I understand. Could you tell me more about that?"
141
 
142
  return response
143
 
@@ -336,7 +368,7 @@ with gr.Blocks(
336
  outputs=[transcription_output, audio_output, conversation_history]
337
  )
338
 
339
- # Launch the app - FIXED: Removed show_tips parameter
340
  if __name__ == "__main__":
341
  demo.launch(
342
  server_name="0.0.0.0",
 
43
  device=self.device
44
  )
45
 
46
+ # Load WORKING audio emotion recognition model
47
  self.emotion_model = pipeline(
48
  "audio-classification",
49
+ model="superb/wav2vec2-base-superb-er",
50
  device=self.device
51
+ )
52
 
53
  # Conversation history
54
  self.conversations = {}
 
88
  return f"Transcription error: {str(e)}"
89
 
90
  def recognize_emotion(self, audio_path):
91
+ """Recognize emotion from audio using working model"""
92
  try:
93
  if audio_path is None:
94
  return "neutral"
95
 
96
  result = self.emotion_model(audio_path)
97
+ emotion_label = result[0]["label"].lower()
98
+
99
+ # Map SUPERB emotions to common emotions
100
+ emotion_mapping = {
101
+ "ang": "angry",
102
+ "hap": "happy",
103
+ "exc": "excited",
104
+ "sad": "sad",
105
+ "fru": "frustrated",
106
+ "fea": "fearful",
107
+ "sur": "surprised",
108
+ "neu": "neutral",
109
+ "dis": "disgusted"
110
+ }
111
+
112
+ return emotion_mapping.get(emotion_label, emotion_label)
113
+
114
  except Exception as e:
115
  print(f"Emotion recognition error: {e}")
116
  return "neutral"
 
121
  if text.startswith("Transcription error") or not text.strip():
122
  return "I'm sorry, I couldn't understand what you said. Could you please try again?"
123
 
124
+ # Build context-aware prompt with emotion
125
+ emotion_responses = {
126
+ "angry": "I understand you're feeling frustrated. Let me help you with that.",
127
+ "sad": "I can sense you're feeling down. I'm here to listen and support you.",
128
+ "happy": "I love your positive energy! That's wonderful to hear.",
129
+ "excited": "Your enthusiasm is contagious! Tell me more about it.",
130
+ "fearful": "I can hear the concern in your voice. Let's work through this together.",
131
+ "surprised": "That sounds quite unexpected! What happened?",
132
+ "frustrated": "I can tell this is bothering you. Let's see how I can help.",
133
+ "neutral": "I'm listening. Please go on."
134
+ }
135
+
136
+ emotion_context = emotion_responses.get(emotion, "I'm here to help.")
137
+
138
+ # Simple but effective response generation
139
+ if len(text.split()) < 3:
140
+ return f"{emotion_context} Could you tell me more about that?"
141
+
142
+ prompt = f"User ({emotion}): {text}\nMaya (helpful assistant):"
143
 
144
  # Tokenize with proper attention mask
145
  inputs = self.llm_tokenizer(
 
154
  outputs = self.llm_model.generate(
155
  input_ids=inputs.input_ids,
156
  attention_mask=inputs.attention_mask,
157
+ max_new_tokens=60,
158
  temperature=0.7,
159
  do_sample=True,
160
  pad_token_id=self.llm_tokenizer.eos_token_id,
 
167
  skip_special_tokens=True
168
  ).strip()
169
 
170
+ # Clean up and add emotion context if response is empty
171
+ if not response or len(response) < 5:
172
+ return emotion_context
173
 
174
  return response
175
 
 
368
  outputs=[transcription_output, audio_output, conversation_history]
369
  )
370
 
371
+ # Launch the app
372
  if __name__ == "__main__":
373
  demo.launch(
374
  server_name="0.0.0.0",