Garvitj commited on
Commit
fed99cb
·
verified ·
1 Parent(s): 618f4e2

Upload analysis.py

Browse files
Files changed (1) hide show
  1. analysis.py +195 -0
analysis.py ADDED
@@ -0,0 +1,195 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import librosa
import numpy as np
import speech_recognition as sr
from groq import Groq
from inference_sdk import InferenceHTTPClient
from transformers import pipeline

# Initialize the voice emotion pipeline once at import time (module global).
# This prevents reloading the model on every get_voice_emotion() call.
try:
    voice_pipe = pipeline(
        "audio-classification",
        model="ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition"
    )
except Exception as e:
    # Model download/initialization can fail (no network, missing weights,
    # incompatible transformers version); fall back to None so callers can
    # degrade gracefully instead of crashing at import.
    print(f"Warning: Could not load voice emotion model: {e}")
    voice_pipe = None
19
+
20
+
21
def get_facial_emotion(image_path):
    """
    Analyzes facial emotion from an image using the Roboflow hosted API.

    Args:
        image_path: Path to the image file.

    Returns:
        str: Detected emotion label (e.g., "happy", "sad"), or "neutral"
        when the API key is missing, no face is detected, or the call fails.
    """
    try:
        # Get API key from environment variable
        api_key = os.getenv("ROBOFLOW_API_KEY")
        if not api_key:
            print("Error: ROBOFLOW_API_KEY not found in environment variables")
            return "neutral"

        # Initialize Roboflow client
        client = InferenceHTTPClient(
            api_url="https://detect.roboflow.com",
            api_key=api_key
        )

        # Run inference on the image
        result = client.infer(image_path, model_id="human-face-emotions/28")

        # Parse response. Select the highest-confidence prediction explicitly
        # instead of assuming the API returns predictions sorted by
        # confidence (the original took predictions[0] unconditionally).
        if result and "predictions" in result and len(result["predictions"]) > 0:
            top_prediction = max(
                result["predictions"],
                key=lambda p: p.get("confidence", 0),
            )
            emotion = top_prediction.get("class", "neutral")
            confidence = top_prediction.get("confidence", 0)
            print(f"Facial emotion detected: {emotion} (confidence: {confidence:.2f})")
            return emotion
        else:
            print("No face detected in image")
            return "neutral"

    except Exception as e:
        # Best-effort: never crash the caller on an API/network error.
        print(f"Error in facial emotion detection: {e}")
        return "neutral"
61
+
62
+
63
def get_voice_emotion(audio_path):
    """
    Analyzes vocal emotion from an audio file using Hugging Face transformers.

    Args:
        audio_path: Path to the audio file.

    Returns:
        str: Detected emotion label (e.g., "calm", "angry", "happy"), or
        "neutral" when the model is unavailable or inference fails.
    """
    try:
        # Without a loaded model there is nothing to classify.
        if voice_pipe is None:
            print("Voice emotion model not loaded")
            return "neutral"

        # The wav2vec2 checkpoint expects 16 kHz audio; librosa resamples
        # on load.
        samples, _ = librosa.load(audio_path, sr=16000)

        predictions = voice_pipe(samples)

        # The pipeline returns a ranked list; an empty result means no
        # usable prediction.
        if not predictions:
            return "neutral"

        best = predictions[0]
        emotion_label = best.get("label", "neutral")
        score = best.get("score", 0)
        print(f"Voice emotion detected: {emotion_label} (score: {score:.2f})")
        return emotion_label

    except Exception as e:
        print(f"Error in voice emotion detection: {e}")
        return "neutral"
97
+
98
+
99
def get_transcript(audio_path):
    """
    Transcribes speech from an audio file using Google Speech Recognition.

    Args:
        audio_path: Path to the audio file.

    Returns:
        str: Transcribed text, or empty string if transcription fails.
    """
    try:
        recognizer = sr.Recognizer()

        # Read the whole file into an AudioData object.
        with sr.AudioFile(audio_path) as source:
            captured = recognizer.record(source)

        # Send to Google's free recognizer (network call).
        transcript = recognizer.recognize_google(captured)
        print(f"Transcription: {transcript}")
        return transcript

    except sr.UnknownValueError:
        # Audio was captured but the recognizer could not make out words.
        print("Could not understand audio")
        return ""
    except sr.RequestError as e:
        # Network/quota problems reaching the recognition service.
        print(f"Could not request results from Google Speech Recognition service: {e}")
        return ""
    except Exception as e:
        print(f"Error in transcription: {e}")
        return ""
131
+
132
+
133
def get_llm_response(user_query, face, voice, text):
    """
    Generates an empathetic response using Groq LLM based on emotional context.

    Args:
        user_query: The user's typed query
        face: Detected facial emotion
        voice: Detected vocal emotion
        text: Transcribed speech text

    Returns:
        str: AI-generated empathetic response, or an error message string
        when the API key is missing or the API call fails.
    """
    try:
        # Get API key from environment variable
        api_key = os.getenv("GROQ_API_KEY")
        if not api_key:
            return "Error: GROQ_API_KEY not found in environment variables"

        # Initialize Groq client
        client = Groq(api_key=api_key)

        # Emotional context and behavioral instructions belong in the system
        # prompt; the query itself is sent as a proper user-role message so
        # the chat model sees a well-formed conversation. (Previously the
        # query was embedded in the system prompt with no user turn at all,
        # which chat-tuned models handle poorly.)
        system_prompt = f"""You are an empathetic AI assistant that provides thoughtful, caring responses based on the user's emotional state.

**Emotional Context Analysis:**
- Facial Expression: {face}
- Vocal Tone: {voice}
- Spoken Words: {text if text else "No speech detected"}

**Instructions:**
1. First, acknowledge and validate the user's emotional state based on the above indicators
2. Show empathy and understanding
3. Provide a helpful, supportive answer to their query
4. Keep your response warm, genuine, and human-like
5. If there are discrepancies between emotional signals, address them sensitively

Respond in a natural, conversational manner that demonstrates emotional intelligence."""

        # Call Groq API with a system turn (context) and a user turn (query).
        chat_completion = client.chat.completions.create(
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_query},
            ],
            model="llama-3.1-8b-instant",
            temperature=0.7,
            max_tokens=1024,
        )

        # Extract and return response text
        return chat_completion.choices[0].message.content

    except Exception as e:
        return f"Error generating response: {e}"
193
+
194
+ # The record_audio function has been removed as it is no longer needed.
195
+ # st.audio_recorder in app.py handles audio capture in the browser.