# emotion_llm/src/analysis.py — multimodal emotion analysis helpers
import os
import librosa
import numpy as np
import speech_recognition as sr
from groq import Groq
from inference_sdk import InferenceHTTPClient
from transformers import pipeline
# Build the speech-emotion classifier exactly once at import time so the
# heavy model weights are not reloaded on every call to get_voice_emotion().
def _load_voice_pipe():
    """Return the audio-classification pipeline, or None if loading fails."""
    try:
        return pipeline(
            "audio-classification",
            model="ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition"
        )
    except Exception as e:
        print(f"Warning: Could not load voice emotion model: {e}")
        return None

voice_pipe = _load_voice_pipe()
def get_facial_emotion(image_path):
    """
    Detect the dominant facial emotion in an image via the Roboflow API.

    Args:
        image_path: Path to the image file.

    Returns:
        str: The predicted emotion class (e.g. "happy", "sad"), or
        "neutral" when the key is missing, no face is found, or any
        step fails.
    """
    try:
        api_key = os.getenv("ROBOFLOW_API_KEY")
        if not api_key:
            print("Error: ROBOFLOW_API_KEY not found in environment variables")
            return "neutral"

        # One hosted-inference call per image; the client is cheap to build.
        detector = InferenceHTTPClient(
            api_url="https://detect.roboflow.com",
            api_key=api_key
        )
        result = detector.infer(image_path, model_id="human-face-emotions/28")

        # Mirror of: result truthy AND has a non-empty "predictions" list.
        if not result or "predictions" not in result or not result["predictions"]:
            print("No face detected in image")
            return "neutral"

        best = result["predictions"][0]
        emotion = best.get("class", "neutral")
        confidence = best.get("confidence", 0)
        print(f"Facial emotion detected: {emotion} (confidence: {confidence:.2f})")
        return emotion
    except Exception as e:
        print(f"Error in facial emotion detection: {e}")
        return "neutral"
def get_voice_emotion(audio_path):
    """
    Classify the vocal emotion in an audio file with the preloaded
    Hugging Face audio-classification pipeline.

    Args:
        audio_path: Path to the audio file.

    Returns:
        str: The top emotion label (e.g. "calm", "angry"), or "neutral"
        when the model is unavailable or inference fails.
    """
    try:
        if voice_pipe is None:
            print("Voice emotion model not loaded")
            return "neutral"

        # The wav2vec2 model expects 16 kHz audio; librosa resamples on load.
        samples, _ = librosa.load(audio_path, sr=16000)
        predictions = voice_pipe(samples)

        if not predictions:
            return "neutral"

        # Pipeline output is sorted by score; take the top-ranked entry.
        best = predictions[0]
        label = best.get("label", "neutral")
        confidence = best.get("score", 0)
        print(f"Voice emotion detected: {label} (score: {confidence:.2f})")
        return label
    except Exception as e:
        print(f"Error in voice emotion detection: {e}")
        return "neutral"
def get_transcript(audio_path):
    """
    Convert speech in an audio file to text via Google Speech Recognition.

    Args:
        audio_path: Path to the audio file.

    Returns:
        str: The recognized text, or "" when nothing could be transcribed
        or the service is unreachable.
    """
    try:
        recognizer = sr.Recognizer()
        # Read the whole file into an AudioData buffer for recognition.
        with sr.AudioFile(audio_path) as source:
            captured = recognizer.record(source)
        transcript = recognizer.recognize_google(captured)
        print(f"Transcription: {transcript}")
        return transcript
    except sr.UnknownValueError:
        print("Could not understand audio")
        return ""
    except sr.RequestError as e:
        print(f"Could not request results from Google Speech Recognition service: {e}")
        return ""
    except Exception as e:
        print(f"Error in transcription: {e}")
        return ""
def get_llm_response(user_query, face, voice, text):
    """
    Generate an empathetic reply with the Groq LLM, conditioned on the
    user's multimodal emotional context.

    Args:
        user_query: The user's typed query.
        face: Detected facial emotion label.
        voice: Detected vocal emotion label.
        text: Transcribed speech text ("" when nothing was recognized).

    Returns:
        str: The model's response, or an error-message string on failure
        (missing API key or any API/transport exception).
    """
    try:
        api_key = os.getenv("GROQ_API_KEY")
        if not api_key:
            return "Error: GROQ_API_KEY not found in environment variables"

        client = Groq(api_key=api_key)

        # FIX: the original packed the user's query into the system prompt
        # and sent NO "user" message at all. Chat-completion models are
        # trained to answer the latest user turn, so the query now goes in
        # a proper user message while the system prompt keeps only the
        # behavioral instructions and the emotional-context summary.
        system_prompt = f"""You are an empathetic AI assistant that provides thoughtful, caring responses based on the user's emotional state.
**Emotional Context Analysis:**
- Facial Expression: {face}
- Vocal Tone: {voice}
- Spoken Words: {text if text else "No speech detected"}
**Instructions:**
1. First, acknowledge and validate the user's emotional state based on the above indicators
2. Show empathy and understanding
3. Provide a helpful, supportive answer to their query
4. Keep your response warm, genuine, and human-like
5. If there are discrepancies between emotional signals, address them sensitively
Respond in a natural, conversational manner that demonstrates emotional intelligence."""

        chat_completion = client.chat.completions.create(
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_query},
            ],
            model="llama-3.1-8b-instant",
            temperature=0.7,
            max_tokens=1024
        )

        return chat_completion.choices[0].message.content
    except Exception as e:
        return f"Error generating response: {e}"
# The record_audio function has been removed as it is no longer needed.
# st.audio_recorder in app.py handles audio capture in the browser.