Garvitj commited on
Commit
fed99cb
·
verified ·
1 Parent(s): 618f4e2

Upload analysis.py

Browse files
Files changed (1) hide show
  1. analysis.py +195 -0
analysis.py ADDED
@@ -0,0 +1,195 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import librosa
import numpy as np
import speech_recognition as sr
from groq import Groq
from inference_sdk import InferenceHTTPClient
from transformers import pipeline

# Initialize the voice emotion pipeline once at import time (module global).
# This prevents reloading the model on every get_voice_emotion() call.
try:
    voice_pipe = pipeline(
        "audio-classification",
        model="ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition"
    )
except Exception as e:
    # Model download/initialization can fail (no network, missing weights,
    # incompatible transformers version); fall back to None so callers can
    # degrade gracefully instead of crashing at import.
    print(f"Warning: Could not load voice emotion model: {e}")
    voice_pipe = None
19
+
20
+
21
def get_facial_emotion(image_path):
    """
    Analyzes facial emotion from an image using the Roboflow hosted API.

    Args:
        image_path: Path to the image file.

    Returns:
        str: Detected emotion label (e.g., "happy", "sad"), or "neutral"
        when the API key is missing, no face is detected, or the call fails.
    """
    try:
        # Get API key from environment variable
        api_key = os.getenv("ROBOFLOW_API_KEY")
        if not api_key:
            print("Error: ROBOFLOW_API_KEY not found in environment variables")
            return "neutral"

        # Initialize Roboflow client
        client = InferenceHTTPClient(
            api_url="https://detect.roboflow.com",
            api_key=api_key
        )

        # Run inference on the image
        result = client.infer(image_path, model_id="human-face-emotions/28")

        # Parse response. Select the highest-confidence prediction explicitly
        # instead of assuming the API returns predictions sorted by
        # confidence (the original took predictions[0] unconditionally).
        if result and "predictions" in result and len(result["predictions"]) > 0:
            top_prediction = max(
                result["predictions"],
                key=lambda p: p.get("confidence", 0),
            )
            emotion = top_prediction.get("class", "neutral")
            confidence = top_prediction.get("confidence", 0)
            print(f"Facial emotion detected: {emotion} (confidence: {confidence:.2f})")
            return emotion
        else:
            print("No face detected in image")
            return "neutral"

    except Exception as e:
        # Best-effort: never crash the caller on an API/network error.
        print(f"Error in facial emotion detection: {e}")
        return "neutral"
61
+
62
+
63
def get_voice_emotion(audio_path):
    """
    Analyzes vocal emotion from an audio file using Hugging Face transformers.

    Args:
        audio_path: Path to the audio file.

    Returns:
        str: Detected emotion label (e.g., "calm", "angry", "happy"), or
        "neutral" when the model is unavailable or inference fails.
    """
    try:
        # Without a loaded model there is nothing to classify.
        if voice_pipe is None:
            print("Voice emotion model not loaded")
            return "neutral"

        # The wav2vec2 checkpoint expects 16 kHz audio; librosa resamples
        # on load.
        samples, _ = librosa.load(audio_path, sr=16000)

        predictions = voice_pipe(samples)

        # The pipeline returns a ranked list; an empty result means no
        # usable prediction.
        if not predictions:
            return "neutral"

        best = predictions[0]
        emotion_label = best.get("label", "neutral")
        score = best.get("score", 0)
        print(f"Voice emotion detected: {emotion_label} (score: {score:.2f})")
        return emotion_label

    except Exception as e:
        print(f"Error in voice emotion detection: {e}")
        return "neutral"
97
+
98
+
99
def get_transcript(audio_path):
    """
    Transcribes speech from an audio file using Google Speech Recognition.

    Args:
        audio_path: Path to the audio file.

    Returns:
        str: Transcribed text, or empty string if transcription fails.
    """
    try:
        recognizer = sr.Recognizer()

        # Read the whole file into an AudioData object.
        with sr.AudioFile(audio_path) as source:
            captured = recognizer.record(source)

        # Send to Google's free recognizer (network call).
        transcript = recognizer.recognize_google(captured)
        print(f"Transcription: {transcript}")
        return transcript

    except sr.UnknownValueError:
        # Audio was captured but the recognizer could not make out words.
        print("Could not understand audio")
        return ""
    except sr.RequestError as e:
        # Network/quota problems reaching the recognition service.
        print(f"Could not request results from Google Speech Recognition service: {e}")
        return ""
    except Exception as e:
        print(f"Error in transcription: {e}")
        return ""
131
+
132
+
133
def get_llm_response(user_query, face, voice, text):
    """
    Generates an empathetic response using Groq LLM based on emotional context.

    Args:
        user_query: The user's typed query
        face: Detected facial emotion
        voice: Detected vocal emotion
        text: Transcribed speech text

    Returns:
        str: AI-generated empathetic response, or an error message string
        when the API key is missing or the API call fails.
    """
    try:
        # Get API key from environment variable
        api_key = os.getenv("GROQ_API_KEY")
        if not api_key:
            return "Error: GROQ_API_KEY not found in environment variables"

        # Initialize Groq client
        client = Groq(api_key=api_key)

        # Emotional context and behavioral instructions belong in the system
        # prompt; the query itself is sent as a proper user-role message so
        # the chat model sees a well-formed conversation. (Previously the
        # query was embedded in the system prompt with no user turn at all,
        # which chat-tuned models handle poorly.)
        system_prompt = f"""You are an empathetic AI assistant that provides thoughtful, caring responses based on the user's emotional state.

**Emotional Context Analysis:**
- Facial Expression: {face}
- Vocal Tone: {voice}
- Spoken Words: {text if text else "No speech detected"}

**Instructions:**
1. First, acknowledge and validate the user's emotional state based on the above indicators
2. Show empathy and understanding
3. Provide a helpful, supportive answer to their query
4. Keep your response warm, genuine, and human-like
5. If there are discrepancies between emotional signals, address them sensitively

Respond in a natural, conversational manner that demonstrates emotional intelligence."""

        # Call Groq API with a system turn (context) and a user turn (query).
        chat_completion = client.chat.completions.create(
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_query},
            ],
            model="llama-3.1-8b-instant",
            temperature=0.7,
            max_tokens=1024,
        )

        # Extract and return response text
        return chat_completion.choices[0].message.content

    except Exception as e:
        return f"Error generating response: {e}"
193
+
194
+ # The record_audio function has been removed as it is no longer needed.
195
+ # st.audio_recorder in app.py handles audio capture in the browser.