import os

import librosa
import numpy as np
import speech_recognition as sr
from groq import Groq
from inference_sdk import InferenceHTTPClient
from transformers import pipeline

# Initialize the voice emotion pipeline once at import time (global).
# This prevents reloading the model on every call to get_voice_emotion().
try:
    voice_pipe = pipeline(
        "audio-classification",
        model="ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition",
    )
except Exception as e:
    print(f"Warning: Could not load voice emotion model: {e}")
    voice_pipe = None


def get_facial_emotion(image_path):
    """
    Analyze facial emotion from an image using the Roboflow hosted API.

    Args:
        image_path: Path to the image file.

    Returns:
        str: Detected emotion label (e.g., "happy", "sad"); falls back to
        "neutral" on missing API key, no detections, or any error.
    """
    try:
        # Get API key from environment variable
        api_key = os.getenv("ROBOFLOW_API_KEY")
        if not api_key:
            print("Error: ROBOFLOW_API_KEY not found in environment variables")
            return "neutral"

        # Initialize Roboflow client
        client = InferenceHTTPClient(
            api_url="https://detect.roboflow.com",
            api_key=api_key,
        )

        # Run inference on the image
        result = client.infer(image_path, model_id="human-face-emotions/28")

        # Parse response and pick the top prediction.
        if result and "predictions" in result and len(result["predictions"]) > 0:
            # Fix: select the highest-confidence prediction explicitly rather
            # than assuming the API returns predictions sorted by confidence.
            top_prediction = max(
                result["predictions"],
                key=lambda p: p.get("confidence", 0),
            )
            emotion = top_prediction.get("class", "neutral")
            confidence = top_prediction.get("confidence", 0)
            print(f"Facial emotion detected: {emotion} (confidence: {confidence:.2f})")
            return emotion

        print("No face detected in image")
        return "neutral"
    except Exception as e:
        # Best-effort design: callers always receive a usable label.
        print(f"Error in facial emotion detection: {e}")
        return "neutral"


def get_voice_emotion(audio_path):
    """
    Analyze vocal emotion from an audio file using Hugging Face transformers.

    Args:
        audio_path: Path to the audio file.

    Returns:
        str: Detected emotion label (e.g., "calm", "angry", "happy");
        falls back to "neutral" if the model failed to load, no result is
        produced, or any error occurs.
    """
    try:
        if voice_pipe is None:
            # Module-level model load failed; degrade gracefully.
            print("Voice emotion model not loaded")
            return "neutral"

        # Load audio and resample to 16 kHz (required by the wav2vec2 model).
        audio_array, sample_rate = librosa.load(audio_path, sr=16000)

        # Run inference; the pipeline returns results sorted by score.
        result = voice_pipe(audio_array)

        # Get the highest-scoring emotion.
        if result and len(result) > 0:
            top_emotion = result[0]
            emotion_label = top_emotion.get("label", "neutral")
            score = top_emotion.get("score", 0)
            print(f"Voice emotion detected: {emotion_label} (score: {score:.2f})")
            return emotion_label
        return "neutral"
    except Exception as e:
        print(f"Error in voice emotion detection: {e}")
        return "neutral"


def get_transcript(audio_path):
    """
    Transcribe speech from an audio file using Google Speech Recognition.

    Args:
        audio_path: Path to the audio file.

    Returns:
        str: Transcribed text, or an empty string if transcription fails.
    """
    try:
        # Initialize recognizer
        r = sr.Recognizer()

        # Load audio file
        with sr.AudioFile(audio_path) as source:
            audio_data = r.record(source)

        # Transcribe using Google Speech Recognition (network call).
        text = r.recognize_google(audio_data)
        print(f"Transcription: {text}")
        return text
    except sr.UnknownValueError:
        # Speech was unintelligible — not an error condition for callers.
        print("Could not understand audio")
        return ""
    except sr.RequestError as e:
        print(f"Could not request results from Google Speech Recognition service: {e}")
        return ""
    except Exception as e:
        print(f"Error in transcription: {e}")
        return ""


def get_llm_response(user_query, face, voice, text):
    """
    Generate an empathetic response using the Groq LLM based on emotional context.

    Args:
        user_query: The user's typed query.
        face: Detected facial emotion.
        voice: Detected vocal emotion.
        text: Transcribed speech text.

    Returns:
        str: AI-generated empathetic response, or an error message string
        if the API key is missing or the request fails.
    """
    try:
        # Get API key from environment variable
        api_key = os.getenv("GROQ_API_KEY")
        if not api_key:
            return "Error: GROQ_API_KEY not found in environment variables"

        # Initialize Groq client
        client = Groq(api_key=api_key)

        # System prompt carries the emotional context and behavioral rules;
        # the user's query goes in its own user-role message (fix: the
        # original sent only a system message, which is incorrect chat usage).
        system_prompt = f"""You are an empathetic AI assistant that provides thoughtful, caring responses based on the user's emotional state.

**Emotional Context Analysis:**
- Facial Expression: {face}
- Vocal Tone: {voice}
- Spoken Words: {text if text else "No speech detected"}

**Instructions:**
1. First, acknowledge and validate the user's emotional state based on the above indicators
2. Show empathy and understanding
3. Provide a helpful, supportive answer to their query
4. Keep your response warm, genuine, and human-like
5. If there are discrepancies between emotional signals, address them sensitively

Respond in a natural, conversational manner that demonstrates emotional intelligence."""

        # Call Groq API
        chat_completion = client.chat.completions.create(
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_query},
            ],
            model="llama-3.1-8b-instant",
            temperature=0.7,
            max_tokens=1024,
        )

        # Extract and return response
        response = chat_completion.choices[0].message.content
        return response
    except Exception as e:
        return f"Error generating response: {e}"


# The record_audio function has been removed as it is no longer needed.
# st.audio_recorder in app.py handles audio capture in the browser.