Spaces:
Sleeping
Sleeping
| import os | |
| import librosa | |
| import numpy as np | |
| import speech_recognition as sr | |
| from groq import Groq | |
| from inference_sdk import InferenceHTTPClient | |
| from transformers import pipeline | |
# Load the speech-emotion classifier exactly once at import time so that
# every call to get_voice_emotion() reuses the same model instead of
# paying the (large) model-load cost per request.
voice_pipe = None
try:
    voice_pipe = pipeline(
        "audio-classification",
        model="ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition",
    )
except Exception as e:
    # Keep the app importable even when the model cannot be fetched;
    # get_voice_emotion() degrades to returning "neutral" in that case.
    print(f"Warning: Could not load voice emotion model: {e}")
def get_facial_emotion(image_path):
    """
    Detect the dominant facial emotion in an image via the Roboflow API.

    Args:
        image_path: Path to the image file to analyze.

    Returns:
        str: The top prediction's emotion label (e.g. "happy", "sad"),
        or "neutral" when the API key is missing, no face is detected,
        or any error occurs.
    """
    try:
        # The hosted-inference key must come from the environment;
        # without it we degrade gracefully instead of raising.
        roboflow_key = os.getenv("ROBOFLOW_API_KEY")
        if not roboflow_key:
            print("Error: ROBOFLOW_API_KEY not found in environment variables")
            return "neutral"

        detector = InferenceHTTPClient(
            api_url="https://detect.roboflow.com",
            api_key=roboflow_key,
        )
        response = detector.infer(image_path, model_id="human-face-emotions/28")

        # An empty/missing predictions list means no face was found.
        if not (response and "predictions" in response and len(response["predictions"]) > 0):
            print("No face detected in image")
            return "neutral"

        best = response["predictions"][0]
        emotion = best.get("class", "neutral")
        confidence = best.get("confidence", 0)
        print(f"Facial emotion detected: {emotion} (confidence: {confidence:.2f})")
        return emotion
    except Exception as e:
        print(f"Error in facial emotion detection: {e}")
        return "neutral"
def get_voice_emotion(audio_path):
    """
    Classify the emotion carried by the speaker's voice in an audio file.

    Uses the module-level `voice_pipe` Hugging Face pipeline loaded at
    import time.

    Args:
        audio_path: Path to the audio file to analyze.

    Returns:
        str: The highest-scoring emotion label (e.g. "calm", "angry",
        "happy"), or "neutral" when the model is unavailable or
        analysis fails.
    """
    try:
        # The pipeline may have failed to load at import time.
        if voice_pipe is None:
            print("Voice emotion model not loaded")
            return "neutral"

        # The wav2vec2 model expects 16 kHz input, so resample on load.
        samples, _ = librosa.load(audio_path, sr=16000)
        predictions = voice_pipe(samples)

        if not predictions:
            return "neutral"

        best = predictions[0]
        label = best.get("label", "neutral")
        score = best.get("score", 0)
        print(f"Voice emotion detected: {label} (score: {score:.2f})")
        return label
    except Exception as e:
        print(f"Error in voice emotion detection: {e}")
        return "neutral"
def get_transcript(audio_path):
    """
    Transcribe speech from an audio file using Google Speech Recognition.

    Args:
        audio_path: Path to the audio file.

    Returns:
        str: The transcribed text, or "" when the audio is
        unintelligible, the service is unreachable, or any other
        error occurs.
    """
    try:
        recognizer = sr.Recognizer()
        # Read the whole file into an AudioData buffer for recognition.
        with sr.AudioFile(audio_path) as source:
            captured = recognizer.record(source)
        # Online recognition via Google's free web API.
        transcript = recognizer.recognize_google(captured)
        print(f"Transcription: {transcript}")
        return transcript
    except sr.UnknownValueError:
        # Speech was present but could not be decoded into words.
        print("Could not understand audio")
        return ""
    except sr.RequestError as e:
        # Network / service failure while contacting Google.
        print(f"Could not request results from Google Speech Recognition service: {e}")
        return ""
    except Exception as e:
        print(f"Error in transcription: {e}")
        return ""
def get_llm_response(user_query, face, voice, text):
    """
    Generates an empathetic response using the Groq LLM based on emotional context.

    Args:
        user_query: The user's typed query.
        face: Detected facial emotion label (e.g. "happy").
        voice: Detected vocal emotion label (e.g. "calm").
        text: Transcribed speech text ("" when nothing was recognized).

    Returns:
        str: AI-generated empathetic response, or an error-message string
        when the API key is missing or the request fails.
    """
    try:
        # Get API key from environment variable
        api_key = os.getenv("GROQ_API_KEY")
        if not api_key:
            return "Error: GROQ_API_KEY not found in environment variables"

        # Initialize Groq client
        client = Groq(api_key=api_key)

        # The system prompt carries only the emotional context and the
        # behavioral instructions. The user's actual question is sent as
        # a separate "user" message below — the original code embedded
        # the query inside the system prompt and sent no user turn at
        # all, which chat-completion models handle poorly.
        system_prompt = f"""You are an empathetic AI assistant that provides thoughtful, caring responses based on the user's emotional state.
**Emotional Context Analysis:**
- Facial Expression: {face}
- Vocal Tone: {voice}
- Spoken Words: {text if text else "No speech detected"}
**Instructions:**
1. First, acknowledge and validate the user's emotional state based on the above indicators
2. Show empathy and understanding
3. Provide a helpful, supportive answer to their query
4. Keep your response warm, genuine, and human-like
5. If there are discrepancies between emotional signals, address them sensitively
Respond in a natural, conversational manner that demonstrates emotional intelligence."""

        # Call Groq API with a proper system/user message pair.
        chat_completion = client.chat.completions.create(
            messages=[
                {
                    "role": "system",
                    "content": system_prompt
                },
                {
                    "role": "user",
                    "content": user_query
                }
            ],
            model="llama-3.1-8b-instant",
            temperature=0.7,  # moderately creative but still focused
            max_tokens=1024
        )

        # Extract and return response
        return chat_completion.choices[0].message.content
    except Exception as e:
        return f"Error generating response: {e}"
| # The record_audio function has been removed as it is no longer needed. | |
| # st.audio_recorder in app.py handles audio capture in the browser. | |