# emotion_llm / src/app.py
# (Hugging Face Space header: commit df98374, "Update src/app.py", by Garvitj)
import streamlit as st
import cv2
import numpy as np
import os
import tempfile
from dotenv import load_dotenv
from st_audiorec import st_audiorec
from analysis import (
get_facial_emotion,
get_voice_emotion,
get_transcript,
get_llm_response
)
# Load the .env file so API keys (e.g. ROBOFLOW_API_KEY, GROQ_API_KEY) are
# available via os.environ before any analysis helper is called.
load_dotenv()
# Page configuration -- must be the first Streamlit command in the script.
st.set_page_config(
    page_title="Empathetic AI Assistant",
    page_icon="πŸ€–",
    layout="wide"
)
# --- 1. INITIALIZE THE SESSION STATE KEY ---
# This single key 'camera_bytes' will be used everywhere.
# This also fixes the AttributeError in your 'pages/2_Analyze.py' file.
# Initialized to None so "no photo yet" is distinguishable from a capture.
if 'camera_bytes' not in st.session_state:
    st.session_state.camera_bytes = None
# --- 2. CREATE THE CALLBACK FUNCTION ---
# Registered as on_change of the camera widget, so it runs *immediately*
# when a photo is taken, before the script reruns.
def _camera_callback():
    """Copy the camera widget's snapshot into the stable session slot.

    The widget stores its value under 'camera_widget_key'; this handler
    mirrors a non-None value into st.session_state.camera_bytes so the
    image survives subsequent Streamlit reruns.
    """
    snapshot = st.session_state.camera_widget_key
    if snapshot is not None:
        st.session_state.camera_bytes = snapshot
# Title and description shown at the top of the page.
st.title("πŸ€– Empathetic AI Assistant")
st.markdown("""
This AI assistant analyzes your emotional state through:
- πŸ“Έ **Facial Expression** (from camera)
- 🎀 **Vocal Tone** (from microphone)
- πŸ’¬ **Spoken Words** (transcribed from audio)
Then provides an empathetic, context-aware response to your query.
""")
st.divider()
# Create two equal-width columns: camera capture (left), text query (right).
col1, col2 = st.columns([1, 1])
with col1:
    st.subheader("πŸ“Έ 1. Capture Your Expression")
    # --- 3. USE THE CAMERA WIDGET WITH THE CALLBACK ---
    # The widget's value lives under 'camera_widget_key'; the on_change
    # callback mirrors it into the stable 'camera_bytes' session slot.
    camera_input_data = st.camera_input(
        "Take a snapshot",
        on_change=_camera_callback,
        key="camera_widget_key"
    )
    # --- 4. DEBUG/DISPLAY LOGIC READS FROM SESSION STATE ---
    # Display the *stored* image so it persists across reruns.
    if st.session_state.camera_bytes is None:
        st.warning("DEBUG: 'camera_bytes' is currently None. Waiting for user to take photo...")
    else:
        st.success("DEBUG: Image captured and stored in session state!")
        st.image(st.session_state.camera_bytes, caption="Captured Image")
        # Button to discard the stored image (only shown when one exists).
        if st.button("Clear Photo"):
            st.session_state.camera_bytes = None
            # FIX: st.experimental_rerun() was deprecated in Streamlit 1.27
            # and removed in later releases, so it crashes on current
            # versions. Prefer st.rerun(), falling back for old installs.
            if hasattr(st, "rerun"):
                st.rerun()
            else:
                st.experimental_rerun()
with col2:
    st.subheader("πŸ’­ 2. Your Query")
    # The widget key persists the query in st.session_state.user_query,
    # which the Analyze handler reads instead of this local variable.
    user_query = st.text_area(
        "What would you like to ask?",
        placeholder="Type your question or concern here...",
        height=100,
        key="user_query"  # Give it a key
    )

st.divider()
# Section 3 is rendered full-width below the two columns.
st.subheader("πŸŽ™οΈ 3. Record Your Voice")
st.write("Click the microphone to record your voice, then click 'Analyze' below.")
# st_audiorec renders a browser recorder and returns the captured audio as
# WAV bytes (None until a recording has been finished).
audio_bytes = st_audiorec()
st.divider()
st.divider()
# Main action button: runs the full capture -> analyze -> respond pipeline.
if st.button("🧠 Analyze My Emotion & Answer", type="primary", use_container_width=True):
    # --- 5. VALIDATION CHECKS THE RELIABLE SESSION STATE ---
    if not st.session_state.camera_bytes:
        st.error("❌ Please take a snapshot using the camera first!")
    elif not audio_bytes:
        st.error("❌ Please record your voice first!")
    elif not st.session_state.user_query.strip():  # check session state
        st.error("❌ Please enter your query!")
    else:
        # Step 1: Process camera image
        with st.spinner("πŸ“Έ Processing facial expression..."):
            try:
                # FIX: use getvalue() instead of read(). The UploadedFile
                # persists across reruns and st.image above consumes its
                # cursor, so read() could return b"" and decoding failed.
                # getvalue() always returns the full buffer.
                file_bytes = np.frombuffer(
                    st.session_state.camera_bytes.getvalue(), dtype=np.uint8
                )
                image = cv2.imdecode(file_bytes, cv2.IMREAD_COLOR)
                # FIX: imdecode reports failure by returning None rather
                # than raising -- surface that explicitly.
                if image is None:
                    raise ValueError("could not decode camera image")
                # Write to a temp .jpg because the analyzer takes a path.
                with tempfile.NamedTemporaryFile(delete=True, suffix=".jpg") as temp_img:
                    cv2.imwrite(temp_img.name, image)
                    facial_emotion = get_facial_emotion(temp_img.name)
            except Exception as e:
                st.error(f"Error processing image: {e}")
                facial_emotion = "neutral"  # degrade gracefully
        # Step 2: Process recorded audio
        with st.spinner("🎡 Saving and analyzing audio..."):
            try:
                with tempfile.NamedTemporaryFile(delete=True, suffix=".wav") as temp_aud:
                    temp_aud.write(audio_bytes)
                    # FIX: flush so the analyzers, which reopen the file by
                    # name, see the complete WAV data (writes are buffered).
                    temp_aud.flush()
                    voice_emotion = get_voice_emotion(temp_aud.name)
                    transcript = get_transcript(temp_aud.name)
            except Exception as e:
                st.error(f"Error processing audio: {e}")
                voice_emotion = "neutral"  # degrade gracefully
                transcript = ""
        # Step 3: Display the per-modality analysis results.
        st.divider()
        st.subheader("πŸ“Š Emotional Analysis Results")
        col_a, col_b, col_c = st.columns(3)
        with col_a:
            st.metric(
                label="😊 Facial Emotion",
                value=facial_emotion.capitalize()
            )
        with col_b:
            st.metric(
                label="🎀 Vocal Tone",
                value=voice_emotion.capitalize()
            )
        with col_c:
            st.metric(
                label="πŸ’¬ Speech Detected",
                value="Yes" if transcript else "No"
            )
        if transcript:
            st.info(f"**Transcription:** {transcript}")
        # Step 4: Get the empathetic AI response from the LLM.
        st.divider()
        with st.spinner("πŸ€– Empathetic AI is thinking..."):
            ai_response = get_llm_response(
                user_query=st.session_state.user_query,  # read from session state
                face=facial_emotion,
                voice=voice_emotion,
                text=transcript
            )
        # Display final response
        st.subheader("πŸ’™ Empathetic Response")
        st.markdown(ai_response)
        # Success feedback
        st.balloons()
# Sidebar with usage instructions, setup requirements, and the tech stack.
with st.sidebar:
    st.header("ℹ️ How to Use")
    st.markdown("""
1. **Take a snapshot**
2. **Type your query**
3. **Click the mic** to record (click again to stop)
4. **Click the 'Analyze' button**
5. **Receive** your response
""")
    st.divider()
    st.header("πŸ”‘ Setup Requirements")
    st.markdown("""
Make sure these environment variables are set.
Create a `.env` file in the same
directory as `app.py`:
```
ROBOFLOW_API_KEY="your_key"
GROQ_API_KEY="your_key"
```
""")
    st.divider()
    st.header("πŸ› οΈ Tech Stack")
    st.markdown("""
- **Frontend:** Streamlit
- **Audio:** `streamlit-audiorec`
- **Facial Analysis:** Roboflow
- **Voice Analysis:** Hugging Face
- **Speech-to-Text:** Google SR
- **LLM:** Groq (Llama 3)
""")