# emotion_llm / src/app.py
# (Hugging Face Space header: commit df98374, "Update src/app.py", by Garvitj)
import streamlit as st
import cv2
import numpy as np
import os
import tempfile
from dotenv import load_dotenv
from st_audiorec import st_audiorec
from analysis import (
get_facial_emotion,
get_voice_emotion,
get_transcript,
get_llm_response
)
# Load the .env file so API keys (e.g. ROBOFLOW_API_KEY, GROQ_API_KEY) are
# available via os.environ before any analysis helper is called.
load_dotenv()
# Page configuration -- must be the first Streamlit command in the script.
st.set_page_config(
    page_title="Empathetic AI Assistant",
    page_icon="πŸ€–",
    layout="wide"
)
# --- 1. INITIALIZE THE SESSION STATE KEY ---
# This single key 'camera_bytes' will be used everywhere.
# This also fixes the AttributeError in your 'pages/2_Analyze.py' file.
# Initialized to None so "no photo yet" is distinguishable from a capture.
if 'camera_bytes' not in st.session_state:
    st.session_state.camera_bytes = None
# --- 2. CREATE THE CALLBACK FUNCTION ---
# Registered as on_change of the camera widget, so it runs *immediately*
# when a photo is taken, before the script reruns.
def _camera_callback():
    """Copy the camera widget's snapshot into the stable session slot.

    The widget stores its value under 'camera_widget_key'; this handler
    mirrors a non-None value into st.session_state.camera_bytes so the
    image survives subsequent Streamlit reruns.
    """
    snapshot = st.session_state.camera_widget_key
    if snapshot is not None:
        st.session_state.camera_bytes = snapshot
# Title and description shown at the top of the page.
st.title("πŸ€– Empathetic AI Assistant")
st.markdown("""
This AI assistant analyzes your emotional state through:
- πŸ“Έ **Facial Expression** (from camera)
- 🎀 **Vocal Tone** (from microphone)
- πŸ’¬ **Spoken Words** (transcribed from audio)
Then provides an empathetic, context-aware response to your query.
""")
st.divider()
# Create two equal-width columns: camera capture (left), text query (right).
col1, col2 = st.columns([1, 1])
with col1:
    st.subheader("πŸ“Έ 1. Capture Your Expression")
    # --- 3. USE THE CAMERA WIDGET WITH THE CALLBACK ---
    # The widget's value lives under 'camera_widget_key'; the on_change
    # callback mirrors it into the stable 'camera_bytes' session slot.
    camera_input_data = st.camera_input(
        "Take a snapshot",
        on_change=_camera_callback,
        key="camera_widget_key"
    )
    # --- 4. DEBUG/DISPLAY LOGIC READS FROM SESSION STATE ---
    # Display the *stored* image so it persists across reruns.
    if st.session_state.camera_bytes is None:
        st.warning("DEBUG: 'camera_bytes' is currently None. Waiting for user to take photo...")
    else:
        st.success("DEBUG: Image captured and stored in session state!")
        st.image(st.session_state.camera_bytes, caption="Captured Image")
        # Button to discard the stored image (only shown when one exists).
        if st.button("Clear Photo"):
            st.session_state.camera_bytes = None
            # FIX: st.experimental_rerun() was deprecated in Streamlit 1.27
            # and removed in later releases, so it crashes on current
            # versions. Prefer st.rerun(), falling back for old installs.
            if hasattr(st, "rerun"):
                st.rerun()
            else:
                st.experimental_rerun()
with col2:
    st.subheader("πŸ’­ 2. Your Query")
    # The widget key persists the query in st.session_state.user_query,
    # which the Analyze handler reads instead of this local variable.
    user_query = st.text_area(
        "What would you like to ask?",
        placeholder="Type your question or concern here...",
        height=100,
        key="user_query"  # Give it a key
    )

st.divider()
# Section 3 is rendered full-width below the two columns.
st.subheader("πŸŽ™οΈ 3. Record Your Voice")
st.write("Click the microphone to record your voice, then click 'Analyze' below.")
# st_audiorec renders a browser recorder and returns the captured audio as
# WAV bytes (None until a recording has been finished).
audio_bytes = st_audiorec()
st.divider()
st.divider()
# Main action button: runs the full capture -> analyze -> respond pipeline.
if st.button("🧠 Analyze My Emotion & Answer", type="primary", use_container_width=True):
    # --- 5. VALIDATION CHECKS THE RELIABLE SESSION STATE ---
    if not st.session_state.camera_bytes:
        st.error("❌ Please take a snapshot using the camera first!")
    elif not audio_bytes:
        st.error("❌ Please record your voice first!")
    elif not st.session_state.user_query.strip():  # check session state
        st.error("❌ Please enter your query!")
    else:
        # Step 1: Process camera image
        with st.spinner("πŸ“Έ Processing facial expression..."):
            try:
                # FIX: use getvalue() instead of read(). The UploadedFile
                # persists across reruns and st.image above consumes its
                # cursor, so read() could return b"" and decoding failed.
                # getvalue() always returns the full buffer.
                file_bytes = np.frombuffer(
                    st.session_state.camera_bytes.getvalue(), dtype=np.uint8
                )
                image = cv2.imdecode(file_bytes, cv2.IMREAD_COLOR)
                # FIX: imdecode reports failure by returning None rather
                # than raising -- surface that explicitly.
                if image is None:
                    raise ValueError("could not decode camera image")
                # Write to a temp .jpg because the analyzer takes a path.
                with tempfile.NamedTemporaryFile(delete=True, suffix=".jpg") as temp_img:
                    cv2.imwrite(temp_img.name, image)
                    facial_emotion = get_facial_emotion(temp_img.name)
            except Exception as e:
                st.error(f"Error processing image: {e}")
                facial_emotion = "neutral"  # degrade gracefully
        # Step 2: Process recorded audio
        with st.spinner("🎡 Saving and analyzing audio..."):
            try:
                with tempfile.NamedTemporaryFile(delete=True, suffix=".wav") as temp_aud:
                    temp_aud.write(audio_bytes)
                    # FIX: flush so the analyzers, which reopen the file by
                    # name, see the complete WAV data (writes are buffered).
                    temp_aud.flush()
                    voice_emotion = get_voice_emotion(temp_aud.name)
                    transcript = get_transcript(temp_aud.name)
            except Exception as e:
                st.error(f"Error processing audio: {e}")
                voice_emotion = "neutral"  # degrade gracefully
                transcript = ""
        # Step 3: Display the per-modality analysis results.
        st.divider()
        st.subheader("πŸ“Š Emotional Analysis Results")
        col_a, col_b, col_c = st.columns(3)
        with col_a:
            st.metric(
                label="😊 Facial Emotion",
                value=facial_emotion.capitalize()
            )
        with col_b:
            st.metric(
                label="🎀 Vocal Tone",
                value=voice_emotion.capitalize()
            )
        with col_c:
            st.metric(
                label="πŸ’¬ Speech Detected",
                value="Yes" if transcript else "No"
            )
        if transcript:
            st.info(f"**Transcription:** {transcript}")
        # Step 4: Get the empathetic AI response from the LLM.
        st.divider()
        with st.spinner("πŸ€– Empathetic AI is thinking..."):
            ai_response = get_llm_response(
                user_query=st.session_state.user_query,  # read from session state
                face=facial_emotion,
                voice=voice_emotion,
                text=transcript
            )
        # Display final response
        st.subheader("πŸ’™ Empathetic Response")
        st.markdown(ai_response)
        # Success feedback
        st.balloons()
# Sidebar with usage instructions, setup requirements, and the tech stack.
with st.sidebar:
    st.header("ℹ️ How to Use")
    st.markdown("""
1. **Take a snapshot**
2. **Type your query**
3. **Click the mic** to record (click again to stop)
4. **Click the 'Analyze' button**
5. **Receive** your response
""")
    st.divider()
    st.header("πŸ”‘ Setup Requirements")
    st.markdown("""
Make sure these environment variables are set.
Create a `.env` file in the same
directory as `app.py`:
```
ROBOFLOW_API_KEY="your_key"
GROQ_API_KEY="your_key"
```
""")
    st.divider()
    st.header("πŸ› οΈ Tech Stack")
    st.markdown("""
- **Frontend:** Streamlit
- **Audio:** `streamlit-audiorec`
- **Facial Analysis:** Roboflow
- **Voice Analysis:** Hugging Face
- **Speech-to-Text:** Google SR
- **LLM:** Groq (Llama 3)
""")