Spaces:

Garvitj
/

emotion_llm

Sleeping

File size: 6,936 Bytes

9cd4486
8e09e41
b1f4c12
 
92a6cf4
b1f4c12
 
 
 
 
 
 
 
 
5442d6b
b1f4c12
9cd4486
b1f4c12
9cd4486
b1f4c12
 
9cd4486
 
 
df98374
 
 
 
 
 
 
 
 
 
 
 
b5986e7
5442d6b
9cd4486
b1f4c12
 
 
 
 
5442d6b
 
b1f4c12
5442d6b
9cd4486
 
5442d6b
9cd4486
 
 
 
 
df98374
 
 
 
 
 
 
92a6cf4
df98374
 
 
 
b5986e7
df98374
 
 
 
 
 
 
b5986e7
9cd4486
 
df98374
5442d6b
9cd4486
 
df98374
 
9cd4486
 
 
 
b1f4c12
 
5442d6b
9cd4486
b1f4c12
 
5442d6b
b1f4c12
 
df98374
 
b1f4c12
5442d6b
b1f4c12
df98374
b1f4c12
 
df98374
b1f4c12
 
df98374
 
b1f4c12
5442d6b
 
 
 
 
b1f4c12
 
 
 
5442d6b
b1f4c12
 
5442d6b
 
 
 
 
 
b1f4c12
 
 
 
 
 
 
 
5442d6b
b1f4c12
5442d6b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b1f4c12
 
 
 
 
df98374
b1f4c12
 
 
 
 
5442d6b
b1f4c12
 
5442d6b
 
b1f4c12
 
5442d6b
9cd4486
b1f4c12
9cd4486
df98374
9cd4486
b1f4c12
 
 
 
5442d6b
b1f4c12
5442d6b
b1f4c12
 
5442d6b
 
 
 
b1f4c12
 
 
 
 
5442d6b
b1f4c12
5442d6b
b1f4c12
 
 
 
 
 
 
 
9cd4486

import streamlit as st
import cv2
import numpy as np
import os
import tempfile
from dotenv import load_dotenv
from st_audiorec import st_audiorec
from analysis import (
    get_facial_emotion,
    get_voice_emotion,
    get_transcript,
    get_llm_response
)

# Load the .env file
load_dotenv()

# Page configuration (title, icon and wide layout for the browser tab/page).
st.set_page_config(page_title="Empathetic AI Assistant", page_icon="🤖", layout="wide")

# Seed the single 'camera_bytes' session-state slot that the rest of this
# script reads and writes, so that attribute access on it never fails before
# a photo has been taken.
# NOTE(review): the original comment says pages/2_Analyze.py also relies on
# this key existing -- confirm against that page.
if "camera_bytes" not in st.session_state:
    st.session_state["camera_bytes"] = None

# --- 2. CREATE THE CALLBACK FUNCTION ---
# This function will run *immediately* when the camera takes a photo.
def _camera_callback():
    """Copy the camera widget's current value into ``camera_bytes``.

    Registered as the ``on_change`` handler of the camera widget keyed
    ``camera_widget_key``. A ``None`` widget value is ignored, so a snapshot
    already stored in ``st.session_state.camera_bytes`` is left in place.
    """
    snapshot = st.session_state.camera_widget_key
    if snapshot is None:
        return
    st.session_state.camera_bytes = snapshot

# Title and description
st.title("🤖 Empathetic AI Assistant")
st.markdown("""
This AI assistant analyzes your emotional state through:
- 📸 **Facial Expression** (from camera)
- 🎤 **Vocal Tone** (from microphone)
- 💬 **Spoken Words** (transcribed from audio)

Then provides an empathetic, context-aware response to your query.
""")

st.divider()

# Two equal-width columns: snapshot on the left, typed query on the right.
col1, col2 = st.columns([1, 1])

with col1:
    st.subheader("📸 1. Capture Your Expression")

    # The widget is keyed 'camera_widget_key'; its on_change callback copies
    # each new snapshot into st.session_state.camera_bytes, which is the value
    # the display and validation logic below read.
    camera_input_data = st.camera_input(
        "Take a snapshot",
        on_change=_camera_callback,
        key="camera_widget_key",
    )

    # Show the *stored* snapshot (or a hint that none has been stored yet).
    if st.session_state.camera_bytes is None:
        st.warning("DEBUG: 'camera_bytes' is currently None. Waiting for user to take photo...")
    else:
        st.success("DEBUG: Image captured and stored in session state!")
        st.image(st.session_state.camera_bytes, caption="Captured Image")
        # Let the user discard the stored snapshot.
        if st.button("Clear Photo"):
            st.session_state.camera_bytes = None
            # Rerun so the display reflects the cleared state. st.rerun() is
            # the supported API; st.experimental_rerun() was deprecated in its
            # favour and is missing from current Streamlit releases, so it is
            # only used as a fallback on older installations.
            if hasattr(st, "rerun"):
                st.rerun()
            else:
                st.experimental_rerun()

with col2:
    st.subheader("💭 2. Your Query")
    # The key also exposes the text as st.session_state.user_query.
    user_query = st.text_area(
        "What would you like to ask?",
        placeholder="Type your question or concern here...",
        height=100,
        key="user_query",
    )

st.divider()

st.subheader("🎙️ 3. Record Your Voice")
st.write("Click the microphone to record your voice, then click 'Analyze' below.")
# Value returned by the recorder component; falsy until a recording exists
# (the Analyze handler checks it with `not audio_bytes`).
audio_bytes = st_audiorec()

st.divider()

# Main action button: validate the three inputs, analyse the snapshot and the
# recording, show the detected emotions, then ask the LLM for a response.
if st.button("🧠 Analyze My Emotion & Answer", type="primary", use_container_width=True):

    # Validation reads the snapshot stored in session state rather than the
    # camera widget's return value.
    if st.session_state.camera_bytes is None:
        st.error("❌ Please take a snapshot using the camera first!")
    elif not audio_bytes:
        st.error("❌ Please record your voice first!")
    elif not st.session_state.user_query.strip():
        st.error("❌ Please enter your query!")
    else:
        # Step 1: Process camera image
        with st.spinner("📸 Processing facial expression..."):
            image_path = None
            try:
                # getvalue() returns the whole buffer regardless of the stream
                # position. read() would come back empty once the same stored
                # upload object has been consumed (e.g. on a second click).
                raw_image = st.session_state.camera_bytes.getvalue()
                file_bytes = np.frombuffer(raw_image, dtype=np.uint8)
                image = cv2.imdecode(file_bytes, cv2.IMREAD_COLOR)
                if image is None:
                    raise ValueError("could not decode the captured snapshot")

                # Reserve a path, close the handle, then let OpenCV write to
                # it: an open NamedTemporaryFile cannot be reopened by name on
                # every platform. The file is removed in `finally`.
                with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_img:
                    image_path = temp_img.name
                if not cv2.imwrite(image_path, image):
                    raise OSError("could not write the snapshot to a temporary file")
                facial_emotion = get_facial_emotion(image_path)

            except Exception as e:
                # Best effort: report the problem and continue with a default.
                st.error(f"Error processing image: {e}")
                facial_emotion = "neutral"
            finally:
                if image_path is not None and os.path.exists(image_path):
                    os.remove(image_path)

        # Step 2: Process recorded audio
        with st.spinner("🎵 Saving and analyzing audio..."):
            audio_path = None
            try:
                # Leaving the `with` block closes (and therefore flushes) the
                # file, so the analysers below see the complete recording
                # instead of a partially buffered one.
                with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_aud:
                    audio_path = temp_aud.name
                    temp_aud.write(audio_bytes)

                voice_emotion = get_voice_emotion(audio_path)
                transcript = get_transcript(audio_path)

            except Exception as e:
                # Best effort: report the problem and continue with defaults.
                st.error(f"Error processing audio: {e}")
                voice_emotion = "neutral"
                transcript = ""
            finally:
                if audio_path is not None and os.path.exists(audio_path):
                    os.remove(audio_path)

        # Step 3: Display analysis results
        st.divider()
        st.subheader("📊 Emotional Analysis Results")

        col_a, col_b, col_c = st.columns(3)

        with col_a:
            st.metric(
                label="😊 Facial Emotion",
                value=facial_emotion.capitalize()
            )

        with col_b:
            st.metric(
                label="🎤 Vocal Tone",
                value=voice_emotion.capitalize()
            )

        with col_c:
            st.metric(
                label="💬 Speech Detected",
                value="Yes" if transcript else "No"
            )

        if transcript:
            st.info(f"**Transcription:** {transcript}")

        # Step 4: Get empathetic AI response
        st.divider()
        with st.spinner("🤖 Empathetic AI is thinking..."):
            ai_response = get_llm_response(
                user_query=st.session_state.user_query,
                face=facial_emotion,
                voice=voice_emotion,
                text=transcript
            )

        # Display final response
        st.subheader("💙 Empathetic Response")
        st.markdown(ai_response)

        # Success feedback
        st.balloons()

# Sidebar content as (heading, markdown body) pairs, rendered in order with a
# divider between consecutive sections. The bodies keep the exact text
# (including leading indentation) that is passed to st.markdown.
_SIDEBAR_SECTIONS = (
    (
        "ℹ️ How to Use",
        "\n"
        "    1. **Take a snapshot**\n"
        "    2. **Type your query**\n"
        "    3. **Click the mic** to record (click again to stop)\n"
        "    4. **Click the 'Analyze' button**\n"
        "    5. **Receive** your response\n"
        "    ",
    ),
    (
        "🔑 Setup Requirements",
        "\n"
        "    Make sure these environment variables are set.\n"
        "    \n"
        "    Create a `.env` file in the same\n"
        "    directory as `app.py`:\n"
        "    ```\n"
        '    ROBOFLOW_API_KEY="your_key"\n'
        '    GROQ_API_KEY="your_key"\n'
        "    ```\n"
        "    ",
    ),
    (
        "🛠️ Tech Stack",
        "\n"
        "    - **Frontend:** Streamlit\n"
        "    - **Audio:** `streamlit-audiorec`\n"
        "    - **Facial Analysis:** Roboflow\n"
        "    - **Voice Analysis:** Hugging Face\n"
        "    - **Speech-to-Text:** Google SR\n"
        "    - **LLM:** Groq (Llama 3)\n"
        "    ",
    ),
)

# Sidebar with instructions
with st.sidebar:
    for section_index, (heading, body) in enumerate(_SIDEBAR_SECTIONS):
        # A divider separates sections; none is drawn before the first one.
        if section_index > 0:
            st.divider()
        st.header(heading)
        st.markdown(body)