"""Empathetic AI Assistant — Streamlit entry point.

Captures a webcam snapshot, a voice recording, and a typed query, then
combines facial-emotion, vocal-tone, and transcript analysis into a single
empathetic LLM response.
"""

import os
import tempfile

import cv2
import numpy as np
import streamlit as st
from dotenv import load_dotenv
from st_audiorec import st_audiorec

from analysis import (
    get_facial_emotion,
    get_voice_emotion,
    get_transcript,
    get_llm_response,
)

# Load API keys (ROBOFLOW_API_KEY, GROQ_API_KEY) from the local .env file.
load_dotenv()

# Page configuration
st.set_page_config(
    page_title="Empathetic AI Assistant",
    page_icon="🤖",
    layout="wide",
)

# --- 1. INITIALIZE THE SESSION STATE KEY ---
# 'camera_bytes' persists the captured snapshot across reruns so other
# pages (e.g. pages/2_Analyze.py) can read it without an AttributeError.
if "camera_bytes" not in st.session_state:
    st.session_state.camera_bytes = None


# --- 2. CREATE THE CALLBACK FUNCTION ---
def _camera_callback():
    """Run immediately when the camera takes a photo.

    Copies the widget's UploadedFile (keyed 'camera_widget_key') into the
    durable session-state slot 'camera_bytes'.
    """
    if st.session_state.camera_widget_key is not None:
        st.session_state.camera_bytes = st.session_state.camera_widget_key


def _rerun():
    """Trigger a script rerun on any Streamlit version.

    st.experimental_rerun() was removed in Streamlit 1.37+; st.rerun() is
    its replacement (added in 1.27). Dispatch to whichever exists.
    """
    if hasattr(st, "rerun"):
        st.rerun()
    else:
        st.experimental_rerun()


# Title and description
st.title("🤖 Empathetic AI Assistant")
st.markdown("""
This AI assistant analyzes your emotional state through:
- 📸 **Facial Expression** (from camera)
- 🎤 **Vocal Tone** (from microphone)
- đŸ’Ŧ **Spoken Words** (transcribed from audio)

Then provides an empathetic, context-aware response to your query.
""")

st.divider()

# Create two columns for layout
col1, col2 = st.columns([1, 1])

with col1:
    st.subheader("📸 1. Capture Your Expression")

    # --- 3. USE THE CAMERA WIDGET WITH THE CALLBACK ---
    # The callback reads the widget's value via its key, so the return
    # value itself is not needed here.
    st.camera_input(
        "Take a snapshot",
        on_change=_camera_callback,
        key="camera_widget_key",
    )

    # --- 4. DEBUG/DISPLAY LOGIC READS FROM SESSION STATE ---
    # Display the *stored* image so it survives unrelated reruns.
    if st.session_state.camera_bytes is None:
        st.warning("DEBUG: 'camera_bytes' is currently None. Waiting for user to take photo...")
    else:
        st.success("DEBUG: Image captured and stored in session state!")
        st.image(st.session_state.camera_bytes, caption="Captured Image")

        # Add a button to clear the stored image
        if st.button("Clear Photo"):
            st.session_state.camera_bytes = None
            _rerun()  # rerun so the display updates immediately

with col2:
    st.subheader("💭 2. Your Query")
    # Keyed so the analyze handler can read it back from session state.
    user_query = st.text_area(
        "What would you like to ask?",
        placeholder="Type your question or concern here...",
        height=100,
        key="user_query",
    )

st.divider()

st.subheader("đŸŽ™ī¸ 3. Record Your Voice")
st.write("Click the microphone to record your voice, then click 'Analyze' below.")
audio_bytes = st_audiorec()

st.divider()

# Main action button
if st.button("🧠 Analyze My Emotion & Answer", type="primary", use_container_width=True):

    # --- 5. VALIDATION CHECKS THE RELIABLE SESSION STATE ---
    if not st.session_state.camera_bytes:
        st.error("❌ Please take a snapshot using the camera first!")
    elif not audio_bytes:
        st.error("❌ Please record your voice first!")
    elif not st.session_state.user_query.strip():
        st.error("❌ Please enter your query!")
    else:
        # Step 1: Process camera image
        with st.spinner("📸 Processing facial expression..."):
            temp_img_path = None
            try:
                # getvalue() is rerun-safe: read() can return b"" once the
                # UploadedFile buffer has already been consumed elsewhere.
                file_bytes = np.frombuffer(
                    st.session_state.camera_bytes.getvalue(), dtype=np.uint8
                )
                image = cv2.imdecode(file_bytes, cv2.IMREAD_COLOR)
                # delete=False + explicit unlink: with delete=True the helper
                # cannot reopen the path on Windows while it is still open here.
                with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_img:
                    temp_img_path = temp_img.name
                cv2.imwrite(temp_img_path, image)
                facial_emotion = get_facial_emotion(temp_img_path)
            except Exception as e:
                # Best-effort: fall back to a neutral reading rather than abort.
                st.error(f"Error processing image: {e}")
                facial_emotion = "neutral"
            finally:
                if temp_img_path and os.path.exists(temp_img_path):
                    os.unlink(temp_img_path)

        # Step 2: Process recorded audio
        with st.spinner("đŸŽĩ Saving and analyzing audio..."):
            temp_aud_path = None
            try:
                with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_aud:
                    temp_aud.write(audio_bytes)
                    temp_aud_path = temp_aud.name
                # File is now closed (and flushed), so the helpers can
                # safely reopen it by path on every platform.
                voice_emotion = get_voice_emotion(temp_aud_path)
                transcript = get_transcript(temp_aud_path)
            except Exception as e:
                st.error(f"Error processing audio: {e}")
                voice_emotion = "neutral"
                transcript = ""
            finally:
                if temp_aud_path and os.path.exists(temp_aud_path):
                    os.unlink(temp_aud_path)

        # Display analysis results
        st.divider()
        st.subheader("📊 Emotional Analysis Results")

        col_a, col_b, col_c = st.columns(3)
        with col_a:
            st.metric(
                label="😊 Facial Emotion",
                value=facial_emotion.capitalize(),
            )
        with col_b:
            st.metric(
                label="🎤 Vocal Tone",
                value=voice_emotion.capitalize(),
            )
        with col_c:
            st.metric(
                label="đŸ’Ŧ Speech Detected",
                value="Yes" if transcript else "No",
            )

        if transcript:
            st.info(f"**Transcription:** {transcript}")

        # Step 5: Get empathetic AI response
        st.divider()
        with st.spinner("🤖 Empathetic AI is thinking..."):
            ai_response = get_llm_response(
                user_query=st.session_state.user_query,  # read from session state
                face=facial_emotion,
                voice=voice_emotion,
                text=transcript,
            )

        # Display final response
        st.subheader("💙 Empathetic Response")
        st.markdown(ai_response)

        # Success feedback
        st.balloons()

# Sidebar with instructions
with st.sidebar:
    st.header("â„šī¸ How to Use")
    st.markdown("""
    1. **Take a snapshot**
    2. **Type your query**
    3. **Click the mic** to record (click again to stop)
    4. **Click the 'Analyze' button**
    5. **Receive** your response
    """)
    st.divider()
    st.header("🔑 Setup Requirements")
    st.markdown("""
    Make sure these environment variables are set.
    Create a `.env` file in the same directory as `app.py`:
    ```
    ROBOFLOW_API_KEY="your_key"
    GROQ_API_KEY="your_key"
    ```
    """)
    st.divider()
    st.header("đŸ› ī¸ Tech Stack")
    st.markdown("""
    - **Frontend:** Streamlit
    - **Audio:** `streamlit-audiorec`
    - **Facial Analysis:** Roboflow
    - **Voice Analysis:** Hugging Face
    - **Speech-to-Text:** Google SR
    - **LLM:** Groq (Llama 3)
    """)