# app.py
import streamlit as st
import cv2
import numpy as np
import tensorflow as tf
import os
from streamlit_webrtc import webrtc_streamer, VideoTransformerBase, WebRtcMode
import av  # Part of streamlit-webrtc's dependencies, used for frame handling

# --- Streamlit Page Configuration (MUST BE THE FIRST STREAMLIT COMMAND) ---
st.set_page_config(page_title="Real-time Emotion Recognition", layout="wide")

# --- 1. Load Model and Face Detector (Cached for Performance) ---
@st.cache_resource  # Cache the loaded model across Streamlit reruns
def load_emotion_model():
    # Path to your trained model.
    # In a Docker container, the app's working directory will be /app,
    # so if your models folder is at /app/models, 'models/...' is correct.
    # Ensure your Dockerfile copies the 'models' folder correctly.
    model_path = 'models/emotion_model_best.h5'
    if not os.path.exists(model_path):
        st.error(f"Error: Model file not found at {model_path}. Please ensure it's copied into the Docker image and the path is correct.")
        st.stop()
    try:
        model = tf.keras.models.load_model(model_path)
        return model
    except Exception as e:
        st.error(f"Error loading model from {model_path}: {e}")
        st.stop()

@st.cache_resource  # Cache the face detector across Streamlit reruns
def load_face_detector():
    # Path to your Haar Cascade file.
    # Ensure 'haarcascade_frontalface_default.xml' is in the root of your project
    # directory (which is copied to /app in Docker) for this path to be correct.
    cascade_path = 'haarcascade_frontalface_default.xml'
    if not os.path.exists(cascade_path):
        st.error(f"Error: Haar Cascade file not found at {cascade_path}.")
        st.markdown("Please ensure `haarcascade_frontalface_default.xml` is in the root of your project directory alongside `src/` and `models/`.")
        st.markdown("Download from: [https://github.com/opencv/opencv/blob/4.x/data/haarcascades/haarcascade_frontalface_default.xml](https://github.com/opencv/opencv/blob/4.x/data/haarcascades/haarcascade_frontalface_default.xml)")
        st.stop()
    face_cascade = cv2.CascadeClassifier(cascade_path)
    if face_cascade.empty():
        st.error(f"Error: Could not load Haar Cascade classifier from {cascade_path}. Check file integrity.")
        st.stop()
    return face_cascade

# Load the model and face detector when the app starts
model = load_emotion_model()
face_detector = load_face_detector()

# --- 2. Define Constants and Labels ---
IMG_HEIGHT = 48
IMG_WIDTH = 48
emotion_labels = ['angry', 'disgust', 'fear', 'happy', 'neutral', 'sad', 'surprise']
label_colors = {
    'angry': (0, 0, 255),      # BGR red
    'disgust': (0, 165, 255),  # BGR orange
    'fear': (0, 255, 255),     # BGR yellow
    'happy': (0, 255, 0),      # BGR green
    'neutral': (255, 255, 0),  # BGR cyan
    'sad': (255, 0, 0),        # BGR blue
    'surprise': (255, 0, 255)  # BGR magenta
}
FACE_DETECTION_DOWNSCALE = 0.5  # Scale factor applied before face detection
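
# NOTE (assumption): the order of emotion_labels must match the class order the
# model was trained with -- alphabetical order is assumed here, as produced by
# Keras directory-based data loaders. Adjust if your training setup differs.
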
# --- 3. Video Processing Class ---
# This class receives frames from the client and processes them on the server.
class EmotionDetector(VideoTransformerBase):
    def __init__(self, model, face_detector):
        self.model = model
        self.face_detector = face_detector

    def transform(self, frame: av.VideoFrame) -> np.ndarray:
        # Convert the av.VideoFrame to a NumPy array.
        # Requesting "bgr24" directly from `av` to align with OpenCV's default.
        img_bgr = frame.to_ndarray(format="bgr24")

        # Convert to grayscale for face detection and emotion prediction
        gray_frame = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)

        # Scale down for faster face detection
        small_frame = cv2.resize(gray_frame, (0, 0), fx=FACE_DETECTION_DOWNSCALE, fy=FACE_DETECTION_DOWNSCALE)

        # Detect faces
        faces = self.face_detector.detectMultiScale(small_frame, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))

        # Scale face coordinates back to the original frame size
        original_faces = []
        for (x, y, w, h) in faces:
            x_orig = int(x / FACE_DETECTION_DOWNSCALE)
            y_orig = int(y / FACE_DETECTION_DOWNSCALE)
            w_orig = int(w / FACE_DETECTION_DOWNSCALE)
            h_orig = int(h / FACE_DETECTION_DOWNSCALE)
            original_faces.append((x_orig, y_orig, w_orig, h_orig))

        # Process each detected face
        for (x, y, w, h) in original_faces:
            # Draw a rectangle on the BGR image
            cv2.rectangle(img_bgr, (x, y), (x + w, y + h), (255, 0, 0), 2)

            # Extract the face ROI for emotion prediction,
            # clamping the coordinates to the image bounds.
            face_roi = gray_frame[max(0, y):min(gray_frame.shape[0], y + h), max(0, x):min(gray_frame.shape[1], x + w)]
            if face_roi.size == 0:  # Skip if the ROI is empty (e.g., face partially out of frame)
                continue

            face_roi = cv2.resize(face_roi, (IMG_WIDTH, IMG_HEIGHT))
            face_roi = np.expand_dims(face_roi, axis=0)   # Add batch dimension
            face_roi = np.expand_dims(face_roi, axis=-1)  # Add channel dimension (grayscale)
            face_roi = face_roi / 255.0                   # Normalize pixel values

            predictions = self.model.predict(face_roi, verbose=0)[0]
            emotion_index = np.argmax(predictions)
            predicted_emotion = emotion_labels[emotion_index]
            confidence = predictions[emotion_index] * 100

            text_color = label_colors.get(predicted_emotion, (255, 255, 255))
            text = f"{predicted_emotion} ({confidence:.2f}%)"
            # Position the text above the face, or below it if there is not enough space above
            text_y = y - 10 if y - 10 > 10 else y + h + 20
            # Draw the text on the BGR image
            cv2.putText(img_bgr, text, (x, text_y), cv2.FONT_HERSHEY_SIMPLEX, 0.9, text_color, 2, cv2.LINE_AA)

        # Return the annotated frame in BGR order; streamlit-webrtc converts the
        # array returned by transform() back into a video frame as "bgr24", so
        # converting to RGB here would swap the red and blue channels on screen.
        return img_bgr
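
# NOTE: VideoTransformerBase and its transform() hook are deprecated in recent
# streamlit-webrtc releases. The equivalent with the current API would be a
# VideoProcessorBase subclass whose recv() returns an av.VideoFrame -- a sketch
# only, not wired up below:
#
#   from streamlit_webrtc import VideoProcessorBase
#
#   class EmotionProcessor(VideoProcessorBase):
#       def recv(self, frame: av.VideoFrame) -> av.VideoFrame:
#           img = frame.to_ndarray(format="bgr24")
#           # ... same face detection and emotion prediction as above ...
#           return av.VideoFrame.from_ndarray(img, format="bgr24")
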
# --- 4. Streamlit App Layout and WebRTC Stream ---
st.title("Live Facial Emotion Recognition")
st.markdown("""
This application uses a deep learning model to detect emotions from faces in real time.
It accesses your webcam directly via your browser (WebRTC) and processes the video frames on the server.
""")

# Place the webrtc_streamer widget.
# It automatically renders a video player and Start/Stop buttons.
webrtc_ctx = webrtc_streamer(
    key="emotion_detection_stream",
    mode=WebRtcMode.SENDRECV,  # Send video from the client, receive processed video from the server
    video_processor_factory=lambda: EmotionDetector(model, face_detector),
    media_stream_constraints={"video": True, "audio": False},  # Video only, no audio
    async_processing=False,  # Keep this False for now to avoid asyncio errors
    # Optional: auto-start the stream (desired_playing_state takes a bool).
    # Uncomment if you prefer not to click Start manually.
    # desired_playing_state=True,
    # --- ENHANCED RTC CONFIGURATION ---
    # A robust list of public STUN servers for better NAT traversal
    rtc_configuration={
        "iceServers": [
            {"urls": ["stun:stun.l.google.com:19302"]},
            {"urls": ["stun:stun1.l.google.com:19302"]},
            {"urls": ["stun:stun2.l.google.com:19302"]},
            {"urls": ["stun:stun3.l.google.com:19302"]},
            {"urls": ["stun:stun4.l.google.com:19302"]},
            {"urls": ["stun:stun.services.mozilla.com"]},
            {"urls": ["stun:global.stun.twilio.com:3478"]},
            {"urls": ["stun:stun.nextcloud.com:3478"]},
            {"urls": ["stun:stun.schlund.de"]},
            {"urls": ["stun:stun.stunprotocol.org"]},
            {"urls": ["stun:stunserver.org"]},
        ]
    },
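    # Note: STUN only helps peers discover their public addresses; if either side
    # sits behind a symmetric NAT (common on cloud hosts), a TURN server may also
    # be required for the connection to establish.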
)
# Diagnostics: webrtc_streamer does not accept a log_level keyword argument;
# streamlit-webrtc logs through Python's standard `logging` module, so enable
# debug output there instead (see the snippet at the end of this post).

# Provide feedback based on the stream state
if webrtc_ctx.state.playing:
    st.success("Webcam stream active. Looking for faces...")
else:
    st.info("Webcam stream not active. Click the 'Start' button above to begin, and allow camera access.")
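
If you still want the verbose WebRTC diagnostics the original debug comment was after, streamlit-webrtc and the underlying aiortc/aioice libraries use Python's standard logging module. A minimal sketch, assuming it is placed near the top of app.py; the logger names below are simply the package names, so adjust the levels to taste:

    import logging

    # Emit debug-level output from the WebRTC stack to the console.
    logging.basicConfig(level=logging.INFO)
    logging.getLogger("streamlit_webrtc").setLevel(logging.DEBUG)
    logging.getLogger("aiortc").setLevel(logging.DEBUG)
    logging.getLogger("aioice").setLevel(logging.DEBUG)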