"""Streamlit app for facial emotion recognition on photos and live video.

Faces are located with an OpenCV Haar cascade; each face crop is resized to
48x48 grayscale and classified by a pre-trained Keras model loaded from
``emotion_model.h5``.
"""

import io
from base64 import b64decode, b64encode

import cv2
import numpy as np
import PIL
import PIL.Image  # `import PIL` alone does not load the Image submodule
import streamlit as st
from keras.models import load_model
from streamlit_webrtc import webrtc_streamer, VideoProcessorBase

# Initialize the Haar Cascade face detection model
face_cascade = cv2.CascadeClassifier(
    cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
)
model = load_model('emotion_model.h5', compile=False)
emotion_dict = {
    0: "Angry",
    1: "Disgust",
    2: "Fear",
    3: "Happy",
    4: "Neutral",
    5: "Sad",
    6: "Surprised",
}


# Define functions to convert between JavaScript image reply and OpenCV image
def js_to_image(js_reply):
    """Decode a base64 data-URL string into an OpenCV image.

    Args:
        js_reply: String of the form ``"<header>,<base64 payload>"``; only
            the text after the first comma is decoded.

    Returns:
        The image produced by ``cv2.imdecode`` with ``flags=1``.
    """
    image_bytes = b64decode(js_reply.split(',')[1])
    jpg_as_np = np.frombuffer(image_bytes, dtype=np.uint8)
    return cv2.imdecode(jpg_as_np, flags=1)


def bbox_to_bytes(bbox_array):
    """Encode an RGBA array as a ``data:image/png;base64,...`` string.

    Args:
        bbox_array: Array passed to ``PIL.Image.fromarray`` in ``'RGBA'`` mode.

    Returns:
        A data-URL string containing the PNG-encoded image.
    """
    bbox_pil = PIL.Image.fromarray(bbox_array, 'RGBA')
    iobuf = io.BytesIO()
    bbox_pil.save(iobuf, format='png')
    encoded = b64encode(iobuf.getvalue()).decode('utf-8')
    return 'data:image/png;base64,{}'.format(encoded)


# Define function to process each frame from the video stream
def process_frame(frame):
    """Detect faces in a BGR frame, classify each one and annotate the frame.

    The frame is modified in place: a rectangle and a
    ``"<emotion> (<score>)"`` label are drawn for every detected face.

    Args:
        frame: BGR image array (it is converted with ``COLOR_BGR2GRAY``).

    Returns:
        A ``(frame, emotions)`` tuple where ``frame`` is the same, now
        annotated, array and ``emotions`` holds one model output vector per
        detected face (empty when no face was found).
    """
    # Convert frame to grayscale
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    # Perform face detection
    faces = face_cascade.detectMultiScale(gray)

    emotions = []
    # Process each detected face
    for (x, y, w, h) in faces:
        face_region = gray[y:y + h, x:x + w]
        face_resized = cv2.resize(face_region, (48, 48))

        # Build a (1, 48, 48, 1) batch for the model.
        # NOTE(review): pixel values are fed unscaled (0-255); confirm this
        # matches the preprocessing the model was trained with.
        img = np.expand_dims(face_resized, axis=0)
        img = np.expand_dims(img, axis=-1)

        # Run inference once and reuse the result for both the returned
        # vector and the on-frame label.
        predictions = model.predict(img)
        scores = predictions[0]
        emotions.append(scores)

        predicted_class = int(np.argmax(scores))
        predicted_emotion = emotion_dict[predicted_class]
        confidence = scores[predicted_class]

        # Draw bounding box and emotion label on the frame
        cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 2)
        cv2.putText(
            frame,
            f"{predicted_emotion} ({confidence:.2f})",
            (x, y - 10),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.5,
            (255, 255, 0),
            2,
        )

    return frame, emotions


class VideoProcessor(VideoProcessorBase):
    """streamlit_webrtc processor that annotates each video frame."""

    def recv(self, frame):
        """Annotate one incoming frame with face boxes and emotion labels.

        Args:
            frame: Incoming video frame; converted to a ``bgr24`` ndarray.

        Returns:
            A new frame built from the annotated ``bgr24`` array.
        """
        img = frame.to_ndarray(format="bgr24")
        # Same detection/annotation path as still photos; the per-face
        # score vectors are not needed for the live stream.
        img, _ = process_frame(img)
        return frame.from_ndarray(img, format="bgr24")


# Page Title and Description
st.set_page_config(page_title="Facial Emotion Recognition", layout="wide")
st.title("Facial Emotion Recognition")

# Sidebar
st.sidebar.title("Options")
option = st.sidebar.radio("Select Option", ("Drag a File", "Process Video"))

# Main Content Area
if option == "Drag a File":
    st.subheader("Photo Processing")

    # Process image or captured frame
    uploaded_file = st.file_uploader("Upload Photo", type=["jpg", "jpeg", "png"])
    if uploaded_file is not None:
        file_bytes = np.frombuffer(uploaded_file.read(), dtype=np.uint8)
        # cv2.imdecode raises on an empty buffer and returns None when the
        # bytes are not a decodable image; treat both as a failed upload.
        image = cv2.imdecode(file_bytes, 1) if file_bytes.size else None

        if image is None:
            st.error("Could not decode the uploaded file as an image.")
        else:
            processed_frame, emotions = process_frame(image)

            # Display processed frame and emotions
            st.subheader("Processed Frame")
            st.image(processed_frame, channels="BGR", use_column_width=False)

            if not emotions:
                st.warning("No faces detected in the image.")
elif option == "Process Video":
    webrtc_streamer(key="camera", video_processor_factory=VideoProcessor)