import streamlit as st
import cv2
import torch
import torchvision.transforms as transforms
from PIL import Image
import numpy as np
import timm
import torch.nn as nn
import mediapipe as mp
import time
import tempfile
import pandas as pd

# Initialize device (CPU only; uncomment the line below to report CUDA availability)
device = "cpu"
# st.write(f"Using CUDA: {torch.cuda.is_available()}")

# Define the transformation to apply to the face crops
transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

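# Illustrative sanity check (not part of the app): this pipeline maps any RGB
# PIL image to a normalized 3x224x224 float tensor, e.g.
#   transform(Image.new("RGB", (640, 480))).shape == torch.Size([3, 224, 224])
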
change_list = []

# Class-index-to-label mapping (hoisted out of the frame loop; it is constant)
label_dict = {
    0: "angry",
    1: "disgust",
    2: "fear",
    3: "happy",
    4: "neutral",
    5: "sad",
    6: "surprised",
}

# Load the model. Note that torch.load here deserializes a full pickled model
# object, which replaces the freshly created one; the create_model/classifier
# lines document the expected architecture.
model = timm.create_model("tf_efficientnet_b0_ns", pretrained=False)
model.classifier = nn.Sequential(nn.Linear(in_features=1280, out_features=7))
model = torch.load("22.6_AffectNet_10K_part2.pt", map_location=device)
model.to(device)
model.eval()
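# If the checkpoint were instead a state dict (an assumption; it depends on
# how the .pt file was saved), the usual pattern would be:
#   model.load_state_dict(torch.load("22.6_AffectNet_10K_part2.pt", map_location=device))
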
# Initialize MediaPipe Face Detection
mp_face_detection = mp.solutions.face_detection
mp_drawing = mp.solutions.drawing_utils
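# Note: model_selection=0 (used below) picks MediaPipe's short-range face
# model, suited to faces within roughly 2 meters of the camera;
# model_selection=1 would select the full-range model.
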
# Streamlit interface
st.title("Emotion Detection from Video")
st.write("Upload a video file to detect emotions.")
uploaded_file = st.file_uploader("Choose a video file", type=["mp4", "avi", "mov"])

if uploaded_file is not None:
    # Persist the upload to a temporary file so OpenCV can open it by path
    with tempfile.NamedTemporaryFile(delete=False) as temp_file:
        temp_file.write(uploaded_file.read())
        video_path = temp_file.name

    cap = cv2.VideoCapture(video_path)
    # Per-emotion frame counts, plus a 7x7 transition matrix indexed as
    # mat[current_emotion][previous_emotion]
    histogram = {i: 0 for i in range(7)}
    mat = [[0 for _ in range(7)] for _ in range(7)]
    prev_emotion = None
    current_emotion = None
    # Placeholder so the video frame below is updated in place rather than
    # appended as a new image on every iteration
    frame_placeholder = st.empty()
    begin = time.time()
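    # Worked example of the bookkeeping above: if the detected emotion goes
    # happy -> sad -> happy, the loop below increments mat[5][3] (sad after
    # happy) and then mat[3][5] (happy after sad).
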
    with mp_face_detection.FaceDetection(
        model_selection=0, min_detection_confidence=0.5
    ) as face_detection:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            # Convert frame to RGB for MediaPipe
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            # Process the frame and detect faces
            results = face_detection.process(rgb_frame)
            if results.detections:
                for detection in results.detections:
                    # Get the bounding box in pixel coordinates
                    bboxC = detection.location_data.relative_bounding_box
                    ih, iw, _ = frame.shape
                    x, y, w, h = (
                        int(bboxC.xmin * iw),
                        int(bboxC.ymin * ih),
                        int(bboxC.width * iw),
                        int(bboxC.height * ih),
                    )
                    # Clamp the box to the frame and skip degenerate crops
                    x, y = max(x, 0), max(y, 0)
                    face = frame[y : y + h, x : x + w]
                    if face.size == 0:
                        continue
                    # Convert the face crop to a PIL image
                    face_pil = Image.fromarray(cv2.cvtColor(face, cv2.COLOR_BGR2RGB))
                    # Apply transformations
                    face_tensor = transform(face_pil).unsqueeze(0).to(device)
                    # Pass the face through the neural network
                    with torch.no_grad():
                        output = model(face_tensor)
                        _, predicted = torch.max(output, 1)
                    # Draw a rectangle around the face and label it with the prediction
                    cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 2)
                    label = label_dict[predicted.item()]
                    current_emotion = predicted.item()
                    if current_emotion != prev_emotion:
                        current_time = time.time() - begin
                        if prev_emotion is not None:
                            msg = (
                                f"Change detected: {label_dict[prev_emotion]} -> "
                                f"{label_dict[current_emotion]} at {current_time:.2f}s"
                            )
                            st.write(msg)
                            change_list.append(msg)
                            mat[current_emotion][prev_emotion] += 1
                        prev_emotion = current_emotion
                    histogram[predicted.item()] += 1
                    cv2.putText(
                        frame,
                        label,
                        (x, y - 10),
                        cv2.FONT_HERSHEY_SIMPLEX,
                        0.9,
                        (255, 0, 0),
                        2,
                    )
            # Display the current frame in place via the placeholder
            frame_placeholder.image(frame, channels="BGR")
        # Release the capture once the video is exhausted
        cap.release()

    end = time.time()
    st.write(f"Total runtime of the program is {end - begin:.2f} seconds")

    # Plot the distribution of predicted emotions
    st.write("Emotion Distribution")
    x = ["angry", "disgust", "fear", "happy", "neutral", "sad", "surprised"]
    y = list(histogram.values())
    total = sum(y)
    # Guard against videos in which no face was ever detected
    y_new = [(i / total) * 100 for i in y] if total > 0 else y
    st.bar_chart(pd.DataFrame({"Percentage": y_new}, index=x))

    data = {
        "angry": mat[0],
        "disgust": mat[1],
        "fear": mat[2],
        "happy": mat[3],
        "neutral": mat[4],
        "sad": mat[5],
        "surprised": mat[6],
    }
| st.write("Change Matrix") | |
| st.write("Y - axis -> initial emotion") | |
| st.write("X - axis -> next emotion") | |
| df = pd.DataFrame( | |
| data, | |
| index=["angry", "disgust", "fear", "happy", "neutral", "sad", "surprised"], | |
| ) | |
| st.table(df) | |
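    # Optional sketch (not part of the original app): row-normalize the change
    # matrix into transition probabilities. The replace(0, 1) avoids division
    # by zero for emotions that never occurred as the initial state.
    # df_prob = df.div(df.sum(axis=1).replace(0, 1), axis=0)
    # st.table(df_prob)
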
| st.write("Change List") | |
| st.write(change_list) | |
else:
    st.write("Please upload a video file to start emotion detection.")