Matb09 committed on
Commit
b1f721e
·
1 Parent(s): 7586773
Files changed (1) hide show
  1. handler.py +32 -6
handler.py CHANGED
@@ -154,6 +154,14 @@ class EndpointHandler:
154
  def _detect_faces_and_emotions(self, video_path: str) -> Dict:
155
  emotions_data = []
156
  output_video_path = None
 
 
 
 
 
 
 
 
157
  try:
158
  # Create a temporary file for the output video
159
  with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as temp_out_video:
@@ -162,6 +170,8 @@ class EndpointHandler:
162
  original_video = VideoFileClip(video_path)
163
  cap = cv2.VideoCapture(video_path)
164
  fps = int(cap.get(cv2.CAP_PROP_FPS))
 
 
165
  w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
166
  h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
167
 
@@ -179,17 +189,33 @@ class EndpointHandler:
179
  time_seconds = round(frame_number / fps)
180
  result = self.face_detector.detect_emotions(frame)
181
 
 
182
  for face in result:
183
- box = face["box"]
 
 
184
  emotions = face["emotions"]
 
 
 
 
 
 
 
 
 
 
185
  emotions["Time(s)"] = time_seconds
186
  emotions_data.append(emotions)
187
  cv2.rectangle(frame, (box[0], box[1]), (box[0] + box[2], box[1] + box[3]), (0, 155, 255), 2)
188
- for i, (emotion, score) in enumerate(emotions.items()):
189
- if emotion == "Time(s)": continue
190
- color = (211, 211, 211) if score < 0.01 else (255, 0, 0)
191
- cv2.putText(frame, f"{emotion}: {score:.2f}", (box[0], box[1] + box[3] + 30 + i * 15),
192
- cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1, cv2.LINE_AA)
 
 
 
193
  out.write(frame)
194
  frame_number += 1
195
 
 
154
  def _detect_faces_and_emotions(self, video_path: str) -> Dict:
155
  emotions_data = []
156
  output_video_path = None
157
+
158
+ # ===================================================================
159
+ # NEW: Confidence threshold for filtering false positives.
160
+ # Only faces where at least one emotion has a score > 0.35 will be kept.
161
+ # You can adjust this value. Higher = stricter filtering. (e.g., 0.40)
162
+ # ===================================================================
163
+ CONFIDENCE_THRESHOLD = 0.35
164
+
165
  try:
166
  # Create a temporary file for the output video
167
  with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as temp_out_video:
 
170
  original_video = VideoFileClip(video_path)
171
  cap = cv2.VideoCapture(video_path)
172
  fps = int(cap.get(cv2.CAP_PROP_FPS))
173
+ if fps == 0: # Handle potential issue with video metadata
174
+ fps = 30
175
  w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
176
  h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
177
 
 
189
  time_seconds = round(frame_number / fps)
190
  result = self.face_detector.detect_emotions(frame)
191
 
192
+ # Process each face found in the frame
193
  for face in result:
194
+ # ===================================================================
195
+ # NEW: Filtering logic starts here
196
+ # ===================================================================
197
  emotions = face["emotions"]
198
+ max_emotion_score = max(emotions.values())
199
+
200
+ # If the highest emotion score is below our threshold, skip this face
201
+ if max_emotion_score < CONFIDENCE_THRESHOLD:
202
+ continue # Ignore this low-confidence detection
203
+ # ===================================================================
204
+ # End of new filtering logic. The rest of the loop proceeds as before.
205
+ # ===================================================================
206
+
207
+ box = face["box"]
208
  emotions["Time(s)"] = time_seconds
209
  emotions_data.append(emotions)
210
  cv2.rectangle(frame, (box[0], box[1]), (box[0] + box[2], box[1] + box[3]), (0, 155, 255), 2)
211
+
212
+ # Find the dominant emotion to display on the video
213
+ dominant_emotion = max(emotions, key=lambda k: emotions[k] if k != 'Time(s)' else -1)
214
+ text_to_display = f"{dominant_emotion}: {emotions[dominant_emotion]:.2f}"
215
+
216
+ cv2.putText(frame, text_to_display, (box[0], box[1] - 10),
217
+ cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2, cv2.LINE_AA)
218
+
219
  out.write(frame)
220
  frame_number += 1
221