Spaces:
Running
Running
Update src/EmotionRecognition/pipeline/hf_predictor.py
Browse files
src/EmotionRecognition/pipeline/hf_predictor.py
CHANGED
|
@@ -20,26 +20,19 @@ class HFPredictor:
|
|
| 20 |
self.stable_prediction = "---"
|
| 21 |
print("[PREDICTOR INFO] Predictor initialized successfully.")
|
| 22 |
|
| 23 |
-
|
| 24 |
-
def process_frame(self, frame):
|
| 25 |
"""
|
| 26 |
-
|
| 27 |
-
|
| 28 |
"""
|
| 29 |
-
if frame is None:
|
| 30 |
-
|
| 31 |
-
# --- MIRROR FIX: Flip the frame FIRST! ---
|
| 32 |
-
# This ensures detection and drawing happen in the same coordinate space the user sees.
|
| 33 |
-
frame = cv2.flip(frame, 1)
|
| 34 |
-
annotated_frame = frame.copy()
|
| 35 |
-
# --- END FIX ---
|
| 36 |
|
| 37 |
-
|
| 38 |
faces = self.face_detector.detect_faces(frame)
|
| 39 |
|
| 40 |
for face in faces:
|
| 41 |
x, y, width, height = face['box']
|
| 42 |
-
x, y = max(0, x), max(0, y)
|
| 43 |
face_roi = frame[y:y+height, x:x+width]
|
| 44 |
|
| 45 |
if face_roi.size > 0:
|
|
@@ -51,27 +44,66 @@ class HFPredictor:
|
|
| 51 |
probs = torch.nn.functional.softmax(logits, dim=-1)
|
| 52 |
predictions = probs[0].numpy()
|
| 53 |
pred_index = np.argmax(predictions)
|
| 54 |
-
|
| 55 |
-
# Use temporal smoothing for the displayed label
|
| 56 |
confidence = predictions[pred_index]
|
|
|
|
| 57 |
if confidence > self.confidence_threshold:
|
| 58 |
self.recent_predictions.append(pred_index)
|
| 59 |
-
if self.recent_predictions:
|
| 60 |
-
most_common_pred = Counter(self.recent_predictions).most_common(1)[0][0]
|
| 61 |
-
self.stable_prediction = self.classes[most_common_pred]
|
| 62 |
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
cv2.putText(annotated_frame, text, (x + 5, y - 5), FONT, 0.8, BLACK, 2)
|
| 73 |
cv2.rectangle(annotated_frame, (x, y), (x+width, y+height), GREEN, 3)
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
return annotated_frame, all_probabilities
|
|
|
|
| 20 |
self.stable_prediction = "---"
|
| 21 |
print("[PREDICTOR INFO] Predictor initialized successfully.")
|
| 22 |
|
| 23 |
+
def get_probabilities(self, frame):
|
|
|
|
| 24 |
"""
|
| 25 |
+
A lightweight function that takes a frame, runs inference,
|
| 26 |
+
updates the stable prediction, and returns ONLY the probability dictionary.
|
| 27 |
"""
|
| 28 |
+
if frame is None:
|
| 29 |
+
return {}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
|
| 31 |
+
probabilities = {}
|
| 32 |
faces = self.face_detector.detect_faces(frame)
|
| 33 |
|
| 34 |
for face in faces:
|
| 35 |
x, y, width, height = face['box']
|
|
|
|
| 36 |
face_roi = frame[y:y+height, x:x+width]
|
| 37 |
|
| 38 |
if face_roi.size > 0:
|
|
|
|
| 44 |
probs = torch.nn.functional.softmax(logits, dim=-1)
|
| 45 |
predictions = probs[0].numpy()
|
| 46 |
pred_index = np.argmax(predictions)
|
|
|
|
|
|
|
| 47 |
confidence = predictions[pred_index]
|
| 48 |
+
|
| 49 |
if confidence > self.confidence_threshold:
|
| 50 |
self.recent_predictions.append(pred_index)
|
|
|
|
|
|
|
|
|
|
| 51 |
|
| 52 |
+
probabilities = {self.classes[i]: float(predictions[i]) for i in range(len(self.classes))}
|
| 53 |
+
|
| 54 |
+
return probabilities
|
| 55 |
+
|
| 56 |
+
def annotate_frame(self, frame):
    """Return a copy of *frame* with a box and the current stable
    prediction label drawn over every detected face.

    The label text comes from ``self.stable_prediction``, which is
    refreshed elsewhere by the probability pass, so the on-screen text
    stays smooth even when inference runs at a lower rate than drawing.
    """
    if frame is None:
        return None

    canvas = frame.copy()
    detections = self.face_detector.detect_faces(frame)

    # Drawing parameters and the label are loop-invariant; bind them once.
    green, black = (0, 255, 0), (0, 0, 0)
    font = cv2.FONT_HERSHEY_SIMPLEX
    label = self.stable_prediction  # the smoothed prediction, not a per-frame one

    for detection in detections:
        left, top, box_w, box_h = detection['box']
        (label_w, label_h), base = cv2.getTextSize(label, font, 0.8, 2)
        # Filled backdrop sits just above the face box, then the text, then the box.
        cv2.rectangle(canvas, (left, top - label_h - base - 10),
                      (left + label_w + 10, top), green, cv2.FILLED)
        cv2.putText(canvas, label, (left + 5, top - 5), font, 0.8, black, 2)
        cv2.rectangle(canvas, (left, top), (left + box_w, top + box_h), green, 3)

    return canvas
|
| 81 |
+
|
| 82 |
+
def process_frame_for_upload(self, frame):
    """All-in-one pass for static images and uploaded videos: detect faces,
    classify each one, and draw an annotated box with the top label.

    Args:
        frame: image array (indexable as frame[y:y+h, x:x+w]) or None.

    Returns:
        Tuple ``(annotated_frame, probabilities)`` where ``probabilities``
        maps each class name to its float softmax score for the last face
        processed. Returns ``(None, {})`` for a None frame and
        ``(frame.copy(), {})`` when no usable face is found.
    """
    if frame is None:
        return None, {}
    annotated_frame = frame.copy()
    probabilities = {}
    faces = self.face_detector.detect_faces(frame)
    for face in faces:
        x, y, width, height = face['box']
        # The detector can report a slightly negative box origin; clamp it
        # so the slice below does not wrap around to the far edge of the
        # image and yield a wrong (or empty) ROI.
        x, y = max(0, x), max(0, y)
        face_roi = frame[y:y+height, x:x+width]
        if face_roi.size > 0:
            # NOTE(review): Image.fromarray treats the array as RGB; if this
            # frame comes straight from OpenCV it is BGR — confirm upstream.
            pil_image = Image.fromarray(face_roi)
            inputs = self.processor(images=pil_image, return_tensors="pt")
            with torch.no_grad():
                logits = self.model(**inputs).logits
            probs = torch.nn.functional.softmax(logits, dim=-1)
            predictions = probs[0].numpy()
            pred_index = np.argmax(predictions)
            emotion = self.classes[pred_index]
            confidence = predictions[pred_index]
            text = f"{emotion} ({confidence*100:.1f}%)"
            # (Drawing logic is duplicated here for simplicity)
            GREEN = (0, 255, 0); BLACK = (0, 0, 0); FONT = cv2.FONT_HERSHEY_SIMPLEX
            (tw, th), bl = cv2.getTextSize(text, FONT, 0.8, 2)
            cv2.rectangle(annotated_frame, (x, y-th-bl-10), (x+tw+10, y), GREEN, cv2.FILLED)
            cv2.putText(annotated_frame, text, (x + 5, y - 5), FONT, 0.8, BLACK, 2)
            cv2.rectangle(annotated_frame, (x, y), (x+width, y+height), GREEN, 3)
            # Overwritten per face: only the last face's distribution is kept.
            probabilities = {self.classes[i]: float(predictions[i]) for i in range(len(self.classes))}
    return annotated_frame, probabilities
|
|
|
|
|
|