Update app.py
app.py
CHANGED
@@ -19,13 +19,111 @@ current_model_name = "ViT-B/16"
 # Initialize MTCNN for face detection
 mtcnn = MTCNN(keep_all=True, device=device)
 
-# Process image function (same as before)
 def process_image(input_image, selected_model):
-    …
+    global model, preprocess, current_model_name
+
+    try:
+        # Load the selected model if it's different from the current one
+        if selected_model != current_model_name:
+            model, preprocess = clip.load(selected_model, device=device)
+            current_model_name = selected_model
+
+        # Convert input_image to numpy array
+        cv2_frame = np.array(input_image)
+        cv2_frame = cv2.cvtColor(cv2_frame, cv2.COLOR_RGB2BGR)
+
+        # Detect faces
+        frame_pil = Image.fromarray(cv2.cvtColor(cv2_frame, cv2.COLOR_BGR2RGB))
+        boxes, _ = mtcnn.detect(frame_pil)
+
+        # Find the largest face detected
+        largest_face = None
+        if boxes is not None and len(boxes) > 0:
+            largest_face = max(boxes, key=lambda box: (box[2] - box[0]) * (box[3] - box[1]))
+
+        # Process the largest face
+        if largest_face is not None:
+            x, y, w, h = map(int, largest_face)
+            cv2.rectangle(cv2_frame, (x, y), (w, h), (0, 0, 255), 2)
+            cropped_face = cv2_frame[y:h, x:w]
+
+            # Convert the cropped face to grayscale
+            frame_gray = cv2.cvtColor(cropped_face, cv2.COLOR_BGR2GRAY)
+            frame_gray_resized = cv2.resize(frame_gray, (160, 160))
+
+            # Convert the resized grayscale image to a tensor
+            frame_tensor = preprocess(Image.fromarray(frame_gray_resized)).unsqueeze(0).to(device)
+
+            # Tokenize input labels and prepare for model
+            input_labels = input_labels_X.split(", ")
+            text = clip.tokenize(input_labels).to(device)
+
+            with torch.no_grad():
+                # Encode the frame and text
+                image_features = model.encode_image(frame_tensor)
+                text_features = model.encode_text(text)
+
+                # Calculate logit
+                logit_per_image, logit_per_text = model(frame_tensor, text)
+
+                # Apply softmax to convert logits to probabilities
+                probabilities = F.softmax(logit_per_image[0], dim=0)
+
+            # Combine labels with probabilities and sort
+            combined_labels_probs = list(zip(input_labels, probabilities.tolist()))
+            combined_labels_probs.sort(key=lambda x: x[1], reverse=True)
+            top_five_labels_probs = combined_labels_probs[:5]
+
+            # Prepare results
+            results = []
+            for label, prob in top_five_labels_probs:
+                results.append(f"{label.strip()}: {prob * 100:.1f}%")
+
+            # Draw results on the image
+            for idx, result in enumerate(results):
+                cv2.putText(cv2_frame, result, (10, 30 + idx * 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
+
+            # Convert back to RGB for display
+            output_image = cv2.cvtColor(cv2_frame, cv2.COLOR_BGR2RGB)
+
+            # Create a bar plot with different colors
+            labels, probs = zip(*top_five_labels_probs)
+            fig, ax = plt.subplots(figsize=(10, 6))
+            colors = list(mcolors.TABLEAU_COLORS.values())[:5]  # Get 5 distinct colors
+            ax.barh(labels, probs, color=colors)
+            ax.set_xlabel('Probability')
+            ax.set_title('Top Emotion Probabilities')
+            ax.set_xlim(0, max(probs) * 1.1)  # Set x-axis limit to slightly larger than max probability
+            plt.tight_layout()
+
+            return output_image, "\n".join(results), fig
+        else:
+            return cv2_frame, "No face detected", None
+    except Exception as e:
+        return None, f"An error occurred: {str(e)}", None
 
-# Process video function (same as before)
 def process_video(input_video, frame_number, selected_model):
-    …
+    try:
+        # Load the video
+        cap = cv2.VideoCapture(input_video)
+        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+
+        if frame_number >= total_frames:
+            return None, "Frame number exceeds total frames in the video", None
+
+        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_number)
+        ret, frame = cap.read()
+        if not ret:
+            return None, "Error reading the frame", None
+
+        frame_pil = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
+        processed_frame, results, fig = process_image(frame_pil, selected_model)
+        cap.release()
+
+        return processed_frame, results, fig
+    except Exception as e:
+        return None, f"An error occurred: {str(e)}", None
+
 
 def update_slider(video):
     if video is None: