Spaces:

molecularmax
/

guard-robustness-web-demo

Runtime error

molecularmax committed on Sep 4, 2025

Commit

d6a6f38

1 Parent(s): 1c72189

Remove SCRFD and OpenCV DNN models for reliable HF Spaces deployment

- Remove SCRFD model initialization that fails on external downloads
- Remove OpenCV DNN model requiring manual file uploads
- Simplify to 4 reliable models: MediaPipe, MTCNN, RetinaFace, YOLOv8
- Update UI to show only available models with proper color coding
- Reduce slider maximum to 4 detectors for agreement threshold
- Update documentation and README to reflect streamlined model set
- Ensure robust deployment on Hugging Face Spaces without download failures

Files changed (2) hide show

README.md +1 -1
app.py +11 -119

README.md CHANGED Viewed

@@ -14,7 +14,7 @@ pinned: false
 Compare multiple face detection models and evaluate their robustness with selective model execution.
 Features:
-- 6 face detection models (MediaPipe, MTCNN, RetinaFace, SCRFD, YOLOv8, OpenCV DNN)
 - Model selection checkboxes for customized comparisons
 - Ground truth evaluation with precision/recall metrics
 - Consensus scoring and agreement visualization

 Compare multiple face detection models and evaluate their robustness with selective model execution.
 Features:
+- 4 reliable face detection models (MediaPipe, MTCNN, RetinaFace, YOLOv8)
 - Model selection checkboxes for customized comparisons
 - Ground truth evaluation with precision/recall metrics
 - Consensus scoring and agreement visualization

app.py CHANGED Viewed

@@ -24,32 +24,7 @@ mtcnn = MTCNN(keep_all=True, device=device, min_face_size=20)
 face_app = FaceAnalysis(allowed_modules=['detection'], providers=['CPUExecutionProvider'])
 face_app.prepare(ctx_id=0, det_size=(640, 640))
-# Initialize SCRFD face detector
-try:
-    scrfd_app = FaceAnalysis(name='scrfd_10g_bnkps', providers=['CPUExecutionProvider'])
-    scrfd_app.prepare(ctx_id=0, det_size=(640, 640))
-    print("Loaded SCRFD face detector")
-except Exception as e:
-    scrfd_app = None
-    print(f"SCRFD model initialization failed: {str(e)}. SCRFD detection will be disabled.")
-# Initialize OpenCV DNN face detector
-try:
-    # Download model files if they don't exist
-    opencv_face_net = None
-    opencv_proto_path = 'opencv_face_detector.pbtxt'
-    opencv_model_path = 'opencv_face_detector_uint8.pb'
-    # URLs for OpenCV face detection model
-    if not (os.path.exists(opencv_proto_path) and os.path.exists(opencv_model_path)):
-        print("OpenCV face detection model files not found. DNN detection will be disabled.")
-        opencv_face_net = None
-    else:
-        opencv_face_net = cv2.dnn.readNetFromTensorflow(opencv_model_path, opencv_proto_path)
-        print("Loaded OpenCV DNN face detector")
-except Exception as e:
-    opencv_face_net = None
-    print(f"OpenCV DNN model initialization failed: {str(e)}")
 # Global variable for YOLO face mode
 yolo_face_mode = False
@@ -77,9 +52,7 @@ face_detectors = {
     "MediaPipe": "mediapipe",
     "MTCNN": "mtcnn",
     "RetinaFace": "retinaface",
-    "SCRFD": "scrfd",
-    "YOLOv8": "yolo",
-    "OpenCV DNN": "opencv_dnn"
 }
 def create_detection_legend():
@@ -281,75 +254,6 @@ def detect_faces_yolo(image, confidence_threshold=0.5):
     return boxes, confidences
-def detect_faces_scrfd(image, confidence_threshold=0.5):
-    """Detect faces using SCRFD via InsightFace with confidence scores."""
-    if scrfd_app is None:
-        return [], []
-    rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
-    try:
-        # Detect faces
-        faces = scrfd_app.get(rgb_image)
-        # Convert to [x, y, w, h] format and extract confidence
-        result_boxes = []
-        result_confidences = []
-        for face in faces:
-            # Get confidence score (det_score)
-            confidence = face.det_score
-            if confidence >= confidence_threshold:
-                bbox = face.bbox.astype(int)
-                x1, y1, x2, y2 = bbox
-                w = x2 - x1
-                h = y2 - y1
-                result_boxes.append([x1, y1, w, h])
-                result_confidences.append(float(confidence))
-        return result_boxes, result_confidences
-    except Exception as e:
-        print(f"SCRFD detection error: {str(e)}")
-        return [], []
-def detect_faces_opencv_dnn(image, confidence_threshold=0.5):
-    """Detect faces using OpenCV DNN with confidence scores."""
-    if opencv_face_net is None:
-        return [], []
-    try:
-        h, w = image.shape[:2]
-        # Create blob from image
-        blob = cv2.dnn.blobFromImage(image, 1.0, (300, 300), [104, 117, 123])
-        opencv_face_net.setInput(blob)
-        detections = opencv_face_net.forward()
-        boxes = []
-        confidences = []
-        for i in range(detections.shape[2]):
-            confidence = detections[0, 0, i, 2]
-            if confidence > confidence_threshold:
-                # Get bounding box coordinates
-                x1 = int(detections[0, 0, i, 3] * w)
-                y1 = int(detections[0, 0, i, 4] * h)
-                x2 = int(detections[0, 0, i, 5] * w)
-                y2 = int(detections[0, 0, i, 6] * h)
-                # Convert to [x, y, w, h] format
-                box_w = x2 - x1
-                box_h = y2 - y1
-                # Ensure valid box dimensions
-                if box_w > 0 and box_h > 0:
-                    boxes.append([x1, y1, box_w, box_h])
-                    confidences.append(float(confidence))
-        return boxes, confidences
-    except Exception as e:
-        print(f"OpenCV DNN detection error: {str(e)}")
-        return [], []
 def draw_ground_truth(image, ground_truth_boxes):
     """Draw ground truth boxes on image in cyan with enhanced visualization."""
@@ -623,8 +527,8 @@ def create_comparison_grid(images_dict, max_cols=3):
     return grid
 def process_image(image, min_detector_agreement, ground_truth_str, quality_check, confidence_threshold,
-                 mediapipe_enabled, mtcnn_enabled, retinaface_enabled, scrfd_enabled, yolo_enabled, opencv_dnn_enabled):
-    """Process the image with selected face detectors and provide enhanced metrics."""
     if image is None:
         return None, "No image uploaded", None, None, "❌ REJECTED: No image provided", None, None, None, None
@@ -662,18 +566,14 @@ def process_image(image, min_detector_agreement, ground_truth_str, quality_check
         "MediaPipe": mediapipe_enabled,
         "MTCNN": mtcnn_enabled,
         "RetinaFace": retinaface_enabled,
-        "SCRFD": scrfd_enabled,
-        "YOLOv8": yolo_enabled,
-        "OpenCV DNN": opencv_dnn_enabled
     }
     detector_func_map = {
         "MediaPipe": detect_faces_mediapipe,
         "MTCNN": detect_faces_mtcnn,
         "RetinaFace": detect_faces_retinaface,
-        "SCRFD": detect_faces_scrfd,
-        "YOLOv8": detect_faces_yolo,
-        "OpenCV DNN": detect_faces_opencv_dnn
     }
     # Only include enabled detectors
@@ -718,9 +618,7 @@ def process_image(image, min_detector_agreement, ground_truth_str, quality_check
         "MediaPipe": (0, 255, 0),      # Green
         "MTCNN": (0, 0, 255),          # Red
         "RetinaFace": (255, 255, 0),   # Yellow
-        "SCRFD": (255, 128, 0),        # Orange
-        "YOLOv8": (255, 0, 255),       # Magenta
-        "OpenCV DNN": (128, 0, 255)    # Purple
     }
     # Draw all detections with colored boxes and better visibility
@@ -970,13 +868,10 @@ with gr.Blocks(title="GUARD Robustness Face Detection Ensemble", css=css) as dem
                     mediapipe_enabled = gr.Checkbox(value=True, label="MediaPipe", scale=1)
                     mtcnn_enabled = gr.Checkbox(value=True, label="MTCNN", scale=1)
                     retinaface_enabled = gr.Checkbox(value=True, label="RetinaFace", scale=1)
-                with gr.Row():
-                    scrfd_enabled = gr.Checkbox(value=scrfd_app is not None, label="SCRFD", scale=1, interactive=scrfd_app is not None)
                     yolo_enabled = gr.Checkbox(value=True, label="YOLOv8", scale=1)
-                    opencv_dnn_enabled = gr.Checkbox(value=opencv_face_net is not None, label="OpenCV DNN", scale=1, interactive=opencv_face_net is not None)
                 min_detector_agreement = gr.Slider(
-                    minimum=1, maximum=6, value=2, step=1,
                     label="Minimum Detector Agreement",
                     info="Number of detectors that must agree on a face"
                 )
@@ -1038,20 +933,18 @@ with gr.Blocks(title="GUARD Robustness Face Detection Ensemble", css=css) as dem
     gr.Markdown("""
     ---
     ### 📖 About
-    This demo compares six popular face detection models:
     - **MediaPipe**: Google's lightweight face detection
     - **MTCNN**: Multi-task Cascaded CNNs
     - **RetinaFace**: State-of-the-art face detection via InsightFace
-    - **SCRFD**: Fast and accurate face detection via InsightFace
     - **YOLOv8**: Latest YOLO architecture adapted for face detection
-    - **OpenCV DNN**: Traditional deep learning approach with pre-trained models
     Select which models to run and compare their performance. The consensus view shows faces colored by detector agreement level.
     """)
     # Process button handler
     def process_handler(image, min_detector_agreement, ground_truth_str, quality_check, confidence_threshold,
-                       mediapipe_enabled, mtcnn_enabled, retinaface_enabled, scrfd_enabled, yolo_enabled, opencv_dnn_enabled):
         if image is None:
             legend_img = create_detection_legend()
             return [None, "No image selected", None, None,
@@ -1060,8 +953,7 @@ with gr.Blocks(title="GUARD Robustness Face Detection Ensemble", css=css) as dem
         try:
             return process_image(image, min_detector_agreement, ground_truth_str,
                                quality_check, confidence_threshold,
-                               mediapipe_enabled, mtcnn_enabled, retinaface_enabled,
-                               scrfd_enabled, yolo_enabled, opencv_dnn_enabled)
         except Exception as e:
             print(f"Error processing image: {str(e)}")
             legend_img = create_detection_legend()
@@ -1071,7 +963,7 @@ with gr.Blocks(title="GUARD Robustness Face Detection Ensemble", css=css) as dem
     submit_btn.click(
         fn=process_handler,
         inputs=[input_image, min_detector_agreement, ground_truth, quality_check, confidence_threshold,
-               mediapipe_enabled, mtcnn_enabled, retinaface_enabled, scrfd_enabled, yolo_enabled, opencv_dnn_enabled],
         outputs=[output_image, metrics_text, consensus_image, original_image,
                 verdict_box, detector_status, ground_truth_image, legend_image, comparison_grid]
     )

 face_app = FaceAnalysis(allowed_modules=['detection'], providers=['CPUExecutionProvider'])
 face_app.prepare(ctx_id=0, det_size=(640, 640))
 # Global variable for YOLO face mode
 yolo_face_mode = False
     "MediaPipe": "mediapipe",
     "MTCNN": "mtcnn",
     "RetinaFace": "retinaface",
+    "YOLOv8": "yolo"
 }
 def create_detection_legend():
     return boxes, confidences
 def draw_ground_truth(image, ground_truth_boxes):
     """Draw ground truth boxes on image in cyan with enhanced visualization."""
     return grid
 def process_image(image, min_detector_agreement, ground_truth_str, quality_check, confidence_threshold,
+                 mediapipe_enabled, mtcnn_enabled, retinaface_enabled, yolo_enabled):
+    """Process the image with selected face detectors and provide enhanced metrics."""
     if image is None:
         return None, "No image uploaded", None, None, "❌ REJECTED: No image provided", None, None, None, None
         "MediaPipe": mediapipe_enabled,
         "MTCNN": mtcnn_enabled,
         "RetinaFace": retinaface_enabled,
+        "YOLOv8": yolo_enabled
     }
     detector_func_map = {
         "MediaPipe": detect_faces_mediapipe,
         "MTCNN": detect_faces_mtcnn,
         "RetinaFace": detect_faces_retinaface,
+        "YOLOv8": detect_faces_yolo
     }
     # Only include enabled detectors
         "MediaPipe": (0, 255, 0),      # Green
         "MTCNN": (0, 0, 255),          # Red
         "RetinaFace": (255, 255, 0),   # Yellow
+        "YOLOv8": (255, 0, 255)        # Magenta
     }
     # Draw all detections with colored boxes and better visibility
                     mediapipe_enabled = gr.Checkbox(value=True, label="MediaPipe", scale=1)
                     mtcnn_enabled = gr.Checkbox(value=True, label="MTCNN", scale=1)
                     retinaface_enabled = gr.Checkbox(value=True, label="RetinaFace", scale=1)
                     yolo_enabled = gr.Checkbox(value=True, label="YOLOv8", scale=1)
                 min_detector_agreement = gr.Slider(
+                    minimum=1, maximum=4, value=2, step=1,
                     label="Minimum Detector Agreement",
                     info="Number of detectors that must agree on a face"
                 )
     gr.Markdown("""
     ---
     ### 📖 About
+    This demo compares four reliable face detection models:
     - **MediaPipe**: Google's lightweight face detection
     - **MTCNN**: Multi-task Cascaded CNNs
     - **RetinaFace**: State-of-the-art face detection via InsightFace
     - **YOLOv8**: Latest YOLO architecture adapted for face detection
     Select which models to run and compare their performance. The consensus view shows faces colored by detector agreement level.
     """)
     # Process button handler
     def process_handler(image, min_detector_agreement, ground_truth_str, quality_check, confidence_threshold,
+                       mediapipe_enabled, mtcnn_enabled, retinaface_enabled, yolo_enabled):
         if image is None:
             legend_img = create_detection_legend()
             return [None, "No image selected", None, None,
         try:
             return process_image(image, min_detector_agreement, ground_truth_str,
                                quality_check, confidence_threshold,
+                               mediapipe_enabled, mtcnn_enabled, retinaface_enabled, yolo_enabled)
         except Exception as e:
             print(f"Error processing image: {str(e)}")
             legend_img = create_detection_legend()
     submit_btn.click(
         fn=process_handler,
         inputs=[input_image, min_detector_agreement, ground_truth, quality_check, confidence_threshold,
+               mediapipe_enabled, mtcnn_enabled, retinaface_enabled, yolo_enabled],
         outputs=[output_image, metrics_text, consensus_image, original_image,
                 verdict_box, detector_status, ground_truth_image, legend_image, comparison_grid]
     )