molecularmax committed on
Commit
d6a6f38
·
1 Parent(s): 1c72189

Remove SCRFD and OpenCV DNN models for reliable HF Spaces deployment

Browse files

- Remove SCRFD model initialization that fails on external downloads
- Remove OpenCV DNN model requiring manual file uploads
- Simplify to 4 reliable models: MediaPipe, MTCNN, RetinaFace, YOLOv8
- Update UI to show only available models with proper color coding
- Reduce slider maximum to 4 detectors for agreement threshold
- Update documentation and README to reflect streamlined model set
- Ensure robust deployment on Hugging Face Spaces without download failures

Files changed (2) hide show
  1. README.md +1 -1
  2. app.py +11 -119
README.md CHANGED
@@ -14,7 +14,7 @@ pinned: false
14
  Compare multiple face detection models and evaluate their robustness with selective model execution.
15
 
16
  Features:
17
- - 6 face detection models (MediaPipe, MTCNN, RetinaFace, SCRFD, YOLOv8, OpenCV DNN)
18
  - Model selection checkboxes for customized comparisons
19
  - Ground truth evaluation with precision/recall metrics
20
  - Consensus scoring and agreement visualization
 
14
  Compare multiple face detection models and evaluate their robustness with selective model execution.
15
 
16
  Features:
17
+ - 4 reliable face detection models (MediaPipe, MTCNN, RetinaFace, YOLOv8)
18
  - Model selection checkboxes for customized comparisons
19
  - Ground truth evaluation with precision/recall metrics
20
  - Consensus scoring and agreement visualization
app.py CHANGED
@@ -24,32 +24,7 @@ mtcnn = MTCNN(keep_all=True, device=device, min_face_size=20)
24
  face_app = FaceAnalysis(allowed_modules=['detection'], providers=['CPUExecutionProvider'])
25
  face_app.prepare(ctx_id=0, det_size=(640, 640))
26
 
27
- # Initialize SCRFD face detector
28
- try:
29
- scrfd_app = FaceAnalysis(name='scrfd_10g_bnkps', providers=['CPUExecutionProvider'])
30
- scrfd_app.prepare(ctx_id=0, det_size=(640, 640))
31
- print("Loaded SCRFD face detector")
32
- except Exception as e:
33
- scrfd_app = None
34
- print(f"SCRFD model initialization failed: {str(e)}. SCRFD detection will be disabled.")
35
 
36
- # Initialize OpenCV DNN face detector
37
- try:
38
- # Download model files if they don't exist
39
- opencv_face_net = None
40
- opencv_proto_path = 'opencv_face_detector.pbtxt'
41
- opencv_model_path = 'opencv_face_detector_uint8.pb'
42
-
43
- # URLs for OpenCV face detection model
44
- if not (os.path.exists(opencv_proto_path) and os.path.exists(opencv_model_path)):
45
- print("OpenCV face detection model files not found. DNN detection will be disabled.")
46
- opencv_face_net = None
47
- else:
48
- opencv_face_net = cv2.dnn.readNetFromTensorflow(opencv_model_path, opencv_proto_path)
49
- print("Loaded OpenCV DNN face detector")
50
- except Exception as e:
51
- opencv_face_net = None
52
- print(f"OpenCV DNN model initialization failed: {str(e)}")
53
 
54
  # Global variable for YOLO face mode
55
  yolo_face_mode = False
@@ -77,9 +52,7 @@ face_detectors = {
77
  "MediaPipe": "mediapipe",
78
  "MTCNN": "mtcnn",
79
  "RetinaFace": "retinaface",
80
- "SCRFD": "scrfd",
81
- "YOLOv8": "yolo",
82
- "OpenCV DNN": "opencv_dnn"
83
  }
84
 
85
  def create_detection_legend():
@@ -281,75 +254,6 @@ def detect_faces_yolo(image, confidence_threshold=0.5):
281
 
282
  return boxes, confidences
283
 
284
- def detect_faces_scrfd(image, confidence_threshold=0.5):
285
- """Detect faces using SCRFD via InsightFace with confidence scores."""
286
- if scrfd_app is None:
287
- return [], []
288
-
289
- rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
290
-
291
- try:
292
- # Detect faces
293
- faces = scrfd_app.get(rgb_image)
294
-
295
- # Convert to [x, y, w, h] format and extract confidence
296
- result_boxes = []
297
- result_confidences = []
298
- for face in faces:
299
- # Get confidence score (det_score)
300
- confidence = face.det_score
301
- if confidence >= confidence_threshold:
302
- bbox = face.bbox.astype(int)
303
- x1, y1, x2, y2 = bbox
304
- w = x2 - x1
305
- h = y2 - y1
306
- result_boxes.append([x1, y1, w, h])
307
- result_confidences.append(float(confidence))
308
-
309
- return result_boxes, result_confidences
310
- except Exception as e:
311
- print(f"SCRFD detection error: {str(e)}")
312
- return [], []
313
-
314
- def detect_faces_opencv_dnn(image, confidence_threshold=0.5):
315
- """Detect faces using OpenCV DNN with confidence scores."""
316
- if opencv_face_net is None:
317
- return [], []
318
-
319
- try:
320
- h, w = image.shape[:2]
321
-
322
- # Create blob from image
323
- blob = cv2.dnn.blobFromImage(image, 1.0, (300, 300), [104, 117, 123])
324
- opencv_face_net.setInput(blob)
325
- detections = opencv_face_net.forward()
326
-
327
- boxes = []
328
- confidences = []
329
-
330
- for i in range(detections.shape[2]):
331
- confidence = detections[0, 0, i, 2]
332
-
333
- if confidence > confidence_threshold:
334
- # Get bounding box coordinates
335
- x1 = int(detections[0, 0, i, 3] * w)
336
- y1 = int(detections[0, 0, i, 4] * h)
337
- x2 = int(detections[0, 0, i, 5] * w)
338
- y2 = int(detections[0, 0, i, 6] * h)
339
-
340
- # Convert to [x, y, w, h] format
341
- box_w = x2 - x1
342
- box_h = y2 - y1
343
-
344
- # Ensure valid box dimensions
345
- if box_w > 0 and box_h > 0:
346
- boxes.append([x1, y1, box_w, box_h])
347
- confidences.append(float(confidence))
348
-
349
- return boxes, confidences
350
- except Exception as e:
351
- print(f"OpenCV DNN detection error: {str(e)}")
352
- return [], []
353
 
354
  def draw_ground_truth(image, ground_truth_boxes):
355
  """Draw ground truth boxes on image in cyan with enhanced visualization."""
@@ -623,8 +527,8 @@ def create_comparison_grid(images_dict, max_cols=3):
623
  return grid
624
 
625
  def process_image(image, min_detector_agreement, ground_truth_str, quality_check, confidence_threshold,
626
- mediapipe_enabled, mtcnn_enabled, retinaface_enabled, scrfd_enabled, yolo_enabled, opencv_dnn_enabled):
627
- """Process the image with selected face detectors and provide enhanced metrics."""
628
  if image is None:
629
  return None, "No image uploaded", None, None, "❌ REJECTED: No image provided", None, None, None, None
630
 
@@ -662,18 +566,14 @@ def process_image(image, min_detector_agreement, ground_truth_str, quality_check
662
  "MediaPipe": mediapipe_enabled,
663
  "MTCNN": mtcnn_enabled,
664
  "RetinaFace": retinaface_enabled,
665
- "SCRFD": scrfd_enabled,
666
- "YOLOv8": yolo_enabled,
667
- "OpenCV DNN": opencv_dnn_enabled
668
  }
669
 
670
  detector_func_map = {
671
  "MediaPipe": detect_faces_mediapipe,
672
  "MTCNN": detect_faces_mtcnn,
673
  "RetinaFace": detect_faces_retinaface,
674
- "SCRFD": detect_faces_scrfd,
675
- "YOLOv8": detect_faces_yolo,
676
- "OpenCV DNN": detect_faces_opencv_dnn
677
  }
678
 
679
  # Only include enabled detectors
@@ -718,9 +618,7 @@ def process_image(image, min_detector_agreement, ground_truth_str, quality_check
718
  "MediaPipe": (0, 255, 0), # Green
719
  "MTCNN": (0, 0, 255), # Red
720
  "RetinaFace": (255, 255, 0), # Yellow
721
- "SCRFD": (255, 128, 0), # Orange
722
- "YOLOv8": (255, 0, 255), # Magenta
723
- "OpenCV DNN": (128, 0, 255) # Purple
724
  }
725
 
726
  # Draw all detections with colored boxes and better visibility
@@ -970,13 +868,10 @@ with gr.Blocks(title="GUARD Robustness Face Detection Ensemble", css=css) as dem
970
  mediapipe_enabled = gr.Checkbox(value=True, label="MediaPipe", scale=1)
971
  mtcnn_enabled = gr.Checkbox(value=True, label="MTCNN", scale=1)
972
  retinaface_enabled = gr.Checkbox(value=True, label="RetinaFace", scale=1)
973
- with gr.Row():
974
- scrfd_enabled = gr.Checkbox(value=scrfd_app is not None, label="SCRFD", scale=1, interactive=scrfd_app is not None)
975
  yolo_enabled = gr.Checkbox(value=True, label="YOLOv8", scale=1)
976
- opencv_dnn_enabled = gr.Checkbox(value=opencv_face_net is not None, label="OpenCV DNN", scale=1, interactive=opencv_face_net is not None)
977
 
978
  min_detector_agreement = gr.Slider(
979
- minimum=1, maximum=6, value=2, step=1,
980
  label="Minimum Detector Agreement",
981
  info="Number of detectors that must agree on a face"
982
  )
@@ -1038,20 +933,18 @@ with gr.Blocks(title="GUARD Robustness Face Detection Ensemble", css=css) as dem
1038
  gr.Markdown("""
1039
  ---
1040
  ### 📖 About
1041
- This demo compares six popular face detection models:
1042
  - **MediaPipe**: Google's lightweight face detection
1043
  - **MTCNN**: Multi-task Cascaded CNNs
1044
  - **RetinaFace**: State-of-the-art face detection via InsightFace
1045
- - **SCRFD**: Fast and accurate face detection via InsightFace
1046
  - **YOLOv8**: Latest YOLO architecture adapted for face detection
1047
- - **OpenCV DNN**: Traditional deep learning approach with pre-trained models
1048
 
1049
  Select which models to run and compare their performance. The consensus view shows faces colored by detector agreement level.
1050
  """)
1051
 
1052
  # Process button handler
1053
  def process_handler(image, min_detector_agreement, ground_truth_str, quality_check, confidence_threshold,
1054
- mediapipe_enabled, mtcnn_enabled, retinaface_enabled, scrfd_enabled, yolo_enabled, opencv_dnn_enabled):
1055
  if image is None:
1056
  legend_img = create_detection_legend()
1057
  return [None, "No image selected", None, None,
@@ -1060,8 +953,7 @@ with gr.Blocks(title="GUARD Robustness Face Detection Ensemble", css=css) as dem
1060
  try:
1061
  return process_image(image, min_detector_agreement, ground_truth_str,
1062
  quality_check, confidence_threshold,
1063
- mediapipe_enabled, mtcnn_enabled, retinaface_enabled,
1064
- scrfd_enabled, yolo_enabled, opencv_dnn_enabled)
1065
  except Exception as e:
1066
  print(f"Error processing image: {str(e)}")
1067
  legend_img = create_detection_legend()
@@ -1071,7 +963,7 @@ with gr.Blocks(title="GUARD Robustness Face Detection Ensemble", css=css) as dem
1071
  submit_btn.click(
1072
  fn=process_handler,
1073
  inputs=[input_image, min_detector_agreement, ground_truth, quality_check, confidence_threshold,
1074
- mediapipe_enabled, mtcnn_enabled, retinaface_enabled, scrfd_enabled, yolo_enabled, opencv_dnn_enabled],
1075
  outputs=[output_image, metrics_text, consensus_image, original_image,
1076
  verdict_box, detector_status, ground_truth_image, legend_image, comparison_grid]
1077
  )
 
24
  face_app = FaceAnalysis(allowed_modules=['detection'], providers=['CPUExecutionProvider'])
25
  face_app.prepare(ctx_id=0, det_size=(640, 640))
26
 
 
 
 
 
 
 
 
 
27
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
  # Global variable for YOLO face mode
30
  yolo_face_mode = False
 
52
  "MediaPipe": "mediapipe",
53
  "MTCNN": "mtcnn",
54
  "RetinaFace": "retinaface",
55
+ "YOLOv8": "yolo"
 
 
56
  }
57
 
58
  def create_detection_legend():
 
254
 
255
  return boxes, confidences
256
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
257
 
258
  def draw_ground_truth(image, ground_truth_boxes):
259
  """Draw ground truth boxes on image in cyan with enhanced visualization."""
 
527
  return grid
528
 
529
  def process_image(image, min_detector_agreement, ground_truth_str, quality_check, confidence_threshold,
530
+ mediapipe_enabled, mtcnn_enabled, retinaface_enabled, yolo_enabled):
531
  """Process the image with selected face detectors and provide enhanced metrics."""
532
  if image is None:
533
  return None, "No image uploaded", None, None, "❌ REJECTED: No image provided", None, None, None, None
534
 
 
566
  "MediaPipe": mediapipe_enabled,
567
  "MTCNN": mtcnn_enabled,
568
  "RetinaFace": retinaface_enabled,
569
+ "YOLOv8": yolo_enabled
 
 
570
  }
571
 
572
  detector_func_map = {
573
  "MediaPipe": detect_faces_mediapipe,
574
  "MTCNN": detect_faces_mtcnn,
575
  "RetinaFace": detect_faces_retinaface,
576
+ "YOLOv8": detect_faces_yolo
 
 
577
  }
578
 
579
  # Only include enabled detectors
 
618
  "MediaPipe": (0, 255, 0), # Green
619
  "MTCNN": (0, 0, 255), # Red
620
  "RetinaFace": (255, 255, 0), # Yellow
621
+ "YOLOv8": (255, 0, 255) # Magenta
 
 
622
  }
623
 
624
  # Draw all detections with colored boxes and better visibility
 
868
  mediapipe_enabled = gr.Checkbox(value=True, label="MediaPipe", scale=1)
869
  mtcnn_enabled = gr.Checkbox(value=True, label="MTCNN", scale=1)
870
  retinaface_enabled = gr.Checkbox(value=True, label="RetinaFace", scale=1)
 
 
871
  yolo_enabled = gr.Checkbox(value=True, label="YOLOv8", scale=1)
 
872
 
873
  min_detector_agreement = gr.Slider(
874
+ minimum=1, maximum=4, value=2, step=1,
875
  label="Minimum Detector Agreement",
876
  info="Number of detectors that must agree on a face"
877
  )
 
933
  gr.Markdown("""
934
  ---
935
  ### 📖 About
936
+ This demo compares four reliable face detection models:
937
  - **MediaPipe**: Google's lightweight face detection
938
  - **MTCNN**: Multi-task Cascaded CNNs
939
  - **RetinaFace**: State-of-the-art face detection via InsightFace
 
940
  - **YOLOv8**: Latest YOLO architecture adapted for face detection
 
941
 
942
  Select which models to run and compare their performance. The consensus view shows faces colored by detector agreement level.
943
  """)
944
 
945
  # Process button handler
946
  def process_handler(image, min_detector_agreement, ground_truth_str, quality_check, confidence_threshold,
947
+ mediapipe_enabled, mtcnn_enabled, retinaface_enabled, yolo_enabled):
948
  if image is None:
949
  legend_img = create_detection_legend()
950
  return [None, "No image selected", None, None,
 
953
  try:
954
  return process_image(image, min_detector_agreement, ground_truth_str,
955
  quality_check, confidence_threshold,
956
+ mediapipe_enabled, mtcnn_enabled, retinaface_enabled, yolo_enabled)
 
957
  except Exception as e:
958
  print(f"Error processing image: {str(e)}")
959
  legend_img = create_detection_legend()
 
963
  submit_btn.click(
964
  fn=process_handler,
965
  inputs=[input_image, min_detector_agreement, ground_truth, quality_check, confidence_threshold,
966
+ mediapipe_enabled, mtcnn_enabled, retinaface_enabled, yolo_enabled],
967
  outputs=[output_image, metrics_text, consensus_image, original_image,
968
  verdict_box, detector_status, ground_truth_image, legend_image, comparison_grid]
969
  )