Spaces:
Runtime error
Runtime error
Commit ·
d6a6f38
1
Parent(s): 1c72189
Remove SCRFD and OpenCV DNN models for reliable HF Spaces deployment
Browse files
- Remove SCRFD model initialization that fails on external downloads
- Remove OpenCV DNN model requiring manual file uploads
- Simplify to 4 reliable models: MediaPipe, MTCNN, RetinaFace, YOLOv8
- Update UI to show only available models with proper color coding
- Reduce slider maximum to 4 detectors for agreement threshold
- Update documentation and README to reflect streamlined model set
- Ensure robust deployment on Hugging Face Spaces without download failures
README.md
CHANGED
|
@@ -14,7 +14,7 @@ pinned: false
|
|
| 14 |
Compare multiple face detection models and evaluate their robustness with selective model execution.
|
| 15 |
|
| 16 |
Features:
|
| 17 |
-
-
|
| 18 |
- Model selection checkboxes for customized comparisons
|
| 19 |
- Ground truth evaluation with precision/recall metrics
|
| 20 |
- Consensus scoring and agreement visualization
|
|
|
|
| 14 |
Compare multiple face detection models and evaluate their robustness with selective model execution.
|
| 15 |
|
| 16 |
Features:
|
| 17 |
+
- 4 reliable face detection models (MediaPipe, MTCNN, RetinaFace, YOLOv8)
|
| 18 |
- Model selection checkboxes for customized comparisons
|
| 19 |
- Ground truth evaluation with precision/recall metrics
|
| 20 |
- Consensus scoring and agreement visualization
|
app.py
CHANGED
|
@@ -24,32 +24,7 @@ mtcnn = MTCNN(keep_all=True, device=device, min_face_size=20)
|
|
| 24 |
face_app = FaceAnalysis(allowed_modules=['detection'], providers=['CPUExecutionProvider'])
|
| 25 |
face_app.prepare(ctx_id=0, det_size=(640, 640))
|
| 26 |
|
| 27 |
-
# Initialize SCRFD face detector
|
| 28 |
-
try:
|
| 29 |
-
scrfd_app = FaceAnalysis(name='scrfd_10g_bnkps', providers=['CPUExecutionProvider'])
|
| 30 |
-
scrfd_app.prepare(ctx_id=0, det_size=(640, 640))
|
| 31 |
-
print("Loaded SCRFD face detector")
|
| 32 |
-
except Exception as e:
|
| 33 |
-
scrfd_app = None
|
| 34 |
-
print(f"SCRFD model initialization failed: {str(e)}. SCRFD detection will be disabled.")
|
| 35 |
|
| 36 |
-
# Initialize OpenCV DNN face detector
|
| 37 |
-
try:
|
| 38 |
-
# Download model files if they don't exist
|
| 39 |
-
opencv_face_net = None
|
| 40 |
-
opencv_proto_path = 'opencv_face_detector.pbtxt'
|
| 41 |
-
opencv_model_path = 'opencv_face_detector_uint8.pb'
|
| 42 |
-
|
| 43 |
-
# URLs for OpenCV face detection model
|
| 44 |
-
if not (os.path.exists(opencv_proto_path) and os.path.exists(opencv_model_path)):
|
| 45 |
-
print("OpenCV face detection model files not found. DNN detection will be disabled.")
|
| 46 |
-
opencv_face_net = None
|
| 47 |
-
else:
|
| 48 |
-
opencv_face_net = cv2.dnn.readNetFromTensorflow(opencv_model_path, opencv_proto_path)
|
| 49 |
-
print("Loaded OpenCV DNN face detector")
|
| 50 |
-
except Exception as e:
|
| 51 |
-
opencv_face_net = None
|
| 52 |
-
print(f"OpenCV DNN model initialization failed: {str(e)}")
|
| 53 |
|
| 54 |
# Global variable for YOLO face mode
|
| 55 |
yolo_face_mode = False
|
|
@@ -77,9 +52,7 @@ face_detectors = {
|
|
| 77 |
"MediaPipe": "mediapipe",
|
| 78 |
"MTCNN": "mtcnn",
|
| 79 |
"RetinaFace": "retinaface",
|
| 80 |
-
"
|
| 81 |
-
"YOLOv8": "yolo",
|
| 82 |
-
"OpenCV DNN": "opencv_dnn"
|
| 83 |
}
|
| 84 |
|
| 85 |
def create_detection_legend():
|
|
@@ -281,75 +254,6 @@ def detect_faces_yolo(image, confidence_threshold=0.5):
|
|
| 281 |
|
| 282 |
return boxes, confidences
|
| 283 |
|
| 284 |
-
def detect_faces_scrfd(image, confidence_threshold=0.5):
|
| 285 |
-
"""Detect faces using SCRFD via InsightFace with confidence scores."""
|
| 286 |
-
if scrfd_app is None:
|
| 287 |
-
return [], []
|
| 288 |
-
|
| 289 |
-
rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
|
| 290 |
-
|
| 291 |
-
try:
|
| 292 |
-
# Detect faces
|
| 293 |
-
faces = scrfd_app.get(rgb_image)
|
| 294 |
-
|
| 295 |
-
# Convert to [x, y, w, h] format and extract confidence
|
| 296 |
-
result_boxes = []
|
| 297 |
-
result_confidences = []
|
| 298 |
-
for face in faces:
|
| 299 |
-
# Get confidence score (det_score)
|
| 300 |
-
confidence = face.det_score
|
| 301 |
-
if confidence >= confidence_threshold:
|
| 302 |
-
bbox = face.bbox.astype(int)
|
| 303 |
-
x1, y1, x2, y2 = bbox
|
| 304 |
-
w = x2 - x1
|
| 305 |
-
h = y2 - y1
|
| 306 |
-
result_boxes.append([x1, y1, w, h])
|
| 307 |
-
result_confidences.append(float(confidence))
|
| 308 |
-
|
| 309 |
-
return result_boxes, result_confidences
|
| 310 |
-
except Exception as e:
|
| 311 |
-
print(f"SCRFD detection error: {str(e)}")
|
| 312 |
-
return [], []
|
| 313 |
-
|
| 314 |
-
def detect_faces_opencv_dnn(image, confidence_threshold=0.5):
|
| 315 |
-
"""Detect faces using OpenCV DNN with confidence scores."""
|
| 316 |
-
if opencv_face_net is None:
|
| 317 |
-
return [], []
|
| 318 |
-
|
| 319 |
-
try:
|
| 320 |
-
h, w = image.shape[:2]
|
| 321 |
-
|
| 322 |
-
# Create blob from image
|
| 323 |
-
blob = cv2.dnn.blobFromImage(image, 1.0, (300, 300), [104, 117, 123])
|
| 324 |
-
opencv_face_net.setInput(blob)
|
| 325 |
-
detections = opencv_face_net.forward()
|
| 326 |
-
|
| 327 |
-
boxes = []
|
| 328 |
-
confidences = []
|
| 329 |
-
|
| 330 |
-
for i in range(detections.shape[2]):
|
| 331 |
-
confidence = detections[0, 0, i, 2]
|
| 332 |
-
|
| 333 |
-
if confidence > confidence_threshold:
|
| 334 |
-
# Get bounding box coordinates
|
| 335 |
-
x1 = int(detections[0, 0, i, 3] * w)
|
| 336 |
-
y1 = int(detections[0, 0, i, 4] * h)
|
| 337 |
-
x2 = int(detections[0, 0, i, 5] * w)
|
| 338 |
-
y2 = int(detections[0, 0, i, 6] * h)
|
| 339 |
-
|
| 340 |
-
# Convert to [x, y, w, h] format
|
| 341 |
-
box_w = x2 - x1
|
| 342 |
-
box_h = y2 - y1
|
| 343 |
-
|
| 344 |
-
# Ensure valid box dimensions
|
| 345 |
-
if box_w > 0 and box_h > 0:
|
| 346 |
-
boxes.append([x1, y1, box_w, box_h])
|
| 347 |
-
confidences.append(float(confidence))
|
| 348 |
-
|
| 349 |
-
return boxes, confidences
|
| 350 |
-
except Exception as e:
|
| 351 |
-
print(f"OpenCV DNN detection error: {str(e)}")
|
| 352 |
-
return [], []
|
| 353 |
|
| 354 |
def draw_ground_truth(image, ground_truth_boxes):
|
| 355 |
"""Draw ground truth boxes on image in cyan with enhanced visualization."""
|
|
@@ -623,8 +527,8 @@ def create_comparison_grid(images_dict, max_cols=3):
|
|
| 623 |
return grid
|
| 624 |
|
| 625 |
def process_image(image, min_detector_agreement, ground_truth_str, quality_check, confidence_threshold,
|
| 626 |
-
mediapipe_enabled, mtcnn_enabled, retinaface_enabled,
|
| 627 |
-
"""Process the image with selected face detectors and provide enhanced metrics.""
|
| 628 |
if image is None:
|
| 629 |
return None, "No image uploaded", None, None, "❌ REJECTED: No image provided", None, None, None, None
|
| 630 |
|
|
@@ -662,18 +566,14 @@ def process_image(image, min_detector_agreement, ground_truth_str, quality_check
|
|
| 662 |
"MediaPipe": mediapipe_enabled,
|
| 663 |
"MTCNN": mtcnn_enabled,
|
| 664 |
"RetinaFace": retinaface_enabled,
|
| 665 |
-
"
|
| 666 |
-
"YOLOv8": yolo_enabled,
|
| 667 |
-
"OpenCV DNN": opencv_dnn_enabled
|
| 668 |
}
|
| 669 |
|
| 670 |
detector_func_map = {
|
| 671 |
"MediaPipe": detect_faces_mediapipe,
|
| 672 |
"MTCNN": detect_faces_mtcnn,
|
| 673 |
"RetinaFace": detect_faces_retinaface,
|
| 674 |
-
"
|
| 675 |
-
"YOLOv8": detect_faces_yolo,
|
| 676 |
-
"OpenCV DNN": detect_faces_opencv_dnn
|
| 677 |
}
|
| 678 |
|
| 679 |
# Only include enabled detectors
|
|
@@ -718,9 +618,7 @@ def process_image(image, min_detector_agreement, ground_truth_str, quality_check
|
|
| 718 |
"MediaPipe": (0, 255, 0), # Green
|
| 719 |
"MTCNN": (0, 0, 255), # Red
|
| 720 |
"RetinaFace": (255, 255, 0), # Yellow
|
| 721 |
-
"
|
| 722 |
-
"YOLOv8": (255, 0, 255), # Magenta
|
| 723 |
-
"OpenCV DNN": (128, 0, 255) # Purple
|
| 724 |
}
|
| 725 |
|
| 726 |
# Draw all detections with colored boxes and better visibility
|
|
@@ -970,13 +868,10 @@ with gr.Blocks(title="GUARD Robustness Face Detection Ensemble", css=css) as dem
|
|
| 970 |
mediapipe_enabled = gr.Checkbox(value=True, label="MediaPipe", scale=1)
|
| 971 |
mtcnn_enabled = gr.Checkbox(value=True, label="MTCNN", scale=1)
|
| 972 |
retinaface_enabled = gr.Checkbox(value=True, label="RetinaFace", scale=1)
|
| 973 |
-
with gr.Row():
|
| 974 |
-
scrfd_enabled = gr.Checkbox(value=scrfd_app is not None, label="SCRFD", scale=1, interactive=scrfd_app is not None)
|
| 975 |
yolo_enabled = gr.Checkbox(value=True, label="YOLOv8", scale=1)
|
| 976 |
-
opencv_dnn_enabled = gr.Checkbox(value=opencv_face_net is not None, label="OpenCV DNN", scale=1, interactive=opencv_face_net is not None)
|
| 977 |
|
| 978 |
min_detector_agreement = gr.Slider(
|
| 979 |
-
minimum=1, maximum=
|
| 980 |
label="Minimum Detector Agreement",
|
| 981 |
info="Number of detectors that must agree on a face"
|
| 982 |
)
|
|
@@ -1038,20 +933,18 @@ with gr.Blocks(title="GUARD Robustness Face Detection Ensemble", css=css) as dem
|
|
| 1038 |
gr.Markdown("""
|
| 1039 |
---
|
| 1040 |
### 📖 About
|
| 1041 |
-
This demo compares
|
| 1042 |
- **MediaPipe**: Google's lightweight face detection
|
| 1043 |
- **MTCNN**: Multi-task Cascaded CNNs
|
| 1044 |
- **RetinaFace**: State-of-the-art face detection via InsightFace
|
| 1045 |
-
- **SCRFD**: Fast and accurate face detection via InsightFace
|
| 1046 |
- **YOLOv8**: Latest YOLO architecture adapted for face detection
|
| 1047 |
-
- **OpenCV DNN**: Traditional deep learning approach with pre-trained models
|
| 1048 |
|
| 1049 |
Select which models to run and compare their performance. The consensus view shows faces colored by detector agreement level.
|
| 1050 |
""")
|
| 1051 |
|
| 1052 |
# Process button handler
|
| 1053 |
def process_handler(image, min_detector_agreement, ground_truth_str, quality_check, confidence_threshold,
|
| 1054 |
-
mediapipe_enabled, mtcnn_enabled, retinaface_enabled,
|
| 1055 |
if image is None:
|
| 1056 |
legend_img = create_detection_legend()
|
| 1057 |
return [None, "No image selected", None, None,
|
|
@@ -1060,8 +953,7 @@ with gr.Blocks(title="GUARD Robustness Face Detection Ensemble", css=css) as dem
|
|
| 1060 |
try:
|
| 1061 |
return process_image(image, min_detector_agreement, ground_truth_str,
|
| 1062 |
quality_check, confidence_threshold,
|
| 1063 |
-
mediapipe_enabled, mtcnn_enabled, retinaface_enabled,
|
| 1064 |
-
scrfd_enabled, yolo_enabled, opencv_dnn_enabled)
|
| 1065 |
except Exception as e:
|
| 1066 |
print(f"Error processing image: {str(e)}")
|
| 1067 |
legend_img = create_detection_legend()
|
|
@@ -1071,7 +963,7 @@ with gr.Blocks(title="GUARD Robustness Face Detection Ensemble", css=css) as dem
|
|
| 1071 |
submit_btn.click(
|
| 1072 |
fn=process_handler,
|
| 1073 |
inputs=[input_image, min_detector_agreement, ground_truth, quality_check, confidence_threshold,
|
| 1074 |
-
mediapipe_enabled, mtcnn_enabled, retinaface_enabled,
|
| 1075 |
outputs=[output_image, metrics_text, consensus_image, original_image,
|
| 1076 |
verdict_box, detector_status, ground_truth_image, legend_image, comparison_grid]
|
| 1077 |
)
|
|
|
|
| 24 |
face_app = FaceAnalysis(allowed_modules=['detection'], providers=['CPUExecutionProvider'])
|
| 25 |
face_app.prepare(ctx_id=0, det_size=(640, 640))
|
| 26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
|
| 29 |
# Global variable for YOLO face mode
|
| 30 |
yolo_face_mode = False
|
|
|
|
| 52 |
"MediaPipe": "mediapipe",
|
| 53 |
"MTCNN": "mtcnn",
|
| 54 |
"RetinaFace": "retinaface",
|
| 55 |
+
"YOLOv8": "yolo"
|
|
|
|
|
|
|
| 56 |
}
|
| 57 |
|
| 58 |
def create_detection_legend():
|
|
|
|
| 254 |
|
| 255 |
return boxes, confidences
|
| 256 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 257 |
|
| 258 |
def draw_ground_truth(image, ground_truth_boxes):
|
| 259 |
"""Draw ground truth boxes on image in cyan with enhanced visualization."""
|
|
|
|
| 527 |
return grid
|
| 528 |
|
| 529 |
def process_image(image, min_detector_agreement, ground_truth_str, quality_check, confidence_threshold,
|
| 530 |
+
mediapipe_enabled, mtcnn_enabled, retinaface_enabled, yolo_enabled):
|
| 531 |
+
"""Process the image with selected face detectors and provide enhanced metrics.""
|
| 532 |
if image is None:
|
| 533 |
return None, "No image uploaded", None, None, "❌ REJECTED: No image provided", None, None, None, None
|
| 534 |
|
|
|
|
| 566 |
"MediaPipe": mediapipe_enabled,
|
| 567 |
"MTCNN": mtcnn_enabled,
|
| 568 |
"RetinaFace": retinaface_enabled,
|
| 569 |
+
"YOLOv8": yolo_enabled
|
|
|
|
|
|
|
| 570 |
}
|
| 571 |
|
| 572 |
detector_func_map = {
|
| 573 |
"MediaPipe": detect_faces_mediapipe,
|
| 574 |
"MTCNN": detect_faces_mtcnn,
|
| 575 |
"RetinaFace": detect_faces_retinaface,
|
| 576 |
+
"YOLOv8": detect_faces_yolo
|
|
|
|
|
|
|
| 577 |
}
|
| 578 |
|
| 579 |
# Only include enabled detectors
|
|
|
|
| 618 |
"MediaPipe": (0, 255, 0), # Green
|
| 619 |
"MTCNN": (0, 0, 255), # Red
|
| 620 |
"RetinaFace": (255, 255, 0), # Yellow
|
| 621 |
+
"YOLOv8": (255, 0, 255) # Magenta
|
|
|
|
|
|
|
| 622 |
}
|
| 623 |
|
| 624 |
# Draw all detections with colored boxes and better visibility
|
|
|
|
| 868 |
mediapipe_enabled = gr.Checkbox(value=True, label="MediaPipe", scale=1)
|
| 869 |
mtcnn_enabled = gr.Checkbox(value=True, label="MTCNN", scale=1)
|
| 870 |
retinaface_enabled = gr.Checkbox(value=True, label="RetinaFace", scale=1)
|
|
|
|
|
|
|
| 871 |
yolo_enabled = gr.Checkbox(value=True, label="YOLOv8", scale=1)
|
|
|
|
| 872 |
|
| 873 |
min_detector_agreement = gr.Slider(
|
| 874 |
+
minimum=1, maximum=4, value=2, step=1,
|
| 875 |
label="Minimum Detector Agreement",
|
| 876 |
info="Number of detectors that must agree on a face"
|
| 877 |
)
|
|
|
|
| 933 |
gr.Markdown("""
|
| 934 |
---
|
| 935 |
### 📖 About
|
| 936 |
+
This demo compares four reliable face detection models:
|
| 937 |
- **MediaPipe**: Google's lightweight face detection
|
| 938 |
- **MTCNN**: Multi-task Cascaded CNNs
|
| 939 |
- **RetinaFace**: State-of-the-art face detection via InsightFace
|
|
|
|
| 940 |
- **YOLOv8**: Latest YOLO architecture adapted for face detection
|
|
|
|
| 941 |
|
| 942 |
Select which models to run and compare their performance. The consensus view shows faces colored by detector agreement level.
|
| 943 |
""")
|
| 944 |
|
| 945 |
# Process button handler
|
| 946 |
def process_handler(image, min_detector_agreement, ground_truth_str, quality_check, confidence_threshold,
|
| 947 |
+
mediapipe_enabled, mtcnn_enabled, retinaface_enabled, yolo_enabled):
|
| 948 |
if image is None:
|
| 949 |
legend_img = create_detection_legend()
|
| 950 |
return [None, "No image selected", None, None,
|
|
|
|
| 953 |
try:
|
| 954 |
return process_image(image, min_detector_agreement, ground_truth_str,
|
| 955 |
quality_check, confidence_threshold,
|
| 956 |
+
mediapipe_enabled, mtcnn_enabled, retinaface_enabled, yolo_enabled)
|
|
|
|
| 957 |
except Exception as e:
|
| 958 |
print(f"Error processing image: {str(e)}")
|
| 959 |
legend_img = create_detection_legend()
|
|
|
|
| 963 |
submit_btn.click(
|
| 964 |
fn=process_handler,
|
| 965 |
inputs=[input_image, min_detector_agreement, ground_truth, quality_check, confidence_threshold,
|
| 966 |
+
mediapipe_enabled, mtcnn_enabled, retinaface_enabled, yolo_enabled],
|
| 967 |
outputs=[output_image, metrics_text, consensus_image, original_image,
|
| 968 |
verdict_box, detector_status, ground_truth_image, legend_image, comparison_grid]
|
| 969 |
)
|