# molecularmax's picture
# Remove SCRFD and OpenCV DNN models for reliable HF Spaces deployment
# d6a6f38
import gradio as gr
import numpy as np
import cv2
import mediapipe as mp
from facenet_pytorch import MTCNN
import torch
from insightface.app import FaceAnalysis
from ultralytics import YOLO
import os
import glob
from PIL import Image
import json
from datetime import datetime
# ---------------------------------------------------------------------------
# Model initialization (runs once at import time)
# ---------------------------------------------------------------------------

# MediaPipe face-detection solution handles.
mp_face_detection = mp.solutions.face_detection
mp_drawing = mp.solutions.drawing_utils

# Initialize MTCNN on GPU when available, otherwise CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
mtcnn = MTCNN(keep_all=True, device=device, min_face_size=20)

# Initialize InsightFace (RetinaFace); detection module only, CPU provider
# for portability on HF Spaces.
face_app = FaceAnalysis(allowed_modules=['detection'], providers=['CPUExecutionProvider'])
face_app.prepare(ctx_id=0, det_size=(640, 640))

# Global flag: True when a face-specific YOLO checkpoint is loaded,
# False when falling back to the general COCO model (person class).
yolo_face_mode = False

# Initialize YOLOv8 face detector.
# Note: a face-specific model such as yolov8n-face.pt is preferred when present.
try:
    if os.path.exists('yolov8n-face.pt'):
        yolo_model = YOLO('yolov8n-face.pt')
        yolo_face_mode = True
        print("Loaded YOLOv8 face-specific model")
    else:
        # Fall back to the general model; person boxes are adapted to faces later.
        yolo_model = YOLO('yolov8n.pt')
        yolo_face_mode = False
        print("Loaded general YOLOv8 model - will adapt person detections for faces")
except Exception as e:
    # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt still
    # propagate, and the actual load error is surfaced instead of hidden.
    yolo_model = None
    yolo_face_mode = False
    print(f"YOLOv8 model not available ({e}). YOLO detection will be disabled.")

# Mapping of display name -> internal detector identifier.
face_detectors = {
    "MediaPipe": "mediapipe",
    "MTCNN": "mtcnn",
    "RetinaFace": "retinaface",
    "YOLOv8": "yolo"
}
def create_detection_legend():
    """Create a legend image mapping box colors to detector names.

    Returns:
        np.ndarray: BGR legend image showing the four active detectors,
        the ground-truth color, and the consensus agreement color scale.
    """
    # Canvas sized so the per-model rows plus the consensus section fit
    # (the original 280px height clipped the agreement labels).
    legend_height = 300
    legend_width = 350
    legend = np.ones((legend_height, legend_width, 3), dtype=np.uint8) * 255
    # Subtle vertical gradient background.
    for y in range(legend_height):
        legend[y, :] = [255 - int(y * 0.1), 255 - int(y * 0.1), 255]
    # Title.
    cv2.putText(legend, "Detection Legend", (legend_width//2 - 70, 25),
                cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 0), 2)
    # Only the four active detectors plus ground truth are listed
    # (SCRFD and OpenCV DNN were removed from the ensemble).
    colors = {
        "MediaPipe": (0, 255, 0),      # Green
        "MTCNN": (0, 0, 255),          # Red
        "RetinaFace": (255, 255, 0),   # Yellow
        "YOLOv8": (255, 0, 255),       # Magenta
        "Ground Truth": (0, 255, 255)  # Cyan
    }
    y_offset = 60
    for name, color in colors.items():
        # Filled swatch with a black border.
        cv2.rectangle(legend, (15, y_offset - 15), (35, y_offset + 5), color, -1)
        cv2.rectangle(legend, (15, y_offset - 15), (35, y_offset + 5), (0, 0, 0), 1)
        # Text with a 1px shadow for readability.
        cv2.putText(legend, name, (46, y_offset + 1),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (100, 100, 100), 1)
        cv2.putText(legend, name, (45, y_offset),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 1)
        y_offset += 30
    # Consensus-view agreement color scale.
    cv2.putText(legend, "Consensus View (Agreement Level):", (15, y_offset + 10),
                cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 2)
    y_offset += 35
    agreement_colors = [
        ("1/4 (25%)", (0, 0, 255)),    # Red
        ("2/4 (50%)", (0, 165, 255)),  # Orange
        ("3/4 (75%)", (0, 255, 255)),  # Yellow
        ("4/4 (100%)", (0, 255, 0))    # Green
    ]
    for i, (label, color) in enumerate(agreement_colors):
        x_pos = 45 + i * 70
        cv2.rectangle(legend, (x_pos, y_offset), (x_pos + 20, y_offset + 20), color, -1)
        cv2.rectangle(legend, (x_pos, y_offset), (x_pos + 20, y_offset + 20), (0, 0, 0), 1)
        cv2.putText(legend, label, (x_pos - 10, y_offset + 35),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.4, (0, 0, 0), 1)
    # Single exit point; the original had a duplicate unreachable `return`.
    return legend
def get_example_images():
    """Collect up to 12 sample image paths from likely example folders."""
    candidate_dirs = ["faces", "examples", "samples", "."]
    patterns = ["*.jpg", "*.jpeg", "*.png", "*.webp"]
    found = []
    for directory in candidate_dirs:
        if not os.path.exists(directory):
            continue
        for pattern in patterns:
            found.extend(glob.glob(os.path.join(directory, pattern)))
    # De-duplicate, order deterministically, and cap at 12 entries.
    return sorted(set(found))[:12]
def detect_faces_mediapipe(image, confidence_threshold=0.5):
    """Run MediaPipe full-range face detection on a BGR image.

    Returns:
        (boxes, confidences): lists of [x, y, w, h] boxes (clamped to the
        image bounds) and their detection scores.
    """
    rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    with mp_face_detection.FaceDetection(
            min_detection_confidence=confidence_threshold,
            model_selection=1  # full-range model
    ) as face_detection:
        results = face_detection.process(rgb_image)
    if not results.detections:
        return [], []
    bboxes = []
    confidences = []
    h, w, _ = image.shape
    for detection in results.detections:
        rel = detection.location_data.relative_bounding_box
        # Convert relative coordinates to pixels, clamped to image bounds.
        x = max(0, int(rel.xmin * w))
        y = max(0, int(rel.ymin * h))
        box_w = min(int(rel.width * w), w - x)
        box_h = min(int(rel.height * h), h - y)
        bboxes.append([x, y, box_w, box_h])
        confidences.append(detection.score[0] if detection.score else confidence_threshold)
    return bboxes, confidences
def detect_faces_mtcnn(image, confidence_threshold=0.5):
    """Run MTCNN face detection on a BGR image.

    Returns:
        (boxes, confidences): [x, y, w, h] boxes and float probabilities,
        keeping only detections at or above `confidence_threshold`.
    """
    rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    boxes, probs = mtcnn.detect(rgb_image)
    if boxes is None:
        return [], []
    result_boxes, result_confidences = [], []
    for box, prob in zip(boxes, probs):
        if prob < confidence_threshold:
            continue  # drop low-confidence detections
        x1, y1, x2, y2 = map(int, box)
        result_boxes.append([x1, y1, x2 - x1, y2 - y1])
        result_confidences.append(float(prob))
    return result_boxes, result_confidences
def detect_faces_retinaface(image, confidence_threshold=0.5):
    """Run RetinaFace (via InsightFace) detection on a BGR image.

    Returns:
        (boxes, confidences): [x, y, w, h] boxes and float det_scores,
        keeping only detections at or above `confidence_threshold`.
    """
    rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    result_boxes, result_confidences = [], []
    for face in face_app.get(rgb_image):
        score = face.det_score
        if score < confidence_threshold:
            continue
        x1, y1, x2, y2 = face.bbox.astype(int)
        result_boxes.append([x1, y1, x2 - x1, y2 - y1])
        result_confidences.append(float(score))
    return result_boxes, result_confidences
def detect_faces_yolo(image, confidence_threshold=0.5):
    """Detect faces with YOLOv8.

    In face mode the model's class-0 boxes are used directly; with the
    general COCO model, person boxes (also class 0) are adapted by taking
    the top 30% of the box as an approximate face region and scaling the
    confidence down.

    Returns:
        (boxes, confidences): [x, y, w, h] boxes and scores; empty lists
        when the model failed to load at startup.
    """
    if yolo_model is None:
        return [], []
    results = yolo_model(image, conf=confidence_threshold)
    boxes = []
    confidences = []
    for r in results:
        for box in r.boxes:
            # Class 0 is "face" for the face model and "person" for COCO;
            # all other classes are ignored either way, so the two original
            # duplicated branches collapse into one.
            if int(box.cls) != 0:
                continue
            x1, y1, x2, y2 = box.xyxy[0].tolist()
            w = x2 - x1
            h = y2 - y1
            conf = float(box.conf)
            if not yolo_face_mode:
                # Heuristic: the face occupies roughly the top 30% of a
                # person box (original comment said "1/3" but used 0.3).
                h = h * 0.3
                conf *= 0.7  # reduce confidence for adapted detections
            boxes.append([int(x1), int(y1), int(w), int(h)])
            confidences.append(conf)
    return boxes, confidences
def draw_ground_truth(image, ground_truth_boxes):
    """Return a copy of `image` with ground-truth boxes rendered in cyan."""
    annotated = image.copy()
    for idx, (x, y, w, h) in enumerate(ground_truth_boxes):
        # Light cyan fill via alpha blending.
        overlay = annotated.copy()
        cv2.rectangle(overlay, (x, y), (x + w, y + h), (0, 255, 255), -1)
        cv2.addWeighted(overlay, 0.1, annotated, 0.9, 0, annotated)
        # White outline behind the cyan border for contrast.
        cv2.rectangle(annotated, (x - 1, y - 1), (x + w + 1, y + h + 1), (255, 255, 255), 3)
        cv2.rectangle(annotated, (x, y), (x + w, y + h), (0, 255, 255), 2)
        # Label on a white strip above the box.
        label = f"GT {idx+1}"
        label_size, _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2)
        cv2.rectangle(annotated, (x, y - 25), (x + label_size[0] + 5, y - 2), (255, 255, 255), -1)
        cv2.putText(annotated, label, (x + 2, y - 8),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 2)
    return annotated
def iou(bbox1, bbox2):
    """Intersection-over-Union of two [x, y, w, h] boxes (0.0 when disjoint)."""
    ax1, ay1, aw, ah = bbox1
    bx1, by1, bw, bh = bbox2
    # Corner coordinates.
    ax2, ay2 = ax1 + aw, ay1 + ah
    bx2, by2 = bx1 + bw, by1 + bh
    # Overlap rectangle (may be empty).
    ix1, iy1 = max(ax1, bx1), max(ay1, by1)
    ix2, iy2 = min(ax2, bx2), min(ay2, by2)
    if ix2 < ix1 or iy2 < iy1:
        return 0.0
    inter = (ix2 - ix1) * (iy2 - iy1)
    union = aw * ah + bw * bh - inter
    return inter / union if union > 0 else 0.0
def filter_overlapping_boxes(detections_dict, threshold=0.5):
    """Merge detections from multiple detectors into unique face candidates.

    Two boxes are treated as the same face when their IoU exceeds
    `threshold` (the first stored box is kept as the representative).
    Each merged entry records the contributing detectors, their individual
    confidences, and the running average confidence.
    """
    merged = []
    for detector_name, (boxes, confidences) in detections_dict.items():
        for box, conf in zip(boxes, confidences):
            match = None
            for candidate in merged:
                if iou(box, candidate['box']) > threshold:
                    match = candidate
                    break
            if match is None:
                merged.append({
                    'box': box,
                    'detectors': [detector_name],
                    'confidences': {detector_name: conf},
                    'avg_confidence': conf
                })
            elif detector_name not in match['detectors']:
                # Only count each detector once per merged face.
                match['detectors'].append(detector_name)
                match['confidences'][detector_name] = conf
                match['avg_confidence'] = np.mean(list(match['confidences'].values()))
    return [
        {
            'box': info['box'],
            'detector_count': len(set(info['detectors'])),
            'detectors': list(set(info['detectors'])),
            'avg_confidence': info['avg_confidence'],
            'confidences': info['confidences']
        }
        for info in merged
    ]
def evaluate_image_quality(image):
    """Score image quality (0-100) and collect human-readable issues.

    Returns:
        (quality_ok, message, score): quality_ok is True when score >= 50.
    """
    h, w = image.shape[:2]
    issues = []
    score = 100  # start from a perfect score, deduct per issue
    if h < 100 or w < 100:
        issues.append(f"Image too small ({w}x{h}, min 100x100)")
        score -= 30
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # Laplacian variance as a sharpness proxy: low variance == blurry.
    blur_value = cv2.Laplacian(gray, cv2.CV_64F).var()
    if blur_value < 50:
        issues.append(f"Image blurry (score: {blur_value:.1f}, min 50)")
        score -= 20
    brightness = np.mean(gray)
    if brightness < 30:
        issues.append(f"Image too dark (brightness: {brightness:.1f})")
        score -= 20
    elif brightness > 225:
        issues.append(f"Image too bright (brightness: {brightness:.1f})")
        score -= 20
    contrast = gray.std()
    if contrast < 20:
        issues.append(f"Low contrast (std: {contrast:.1f})")
        score -= 15
    # Mean absolute Laplacian as a rough high-frequency noise estimate.
    noise_level = np.mean(np.abs(cv2.Laplacian(gray, cv2.CV_64F)))
    if noise_level > 20:
        issues.append(f"High noise level ({noise_level:.1f})")
        score -= 10
    message = f"Quality score: {score}/100"
    if issues:
        message += " - Issues: " + "; ".join(issues)
    return score >= 50, message, score
def calculate_metrics_with_ground_truth(unique_faces, ground_truth_faces, iou_threshold=0.5):
    """Compute precision/recall/F1/IoU of merged detections vs ground truth.

    Detections are matched greedily, in their given order, to the unmatched
    ground-truth box with the highest IoU at or above `iou_threshold`.
    """
    if not ground_truth_faces:
        # Without ground truth every detection is a false positive and the
        # ratio metrics are undefined.
        return {
            "precision": None,
            "recall": None,
            "f1_score": None,
            "true_positives": 0,
            "false_positives": len(unique_faces),
            "false_negatives": 0,
            "avg_iou": None,
            "avg_confidence": np.mean([f['avg_confidence'] for f in unique_faces]) if unique_faces else 0
        }
    det_boxes = [face['box'] for face in unique_faces]
    det_confs = [face['avg_confidence'] for face in unique_faces]
    matched = [False] * len(ground_truth_faces)
    iou_scores = []
    tp_confidences = []
    tp = 0
    for det_box, det_conf in zip(det_boxes, det_confs):
        best_iou, best_idx = 0, -1
        # Best unmatched ground-truth box for this detection.
        for gt_idx, gt_box in enumerate(ground_truth_faces):
            if matched[gt_idx]:
                continue
            overlap = iou(det_box, gt_box)
            if overlap >= iou_threshold and overlap > best_iou:
                best_iou, best_idx = overlap, gt_idx
        if best_idx >= 0:
            tp += 1
            matched[best_idx] = True
            iou_scores.append(best_iou)
            tp_confidences.append(det_conf)
    fp = len(det_boxes) - tp
    fn = matched.count(False)
    precision = tp / len(det_boxes) if len(det_boxes) > 0 else 0
    recall = tp / len(ground_truth_faces) if len(ground_truth_faces) > 0 else 0
    f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0
    return {
        "precision": precision,
        "recall": recall,
        "f1_score": f1,
        "true_positives": tp,
        "false_positives": fp,
        "false_negatives": fn,
        "avg_iou": np.mean(iou_scores) if iou_scores else None,
        "avg_confidence": np.mean(tp_confidences) if tp_confidences else 0
    }
def parse_ground_truth(ground_truth_str):
    """Parse ground-truth boxes from newline-separated "x,y,w,h" text.

    Invalid lines are reported and skipped individually, so one bad line
    no longer discards every previously parsed box (the original wrapped
    the whole loop in one try and returned [] on the first parse error).

    Returns:
        list of [x, y, w, h] int lists (possibly empty).
    """
    if not ground_truth_str or ground_truth_str.strip() == "":
        return []
    ground_truth_boxes = []
    for line_num, line in enumerate(ground_truth_str.strip().split("\n"), 1):
        if not line.strip():
            continue
        # Parse as "x,y,w,h"; report and skip unparseable lines.
        try:
            coords = [int(x.strip()) for x in line.split(",")]
        except ValueError as e:
            print(f"Error parsing ground truth: {str(e)}")
            continue
        if len(coords) != 4:
            print(f"Line {line_num}: Expected 4 coordinates, got {len(coords)}")
            continue
        x, y, w, h = coords
        if w > 0 and h > 0:  # dimensions must be positive
            ground_truth_boxes.append(coords)
        else:
            print(f"Line {line_num}: Invalid dimensions (w={w}, h={h})")
    return ground_truth_boxes
def create_comparison_grid(images_dict, max_cols=3):
    """Tile labelled images into a single comparison grid image.

    Args:
        images_dict: mapping of title -> BGR image (None values are skipped
            when drawing).
        max_cols: maximum number of grid columns.

    Returns:
        np.ndarray grid image; a small blank canvas when no images given.
    """
    images = list(images_dict.values())
    titles = list(images_dict.keys())
    n_images = len(images)
    if n_images == 0:
        # Guard: the original divided by zero columns on an empty dict.
        return np.ones((100, 100, 3), dtype=np.uint8) * 240
    n_cols = min(n_images, max_cols)
    n_rows = (n_images + n_cols - 1) // n_cols
    # Cell dimensions: half-scale of the first image.
    img_h, img_w = images[0].shape[:2]
    scale = 0.5
    cell_w = int(img_w * scale)
    cell_h = int(img_h * scale)
    # Canvas with 10px gutters and 40px per-row title strips.
    grid_w = cell_w * n_cols + 10 * (n_cols + 1)
    grid_h = cell_h * n_rows + 40 * n_rows + 10
    grid = np.ones((grid_h, grid_w, 3), dtype=np.uint8) * 240
    for idx, (img, title) in enumerate(zip(images, titles)):
        if img is None:
            continue
        row = idx // n_cols
        col = idx % n_cols
        img_resized = cv2.resize(img, (cell_w, cell_h))
        x = col * (cell_w + 10) + 10
        y = row * (cell_h + 40) + 30
        grid[y:y+cell_h, x:x+cell_w] = img_resized
        cv2.putText(grid, title, (x, y-10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 2)
    return grid
def process_image(image, min_detector_agreement, ground_truth_str, quality_check, confidence_threshold,
                  mediapipe_enabled, mtcnn_enabled, retinaface_enabled, yolo_enabled):
    """Process the image with selected face detectors and provide enhanced metrics.

    Runs every enabled detector, merges overlapping detections into unique
    faces, evaluates against optional ground truth, and renders the
    visualizations consumed by the Gradio UI.

    Returns a 9-tuple:
        (all-detections image, metrics text, verdict image, original image,
         verdict text, per-detector status dict, ground-truth image,
         legend image, comparison grid).
    """
    if image is None:
        return None, "No image uploaded", None, None, "❌ REJECTED: No image provided", None, None, None, None
    # Create legend image (fall back to a blank canvas on failure)
    try:
        legend_image = create_detection_legend()
    except Exception as e:
        print(f"Error creating legend: {str(e)}")
        legend_image = np.ones((250, 350, 3), dtype=np.uint8) * 255
    # Parse ground truth boxes
    ground_truth_boxes = parse_ground_truth(ground_truth_str)
    # Make a copy to avoid modifying the original
    image_copy = image.copy()
    # Check image quality if enabled; reject early on poor quality
    quality_score = 100
    if quality_check:
        quality_ok, quality_message, quality_score = evaluate_image_quality(image)
        if not quality_ok:
            return None, quality_message, None, None, f"❌ REJECTED: {quality_message}", None, None, legend_image, None
    # Detect faces with different methods
    detections = {}
    detector_results = {}
    processing_times = {}
    # Run each detector with timing
    import time
    # Build detector functions dict based on selected models
    detector_functions = {}
    model_enabled = {
        "MediaPipe": mediapipe_enabled,
        "MTCNN": mtcnn_enabled,
        "RetinaFace": retinaface_enabled,
        "YOLOv8": yolo_enabled
    }
    detector_func_map = {
        "MediaPipe": detect_faces_mediapipe,
        "MTCNN": detect_faces_mtcnn,
        "RetinaFace": detect_faces_retinaface,
        "YOLOv8": detect_faces_yolo
    }
    # Only include enabled detectors
    for detector_name, func in detector_func_map.items():
        if model_enabled[detector_name]:
            detector_functions[detector_name] = func
    # Update face_detectors dict for dynamic counting
    enabled_face_detectors = {k: v for k, v in face_detectors.items() if model_enabled[k]}
    for detector_name, detect_func in detector_functions.items():
        try:
            start_time = time.time()
            boxes, confidences = detect_func(image, confidence_threshold)
            processing_times[detector_name] = (time.time() - start_time) * 1000  # ms
            detections[detector_name] = (boxes, confidences)
            detector_results[detector_name] = {
                "status": "βœ… Success",
                "detections": len(boxes),
                "avg_confidence": np.mean(confidences) if confidences else 0,
                "time_ms": f"{processing_times[detector_name]:.1f}"
            }
        except Exception as e:
            # A failing detector is reported but does not abort the run
            detections[detector_name] = ([], [])
            detector_results[detector_name] = {
                "status": f"❌ Error: {str(e)}",
                "detections": 0,
                "avg_confidence": 0,
                "time_ms": "N/A"
            }
            print(f"{detector_name} error: {str(e)}")
    # Filter overlapping boxes with lower threshold for better merging
    unique_faces = filter_overlapping_boxes(detections, threshold=0.3)
    # Sort by confidence
    unique_faces = sorted(unique_faces, key=lambda x: x['avg_confidence'], reverse=True)
    # Create visualizations with better colors
    image_with_boxes = image_copy.copy()
    colors = {
        "MediaPipe": (0, 255, 0),    # Green
        "MTCNN": (0, 0, 255),        # Red
        "RetinaFace": (255, 255, 0), # Yellow
        "YOLOv8": (255, 0, 255)      # Magenta
    }
    # Draw all detections with colored boxes and better visibility
    for detector, (boxes, confidences) in detections.items():
        for box, conf in zip(boxes, confidences):
            x, y, w, h = box
            # Draw white background for better contrast
            cv2.rectangle(image_with_boxes, (x-1, y-1), (x+w+1, y+h+1), (255, 255, 255), 3)
            # Draw colored box
            cv2.rectangle(image_with_boxes, (x, y), (x+w, y+h), colors[detector], 2)
            # Add label with white background
            label = f"{detector} ({conf:.2f})"
            label_size, _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
            cv2.rectangle(image_with_boxes, (x, y-22), (x + label_size[0] + 4, y-2), (255, 255, 255), -1)
            cv2.putText(image_with_boxes, label, (x+2, y-7),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, colors[detector], 2)
    # Add ground truth boxes if provided
    if ground_truth_boxes:
        image_with_boxes = draw_ground_truth(image_with_boxes, ground_truth_boxes)
    # Prepare enhanced metrics
    metrics = {
        "πŸ“Š Detection Summary": "",
        "Total unique faces": len(unique_faces),
        "Total detections": sum(len(boxes) for boxes, _ in detections.values()),
        "": "",
        "πŸ” Detector Performance": ""
    }
    # Add metrics for enabled detectors only
    for detector_name in detector_functions.keys():
        if detector_name in detections:
            detection_count = len(detections[detector_name][0])
            processing_time = processing_times.get(detector_name, 0)
            metrics[detector_name] = f"{detection_count} faces ({processing_time:.1f}ms)"
    # Add ground truth metrics if provided
    if ground_truth_boxes:
        gt_metrics = calculate_metrics_with_ground_truth(unique_faces, ground_truth_boxes)
        metrics.update({
            " ": "",
            "πŸ“ Ground Truth Evaluation": "",
            "Ground truth faces": len(ground_truth_boxes),
            "True positives": gt_metrics["true_positives"],
            "False positives": gt_metrics["false_positives"],
            "False negatives": gt_metrics["false_negatives"],
            "Precision": f"{gt_metrics['precision']:.3f}" if gt_metrics['precision'] is not None else "N/A",
            "Recall": f"{gt_metrics['recall']:.3f}" if gt_metrics['recall'] is not None else "N/A",
            "F1 Score": f"{gt_metrics['f1_score']:.3f}" if gt_metrics['f1_score'] is not None else "N/A",
            "Average IoU": f"{gt_metrics['avg_iou']:.3f}" if gt_metrics['avg_iou'] is not None else "N/A"
        })
    # Add image quality metrics
    if quality_check:
        metrics.update({
            " ": "",
            "πŸ–ΌοΈ Image Quality": "",
            "Quality Score": f"{quality_score}/100"
        })
    # Calculate confidence scores for each face
    face_confidence = {}
    num_enabled_detectors = len(enabled_face_detectors)
    for i, face in enumerate(unique_faces):
        detector_ratio = face['detector_count'] / num_enabled_detectors
        conf_details = {
            "Agreement": f"{face['detector_count']}/{num_enabled_detectors} detectors",
            "Avg Confidence": f"{face['avg_confidence']:.3f}",
            "Detected by": ", ".join(face['detectors'])
        }
        # Add individual detector confidences
        for det in face['detectors']:
            if det in face['confidences']:
                conf_details[f"{det} conf"] = f"{face['confidences'][det]:.3f}"
        face_confidence[f"Face {i+1}"] = conf_details
    # Create metrics text
    metrics_text = "\n".join([f"{k}: {v}" for k, v in metrics.items()])
    # Create detailed face info text
    agreement_text = "\n🎯 Face Detection Details:\n"
    for face_id, conf in face_confidence.items():
        agreement_text += f"\n{face_id}:\n"
        for metric, value in conf.items():
            agreement_text += f" {metric}: {value}\n"
    # Check acceptance criteria
    accepted = True
    verdict_details = []
    # Check if we have enough detectors agreeing
    valid_faces = [face for face in unique_faces if face['detector_count'] >= min_detector_agreement]
    # If ground truth is provided, check against it
    if ground_truth_boxes:
        metrics_verdict = calculate_metrics_with_ground_truth(unique_faces, ground_truth_boxes)
        precision_threshold = 0.7
        recall_threshold = 0.7
        if metrics_verdict["precision"] < precision_threshold or metrics_verdict["recall"] < recall_threshold:
            accepted = False
            verdict_details.append(f"Detection quality below threshold")
            verdict_details.append(f"Precision: {metrics_verdict['precision']:.2f}, Recall: {metrics_verdict['recall']:.2f}")
    # Check detector agreement
    if len(valid_faces) == 0:
        accepted = False
        verdict_details.append(f"No faces with {min_detector_agreement}+ detector agreement")
    else:
        verdict_details.append(f"{len(valid_faces)} faces with {min_detector_agreement}+ detector agreement")
    # Create verdict display
    verdict = " | ".join(verdict_details)
    if accepted:
        verdict_text = f"βœ… ACCEPTED: {verdict}"
        verdict_color = (0, 200, 0)  # Green for accepted
    else:
        verdict_text = f"❌ REJECTED: {verdict}"
        verdict_color = (0, 0, 200)  # Red for rejected
    # Final output text
    final_text = f"{metrics_text}\n{agreement_text}"
    # Generate consensus result image with better visibility
    result_image = image_copy.copy()
    # Draw white background rectangles first for better contrast
    for face in unique_faces:
        x, y, w, h = face['box']
        cv2.rectangle(result_image, (x-2, y-2), (x+w+2, y+h+2), (255, 255, 255), 4)
    for i, face in enumerate(unique_faces):
        x, y, w, h = face['box']
        # Color based on detector agreement with more distinct colors
        agreement = face['detector_count'] / num_enabled_detectors
        if agreement <= 0.25:
            color = (0, 0, 255)  # Red for low agreement
        elif agreement <= 0.5:
            color = (0, 165, 255)  # Orange
        elif agreement <= 0.75:
            color = (0, 255, 255)  # Yellow
        else:
            color = (0, 255, 0)  # Green for high agreement
        # Draw box with fixed thick line for visibility
        thickness = 3
        cv2.rectangle(result_image, (x, y), (x+w, y+h), color, thickness)
        # Add label with better background
        # NOTE(review): denominator is len(face_detectors) (always 4), not
        # num_enabled_detectors — the label looks wrong when models are
        # disabled; confirm intended behavior before changing.
        label = f"F{i+1} ({face['detector_count']}/{len(face_detectors)}) {face['avg_confidence']:.2f}"
        label_size, _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2)
        # White background for label
        cv2.rectangle(result_image, (x, y-25), (x + label_size[0] + 6, y-2), (255, 255, 255), -1)
        # Black text for contrast
        cv2.putText(result_image, label, (x+3, y-8),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 2)
    # Add ground truth boxes with enhanced visibility
    if ground_truth_boxes:
        for i, box in enumerate(ground_truth_boxes):
            x, y, w, h = box
            # White background for visibility
            cv2.rectangle(result_image, (x-2, y-2), (x+w+2, y+h+2), (255, 255, 255), 4)
            cv2.rectangle(result_image, (x, y), (x+w, y+h), (0, 255, 255), 3)
            # Label with white background
            label = f"GT {i+1}"
            label_size, _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2)
            cv2.rectangle(result_image, (x, y+h+2), (x + label_size[0] + 6, y+h+25), (255, 255, 255), -1)
            cv2.putText(result_image, label, (x+3, y+h+18),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 2)
    # Add verdict overlay (semi-transparent white banner top-left)
    verdict_image = result_image.copy()
    overlay = verdict_image.copy()
    cv2.rectangle(overlay, (10, 10), (len(verdict_text) * 12, 50), (255, 255, 255), -1)
    cv2.addWeighted(overlay, 0.7, verdict_image, 0.3, 0, verdict_image)
    cv2.putText(verdict_image, verdict_text, (15, 35),
                cv2.FONT_HERSHEY_SIMPLEX, 0.8, verdict_color, 2)
    # Create ground truth visualization
    if ground_truth_boxes:
        gt_image = draw_ground_truth(image_copy, ground_truth_boxes)
    else:
        gt_image = None
    # Create comparison grid
    comparison_images = {
        "Original": image_copy,
        "All Detections": image_with_boxes,
        "Consensus": result_image
    }
    if gt_image is not None:
        comparison_images["Ground Truth"] = gt_image
    comparison_grid = create_comparison_grid(comparison_images)
    return (image_with_boxes, final_text, verdict_image, image, verdict_text,
            detector_results, gt_image, legend_image, comparison_grid)
# ---------------------------------------------------------------------------
# Gradio interface with enhanced styling
# ---------------------------------------------------------------------------
css = """
.gradio-container {
    font-family: 'Arial', sans-serif;
}
.gr-button-primary {
    background-color: #2563eb !important;
}
.verdict-box textarea {
    font-size: 1.2em !important;
    font-weight: bold !important;
}
"""
with gr.Blocks(title="GUARD Robustness Face Detection Ensemble", css=css) as demo:
    gr.Markdown("""
    # 🎯 GUARD Robustness
    ### See the results from an ensemble of face detectors and if they would pass the P/F criteria
    """)
    # Get sample image paths (shown as clickable examples when present)
    examples = get_example_images()
    with gr.Row():
        with gr.Column(scale=1):
            # Input section
            input_image = gr.Image(type="numpy", label="πŸ“Έ Upload Image")
            # Display examples if available
            if examples:
                gr.Examples(
                    examples=examples,
                    inputs=input_image,
                    examples_per_page=6,
                    label="πŸ“ Example Images"
                )
            with gr.Accordion("βš™οΈ Detection Settings", open=True):
                gr.Markdown("**Select Face Detection Models:**")
                with gr.Row():
                    mediapipe_enabled = gr.Checkbox(value=True, label="MediaPipe", scale=1)
                    mtcnn_enabled = gr.Checkbox(value=True, label="MTCNN", scale=1)
                    retinaface_enabled = gr.Checkbox(value=True, label="RetinaFace", scale=1)
                    yolo_enabled = gr.Checkbox(value=True, label="YOLOv8", scale=1)
                min_detector_agreement = gr.Slider(
                    minimum=1, maximum=4, value=2, step=1,
                    label="Minimum Detector Agreement",
                    info="Number of detectors that must agree on a face"
                )
                confidence_threshold = gr.Slider(
                    minimum=0.1, maximum=0.9, value=0.5, step=0.1,
                    label="Confidence Threshold",
                    info="Minimum confidence score for detections"
                )
                quality_check = gr.Checkbox(
                    value=True,
                    label="Enable Image Quality Check",
                    info="Check for blur, brightness, and contrast issues"
                )
                ground_truth = gr.Textbox(
                    label="Ground Truth Faces (Optional)",
                    placeholder="Enter face coordinates (x,y,w,h), one per line:\n100,150,50,60\n200,250,45,55",
                    lines=4,
                    info="Provide ground truth for accuracy evaluation"
                )
            submit_btn = gr.Button("πŸš€ Detect Faces", variant="primary", size="lg")
        with gr.Column(scale=2):
            # Results section
            verdict_box = gr.Textbox(
                label="πŸ“‹ Verdict",
                lines=1,
                elem_classes=["verdict-box"]
            )
            with gr.Tabs():
                with gr.Tab("🎯 Consensus Result"):
                    consensus_image = gr.Image(label="Consensus Detection with Verdict")
                    gr.Markdown("*Boxes colored by agreement level: Red (low) β†’ Yellow β†’ Green (high)*")
                with gr.Tab("πŸ” All Detections"):
                    output_image = gr.Image(label="All Model Detections")
                    metrics_text = gr.Textbox(label="Detection Metrics & Analysis", lines=20)
                with gr.Tab("πŸ“Š Comparison Grid"):
                    comparison_grid = gr.Image(label="Side-by-side Comparison")
                with gr.Tab("πŸ“Έ Original"):
                    original_image = gr.Image(label="Original Image")
                with gr.Tab("βœ”οΈ Ground Truth"):
                    ground_truth_image = gr.Image(label="Ground Truth Visualization")
                with gr.Tab("🎨 Legend"):
                    legend_image = gr.Image(label="Detection Box Legend")
                with gr.Tab("πŸ“ˆ Detector Status"):
                    detector_status = gr.Json(label="Detector Performance Details")
    # Footer
    gr.Markdown("""
    ---
    ### πŸ“– About
    This demo compares four reliable face detection models:
    - **MediaPipe**: Google's lightweight face detection
    - **MTCNN**: Multi-task Cascaded CNNs
    - **RetinaFace**: State-of-the-art face detection via InsightFace
    - **YOLOv8**: Latest YOLO architecture adapted for face detection
    Select which models to run and compare their performance. The consensus view shows faces colored by detector agreement level.
    """)
    # Process button handler
    def process_handler(image, min_detector_agreement, ground_truth_str, quality_check, confidence_threshold,
                        mediapipe_enabled, mtcnn_enabled, retinaface_enabled, yolo_enabled):
        """Wrap process_image, converting missing input and exceptions into UI-friendly 9-element outputs."""
        if image is None:
            legend_img = create_detection_legend()
            return [None, "No image selected", None, None,
                    "❌ REJECTED: No image provided", None, None, legend_img, None]
        try:
            return process_image(image, min_detector_agreement, ground_truth_str,
                                 quality_check, confidence_threshold,
                                 mediapipe_enabled, mtcnn_enabled, retinaface_enabled, yolo_enabled)
        except Exception as e:
            print(f"Error processing image: {str(e)}")
            legend_img = create_detection_legend()
            return [None, f"Error: {str(e)}", None, image,
                    f"❌ ERROR: {str(e)}", None, None, legend_img, None]
    # Wire the button; inputs/outputs order must match process_image's signature
    # and its 9-tuple return order.
    submit_btn.click(
        fn=process_handler,
        inputs=[input_image, min_detector_agreement, ground_truth, quality_check, confidence_threshold,
                mediapipe_enabled, mtcnn_enabled, retinaface_enabled, yolo_enabled],
        outputs=[output_image, metrics_text, consensus_image, original_image,
                 verdict_box, detector_status, ground_truth_image, legend_image, comparison_grid]
    )
# Launch the app
if __name__ == "__main__":
    demo.launch(share=True)