"""Gradio app that analyzes the distribution of face sizes in an image.

Faces are detected with an ensemble of MTCNN and RetinaFace, merged with
non-maximum suppression, grouped into fixed-width size bins, and shown as an
annotated image, a histogram, and a per-bin grid of face crops.
"""

import os
from collections import defaultdict

import cv2
import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import torch
from PIL import Image, ImageDraw
# BUGFIX: facenet_pytorch does not export RetinaFace; only MTCNN is used from it.
from facenet_pytorch import MTCNN
from retinaface.pre_trained_models import get_model as get_retinaface_model

# Set up device
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Load face detector models for the ensemble
models = {}

# MTCNN with keep_all so every face in the image is returned, not just the best one
models['mtcnn'] = MTCNN(keep_all=True, device=device)

# RetinaFace (resnet50 backbone, inputs resized to at most 1024 px)
models['retinaface'] = get_retinaface_model("resnet50", max_size=1024, device=device.type)
models['retinaface'].eval()


def load_images_from_folder(folder_path):
    """Return sorted paths of all JPEG images directly inside *folder_path*.

    Missing folders yield an empty list instead of raising.
    """
    image_paths = []
    if os.path.exists(folder_path):
        for filename in os.listdir(folder_path):
            if filename.lower().endswith(('.jpg', '.jpeg')):
                image_paths.append(os.path.join(folder_path, filename))
    return sorted(image_paths)


def detect_faces_ensemble(image):
    """Detect faces using an ensemble of MTCNN and RetinaFace.

    Args:
        image: a file path, an RGB PIL image, or a numpy array (3-channel
            arrays are assumed RGB; others go through BGR->RGB conversion —
            NOTE(review): confirm the caller's channel convention).

    Returns:
        Tuple of (boxes, pil_image) where boxes is a list of
        [x1, y1, x2, y2, confidence] after non-maximum suppression.
    """
    # Normalize the input to an RGB PIL image
    if isinstance(image, str):
        image = Image.open(image).convert('RGB')
    elif isinstance(image, np.ndarray):
        if image.shape[2] == 3:
            image = Image.fromarray(image)
        else:
            image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

    all_boxes = []

    # MTCNN detections
    boxes_mtcnn, probs_mtcnn = models['mtcnn'].detect(image)
    if boxes_mtcnn is not None:
        for box, prob in zip(boxes_mtcnn, probs_mtcnn):
            if prob is None:  # MTCNN may emit None probabilities for weak boxes
                continue
            x1, y1, x2, y2 = box
            all_boxes.append([int(x1), int(y1), int(x2), int(y2), float(prob)])

    # RetinaFace detections.
    # BUGFIX: models from retinaface.pre_trained_models expose predict_jsons(),
    # not preprocess_image()/predict(); it returns a list of dicts with
    # 'bbox' and 'score' keys (a single entry with empty bbox when no face).
    with torch.no_grad():
        annotations = models['retinaface'].predict_jsons(np.array(image))
    for ann in annotations:
        bbox = ann.get('bbox') or []
        score = ann.get('score', -1)
        if len(bbox) != 4 or score is None or score < 0:
            continue
        x1, y1, x2, y2 = bbox
        all_boxes.append([int(x1), int(y1), int(x2), int(y2), float(score)])

    # Merge duplicate detections coming from the two models
    if len(all_boxes) > 0:
        all_boxes = non_maximum_suppression(all_boxes, 0.5)

    return all_boxes, image


def calculate_iou(box1, box2):
    """Return the intersection-over-union of two [x1, y1, x2, y2, ...] boxes."""
    x1_1, y1_1, x2_1, y2_1 = box1[:4]
    x1_2, y1_2, x2_2, y2_2 = box2[:4]

    # Intersection rectangle
    x_left = max(x1_1, x1_2)
    y_top = max(y1_1, y1_2)
    x_right = min(x2_1, x2_2)
    y_bottom = min(y2_1, y2_2)

    if x_right < x_left or y_bottom < y_top:
        return 0.0  # no overlap

    intersection_area = (x_right - x_left) * (y_bottom - y_top)

    # Union = sum of areas minus the double-counted intersection
    box1_area = (x2_1 - x1_1) * (y2_1 - y1_1)
    box2_area = (x2_2 - x1_2) * (y2_2 - y1_2)
    union_area = box1_area + box2_area - intersection_area

    return intersection_area / union_area


def non_maximum_suppression(boxes, iou_threshold):
    """Greedy NMS: keep highest-confidence boxes, drop overlapping ones.

    Args:
        boxes: list of [x1, y1, x2, y2, confidence].
        iou_threshold: boxes overlapping a kept box by at least this IoU
            are discarded.

    Returns:
        The surviving boxes, ordered by descending confidence.
    """
    if len(boxes) == 0:
        return []

    # Sort by confidence (descending) so the best box always wins
    boxes = sorted(boxes, key=lambda x: x[4], reverse=True)

    kept_boxes = []
    while boxes:
        current_box = boxes.pop(0)
        kept_boxes.append(current_box)
        # Keep only boxes that do not overlap the one just accepted
        boxes = [b for b in boxes if calculate_iou(current_box, b) < iou_threshold]

    return kept_boxes


def bin_faces_by_size(faces):
    """Group faces into fixed-width bins keyed by max(width, height).

    Args:
        faces: list of [x1, y1, x2, y2, confidence] boxes.

    Returns:
        {} when no faces, otherwise a dict with:
            'bin_counts': {bin_start_px: count}
            'bin_faces':  {bin_start_px: [(face, size_px), ...]}
            'bin_edges':  list of bin start values covering the size range
    """
    bin_size = 20  # width of each bin in pixels

    # Size of a face = the larger of its box's width and height
    face_sizes = []
    for face in faces:
        x1, y1, x2, y2, _ = face
        face_sizes.append(max(x2 - x1, y2 - y1))

    if not face_sizes:
        return {}

    min_size = min(face_sizes)
    max_size = max(face_sizes)

    # Bin edges snapped to multiples of bin_size, covering [min_size, max_size]
    bin_edges = range(
        bin_size * (min_size // bin_size),
        bin_size * (max_size // bin_size + 2),
        bin_size,
    )

    # Place each face into the bin whose start is floor(size / bin_size) * bin_size
    bin_counts = defaultdict(int)
    bin_faces = defaultdict(list)
    for i, size in enumerate(face_sizes):
        bin_idx = size // bin_size * bin_size
        bin_counts[bin_idx] += 1
        bin_faces[bin_idx].append((faces[i], size))

    return {
        'bin_counts': dict(bin_counts),
        'bin_faces': dict(bin_faces),
        'bin_edges': list(bin_edges),
    }


def plot_face_histogram(bin_data):
    """Return a matplotlib figure with a bar chart of face-size bins."""
    if not bin_data or len(bin_data['bin_counts']) == 0:
        # Empty placeholder figure when nothing was detected
        fig, ax = plt.subplots(figsize=(10, 6))
        ax.set_title('Face Size Distribution')
        ax.set_xlabel('Face Size (pixels)')
        ax.set_ylabel('Count')
        ax.text(0.5, 0.5, 'No faces detected',
                ha='center', va='center', transform=ax.transAxes)
        return fig

    # Bin keys are ints, so sorting here is numeric
    bins = sorted(bin_data['bin_counts'].keys())
    counts = [bin_data['bin_counts'][b] for b in bins]

    fig, ax = plt.subplots(figsize=(10, 6))
    bars = ax.bar([str(b) for b in bins], counts,
                  color='skyblue', edgecolor='navy')

    # Label every bar with its count
    for bar in bars:
        height = bar.get_height()
        ax.annotate(
            f'{height}',
            xy=(bar.get_x() + bar.get_width() / 2, height),
            xytext=(0, 3),
            textcoords="offset points",
            ha='center', va='bottom',
        )

    ax.set_title('Face Size Distribution')
    ax.set_xlabel('Face Size (pixels)')
    ax.set_ylabel('Count')
    plt.xticks(rotation=45, ha='right')  # keep long labels readable
    plt.tight_layout()
    return fig


def create_face_examples_grid(image, bin_data, selected_bin=None):
    """Build a grid image of the face crops belonging to one size bin.

    Args:
        image: path / PIL image / numpy array (BGR assumed for arrays).
        bin_data: output of bin_faces_by_size().
        selected_bin: bin start value (string from the dropdown or int);
            None/empty means no selection.

    Returns:
        A PIL image of the grid, or None when there is nothing to show.
    """
    if not bin_data or 'bin_faces' not in bin_data or not bin_data['bin_faces']:
        return None

    if isinstance(image, str):
        image = Image.open(image).convert('RGB')
    elif isinstance(image, np.ndarray):
        image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

    # Guard against both None and the empty-string dropdown default
    if selected_bin is None or selected_bin == '':
        return None

    if int(selected_bin) not in bin_data['bin_faces']:
        return None

    bin_faces = bin_data['bin_faces'][int(selected_bin)]

    # Grid geometry: up to 5 columns, as many rows as needed
    num_faces = len(bin_faces)
    cols = min(5, num_faces)
    rows = (num_faces + cols - 1) // cols

    margin = 10
    face_size = int(selected_bin) + 2 * margin
    grid_width = cols * face_size + (cols + 1) * margin
    grid_height = rows * face_size + (rows + 1) * margin
    grid_image = Image.new('RGB', (grid_width, grid_height), color='white')
    draw = ImageDraw.Draw(grid_image)

    for i, (face, size) in enumerate(bin_faces):
        x1, y1, x2, y2, conf = face
        row = i // cols
        col = i % cols

        # Crop the face with a margin, clamped to the image bounds
        face_img = image.crop((
            max(0, x1 - margin),
            max(0, y1 - margin),
            min(image.width, x2 + margin),
            min(image.height, y2 + margin),
        ))

        # Resize to the uniform cell size if needed
        target_size = face_size - 2 * margin
        if face_img.width != target_size or face_img.height != target_size:
            face_img = face_img.resize((target_size, target_size))

        grid_x = col * face_size + (col + 1) * margin
        grid_y = row * face_size + (row + 1) * margin
        grid_image.paste(face_img, (grid_x, grid_y))

        # Size label on a dark strip at the bottom of the cell.
        # BUGFIX: the canvas is RGB, so use an RGB fill (an RGBA tuple is
        # rejected/ignored depending on the Pillow version).
        draw.rectangle(
            [grid_x, grid_y + target_size - 20,
             grid_x + target_size, grid_y + target_size],
            fill=(0, 0, 0),
        )
        draw.text(
            (grid_x + 5, grid_y + target_size - 15),
            f"{size}px",
            fill=(255, 255, 255),
        )

    return grid_image


def draw_faces_on_image(image, faces):
    """Return a copy of *image* with color-coded face boxes and labels.

    Box color runs from blue (smallest face) to red (largest face).
    """
    if isinstance(image, str):
        image = Image.open(image).convert('RGB')
    elif isinstance(image, np.ndarray):
        image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

    result_image = image.copy()
    draw = ImageDraw.Draw(result_image)

    if faces:
        sizes = [max(face[2] - face[0], face[3] - face[1]) for face in faces]
        min_size = min(sizes)
        max_size = max(sizes)
        size_range = max(max_size - min_size, 1)  # avoid division by zero

        for face in faces:
            x1, y1, x2, y2, conf = face
            size = max(x2 - x1, y2 - y1)

            # Normalize size into [0, 1] for the blue->red gradient
            if max_size == min_size:
                normalized_size = 0.5
            else:
                normalized_size = (size - min_size) / size_range

            color_r = int(255 * normalized_size)
            color_g = 0
            color_b = int(255 * (1 - normalized_size))

            draw.rectangle([x1, y1, x2, y2],
                           outline=(color_r, color_g, color_b), width=2)

            # Label above the box.
            # BUGFIX: Pillow requires y0 <= y1, so the label strip goes from
            # y1 - 20 up to y1 (clamped at the top edge of the image).
            label = f"{size}px ({conf:.2f})"
            label_top = max(0, y1 - 20)
            draw.rectangle([x1, label_top, x1 + 100, y1],
                           fill=(color_r, color_g, color_b))
            draw.text((x1 + 5, label_top + 5), label, fill=(255, 255, 255))

    return result_image


def process_image(image, selected_bin=None):
    """Run detection on *image* and build every output the UI needs.

    Returns:
        (annotated_image, histogram_figure, examples_grid_or_None,
         dropdown_update_with_available_bins)
    """
    # Guard: the run button can be clicked before any image is provided
    if image is None:
        return None, plot_face_histogram({}), None, gr.update(choices=[])

    faces, img = detect_faces_ensemble(image)
    bin_data = bin_faces_by_size(faces)

    annotated_image = draw_faces_on_image(img, faces)
    histogram = plot_face_histogram(bin_data)
    examples = create_face_examples_grid(img, bin_data, selected_bin)

    # BUGFIX: gr.update instead of the deprecated gr.Dropdown.update
    available_bins = sorted(bin_data['bin_counts'].keys()) if bin_data else []
    dropdown_update = gr.update(choices=[str(b) for b in available_bins])

    return annotated_image, histogram, examples, dropdown_update


def process_image_default(image):
    """process_image with no pre-selected bin (used by input_image.change)."""
    return process_image(image, None)


def update_examples(image, selected_bin):
    """Rebuild the face-examples grid when a different bin is selected."""
    if image is None:
        return None

    faces, img = detect_faces_ensemble(image)
    bin_data = bin_faces_by_size(faces)
    return create_face_examples_grid(img, bin_data, selected_bin)


# Create Gradio interface
with gr.Blocks(title="Face Size Distribution Analysis") as demo:
    gr.Markdown("# Face Size Distribution Analysis")
    gr.Markdown("Upload an image or select from the examples to see the distribution of face sizes")

    with gr.Row():
        with gr.Column(scale=1):
            # Input components
            input_image = gr.Image(type="pil", label="Input Image")
            example_dropdown = gr.Dropdown(
                choices=[],
                label="Select from available images",
                interactive=True
            )
            run_button = gr.Button("Analyze Image")

            # Bin selection for examples
            bin_dropdown = gr.Dropdown(
                choices=[],
                label="Select size bin to see examples",
                interactive=True
            )

        with gr.Column(scale=2):
            # Output components
            output_image = gr.Image(type="pil", label="Detected Faces")
            with gr.Tab("Histogram"):
                histogram_plot = gr.Plot(label="Face Size Distribution")
            with gr.Tab("Face Examples"):
                examples_grid = gr.Image(type="pil", label="Face Examples")

    # Load example images on startup
    def load_examples():
        examples = load_images_from_folder("data")
        names = [os.path.basename(path) for path in examples]
        # BUGFIX: the default value must be one of the choices (a basename),
        # not the full path
        return gr.update(choices=names, value=names[0] if names else None)

    # Handle example selection
    def select_example(example_name):
        if not example_name:
            return None
        # Look for the example in the data folder
        example_path = os.path.join("data", example_name)
        if os.path.exists(example_path):
            return example_path
        return None

    # Set up event handlers
    run_button.click(
        process_image,
        inputs=[input_image, bin_dropdown],
        outputs=[output_image, histogram_plot, examples_grid, bin_dropdown]
    )

    example_dropdown.change(
        select_example,
        inputs=[example_dropdown],
        outputs=[input_image]
    )

    # BUGFIX: None is not a valid input component; use a wrapper that
    # supplies selected_bin=None itself
    input_image.change(
        process_image_default,
        inputs=[input_image],
        outputs=[output_image, histogram_plot, examples_grid, bin_dropdown]
    )

    bin_dropdown.change(
        update_examples,
        inputs=[input_image, bin_dropdown],
        outputs=[examples_grid]
    )

    # Load examples on startup
    demo.load(load_examples, outputs=[example_dropdown])

# Launch the demo
if __name__ == "__main__":
    demo.launch()