Spaces:

molecularmax
/

robustness-input

Build error

File size: 14,627 Bytes

dfb6ca6

import os
import numpy as np
import cv2
import gradio as gr
import torch
from PIL import Image, ImageDraw, ImageFont
import matplotlib.pyplot as plt
from facenet_pytorch import MTCNN, RetinaFace
from retinaface.pre_trained_models import get_model as get_retinaface_model
import matplotlib.cm as cm
from collections import defaultdict

# Pick the first CUDA device when one is available; otherwise run on CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Registry of face detectors used by the ensemble, keyed by a short name.
models = {}

# MTCNN: keep_all=True makes it return every detected face, not only the best one.
models['mtcnn'] = MTCNN(keep_all=True, device=device)

# RetinaFace: the retinaface package registers its pretrained ResNet-50
# weights under the dated key "resnet50_2020-07-20"; a bare "resnet50" is not
# a registered model name and fails the lookup inside get_model().
# get_model() takes the device as a plain string, hence device.type.
models['retinaface'] = get_retinaface_model(
    "resnet50_2020-07-20", max_size=1024, device=device.type
)
models['retinaface'].eval()

def load_images_from_folder(folder_path):
    """Return the sorted paths of the JPEG entries directly inside a folder.

    Args:
        folder_path: Directory to scan (not searched recursively).

    Returns:
        A sorted list of ``os.path.join(folder_path, name)`` for every entry
        whose name ends in ``.jpg`` or ``.jpeg`` (case-insensitive). The list
        is empty when ``folder_path`` does not exist or is not a directory.
    """
    # isdir (rather than exists) so a path that points at a regular file
    # yields an empty list instead of NotADirectoryError from os.listdir.
    if not os.path.isdir(folder_path):
        return []

    return sorted(
        os.path.join(folder_path, filename)
        for filename in os.listdir(folder_path)
        if filename.lower().endswith(('.jpg', '.jpeg'))
    )

def detect_faces_ensemble(image):
    """Detect faces with every detector in ``models`` and merge the results.

    Args:
        image: A file path, a numpy array (treated as already being in RGB
            channel order, or grayscale/RGBA), a PIL image, or ``None``.

    Returns:
        A tuple ``(boxes, image)`` where ``boxes`` is a list of
        ``[x1, y1, x2, y2, confidence]`` entries (integer pixel coordinates)
        left after non-maximum suppression at IoU 0.5, and ``image`` is the
        input converted to an RGB PIL image. ``([], None)`` is returned when
        ``image`` is ``None``.
    """
    # Gradio passes None when the image component is cleared.
    if image is None:
        return [], None

    # Normalise every supported input to an RGB PIL image.
    if isinstance(image, str):
        image = Image.open(image).convert('RGB')
    elif isinstance(image, np.ndarray):
        # convert('RGB') also covers 2-D grayscale and 4-channel RGBA arrays,
        # which previously crashed in the channel check / colour conversion.
        image = Image.fromarray(image).convert('RGB')
    elif image.mode != 'RGB':
        image = image.convert('RGB')

    all_boxes = []

    # MTCNN returns (None, [None]) when nothing is found.
    boxes_mtcnn, probs_mtcnn = models['mtcnn'].detect(image)
    if boxes_mtcnn is not None:
        for (x1, y1, x2, y2), prob in zip(boxes_mtcnn, probs_mtcnn):
            all_boxes.append([int(x1), int(y1), int(x2), int(y2), float(prob)])

    # RetinaFace exposes predict_jsons(), which takes an RGB array and returns
    # one dict per face with "bbox" and "score" entries.
    with torch.no_grad():
        annotations = models['retinaface'].predict_jsons(np.array(image))
    for annotation in annotations:
        bbox = annotation.get('bbox')
        # "No face" is reported as a single placeholder entry with an empty bbox.
        if not bbox:
            continue
        x1, y1, x2, y2 = bbox
        all_boxes.append(
            [int(x1), int(y1), int(x2), int(y2), float(annotation['score'])]
        )

    # Both detectors usually fire on the same face; NMS collapses duplicates.
    if all_boxes:
        all_boxes = non_maximum_suppression(all_boxes, 0.5)

    return all_boxes, image

def calculate_iou(box1, box2):
    """Return the intersection-over-union of two axis-aligned boxes.

    Args:
        box1: Sequence starting with ``x1, y1, x2, y2``; extra items
            (e.g. a confidence score) are ignored.
        box2: Same layout as ``box1``.

    Returns:
        A float in ``[0.0, 1.0]`` for well-formed boxes. ``0.0`` is returned
        when the boxes do not overlap or when the union has no area.
    """
    x1_1, y1_1, x2_1, y2_1 = box1[:4]
    x1_2, y1_2, x2_2, y2_2 = box2[:4]

    # Extent of the overlap rectangle along each axis.
    overlap_w = min(x2_1, x2_2) - max(x1_1, x1_2)
    overlap_h = min(y2_1, y2_2) - max(y1_1, y1_2)
    if overlap_w <= 0 or overlap_h <= 0:
        return 0.0

    intersection_area = overlap_w * overlap_h
    box1_area = (x2_1 - x1_1) * (y2_1 - y1_1)
    box2_area = (x2_2 - x1_2) * (y2_2 - y1_2)
    union_area = box1_area + box2_area - intersection_area

    # Degenerate (zero-area) boxes would otherwise divide by zero.
    if union_area <= 0:
        return 0.0

    return intersection_area / union_area

def non_maximum_suppression(boxes, iou_threshold):
    """Greedily drop boxes that overlap a higher-confidence box.

    Boxes are visited from highest to lowest confidence (item 4 of each box).
    A box is kept only if its IoU with every box kept so far is below
    ``iou_threshold``. Returns the kept boxes in descending confidence order.
    """
    if not boxes:
        return []

    survivors = []
    for candidate in sorted(boxes, key=lambda entry: entry[4], reverse=True):
        # A candidate is suppressed as soon as one kept box overlaps it enough.
        if all(calculate_iou(winner, candidate) < iou_threshold for winner in survivors):
            survivors.append(candidate)

    return survivors

def bin_faces_by_size(faces, bin_size=20):
    """Group faces into fixed-width bins by size (max of width and height).

    Args:
        faces: Iterable of ``[x1, y1, x2, y2, confidence]`` boxes.
        bin_size: Width of each bin in pixels. Defaults to 20.

    Returns:
        ``{}`` when ``faces`` is empty; otherwise a dict with

        * ``'bin_counts'``: bin lower edge -> number of faces in the bin,
        * ``'bin_faces'``: bin lower edge -> list of ``(face, size)`` tuples,
        * ``'bin_edges'``: list of edges from the lowest occupied bin to one
          bin past the highest occupied bin.

    Raises:
        ValueError: If ``bin_size`` is not positive.
    """
    if bin_size <= 0:
        raise ValueError("bin_size must be a positive number of pixels")

    face_sizes = [max(x2 - x1, y2 - y1) for x1, y1, x2, y2, _ in faces]
    if not face_sizes:
        return {}

    bin_edges = range(
        bin_size * (min(face_sizes) // bin_size),
        bin_size * (max(face_sizes) // bin_size + 2),
        bin_size,
    )

    bin_counts = defaultdict(int)
    bin_faces = defaultdict(list)
    for face, size in zip(faces, face_sizes):
        # Each bin is identified by its lower edge.
        bin_idx = size // bin_size * bin_size
        bin_counts[bin_idx] += 1
        bin_faces[bin_idx].append((face, size))

    return {
        'bin_counts': dict(bin_counts),
        'bin_faces': dict(bin_faces),
        'bin_edges': list(bin_edges),
    }

def plot_face_histogram(bin_data):
    """Build a bar chart of face counts per size bin.

    Args:
        bin_data: Dict with a ``'bin_counts'`` mapping (bin lower edge ->
            count), or an empty/``None`` value when there is nothing to plot.

    Returns:
        A matplotlib figure. When there are no counts the figure carries the
        axis titles and a centred "No faces detected" message.
    """
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.set_title('Face Size Distribution')
    ax.set_xlabel('Face Size (pixels)')
    ax.set_ylabel('Count')

    bin_counts = bin_data.get('bin_counts') if bin_data else None
    if not bin_counts:
        ax.text(0.5, 0.5, 'No faces detected', ha='center', va='center', transform=ax.transAxes)
        return fig

    bins = sorted(bin_counts)
    counts = [bin_counts[b] for b in bins]

    # Bin edges are passed as strings so the bars are evenly spaced categories.
    bars = ax.bar(
        [str(b) for b in bins],
        counts,
        color='skyblue',
        edgecolor='navy',
    )

    # Print each count just above its bar.
    for bar in bars:
        height = bar.get_height()
        ax.annotate(
            f'{height}',
            xy=(bar.get_x() + bar.get_width() / 2, height),
            xytext=(0, 3),
            textcoords="offset points",
            ha='center', va='bottom',
        )

    # Style this figure's own tick labels and layout instead of going through
    # pyplot's implicit "current figure", which is shared process-wide state.
    plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
    fig.tight_layout()

    return fig

def create_face_examples_grid(image, bin_data, selected_bin=None):
    """Render the faces of one size bin as a labelled grid image.

    Args:
        image: Source image as a file path, numpy array or PIL image.
        bin_data: Dict with a ``'bin_faces'`` mapping of bin lower edge ->
            list of ``(face, size)`` tuples, where ``face`` is
            ``[x1, y1, x2, y2, confidence]``.
        selected_bin: Lower edge of the bin to show (int or numeric string).

    Returns:
        A PIL image with up to five faces per row, each captioned with its
        size in pixels, or ``None`` when there is nothing to show (no data,
        no image, no/invalid/unknown/empty bin).
    """
    if not bin_data or not bin_data.get('bin_faces') or selected_bin is None:
        return None

    # The dropdown delivers the bin as a string; ignore anything non-numeric.
    try:
        bin_key = int(selected_bin)
    except (TypeError, ValueError):
        return None

    bin_faces = bin_data['bin_faces'].get(bin_key)
    if not bin_faces or image is None:
        return None

    # Only load/convert the image once we know there is something to draw.
    if isinstance(image, str):
        image = Image.open(image).convert('RGB')
    elif isinstance(image, np.ndarray):
        image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

    margin = 10
    min_tile = 20  # also the height of the caption strip drawn on each tile
    # Tiles are as large as the bin's lower edge, but never smaller than
    # min_tile: bin 0 would otherwise request a 0x0 resize and fail.
    target_size = max(bin_key, min_tile)
    face_size = target_size + 2 * margin

    num_faces = len(bin_faces)
    cols = min(5, num_faces)
    rows = (num_faces + cols - 1) // cols  # ceiling division

    grid_width = cols * face_size + (cols + 1) * margin
    grid_height = rows * face_size + (rows + 1) * margin

    grid_image = Image.new('RGB', (grid_width, grid_height), color='white')
    draw = ImageDraw.Draw(grid_image)

    for i, (face, size) in enumerate(bin_faces):
        x1, y1, x2, y2, conf = face
        row, col = divmod(i, cols)

        # Crop the face plus a margin, clipped to the image bounds.
        left = max(0, x1 - margin)
        upper = max(0, y1 - margin)
        right = min(image.width, x2 + margin)
        lower = min(image.height, y2 + margin)
        if right <= left or lower <= upper:
            # Box lies outside the image; leave its grid slot blank.
            continue

        face_img = image.crop((left, upper, right, lower))
        if face_img.width != target_size or face_img.height != target_size:
            face_img = face_img.resize((target_size, target_size))

        grid_x = col * face_size + (col + 1) * margin
        grid_y = row * face_size + (row + 1) * margin
        grid_image.paste(face_img, (grid_x, grid_y))

        # Caption strip along the bottom edge of the tile.
        draw.rectangle(
            [grid_x, grid_y + target_size - 20, grid_x + target_size, grid_y + target_size],
            fill=(0, 0, 0, 128)
        )
        draw.text(
            (grid_x + 5, grid_y + target_size - 15),
            f"{size}px",
            fill=(255, 255, 255)
        )

    return grid_image

def draw_faces_on_image(image, faces):
    """Return a copy of the image with a coloured, labelled box per face.

    Args:
        image: Source image as a file path, numpy array or PIL image.
        faces: List of ``[x1, y1, x2, y2, confidence]`` boxes.

    Returns:
        A new PIL image; the input is not modified. Box colour runs from blue
        (smallest face in the image) to red (largest). ``None`` is returned
        when ``image`` is ``None``.
    """
    if image is None:
        return None

    if isinstance(image, str):
        image = Image.open(image).convert('RGB')
    elif isinstance(image, np.ndarray):
        image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

    result_image = image.copy()
    if not faces:
        return result_image

    draw = ImageDraw.Draw(result_image)

    # Face size is the longer side of the box; it drives the colour scale.
    sizes = [max(face[2] - face[0], face[3] - face[1]) for face in faces]
    min_size = min(sizes)
    size_range = max(sizes) - min_size

    label_width = 100
    label_height = 20

    for (x1, y1, x2, y2, conf), size in zip(faces, sizes):
        # All faces the same size -> use the middle of the gradient.
        normalized_size = 0.5 if size_range == 0 else (size - min_size) / size_range

        color = (int(255 * normalized_size), 0, int(255 * (1 - normalized_size)))

        draw.rectangle([x1, y1, x2, y2], outline=color, width=2)

        # Put the label above the box; if that would leave the canvas, put it
        # just inside the top edge of the box instead.
        label_top = y1 - label_height if y1 >= label_height else y1

        # Corners must be ordered top-left then bottom-right: recent Pillow
        # raises ValueError when the second y is smaller than the first.
        draw.rectangle(
            [x1, label_top, x1 + label_width, label_top + label_height],
            fill=color
        )
        draw.text((x1 + 5, label_top + 5), f"{size}px ({conf:.2f})", fill=(255, 255, 255))

    return result_image

def process_image(image, selected_bin=None):
    """Run detection on an image and build every output of the UI.

    Args:
        image: Input image (file path, numpy array or PIL image), or ``None``
            when the image component has been cleared.
        selected_bin: Optional size bin whose faces should be shown.

    Returns:
        A 4-tuple ``(annotated_image, histogram, examples_grid,
        dropdown_update)``. ``examples_grid`` is ``None`` when no bin is
        selected or the bin has no faces in this image; ``dropdown_update``
        replaces the bin dropdown's choices with the bins found.
    """
    # A cleared image fires the change event with None: reset the outputs.
    if image is None:
        return None, plot_face_histogram({}), None, gr.update(choices=[])

    faces, img = detect_faces_ensemble(image)
    bin_data = bin_faces_by_size(faces)

    annotated_image = draw_faces_on_image(img, faces)
    histogram = plot_face_histogram(bin_data)

    # Returns None by itself when selected_bin is None or unknown.
    examples_grid = create_face_examples_grid(img, bin_data, selected_bin)

    available_bins = sorted(bin_data['bin_counts']) if bin_data else []

    # gr.update() is the component-agnostic update helper; the per-class
    # gr.Dropdown.update() is not available in newer Gradio releases.
    return (
        annotated_image,
        histogram,
        examples_grid,
        gr.update(choices=[str(b) for b in available_bins]),
    )

def update_examples(image, selected_bin):
    """Rebuild the face-examples grid after the bin selection changes.

    Args:
        image: The current input image, or ``None`` if there is none.
        selected_bin: Bin chosen in the dropdown, or an empty value.

    Returns:
        The grid image for the selected bin, or ``None`` when there is no
        image, no selection, or no faces in that bin.
    """
    # Nothing to show: skip the (expensive) detection pass entirely.
    if image is None or selected_bin is None or selected_bin == "":
        return None

    faces, img = detect_faces_ensemble(image)
    bin_data = bin_faces_by_size(faces)
    return create_face_examples_grid(img, bin_data, selected_bin)

# Create Gradio interface
with gr.Blocks(title="Face Size Distribution Analysis") as demo:
    gr.Markdown("# Face Size Distribution Analysis")
    gr.Markdown("Upload an image or select from the examples to see the distribution of face sizes")

    with gr.Row():
        with gr.Column(scale=1):
            # Input components
            input_image = gr.Image(type="pil", label="Input Image")
            example_dropdown = gr.Dropdown(
                choices=[],
                label="Select from available images",
                interactive=True
            )
            run_button = gr.Button("Analyze Image")

            # Bin selection for examples
            bin_dropdown = gr.Dropdown(
                choices=[],
                label="Select size bin to see examples",
                interactive=True
            )

        with gr.Column(scale=2):
            # Output components
            output_image = gr.Image(type="pil", label="Detected Faces")
            with gr.Tab("Histogram"):
                histogram_plot = gr.Plot(label="Face Size Distribution")
            with gr.Tab("Face Examples"):
                examples_grid = gr.Image(type="pil", label="Face Examples")

    def load_examples():
        """Fill the example dropdown with the JPEG file names found in data/."""
        names = [os.path.basename(path) for path in load_images_from_folder("data")]
        # The preselected value must be one of the choices, i.e. a file name
        # and not the full path.
        return gr.update(choices=names, value=names[0] if names else None)

    def select_example(example_name):
        """Map a chosen example name to its path under data/, or None."""
        if not example_name:
            return None

        # basename() keeps the lookup inside data/ even if the submitted
        # value contains directory components.
        example_path = os.path.join("data", os.path.basename(example_name))
        if os.path.isfile(example_path):
            return example_path
        return None

    # Set up event handlers
    run_button.click(
        process_image,
        inputs=[input_image, bin_dropdown],
        outputs=[output_image, histogram_plot, examples_grid, bin_dropdown]
    )

    example_dropdown.change(
        select_example,
        inputs=[example_dropdown],
        outputs=[input_image]
    )

    # A new image is analysed without a bin selection: only the image is
    # passed and process_image falls back to selected_bin=None. (Every entry
    # of `inputs` must be a component, so None cannot be listed there.)
    input_image.change(
        process_image,
        inputs=[input_image],
        outputs=[output_image, histogram_plot, examples_grid, bin_dropdown]
    )

    bin_dropdown.change(
        update_examples,
        inputs=[input_image, bin_dropdown],
        outputs=[examples_grid]
    )

    # Load examples on startup
    demo.load(load_examples, outputs=[example_dropdown])

# Launch the demo only when this file is run as a script, so that importing
# the module does not start the server.
if __name__ == "__main__":
    demo.launch()