Spaces:
Running
Running
File size: 6,612 Bytes
1e4fc28 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 |
# app/utils.py
import os
import cv2
import numpy as np
from typing import Optional, Tuple
def _enhance_for_detection(gray: np.ndarray) -> np.ndarray:
    """
    Lightly preprocess a grayscale image so face detection copes better with
    low-contrast or slightly blurry photos.

    Two steps: CLAHE (adaptive histogram equalization) to boost local contrast,
    then a mild bilateral filter to suppress noise while keeping edges sharp.
    """
    # Boost local contrast; small tile grid keeps the equalization adaptive.
    contrast_boosted = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8)).apply(gray)
    # Edge-preserving denoise — helps the Haar cascades on some images.
    return cv2.bilateralFilter(contrast_boosted, d=5, sigmaColor=75, sigmaSpace=75)
# Cascade files tried in order until one yields a detection.
_CASCADE_NAMES = (
    "haarcascade_frontalface_default.xml",
    "haarcascade_frontalface_alt.xml",
    "haarcascade_frontalface_alt2.xml",
)

# (scaleFactor, minNeighbors, minSize) tiers, progressively more permissive,
# to cope with blurry, low-contrast, or odd-angle photos.
_DETECT_PARAMS = (
    (1.1, 5, (30, 30)),
    (1.05, 3, (20, 20)),
    (1.03, 2, (15, 15)),
)


def _downscale_for_detection(gray: np.ndarray, max_dim: int) -> Tuple[np.ndarray, float]:
    """Return (image, scale) with the longest side capped at max_dim (scale <= 1.0)."""
    h, w = gray.shape[:2]
    longest = max(w, h)
    if longest <= max_dim:
        return gray.copy(), 1.0
    scale = max_dim / float(longest)
    small = cv2.resize(
        gray, (int(w * scale), int(h * scale)), interpolation=cv2.INTER_LINEAR
    )
    return small, scale


def _detect_faces(small: np.ndarray):
    """
    Run the Haar-cascade detection attempts on `small` (grayscale, detection-sized).

    Tries each cascade in _CASCADE_NAMES with each parameter tier in _DETECT_PARAMS;
    if everything fails, retries the default cascade on the non-enhanced image
    (enhancement occasionally hurts detection). Returns a possibly-empty sequence
    of (x, y, w, h) boxes in `small` coordinates.
    """
    enhanced = _enhance_for_detection(small)
    for cascade_name in _CASCADE_NAMES:
        try:
            cascade = cv2.CascadeClassifier(cv2.data.haarcascades + cascade_name)
            if cascade.empty():
                continue
            for scale_factor, min_neighbors, min_size in _DETECT_PARAMS:
                faces = cascade.detectMultiScale(
                    enhanced,
                    scaleFactor=scale_factor,
                    minNeighbors=min_neighbors,
                    minSize=min_size,
                    flags=cv2.CASCADE_SCALE_IMAGE,
                )
                if len(faces) > 0:
                    return faces
        except Exception:
            # Best-effort: a broken cascade file must not abort detection.
            continue
    # Fallback: enhancement can hurt on some images — retry on the raw input.
    try:
        cascade = cv2.CascadeClassifier(
            cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
        )
        if not cascade.empty():
            return cascade.detectMultiScale(
                small,
                scaleFactor=1.05,
                minNeighbors=3,
                minSize=(20, 20),
                flags=cv2.CASCADE_SCALE_IMAGE,
            )
    except Exception:
        pass
    return ()


def preprocess_face(
    image_path: str,
    target_size: Tuple[int, int] = (48, 48),
    detect_max_dim: int = 800,
    pad_ratio: float = 0.25,  # fraction of face box padded per side (keeps eyes/brows/mouth context)
) -> Tuple[Optional[np.ndarray], Optional[str]]:
    """
    Load the image at image_path, detect the main face and return it preprocessed:
      - shape: (1, H, W, 1)
      - dtype: np.float32
      - values scaled to [0, 1]

    Parameters:
      - target_size: size expected by the model as (height, width).
      - detect_max_dim: maximum longest side used for the (faster) detection pass.
      - pad_ratio: fraction of the face box padded on each side to avoid tight crops.

    Returns:
      - (face_array, used_filename), or (None, None) if the image cannot be read,
        no face is detected, or any error occurs (best-effort API: never raises).
    """
    try:
        img = cv2.imread(image_path)
        if img is None:
            return None, None
        h0, w0 = img.shape[:2]
        # Grayscale copy: used both for detection and for the final crop.
        gray_full = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        # Detection runs on a downscaled copy for speed on large images.
        small, scale = _downscale_for_detection(gray_full, detect_max_dim)
        faces = _detect_faces(small)
        if len(faces) == 0:
            return None, None

        # The largest detected face is assumed to be the main subject.
        x_s, y_s, w_s, h_s = max(faces, key=lambda r: r[2] * r[3])

        # Map detection-pass coordinates back to the original image scale.
        x, y = int(x_s / scale), int(y_s / scale)
        w, h = int(w_s / scale), int(h_s / scale)

        # Pad the bounding box (clamped to image bounds) to preserve context.
        pad_w, pad_h = int(w * pad_ratio), int(h * pad_ratio)
        x1, y1 = max(0, x - pad_w), max(0, y - pad_h)
        x2, y2 = min(w0, x + w + pad_w), min(h0, y + h + pad_h)
        face_crop = gray_full[y1:y2, x1:x2]
        if face_crop.size == 0:
            # Degenerate crop — cv2.resize would raise on an empty array.
            return None, None

        # INTER_CUBIC preserves more detail when upscaling small faces,
        # which matters for downstream emotion recognition.
        face_resized = cv2.resize(
            face_crop, (target_size[1], target_size[0]), interpolation=cv2.INTER_CUBIC
        )

        # Normalize to float32 in [0, 1] and add channel + batch dims -> (1, H, W, 1).
        face_arr = np.asarray(face_resized, dtype=np.float32) / 255.0
        if face_arr.ndim == 2:
            face_arr = np.expand_dims(face_arr, axis=-1)
        face_arr = np.expand_dims(face_arr, axis=0)

        # Final sanity checks before handing the array to the model.
        if face_arr.dtype != np.float32:
            face_arr = face_arr.astype(np.float32)
        if not np.isfinite(face_arr).all():
            return None, None

        used_filename = os.path.basename(image_path) or "upload.jpg"
        return face_arr, used_filename
    except Exception:
        # Don't leak internals to the caller; let the app log exceptions if needed.
        return None, None
|