Spaces:
Running
Running
| # app/utils.py | |
| import os | |
| import cv2 | |
| import numpy as np | |
| from typing import Optional, Tuple | |
| def _enhance_for_detection(gray: np.ndarray) -> np.ndarray: | |
| """ | |
| Apply light preprocessing to improve face detection on low-contrast or slightly blurry images. | |
| Uses CLAHE (adaptive histogram equalization) and a mild bilateral filter. | |
| """ | |
| # CLAHE for contrast | |
| clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8)) | |
| enhanced = clahe.apply(gray) | |
| # Mild bilateral filtering to reduce noise while preserving edges (helps detection on some images) | |
| enhanced = cv2.bilateralFilter(enhanced, d=5, sigmaColor=75, sigmaSpace=75) | |
| return enhanced | |
| def preprocess_face( | |
| image_path: str, | |
| target_size: Tuple[int, int] = (48, 48), | |
| detect_max_dim: int = 800, | |
| pad_ratio: float = 0.25, # Increased from 0.15 to 0.25 to preserve more context (eyes, eyebrows, mouth area) | |
| ) -> Tuple[Optional[np.ndarray], Optional[str]]: | |
| """ | |
| Load an image at image_path, detect a face and return a preprocessed array: | |
| - shape: (1, H, W, 1) | |
| - dtype: np.float32 | |
| - values scaled to [0,1] | |
| If no face detected or on error, returns (None, None). | |
| Parameters: | |
| - target_size: size expected by the model (height, width). | |
| - detect_max_dim: maximum size (longest side) used for the detection pass to speed up detection. | |
| - pad_ratio: fraction of face box to pad on each side (helps avoid tight crops). | |
| Returns: | |
| - (face_array, used_filename) | |
| """ | |
| try: | |
| img = cv2.imread(image_path) | |
| if img is None: | |
| return None, None | |
| h0, w0 = img.shape[:2] | |
| # grayscale copy for detection | |
| gray_full = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) | |
| # Downscale for faster detection if image is huge | |
| scale = 1.0 | |
| max_side = max(w0, h0) | |
| if max_side > detect_max_dim: | |
| scale = detect_max_dim / float(max_side) | |
| small = cv2.resize(gray_full, (int(w0 * scale), int(h0 * scale)), interpolation=cv2.INTER_LINEAR) | |
| else: | |
| small = gray_full.copy() | |
| # Try to enhance small image for better detection on blurry photos | |
| small_enh = _enhance_for_detection(small) | |
| # Try multiple cascade classifiers for better detection | |
| cascade_paths = [ | |
| "haarcascade_frontalface_default.xml", | |
| "haarcascade_frontalface_alt.xml", | |
| "haarcascade_frontalface_alt2.xml", | |
| ] | |
| faces = [] | |
| # Try each cascade with progressively more permissive parameters | |
| for cascade_name in cascade_paths: | |
| if len(faces) > 0: | |
| break | |
| try: | |
| face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + cascade_name) | |
| if face_cascade.empty(): | |
| continue | |
| # Attempt 1: Standard detection | |
| faces = face_cascade.detectMultiScale( | |
| small_enh, | |
| scaleFactor=1.1, | |
| minNeighbors=5, | |
| minSize=(30, 30), | |
| flags=cv2.CASCADE_SCALE_IMAGE, | |
| ) | |
| # Attempt 2: More permissive (helps blurry / odd-angle photos) | |
| if len(faces) == 0: | |
| faces = face_cascade.detectMultiScale( | |
| small_enh, | |
| scaleFactor=1.05, | |
| minNeighbors=3, | |
| minSize=(20, 20), | |
| flags=cv2.CASCADE_SCALE_IMAGE, | |
| ) | |
| # Attempt 3: Even more permissive (for challenging conditions) | |
| if len(faces) == 0: | |
| faces = face_cascade.detectMultiScale( | |
| small_enh, | |
| scaleFactor=1.03, | |
| minNeighbors=2, | |
| minSize=(15, 15), | |
| flags=cv2.CASCADE_SCALE_IMAGE, | |
| ) | |
| except Exception: | |
| continue | |
| # If still nothing, try on original (non-enhanced) image | |
| if len(faces) == 0: | |
| try: | |
| face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + "haarcascade_frontalface_default.xml") | |
| if not face_cascade.empty(): | |
| # Sometimes enhancement hurts detection, try original | |
| faces = face_cascade.detectMultiScale( | |
| small, | |
| scaleFactor=1.05, | |
| minNeighbors=3, | |
| minSize=(20, 20), | |
| flags=cv2.CASCADE_SCALE_IMAGE, | |
| ) | |
| except Exception: | |
| pass | |
| if len(faces) == 0: | |
| return None, None | |
| # Choose the largest detected face (usually the main subject) | |
| faces = sorted(faces, key=lambda r: r[2] * r[3], reverse=True) | |
| (x_s, y_s, w_s, h_s) = faces[0] | |
| # Map coordinates back to original image scale | |
| x = int(x_s / scale) | |
| y = int(y_s / scale) | |
| w = int(w_s / scale) | |
| h = int(h_s / scale) | |
| # Pad bounding box slightly (pad_ratio of face size) | |
| pad_w = int(w * pad_ratio) | |
| pad_h = int(h * pad_ratio) | |
| x1 = max(0, x - pad_w) | |
| y1 = max(0, y - pad_h) | |
| x2 = min(w0, x + w + pad_w) | |
| y2 = min(h0, y + h + pad_h) | |
| face_crop = gray_full[y1:y2, x1:x2] | |
| # final resize to model input | |
| # Use INTER_CUBIC for better quality when upscaling small faces (preserves more detail for emotion recognition) | |
| face_resized = cv2.resize(face_crop, (target_size[1], target_size[0]), interpolation=cv2.INTER_CUBIC) | |
| # ensure numeric ndarray and float32 dtype | |
| face_arr = np.asarray(face_resized, dtype=np.float32) | |
| # normalize | |
| face_arr = face_arr / 255.0 | |
| # channel & batch dims -> (1, H, W, 1) | |
| if face_arr.ndim == 2: | |
| face_arr = np.expand_dims(face_arr, axis=-1) | |
| face_arr = np.expand_dims(face_arr, axis=0) | |
| # final sanity checks | |
| if face_arr.dtype != np.float32: | |
| face_arr = face_arr.astype(np.float32) | |
| if not np.isfinite(face_arr).all(): | |
| return None, None | |
| used_filename = os.path.basename(image_path) or "upload.jpg" | |
| return face_arr, used_filename | |
| except Exception: | |
| # don't leak internals to caller; let app log exceptions if needed | |
| return None, None | |