Spaces:
Running
Running
File size: 6,612 Bytes
1e4fc28 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 |
# app/utils.py
import os
import cv2
import numpy as np
from typing import Optional, Tuple
def _enhance_for_detection(gray: np.ndarray) -> np.ndarray:
    """
    Lightly preprocess a grayscale image so face detection copes better with
    low-contrast or slightly blurry photos.

    Two steps: CLAHE (adaptive histogram equalization) to boost local contrast,
    then a mild bilateral filter to suppress noise while keeping edges sharp.
    """
    # Boost local contrast; small tile grid keeps the equalization adaptive.
    contrast_boosted = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8)).apply(gray)
    # Edge-preserving denoise — helps the Haar cascades on some images.
    return cv2.bilateralFilter(contrast_boosted, d=5, sigmaColor=75, sigmaSpace=75)
# Cascade files tried in order until one yields a detection.
_CASCADE_NAMES = (
    "haarcascade_frontalface_default.xml",
    "haarcascade_frontalface_alt.xml",
    "haarcascade_frontalface_alt2.xml",
)

# (scaleFactor, minNeighbors, minSize) tiers, progressively more permissive,
# to cope with blurry, low-contrast, or odd-angle photos.
_DETECT_PARAMS = (
    (1.1, 5, (30, 30)),
    (1.05, 3, (20, 20)),
    (1.03, 2, (15, 15)),
)


def _downscale_for_detection(gray: np.ndarray, max_dim: int) -> Tuple[np.ndarray, float]:
    """Return (image, scale) with the longest side capped at max_dim (scale <= 1.0)."""
    h, w = gray.shape[:2]
    longest = max(w, h)
    if longest <= max_dim:
        return gray.copy(), 1.0
    scale = max_dim / float(longest)
    small = cv2.resize(
        gray, (int(w * scale), int(h * scale)), interpolation=cv2.INTER_LINEAR
    )
    return small, scale


def _detect_faces(small: np.ndarray):
    """
    Run the Haar-cascade detection attempts on `small` (grayscale, detection-sized).

    Tries each cascade in _CASCADE_NAMES with each parameter tier in _DETECT_PARAMS;
    if everything fails, retries the default cascade on the non-enhanced image
    (enhancement occasionally hurts detection). Returns a possibly-empty sequence
    of (x, y, w, h) boxes in `small` coordinates.
    """
    enhanced = _enhance_for_detection(small)
    for cascade_name in _CASCADE_NAMES:
        try:
            cascade = cv2.CascadeClassifier(cv2.data.haarcascades + cascade_name)
            if cascade.empty():
                continue
            for scale_factor, min_neighbors, min_size in _DETECT_PARAMS:
                faces = cascade.detectMultiScale(
                    enhanced,
                    scaleFactor=scale_factor,
                    minNeighbors=min_neighbors,
                    minSize=min_size,
                    flags=cv2.CASCADE_SCALE_IMAGE,
                )
                if len(faces) > 0:
                    return faces
        except Exception:
            # Best-effort: a broken cascade file must not abort detection.
            continue
    # Fallback: enhancement can hurt on some images — retry on the raw input.
    try:
        cascade = cv2.CascadeClassifier(
            cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
        )
        if not cascade.empty():
            return cascade.detectMultiScale(
                small,
                scaleFactor=1.05,
                minNeighbors=3,
                minSize=(20, 20),
                flags=cv2.CASCADE_SCALE_IMAGE,
            )
    except Exception:
        pass
    return ()


def preprocess_face(
    image_path: str,
    target_size: Tuple[int, int] = (48, 48),
    detect_max_dim: int = 800,
    pad_ratio: float = 0.25,  # fraction of face box padded per side (keeps eyes/brows/mouth context)
) -> Tuple[Optional[np.ndarray], Optional[str]]:
    """
    Load the image at image_path, detect the main face and return it preprocessed:
      - shape: (1, H, W, 1)
      - dtype: np.float32
      - values scaled to [0, 1]

    Parameters:
      - target_size: size expected by the model as (height, width).
      - detect_max_dim: maximum longest side used for the (faster) detection pass.
      - pad_ratio: fraction of the face box padded on each side to avoid tight crops.

    Returns:
      - (face_array, used_filename), or (None, None) if the image cannot be read,
        no face is detected, or any error occurs (best-effort API: never raises).
    """
    try:
        img = cv2.imread(image_path)
        if img is None:
            return None, None
        h0, w0 = img.shape[:2]
        # Grayscale copy: used both for detection and for the final crop.
        gray_full = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        # Detection runs on a downscaled copy for speed on large images.
        small, scale = _downscale_for_detection(gray_full, detect_max_dim)
        faces = _detect_faces(small)
        if len(faces) == 0:
            return None, None

        # The largest detected face is assumed to be the main subject.
        x_s, y_s, w_s, h_s = max(faces, key=lambda r: r[2] * r[3])

        # Map detection-pass coordinates back to the original image scale.
        x, y = int(x_s / scale), int(y_s / scale)
        w, h = int(w_s / scale), int(h_s / scale)

        # Pad the bounding box (clamped to image bounds) to preserve context.
        pad_w, pad_h = int(w * pad_ratio), int(h * pad_ratio)
        x1, y1 = max(0, x - pad_w), max(0, y - pad_h)
        x2, y2 = min(w0, x + w + pad_w), min(h0, y + h + pad_h)
        face_crop = gray_full[y1:y2, x1:x2]
        if face_crop.size == 0:
            # Degenerate crop — cv2.resize would raise on an empty array.
            return None, None

        # INTER_CUBIC preserves more detail when upscaling small faces,
        # which matters for downstream emotion recognition.
        face_resized = cv2.resize(
            face_crop, (target_size[1], target_size[0]), interpolation=cv2.INTER_CUBIC
        )

        # Normalize to float32 in [0, 1] and add channel + batch dims -> (1, H, W, 1).
        face_arr = np.asarray(face_resized, dtype=np.float32) / 255.0
        if face_arr.ndim == 2:
            face_arr = np.expand_dims(face_arr, axis=-1)
        face_arr = np.expand_dims(face_arr, axis=0)

        # Final sanity checks before handing the array to the model.
        if face_arr.dtype != np.float32:
            face_arr = face_arr.astype(np.float32)
        if not np.isfinite(face_arr).all():
            return None, None

        used_filename = os.path.basename(image_path) or "upload.jpg"
        return face_arr, used_filename
    except Exception:
        # Don't leak internals to the caller; let the app log exceptions if needed.
        return None, None
|