HimAJ's picture
upload 32 files for the ml
1e4fc28 verified
# app/utils.py
import os
import cv2
import numpy as np
from typing import Optional, Tuple
def _enhance_for_detection(gray: np.ndarray) -> np.ndarray:
"""
Apply light preprocessing to improve face detection on low-contrast or slightly blurry images.
Uses CLAHE (adaptive histogram equalization) and a mild bilateral filter.
"""
# CLAHE for contrast
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
enhanced = clahe.apply(gray)
# Mild bilateral filtering to reduce noise while preserving edges (helps detection on some images)
enhanced = cv2.bilateralFilter(enhanced, d=5, sigmaColor=75, sigmaSpace=75)
return enhanced
def preprocess_face(
image_path: str,
target_size: Tuple[int, int] = (48, 48),
detect_max_dim: int = 800,
pad_ratio: float = 0.25, # Increased from 0.15 to 0.25 to preserve more context (eyes, eyebrows, mouth area)
) -> Tuple[Optional[np.ndarray], Optional[str]]:
"""
Load an image at image_path, detect a face and return a preprocessed array:
- shape: (1, H, W, 1)
- dtype: np.float32
- values scaled to [0,1]
If no face detected or on error, returns (None, None).
Parameters:
- target_size: size expected by the model (height, width).
- detect_max_dim: maximum size (longest side) used for the detection pass to speed up detection.
- pad_ratio: fraction of face box to pad on each side (helps avoid tight crops).
Returns:
- (face_array, used_filename)
"""
try:
img = cv2.imread(image_path)
if img is None:
return None, None
h0, w0 = img.shape[:2]
# grayscale copy for detection
gray_full = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# Downscale for faster detection if image is huge
scale = 1.0
max_side = max(w0, h0)
if max_side > detect_max_dim:
scale = detect_max_dim / float(max_side)
small = cv2.resize(gray_full, (int(w0 * scale), int(h0 * scale)), interpolation=cv2.INTER_LINEAR)
else:
small = gray_full.copy()
# Try to enhance small image for better detection on blurry photos
small_enh = _enhance_for_detection(small)
# Try multiple cascade classifiers for better detection
cascade_paths = [
"haarcascade_frontalface_default.xml",
"haarcascade_frontalface_alt.xml",
"haarcascade_frontalface_alt2.xml",
]
faces = []
# Try each cascade with progressively more permissive parameters
for cascade_name in cascade_paths:
if len(faces) > 0:
break
try:
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + cascade_name)
if face_cascade.empty():
continue
# Attempt 1: Standard detection
faces = face_cascade.detectMultiScale(
small_enh,
scaleFactor=1.1,
minNeighbors=5,
minSize=(30, 30),
flags=cv2.CASCADE_SCALE_IMAGE,
)
# Attempt 2: More permissive (helps blurry / odd-angle photos)
if len(faces) == 0:
faces = face_cascade.detectMultiScale(
small_enh,
scaleFactor=1.05,
minNeighbors=3,
minSize=(20, 20),
flags=cv2.CASCADE_SCALE_IMAGE,
)
# Attempt 3: Even more permissive (for challenging conditions)
if len(faces) == 0:
faces = face_cascade.detectMultiScale(
small_enh,
scaleFactor=1.03,
minNeighbors=2,
minSize=(15, 15),
flags=cv2.CASCADE_SCALE_IMAGE,
)
except Exception:
continue
# If still nothing, try on original (non-enhanced) image
if len(faces) == 0:
try:
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + "haarcascade_frontalface_default.xml")
if not face_cascade.empty():
# Sometimes enhancement hurts detection, try original
faces = face_cascade.detectMultiScale(
small,
scaleFactor=1.05,
minNeighbors=3,
minSize=(20, 20),
flags=cv2.CASCADE_SCALE_IMAGE,
)
except Exception:
pass
if len(faces) == 0:
return None, None
# Choose the largest detected face (usually the main subject)
faces = sorted(faces, key=lambda r: r[2] * r[3], reverse=True)
(x_s, y_s, w_s, h_s) = faces[0]
# Map coordinates back to original image scale
x = int(x_s / scale)
y = int(y_s / scale)
w = int(w_s / scale)
h = int(h_s / scale)
# Pad bounding box slightly (pad_ratio of face size)
pad_w = int(w * pad_ratio)
pad_h = int(h * pad_ratio)
x1 = max(0, x - pad_w)
y1 = max(0, y - pad_h)
x2 = min(w0, x + w + pad_w)
y2 = min(h0, y + h + pad_h)
face_crop = gray_full[y1:y2, x1:x2]
# final resize to model input
# Use INTER_CUBIC for better quality when upscaling small faces (preserves more detail for emotion recognition)
face_resized = cv2.resize(face_crop, (target_size[1], target_size[0]), interpolation=cv2.INTER_CUBIC)
# ensure numeric ndarray and float32 dtype
face_arr = np.asarray(face_resized, dtype=np.float32)
# normalize
face_arr = face_arr / 255.0
# channel & batch dims -> (1, H, W, 1)
if face_arr.ndim == 2:
face_arr = np.expand_dims(face_arr, axis=-1)
face_arr = np.expand_dims(face_arr, axis=0)
# final sanity checks
if face_arr.dtype != np.float32:
face_arr = face_arr.astype(np.float32)
if not np.isfinite(face_arr).all():
return None, None
used_filename = os.path.basename(image_path) or "upload.jpg"
return face_arr, used_filename
except Exception:
# don't leak internals to caller; let app log exceptions if needed
return None, None