Spaces:

HimAJ
/

emotion-detection-api

Running

App Files Files Community

emotion-detection-api / app /utils.py

HimAJ

upload 32 files for the ml

1e4fc28 verified about 1 month ago

raw

history blame contribute delete

6.61 kB

	# app/utils.py
	import logging
	import os
	from typing import Optional, Tuple

	import cv2
	import numpy as np

	def _enhance_for_detection(gray: np.ndarray) -> np.ndarray:
	    """
	    Return a contrast-boosted, lightly denoised version of a grayscale image.

	    The image is first passed through CLAHE (clip limit 2.0, 8x8 tiles) and the
	    result is then smoothed with a small bilateral filter. The intent is to give
	    the face detector a better chance on low-contrast or slightly blurry input.

	    Parameters:
	    - gray: single-channel image to enhance.

	    Returns:
	    - the enhanced image produced by the bilateral filter.
	    """
	    # Step 1: adaptive histogram equalization for local contrast.
	    equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
	    contrast_boosted = equalizer.apply(gray)

	    # Step 2: edge-preserving smoothing to suppress noise.
	    return cv2.bilateralFilter(contrast_boosted, d=5, sigmaColor=75, sigmaSpace=75)


	def _detect_with_cascade(cascade_name: str, image: np.ndarray, attempts):
	    """
	    Run one bundled Haar cascade over ``image`` and return the first non-empty result.

	    Parameters:
	    - cascade_name: file name of a cascade inside ``cv2.data.haarcascades``.
	    - image: single-channel image to search.
	    - attempts: iterable of ``(scale_factor, min_neighbors, min_side)`` tuples,
	      tried in order; ``min_side`` is used for both dimensions of ``minSize``.

	    Returns:
	    - the detections of the first attempt that found something, or an empty
	      tuple when the cascade cannot be loaded, raises, or finds nothing.
	    """
	    log = logging.getLogger(__name__)
	    try:
	        cascade = cv2.CascadeClassifier(cv2.data.haarcascades + cascade_name)
	        if cascade.empty():
	            # A missing cascade file would otherwise look exactly like "no face".
	            log.warning("Haar cascade %s could not be loaded", cascade_name)
	            return ()

	        for scale_factor, min_neighbors, min_side in attempts:
	            found = cascade.detectMultiScale(
	                image,
	                scaleFactor=scale_factor,
	                minNeighbors=min_neighbors,
	                minSize=(min_side, min_side),
	                flags=cv2.CASCADE_SCALE_IMAGE,
	            )
	            # Use len() rather than truthiness: the result can be a NumPy array.
	            if len(found) > 0:
	                return found
	    except Exception:
	        # Best effort: a failing cascade must not prevent the others from running.
	        log.warning("Face detection with %s failed", cascade_name, exc_info=True)
	    return ()


	def preprocess_face(
	    image_path: str,
	    target_size: Tuple[int, int] = (48, 48),
	    detect_max_dim: int = 800,
	    pad_ratio: float = 0.25,  # Increased from 0.15 to 0.25 to preserve more context (eyes, eyebrows, mouth area)
	) -> Tuple[Optional[np.ndarray], Optional[str]]:
	    """
	    Load an image at image_path, detect a face and return a preprocessed array:
	    - shape: (1, H, W, 1)
	    - dtype: np.float32
	    - values scaled to [0,1]

	    Detection runs on a grayscale copy (downscaled when the longest side exceeds
	    detect_max_dim) after contrast enhancement. Several frontal-face Haar cascades
	    are tried with progressively more permissive parameters; if none finds a face,
	    the default cascade is tried once more on the non-enhanced image. The largest
	    detection is mapped back to the original resolution, padded, cropped from the
	    full-resolution grayscale image and resized to target_size.

	    If the image cannot be read, no face is detected, or an error occurs, returns
	    (None, None). Errors are logged through the ``logging`` module and never
	    propagate to the caller.

	    Parameters:
	    - image_path: path of the image file to read.
	    - target_size: size expected by the model (height, width).
	    - detect_max_dim: maximum size (longest side) used for the detection pass to speed up detection.
	    - pad_ratio: fraction of face box to pad on each side (helps avoid tight crops).

	    Returns:
	    - (face_array, used_filename); used_filename is the base name of image_path,
	      or "upload.jpg" when that base name is empty.
	    """
	    # (scale_factor, min_neighbors, min_side), ordered from strict to permissive.
	    attempts = (
	        (1.1, 5, 30),   # standard detection
	        (1.05, 3, 20),  # more permissive (helps blurry / odd-angle photos)
	        (1.03, 2, 15),  # even more permissive (for challenging conditions)
	    )
	    cascade_names = (
	        "haarcascade_frontalface_default.xml",
	        "haarcascade_frontalface_alt.xml",
	        "haarcascade_frontalface_alt2.xml",
	    )

	    try:
	        img = cv2.imread(image_path)
	        if img is None:
	            return None, None

	        h0, w0 = img.shape[:2]
	        # Grayscale version: used for detection and for the final crop.
	        gray_full = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

	        # Downscale for faster detection if the image is huge.
	        scale = 1.0
	        max_side = max(w0, h0)
	        if max_side > detect_max_dim:
	            scale = detect_max_dim / float(max_side)
	            # Clamp to 1 px so extreme aspect ratios cannot yield a zero-sized target.
	            small_w = max(1, int(w0 * scale))
	            small_h = max(1, int(h0 * scale))
	            small = cv2.resize(gray_full, (small_w, small_h), interpolation=cv2.INTER_LINEAR)
	        else:
	            # Nothing below writes to the detection image, so no copy is needed.
	            small = gray_full

	        # Enhanced image is tried first; it helps on blurry / low-contrast photos.
	        small_enh = _enhance_for_detection(small)

	        faces = ()
	        for cascade_name in cascade_names:
	            faces = _detect_with_cascade(cascade_name, small_enh, attempts)
	            if len(faces) > 0:
	                break

	        # Sometimes enhancement hurts detection: retry once on the plain image.
	        if len(faces) == 0:
	            faces = _detect_with_cascade(cascade_names[0], small, attempts[1:2])

	        if len(faces) == 0:
	            return None, None

	        # Choose the largest detected face (usually the main subject).
	        # Areas are computed with Python ints to avoid fixed-width overflow.
	        x_s, y_s, w_s, h_s = max(faces, key=lambda box: int(box[2]) * int(box[3]))

	        # Map coordinates back to original image scale.
	        x = int(x_s / scale)
	        y = int(y_s / scale)
	        w = int(w_s / scale)
	        h = int(h_s / scale)

	        # Pad the bounding box (pad_ratio of face size), clamped to the image.
	        pad_w = int(w * pad_ratio)
	        pad_h = int(h * pad_ratio)
	        x1 = max(0, x - pad_w)
	        y1 = max(0, y - pad_h)
	        x2 = min(w0, x + w + pad_w)
	        y2 = min(h0, y + h + pad_h)

	        face_crop = gray_full[y1:y2, x1:x2]
	        if face_crop.size == 0:
	            # e.g. a negative pad_ratio collapsed the box; nothing to resize.
	            return None, None

	        # Final resize to model input. cv2.resize takes (width, height).
	        # INTER_CUBIC preserves more detail when upscaling small faces.
	        face_resized = cv2.resize(
	            face_crop, (target_size[1], target_size[0]), interpolation=cv2.INTER_CUBIC
	        )

	        # Convert to float32 and normalize.
	        face_arr = np.asarray(face_resized, dtype=np.float32) / 255.0

	        # Add batch and channel dims -> (1, H, W, 1).
	        face_arr = face_arr[np.newaxis, :, :, np.newaxis]

	        # Final sanity check.
	        if not np.isfinite(face_arr).all():
	            return None, None

	        used_filename = os.path.basename(image_path) or "upload.jpg"
	        return face_arr, used_filename

	    except Exception:
	        # Don't leak internals to the caller, but record the failure so the app can see it.
	        logging.getLogger(__name__).exception("Face preprocessing failed for %s", image_path)
	        return None, None