refactor: remove unused image quality and obstruction detection functions from image_processor.py
models/image_processor.py    CHANGED    +7 -387
@@ -3,333 +3,11 @@ import base64
 import numpy as np
 from uuid import uuid4
 from PIL import Image as PILImage
-import cv2
-import mediapipe as mp

 from models.face_recognition import EnsembleFaceRecognition, extract_faces, extract_faces_mediapipe
 from utils.vtt_parser import parse_vtt_offsets


-def assess_image_quality(image):
-    """
-    Assess image quality based on blur, brightness, and contrast
-
-    Parameters:
-        image: numpy array of image
-
-    Returns:
-        dict with quality metrics (all normalized to 0-1 range)
-    """
-    # Convert to grayscale for analysis
-    if len(image.shape) == 3:
-        gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
-    else:
-        gray = image
-
-    # Blur detection using Laplacian variance
-    blur_score = cv2.Laplacian(gray, cv2.CV_64F).var()
-    # Normalize blur score (higher is better, typical range 0-2000)
-    blur_normalized = min(blur_score / 1000.0, 1.0)
-
-    # Brightness assessment (0-255 range)
-    brightness = np.mean(gray)
-    # Normalize brightness (optimal range 50-200, penalize very dark/bright)
-    if brightness < 50:
-        brightness_normalized = brightness / 50.0
-    elif brightness > 200:
-        brightness_normalized = 1.0 - (brightness - 200) / 55.0
-    else:
-        brightness_normalized = 1.0
-    brightness_normalized = max(0.0, min(1.0, brightness_normalized))
-
-    # Contrast assessment using standard deviation
-    contrast = np.std(gray)
-    # Normalize contrast (higher is better, typical range 0-100)
-    contrast_normalized = min(contrast / 80.0, 1.0)
-
-    # Overall quality score (weighted average)
-    overall_quality = (blur_normalized * 0.4 + brightness_normalized * 0.3 + contrast_normalized * 0.3)
-
-    return {
-        'blur': blur_normalized,
-        'brightness': brightness_normalized,
-        'contrast': contrast_normalized,
-        'overall': overall_quality
-    }
-
-
-def detect_face_obstruction(image, confidence_threshold=0.5, overlay_path=None):
-    """
-    Detect face obstruction using MediaPipe facial landmarks and optionally export overlay image.
-
-    Parameters:
-        image: numpy array of face image
-        confidence_threshold: minimum confidence for landmark detection
-        overlay_path: if provided, saves overlay image with landmarks to this path
-
-    Returns:
-        dict with obstruction metrics
-    """
-    mp_face_mesh = mp.solutions.face_mesh
-
-    with mp_face_mesh.FaceMesh(
-        static_image_mode=True,
-        max_num_faces=1,
-        refine_landmarks=True,
-        min_detection_confidence=confidence_threshold
-    ) as face_mesh:
-
-        # Convert RGB to BGR for MediaPipe
-        image_bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
-        results = face_mesh.process(image_bgr)
-
-        if not results.multi_face_landmarks:
-            if overlay_path:
-                cv2.imwrite(overlay_path, image_bgr)
-            return {'obstruction_score': 0.0, 'landmark_visibility': 0.0}
-
-        landmarks = results.multi_face_landmarks[0]
-
-        # Key facial landmarks indices for obstruction detection
-        key_landmarks = {
-            'left_eye': [33, 7, 163, 144, 145, 153, 154, 155, 133, 173, 157, 158, 159, 160, 161, 246],
-            'right_eye': [362, 382, 381, 380, 374, 373, 390, 249, 263, 466, 388, 387, 386, 385, 384, 398],
-            'nose': [1, 2, 5, 4, 6, 168, 8, 9, 10, 151, 195, 197, 196, 3, 51, 48, 115, 131, 134, 102, 49, 220, 305, 291, 303, 267, 269, 270, 267, 271, 272],
-            'mouth': [61, 84, 17, 314, 405, 320, 307, 375, 321, 308, 324, 318, 402, 317, 14, 87, 178, 88, 95, 78, 191, 80, 81, 82, 13, 312, 311, 310, 415, 308, 78, 191, 80, 81, 82, 13, 312, 311, 310, 415]
-        }
-
-        total_landmarks = sum(len(indices) for indices in key_landmarks.values())
-        visible_landmarks = 0
-
-        # Prepare overlay image if needed
-        overlay_img = image_bgr.copy() if overlay_path else None
-
-        h, w = image_bgr.shape[:2]
-
-        # Check visibility of key landmarks and draw if overlay requested
-        for region, indices in key_landmarks.items():
-            color = {
-                'left_eye': (0, 255, 0),
-                'right_eye': (0, 255, 255),
-                'nose': (255, 0, 0),
-                'mouth': (255, 0, 255)
-            }.get(region, (255, 255, 255))
-            for idx in indices:
-                if idx < len(landmarks.landmark):
-                    landmark = landmarks.landmark[idx]
-                    if 0 <= landmark.x <= 1 and 0 <= landmark.y <= 1:
-                        visible_landmarks += 1
-                        if overlay_img is not None:
-                            cx, cy = int(landmark.x * w), int(landmark.y * h)
-                            cv2.circle(overlay_img, (cx, cy), 2, color, -1)
-
-        landmark_visibility = visible_landmarks / total_landmarks
-        obstruction_score = landmark_visibility
-
-        # Save overlay image if requested
-        if overlay_path and overlay_img is not None:
-            cv2.imwrite(overlay_path, overlay_img)
-
-        return {
-            'obstruction_score': obstruction_score,
-            'landmark_visibility': landmark_visibility
-        }
-
-
-def calculate_relative_face_size(face_area, frame_area):
-    """
-    Calculate relative face size with logarithmic scaling
-
-    Parameters:
-        face_area: area of detected face in pixels
-        frame_area: total area of frame in pixels
-
-    Returns:
-        normalized size score (0-1 range)
-    """
-    if frame_area == 0:
-        return 0.0
-
-    relative_size = face_area / frame_area
-
-    # Apply logarithmic scaling to prevent huge faces from dominating
-    # Optimal face size is around 5-20% of frame
-    if relative_size < 0.01:  # Very small face
-        size_score = relative_size / 0.01
-    elif relative_size <= 0.20:  # Optimal range
-        size_score = 1.0
-    else:  # Very large face
-        size_score = max(0.1, 1.0 - (relative_size - 0.20) / 0.30)
-
-    return min(1.0, max(0.0, size_score))
-
-
-def detect_face_orientation(image, confidence_threshold=0.5, debug=False):
-    """
-    Detect face orientation to score frontal faces higher
-    Uses MediaPipe facial landmarks to determine face angle
-
-    Parameters:
-        image: numpy array of face image
-        confidence_threshold: minimum confidence for landmark detection
-
-    Returns:
-        dict with orientation metrics (higher score = more frontal)
-    """
-    mp_face_mesh = mp.solutions.face_mesh
-
-    with mp_face_mesh.FaceMesh(
-        static_image_mode=True,
-        max_num_faces=1,
-        refine_landmarks=True,
-        min_detection_confidence=confidence_threshold
-    ) as face_mesh:
-
-        # Convert RGB to BGR for MediaPipe
-        image_bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
-        results = face_mesh.process(image_bgr)
-
-        if not results.multi_face_landmarks:
-            return {'orientation_score': 0.0, 'is_frontal': False}
-
-        landmarks = results.multi_face_landmarks[0]
-
-        # Key landmark indices for orientation detection
-        nose_tip = 1              # nose tip
-        left_eye_inner = 133      # left eye inner corner
-        right_eye_inner = 362     # right eye inner corner
-        left_mouth_corner = 61    # left mouth corner
-        right_mouth_corner = 291  # right mouth corner
-        chin = 18                 # chin center
-
-        # Get landmark coordinates
-        h, w = image_bgr.shape[:2]
-
-        def get_landmark_coord(idx):
-            if idx < len(landmarks.landmark):
-                lm = landmarks.landmark[idx]
-                return (lm.x * w, lm.y * h)
-            return None
-
-        nose_coord = get_landmark_coord(nose_tip)
-        left_eye_coord = get_landmark_coord(left_eye_inner)
-        right_eye_coord = get_landmark_coord(right_eye_inner)
-        left_mouth_coord = get_landmark_coord(left_mouth_corner)
-        right_mouth_coord = get_landmark_coord(right_mouth_corner)
-        chin_coord = get_landmark_coord(chin)
-
-        # Skip if key landmarks are missing
-        if not all([nose_coord, left_eye_coord, right_eye_coord, left_mouth_coord, right_mouth_coord]):
-            if debug:
-                print(f"DEBUG: Missing key landmarks - returning 0.0 orientation score")
-            return {'orientation_score': 0.0, 'is_frontal': False}
-
-        # Calculate symmetry metrics
-        # 1. Eye distance symmetry - frontal faces have balanced eye distances from nose
-        eye_center_x = (left_eye_coord[0] + right_eye_coord[0]) / 2
-        nose_to_eye_center = abs(nose_coord[0] - eye_center_x)
-        eye_distance = abs(right_eye_coord[0] - left_eye_coord[0])
-        eye_symmetry = 1.0 - min(1.0, nose_to_eye_center / (eye_distance / 2)) if eye_distance > 0 else 0.0
-
-        # 2. Mouth symmetry - frontal faces have balanced mouth corners from nose
-        mouth_center_x = (left_mouth_coord[0] + right_mouth_coord[0]) / 2
-        nose_to_mouth_center = abs(nose_coord[0] - mouth_center_x)
-        mouth_width = abs(right_mouth_coord[0] - left_mouth_coord[0])
-        mouth_symmetry = 1.0 - min(1.0, nose_to_mouth_center / (mouth_width / 2)) if mouth_width > 0 else 0.0
-
-        # 3. Vertical alignment - nose should be roughly centered between eyes and mouth
-        if chin_coord:
-            eye_y = (left_eye_coord[1] + right_eye_coord[1]) / 2
-            vertical_center = (eye_y + chin_coord[1]) / 2
-            vertical_alignment = 1.0 - min(1.0, abs(nose_coord[1] - vertical_center) / (abs(chin_coord[1] - eye_y) / 2))
-        else:
-            vertical_alignment = 0.5
-
-        # 4. Face width ratio - frontal faces show more balanced left/right visibility
-        face_width = abs(right_eye_coord[0] - left_eye_coord[0])
-        left_visibility = abs(nose_coord[0] - left_eye_coord[0])
-        right_visibility = abs(right_eye_coord[0] - nose_coord[0])
-
-        if face_width > 0:
-            width_ratio = min(left_visibility, right_visibility) / max(left_visibility, right_visibility)
-        else:
-            width_ratio = 0.0
-
-        # Combine metrics with weights
-        orientation_score = (
-            eye_symmetry * 0.3 +
-            mouth_symmetry * 0.3 +
-            vertical_alignment * 0.2 +
-            width_ratio * 0.2
-        )
-
-        # Determine if face is frontal (threshold-based)
-        is_frontal = orientation_score > 0.7
-
-        if debug:
-            print(f"DEBUG ORIENTATION DETECTION:")
-            print(f"  Eye coordinates: Left={left_eye_coord}, Right={right_eye_coord}, Nose={nose_coord}")
-            print(f"  Eye center: {eye_center_x:.1f}, Eye distance: {eye_distance:.1f}")
-            print(f"  Nose to eye center distance: {nose_to_eye_center:.1f}")
-            print(f"  Eye symmetry: {eye_symmetry:.3f} (1.0 = perfect symmetry)")
-            print(f"  Mouth coordinates: Left={left_mouth_coord}, Right={right_mouth_coord}")
-            print(f"  Mouth center: {mouth_center_x:.1f}, Mouth width: {mouth_width:.1f}")
-            print(f"  Nose to mouth center distance: {nose_to_mouth_center:.1f}")
-            print(f"  Mouth symmetry: {mouth_symmetry:.3f} (1.0 = perfect symmetry)")
-            print(f"  Vertical alignment: {vertical_alignment:.3f} (1.0 = perfect alignment)")
-            print(f"  Width ratio: {width_ratio:.3f} (1.0 = perfect balance)")
-            print(f"  Final orientation score: {orientation_score:.3f} (higher = more frontal)")
-            print(f"  Is frontal: {is_frontal}")
-
-        return {
-            'orientation_score': orientation_score,
-            'is_frontal': is_frontal,
-            'eye_symmetry': eye_symmetry,
-            'mouth_symmetry': mouth_symmetry,
-            'vertical_alignment': vertical_alignment,
-            'width_ratio': width_ratio
-        }
-
-
-def compute_composite_score(confidence, quality, size, obstruction, orientation=None, weights=None, debug=False):
-    """
-    Compute composite score from multiple quality factors
-
-    Parameters:
-        confidence: face detection confidence (0-1)
-        quality: image quality score (0-1)
-        size: face size score (0-1)
-        obstruction: face obstruction score (0-1)
-        orientation: face orientation score (0-1, higher = more frontal)
-        weights: dict with weights for each factor
-        debug: if True, print debugging information
-
-    Returns:
-        composite score (0-1 range)
-    """
-    if weights is None:
-        weights = {
-            'confidence': 0.4,   # Face detection confidence is most important
-            'quality': 0.2,      # Image quality matters
-            'size': 0.2,         # Appropriate face size
-            'obstruction': 0.1,  # Less obstruction is better
-            'orientation': 0.1   # Frontal faces preferred but not dominating
-        }
-
-    composite = (
-        confidence * weights['confidence'] +
-        quality * weights['quality'] +
-        size * weights['size'] +
-        obstruction * weights['obstruction']
-    )
-
-    # Add orientation score if provided
-    orientation_contribution = 0.0
-    if orientation is not None:
-        orientation_contribution = orientation * weights['orientation']
-        composite += orientation_contribution
-
-    return min(1.0, max(0.0, composite))

 def get_face_predictions(face, ensemble, data_manager, results):
     """

@@ -408,18 +86,16 @@ def image_search_performers(image, data_manager, threshold=0.5, results=3):
         })
     return response

-def find_faces_in_sprite(image, vtt_file, sort_by_quality=True, debug=True):
+def find_faces_in_sprite(image, vtt_file):
     """
-    Find faces in a sprite image using VTT data
+    Find faces in a sprite image using VTT data

    Parameters:
        image: PIL Image object
        vtt_file: File object containing VTT data
-        sort_by_quality: If True, sort results by composite quality score
-        debug: If True, print debugging information

    Returns:
-        List of dictionaries with face information
+        List of dictionaries with face information
    """
    with open(vtt_file.name, 'r', encoding='utf-8') as f:
        vtt = f.read().encode('utf-8')

@@ -428,65 +104,9 @@ def find_faces_in_sprite(image, vtt_file, sort_by_quality=True, debug=True):
     results = []
     for i, (left, top, right, bottom, time_seconds) in enumerate(parse_vtt_offsets(vtt)):
         cut_frame = sprite.crop((left, top, left + right, top + bottom))
-
-
-        # Extract faces with detailed information
-        faces = extract_faces_mediapipe(cut_frame_array, enforce_detection=False, align=False)
+        faces = extract_faces_mediapipe(np.asarray(cut_frame), enforce_detection=False, align=False)
         faces = [face for face in faces if face['confidence'] > 0.6]
-
         if faces:
-
-
-
-            face_size = face_area['w'] * face_area['h']
-            frame_size = cut_frame_array.shape[0] * cut_frame_array.shape[1]
-
-            # Extract face region for quality assessment
-            face_x1 = max(0, int(face_area['x']))
-            face_y1 = max(0, int(face_area['y']))
-            face_x2 = min(cut_frame_array.shape[1], int(face_area['x'] + face_area['w']))
-            face_y2 = min(cut_frame_array.shape[0], int(face_area['y'] + face_area['h']))
-
-            face_region = cut_frame_array[face_y1:face_y2, face_x1:face_x2]
-
-            # Skip if face region is too small
-            if face_region.size == 0:
-                continue
-
-            # Assess quality metrics
-            quality_metrics = assess_image_quality(face_region)
-            obstruction_metrics = detect_face_obstruction(face_region)
-            orientation_metrics = detect_face_orientation(face_region, debug=debug)
-            size_score = calculate_relative_face_size(face_size, frame_size)
-
-            # Compute composite score
-            composite_score = compute_composite_score(
-                confidence=best_face['confidence'],
-                quality=quality_metrics['overall'],
-                size=size_score,
-                obstruction=obstruction_metrics['obstruction_score'],
-                orientation=orientation_metrics['orientation_score'],
-                debug=debug
-            )
-
-            # Create result data with enhanced metrics
-            data = {
-                'id': str(uuid4()),
-                'offset': (left, top, right, bottom),
-                'frame': i,
-                'time': time_seconds,
-                'size': face_size,
-                'confidence': best_face['confidence'],
-                'quality_metrics': quality_metrics,
-                'obstruction_metrics': obstruction_metrics,
-                'orientation_metrics': orientation_metrics,
-                'size_score': size_score,
-                'composite_score': composite_score
-            }
-            results.append(data)
-
-    # Sort by composite score (highest first) if requested
-    if sort_by_quality:
-        results.sort(key=lambda x: x['composite_score'], reverse=True)
-
-    return results
+            size = faces[0]['facial_area']['w'] * faces[0]['facial_area']['h']
+            data = {'id': str(uuid4()), "offset": (left, top, right, bottom), "frame": i, "time": time_seconds, 'size': size}
+            results.append(data)
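For readers auditing the removal: the deleted compute_composite_score weighted the five factors as 0.4 (confidence), 0.2 (quality), 0.2 (size), 0.1 (obstruction) and 0.1 (orientation), then clamped the result to [0, 1]. A minimal sketch of that arithmetic follows; the sample scores are hypothetical, not taken from the commit:

    # Default weights as defined in the removed compute_composite_score.
    weights = {'confidence': 0.4, 'quality': 0.2, 'size': 0.2, 'obstruction': 0.1, 'orientation': 0.1}
    # Hypothetical per-face scores, each already normalized to 0-1.
    scores = {'confidence': 0.9, 'quality': 0.7, 'size': 1.0, 'obstruction': 0.8, 'orientation': 0.6}
    # Weighted sum, clamped to the 0-1 range exactly as the removed code did.
    composite = min(1.0, max(0.0, sum(scores[k] * weights[k] for k in weights)))
    print(round(composite, 2))  # 0.84 = 0.36 + 0.14 + 0.20 + 0.08 + 0.06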
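After this commit, find_faces_in_sprite keeps only the confidence filter (> 0.6) and records a flat per-frame dict; the quality, obstruction, orientation and composite-score fields are gone, as is the sort_by_quality ordering. A minimal sketch of the surviving bookkeeping, where the face dict is a hypothetical stand-in for one entry returned by extract_faces_mediapipe:

    from uuid import uuid4

    # Hypothetical detection; real entries come from extract_faces_mediapipe.
    face = {'confidence': 0.87, 'facial_area': {'x': 10, 'y': 8, 'w': 64, 'h': 80}}
    if face['confidence'] > 0.6:  # same threshold as in the diff
        # Face size is now just the facial_area width times height.
        size = face['facial_area']['w'] * face['facial_area']['h']
        data = {'id': str(uuid4()),
                'offset': (0, 0, 160, 90),  # (left, top, right, bottom) from the VTT
                'frame': 0, 'time': 12.5, 'size': size}
        print(data['size'])  # 5120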