cc1234 committed on
Commit
86e1f1d
·
1 Parent(s): 943ae68

refactor: remove unused image quality and obstruction detection functions from image_processor.py

Browse files
Files changed (1) hide show
  1. models/image_processor.py +7 -387
models/image_processor.py CHANGED
@@ -3,333 +3,11 @@ import base64
3
  import numpy as np
4
  from uuid import uuid4
5
  from PIL import Image as PILImage
6
- import cv2
7
- import mediapipe as mp
8
 
9
  from models.face_recognition import EnsembleFaceRecognition, extract_faces, extract_faces_mediapipe
10
  from utils.vtt_parser import parse_vtt_offsets
11
 
12
 
13
def assess_image_quality(image):
    """
    Score an image on sharpness, brightness and contrast.

    Parameters:
    image: numpy array of image; a 3-dimensional array is converted with
           cv2.COLOR_RGB2GRAY, anything else is analysed as-is

    Returns:
    dict with keys 'blur', 'brightness', 'contrast' and 'overall'; each
    value is capped at 1.0 and 'overall' is their weighted sum
    """
    # All metrics are computed on a single-channel version of the image.
    has_channel_axis = len(image.shape) == 3
    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY) if has_channel_axis else image

    # Sharpness: variance of the Laplacian, saturating at 1000.
    laplacian_variance = cv2.Laplacian(gray, cv2.CV_64F).var()
    blur_normalized = min(laplacian_variance / 1000.0, 1.0)

    # Brightness: full marks inside [50, 200], linear fall-off outside it.
    mean_level = np.mean(gray)
    if mean_level < 50:
        brightness_raw = mean_level / 50.0
    elif mean_level > 200:
        brightness_raw = 1.0 - (mean_level - 200) / 55.0
    else:
        brightness_raw = 1.0
    brightness_normalized = max(0.0, min(1.0, brightness_raw))

    # Contrast: standard deviation of the grey levels, saturating at 80.
    spread = np.std(gray)
    contrast_normalized = min(spread / 80.0, 1.0)

    # Weighted blend: 40% sharpness, 30% brightness, 30% contrast.
    overall_quality = (blur_normalized * 0.4 + brightness_normalized * 0.3 + contrast_normalized * 0.3)

    metrics = {}
    metrics['blur'] = blur_normalized
    metrics['brightness'] = brightness_normalized
    metrics['contrast'] = contrast_normalized
    metrics['overall'] = overall_quality
    return metrics
59
-
60
-
61
def detect_face_obstruction(image, confidence_threshold=0.5, overlay_path=None):
    """
    Estimate how much of a face is visible using MediaPipe facial landmarks,
    optionally exporting an overlay image with the landmarks drawn on it.

    Parameters:
    image: numpy array of face image, in RGB channel order
    confidence_threshold: minimum confidence for landmark detection
    overlay_path: if provided, saves overlay image with landmarks to this path
                  (the undecorated image is saved when no face is found)

    Returns:
    dict with 'obstruction_score' and 'landmark_visibility'. Both hold the
    same value: the fraction of key landmarks whose normalized coordinates
    fall inside the image (1.0 = all inside, 0.0 = none or no face found).
    """
    mp_face_mesh = mp.solutions.face_mesh

    # Key facial landmark indices, grouped by region.
    # NOTE(review): 'nose' and 'mouth' contain repeated indices (e.g. 267,
    # 308, 78, 191, ...), so those points count twice in the ratio below.
    # Left untouched to keep scores comparable; confirm whether intended.
    key_landmarks = {
        'left_eye': [33, 7, 163, 144, 145, 153, 154, 155, 133, 173, 157, 158, 159, 160, 161, 246],
        'right_eye': [362, 382, 381, 380, 374, 373, 390, 249, 263, 466, 388, 387, 386, 385, 384, 398],
        'nose': [1, 2, 5, 4, 6, 168, 8, 9, 10, 151, 195, 197, 196, 3, 51, 48, 115, 131, 134, 102, 49, 220, 305, 291, 303, 267, 269, 270, 267, 271, 272],
        'mouth': [61, 84, 17, 314, 405, 320, 307, 375, 321, 308, 324, 318, 402, 317, 14, 87, 178, 88, 95, 78, 191, 80, 81, 82, 13, 312, 311, 310, 415, 308, 78, 191, 80, 81, 82, 13, 312, 311, 310, 415]
    }

    # Overlay dot colours per region (tuples are in OpenCV's BGR order).
    region_colors = {
        'left_eye': (0, 255, 0),
        'right_eye': (0, 255, 255),
        'nose': (255, 0, 0),
        'mouth': (255, 0, 255)
    }

    with mp_face_mesh.FaceMesh(
        static_image_mode=True,
        max_num_faces=1,
        refine_landmarks=True,
        min_detection_confidence=confidence_threshold
    ) as face_mesh:

        # MediaPipe expects RGB input, so the frame is passed through without
        # the BGR swap the previous version applied. ascontiguousarray is a
        # no-op for already-contiguous input and otherwise makes a copy.
        results = face_mesh.process(np.ascontiguousarray(image))

        # OpenCV writes BGR, so the channel swap is only needed for the overlay.
        overlay_img = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) if overlay_path else None

        if not results.multi_face_landmarks:
            if overlay_path:
                cv2.imwrite(overlay_path, overlay_img)
            return {'obstruction_score': 0.0, 'landmark_visibility': 0.0}

        landmarks = results.multi_face_landmarks[0]

        total_landmarks = sum(len(indices) for indices in key_landmarks.values())
        visible_landmarks = 0

        h, w = image.shape[:2]

        # Count key landmarks that land inside the frame; draw them if an
        # overlay was requested.
        for region, indices in key_landmarks.items():
            color = region_colors.get(region, (255, 255, 255))
            for idx in indices:
                if idx < len(landmarks.landmark):
                    landmark = landmarks.landmark[idx]
                    if 0 <= landmark.x <= 1 and 0 <= landmark.y <= 1:
                        visible_landmarks += 1
                        if overlay_img is not None:
                            cx, cy = int(landmark.x * w), int(landmark.y * h)
                            cv2.circle(overlay_img, (cx, cy), 2, color, -1)

        landmark_visibility = visible_landmarks / total_landmarks
        # Higher means *less* obstructed, despite the key name.
        obstruction_score = landmark_visibility

        # Save overlay image if requested
        if overlay_img is not None:
            cv2.imwrite(overlay_path, overlay_img)

        return {
            'obstruction_score': obstruction_score,
            'landmark_visibility': landmark_visibility
        }
137
-
138
-
139
def calculate_relative_face_size(face_area, frame_area):
    """
    Score how well a face's size suits its frame, using a piecewise-linear curve.

    The score ramps linearly from 0 to 1 while the face covers less than 1%
    of the frame, stays at 1.0 between 1% and 20%, then falls linearly for
    larger faces down to a floor of 0.1 (reached at 47% coverage).

    Parameters:
    face_area: area of detected face in pixels
    frame_area: total area of frame in pixels

    Returns:
    normalized size score (0-1 range); 0.0 when frame_area is not positive
    or face_area is negative
    """
    # Non-positive frame area or negative face area is meaningless; without
    # this guard two negative inputs would yield a positive score.
    if frame_area <= 0 or face_area < 0:
        return 0.0

    relative_size = face_area / frame_area

    if relative_size < 0.01:  # Very small face: linear ramp up
        size_score = relative_size / 0.01
    elif relative_size <= 0.20:  # Optimal range
        size_score = 1.0
    else:  # Very large face: linear fall-off, never below 0.1
        size_score = max(0.1, 1.0 - (relative_size - 0.20) / 0.30)

    return min(1.0, max(0.0, size_score))
165
-
166
-
167
def detect_face_orientation(image, confidence_threshold=0.5, debug=False):
    """
    Detect face orientation to score frontal faces higher.
    Uses MediaPipe facial landmarks and left/right symmetry heuristics.

    Parameters:
    image: numpy array of face image, in RGB channel order
    confidence_threshold: minimum confidence for landmark detection
    debug: if True, print the intermediate metrics

    Returns:
    dict with orientation metrics (higher score = more frontal). When no
    face or no usable landmarks are found, only 'orientation_score' (0.0)
    and 'is_frontal' (False) are present; otherwise the dict also carries
    'eye_symmetry', 'mouth_symmetry', 'vertical_alignment' and 'width_ratio'.
    """
    mp_face_mesh = mp.solutions.face_mesh

    with mp_face_mesh.FaceMesh(
        static_image_mode=True,
        max_num_faces=1,
        refine_landmarks=True,
        min_detection_confidence=confidence_threshold
    ) as face_mesh:

        # MediaPipe expects RGB input, so the frame is passed through without
        # the BGR swap the previous version applied. ascontiguousarray is a
        # no-op for already-contiguous input and otherwise makes a copy.
        results = face_mesh.process(np.ascontiguousarray(image))

        if not results.multi_face_landmarks:
            return {'orientation_score': 0.0, 'is_frontal': False}

        landmarks = results.multi_face_landmarks[0]

        # Key landmark indices for orientation detection
        nose_tip = 1  # nose tip
        left_eye_inner = 133  # left eye inner corner
        right_eye_inner = 362  # right eye inner corner
        left_mouth_corner = 61  # left mouth corner
        right_mouth_corner = 291  # right mouth corner
        # NOTE(review): labelled "chin center" by the author; verify index 18
        # against the MediaPipe face mesh map.
        chin = 18

        # Landmarks are normalized; scale them to pixel coordinates.
        h, w = image.shape[:2]

        def get_landmark_coord(idx):
            if idx < len(landmarks.landmark):
                lm = landmarks.landmark[idx]
                return (lm.x * w, lm.y * h)
            return None

        nose_coord = get_landmark_coord(nose_tip)
        left_eye_coord = get_landmark_coord(left_eye_inner)
        right_eye_coord = get_landmark_coord(right_eye_inner)
        left_mouth_coord = get_landmark_coord(left_mouth_corner)
        right_mouth_coord = get_landmark_coord(right_mouth_corner)
        chin_coord = get_landmark_coord(chin)

        # Skip if key landmarks are missing (the chin is optional)
        if not all([nose_coord, left_eye_coord, right_eye_coord, left_mouth_coord, right_mouth_coord]):
            if debug:
                print("DEBUG: Missing key landmarks - returning 0.0 orientation score")
            return {'orientation_score': 0.0, 'is_frontal': False}

        # 1. Eye symmetry - how close the nose is to the midpoint of the eyes,
        #    relative to half the eye distance.
        eye_center_x = (left_eye_coord[0] + right_eye_coord[0]) / 2
        nose_to_eye_center = abs(nose_coord[0] - eye_center_x)
        eye_distance = abs(right_eye_coord[0] - left_eye_coord[0])
        eye_symmetry = 1.0 - min(1.0, nose_to_eye_center / (eye_distance / 2)) if eye_distance > 0 else 0.0

        # 2. Mouth symmetry - same idea using the mouth corners.
        mouth_center_x = (left_mouth_coord[0] + right_mouth_coord[0]) / 2
        nose_to_mouth_center = abs(nose_coord[0] - mouth_center_x)
        mouth_width = abs(right_mouth_coord[0] - left_mouth_coord[0])
        mouth_symmetry = 1.0 - min(1.0, nose_to_mouth_center / (mouth_width / 2)) if mouth_width > 0 else 0.0

        # 3. Vertical alignment - nose position relative to the midpoint
        #    between the eye line and the chin.
        if chin_coord:
            eye_y = (left_eye_coord[1] + right_eye_coord[1]) / 2
            vertical_center = (eye_y + chin_coord[1]) / 2
            half_span = abs(chin_coord[1] - eye_y) / 2
            if half_span > 0:
                vertical_alignment = 1.0 - min(1.0, abs(nose_coord[1] - vertical_center) / half_span)
            else:
                # Degenerate geometry (chin on the eye line): score 0.0, like
                # the other symmetry terms, instead of dividing by zero.
                vertical_alignment = 0.0
        else:
            vertical_alignment = 0.5

        # 4. Width ratio - balance of the nose-to-eye distances on each side.
        face_width = abs(right_eye_coord[0] - left_eye_coord[0])
        left_visibility = abs(nose_coord[0] - left_eye_coord[0])
        right_visibility = abs(right_eye_coord[0] - nose_coord[0])

        if face_width > 0:
            # face_width > 0 means the eyes differ in x, so the larger of the
            # two distances cannot be zero.
            width_ratio = min(left_visibility, right_visibility) / max(left_visibility, right_visibility)
        else:
            width_ratio = 0.0

        # Combine metrics with weights
        orientation_score = (
            eye_symmetry * 0.3 +
            mouth_symmetry * 0.3 +
            vertical_alignment * 0.2 +
            width_ratio * 0.2
        )

        # Determine if face is frontal (threshold-based)
        is_frontal = orientation_score > 0.7

        if debug:
            print("DEBUG ORIENTATION DETECTION:")
            print(f" Eye coordinates: Left={left_eye_coord}, Right={right_eye_coord}, Nose={nose_coord}")
            print(f" Eye center: {eye_center_x:.1f}, Eye distance: {eye_distance:.1f}")
            print(f" Nose to eye center distance: {nose_to_eye_center:.1f}")
            print(f" Eye symmetry: {eye_symmetry:.3f} (1.0 = perfect symmetry)")
            print(f" Mouth coordinates: Left={left_mouth_coord}, Right={right_mouth_coord}")
            print(f" Mouth center: {mouth_center_x:.1f}, Mouth width: {mouth_width:.1f}")
            print(f" Nose to mouth center distance: {nose_to_mouth_center:.1f}")
            print(f" Mouth symmetry: {mouth_symmetry:.3f} (1.0 = perfect symmetry)")
            print(f" Vertical alignment: {vertical_alignment:.3f} (1.0 = perfect alignment)")
            print(f" Width ratio: {width_ratio:.3f} (1.0 = perfect balance)")
            print(f" Final orientation score: {orientation_score:.3f} (higher = more frontal)")
            print(f" Is frontal: {is_frontal}")

        return {
            'orientation_score': orientation_score,
            'is_frontal': is_frontal,
            'eye_symmetry': eye_symmetry,
            'mouth_symmetry': mouth_symmetry,
            'vertical_alignment': vertical_alignment,
            'width_ratio': width_ratio
        }
292
-
293
-
294
def compute_composite_score(confidence, quality, size, obstruction, orientation=None, weights=None, debug=False):
    """
    Compute composite score from multiple quality factors

    Parameters:
    confidence: face detection confidence (0-1)
    quality: image quality score (0-1)
    size: face size score (0-1)
    obstruction: face obstruction score (0-1)
    orientation: face orientation score (0-1, higher = more frontal);
                 when None the orientation term is left out and the other
                 weights are NOT rescaled
    weights: dict with weights for each factor; keys that are missing fall
             back to the default weight for that factor
    debug: accepted for call-site compatibility; currently unused

    Returns:
    composite score (0-1 range)
    """
    default_weights = {
        'confidence': 0.4,  # Face detection confidence is most important
        'quality': 0.2,  # Image quality matters
        'size': 0.2,  # Appropriate face size
        'obstruction': 0.1,  # Less obstruction is better
        'orientation': 0.1  # Frontal faces preferred but not dominating
    }
    # Merge into a fresh dict so a partial override cannot raise KeyError and
    # the caller's dict is never mutated.
    if weights is None:
        effective = default_weights
    else:
        effective = {**default_weights, **weights}

    composite = (
        confidence * effective['confidence'] +
        quality * effective['quality'] +
        size * effective['size'] +
        obstruction * effective['obstruction']
    )

    # Add orientation score if provided
    if orientation is not None:
        composite += orientation * effective['orientation']

    return min(1.0, max(0.0, composite))
333
 
334
  def get_face_predictions(face, ensemble, data_manager, results):
335
  """
@@ -408,18 +86,16 @@ def image_search_performers(image, data_manager, threshold=0.5, results=3):
408
  })
409
  return response
410
 
411
- def find_faces_in_sprite(image, vtt_file, sort_by_quality=True, debug=True):
412
  """
413
- Find faces in a sprite image using VTT data with intelligent quality ranking
414
 
415
  Parameters:
416
  image: PIL Image object
417
  vtt_file: File object containing VTT data
418
- sort_by_quality: If True, sort results by composite quality score
419
- debug: If True, print debugging information
420
 
421
  Returns:
422
- List of dictionaries with face information sorted by quality
423
  """
424
  with open(vtt_file.name, 'r', encoding='utf-8') as f:
425
  vtt = f.read().encode('utf-8')
@@ -428,65 +104,9 @@ def find_faces_in_sprite(image, vtt_file, sort_by_quality=True, debug=True):
428
  results = []
429
  for i, (left, top, right, bottom, time_seconds) in enumerate(parse_vtt_offsets(vtt)):
430
  cut_frame = sprite.crop((left, top, left + right, top + bottom))
431
- cut_frame_array = np.asarray(cut_frame)
432
-
433
- # Extract faces with detailed information
434
- faces = extract_faces_mediapipe(cut_frame_array, enforce_detection=False, align=False)
435
  faces = [face for face in faces if face['confidence'] > 0.6]
436
-
437
  if faces:
438
- # Process the highest confidence face from this frame
439
- best_face = max(faces, key=lambda x: x['confidence'])
440
- face_area = best_face['facial_area']
441
- face_size = face_area['w'] * face_area['h']
442
- frame_size = cut_frame_array.shape[0] * cut_frame_array.shape[1]
443
-
444
- # Extract face region for quality assessment
445
- face_x1 = max(0, int(face_area['x']))
446
- face_y1 = max(0, int(face_area['y']))
447
- face_x2 = min(cut_frame_array.shape[1], int(face_area['x'] + face_area['w']))
448
- face_y2 = min(cut_frame_array.shape[0], int(face_area['y'] + face_area['h']))
449
-
450
- face_region = cut_frame_array[face_y1:face_y2, face_x1:face_x2]
451
-
452
- # Skip if face region is too small
453
- if face_region.size == 0:
454
- continue
455
-
456
- # Assess quality metrics
457
- quality_metrics = assess_image_quality(face_region)
458
- obstruction_metrics = detect_face_obstruction(face_region)
459
- orientation_metrics = detect_face_orientation(face_region, debug=debug)
460
- size_score = calculate_relative_face_size(face_size, frame_size)
461
-
462
- # Compute composite score
463
- composite_score = compute_composite_score(
464
- confidence=best_face['confidence'],
465
- quality=quality_metrics['overall'],
466
- size=size_score,
467
- obstruction=obstruction_metrics['obstruction_score'],
468
- orientation=orientation_metrics['orientation_score'],
469
- debug=debug
470
- )
471
-
472
- # Create result data with enhanced metrics
473
- data = {
474
- 'id': str(uuid4()),
475
- 'offset': (left, top, right, bottom),
476
- 'frame': i,
477
- 'time': time_seconds,
478
- 'size': face_size,
479
- 'confidence': best_face['confidence'],
480
- 'quality_metrics': quality_metrics,
481
- 'obstruction_metrics': obstruction_metrics,
482
- 'orientation_metrics': orientation_metrics,
483
- 'size_score': size_score,
484
- 'composite_score': composite_score
485
- }
486
- results.append(data)
487
-
488
- # Sort by composite score (highest first) if requested
489
- if sort_by_quality:
490
- results.sort(key=lambda x: x['composite_score'], reverse=True)
491
-
492
- return results
 
3
  import numpy as np
4
  from uuid import uuid4
5
  from PIL import Image as PILImage
 
 
6
 
7
  from models.face_recognition import EnsembleFaceRecognition, extract_faces, extract_faces_mediapipe
8
  from utils.vtt_parser import parse_vtt_offsets
9
 
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
  def get_face_predictions(face, ensemble, data_manager, results):
13
  """
 
86
  })
87
  return response
88
 
89
+ def find_faces_in_sprite(image, vtt_file):
90
  """
91
+ Find faces in a sprite image using VTT data
92
 
93
  Parameters:
94
  image: PIL Image object
95
  vtt_file: File object containing VTT data
 
 
96
 
97
  Returns:
98
+ List of dictionaries with face information
99
  """
100
  with open(vtt_file.name, 'r', encoding='utf-8') as f:
101
  vtt = f.read().encode('utf-8')
 
104
  results = []
105
  for i, (left, top, right, bottom, time_seconds) in enumerate(parse_vtt_offsets(vtt)):
106
  cut_frame = sprite.crop((left, top, left + right, top + bottom))
107
+ faces = extract_faces_mediapipe(np.asarray(cut_frame), enforce_detection=False, align=False)
 
 
 
108
  faces = [face for face in faces if face['confidence'] > 0.6]
 
109
  if faces:
110
+ size = faces[0]['facial_area']['w'] * faces[0]['facial_area']['h']
111
+ data = {'id': str(uuid4()), "offset": (left, top, right, bottom), "frame": i, "time": time_seconds, 'size': size}
112
+ results.append(data)