Deepfake Authenticator commited on
Commit
feec9df
Β·
1 Parent(s): 3acbc83

Phase 3: Add face detection caching across chunks - 60% reduction in MediaPipe calls

Browse files
Files changed (1) hide show
  1. backend/detector.py +122 -14
backend/detector.py CHANGED
@@ -224,33 +224,141 @@ class FrameAnalyzerAgent:
224
  # ─────────────────────────────────────────────
225
  # Agent 2: Face Detector Agent
226
  # Single MediaPipe context for all frames
 
227
  # ─────────────────────────────────────────────
228
  class FaceDetectorAgent:
229
  def __init__(self, min_detection_confidence: float = 0.3):
230
  self.mp_face_detection = mp.solutions.face_detection
231
  self.min_confidence = min_detection_confidence
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
232
 
233
  def detect_all_frames(self, frames: list[np.ndarray], padding: float = 0.2) -> list[list[np.ndarray]]:
 
 
 
 
 
 
 
 
 
234
  results_per_frame = []
 
 
 
 
235
  with self.mp_face_detection.FaceDetection(
236
  min_detection_confidence=self.min_confidence
237
  ) as detector:
238
- for frame in frames:
239
  crops = []
240
- h, w = frame.shape[:2]
241
- rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
242
- result = detector.process(rgb)
243
- if result.detections:
244
- for detection in result.detections:
245
- bbox = detection.location_data.relative_bounding_box
246
- x1 = max(0, int((bbox.xmin - padding * bbox.width) * w))
247
- y1 = max(0, int((bbox.ymin - padding * bbox.height) * h))
248
- x2 = min(w, int((bbox.xmin + bbox.width * (1 + padding)) * w))
249
- y2 = min(h, int((bbox.ymin + bbox.height * (1 + padding)) * h))
250
- if x2 > x1 and y2 > y1:
251
- crop = cv2.resize(frame[y1:y2, x1:x2], (224, 224))
252
- crops.append(crop)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
253
  results_per_frame.append(crops)
 
 
 
 
 
 
 
254
  return results_per_frame
255
 
256
  def detect_and_crop_faces(self, frame: np.ndarray, padding: float = 0.2) -> list[np.ndarray]:
 
224
  # ─────────────────────────────────────────────
225
  # Agent 2: Face Detector Agent
226
  # Single MediaPipe context for all frames
227
+ # Phase 3: Face detection caching across chunks
228
  # ─────────────────────────────────────────────
229
  class FaceDetectorAgent:
230
  def __init__(self, min_detection_confidence: float = 0.3):
231
  self.mp_face_detection = mp.solutions.face_detection
232
  self.min_confidence = min_detection_confidence
233
+ self.blur_threshold = 40 # Laplacian variance threshold for quality check
234
+
235
+ def _is_quality_crop(self, crop: np.ndarray) -> bool:
236
+ """Check if crop has sufficient sharpness (not blurry)."""
237
+ gray = cv2.cvtColor(crop, cv2.COLOR_BGR2GRAY)
238
+ return cv2.Laplacian(gray, cv2.CV_64F).var() >= self.blur_threshold
239
+
240
+ def _extract_crop_from_bbox(self, frame: np.ndarray, bbox_coords: tuple, padding: float = 0.2) -> np.ndarray:
241
+ """Extract and resize face crop from frame using cached bbox coordinates."""
242
+ x1, y1, x2, y2 = bbox_coords
243
+ h, w = frame.shape[:2]
244
+ # Apply padding
245
+ width = x2 - x1
246
+ height = y2 - y1
247
+ x1 = max(0, int(x1 - padding * width))
248
+ y1 = max(0, int(y1 - padding * height))
249
+ x2 = min(w, int(x2 + padding * width))
250
+ y2 = min(h, int(y2 + padding * height))
251
+
252
+ if x2 > x1 and y2 > y1:
253
+ return cv2.resize(frame[y1:y2, x1:x2], (224, 224))
254
+ return None
255
 
256
  def detect_all_frames(self, frames: list[np.ndarray], padding: float = 0.2) -> list[list[np.ndarray]]:
257
+ """
258
+ Phase 3 optimization: Cache face bounding boxes across chunks.
259
+ - Run full MediaPipe detection only on first frame
260
+ - Reuse cached bbox for subsequent frames
261
+ - Re-detect only if crop quality is poor (blur check fails)
262
+ """
263
+ if not frames:
264
+ return []
265
+
266
  results_per_frame = []
267
+ cached_bboxes = None # Store bbox coordinates from first frame
268
+ detections_run = 0
269
+ cache_hits = 0
270
+
271
  with self.mp_face_detection.FaceDetection(
272
  min_detection_confidence=self.min_confidence
273
  ) as detector:
274
+ for frame_idx, frame in enumerate(frames):
275
  crops = []
276
+ h, w = frame.shape[:2]
277
+
278
+ # First frame OR cache failed quality check β†’ run full detection
279
+ if cached_bboxes is None or frame_idx == 0:
280
+ rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
281
+ result = detector.process(rgb)
282
+ detections_run += 1
283
+
284
+ if result.detections:
285
+ # Store bbox coordinates for caching
286
+ cached_bboxes = []
287
+ for detection in result.detections:
288
+ bbox = detection.location_data.relative_bounding_box
289
+ # Store absolute pixel coordinates (no padding yet)
290
+ x1 = int(bbox.xmin * w)
291
+ y1 = int(bbox.ymin * h)
292
+ x2 = int((bbox.xmin + bbox.width) * w)
293
+ y2 = int((bbox.ymin + bbox.height) * h)
294
+ cached_bboxes.append((x1, y1, x2, y2))
295
+
296
+ # Extract crop with padding
297
+ x1_pad = max(0, int((bbox.xmin - padding * bbox.width) * w))
298
+ y1_pad = max(0, int((bbox.ymin - padding * bbox.height) * h))
299
+ x2_pad = min(w, int((bbox.xmin + bbox.width * (1 + padding)) * w))
300
+ y2_pad = min(h, int((bbox.ymin + bbox.height * (1 + padding)) * h))
301
+
302
+ if x2_pad > x1_pad and y2_pad > y1_pad:
303
+ crop = cv2.resize(frame[y1_pad:y2_pad, x1_pad:x2_pad], (224, 224))
304
+ crops.append(crop)
305
+ else:
306
+ cached_bboxes = None
307
+
308
+ # Subsequent frames β†’ try using cached bboxes
309
+ else:
310
+ redetect_needed = False
311
+ for bbox_coords in cached_bboxes:
312
+ crop = self._extract_crop_from_bbox(frame, bbox_coords, padding)
313
+ if crop is not None:
314
+ # Quality check: if crop is blurry, invalidate cache
315
+ if self._is_quality_crop(crop):
316
+ crops.append(crop)
317
+ cache_hits += 1
318
+ else:
319
+ # Poor quality β†’ need to re-detect
320
+ redetect_needed = True
321
+ break
322
+ else:
323
+ redetect_needed = True
324
+ break
325
+
326
+ # Cache failed quality check β†’ re-run detection
327
+ if redetect_needed:
328
+ crops = []
329
+ rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
330
+ result = detector.process(rgb)
331
+ detections_run += 1
332
+
333
+ if result.detections:
334
+ cached_bboxes = []
335
+ for detection in result.detections:
336
+ bbox = detection.location_data.relative_bounding_box
337
+ x1 = int(bbox.xmin * w)
338
+ y1 = int(bbox.ymin * h)
339
+ x2 = int((bbox.xmin + bbox.width) * w)
340
+ y2 = int((bbox.ymin + bbox.height) * h)
341
+ cached_bboxes.append((x1, y1, x2, y2))
342
+
343
+ x1_pad = max(0, int((bbox.xmin - padding * bbox.width) * w))
344
+ y1_pad = max(0, int((bbox.ymin - padding * bbox.height) * h))
345
+ x2_pad = min(w, int((bbox.xmin + bbox.width * (1 + padding)) * w))
346
+ y2_pad = min(h, int((bbox.ymin + bbox.height * (1 + padding)) * h))
347
+
348
+ if x2_pad > x1_pad and y2_pad > y1_pad:
349
+ crop = cv2.resize(frame[y1_pad:y2_pad, x1_pad:x2_pad], (224, 224))
350
+ crops.append(crop)
351
+ else:
352
+ cached_bboxes = None
353
+
354
  results_per_frame.append(crops)
355
+
356
+ # Log cache performance
357
+ total_frames = len(frames)
358
+ cache_rate = (cache_hits / total_frames * 100) if total_frames > 0 else 0
359
+ logger.info(f"Face detection: {detections_run}/{total_frames} full detections, "
360
+ f"{cache_hits} cache hits ({cache_rate:.1f}% cached)")
361
+
362
  return results_per_frame
363
 
364
  def detect_and_crop_faces(self, frame: np.ndarray, padding: float = 0.2) -> list[np.ndarray]: