refactor: remove unused image quality and obstruction detection functions from image_processor.py
models/image_processor.py    CHANGED    +7 -387
@@ -3,333 +3,11 @@ import base64
 import numpy as np
 from uuid import uuid4
 from PIL import Image as PILImage
-import cv2
-import mediapipe as mp

 from models.face_recognition import EnsembleFaceRecognition, extract_faces, extract_faces_mediapipe
 from utils.vtt_parser import parse_vtt_offsets


-def assess_image_quality(image):
-    """
-    Assess image quality based on blur, brightness, and contrast
-
-    Parameters:
-        image: numpy array of image
-
-    Returns:
-        dict with quality metrics (all normalized to 0-1 range)
-    """
-    # Convert to grayscale for analysis
-    if len(image.shape) == 3:
-        gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
-    else:
-        gray = image
-
-    # Blur detection using Laplacian variance
-    blur_score = cv2.Laplacian(gray, cv2.CV_64F).var()
-    # Normalize blur score (higher is better, typical range 0-2000)
-    blur_normalized = min(blur_score / 1000.0, 1.0)
-
-    # Brightness assessment (0-255 range)
-    brightness = np.mean(gray)
-    # Normalize brightness (optimal range 50-200, penalize very dark/bright)
-    if brightness < 50:
-        brightness_normalized = brightness / 50.0
-    elif brightness > 200:
-        brightness_normalized = 1.0 - (brightness - 200) / 55.0
-    else:
-        brightness_normalized = 1.0
-    brightness_normalized = max(0.0, min(1.0, brightness_normalized))
-
-    # Contrast assessment using standard deviation
-    contrast = np.std(gray)
-    # Normalize contrast (higher is better, typical range 0-100)
-    contrast_normalized = min(contrast / 80.0, 1.0)
-
-    # Overall quality score (weighted average)
-    overall_quality = (blur_normalized * 0.4 + brightness_normalized * 0.3 + contrast_normalized * 0.3)
-
-    return {
-        'blur': blur_normalized,
-        'brightness': brightness_normalized,
-        'contrast': contrast_normalized,
-        'overall': overall_quality
-    }
-
-
-def detect_face_obstruction(image, confidence_threshold=0.5, overlay_path=None):
-    """
-    Detect face obstruction using MediaPipe facial landmarks and optionally export overlay image.
-
-    Parameters:
-        image: numpy array of face image
-        confidence_threshold: minimum confidence for landmark detection
-        overlay_path: if provided, saves overlay image with landmarks to this path
-
-    Returns:
-        dict with obstruction metrics
-    """
-    mp_face_mesh = mp.solutions.face_mesh
-
-    with mp_face_mesh.FaceMesh(
-        static_image_mode=True,
-        max_num_faces=1,
-        refine_landmarks=True,
-        min_detection_confidence=confidence_threshold
-    ) as face_mesh:
-
-        # Convert RGB to BGR for MediaPipe
-        image_bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
-        results = face_mesh.process(image_bgr)
-
-        if not results.multi_face_landmarks:
-            if overlay_path:
-                cv2.imwrite(overlay_path, image_bgr)
-            return {'obstruction_score': 0.0, 'landmark_visibility': 0.0}
-
-        landmarks = results.multi_face_landmarks[0]
-
-        # Key facial landmarks indices for obstruction detection
-        key_landmarks = {
-            'left_eye': [33, 7, 163, 144, 145, 153, 154, 155, 133, 173, 157, 158, 159, 160, 161, 246],
-            'right_eye': [362, 382, 381, 380, 374, 373, 390, 249, 263, 466, 388, 387, 386, 385, 384, 398],
-            'nose': [1, 2, 5, 4, 6, 168, 8, 9, 10, 151, 195, 197, 196, 3, 51, 48, 115, 131, 134, 102, 49, 220, 305, 291, 303, 267, 269, 270, 267, 271, 272],
-            'mouth': [61, 84, 17, 314, 405, 320, 307, 375, 321, 308, 324, 318, 402, 317, 14, 87, 178, 88, 95, 78, 191, 80, 81, 82, 13, 312, 311, 310, 415, 308, 78, 191, 80, 81, 82, 13, 312, 311, 310, 415]
-        }
-
-        total_landmarks = sum(len(indices) for indices in key_landmarks.values())
-        visible_landmarks = 0
-
-        # Prepare overlay image if needed
-        overlay_img = image_bgr.copy() if overlay_path else None
-
-        h, w = image_bgr.shape[:2]
-
-        # Check visibility of key landmarks and draw if overlay requested
-        for region, indices in key_landmarks.items():
-            color = {
-                'left_eye': (0, 255, 0),
-                'right_eye': (0, 255, 255),
-                'nose': (255, 0, 0),
-                'mouth': (255, 0, 255)
-            }.get(region, (255, 255, 255))
-            for idx in indices:
-                if idx < len(landmarks.landmark):
-                    landmark = landmarks.landmark[idx]
-                    if 0 <= landmark.x <= 1 and 0 <= landmark.y <= 1:
-                        visible_landmarks += 1
-                        if overlay_img is not None:
-                            cx, cy = int(landmark.x * w), int(landmark.y * h)
-                            cv2.circle(overlay_img, (cx, cy), 2, color, -1)
-
-        landmark_visibility = visible_landmarks / total_landmarks
-        obstruction_score = landmark_visibility
-
-        # Save overlay image if requested
-        if overlay_path and overlay_img is not None:
-            cv2.imwrite(overlay_path, overlay_img)
-
-        return {
-            'obstruction_score': obstruction_score,
-            'landmark_visibility': landmark_visibility
-        }
-
-
-def calculate_relative_face_size(face_area, frame_area):
-    """
-    Calculate relative face size with logarithmic scaling
-
-    Parameters:
-        face_area: area of detected face in pixels
-        frame_area: total area of frame in pixels
-
-    Returns:
-        normalized size score (0-1 range)
-    """
-    if frame_area == 0:
-        return 0.0
-
-    relative_size = face_area / frame_area
-
-    # Apply logarithmic scaling to prevent huge faces from dominating
-    # Optimal face size is around 5-20% of frame
-    if relative_size < 0.01:  # Very small face
-        size_score = relative_size / 0.01
-    elif relative_size <= 0.20:  # Optimal range
-        size_score = 1.0
-    else:  # Very large face
-        size_score = max(0.1, 1.0 - (relative_size - 0.20) / 0.30)
-
-    return min(1.0, max(0.0, size_score))
-
-
-def detect_face_orientation(image, confidence_threshold=0.5, debug=False):
-    """
-    Detect face orientation to score frontal faces higher
-    Uses MediaPipe facial landmarks to determine face angle
-
-    Parameters:
-        image: numpy array of face image
-        confidence_threshold: minimum confidence for landmark detection
-
-    Returns:
-        dict with orientation metrics (higher score = more frontal)
-    """
-    mp_face_mesh = mp.solutions.face_mesh
-
-    with mp_face_mesh.FaceMesh(
-        static_image_mode=True,
-        max_num_faces=1,
-        refine_landmarks=True,
-        min_detection_confidence=confidence_threshold
-    ) as face_mesh:
-
-        # Convert RGB to BGR for MediaPipe
-        image_bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
-        results = face_mesh.process(image_bgr)
-
-        if not results.multi_face_landmarks:
-            return {'orientation_score': 0.0, 'is_frontal': False}
-
-        landmarks = results.multi_face_landmarks[0]
-
-        # Key landmark indices for orientation detection
-        nose_tip = 1              # nose tip
-        left_eye_inner = 133      # left eye inner corner
-        right_eye_inner = 362     # right eye inner corner
-        left_mouth_corner = 61    # left mouth corner
-        right_mouth_corner = 291  # right mouth corner
-        chin = 18                 # chin center
-
-        # Get landmark coordinates
-        h, w = image_bgr.shape[:2]
-
-        def get_landmark_coord(idx):
-            if idx < len(landmarks.landmark):
-                lm = landmarks.landmark[idx]
-                return (lm.x * w, lm.y * h)
-            return None
-
-        nose_coord = get_landmark_coord(nose_tip)
-        left_eye_coord = get_landmark_coord(left_eye_inner)
-        right_eye_coord = get_landmark_coord(right_eye_inner)
-        left_mouth_coord = get_landmark_coord(left_mouth_corner)
-        right_mouth_coord = get_landmark_coord(right_mouth_corner)
-        chin_coord = get_landmark_coord(chin)
-
-        # Skip if key landmarks are missing
-        if not all([nose_coord, left_eye_coord, right_eye_coord, left_mouth_coord, right_mouth_coord]):
-            if debug:
-                print(f"DEBUG: Missing key landmarks - returning 0.0 orientation score")
-            return {'orientation_score': 0.0, 'is_frontal': False}
-
-        # Calculate symmetry metrics
-        # 1. Eye distance symmetry - frontal faces have balanced eye distances from nose
-        eye_center_x = (left_eye_coord[0] + right_eye_coord[0]) / 2
-        nose_to_eye_center = abs(nose_coord[0] - eye_center_x)
-        eye_distance = abs(right_eye_coord[0] - left_eye_coord[0])
-        eye_symmetry = 1.0 - min(1.0, nose_to_eye_center / (eye_distance / 2)) if eye_distance > 0 else 0.0
-
-        # 2. Mouth symmetry - frontal faces have balanced mouth corners from nose
-        mouth_center_x = (left_mouth_coord[0] + right_mouth_coord[0]) / 2
-        nose_to_mouth_center = abs(nose_coord[0] - mouth_center_x)
-        mouth_width = abs(right_mouth_coord[0] - left_mouth_coord[0])
-        mouth_symmetry = 1.0 - min(1.0, nose_to_mouth_center / (mouth_width / 2)) if mouth_width > 0 else 0.0
-
-        # 3. Vertical alignment - nose should be roughly centered between eyes and mouth
-        if chin_coord:
-            eye_y = (left_eye_coord[1] + right_eye_coord[1]) / 2
-            vertical_center = (eye_y + chin_coord[1]) / 2
-            vertical_alignment = 1.0 - min(1.0, abs(nose_coord[1] - vertical_center) / (abs(chin_coord[1] - eye_y) / 2))
-        else:
-            vertical_alignment = 0.5
-
-        # 4. Face width ratio - frontal faces show more balanced left/right visibility
-        face_width = abs(right_eye_coord[0] - left_eye_coord[0])
-        left_visibility = abs(nose_coord[0] - left_eye_coord[0])
-        right_visibility = abs(right_eye_coord[0] - nose_coord[0])
-
-        if face_width > 0:
-            width_ratio = min(left_visibility, right_visibility) / max(left_visibility, right_visibility)
-        else:
-            width_ratio = 0.0
-
-        # Combine metrics with weights
-        orientation_score = (
-            eye_symmetry * 0.3 +
-            mouth_symmetry * 0.3 +
-            vertical_alignment * 0.2 +
-            width_ratio * 0.2
-        )
-
-        # Determine if face is frontal (threshold-based)
-        is_frontal = orientation_score > 0.7
-
-        if debug:
-            print(f"DEBUG ORIENTATION DETECTION:")
-            print(f"  Eye coordinates: Left={left_eye_coord}, Right={right_eye_coord}, Nose={nose_coord}")
-            print(f"  Eye center: {eye_center_x:.1f}, Eye distance: {eye_distance:.1f}")
-            print(f"  Nose to eye center distance: {nose_to_eye_center:.1f}")
-            print(f"  Eye symmetry: {eye_symmetry:.3f} (1.0 = perfect symmetry)")
-            print(f"  Mouth coordinates: Left={left_mouth_coord}, Right={right_mouth_coord}")
-            print(f"  Mouth center: {mouth_center_x:.1f}, Mouth width: {mouth_width:.1f}")
-            print(f"  Nose to mouth center distance: {nose_to_mouth_center:.1f}")
-            print(f"  Mouth symmetry: {mouth_symmetry:.3f} (1.0 = perfect symmetry)")
-            print(f"  Vertical alignment: {vertical_alignment:.3f} (1.0 = perfect alignment)")
-            print(f"  Width ratio: {width_ratio:.3f} (1.0 = perfect balance)")
-            print(f"  Final orientation score: {orientation_score:.3f} (higher = more frontal)")
-            print(f"  Is frontal: {is_frontal}")
-
-        return {
-            'orientation_score': orientation_score,
-            'is_frontal': is_frontal,
-            'eye_symmetry': eye_symmetry,
-            'mouth_symmetry': mouth_symmetry,
-            'vertical_alignment': vertical_alignment,
-            'width_ratio': width_ratio
-        }
-
-
-def compute_composite_score(confidence, quality, size, obstruction, orientation=None, weights=None, debug=False):
-    """
-    Compute composite score from multiple quality factors
-
-    Parameters:
-        confidence: face detection confidence (0-1)
-        quality: image quality score (0-1)
-        size: face size score (0-1)
-        obstruction: face obstruction score (0-1)
-        orientation: face orientation score (0-1, higher = more frontal)
-        weights: dict with weights for each factor
-        debug: if True, print debugging information
-
-    Returns:
-        composite score (0-1 range)
-    """
-    if weights is None:
-        weights = {
-            'confidence': 0.4,   # Face detection confidence is most important
-            'quality': 0.2,      # Image quality matters
-            'size': 0.2,         # Appropriate face size
-            'obstruction': 0.1,  # Less obstruction is better
-            'orientation': 0.1   # Frontal faces preferred but not dominating
-        }
-
-    composite = (
-        confidence * weights['confidence'] +
-        quality * weights['quality'] +
-        size * weights['size'] +
-        obstruction * weights['obstruction']
-    )
-
-    # Add orientation score if provided
-    orientation_contribution = 0.0
-    if orientation is not None:
-        orientation_contribution = orientation * weights['orientation']
-        composite += orientation_contribution
-
-    return min(1.0, max(0.0, composite))

 def get_face_predictions(face, ensemble, data_manager, results):
     """

@@ -408,18 +86,16 @@ def image_search_performers(image, data_manager, threshold=0.5, results=3):
         })
     return response

-def find_faces_in_sprite(image, vtt_file, sort_by_quality=True, debug=True):
+def find_faces_in_sprite(image, vtt_file):
     """
-    Find faces in a sprite image using VTT data
+    Find faces in a sprite image using VTT data

    Parameters:
        image: PIL Image object
        vtt_file: File object containing VTT data
-        sort_by_quality: If True, sort results by composite quality score
-        debug: If True, print debugging information

    Returns:
-        List of dictionaries with face information
+        List of dictionaries with face information
    """
    with open(vtt_file.name, 'r', encoding='utf-8') as f:
        vtt = f.read().encode('utf-8')

@@ -428,65 +104,9 @@ def find_faces_in_sprite(image, vtt_file, sort_by_quality=True, debug=True):
     results = []
     for i, (left, top, right, bottom, time_seconds) in enumerate(parse_vtt_offsets(vtt)):
         cut_frame = sprite.crop((left, top, left + right, top + bottom))
-
-
-        # Extract faces with detailed information
-        faces = extract_faces_mediapipe(cut_frame_array, enforce_detection=False, align=False)
+        faces = extract_faces_mediapipe(np.asarray(cut_frame), enforce_detection=False, align=False)
         faces = [face for face in faces if face['confidence'] > 0.6]
-
         if faces:
-
-
-
-            face_size = face_area['w'] * face_area['h']
-            frame_size = cut_frame_array.shape[0] * cut_frame_array.shape[1]
-
-            # Extract face region for quality assessment
-            face_x1 = max(0, int(face_area['x']))
-            face_y1 = max(0, int(face_area['y']))
-            face_x2 = min(cut_frame_array.shape[1], int(face_area['x'] + face_area['w']))
-            face_y2 = min(cut_frame_array.shape[0], int(face_area['y'] + face_area['h']))
-
-            face_region = cut_frame_array[face_y1:face_y2, face_x1:face_x2]
-
-            # Skip if face region is too small
-            if face_region.size == 0:
-                continue
-
-            # Assess quality metrics
-            quality_metrics = assess_image_quality(face_region)
-            obstruction_metrics = detect_face_obstruction(face_region)
-            orientation_metrics = detect_face_orientation(face_region, debug=debug)
-            size_score = calculate_relative_face_size(face_size, frame_size)
-
-            # Compute composite score
-            composite_score = compute_composite_score(
-                confidence=best_face['confidence'],
-                quality=quality_metrics['overall'],
-                size=size_score,
-                obstruction=obstruction_metrics['obstruction_score'],
-                orientation=orientation_metrics['orientation_score'],
-                debug=debug
-            )
-
-            # Create result data with enhanced metrics
-            data = {
-                'id': str(uuid4()),
-                'offset': (left, top, right, bottom),
-                'frame': i,
-                'time': time_seconds,
-                'size': face_size,
-                'confidence': best_face['confidence'],
-                'quality_metrics': quality_metrics,
-                'obstruction_metrics': obstruction_metrics,
-                'orientation_metrics': orientation_metrics,
-                'size_score': size_score,
-                'composite_score': composite_score
-            }
-            results.append(data)
-
-    # Sort by composite score (highest first) if requested
-    if sort_by_quality:
-        results.sort(key=lambda x: x['composite_score'], reverse=True)
-
-    return results
+            size = faces[0]['facial_area']['w'] * faces[0]['facial_area']['h']
+            data = {'id': str(uuid4()), "offset": (left, top, right, bottom), "frame": i, "time": time_seconds, 'size': size}
+            results.append(data)
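For readers auditing the removal: the deleted compute_composite_score weighted the five factors as 0.4 (confidence), 0.2 (quality), 0.2 (size), 0.1 (obstruction) and 0.1 (orientation), then clamped the result to [0, 1]. A minimal sketch of that arithmetic follows; the sample scores are hypothetical, not taken from the commit:

    # Default weights as defined in the removed compute_composite_score.
    weights = {'confidence': 0.4, 'quality': 0.2, 'size': 0.2, 'obstruction': 0.1, 'orientation': 0.1}
    # Hypothetical per-face scores, each already normalized to 0-1.
    scores = {'confidence': 0.9, 'quality': 0.7, 'size': 1.0, 'obstruction': 0.8, 'orientation': 0.6}
    # Weighted sum, clamped to the 0-1 range exactly as the removed code did.
    composite = min(1.0, max(0.0, sum(scores[k] * weights[k] for k in weights)))
    print(round(composite, 2))  # 0.84 = 0.36 + 0.14 + 0.20 + 0.08 + 0.06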
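After this commit, find_faces_in_sprite keeps only the confidence filter (> 0.6) and records a flat per-frame dict; the quality, obstruction, orientation and composite-score fields are gone, as is the sort_by_quality ordering. A minimal sketch of the surviving bookkeeping, where the face dict is a hypothetical stand-in for one entry returned by extract_faces_mediapipe:

    from uuid import uuid4

    # Hypothetical detection; real entries come from extract_faces_mediapipe.
    face = {'confidence': 0.87, 'facial_area': {'x': 10, 'y': 8, 'w': 64, 'h': 80}}
    if face['confidence'] > 0.6:  # same threshold as in the diff
        # Face size is now just the facial_area width times height.
        size = face['facial_area']['w'] * face['facial_area']['h']
        data = {'id': str(uuid4()),
                'offset': (0, 0, 160, 90),  # (left, top, right, bottom) from the VTT
                'frame': 0, 'time': 12.5, 'size': size}
        print(data['size'])  # 5120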