File size: 2,767 Bytes
af35098
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import math
import numpy as np
from PIL import Image
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision

class FaceExtractor:
    """Detect faces with a MediaPipe face detector and crop the main one.

    The "main" face is the detection with the highest score, where the score
    rewards a large, confident bounding box and penalises distance from the
    centre of the frame (see ``_score_face``).
    """

    # Weight applied to the face-to-frame-centre distance (in pixels) when
    # ranking detections; larger values favour centred faces more strongly.
    CENTER_DISTANCE_WEIGHT = 50

    def __init__(
        self,
        model_path: str = "assets/blaze_face_short_range.tflite",
        *,
        min_detection_confidence: float = 0.70,
        offset_percentage: float = 0.30,
    ):
        """Create the underlying MediaPipe face detector.

        Args:
            model_path: Path to the face-detection model asset.
            min_detection_confidence: Threshold passed to the detector
                options.
            offset_percentage: Fraction of the detected box width/height
                added as padding on each side of the crop.
        """
        self.model_path = model_path
        base_options = python.BaseOptions(model_asset_path=self.model_path)
        options = vision.FaceDetectorOptions(
            base_options=base_options,
            running_mode=vision.RunningMode.IMAGE,
            min_detection_confidence=min_detection_confidence,
        )
        self.detector = vision.FaceDetector.create_from_options(options)
        self.offset_percentage = offset_percentage

    @staticmethod
    def _score_face(x, y, w, h, confidence, img_w, img_h, distance_weight=50):
        """Return the ranking score of one face box inside an img_w x img_h frame.

        The score is ``area * confidence - distance_to_centre * distance_weight``.
        """
        face_cx = x + (w / 2)
        face_cy = y + (h / 2)
        distance_to_center = math.hypot(img_w / 2 - face_cx, img_h / 2 - face_cy)
        return (w * h * confidence) - (distance_to_center * distance_weight)

    @staticmethod
    def _padded_crop_box(bbox, img_w, img_h, offset_percentage):
        """Return ``(left, top, right, bottom)`` of the padded, clamped crop.

        ``bbox`` is ``(x, y, w, h)``. Each edge is padded by
        ``offset_percentage`` of the box size and then clamped to the frame
        independently, so clamping one side never enlarges the opposite side.
        """
        x, y, w, h = bbox
        pad_w = int(w * offset_percentage)
        pad_h = int(h * offset_percentage)
        left = max(0, x - pad_w)
        top = max(0, y - pad_h)
        right = min(img_w, x + w + pad_w)
        bottom = min(img_h, y + h + pad_h)
        return left, top, right, bottom

    def extract_main_face(self, pil_image: "Image.Image") -> "Image.Image | None":
        """Return a padded crop of the main face in ``pil_image``.

        Args:
            pil_image: Input image in any PIL mode; it is converted to RGB
                before detection.

        Returns:
            The cropped face as a new RGB PIL image, or ``None`` when no face
            is detected or the resulting crop region is empty.
        """
        # Force 3-channel RGB: grayscale/palette images would otherwise give a
        # 2-D array, and RGBA a 4-channel one, neither of which matches SRGB.
        frame = np.array(pil_image.convert("RGB"))
        img_h, img_w = frame.shape[:2]

        mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=frame)
        results = self.detector.detect(mp_image)
        if not results.detections:
            return None

        def score(detection):
            box = detection.bounding_box
            return self._score_face(
                box.origin_x,
                box.origin_y,
                box.width,
                box.height,
                detection.categories[0].score,
                img_w,
                img_h,
                self.CENTER_DISTANCE_WEIGHT,
            )

        # max() keeps the first of equally scored detections, matching a
        # strict "greater than" running comparison.
        main_box = max(results.detections, key=score).bounding_box
        left, top, right, bottom = self._padded_crop_box(
            (main_box.origin_x, main_box.origin_y, main_box.width, main_box.height),
            img_w,
            img_h,
            self.offset_percentage,
        )

        # A box lying outside the frame yields an empty or inverted region;
        # slicing with such bounds would return nothing or a wrong area.
        if right <= left or bottom <= top:
            return None

        return Image.fromarray(frame[top:bottom, left:right])