File size: 7,870 Bytes
3219a41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
from __future__ import annotations

from pathlib import Path
from typing import Any, Dict, List, Optional, Sequence, Tuple, Union

import cv2
import mediapipe as mp
import numpy as np
import pickle
import argparse

# Paths to model files (assumed to be in the same directory as this script)
PROJECT_DIR = Path(__file__).parent
MODEL_FILE = PROJECT_DIR / 'face_shape_model.pkl'
LABEL_ENCODER_FILE = PROJECT_DIR / 'label_encoder.pkl'

Keypoint = Dict[str, float]
NormalizedLandmark = Tuple[float, float, float]


def normalize_landmarks(keypoints: Sequence[Keypoint], width: int, height: int) -> List[NormalizedLandmark]:
    """
    Normalize keypoints to be centered, roll-corrected, and scaled.
    Retains 3D coordinates (Z) but aligns to the 2D plane based on eyes.
    
    Returns list of tuples: [(x, y, z), ...]
    
    (Copied from create.py to ensure consistent preprocessing)
    """
    if not keypoints:
        return []

    # Convert to numpy array (N, 3)
    landmarks = np.array([[kp["x"], kp["y"], kp["z"]] for kp in keypoints])
    
    # Denormalize x, y, z to pixel/aspect-correct coordinates
    # MediaPipe Z is roughly same scale as X (relative to image width)
    landmarks[:, 0] *= width
    landmarks[:, 1] *= height
    landmarks[:, 2] *= width 

    # Indices for irises (refine_landmarks=True gives 478 points)
    # 468: Left Iris Center (Subject's Left, Image Right)
    # 473: Right Iris Center (Subject's Right, Image Left)
    left_iris_idx = 468
    right_iris_idx = 473

    if len(landmarks) > right_iris_idx:
        left_iris = landmarks[left_iris_idx]
        right_iris = landmarks[right_iris_idx]
    else:
        # Fallback to eye corners if iris landmarks missing
        p1 = landmarks[33]  # Left eye outer
        p2 = landmarks[133] # Left eye inner
        left_iris = (p1 + p2) / 2
        p3 = landmarks[362] # Right eye inner
        p4 = landmarks[263] # Right eye outer
        right_iris = (p3 + p4) / 2

    # 1. Centering: Move midpoint of eyes to origin
    eye_center = (left_iris + right_iris) / 2.0
    landmarks -= eye_center

    # 2. Rotation (Roll Correction)
    delta = left_iris - right_iris
    dX, dY = delta[0], delta[1]
    
    # Calculate angle of this vector relative to horizontal
    angle = np.arctan2(dY, dX)
    
    # Rotate by -angle to align with X-axis
    c, s = np.cos(-angle), np.sin(-angle)
    
    # Rotation matrix around Z axis
    R = np.array([
        [c, -s, 0],
        [s, c, 0],
        [0, 0, 1]
    ])
    
    landmarks = landmarks.dot(R.T)

    # 3. Scaling: Scale such that inter-ocular distance is 1.0
    dist = np.sqrt(dX**2 + dY**2)
    if dist > 0:
        scale = 1.0 / dist
        landmarks *= scale

    # Convert to list of tuples
    return [(round(float(l[0]), 5), round(float(l[1]), 5), round(float(l[2]), 5)) 
            for l in landmarks]


def create_face_mesh(image_path: Union[str, Path]) -> Tuple[Optional[List[Keypoint]], Optional[np.ndarray]]:
    """
    Run MediaPipe FaceMesh on an image file.

    Returns (keypoints, img_bgr) where keypoints is a list of
    {"x", "y", "z"} dicts in normalized coordinates rounded to 5 decimals,
    and img_bgr is the (possibly downscaled) BGR image; returns (None, None)
    when the image cannot be read or no face is detected.

    (Copied from create.py to ensure consistent preprocessing)
    """
    MAX_SIDE = 512

    with mp.solutions.face_mesh.FaceMesh(
            static_image_mode=True,
            max_num_faces=1,
            refine_landmarks=True,
            min_detection_confidence=0.5) as face_mesh:

        img_bgr = cv2.imread(str(image_path))
        if img_bgr is None:
            print(f"Error: Could not read image: {image_path}")
            return None, None

        # Cap the longest side at MAX_SIDE px (aspect ratio preserved) so
        # inference stays fast on large photos.
        h, w = img_bgr.shape[:2]
        longest_side = max(h, w)
        if longest_side > MAX_SIDE:
            factor = MAX_SIDE / float(longest_side)
            target = (max(1, int(round(w * factor))), max(1, int(round(h * factor))))
            img_bgr = cv2.resize(img_bgr, target, interpolation=cv2.INTER_AREA)

        # MediaPipe expects RGB input, OpenCV loads BGR.
        results = face_mesh.process(cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB))

        if not results.multi_face_landmarks:
            print(f"Error: No face detected in: {image_path}")
            return None, None

        # Only the first (and, with max_num_faces=1, the only) face is used.
        keypoints = [
            {"x": round(lm.x, 5), "y": round(lm.y, 5), "z": round(lm.z, 5)}
            for lm in results.multi_face_landmarks[0].landmark
        ]
        return keypoints, img_bgr


def load_model_resources() -> Tuple[Any, Any]:
    """
    Load the trained classifier and its label encoder from disk.

    Returns:
        (model, label_encoder) unpickled from MODEL_FILE and
        LABEL_ENCODER_FILE.

    Raises:
        FileNotFoundError: when either pickle file is missing.
    """
    if not MODEL_FILE.exists():
        raise FileNotFoundError(f"Model file not found at {MODEL_FILE}. Please run create_model.py first.")

    if not LABEL_ENCODER_FILE.exists():
        raise FileNotFoundError(f"Label encoder file not found at {LABEL_ENCODER_FILE}. Please run create_model.py first.")

    # NOTE: pickle is only safe because these are locally-produced artifacts;
    # never point these paths at untrusted files.
    print(f"Loading model from {MODEL_FILE}...")
    model = pickle.loads(MODEL_FILE.read_bytes())

    print(f"Loading label encoder from {LABEL_ENCODER_FILE}...")
    label_encoder = pickle.loads(LABEL_ENCODER_FILE.read_bytes())

    return model, label_encoder


def predict_face_shape(image_path: Union[str, Path]) -> Optional[str]:
    """
    Predict the face shape for an image and print a confidence report.

    Returns:
        The predicted label string, or None when the model resources cannot
        be loaded or no face landmarks can be extracted.
    """
    # 1. Load model + encoder; any failure here is reported and aborts.
    try:
        model, label_encoder = load_model_resources()
    except Exception as e:
        print(f"Failed to load model resources: {e}")
        return None

    # 2. Extract raw landmarks from the image.
    print(f"Processing image: {image_path}")
    keypoints, img_bgr = create_face_mesh(image_path)
    if keypoints is None:
        print("Could not extract landmarks. Exiting.")
        return None

    # 3. Normalize landmarks to the training-time coordinate frame.
    h, w = img_bgr.shape[:2]
    normalized = normalize_landmarks(keypoints, w, h)

    # 4. The model was trained on flattened [x1, y1, x2, y2, ...] vectors,
    # so Z is dropped here; shape (1, N) for sklearn.
    features = np.array([[coord for point in normalized for coord in point[:2]]])

    # 5. Predict label and per-class probabilities.
    print("Running prediction...")
    probas = model.predict_proba(features)[0]
    label_idx = model.predict(features)[0]
    predicted_label = label_encoder.inverse_transform([label_idx])[0]

    # 6. Report.
    banner = "=" * 30
    print("\n" + banner)
    print(f"PREDICTED FACE SHAPE: {predicted_label.upper()}")
    print(banner)

    print("\nConfidence Scores:")
    # Highest-confidence classes first.
    for idx in np.argsort(probas)[::-1]:
        print(f"  {label_encoder.classes_[idx]}: {probas[idx]:.4f}")

    return predicted_label


def parse_args(argv: Optional[Sequence[str]] = None) -> argparse.Namespace:
    """
    Build and run the command-line argument parser.

    Note: Default behavior remains to run against `sample_image.jpg` when no args are provided.
    """
    cli = argparse.ArgumentParser(
        description="Predict face shape from an image using a trained sklearn model.",
    )
    # Positional and optional: omitting it falls back to the sample image.
    cli.add_argument(
        "image",
        nargs="?",
        default="sample_image.jpg",
        help="Path to the input image (default: sample_image.jpg).",
    )
    return cli.parse_args(argv)


if __name__ == "__main__":
    # CLI entry point: predict the face shape of the image given on the
    # command line (defaults to sample_image.jpg when omitted).
    args = parse_args()
    predict_face_shape(args.image)