Spaces:

Uzbekswe
/

FaceCheck

Sleeping

File size: 7,721 Bytes

9b5157d

"""
Face detection + 478-landmark extraction + head pose estimation.
Uses MediaPipe FaceLandmarker Tasks API (pretrained, no training needed).
Head pose via PnP (Perspective-n-Point) solving with OpenCV.
"""

import cv2
import numpy as np
import mediapipe as mp
from mediapipe.tasks import python as mp_python
from mediapipe.tasks.python import vision as mp_vision
from mediapipe.tasks.python.components.containers import NormalizedLandmark
from dataclasses import dataclass
from typing import Optional, Tuple
import logging
import os
import traceback
import urllib.request

logger = logging.getLogger(__name__)

# Model path: resolved relative to this file's directory (two levels up, then
# models/). _ensure_model() downloads the file from _MODEL_URL when it is
# missing, and FaceAnalysisService.load() passes the absolute form of this
# path to MediaPipe.
_MODEL_PATH = os.path.join(os.path.dirname(__file__), "../../models/face_landmarker.task")
_MODEL_URL = "https://storage.googleapis.com/mediapipe-models/face_landmarker/face_landmarker/float16/latest/face_landmarker.task"

def _ensure_model():
    """Download the FaceLandmarker model file if it is not already on disk.

    The download is written to a temporary sibling file and moved into place
    with ``os.replace`` only after it has completed. An interrupted or failed
    download therefore never leaves a truncated file at ``_MODEL_PATH`` that
    later calls would mistake for a valid model.

    Raises:
        OSError: if the target directory cannot be created, the download
            fails (``urllib.error.URLError`` is an ``OSError`` subclass), or
            the file cannot be moved into place.
    """
    if os.path.exists(_MODEL_PATH):
        return

    os.makedirs(os.path.dirname(_MODEL_PATH), exist_ok=True)

    # Include the PID so two processes starting at once do not write to the
    # same temporary file.
    tmp_path = f"{_MODEL_PATH}.{os.getpid()}.part"

    logger.info("Downloading face_landmarker.task model...")
    try:
        urllib.request.urlretrieve(_MODEL_URL, tmp_path)
        os.replace(tmp_path, _MODEL_PATH)
    finally:
        # After a successful os.replace the temporary file is gone; anything
        # still present here is a partial download that must not linger.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
    logger.info("Model downloaded successfully.")

# 3D reference face model points (canonical face geometry).
# Passed to cv2.solvePnP as the object points in
# FaceAnalysisService._estimate_pose. Row i must describe the same facial
# feature as FACE_LANDMARK_INDICES[i], because the 2D image points are built
# by iterating that list in order.
# NOTE(review): the units and axis orientation of this model are not stated
# in this file — confirm against the source of these values before editing.
FACE_3D_POINTS = np.array([
    [0.0,    0.0,    0.0],    # nose tip (landmark 1)
    [0.0,   -330.0, -65.0],   # chin (landmark 152)
    [-225.0, 170.0, -135.0],  # left eye corner (landmark 263)
    [225.0,  170.0, -135.0],  # right eye corner (landmark 33)
    [-150.0, -150.0, -125.0], # left mouth corner (landmark 287)
    [150.0,  -150.0, -125.0], # right mouth corner (landmark 57)
], dtype=np.float64)

# Corresponding MediaPipe landmark indices (same order as FACE_3D_POINTS rows)
FACE_LANDMARK_INDICES = [1, 152, 263, 33, 287, 57]

# Smile detection landmarks, read by FaceAnalysisService._compute_smile_score.
# The lip and mouth-corner indices give the mouth height/width and corner
# elevation; the two cheek indices are only used for the cheek-to-cheek
# distance that normalizes the corner elevation.
UPPER_LIP_IDX = 13
LOWER_LIP_IDX = 14
LEFT_MOUTH_IDX = 61
RIGHT_MOUTH_IDX = 291
LEFT_CHEEK_IDX = 116
RIGHT_CHEEK_IDX = 345


@dataclass
class FaceAnalysis:
    """Result of one ``FaceAnalysisService.analyze`` call.

    When ``face_detected`` is False (no face found, or the analysis raised),
    every other field keeps its default value.
    """
    face_detected: bool
    yaw: float = 0.0        # left(-) / right(+)
    pitch: float = 0.0      # up(-) / down(+)
    roll: float = 0.0       # all three angles stay 0.0 if the PnP solve fails
    smile_score: float = 0.0  # clipped to the range [0.0, 1.0]
    landmarks: Optional[np.ndarray] = None  # one row per landmark: [x_px, y_px, z]; z is left as MediaPipe returned it
    face_bbox: Optional[Tuple[int, int, int, int]] = None  # x,y,w,h


class FaceAnalysisService:
    """Face landmark detection, head-pose estimation and smile scoring.

    Wraps a MediaPipe ``FaceLandmarker`` (single face, IMAGE running mode)
    that is created lazily on the first ``load()`` or ``analyze()`` call.
    """

    def __init__(self):
        # The detector is created by load(); construction stays cheap.
        self._detector = None
        self._loaded = False

    def load(self):
        """Lazy-load MediaPipe FaceLandmarker.

        Ensures the model file exists (downloading it if necessary) and
        creates the detector. Calling it again after a successful load is a
        no-op.

        Returns:
            ``self``, so the call can be chained.
        """
        if not self._loaded:
            _ensure_model()
            model_path = os.path.abspath(_MODEL_PATH)
            base_options = mp_python.BaseOptions(model_asset_path=model_path)
            options = mp_vision.FaceLandmarkerOptions(
                base_options=base_options,
                running_mode=mp_vision.RunningMode.IMAGE,
                num_faces=1,
                min_face_detection_confidence=0.3,
                min_face_presence_confidence=0.3,
                min_tracking_confidence=0.3,
                output_face_blendshapes=False,
                output_facial_transformation_matrixes=False,
            )
            self._detector = mp_vision.FaceLandmarker.create_from_options(options)
            self._loaded = True
            logger.info("MediaPipe FaceLandmarker loaded")
        return self

    def analyze(self, image_bgr: np.ndarray) -> FaceAnalysis:
        """
        Run full face analysis pipeline on a BGR image.

        Args:
            image_bgr: image array in BGR channel order (as produced by
                OpenCV); its first two dimensions are height and width.

        Returns:
            FaceAnalysis with pose angles, smile score, pixel-space landmarks
            and bounding box. ``FaceAnalysis(face_detected=False)`` is
            returned when the image is None/empty, when no face is found, or
            when any step raises (the exception is logged, never propagated).
        """
        try:
            # Reject unusable input up front instead of relying on an
            # AttributeError / cv2.error deeper in the pipeline.
            if image_bgr is None or getattr(image_bgr, "size", 0) == 0:
                logger.warning("Face analysis skipped: image is None or empty")
                return FaceAnalysis(face_detected=False)

            if not self._loaded:
                self.load()

            h, w = image_bgr.shape[:2]
            image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
            image_rgb = np.ascontiguousarray(image_rgb)
            mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=image_rgb)

            result = self._detector.detect(mp_image)

            if not result.face_landmarks:
                return FaceAnalysis(face_detected=False)

            # num_faces=1, so only the first face is ever used.
            face_landmarks = result.face_landmarks[0]

            # Convert normalized landmarks → pixel coords (z is left unscaled)
            lm_array = np.array([
                [lm.x * w, lm.y * h, lm.z]
                for lm in face_landmarks
            ], dtype=np.float64)

            # --- Head Pose via PnP ---
            yaw, pitch, roll = self._estimate_pose(lm_array, w, h)

            # --- Smile score ---
            smile_score = self._compute_smile_score(lm_array, w, h)

            # --- Face bounding box (extent of all landmarks, not clamped
            # to the image) ---
            xs = lm_array[:, 0]
            ys = lm_array[:, 1]
            x1, y1 = int(xs.min()), int(ys.min())
            x2, y2 = int(xs.max()), int(ys.max())
            bbox = (x1, y1, x2 - x1, y2 - y1)

            return FaceAnalysis(
                face_detected=True,
                yaw=yaw,
                pitch=pitch,
                roll=roll,
                smile_score=smile_score,
                landmarks=lm_array,
                face_bbox=bbox,
            )
        except Exception as e:
            # Best-effort boundary: callers get "no face" rather than an
            # exception. logger.exception attaches the traceback itself.
            logger.exception("Face analysis failed: %s", e)
            return FaceAnalysis(face_detected=False)

    def _estimate_pose(
        self, lm_array: np.ndarray, img_w: int, img_h: int
    ) -> Tuple[float, float, float]:
        """
        Solve PnP to get rotation angles.
        Uses 6 stable landmark points mapped to 3D canonical model.

        Args:
            lm_array: landmark array whose first two columns are pixel x/y.
            img_w: image width in pixels.
            img_h: image height in pixels.

        Returns:
            ``(yaw, pitch, roll)`` as floats, taken from the Euler angles
            reported by ``cv2.decomposeProjectionMatrix``; ``(0.0, 0.0, 0.0)``
            if ``cv2.solvePnP`` reports failure.
        """
        image_points = np.array([
            lm_array[idx, :2] for idx in FACE_LANDMARK_INDICES
        ], dtype=np.float64)

        # No calibration is available: approximate the focal length by the
        # image width and put the principal point at the image centre.
        focal_length = img_w
        center = (img_w / 2, img_h / 2)
        camera_matrix = np.array([
            [focal_length, 0,            center[0]],
            [0,            focal_length, center[1]],
            [0,            0,            1         ]
        ], dtype=np.float64)

        # Lens distortion is assumed to be zero.
        dist_coeffs = np.zeros((4, 1))

        success, rotation_vec, _ = cv2.solvePnP(
            FACE_3D_POINTS,
            image_points,
            camera_matrix,
            dist_coeffs,
            flags=cv2.SOLVEPNP_ITERATIVE,
        )

        if not success:
            return 0.0, 0.0, 0.0

        rotation_mat, _ = cv2.Rodrigues(rotation_vec)
        proj_matrix = np.hstack([rotation_mat, np.zeros((3, 1))])
        _, _, _, _, _, _, euler_angles = cv2.decomposeProjectionMatrix(proj_matrix)

        # euler_angles is a (3, 1) array. Flatten it before converting:
        # float() on an array with ndim > 0 is deprecated since NumPy 1.25.
        pitch, yaw, roll = (float(angle) for angle in np.ravel(euler_angles)[:3])

        return yaw, pitch, roll

    def _compute_smile_score(self, lm_array: np.ndarray, img_w: int, img_h: int) -> float:
        """
        Smile detection using mouth aspect ratio (MAR) plus mouth-corner lift.

        The score is ``2 * MAR + 3 * corner_score`` clipped to [0, 1], where
        MAR is mouth height / mouth width and ``corner_score`` is the mean
        vertical lift of the mouth corners above the lip midpoint, divided
        by the cheek-to-cheek distance.

        Args:
            lm_array: landmark array whose first two columns are pixel x/y.
            img_w: unused; kept for interface compatibility.
            img_h: unused; kept for interface compatibility.

        Returns:
            Smile score in [0.0, 1.0]; 0.0 when the geometry is degenerate,
            the score is not finite, or the computation raises.
        """
        try:
            upper_lip = lm_array[UPPER_LIP_IDX, :2]
            lower_lip = lm_array[LOWER_LIP_IDX, :2]
            left_mouth = lm_array[LEFT_MOUTH_IDX, :2]
            right_mouth = lm_array[RIGHT_MOUTH_IDX, :2]
            left_cheek = lm_array[LEFT_CHEEK_IDX, :2]
            right_cheek = lm_array[RIGHT_CHEEK_IDX, :2]

            mouth_height = np.linalg.norm(lower_lip - upper_lip)
            mouth_width = np.linalg.norm(right_mouth - left_mouth)
            # Cheek-to-cheek distance, i.e. a face *width* used as the scale.
            face_width = np.linalg.norm(right_cheek - left_cheek)

            if mouth_width < 1e-6 or face_width < 1e-6:
                return 0.0

            mar = mouth_height / mouth_width
            mouth_center_y = (upper_lip[1] + lower_lip[1]) / 2
            # Image y grows downward, so a corner above the lip midpoint
            # yields a positive elevation.
            left_corner_elevation = mouth_center_y - left_mouth[1]
            right_corner_elevation = mouth_center_y - right_mouth[1]
            corner_score = (left_corner_elevation + right_corner_elevation) / (2 * face_width)

            smile_score = (mar * 2.0) + (corner_score * 3.0)
            if not np.isfinite(smile_score):
                # NaN/inf landmarks would otherwise pass through np.clip.
                return 0.0
            return float(np.clip(smile_score, 0.0, 1.0))
        except Exception:
            # Deliberately best-effort: a failed smile score must not discard
            # the pose result, but leave a trace for debugging.
            logger.debug("Smile score computation failed", exc_info=True)
            return 0.0


# Module-level singleton. Constructing it does not load the model; the
# detector is created on the first load() or analyze() call.
face_analysis_service = FaceAnalysisService()