from __future__ import annotations
import os
import time
from pathlib import Path
from urllib.request import urlretrieve
import cv2
import numpy as np
import mediapipe as mp
from mediapipe.tasks.python.vision import FaceLandmarkerOptions, FaceLandmarker, RunningMode
from mediapipe.tasks import python as mp_tasks
# Download location of the face-landmarker model bundle fetched by
# _ensure_model() when no cached copy is present.
_MODEL_URL = (
    "https://storage.googleapis.com"
    "/mediapipe-models/face_landmarker/face_landmarker"
    "/float16/latest/face_landmarker.task"
)
def _ensure_model() -> str:
    """Return the local path of the face-landmarker model, downloading it if needed.

    The cache directory is read from the ``FOCUSGUARD_CACHE_DIR`` environment
    variable and falls back to ``~/.cache/focusguard``. When the model file is
    already present it is returned as-is; otherwise it is fetched from
    ``_MODEL_URL``.

    The download is written to a temporary sibling file and only moved onto
    the final path once it has completed, so an interrupted or truncated
    download can never be mistaken for a valid cached model on a later call.

    Returns:
        The model file path as a string.

    Raises:
        OSError: If the cache directory cannot be created or the download
            fails (``urllib`` errors are ``OSError`` subclasses). The
            temporary file is removed before the error propagates.
    """
    override = os.environ.get("FOCUSGUARD_CACHE_DIR")
    if override is not None:
        cache_dir = Path(override)
    else:
        # Resolve the home directory only when it is actually needed.
        cache_dir = Path.home() / ".cache" / "focusguard"

    model_path = cache_dir / "face_landmarker.task"
    if model_path.exists():
        return str(model_path)

    cache_dir.mkdir(parents=True, exist_ok=True)
    print(f"[FACE_MESH] Downloading model to {model_path}...")

    # Per-process temp name so concurrent first runs do not write to the same file.
    partial_path = cache_dir / f"{model_path.name}.{os.getpid()}.part"
    try:
        urlretrieve(_MODEL_URL, partial_path)
        # Atomic publish: the final path only ever holds a complete download.
        os.replace(partial_path, model_path)
    except BaseException:
        # Best-effort cleanup; never mask the original failure.
        try:
            partial_path.unlink()
        except OSError:
            pass
        raise

    print("[FACE_MESH] Download complete.")
    return str(model_path)
class FaceMeshDetector:
    """Wrapper around a MediaPipe ``FaceLandmarker`` running in VIDEO mode.

    Frames are fed one at a time through :meth:`process`; the helper methods
    convert the normalized landmark array it returns into pixel-scale
    coordinates. The instance can be used as a context manager, which closes
    the underlying landmarker on exit.
    """

    # Landmark index groups exposed for callers; not referenced inside this
    # class. Presumably indices into the array returned by process() -- confirm
    # against the model's landmark map.
    LEFT_EYE_INDICES = [
        33, 7, 163, 144, 145, 153, 154, 155,
        133, 173, 157, 158, 159, 160, 161, 246,
    ]
    RIGHT_EYE_INDICES = [
        362, 382, 381, 380, 374, 373, 390, 249,
        263, 466, 388, 387, 386, 385, 384, 398,
    ]
    LEFT_IRIS_INDICES = [468, 469, 470, 471, 472]
    RIGHT_IRIS_INDICES = [473, 474, 475, 476, 477]

    def __init__(
        self,
        max_num_faces: int = 1,
        min_detection_confidence: float = 0.5,
        min_tracking_confidence: float = 0.5,
    ):
        """Create the landmarker, fetching the model file first if required.

        Args:
            max_num_faces: Passed to the landmarker as ``num_faces``.
            min_detection_confidence: Used for both the face-detection and
                the face-presence confidence thresholds.
            min_tracking_confidence: Passed through as the tracking threshold.
        """
        model_file = _ensure_model()
        base = mp_tasks.BaseOptions(
            model_asset_path=model_file,
            delegate=mp_tasks.BaseOptions.Delegate.CPU,
        )
        landmarker_options = FaceLandmarkerOptions(
            base_options=base,
            num_faces=max_num_faces,
            min_face_detection_confidence=min_detection_confidence,
            min_face_presence_confidence=min_detection_confidence,
            min_tracking_confidence=min_tracking_confidence,
            running_mode=RunningMode.VIDEO,
        )
        self._landmarker = FaceLandmarker.create_from_options(landmarker_options)
        # Reference instant for frame timestamps, and the last timestamp issued.
        self._t0 = time.monotonic()
        self._last_ts = 0

    def process(self, bgr_frame: np.ndarray) -> np.ndarray | None:
        """Run landmark detection on one BGR frame.

        Returns:
            A float32 array with one ``(x, y, z)`` row per landmark of the
            first detected face (the original note gives the shape as
            (478, 3), normalized), or ``None`` when no face is reported.
        """
        rgb_frame = cv2.cvtColor(bgr_frame, cv2.COLOR_BGR2RGB)
        image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_frame)

        # Milliseconds since construction, forced to be strictly greater than
        # the previously issued timestamp.
        elapsed_ms = int((time.monotonic() - self._t0) * 1000)
        next_allowed_ms = self._last_ts + 1
        timestamp_ms = elapsed_ms if elapsed_ms > next_allowed_ms else next_allowed_ms
        self._last_ts = timestamp_ms

        detection = self._landmarker.detect_for_video(image, timestamp_ms)
        faces = detection.face_landmarks
        if faces:
            # Only the first face is returned, even if more were detected.
            return np.array([[p.x, p.y, p.z] for p in faces[0]], dtype=np.float32)
        return None

    def get_pixel_landmarks(self, landmarks: np.ndarray, frame_w: int, frame_h: int) -> np.ndarray:
        """Convert normalized landmarks to integer pixel ``(x, y)`` pairs.

        Column 0 is scaled by ``frame_w`` and column 1 by ``frame_h``; each is
        cast to int32. The result has one row per input row and two columns.
        """
        xs = (landmarks[:, 0] * frame_w).astype(np.int32)
        ys = (landmarks[:, 1] * frame_h).astype(np.int32)
        return np.stack((xs, ys), axis=1)

    def get_3d_landmarks(self, landmarks: np.ndarray, frame_w: int, frame_h: int) -> np.ndarray:
        """Scale normalized ``x, y, z`` landmarks to pixel-scale values.

        x and z are multiplied by ``frame_w`` and y by ``frame_h``. The result
        has the same shape and dtype as ``landmarks``.
        """
        scaled = np.zeros_like(landmarks)
        for column, factor in enumerate((frame_w, frame_h, frame_w)):
            scaled[:, column] = landmarks[:, column] * factor
        return scaled

    def close(self):
        """Close the underlying landmarker."""
        self._landmarker.close()

    def __enter__(self):
        """Return this detector for use in a ``with`` block."""
        return self

    def __exit__(self, *args):
        """Close the landmarker when the ``with`` block ends."""
        self.close()
|