Spaces:

FocusGuard
/

IntegrationTest

Sleeping

File size: 12,656 Bytes

8bbb872

import argparse
import os
import sys
import time

import cv2
import numpy as np
from mediapipe.tasks.python.vision import FaceLandmarksConnections

_PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
if _PROJECT_ROOT not in sys.path:
    sys.path.insert(0, _PROJECT_ROOT)

from ui.pipeline import (
    FaceMeshPipeline, MLPPipeline, HybridFocusPipeline,
    XGBoostPipeline, _latest_model_artifacts,
)
from models.face_mesh import FaceMeshDetector

# Font used for every text overlay drawn on the frame.
FONT = cv2.FONT_HERSHEY_SIMPLEX
# Colour tuples handed to the cv2 drawing calls. The channel order is
# blue, green, red (note RED is (0, 0, 255) and CYAN is (255, 255, 0)).
CYAN = (255, 255, 0)
GREEN = (0, 255, 0)
MAGENTA = (255, 0, 255)
ORANGE = (0, 165, 255)
RED = (0, 0, 255)
WHITE = (255, 255, 255)
YELLOW = (0, 255, 255)
LIGHT_GREEN = (144, 238, 144)

# (start, end) landmark-index pairs copied out of MediaPipe's connection
# tables; each pair is drawn as one line segment.
_TESSELATION = [(c.start, c.end) for c in FaceLandmarksConnections.FACE_LANDMARKS_TESSELATION]
_CONTOURS = [(c.start, c.end) for c in FaceLandmarksConnections.FACE_LANDMARKS_CONTOURS]
# Ordered landmark indices; draw_contours joins consecutive entries with
# line segments, so the order of each list defines the drawn path.
_LEFT_EYEBROW = [70, 63, 105, 66, 107, 55, 65, 52, 53, 46]
_RIGHT_EYEBROW = [300, 293, 334, 296, 336, 285, 295, 282, 283, 276]
_NOSE_BRIDGE = [6, 197, 195, 5, 4, 1, 19, 94, 2]
# The lip paths repeat their first index at the end so the outline closes.
_LIPS_OUTER = [61, 146, 91, 181, 84, 17, 314, 405, 321, 375, 291, 409, 270, 269, 267, 0, 37, 39, 40, 185, 61]
_LIPS_INNER = [78, 95, 88, 178, 87, 14, 317, 402, 318, 324, 308, 415, 310, 311, 312, 13, 82, 81, 80, 191, 78]
# Six landmark indices per eye, drawn as filled dots by draw_eyes_and_irises.
# NOTE(review): "EAR" presumably stands for eye aspect ratio -- confirm.
_LEFT_EAR_POINTS = [33, 160, 158, 133, 153, 145]
_RIGHT_EAR_POINTS = [362, 385, 387, 263, 373, 380]

# Mesh overlay levels. Each value is also the index of its label in
# _MESH_NAMES, and the "m" key steps through them in this order.
MESH_FULL, MESH_CONTOURS, MESH_OFF = range(3)
_MESH_NAMES = ["FULL MESH", "CONTOURS", "MESH OFF"]

# Focus-scoring modes; the values double as keys of the pipeline table.
MODE_GEO, MODE_MLP, MODE_HYBRID, MODE_XGB = range(4)
_MODE_NAMES = {
    MODE_GEO: "GEOMETRIC",
    MODE_MLP: "MLP",
    MODE_HYBRID: "HYBRID",
    MODE_XGB: "XGBOOST",
}
# Key code of the digit "1".."4" -> mode; the digit is always mode + 1.
_MODE_KEYS = {ord(str(mode + 1)): mode for mode in _MODE_NAMES}


def _lm_to_px(landmarks, idx, w, h):
    """Return landmark ``idx`` as an integer ``(x, y)`` pixel tuple.

    Column 0 of the landmark row is scaled by ``w`` and column 1 by ``h``,
    then both are truncated with ``int``. Presumably the stored coordinates
    are fractions of the frame size -- confirm against the detector.
    """
    x_frac = landmarks[idx, 0]
    y_frac = landmarks[idx, 1]
    return int(x_frac * w), int(y_frac * h)


def draw_tessellation(frame, landmarks, w, h):
    """Blend the full face-mesh wireframe into ``frame`` in place.

    Every (start, end) pair in ``_TESSELATION`` is drawn as a thin grey
    line on a copy of the frame; the copy is then mixed back into the
    original so the mesh appears semi-transparent.
    """
    wire_colour = (200, 200, 200)
    canvas = frame.copy()
    for start_idx, end_idx in _TESSELATION:
        cv2.line(
            canvas,
            _lm_to_px(landmarks, start_idx, w, h),
            _lm_to_px(landmarks, end_idx, w, h),
            wire_colour,
            1,
            cv2.LINE_AA,
        )
    # 30% wireframe copy + 70% untouched frame, written back into frame.
    cv2.addWeighted(canvas, 0.3, frame, 0.7, 0, frame)


def draw_contours(frame, landmarks, w, h):
    """Draw the face contour segments and the feature paths onto ``frame``.

    The MediaPipe contour pairs are drawn first in cyan, followed by the
    eyebrows, nose bridge, outer lips and inner lips, each as a chain of
    segments between consecutive landmark indices.
    """

    def px(idx):
        return _lm_to_px(landmarks, idx, w, h)

    for start_idx, end_idx in _CONTOURS:
        cv2.line(frame, px(start_idx), px(end_idx), CYAN, 1, cv2.LINE_AA)

    # (index path, colour, thickness) in the order they are painted.
    styled_paths = (
        (_LEFT_EYEBROW, LIGHT_GREEN, 2),
        (_RIGHT_EYEBROW, LIGHT_GREEN, 2),
        (_NOSE_BRIDGE, ORANGE, 1),
        (_LIPS_OUTER, MAGENTA, 1),
        (_LIPS_INNER, (200, 0, 200), 1),
    )
    for path, colour, thickness in styled_paths:
        for here, there in zip(path, path[1:]):
            cv2.line(frame, px(here), px(there), colour, thickness, cv2.LINE_AA)


def draw_eyes_and_irises(frame, landmarks, w, h):
    """Overlay eye outlines, eye key points, irises and gaze lines on ``frame``.

    For each eye this draws a closed green outline, six yellow dots, and,
    when at least five iris landmarks are present, a magenta iris circle
    with a white centre dot. A red line is then drawn from the iris centre,
    extended by three times its offset from the midpoint of the two eye
    corners.
    """

    def px(idx):
        return _lm_to_px(landmarks, idx, w, h)

    # Closed outlines, left eye first.
    for eye_indices in (
        FaceMeshDetector.LEFT_EYE_INDICES,
        FaceMeshDetector.RIGHT_EYE_INDICES,
    ):
        outline = np.array([px(i) for i in eye_indices], dtype=np.int32)
        cv2.polylines(frame, [outline], True, GREEN, 2, cv2.LINE_AA)

    # Filled dots on the per-eye key points.
    for idx in _LEFT_EAR_POINTS + _RIGHT_EAR_POINTS:
        cv2.circle(frame, px(idx), 3, YELLOW, -1, cv2.LINE_AA)

    # (iris landmark indices, inner eye corner, outer eye corner)
    iris_specs = (
        (FaceMeshDetector.LEFT_IRIS_INDICES, 133, 33),
        (FaceMeshDetector.RIGHT_IRIS_INDICES, 362, 263),
    )
    for iris_indices, eye_inner, eye_outer in iris_specs:
        iris_px = np.array([px(i) for i in iris_indices], dtype=np.int32)
        # The first iris landmark is used as the iris centre.
        center = iris_px[0]
        center_pt = tuple(center)

        if len(iris_px) >= 5:
            # Radius is the mean distance from the centre to the next four points.
            rim_dists = [np.linalg.norm(rim - center) for rim in iris_px[1:5]]
            radius = max(int(np.mean(rim_dists)), 2)
            cv2.circle(frame, center_pt, radius, MAGENTA, 2, cv2.LINE_AA)
            cv2.circle(frame, center_pt, 2, WHITE, -1, cv2.LINE_AA)

        # Midpoint of the two eye corners, converted to pixels.
        mid_x = (landmarks[eye_inner, 0] + landmarks[eye_outer, 0]) / 2.0
        mid_y = (landmarks[eye_inner, 1] + landmarks[eye_outer, 1]) / 2.0
        dx = center[0] - int(mid_x * w)
        dy = center[1] - int(mid_y * h)
        gaze_end = (int(center[0] + dx * 3), int(center[1] + dy * 3))
        cv2.line(frame, center_pt, gaze_end, RED, 1, cv2.LINE_AA)


def _build_pipelines(pipelines, args, detector):
    """Fill ``pipelines`` (mode -> pipeline) with every pipeline that loads.

    The dict is populated in place, rather than returned, so that the
    caller can still close the pipelines built so far if a later step
    raises. The geometric pipeline is mandatory; MLP, hybrid and XGBoost
    are optional and a failure to construct one is only reported.
    """
    model_dir = args.mlp_dir or os.path.join(_PROJECT_ROOT, "checkpoints")
    if not os.path.exists(model_dir):
        model_dir = os.path.join(_PROJECT_ROOT, "MLP", "models")

    # 1. Geometric
    pipelines[MODE_GEO] = FaceMeshPipeline(
        max_angle=args.max_angle,
        eye_model_path=args.eye_model,
        eye_backend=args.eye_backend,
        eye_blend=args.eye_blend,
        detector=detector,
    )

    # 2. MLP & Hybrid (both need an MLP artifact in model_dir)
    mlp_path, _, _ = _latest_model_artifacts(model_dir)
    if mlp_path is None and not args.mlp_dir:
        # Fallback to MLP/models, but only when no directory was requested.
        alt_dir = os.path.join(_PROJECT_ROOT, "MLP", "models")
        mlp_path, _, _ = _latest_model_artifacts(alt_dir)
        if mlp_path:
            model_dir = alt_dir

    if mlp_path is not None:
        try:
            pipelines[MODE_MLP] = MLPPipeline(model_dir=model_dir, detector=detector)
        except Exception as e:
            print(f"[DEMO] MLP unavailable: {e}")

        try:
            pipelines[MODE_HYBRID] = HybridFocusPipeline(
                model_dir=model_dir,
                eye_model_path=args.eye_model,
                eye_backend=args.eye_backend,
                eye_blend=args.eye_blend,
                max_angle=args.max_angle,
                detector=detector,
            )
        except Exception as e:
            print(f"[DEMO] Hybrid unavailable: {e}")

    # 3. XGBoost
    try:
        pipelines[MODE_XGB] = XGBoostPipeline(model_path=args.xgb_path)
    except Exception as e:
        print(f"[DEMO] XGBoost unavailable: {e}")


def _format_detail(mode, result):
    """Return the per-mode score line shown under the focus status."""
    mar = result.get("mar")
    mar_s = f" MAR:{mar:.2f}" if mar is not None else ""

    if mode == MODE_GEO:
        sf = result.get("s_face", 0)
        se = result.get("s_eye", 0)
        rs = result.get("raw_score", 0)
        return f"S_face:{sf:.2f} S_eye:{se:.2f}{mar_s} score:{rs:.2f}"
    if mode == MODE_MLP:
        mp = result.get("mlp_prob", 0)
        rs = result.get("raw_score", 0)
        return f"mlp_prob:{mp:.2f} score:{rs:.2f}{mar_s}"
    if mode == MODE_HYBRID:
        mp = result.get("mlp_prob", 0)
        gs = result.get("geo_score", 0)
        fs = result.get("focus_score", 0)
        return f"focus:{fs:.2f} mlp:{mp:.2f} geo:{gs:.2f}{mar_s}"
    if mode == MODE_XGB:
        rs = result.get("raw_score", 0)
        return f"xgb_prob:{rs:.2f}"
    return ""


def main():
    """Run the interactive webcam demo.

    Parses the command line, builds every pipeline that can be loaded,
    then reads frames from the camera, runs the active pipeline on each
    one and shows the frame with the mesh overlay and a HUD. Keys:
    ``q`` quits, ``m`` cycles the mesh overlay, digits switch the mode.

    All resources (camera, windows, pipelines, detector) are released on
    every exit path, including a camera that fails to open and an error
    raised while the pipelines are being built.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--camera", type=int, default=0)
    parser.add_argument("--mlp-dir", type=str, default=None)
    parser.add_argument("--max-angle", type=float, default=22.0)
    parser.add_argument("--eye-model", type=str, default=None)
    parser.add_argument("--eye-backend", type=str, default="yolo", choices=["yolo", "geometric"])
    parser.add_argument("--eye-blend", type=float, default=0.5)
    parser.add_argument("--xgb-path", type=str, default=None)
    parser.add_argument("--xgb", action="store_true", help="Start in XGBoost mode")
    args = parser.parse_args()

    detector = FaceMeshDetector()
    pipelines = {}
    cap = None

    try:
        _build_pipelines(pipelines, args, detector)
        # Insertion order is GEO, MLP, HYBRID, XGB, so GEO is always first.
        available_modes = list(pipelines)

        if args.xgb and MODE_XGB in pipelines:
            current_mode = MODE_XGB
        else:
            current_mode = available_modes[0]
        pipeline = pipelines[current_mode]

        cap = cv2.VideoCapture(args.camera)
        if not cap.isOpened():
            print("[DEMO] ERROR: Cannot open camera")
            return

        mode_hint = "  ".join(f"{k+1}:{_MODE_NAMES[k]}" for k in available_modes)
        # Advertise exactly the digit keys that are bound in _MODE_KEYS.
        digit_keys = sorted(chr(code) for code in _MODE_KEYS)
        print(f"[DEMO] Available modes: {mode_hint}")
        print(f"[DEMO] Active: {_MODE_NAMES[current_mode]}")
        print(f"[DEMO] q=quit  m=mesh  {digit_keys[0]}-{digit_keys[-1]}=switch mode")

        prev_time = time.time()
        fps = 0.0
        mesh_mode = MESH_FULL

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            result = pipeline.process_frame(frame)
            now = time.time()
            # Exponentially smoothed FPS; the floor avoids division by zero.
            fps = 0.9 * fps + 0.1 * (1.0 / max(now - prev_time, 1e-6))
            prev_time = now

            h, w = frame.shape[:2]
            lm = result["landmarks"]
            if lm is not None:
                if mesh_mode == MESH_FULL:
                    draw_tessellation(frame, lm, w, h)
                    draw_contours(frame, lm, w, h)
                elif mesh_mode == MESH_CONTOURS:
                    draw_contours(frame, lm, w, h)
                draw_eyes_and_irises(frame, lm, w, h)

                if hasattr(pipeline, "head_pose"):
                    pipeline.head_pose.draw_axes(frame, lm)
                if result.get("left_bbox") and result.get("right_bbox"):
                    lx1, ly1, lx2, ly2 = result["left_bbox"]
                    rx1, ry1, rx2, ry2 = result["right_bbox"]
                    cv2.rectangle(frame, (lx1, ly1), (lx2, ly2), YELLOW, 1)
                    cv2.rectangle(frame, (rx1, ry1), (rx2, ry2), YELLOW, 1)

            # --- HUD ---
            status = "FOCUSED" if result["is_focused"] else "NOT FOCUSED"
            status_color = GREEN if result["is_focused"] else RED
            cv2.rectangle(frame, (0, 0), (w, 55), (0, 0, 0), -1)
            cv2.putText(frame, status, (10, 28), FONT, 0.8, status_color, 2, cv2.LINE_AA)

            mode_label = _MODE_NAMES[current_mode]
            cv2.putText(frame, f"{mode_label}  {_MESH_NAMES[mesh_mode]}  FPS:{fps:.0f}",
                        (w - 340, 28), FONT, 0.45, WHITE, 1, cv2.LINE_AA)

            detail = _format_detail(current_mode, result)
            cv2.putText(frame, detail, (10, 48), FONT, 0.45, WHITE, 1, cv2.LINE_AA)

            if result.get("is_yawning"):
                cv2.putText(frame, "YAWN", (10, 75), FONT, 0.7, ORANGE, 2, cv2.LINE_AA)

            # Only show the pose line when all three angles are present, so
            # a missing pitch/roll cannot crash the format call.
            yaw = result.get("yaw")
            pitch = result.get("pitch")
            roll = result.get("roll")
            if yaw is not None and pitch is not None and roll is not None:
                cv2.putText(
                    frame,
                    f"yaw:{yaw:+.0f} pitch:{pitch:+.0f} roll:{roll:+.0f}",
                    (w - 280, 48), FONT, 0.4, (180, 180, 180), 1, cv2.LINE_AA,
                )

            cv2.putText(frame, f"q:quit  m:mesh  {mode_hint}",
                        (10, h - 10), FONT, 0.35, (150, 150, 150), 1, cv2.LINE_AA)

            cv2.imshow("FocusGuard", frame)

            key = cv2.waitKey(1) & 0xFF
            if key == ord("q"):
                break
            elif key == ord("m"):
                mesh_mode = (mesh_mode + 1) % len(_MESH_NAMES)
                print(f"[DEMO] Mesh: {_MESH_NAMES[mesh_mode]}")
            elif key in _MODE_KEYS:
                requested = _MODE_KEYS[key]
                if requested in pipelines:
                    current_mode = requested
                    pipeline = pipelines[current_mode]
                    print(f"[DEMO] Switched to {_MODE_NAMES[current_mode]}")
                else:
                    print(f"[DEMO] {_MODE_NAMES[requested]} not available (no checkpoint)")

    finally:
        # cap is None when setup failed before the camera was opened.
        if cap is not None:
            cap.release()
        cv2.destroyAllWindows()
        for p in pipelines.values():
            p.close()
        detector.close()
        print("[DEMO] Done")


# Start the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()