"use client"; import { useEffect, useRef, useState, useCallback } from "react"; import type { FaceLandmarker, FaceLandmarkerResult } from "@mediapipe/tasks-vision"; export type EmotionType = "Happy" | "Sad" | "Angry" | "Surprised" | "Neutral"; export interface TrackingStats { fps: number; faceDetected: boolean; blendshapes: Record; } // Maps MediaPipe blendshape names to our keys const BLENDSHAPE_KEYS = [ "mouthSmileLeft", "mouthSmileRight", "browInnerUp", "jawOpen", "browDownLeft", "browDownRight", "mouthFrownLeft", "mouthFrownRight", ] as const; type BlendshapeKey = (typeof BLENDSHAPE_KEYS)[number]; function classifyEmotion(shapes: Record): EmotionType { const smileLeft = shapes["mouthSmileLeft"] ?? 0; const smileRight = shapes["mouthSmileRight"] ?? 0; const browUp = shapes["browInnerUp"] ?? 0; const jawOpen = shapes["jawOpen"] ?? 0; const browDownLeft = shapes["browDownLeft"] ?? 0; const browDownRight = shapes["browDownRight"] ?? 0; const frownLeft = shapes["mouthFrownLeft"] ?? 0; const frownRight = shapes["mouthFrownRight"] ?? 0; const smile = (smileLeft + smileRight) / 2; const frown = (frownLeft + frownRight) / 2; const browDown = (browDownLeft + browDownRight) / 2; // Score each emotion const scores: Record = { Happy: smile * 2.0 + (jawOpen > 0.3 ? 0.3 : 0), Surprised: browUp * 1.5 + jawOpen * 1.5, Angry: browDown * 1.5 + frown * 0.8, Sad: frown * 1.5 + browUp * 0.3 + (1 - smile) * 0.2, Neutral: 0.15, }; // Return the highest-scoring emotion return (Object.keys(scores) as EmotionType[]).reduce((a, b) => scores[a] > scores[b] ? a : b ); } const SMOOTH_WINDOW = 5; export function useFaceTracking( videoRef: React.RefObject, isReady: boolean ) { const [currentEmotion, setCurrentEmotion] = useState("Neutral"); const [stats, setStats] = useState({ fps: 0, faceDetected: false, blendshapes: {}, }); const [isLoaded, setIsLoaded] = useState(false); const [loadError, setLoadError] = useState(null); const landmarkerRef = useRef(null); const rafRef = useRef(null); const emotionHistoryRef = useRef([]); const lastTimeRef = useRef(0); const fpsCounterRef = useRef([]); // Load MediaPipe FaceLandmarker useEffect(() => { let cancelled = false; async function loadModel() { try { const vision = await import("@mediapipe/tasks-vision"); const { FaceLandmarker: FL, FilesetResolver } = vision; const filesetResolver = await FilesetResolver.forVisionTasks( "https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@0.10.3/wasm" ); const faceLandmarker = await FL.createFromOptions(filesetResolver, { baseOptions: { modelAssetPath: "https://storage.googleapis.com/mediapipe-models/face_landmarker/face_landmarker/float16/1/face_landmarker.task", delegate: "GPU", }, runningMode: "VIDEO", numFaces: 1, outputFaceBlendshapes: true, outputFacialTransformationMatrixes: false, }); if (!cancelled) { landmarkerRef.current = faceLandmarker; setIsLoaded(true); } } catch (err) { if (!cancelled) { console.error("Failed to load FaceLandmarker:", err); setLoadError("Failed to load AI model. Please refresh the page."); } } } loadModel(); return () => { cancelled = true; }; }, []); // requestAnimationFrame loop const runDetection = useCallback(() => { const video = videoRef.current; const landmarker = landmarkerRef.current; if (!video || !landmarker || video.readyState < 2) { rafRef.current = requestAnimationFrame(runDetection); return; } const now = performance.now(); const result: FaceLandmarkerResult = landmarker.detectForVideo(video, now); // FPS calculation fpsCounterRef.current.push(now); fpsCounterRef.current = fpsCounterRef.current.filter((t) => now - t < 1000); const fps = fpsCounterRef.current.length; if (result.faceBlendshapes && result.faceBlendshapes.length > 0) { // Build blendshape map const blendshapes: Record = {}; for (const category of result.faceBlendshapes[0].categories) { blendshapes[category.categoryName] = category.score; } // Classify and smooth const rawEmotion = classifyEmotion(blendshapes); emotionHistoryRef.current.push(rawEmotion); if (emotionHistoryRef.current.length > SMOOTH_WINDOW) { emotionHistoryRef.current.shift(); } // Mode of last N frames const counts: Partial> = {}; for (const e of emotionHistoryRef.current) { counts[e] = (counts[e] ?? 0) + 1; } const smoothedEmotion = (Object.keys(counts) as EmotionType[]).reduce( (a, b) => ((counts[a] ?? 0) > (counts[b] ?? 0) ? a : b) ); setCurrentEmotion(smoothedEmotion); setStats({ fps, faceDetected: true, blendshapes }); } else { emotionHistoryRef.current = []; setStats((prev) => ({ ...prev, fps, faceDetected: false })); } rafRef.current = requestAnimationFrame(runDetection); }, [videoRef]); // Start/stop loop useEffect(() => { if (!isReady || !isLoaded) return; rafRef.current = requestAnimationFrame(runDetection); return () => { if (rafRef.current !== null) cancelAnimationFrame(rafRef.current); }; }, [isReady, isLoaded, runDetection]); return { currentEmotion, stats, isLoaded, loadError }; }