// Face-Expression-Analyzer/frontend/app/hooks/useFaceTracking.ts
// Antigravity AI
// Initial deployment of FaceVision AI: Combined Next.js and FastAPI with Docker
// 954551b
"use client";
import { useEffect, useRef, useState, useCallback } from "react";
import type { FaceLandmarker, FaceLandmarkerResult } from "@mediapipe/tasks-vision";
/** The closed set of emotion labels this module classifies a face into. */
export type EmotionType = "Happy" | "Sad" | "Angry" | "Surprised" | "Neutral";
/** Per-frame diagnostics published by the face-tracking hook. */
export interface TrackingStats {
/** Number of detection passes that ran within the trailing one-second window. */
fps: number;
/** Whether the most recent detection pass returned blendshapes for a face. */
faceDetected: boolean;
/**
 * Blendshape category name -> score, taken from the most recent frame in
 * which a face was found (empty until the first such frame).
 */
blendshapes: Record<string, number>;
}
// Names of the MediaPipe blendshape categories that the emotion classifier
// reads. Declared `as const` so the union type below can be derived from it.
const BLENDSHAPE_KEYS = [
"mouthSmileLeft",
"mouthSmileRight",
"browInnerUp",
"jawOpen",
"browDownLeft",
"browDownRight",
"mouthFrownLeft",
"mouthFrownRight",
] as const;
/** Union of the blendshape names listed in `BLENDSHAPE_KEYS`. */
type BlendshapeKey = (typeof BLENDSHAPE_KEYS)[number];
/**
 * Picks the single best-matching emotion for one frame of blendshape scores.
 *
 * Each emotion gets a weighted score built from a handful of blendshapes;
 * `Neutral` has a fixed baseline of 0.15 and therefore wins whenever no
 * expression produces stronger evidence than that.
 *
 * @param shapes Blendshape category name -> score. Missing entries count as 0.
 * @returns The emotion with the highest score. On an exact tie the emotion
 *   listed later in the score table wins, so `Neutral` wins ties.
 */
function classifyEmotion(shapes: Record<string, number>): EmotionType {
  const read = (name: string): number => shapes[name] ?? 0;

  // Left/right pairs are averaged so a one-sided twitch counts half as much.
  const smile = (read("mouthSmileLeft") + read("mouthSmileRight")) / 2;
  const frown = (read("mouthFrownLeft") + read("mouthFrownRight")) / 2;
  const browDown = (read("browDownLeft") + read("browDownRight")) / 2;
  const browUp = read("browInnerUp");
  const jawOpen = read("jawOpen");

  // Score each emotion
  const scores: Record<EmotionType, number> = {
    Happy: smile * 2.0 + (jawOpen > 0.3 ? 0.3 : 0),
    Surprised: browUp * 1.5 + jawOpen * 1.5,
    Angry: browDown * 1.5 + frown * 0.8,
    // The "not smiling" bonus is scaled by the frown itself. As a free-standing
    // `(1 - smile) * 0.2` term it gave a completely relaxed face a Sad score of
    // 0.2, which always beat the 0.15 Neutral baseline and made Neutral
    // unreachable. Scaled this way it only reinforces an actual frown.
    Sad: frown * 1.5 + browUp * 0.3 + frown * (1 - smile) * 0.2,
    Neutral: 0.15,
  };

  // Return the highest-scoring emotion
  return (Object.keys(scores) as EmotionType[]).reduce((best, candidate) =>
    scores[best] > scores[candidate] ? best : candidate
  );
}
/** Maximum number of recent per-frame classifications kept for smoothing. */
const SMOOTH_WINDOW = 5;
/**
 * React hook that runs MediaPipe face-landmark detection against a video
 * element on every animation frame and reports a smoothed emotion label.
 *
 * Lifecycle:
 * - On mount the FaceLandmarker model is loaded; `isLoaded` flips to true on
 *   success, `loadError` is set on failure.
 * - While both `isReady` and `isLoaded` are true, a requestAnimationFrame loop
 *   runs detection, classifies the blendshapes, and publishes the most frequent
 *   label among the last `SMOOTH_WINDOW` frames.
 * - On unmount the loop is cancelled and the landmarker is closed.
 *
 * When no face is found the smoothing history is cleared, but
 * `currentEmotion` and `stats.blendshapes` keep their last values.
 *
 * @param videoRef Ref to the video element to analyse.
 * @param isReady Whether the caller considers the video ready for analysis.
 * @returns The smoothed emotion, per-frame stats, and model load status.
 */
export function useFaceTracking(
  videoRef: React.RefObject<HTMLVideoElement | null>,
  isReady: boolean
): {
  currentEmotion: EmotionType;
  stats: TrackingStats;
  isLoaded: boolean;
  loadError: string | null;
} {
  const [currentEmotion, setCurrentEmotion] = useState<EmotionType>("Neutral");
  const [stats, setStats] = useState<TrackingStats>({
    fps: 0,
    faceDetected: false,
    blendshapes: {},
  });
  const [isLoaded, setIsLoaded] = useState(false);
  const [loadError, setLoadError] = useState<string | null>(null);

  const landmarkerRef = useRef<FaceLandmarker | null>(null);
  const rafRef = useRef<number | null>(null);
  const emotionHistoryRef = useRef<EmotionType[]>([]);
  // Timestamps (ms) of detection passes within the trailing second.
  const fpsCounterRef = useRef<number[]>([]);

  // Load MediaPipe FaceLandmarker
  useEffect(() => {
    let cancelled = false;

    async function loadModel(): Promise<void> {
      try {
        // Imported lazily so the library is only fetched in the browser.
        const vision = await import("@mediapipe/tasks-vision");
        const { FaceLandmarker: FL, FilesetResolver } = vision;
        const filesetResolver = await FilesetResolver.forVisionTasks(
          "https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@0.10.3/wasm"
        );
        const faceLandmarker = await FL.createFromOptions(filesetResolver, {
          baseOptions: {
            modelAssetPath:
              "https://storage.googleapis.com/mediapipe-models/face_landmarker/face_landmarker/float16/1/face_landmarker.task",
            delegate: "GPU",
          },
          runningMode: "VIDEO",
          numFaces: 1,
          outputFaceBlendshapes: true,
          outputFacialTransformationMatrixes: false,
        });

        if (cancelled) {
          // The effect was torn down while the model was still loading.
          // Nobody will ever use this instance, so release it right away
          // instead of leaking its resources.
          faceLandmarker.close();
          return;
        }

        landmarkerRef.current = faceLandmarker;
        setIsLoaded(true);
      } catch (err: unknown) {
        if (!cancelled) {
          console.error("Failed to load FaceLandmarker:", err);
          setLoadError("Failed to load AI model. Please refresh the page.");
        }
      }
    }

    void loadModel();

    return () => {
      cancelled = true;
      // Release the landmarker created by this effect run, if it finished.
      landmarkerRef.current?.close();
      landmarkerRef.current = null;
    };
  }, []);

  // requestAnimationFrame loop
  const runDetection = useCallback(() => {
    const video = videoRef.current;
    const landmarker = landmarkerRef.current;

    // Not ready to analyse yet: try again on the next frame.
    if (
      !video ||
      !landmarker ||
      video.readyState < HTMLMediaElement.HAVE_CURRENT_DATA
    ) {
      rafRef.current = requestAnimationFrame(runDetection);
      return;
    }

    const now = performance.now();
    const result: FaceLandmarkerResult = landmarker.detectForVideo(video, now);

    // FPS calculation: count the passes that happened in the last 1000 ms.
    fpsCounterRef.current.push(now);
    fpsCounterRef.current = fpsCounterRef.current.filter((t) => now - t < 1000);
    const fps = fpsCounterRef.current.length;

    if (result.faceBlendshapes && result.faceBlendshapes.length > 0) {
      // Build blendshape map
      const blendshapes: Record<string, number> = {};
      for (const category of result.faceBlendshapes[0].categories) {
        blendshapes[category.categoryName] = category.score;
      }

      // Classify and smooth
      const rawEmotion = classifyEmotion(blendshapes);
      emotionHistoryRef.current.push(rawEmotion);
      if (emotionHistoryRef.current.length > SMOOTH_WINDOW) {
        emotionHistoryRef.current.shift();
      }

      // Mode of last N frames
      const counts: Partial<Record<EmotionType, number>> = {};
      for (const e of emotionHistoryRef.current) {
        counts[e] = (counts[e] ?? 0) + 1;
      }
      const smoothedEmotion = (Object.keys(counts) as EmotionType[]).reduce(
        (a, b) => ((counts[a] ?? 0) > (counts[b] ?? 0) ? a : b)
      );

      setCurrentEmotion(smoothedEmotion);
      setStats({ fps, faceDetected: true, blendshapes });
    } else {
      // No face: drop the smoothing history so stale frames cannot outvote
      // fresh ones when the face comes back. Last blendshapes are kept.
      emotionHistoryRef.current = [];
      setStats((prev) => ({ ...prev, fps, faceDetected: false }));
    }

    rafRef.current = requestAnimationFrame(runDetection);
  }, [videoRef]);

  // Start/stop loop
  useEffect(() => {
    if (!isReady || !isLoaded) return;
    rafRef.current = requestAnimationFrame(runDetection);
    return () => {
      if (rafRef.current !== null) cancelAnimationFrame(rafRef.current);
    };
  }, [isReady, isLoaded, runDetection]);

  return { currentEmotion, stats, isLoaded, loadError };
}