// Sanket-Setu — frontend/src/hooks/useMediaPipe.ts
// (commit c476eae — "fix: model paths (.pth), landmark normalization, WS URL,
//  GPU fallback; add ModelSelector; mobile layout improvements")
import { useEffect, useRef, useState, useCallback } from 'react';
import {
HandLandmarker,
FilesetResolver,
type HandLandmarkerResult,
type NormalizedLandmark,
} from '@mediapipe/tasks-vision';
import { normaliseLandmarks } from '../lib/landmarkUtils';
/**
 * Public state and controls exposed by the useMediaPipe hook.
 */
export interface MediaPipeState {
landmarks: number[] | null; // 63-float normalised vector (21 landmarks × x/y/z); null when no hand is detected
rawLandmarks: NormalizedLandmark[] | null; // 21-point raw result (for canvas drawing); null when no hand is detected
handedness: 'Left' | 'Right' | null; // category name reported by MediaPipe for the detected hand
isDetecting: boolean; // true while the rAF detection loop is running
isLoading: boolean; // true while the WASM runtime + model are downloading/initialising
error: string | null; // user-facing message if model initialisation failed
startDetection: (video: HTMLVideoElement) => void; // begin per-frame inference on the given video element
stopDetection: () => void; // cancel the loop and clear all detection state
}
// CDN location of the tasks-vision WASM runtime.
// NOTE(review): '@latest' can drift from the npm-pinned version of
// @mediapipe/tasks-vision and break at runtime — consider pinning the same
// version here. TODO confirm against package.json.
const WASM_URL =
'https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@latest/wasm';
// Google-hosted float16 hand-landmarker model (21 3-D landmarks per hand).
const MODEL_URL =
'https://storage.googleapis.com/mediapipe-models/hand_landmarker/hand_landmarker/float16/1/hand_landmarker.task';
/**
 * Hook that drives MediaPipe HandLandmarker inference on a video element.
 *
 * Loads the WASM runtime and model once on mount (GPU delegate first,
 * falling back to CPU), then exposes start/stop controls that run
 * detection in a requestAnimationFrame loop throttled to ~30 fps.
 *
 * @returns MediaPipeState — detection results, loading/error flags, and
 *          start/stop controls.
 */
export function useMediaPipe(): MediaPipeState {
  // Long-lived refs: the landmarker instance, the current rAF handle, and
  // the timestamp of the last processed frame (used for throttling).
  const landmarkerRef = useRef<HandLandmarker | null>(null);
  const rafRef = useRef<number>(0);
  const lastTsRef = useRef<number>(0);

  const [landmarks, setLandmarks] = useState<number[] | null>(null);
  const [rawLandmarks, setRawLandmarks] = useState<NormalizedLandmark[] | null>(null);
  const [handedness, setHandedness] = useState<'Left' | 'Right' | null>(null);
  const [isDetecting, setIsDetecting] = useState(false);
  const [isLoading, setIsLoading] = useState(false);
  const [error, setError] = useState<string | null>(null);

  // Initialise the landmarker once on mount.
  useEffect(() => {
    let cancelled = false;
    setIsLoading(true);
    (async () => {
      try {
        const vision = await FilesetResolver.forVisionTasks(WASM_URL);
        // Shared options — only the delegate differs between attempts.
        const makeOptions = (delegate: 'GPU' | 'CPU') => ({
          baseOptions: { modelAssetPath: MODEL_URL, delegate },
          runningMode: 'VIDEO' as const,
          numHands: 1,
          minHandDetectionConfidence: 0.4,
          minHandPresenceConfidence: 0.4,
          minTrackingConfidence: 0.4,
        });
        // Try GPU first for best performance; fall back to CPU if unavailable.
        let hl: HandLandmarker;
        try {
          hl = await HandLandmarker.createFromOptions(vision, makeOptions('GPU'));
        } catch {
          console.warn('GPU delegate unavailable, falling back to CPU.');
          hl = await HandLandmarker.createFromOptions(vision, makeOptions('CPU'));
        }
        if (cancelled) {
          // Component unmounted while the model was loading: release the
          // instance instead of leaking it (it was never stored in the ref,
          // so the unmount cleanup could not have closed it).
          hl.close();
          return;
        }
        landmarkerRef.current = hl;
        setIsLoading(false);
      } catch (err) {
        if (!cancelled) {
          console.error('MediaPipe init error', err);
          setError('Failed to load hand detection model. Check network.');
          setIsLoading(false);
        }
      }
    })();
    return () => {
      cancelled = true;
      cancelAnimationFrame(rafRef.current);
      landmarkerRef.current?.close();
    };
  }, []);

  /** Start the per-frame detection loop on the given video element. */
  const startDetection = useCallback((video: HTMLVideoElement) => {
    if (!landmarkerRef.current) return;
    // Guard against double-start: cancel any loop already scheduled.
    // Without this, a second call spawns a parallel rAF loop; both loops
    // overwrite rafRef.current, so the older one can never be cancelled
    // by stopDetection or the unmount cleanup.
    cancelAnimationFrame(rafRef.current);
    setIsDetecting(true);

    const detect = (now: number) => {
      // Keep idling (cheaply) while the video is not playable.
      if (!landmarkerRef.current || !video || video.paused || video.ended) {
        rafRef.current = requestAnimationFrame(detect);
        return;
      }
      // Throttle to ~30 fps (33 ms between processed frames).
      if (now - lastTsRef.current < 33) {
        rafRef.current = requestAnimationFrame(detect);
        return;
      }
      lastTsRef.current = now;

      let result: HandLandmarkerResult;
      try {
        result = landmarkerRef.current.detectForVideo(video, now);
      } catch {
        // Transient inference failure (e.g. frame not ready) — skip frame.
        rafRef.current = requestAnimationFrame(detect);
        return;
      }

      if (result.handednesses.length > 0 && result.landmarks.length > 0) {
        const raw = result.landmarks[0]; // NormalizedLandmark[]
        const hand = result.handednesses[0][0].categoryName as 'Left' | 'Right';
        try {
          const flat = normaliseLandmarks(raw);
          setLandmarks(flat);
          setRawLandmarks(raw);
          setHandedness(hand);
        } catch {
          // Normalisation failed — clear the vectors but keep handedness
          // untouched, matching prior behaviour.
          setLandmarks(null);
          setRawLandmarks(null);
        }
      } else {
        // No hand in frame: clear all detection state.
        setLandmarks(null);
        setRawLandmarks(null);
        setHandedness(null);
      }
      rafRef.current = requestAnimationFrame(detect);
    };
    rafRef.current = requestAnimationFrame(detect);
  }, []);

  /** Cancel the detection loop and reset all detection state. */
  const stopDetection = useCallback(() => {
    cancelAnimationFrame(rafRef.current);
    setIsDetecting(false);
    setLandmarks(null);
    setRawLandmarks(null);
    setHandedness(null);
  }, []);

  return {
    landmarks,
    rawLandmarks,
    handedness,
    isDetecting,
    isLoading,
    error,
    startDetection,
    stopDetection,
  };
}