/**
 * MediaPipeProcessor.js
 *
 * Advanced hand gesture recognition with swipe and pinch detection.
 * Integrates with LLM Flow for gesture-triggered actions.
 *
 * Features:
 * - Hand landmark detection (21 points)
 * - Face mesh detection (468 points) for face blur
 * - Swipe gesture detection (left, right, up, down)
 * - Pinch gesture detection
 * - Finger count detection
 * - Real-time gesture-to-action triggering
 */

/**
 * Pairs of hand-landmark indices that are joined by a line when the hand
 * skeleton is drawn on the overlay canvas.
 */
const HAND_CONNECTIONS = Object.freeze([
  [0, 1], [1, 2], [2, 3], [3, 4],
  [0, 5], [5, 6], [6, 7], [7, 8],
  [0, 9], [9, 10], [10, 11], [11, 12],
  [0, 13], [13, 14], [14, 15], [15, 16],
  [0, 17], [17, 18], [18, 19], [19, 20],
  [5, 9], [9, 13], [13, 17],
]);

/**
 * Pixelates an RGBA pixel buffer in place: every `blockSize` x `blockSize`
 * tile takes the RGB value of its top-left pixel. Alpha is left untouched.
 *
 * @param {Uint8ClampedArray|number[]} data - RGBA bytes, row-major, 4 per pixel.
 * @param {number} width - Region width in pixels.
 * @param {number} height - Region height in pixels.
 * @param {number} blockSize - Edge length of each pixelation tile.
 */
function pixelate(data, width, height, blockSize) {
  for (let py = 0; py < height; py += blockSize) {
    for (let px = 0; px < width; px += blockSize) {
      const source = (py * width + px) * 4;
      const r = data[source];
      const g = data[source + 1];
      const b = data[source + 2];
      for (let dy = 0; dy < blockSize && py + dy < height; dy++) {
        for (let dx = 0; dx < blockSize && px + dx < width; dx++) {
          const target = ((py + dy) * width + (px + dx)) * 4;
          data[target] = r;
          data[target + 1] = g;
          data[target + 2] = b;
        }
      }
    }
  }
}

/**
 * Absolute x/y displacement of landmark 9 between the first and last entry
 * of a slice of landmark history.
 *
 * @param {Array<{landmarks: number[][]}>} entries - Non-empty history slice.
 * @returns {{x: number, y: number}} Absolute travel on each axis.
 */
function palmTravel(entries) {
  const first = entries[0].landmarks[9];
  const last = entries[entries.length - 1].landmarks[9];
  return {
    x: Math.abs(last[0] - first[0]),
    y: Math.abs(last[1] - first[1]),
  };
}

/**
 * Drives MediaPipe Hands and FaceMesh from a webcam stream, derives swipe,
 * pinch and basic gestures from the hand landmarks, draws the hand skeleton
 * on an overlay canvas, pixelates the detected face on a second canvas, and
 * reports gestures to callbacks and to a backend endpoint.
 */
class MediaPipeProcessor {
  /**
   * @param {object} [options]
   * @param {number} [options.maxHands=1]
   * @param {number} [options.modelComplexity=1]
   * @param {number} [options.minDetectionConfidence=0.5]
   * @param {number} [options.minTrackingConfidence=0.5]
   * @param {?Function} [options.onGestureDetected] - Called with `{ type, fingerCount }`.
   * @param {?Function} [options.onLandmarksUpdate] - Called with the raw landmark array.
   * @param {?Function} [options.onSwipeDetected] - Called with the swipe result plus `gestureName`.
   * @param {?Function} [options.onPinchDetected] - Called with the pinch result.
   * @param {string} [options.apiUrl='http://localhost:5001/api'] - Backend base URL.
   */
  constructor(options = {}) {
    // Spread first, then apply defaults with `??`, so that a key passed
    // explicitly as `undefined` still receives its default while legitimate
    // falsy values such as 0 are kept.
    this.options = {
      ...options,
      maxHands: options.maxHands ?? 1,
      modelComplexity: options.modelComplexity ?? 1,
      minDetectionConfidence: options.minDetectionConfidence ?? 0.5,
      minTrackingConfidence: options.minTrackingConfidence ?? 0.5,
      onGestureDetected: options.onGestureDetected ?? null,
      onLandmarksUpdate: options.onLandmarksUpdate ?? null,
      onSwipeDetected: options.onSwipeDetected ?? null,
      onPinchDetected: options.onPinchDetected ?? null,
    };

    this.hands = null;
    this.faceMesh = null;
    this.isInitialized = false;
    this.camera = null;
    // Camera constructor resolved by initialize(); startCamera() needs it.
    this.cameraClass = null;
    this.videoElement = null;
    this.canvasElement = null;
    this.overlayCanvasElement = null;

    this.lastLandmarks = null;
    this.landmarkHistory = [];
    this.maxHistorySize = 30;

    this.swipeDetector = new SwipeDetector();
    this.pinchDetector = new PinchDetector();
    this.fingerCounter = new FingerCounter();

    this.gestureCallback = null;
    this.apiUrl = options.apiUrl || 'http://localhost:5001/api';
  }

  /**
   * Loads the MediaPipe scripts from the CDN and configures the Hands and
   * FaceMesh solutions. Safe to call more than once.
   *
   * @returns {Promise<boolean>} `true` when ready, `false` if loading or setup failed.
   */
  async initialize() {
    if (this.isInitialized) return true;

    try {
      const handsModule = await import('https://cdn.jsdelivr.net/npm/@mediapipe/hands@0.4.1675469240/hands.js');
      const faceMeshModule = await import('https://cdn.jsdelivr.net/npm/@mediapipe/face_mesh@0.4.1633529614/face_mesh.js');
      const cameraModule = await import('https://cdn.jsdelivr.net/npm/@mediapipe/camera_utils@0.3.1640029074/camera_utils.js');

      // NOTE(review): these CDN builds may register their classes as globals
      // instead of ES exports; fall back to globalThis in that case.
      const Hands = handsModule.Hands ?? globalThis.Hands;
      const FaceMesh = faceMeshModule.FaceMesh ?? globalThis.FaceMesh;
      if (!Hands || !FaceMesh) {
        throw new Error('MediaPipe Hands/FaceMesh classes could not be resolved');
      }
      // Keep the Camera class on the instance: startCamera() runs in a
      // different scope and cannot see a local binding declared here.
      this.cameraClass = cameraModule.Camera ?? globalThis.Camera ?? null;

      this.hands = new Hands({
        locateFile: (file) => `https://cdn.jsdelivr.net/npm/@mediapipe/hands@0.4.1675469240/${file}`,
      });
      this.hands.setOptions({
        maxNumHands: this.options.maxHands,
        modelComplexity: this.options.modelComplexity,
        minDetectionConfidence: this.options.minDetectionConfidence,
        minTrackingConfidence: this.options.minTrackingConfidence,
      });
      this.hands.onResults((results) => this.onHandResults(results));

      this.faceMesh = new FaceMesh({
        locateFile: (file) => `https://cdn.jsdelivr.net/npm/@mediapipe/face_mesh@0.4.1633529614/${file}`,
      });
      this.faceMesh.setOptions({
        maxNumFaces: 1,
        refineLandmarks: true,
        minDetectionConfidence: 0.5,
        minTrackingConfidence: 0.5,
      });
      this.faceMesh.onResults((results) => this.onFaceResults(results));

      this.isInitialized = true;
      return true;
    } catch (error) {
      console.error('Failed to initialize MediaPipe:', error);
      return false;
    }
  }

  /**
   * Opens the user-facing webcam, plays it in `videoElement`, and feeds every
   * frame to Hands and FaceMesh.
   *
   * @param {HTMLVideoElement} videoElement - Receives the camera stream.
   * @param {HTMLCanvasElement} canvasElement - Target for the face-blurred frame.
   * @param {HTMLCanvasElement} overlayCanvasElement - Target for the hand skeleton.
   * @returns {Promise<boolean>} `true` once frames are flowing, `false` on any failure.
   */
  async startCamera(videoElement, canvasElement, overlayCanvasElement) {
    this.videoElement = videoElement;
    this.canvasElement = canvasElement;
    this.overlayCanvasElement = overlayCanvasElement;

    // Without the models there is nothing to feed frames to.
    if (!this.isInitialized && !(await this.initialize())) {
      return false;
    }

    let stream = null;
    try {
      const CameraClass = this.cameraClass ?? globalThis.Camera;
      if (!CameraClass) {
        throw new Error('MediaPipe Camera utility is not available');
      }

      stream = await navigator.mediaDevices.getUserMedia({
        video: { facingMode: 'user', width: 640, height: 480 },
      });
      videoElement.srcObject = stream;
      await videoElement.play();

      this.camera = new CameraClass(videoElement, {
        onFrame: async () => {
          if (this.hands && this.faceMesh) {
            await this.hands.send({ image: videoElement });
            await this.faceMesh.send({ image: videoElement });
          }
        },
        width: 640,
        height: 480,
      });
      await this.camera.start();
      return true;
    } catch (error) {
      console.error('Failed to start camera:', error);
      // Release whatever was acquired before the failure so the webcam is
      // not left running.
      if (this.camera) {
        this.camera.stop();
        this.camera = null;
      }
      if (stream) {
        stream.getTracks().forEach((track) => track.stop());
        if (videoElement.srcObject === stream) {
          videoElement.srcObject = null;
        }
      }
      return false;
    }
  }

  /**
   * Stops the MediaPipe camera loop and every track of the attached stream.
   */
  stopCamera() {
    if (this.camera) {
      this.camera.stop();
      this.camera = null;
    }
    if (this.videoElement && this.videoElement.srcObject) {
      this.videoElement.srcObject.getTracks().forEach((track) => track.stop());
      this.videoElement.srcObject = null;
    }
  }

  /**
   * Hands result handler: records the first hand's landmarks, runs the
   * gesture detectors, fires callbacks and redraws the overlay. Clears the
   * overlay when no hand is present.
   *
   * @param {{multiHandLandmarks?: Array<Array<{x: number, y: number, z: number}>>}} results
   */
  onHandResults(results) {
    const landmarks = results.multiHandLandmarks?.[0];
    if (!landmarks) {
      this.lastLandmarks = null;
      this.clearOverlay();
      return;
    }

    this.lastLandmarks = landmarks;
    this.landmarkHistory.push({
      landmarks: landmarks.map((lm) => [lm.x, lm.y, lm.z]),
      timestamp: Date.now(),
    });
    if (this.landmarkHistory.length > this.maxHistorySize) {
      this.landmarkHistory.shift();
    }

    if (this.options.onLandmarksUpdate) {
      this.options.onLandmarksUpdate(landmarks);
    }

    const fingerCount = this.fingerCounter.count(landmarks);
    const swipe = this.swipeDetector.detect(landmarks, fingerCount, this.landmarkHistory);
    const pinch = this.pinchDetector.detect(landmarks);

    if (swipe) {
      this.onSwipeGesture(swipe, fingerCount);
    }
    if (pinch) {
      this.onPinchGesture(pinch);
    }

    const basicGesture = this.detectBasicGesture(landmarks, fingerCount);
    if (basicGesture && this.options.onGestureDetected) {
      this.options.onGestureDetected(basicGesture);
    }

    this.drawHandLandmarks(landmarks);
  }

  /**
   * FaceMesh result handler: pixelates the first detected face.
   *
   * @param {{multiFaceLandmarks?: Array<Array<{x: number, y: number}>>}} results
   */
  onFaceResults(results) {
    if (results.multiFaceLandmarks && results.multiFaceLandmarks.length > 0) {
      this.applyFaceBlur(results.multiFaceLandmarks[0]);
    }
  }

  /**
   * Logs a swipe, notifies `onSwipeDetected`, and reports it to the backend
   * under the name `<fingerCount>_finger_swipe_<direction>`.
   *
   * @param {{direction: string, speed: number, startPosition: ?object, endPosition: object}} swipe
   * @param {number} fingerCount
   */
  onSwipeGesture(swipe, fingerCount) {
    const gestureName = `${fingerCount}_finger_swipe_${swipe.direction}`;
    console.log(`Swipe detected: ${gestureName} (speed: ${swipe.speed.toFixed(3)})`);

    if (this.options.onSwipeDetected) {
      this.options.onSwipeDetected({ ...swipe, fingerCount, gestureName });
    }

    this.sendGestureToBackend(gestureName, {
      direction: swipe.direction,
      fingerCount,
      speed: swipe.speed,
      startPosition: swipe.startPosition,
      endPosition: swipe.endPosition,
    });
  }

  /**
   * Logs a pinch, notifies `onPinchDetected`, and reports it to the backend
   * under the name `pinch_<type>`.
   *
   * @param {{type: string, distance: number, thumbTip: number[], indexTip: number[]}} pinch
   */
  onPinchGesture(pinch) {
    const gestureName = `pinch_${pinch.type}`;
    console.log(`Pinch detected: ${gestureName} (distance: ${pinch.distance.toFixed(3)})`);

    if (this.options.onPinchDetected) {
      this.options.onPinchDetected(pinch);
    }

    this.sendGestureToBackend(gestureName, {
      type: pinch.type,
      distance: pinch.distance,
      thumbTip: pinch.thumbTip,
      indexTip: pinch.indexTip,
    });
  }

  /**
   * Classifies a static pose as `open_palm`, `pointing` or `thumbs_up`.
   *
   * @param {Array<{x: number, y: number}>} landmarks - 21 hand landmarks.
   * @param {number} fingerCount - Result of `FingerCounter.count`.
   * @returns {?{type: string, fingerCount: number}} The gesture, or `null` if none matched.
   */
  detectBasicGesture(landmarks, fingerCount) {
    if (fingerCount >= 4) {
      return { type: 'open_palm', fingerCount };
    }

    if (fingerCount === 1) {
      const indexTip = landmarks[8];
      const indexBase = landmarks[5];
      // Smaller y means higher in the image.
      if (indexTip.y < indexBase.y) {
        return { type: 'pointing', fingerCount: 1 };
      }
    }

    if (this.isThumbsUp(landmarks)) {
      return { type: 'thumbs_up', fingerCount };
    }

    return null;
  }

  /**
   * @param {Array<{x: number, y: number}>} landmarks - 21 hand landmarks.
   * @returns {boolean} `true` when the thumb tip is above landmark 3 while the
   *   index and middle tips are below landmarks 6 and 10 respectively.
   */
  isThumbsUp(landmarks) {
    const thumbTip = landmarks[4];
    const thumbIP = landmarks[3];
    const indexTip = landmarks[8];
    const middleTip = landmarks[12];

    const thumbUp = thumbTip.y < thumbIP.y;
    const fingersDown = indexTip.y > landmarks[6].y && middleTip.y > landmarks[10].y;
    return thumbUp && fingersDown;
  }

  /**
   * Fire-and-forget POST of the gesture and the latest landmarks to
   * `<apiUrl>/llm/gesture-action`. Does nothing when no landmarks are cached.
   *
   * NOTE(review): `parameters` is accepted but is not part of the request
   * body; confirm with the backend contract before adding it to the payload.
   *
   * @param {string} gestureName
   * @param {object} parameters - Gesture details (currently not transmitted).
   */
  sendGestureToBackend(gestureName, parameters) {
    if (!this.lastLandmarks) return;

    const landmarksArray = this.lastLandmarks.map((lm) => [lm.x, lm.y, lm.z]);

    fetch(`${this.apiUrl}/llm/gesture-action`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        user_id: 'demo',
        landmarks: landmarksArray,
        gesture_name: gestureName,
        context: {
          topic: document.title || 'learning',
          timestamp: Date.now(),
        },
      }),
    }).catch(() => console.log('Backend not available for gesture'));
  }

  /**
   * Draws the current video frame onto `canvasElement` and pixelates the
   * padded bounding box of the given face landmarks.
   *
   * @param {Array<{x: number, y: number}>} landmarks - Face landmarks; x/y are
   *   scaled by the video dimensions, i.e. treated as fractions of the frame.
   */
  applyFaceBlur(landmarks) {
    if (!this.canvasElement || !this.videoElement) return;

    const canvas = this.canvasElement;
    const ctx = canvas.getContext('2d');
    const { videoWidth, videoHeight } = this.videoElement;
    if (videoWidth === 0 || videoHeight === 0) return;

    // Assigning width/height resets the canvas, so only do it on a real change;
    // the full-frame drawImage below overwrites the previous frame anyway.
    if (canvas.width !== videoWidth) canvas.width = videoWidth;
    if (canvas.height !== videoHeight) canvas.height = videoHeight;
    ctx.drawImage(this.videoElement, 0, 0);

    let minX = 1;
    let maxX = 0;
    let minY = 1;
    let maxY = 0;
    for (const lm of landmarks) {
      minX = Math.min(minX, lm.x);
      maxX = Math.max(maxX, lm.x);
      minY = Math.min(minY, lm.y);
      maxY = Math.max(maxY, lm.y);
    }

    const padding = 0.15;
    const padX = (maxX - minX) * padding;
    const padY = (maxY - minY) * padding;

    // Clamp all four edges so the region never extends past the frame.
    const x = Math.max(0, Math.floor((minX - padX) * videoWidth));
    const y = Math.max(0, Math.floor((minY - padY) * videoHeight));
    const right = Math.min(videoWidth, Math.floor((maxX + padX) * videoWidth));
    const bottom = Math.min(videoHeight, Math.floor((maxY + padY) * videoHeight));
    const w = right - x;
    const h = bottom - y;

    // Skip degenerate or tiny regions (also covers an empty landmark list).
    if (w > 10 && h > 10) {
      const imageData = ctx.getImageData(x, y, w, h);
      pixelate(imageData.data, w, h, 15);
      ctx.putImageData(imageData, x, y);
    }
  }

  /**
   * Redraws the overlay canvas with the hand skeleton, joint markers and the
   * current finger count.
   *
   * @param {Array<{x: number, y: number}>} landmarks - 21 hand landmarks; x/y
   *   are scaled by the canvas dimensions.
   */
  drawHandLandmarks(landmarks) {
    if (!this.overlayCanvasElement || !this.videoElement) return;

    const canvas = this.overlayCanvasElement;
    const video = this.videoElement;
    if (canvas.width !== video.videoWidth) canvas.width = video.videoWidth;
    if (canvas.height !== video.videoHeight) canvas.height = video.videoHeight;

    const ctx = canvas.getContext('2d');
    ctx.clearRect(0, 0, canvas.width, canvas.height);

    ctx.strokeStyle = 'rgba(0, 255, 136, 0.8)';
    ctx.lineWidth = 2;
    for (const [from, to] of HAND_CONNECTIONS) {
      const start = landmarks[from];
      const end = landmarks[to];
      ctx.beginPath();
      ctx.moveTo(start.x * canvas.width, start.y * canvas.height);
      ctx.lineTo(end.x * canvas.width, end.y * canvas.height);
      ctx.stroke();
    }

    for (let i = 0; i < landmarks.length; i++) {
      const landmark = landmarks[i];
      // Every fourth index (0, 4, 8, ...) gets a larger, green marker.
      const isHighlighted = i % 4 === 0;
      ctx.beginPath();
      ctx.arc(
        landmark.x * canvas.width,
        landmark.y * canvas.height,
        isHighlighted ? 6 : 4,
        0,
        2 * Math.PI,
      );
      ctx.fillStyle = isHighlighted ? '#00ff88' : '#ffffff';
      ctx.fill();
      ctx.strokeStyle = '#000000';
      ctx.lineWidth = 1;
      ctx.stroke();
    }

    const fingerCount = this.fingerCounter.count(landmarks);
    ctx.fillStyle = '#ffffff';
    ctx.font = 'bold 16px sans-serif';
    ctx.fillText(`Fingers: ${fingerCount}`, 10, 30);
  }

  /** Erases everything drawn on the overlay canvas. */
  clearOverlay() {
    if (!this.overlayCanvasElement) return;
    const ctx = this.overlayCanvasElement.getContext('2d');
    ctx.clearRect(0, 0, this.overlayCanvasElement.width, this.overlayCanvasElement.height);
  }

  /** @returns {?Array<{x: number, y: number, z: number}>} Latest hand landmarks, or `null`. */
  getLandmarks() {
    return this.lastLandmarks;
  }

  /** @returns {Array<{landmarks: number[][], timestamp: number}>} Recent landmark snapshots (live array). */
  getLandmarkHistory() {
    return this.landmarkHistory;
  }
}

/**
 * Detects swipes from the motion of hand landmark 9. A swipe starts when
 * that landmark travels more than `swipeThreshold` over the last 5 history
 * entries and completes when the hand comes to rest farther than
 * `swipeThreshold` from where the swipe began.
 */
class SwipeDetector {
  constructor() {
    this.swipeThreshold = 0.12; // minimum travel, in landmark coordinate units
    this.minSwipeSpeed = 0.003; // NOTE(review): not consulted by detect()
    this.minHistoryForSwipe = 10;
    this.swipeStart = null;
    this.isSwipeInProgress = false;
    this.lastSwipeTime = 0;
    this.swipeCooldown = 500; // ms between reported swipes
  }

  /**
   * @param {Array<{x: number, y: number, z?: number}>} landmarks - 21 hand landmarks.
   * @param {number} fingerCount - Passed through to the result.
   * @param {Array<{landmarks: number[][]}>} history - Recent snapshots, oldest first.
   * @returns {?{direction: string, speed: number, fingerCount: number,
   *   startPosition: {x: number, y: number}, endPosition: {x: number, y: number}}}
   *   The completed swipe, or `null` when none completed on this frame.
   */
  detect(landmarks, fingerCount, history) {
    if (!landmarks || landmarks.length < 21) return null;
    if (history.length < this.minHistoryForSwipe) return null;

    const now = Date.now();
    if (now - this.lastSwipeTime < this.swipeCooldown) return null;

    const palm = landmarks[9];
    const currentPos = { x: palm.x, y: palm.y, z: palm.z || 0 };

    if (!this.swipeStart && history.length >= 5) {
      const travel = palmTravel(history.slice(-5));
      if (travel.x > this.swipeThreshold || travel.y > this.swipeThreshold) {
        this.swipeStart = { ...currentPos, time: now };
        this.isSwipeInProgress = true;
      }
    }

    if (this.isSwipeInProgress && this.swipeStart) {
      const timeDelta = (now - this.swipeStart.time) / 1000;
      const deltaX = currentPos.x - this.swipeStart.x;
      const deltaY = currentPos.y - this.swipeStart.y;
      const distance = Math.sqrt(deltaX * deltaX + deltaY * deltaY);
      // Floor the elapsed time at 0.1 s so very short swipes do not explode the speed.
      const speed = distance / Math.max(timeDelta, 0.1);

      const settle = palmTravel(history.slice(-8));
      const hasSettled = settle.x < 0.008 && settle.y < 0.008;

      if (hasSettled && distance > this.swipeThreshold) {
        // Capture the start point BEFORE resetting the state; reading it
        // afterwards would always yield null.
        const startPosition = { x: this.swipeStart.x, y: this.swipeStart.y };
        const direction = this.getSwipeDirection(deltaX, deltaY);

        this.swipeStart = null;
        this.isSwipeInProgress = false;
        this.lastSwipeTime = now;

        return {
          direction,
          speed,
          fingerCount,
          startPosition,
          endPosition: { x: currentPos.x, y: currentPos.y },
        };
      }

      // Abandon a swipe that has not completed within 2 seconds.
      if (timeDelta > 2) {
        this.swipeStart = null;
        this.isSwipeInProgress = false;
      }
    }

    return null;
  }

  /**
   * @param {number} dx - Horizontal displacement.
   * @param {number} dy - Vertical displacement (positive is down).
   * @returns {'left'|'right'|'up'|'down'} Dominant axis direction; ties go to the vertical axis.
   */
  getSwipeDirection(dx, dy) {
    if (Math.abs(dx) > Math.abs(dy)) {
      return dx > 0 ? 'right' : 'left';
    }
    return dy > 0 ? 'down' : 'up';
  }
}

/**
 * Detects pinch transitions from the 3D distance between the thumb tip
 * (landmark 4) and index tip (landmark 8).
 */
class PinchDetector {
  constructor() {
    this.pinchThreshold = 0.08;
    this.zoomInThreshold = 0.10;
    this.zoomOutThreshold = 0.18;
    this.lastPinchTime = 0;
    this.pinchCooldown = 800; // ms between reported pinch events
    this.isPinching = false;
  }

  /**
   * @param {Array<{x: number, y: number, z?: number}>} landmarks - 21 hand landmarks.
   * @returns {?{type: 'grab'|'zoom_in'|'zoom_out', distance: number,
   *   thumbTip: number[], indexTip: number[]}} The pinch event, or `null`.
   */
  detect(landmarks) {
    if (!landmarks || landmarks.length < 21) return null;

    const now = Date.now();
    if (now - this.lastPinchTime < this.pinchCooldown) return null;

    const thumbTip = landmarks[4];
    const indexTip = landmarks[8];
    const distance = Math.sqrt(
      Math.pow(thumbTip.x - indexTip.x, 2) +
        Math.pow(thumbTip.y - indexTip.y, 2) +
        Math.pow((thumbTip.z || 0) - (indexTip.z || 0), 2),
    );

    let pinchType = null;
    if (distance < this.pinchThreshold && !this.isPinching) {
      pinchType = 'grab';
      this.isPinching = true;
      this.lastPinchTime = now;
    } else if (distance > this.zoomOutThreshold && this.isPinching) {
      pinchType = 'zoom_out';
      this.isPinching = false;
      this.lastPinchTime = now;
    } else if (
      distance > this.zoomInThreshold &&
      distance < this.zoomOutThreshold &&
      this.isPinching
    ) {
      pinchType = 'zoom_in';
      this.isPinching = false;
      this.lastPinchTime = now;
    } else if (distance > this.pinchThreshold * 2) {
      this.isPinching = false;
    }

    if (!pinchType) return null;

    return {
      type: pinchType,
      distance,
      thumbTip: [thumbTip.x, thumbTip.y, thumbTip.z || 0],
      indexTip: [indexTip.x, indexTip.y, indexTip.z || 0],
    };
  }
}

/**
 * Counts fingers whose tip landmark is above (smaller y than) its paired
 * lower landmark.
 */
class FingerCounter {
  /**
   * @param {Array<{x: number, y: number}>} landmarks - 21 hand landmarks.
   * @returns {number} Number of extended fingers (0-5); 0 for missing or short input.
   */
  count(landmarks) {
    if (!landmarks || landmarks.length < 21) return 0;

    const fingerTips = [4, 8, 12, 16, 20];
    const fingerBases = [3, 6, 10, 14, 18];

    let extended = 0;
    for (let i = 0; i < fingerTips.length; i++) {
      const tip = landmarks[fingerTips[i]];
      const base = landmarks[fingerBases[i]];
      if (tip.y < base.y) {
        extended++;
      }
    }
    return extended;
  }
}

export default MediaPipeProcessor;
export { MediaPipeProcessor, SwipeDetector, PinchDetector, FingerCounter };