| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
/**
 * Runs MediaPipe Hands and FaceMesh on a webcam feed and turns the hand
 * landmarks into higher-level events (swipes, pinches, basic poses).
 *
 * Per hand frame the processor records the landmarks, notifies the optional
 * callbacks, draws a skeleton on the overlay canvas and posts detected
 * swipes/pinches to `${apiUrl}/llm/gesture-action`. Per face frame it draws
 * the video frame onto the main canvas and pixelates the face region.
 *
 * Landmark coordinates are treated as fractions of the frame size (they are
 * multiplied by the canvas/video dimensions before drawing).
 */
class MediaPipeProcessor {
  /**
   * @param {object} [options]
   * @param {number} [options.maxHands=1] Forwarded to Hands as `maxNumHands`.
   * @param {number} [options.modelComplexity=1] Forwarded to Hands.
   * @param {number} [options.minDetectionConfidence=0.5] Forwarded to Hands.
   * @param {number} [options.minTrackingConfidence=0.5] Forwarded to Hands.
   * @param {?function} [options.onGestureDetected] Called with `{ type, fingerCount }`.
   * @param {?function} [options.onLandmarksUpdate] Called with the raw landmark list.
   * @param {?function} [options.onSwipeDetected] Called with the swipe plus `fingerCount` and `gestureName`.
   * @param {?function} [options.onPinchDetected] Called with the pinch result.
   * @param {string} [options.apiUrl='http://localhost:5001/api'] Backend base URL.
   */
  constructor(options = {}) {
    // Caller-supplied keys are copied first and the known keys are resolved
    // afterwards, so an explicit `undefined`/`null` can no longer wipe out a
    // default while legitimate falsy values such as `modelComplexity: 0` are
    // still honoured.
    this.options = {
      ...options,
      maxHands: options.maxHands ?? 1,
      modelComplexity: options.modelComplexity ?? 1,
      minDetectionConfidence: options.minDetectionConfidence ?? 0.5,
      minTrackingConfidence: options.minTrackingConfidence ?? 0.5,
      onGestureDetected: options.onGestureDetected ?? null,
      onLandmarksUpdate: options.onLandmarksUpdate ?? null,
      onSwipeDetected: options.onSwipeDetected ?? null,
      onPinchDetected: options.onPinchDetected ?? null,
    };

    // MediaPipe solution instances, created by initialize().
    this.hands = null;
    this.faceMesh = null;
    this.isInitialized = false;

    // Camera helper instance, its constructor (resolved in initialize()) and
    // the MediaStream this class requested, kept so it can always be released.
    this.camera = null;
    this.cameraCtor = null;
    this.mediaStream = null;

    // Most recent hand landmarks plus a bounded history of [x, y, z] snapshots.
    this.lastLandmarks = null;
    this.landmarkHistory = [];
    this.maxHistorySize = 30;

    this.swipeDetector = new SwipeDetector();
    this.pinchDetector = new PinchDetector();
    this.fingerCounter = new FingerCounter();

    this.gestureCallback = null;
    this.apiUrl = options.apiUrl || 'http://localhost:5001/api';
  }

  /**
   * Loads the MediaPipe bundles and configures the Hands and FaceMesh solutions.
   * Safe to call repeatedly; later calls return immediately once initialized.
   *
   * @returns {Promise<boolean>} `true` on success, `false` if loading or setup failed
   *   (the error is logged, not thrown).
   */
  async initialize() {
    if (this.isInitialized) return true;

    try {
      const handsModule = await import('https://cdn.jsdelivr.net/npm/@mediapipe/hands@0.4.1675469240/hands.js');
      const faceMeshModule = await import('https://cdn.jsdelivr.net/npm/@mediapipe/face_mesh@0.4.1633529614/face_mesh.js');
      const cameraModule = await import('https://cdn.jsdelivr.net/npm/@mediapipe/camera_utils@0.3.1640029074/camera_utils.js');

      // Prefer the module export; fall back to a same-named global.
      // NOTE(review): the fallback assumes these bundles may register
      // themselves on the global object instead of exporting - confirm
      // against the bundles actually served by the CDN.
      const Hands = handsModule.Hands ?? globalThis.Hands;
      const FaceMesh = faceMeshModule.FaceMesh ?? globalThis.FaceMesh;
      // Kept on the instance because startCamera() needs it later; a local
      // binding here would not be visible from that method.
      this.cameraCtor = cameraModule.Camera ?? globalThis.Camera ?? null;

      this.hands = new Hands({
        locateFile: (file) => `https://cdn.jsdelivr.net/npm/@mediapipe/hands@0.4.1675469240/${file}`
      });

      this.hands.setOptions({
        maxNumHands: this.options.maxHands,
        modelComplexity: this.options.modelComplexity,
        minDetectionConfidence: this.options.minDetectionConfidence,
        minTrackingConfidence: this.options.minTrackingConfidence
      });

      this.hands.onResults((results) => this.onHandResults(results));

      this.faceMesh = new FaceMesh({
        locateFile: (file) => `https://cdn.jsdelivr.net/npm/@mediapipe/face_mesh@0.4.1633529614/${file}`
      });

      this.faceMesh.setOptions({
        maxNumFaces: 1,
        refineLandmarks: true,
        minDetectionConfidence: 0.5,
        minTrackingConfidence: 0.5
      });

      this.faceMesh.onResults((results) => this.onFaceResults(results));

      this.isInitialized = true;
      return true;
    } catch (error) {
      console.error('Failed to initialize MediaPipe:', error);
      return false;
    }
  }

  /**
   * Requests the user-facing webcam, attaches it to `videoElement` and starts
   * feeding frames to both solutions.
   *
   * @param {HTMLVideoElement} videoElement Element that plays the webcam stream.
   * @param {HTMLCanvasElement} canvasElement Canvas that receives the face-blurred frame.
   * @param {HTMLCanvasElement} overlayCanvasElement Canvas that receives the hand skeleton.
   * @returns {Promise<boolean>} `true` once the camera loop is running, `false` on any
   *   failure (initialization, permission, missing Camera helper, start error).
   */
  async startCamera(videoElement, canvasElement, overlayCanvasElement) {
    // Do not ask for webcam access when the pipeline itself could not be set up.
    if (!this.isInitialized && !(await this.initialize())) {
      return false;
    }

    this.videoElement = videoElement;
    this.canvasElement = canvasElement;
    this.overlayCanvasElement = overlayCanvasElement;

    try {
      const CameraCtor = this.cameraCtor ?? globalThis.Camera;
      if (typeof CameraCtor !== 'function') {
        throw new Error('MediaPipe Camera helper is not available');
      }

      const stream = await navigator.mediaDevices.getUserMedia({
        video: { facingMode: 'user', width: 640, height: 480 }
      });

      // Remember the stream so it can be stopped even if something else
      // later replaces videoElement.srcObject.
      this.mediaStream = stream;
      videoElement.srcObject = stream;
      await videoElement.play();

      this.camera = new CameraCtor(videoElement, {
        onFrame: async () => {
          if (this.hands && this.faceMesh) {
            await this.hands.send({ image: videoElement });
            await this.faceMesh.send({ image: videoElement });
          }
        },
        width: 640,
        height: 480
      });

      await this.camera.start();
      return true;
    } catch (error) {
      console.error('Failed to start camera:', error);
      // Release whatever was acquired before the failure so the webcam is
      // not left running after reporting `false`.
      try {
        this.stopCamera();
      } catch (cleanupError) {
        console.error('Failed to release camera resources:', cleanupError);
      }
      return false;
    }
  }

  /**
   * Stops the camera loop and every media track this instance knows about
   * (the stream it requested and whatever is attached to the video element).
   */
  stopCamera() {
    if (this.camera) {
      this.camera.stop();
      this.camera = null;
    }

    // A Set avoids stopping the same stream twice when both references match.
    const streams = new Set();
    if (this.mediaStream) {
      streams.add(this.mediaStream);
    }
    if (this.videoElement && this.videoElement.srcObject) {
      streams.add(this.videoElement.srcObject);
    }
    for (const stream of streams) {
      stream.getTracks().forEach((track) => track.stop());
    }

    if (this.videoElement && this.videoElement.srcObject) {
      this.videoElement.srcObject = null;
    }
    this.mediaStream = null;
  }

  /**
   * Hands result handler: records the first detected hand, runs the gesture
   * detectors, fires callbacks and redraws the overlay. Clears the overlay
   * when no hand is present.
   *
   * @param {object} results Result object; only `multiHandLandmarks` is read.
   */
  onHandResults(results) {
    const hasHand = results.multiHandLandmarks && results.multiHandLandmarks.length > 0;
    if (!hasHand) {
      this.lastLandmarks = null;
      this.clearOverlay();
      return;
    }

    const landmarks = results.multiHandLandmarks[0];
    this.lastLandmarks = landmarks;

    this.landmarkHistory.push({
      landmarks: landmarks.map((lm) => [lm.x, lm.y, lm.z]),
      timestamp: Date.now()
    });

    // Keep the history bounded to the newest maxHistorySize frames.
    if (this.landmarkHistory.length > this.maxHistorySize) {
      this.landmarkHistory.shift();
    }

    if (this.options.onLandmarksUpdate) {
      this.options.onLandmarksUpdate(landmarks);
    }

    const fingerCount = this.fingerCounter.count(landmarks);
    const swipe = this.swipeDetector.detect(landmarks, fingerCount, this.landmarkHistory);
    const pinch = this.pinchDetector.detect(landmarks);

    if (swipe) {
      this.onSwipeGesture(swipe, fingerCount);
    }

    if (pinch) {
      this.onPinchGesture(pinch);
    }

    const basicGesture = this.detectBasicGesture(landmarks, fingerCount);
    if (basicGesture && this.options.onGestureDetected) {
      this.options.onGestureDetected(basicGesture);
    }

    this.drawHandLandmarks(landmarks);
  }

  /**
   * FaceMesh result handler: pixelates the first detected face, if any.
   *
   * @param {object} results Result object; only `multiFaceLandmarks` is read.
   */
  onFaceResults(results) {
    if (results.multiFaceLandmarks && results.multiFaceLandmarks.length > 0) {
      this.applyFaceBlur(results.multiFaceLandmarks[0]);
    }
  }

  /**
   * Logs a swipe, notifies `onSwipeDetected` and forwards it to the backend.
   *
   * @param {object} swipe Swipe result (`direction`, `speed`, `startPosition`, `endPosition`).
   * @param {number} fingerCount Extended-finger count at detection time.
   */
  onSwipeGesture(swipe, fingerCount) {
    const gestureName = `${fingerCount}_finger_swipe_${swipe.direction}`;

    console.log(`Swipe detected: ${gestureName} (speed: ${swipe.speed.toFixed(3)})`);

    if (this.options.onSwipeDetected) {
      this.options.onSwipeDetected({
        ...swipe,
        fingerCount,
        gestureName
      });
    }

    this.sendGestureToBackend(gestureName, {
      direction: swipe.direction,
      fingerCount,
      speed: swipe.speed,
      startPosition: swipe.startPosition,
      endPosition: swipe.endPosition
    });
  }

  /**
   * Logs a pinch, notifies `onPinchDetected` and forwards it to the backend.
   *
   * @param {object} pinch Pinch result (`type`, `distance`, `thumbTip`, `indexTip`).
   */
  onPinchGesture(pinch) {
    const gestureName = `pinch_${pinch.type}`;

    console.log(`Pinch detected: ${gestureName} (distance: ${pinch.distance.toFixed(3)})`);

    if (this.options.onPinchDetected) {
      this.options.onPinchDetected(pinch);
    }

    this.sendGestureToBackend(gestureName, {
      type: pinch.type,
      distance: pinch.distance,
      thumbTip: pinch.thumbTip,
      indexTip: pinch.indexTip
    });
  }

  /**
   * Classifies a static hand pose. Checks run in priority order:
   * open palm (4+ fingers), pointing (one finger, index tip above its base),
   * then thumbs up.
   *
   * @param {Array<{x:number,y:number}>} landmarks Hand landmarks.
   * @param {number} fingerCount Extended-finger count.
   * @returns {?{type: string, fingerCount: number}} The pose, or `null` if none matched.
   */
  detectBasicGesture(landmarks, fingerCount) {
    if (fingerCount >= 4) {
      return { type: 'open_palm', fingerCount };
    }

    if (fingerCount === 1) {
      const indexTip = landmarks[8];
      const indexBase = landmarks[5];
      // Smaller y is compared as "higher" throughout this file.
      if (indexTip.y < indexBase.y) {
        return { type: 'pointing', fingerCount: 1 };
      }
    }

    if (this.isThumbsUp(landmarks)) {
      return { type: 'thumbs_up', fingerCount };
    }

    return null;
  }

  /**
   * @param {Array<{x:number,y:number}>} landmarks Hand landmarks.
   * @returns {boolean} `true` when the thumb tip is above landmark 3 while the
   *   index and middle tips are below landmarks 6 and 10 respectively.
   */
  isThumbsUp(landmarks) {
    const thumbTip = landmarks[4];
    const thumbIP = landmarks[3];
    const indexTip = landmarks[8];
    const middleTip = landmarks[12];

    const thumbUp = thumbTip.y < thumbIP.y;
    const fingersDown = indexTip.y > landmarks[6].y &&
      middleTip.y > landmarks[10].y;

    return thumbUp && fingersDown;
  }

  /**
   * Fire-and-forget POST of a gesture to `${apiUrl}/llm/gesture-action`.
   * Does nothing when no hand landmarks are currently known.
   *
   * NOTE(review): `parameters` is accepted but is not part of the request
   * body; the payload is left as-is because the backend schema is not
   * visible here - confirm whether it should be transmitted.
   *
   * @param {string} gestureName Gesture identifier sent as `gesture_name`.
   * @param {object} parameters Gesture details (currently unused).
   */
  sendGestureToBackend(gestureName, parameters) {
    if (!this.lastLandmarks) return;

    const landmarksArray = this.lastLandmarks.map((lm) => [lm.x, lm.y, lm.z]);

    fetch(`${this.apiUrl}/llm/gesture-action`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        user_id: 'demo',
        landmarks: landmarksArray,
        gesture_name: gestureName,
        context: {
          topic: document.title || 'learning',
          timestamp: Date.now()
        }
      })
      // Best effort: an unreachable backend must not break gesture handling,
      // but the cause is logged so it can be diagnosed.
    }).catch((err) => console.log('Backend not available for gesture', err));
  }

  /**
   * Draws the current video frame onto the main canvas and pixelates the
   * bounding box of the face landmarks (expanded by 15% on each side).
   *
   * @param {Array<{x:number,y:number}>} landmarks Face landmarks.
   */
  applyFaceBlur(landmarks) {
    if (!this.canvasElement || !this.videoElement) return;

    const ctx = this.canvasElement.getContext('2d');
    const video = this.videoElement;
    const frameWidth = video.videoWidth;
    const frameHeight = video.videoHeight;

    if (frameWidth === 0 || frameHeight === 0) return;

    this.canvasElement.width = frameWidth;
    this.canvasElement.height = frameHeight;

    ctx.drawImage(video, 0, 0);

    let minX = Infinity;
    let maxX = -Infinity;
    let minY = Infinity;
    let maxY = -Infinity;
    for (const lm of landmarks) {
      minX = Math.min(minX, lm.x);
      maxX = Math.max(maxX, lm.x);
      minY = Math.min(minY, lm.y);
      maxY = Math.max(maxY, lm.y);
    }

    const padding = 0.15;
    const padX = (maxX - minX) * padding;
    const padY = (maxY - minY) * padding;

    // Clamp all four edges to the frame and derive the size from the clamped
    // edges, so the box neither drifts past the face when the origin is
    // clamped nor extends beyond the canvas.
    const x = Math.max(0, Math.floor((minX - padX) * frameWidth));
    const y = Math.max(0, Math.floor((minY - padY) * frameHeight));
    const right = Math.min(frameWidth, Math.ceil((maxX + padX) * frameWidth));
    const bottom = Math.min(frameHeight, Math.ceil((maxY + padY) * frameHeight));
    const w = right - x;
    const h = bottom - y;

    // Skip degenerate boxes (also covers an empty landmark list).
    if (w > 10 && h > 10) {
      const imageData = ctx.getImageData(x, y, w, h);
      const data = imageData.data;
      const pixelSize = 15;

      // Pixelate: paint each pixelSize x pixelSize tile with the colour of
      // its top-left pixel. Alpha is left untouched.
      for (let py = 0; py < h; py += pixelSize) {
        for (let px = 0; px < w; px += pixelSize) {
          const i = (py * w + px) * 4;
          const r = data[i];
          const g = data[i + 1];
          const b = data[i + 2];

          for (let dy = 0; dy < pixelSize && py + dy < h; dy++) {
            for (let dx = 0; dx < pixelSize && px + dx < w; dx++) {
              const ni = ((py + dy) * w + (px + dx)) * 4;
              data[ni] = r;
              data[ni + 1] = g;
              data[ni + 2] = b;
            }
          }
        }
      }

      ctx.putImageData(imageData, x, y);
    }
  }

  /**
   * Redraws the overlay canvas with the hand skeleton, landmark dots and the
   * current finger count.
   *
   * @param {Array<{x:number,y:number}>} landmarks Hand landmarks.
   */
  drawHandLandmarks(landmarks) {
    if (!this.overlayCanvasElement || !this.videoElement) return;

    const canvas = this.overlayCanvasElement;
    const video = this.videoElement;

    canvas.width = video.videoWidth;
    canvas.height = video.videoHeight;

    const ctx = canvas.getContext('2d');
    ctx.clearRect(0, 0, canvas.width, canvas.height);

    // Landmark index pairs joined by a line segment.
    const connections = [
      [0, 1], [1, 2], [2, 3], [3, 4],
      [0, 5], [5, 6], [6, 7], [7, 8],
      [0, 9], [9, 10], [10, 11], [11, 12],
      [0, 13], [13, 14], [14, 15], [15, 16],
      [0, 17], [17, 18], [18, 19], [19, 20],
      [5, 9], [9, 13], [13, 17]
    ];

    ctx.strokeStyle = 'rgba(0, 255, 136, 0.8)';
    ctx.lineWidth = 2;

    for (const [from, to] of connections) {
      const start = landmarks[from];
      const end = landmarks[to];

      ctx.beginPath();
      ctx.moveTo(start.x * canvas.width, start.y * canvas.height);
      ctx.lineTo(end.x * canvas.width, end.y * canvas.height);
      ctx.stroke();
    }

    for (let i = 0; i < landmarks.length; i++) {
      const landmark = landmarks[i];
      const x = landmark.x * canvas.width;
      const y = landmark.y * canvas.height;
      // Every fourth landmark (0, 4, 8, ...) gets a larger green dot.
      const isHighlighted = i % 4 === 0;

      ctx.beginPath();
      ctx.arc(x, y, isHighlighted ? 6 : 4, 0, 2 * Math.PI);
      ctx.fillStyle = isHighlighted ? '#00ff88' : '#ffffff';
      ctx.fill();
      ctx.strokeStyle = '#000000';
      ctx.lineWidth = 1;
      ctx.stroke();
    }

    const fingerCount = this.fingerCounter.count(landmarks);
    ctx.fillStyle = '#ffffff';
    ctx.font = 'bold 16px sans-serif';
    ctx.fillText(`Fingers: ${fingerCount}`, 10, 30);
  }

  /** Wipes the overlay canvas, if one is attached. */
  clearOverlay() {
    if (!this.overlayCanvasElement) return;

    const ctx = this.overlayCanvasElement.getContext('2d');
    ctx.clearRect(0, 0, this.overlayCanvasElement.width, this.overlayCanvasElement.height);
  }

  /** @returns {?Array} Landmarks of the most recent hand frame, or `null` when no hand is visible. */
  getLandmarks() {
    return this.lastLandmarks;
  }

  /** @returns {Array<{landmarks: number[][], timestamp: number}>} The live (not copied) landmark history. */
  getLandmarkHistory() {
    return this.landmarkHistory;
  }
}
|
|
/**
 * Detects hand swipes from the motion of landmark 9 (named `middleFingerMcp`
 * in the original code).
 *
 * A swipe is armed once the tracked point has moved more than `swipeThreshold`
 * over the last 5 history frames, and is reported when the point has come to
 * rest (almost no movement over the last 8 frames) at least `swipeThreshold`
 * away from where the swipe was armed.
 */
class SwipeDetector {
  constructor() {
    // Minimum travel, in landmark coordinate units, to arm and to report a swipe.
    this.swipeThreshold = 0.12;
    this.minSwipeSpeed = 0.003;
    // Frames of history required before any detection is attempted.
    this.minHistoryForSwipe = 10;
    this.swipeStart = null;
    this.isSwipeInProgress = false;
    this.lastSwipeTime = 0;
    // Milliseconds to ignore input after a reported swipe.
    this.swipeCooldown = 500;
  }

  /**
   * Feeds one frame into the detector.
   *
   * @param {Array<{x:number,y:number,z?:number}>} landmarks Current hand landmarks (21 expected).
   * @param {number} fingerCount Extended-finger count, echoed in the result.
   * @param {Array<{landmarks: number[][]}>} history Recent frames, oldest first, each
   *   holding `[x, y, z]` triples per landmark.
   * @returns {?{direction: string, speed: number, fingerCount: number,
   *   startPosition: {x:number,y:number}, endPosition: {x:number,y:number}}}
   *   The completed swipe, or `null` when nothing was detected this frame.
   */
  detect(landmarks, fingerCount, history) {
    if (!landmarks || landmarks.length < 21) return null;
    if (!history || history.length < this.minHistoryForSwipe) return null;

    const now = Date.now();
    if (now - this.lastSwipeTime < this.swipeCooldown) return null;

    const trackedPoint = landmarks[9];
    const currentPos = {
      x: trackedPoint.x,
      y: trackedPoint.y,
      z: trackedPoint.z || 0
    };

    // Arm a swipe when the last 5 frames show enough net movement on either axis.
    if (!this.swipeStart && history.length >= 5) {
      const armMovement = this.netMovement(history, 5);
      if (armMovement.x > this.swipeThreshold || armMovement.y > this.swipeThreshold) {
        this.swipeStart = { ...currentPos, time: now };
        this.isSwipeInProgress = true;
      }
    }

    if (!this.isSwipeInProgress || !this.swipeStart) {
      return null;
    }

    // Capture the start before any state reset so it can be reported.
    const start = this.swipeStart;
    const timeDelta = (now - start.time) / 1000;
    const deltaX = currentPos.x - start.x;
    const deltaY = currentPos.y - start.y;

    const distance = Math.sqrt(deltaX * deltaX + deltaY * deltaY);
    // Elapsed time is floored at 0.1s so a near-instant swipe cannot divide by ~0.
    const speed = distance / Math.max(timeDelta, 0.1);

    const settleMovement = this.netMovement(history, 8);
    const hasSettled = settleMovement.x < 0.008 && settleMovement.y < 0.008;

    if (hasSettled && distance > this.swipeThreshold) {
      const direction = this.getSwipeDirection(deltaX, deltaY);

      this.swipeStart = null;
      this.isSwipeInProgress = false;
      this.lastSwipeTime = now;

      return {
        direction,
        speed,
        fingerCount,
        startPosition: { x: start.x, y: start.y },
        endPosition: { x: currentPos.x, y: currentPos.y }
      };
    }

    // Abandon a swipe that has not completed within 2 seconds.
    if (timeDelta > 2) {
      this.swipeStart = null;
      this.isSwipeInProgress = false;
    }

    return null;
  }

  /**
   * Absolute x/y displacement of landmark 9 between the first and last of the
   * newest `frameCount` history entries.
   *
   * @param {Array<{landmarks: number[][]}>} history Recent frames, oldest first.
   * @param {number} frameCount Number of trailing frames to consider.
   * @returns {{x: number, y: number}} Net movement per axis.
   */
  netMovement(history, frameCount) {
    const frames = history.slice(-frameCount);
    const first = frames[0].landmarks[9];
    const last = frames[frames.length - 1].landmarks[9];

    return {
      x: Math.abs(last[0] - first[0]),
      y: Math.abs(last[1] - first[1])
    };
  }

  /**
   * Maps a displacement to its dominant axis direction. Ties go to the
   * vertical axis; positive y is reported as 'down'.
   *
   * @param {number} dx Horizontal displacement.
   * @param {number} dy Vertical displacement.
   * @returns {'left'|'right'|'up'|'down'}
   */
  getSwipeDirection(dx, dy) {
    const absDx = Math.abs(dx);
    const absDy = Math.abs(dy);

    if (absDx > absDy) {
      return dx > 0 ? 'right' : 'left';
    }
    return dy > 0 ? 'down' : 'up';
  }
}
|
|
/**
 * Tracks the thumb-tip / index-tip distance (landmarks 4 and 8) and reports
 * pinch transitions: 'grab' when the tips close together, then 'zoom_out' or
 * 'zoom_in' depending on how far they are apart when the pinch is released.
 */
class PinchDetector {
  constructor() {
    // Distance below which an open hand becomes a pinch.
    this.pinchThreshold = 0.08;
    // While pinching: (zoomInThreshold, zoomOutThreshold) releases as 'zoom_in',
    // anything above zoomOutThreshold releases as 'zoom_out'.
    this.zoomInThreshold = 0.10;
    this.zoomOutThreshold = 0.18;
    this.lastPinchTime = 0;
    // Milliseconds during which detect() returns null after a reported event.
    this.pinchCooldown = 800;
    this.isPinching = false;
  }

  /**
   * Feeds one frame into the detector.
   *
   * @param {Array<{x:number,y:number,z?:number}>} landmarks Hand landmarks (21 expected).
   * @returns {?{type: string, distance: number, thumbTip: number[], indexTip: number[]}}
   *   The pinch event, or `null` when no transition happened this frame.
   */
  detect(landmarks) {
    if (!landmarks || landmarks.length < 21) return null;

    const timestamp = Date.now();
    const sinceLastEvent = timestamp - this.lastPinchTime;
    if (sinceLastEvent < this.pinchCooldown) return null;

    const thumb = landmarks[4];
    const index = landmarks[8];

    // Euclidean distance in 3D; a missing z is treated as 0.
    const gapX = thumb.x - index.x;
    const gapY = thumb.y - index.y;
    const gapZ = (thumb.z || 0) - (index.z || 0);
    const distance = Math.sqrt(gapX ** 2 + gapY ** 2 + gapZ ** 2);

    let eventType = null;
    if (this.isPinching) {
      if (distance > this.zoomOutThreshold) {
        eventType = 'zoom_out';
      } else if (distance > this.zoomInThreshold && distance < this.zoomOutThreshold) {
        eventType = 'zoom_in';
      }
    } else if (distance < this.pinchThreshold) {
      eventType = 'grab';
    }

    if (eventType === null) {
      // No event: silently drop the pinch state once the tips are clearly apart.
      if (distance > this.pinchThreshold * 2) {
        this.isPinching = false;
      }
      return null;
    }

    // A grab enters the pinching state; either zoom event leaves it.
    this.isPinching = eventType === 'grab';
    this.lastPinchTime = timestamp;

    return {
      type: eventType,
      distance,
      thumbTip: [thumb.x, thumb.y, thumb.z || 0],
      indexTip: [index.x, index.y, index.z || 0]
    };
  }
}
|
|
/**
 * Counts extended fingers by comparing each fingertip's y coordinate with a
 * lower joint of the same finger.
 */
class FingerCounter {
  /**
   * @param {Array<{x:number,y:number}>} landmarks Hand landmarks (21 expected).
   * @returns {number} Number of fingers (0-5) whose tip has a smaller y than its
   *   reference joint; 0 when fewer than 21 landmarks are supplied.
   */
  count(landmarks) {
    if (!landmarks || landmarks.length < 21) return 0;

    // [tip index, reference joint index] for thumb, index, middle, ring, pinky.
    const tipJointPairs = [
      [4, 3],
      [8, 6],
      [12, 10],
      [16, 14],
      [20, 18]
    ];

    const raised = tipJointPairs.filter(
      ([tipIndex, jointIndex]) => landmarks[tipIndex].y < landmarks[jointIndex].y
    );

    return raised.length;
  }
}
|
|
| export default MediaPipeProcessor; |
| export { MediaPipeProcessor, SwipeDetector, PinchDetector, FingerCounter }; |
|
|