// contextflow-rl / frontend/src/MediaPipeProcessor.js
// Uploaded by namish10 with huggingface_hub (commit f77da70, verified).
/**
* MediaPipeProcessor.js
*
* Advanced hand gesture recognition with swipe and pinch detection.
* Integrates with LLM Flow for gesture-triggered actions.
*
* Features:
* - Hand landmark detection (21 points)
* - Face mesh detection (468 points) for face blur
* - Swipe gesture detection (left, right, up, down)
* - Pinch gesture detection
* - Finger count detection
* - Real-time gesture-to-action triggering
*/
/**
 * Feeds webcam frames to MediaPipe Hands and FaceMesh, converts the resulting
 * hand landmarks into swipe / pinch / basic gestures, pixelates the detected
 * face on a canvas and draws a hand skeleton on an overlay canvas.
 *
 * Gestures are reported through the callbacks supplied in `options`; swipes
 * and pinches are additionally POSTed to `${apiUrl}/llm/gesture-action`.
 */
class MediaPipeProcessor {
  /**
   * @param {object} [options]
   * @param {number} [options.maxHands=1] Forwarded to Hands as `maxNumHands`.
   * @param {number} [options.modelComplexity=1]
   * @param {number} [options.minDetectionConfidence=0.5]
   * @param {number} [options.minTrackingConfidence=0.5]
   * @param {?function} [options.onGestureDetected] Receives `{ type, fingerCount }`.
   * @param {?function} [options.onLandmarksUpdate] Receives the raw landmark list.
   * @param {?function} [options.onSwipeDetected] Receives the swipe plus `fingerCount` and `gestureName`.
   * @param {?function} [options.onPinchDetected] Receives the pinch object.
   * @param {string} [options.apiUrl='http://localhost:5001/api'] Backend base URL.
   */
  constructor(options = {}) {
    // Spread first, then fill in defaults with `??`: a key that is present
    // but undefined/null must not wipe out its default, while legitimate
    // falsy values such as 0 are kept.
    this.options = {
      ...options,
      maxHands: options.maxHands ?? 1,
      modelComplexity: options.modelComplexity ?? 1,
      minDetectionConfidence: options.minDetectionConfidence ?? 0.5,
      minTrackingConfidence: options.minTrackingConfidence ?? 0.5,
      onGestureDetected: options.onGestureDetected ?? null,
      onLandmarksUpdate: options.onLandmarksUpdate ?? null,
      onSwipeDetected: options.onSwipeDetected ?? null,
      onPinchDetected: options.onPinchDetected ?? null
    };
    this.hands = null;
    this.faceMesh = null;
    // Camera helper constructor resolved by initialize(); startCamera() needs it.
    this.CameraClass = null;
    this.isInitialized = false;
    this.camera = null;
    // Stream obtained by startCamera(), remembered so stopCamera() can release it.
    this.stream = null;
    this.lastLandmarks = null;
    this.landmarkHistory = [];
    this.maxHistorySize = 30;
    this.swipeDetector = new SwipeDetector();
    this.pinchDetector = new PinchDetector();
    this.fingerCounter = new FingerCounter();
    this.gestureCallback = null;
    this.apiUrl = options.apiUrl || 'http://localhost:5001/api';
  }

  /**
   * Loads the MediaPipe scripts and configures the Hands and FaceMesh solutions.
   * Safe to call repeatedly; later calls return immediately once initialized.
   *
   * @returns {Promise<boolean>} true on success, false if loading or setup failed
   *   (the error is logged, not thrown).
   */
  async initialize() {
    if (this.isInitialized) return true;
    try {
      // The three scripts are independent of each other, so load them in parallel.
      const [handsModule, faceMeshModule, cameraModule] = await Promise.all([
        import('https://cdn.jsdelivr.net/npm/@mediapipe/hands@0.4.1675469240/hands.js'),
        import('https://cdn.jsdelivr.net/npm/@mediapipe/face_mesh@0.4.1633529614/face_mesh.js'),
        import('https://cdn.jsdelivr.net/npm/@mediapipe/camera_utils@0.3.1640029074/camera_utils.js')
      ]);
      // NOTE(review): these CDN bundles may register their classes as globals
      // instead of ES exports, so fall back to globalThis when the export is absent.
      const Hands = handsModule.Hands ?? globalThis.Hands;
      const FaceMesh = faceMeshModule.FaceMesh ?? globalThis.FaceMesh;
      const Camera = cameraModule.Camera ?? globalThis.Camera;
      if (typeof Hands !== 'function' || typeof FaceMesh !== 'function' || typeof Camera !== 'function') {
        throw new Error('MediaPipe Hands/FaceMesh/Camera constructors were not found after loading');
      }
      this.CameraClass = Camera;

      this.hands = new Hands({
        locateFile: (file) => `https://cdn.jsdelivr.net/npm/@mediapipe/hands@0.4.1675469240/${file}`
      });
      this.hands.setOptions({
        maxNumHands: this.options.maxHands,
        modelComplexity: this.options.modelComplexity,
        minDetectionConfidence: this.options.minDetectionConfidence,
        minTrackingConfidence: this.options.minTrackingConfidence
      });
      this.hands.onResults((results) => this.onHandResults(results));

      this.faceMesh = new FaceMesh({
        locateFile: (file) => `https://cdn.jsdelivr.net/npm/@mediapipe/face_mesh@0.4.1633529614/${file}`
      });
      this.faceMesh.setOptions({
        maxNumFaces: 1,
        refineLandmarks: true,
        minDetectionConfidence: 0.5,
        minTrackingConfidence: 0.5
      });
      this.faceMesh.onResults((results) => this.onFaceResults(results));

      this.isInitialized = true;
      return true;
    } catch (error) {
      console.error('Failed to initialize MediaPipe:', error);
      return false;
    }
  }

  /**
   * Opens the user-facing camera and starts sending frames to both models.
   *
   * @param {HTMLVideoElement} videoElement Receives the camera stream.
   * @param {HTMLCanvasElement} canvasElement Target of the face-blurred frame.
   * @param {HTMLCanvasElement} overlayCanvasElement Target of the hand skeleton.
   * @returns {Promise<boolean>} true once the camera loop is running, false on
   *   any failure (initialization, permission, missing Camera helper).
   */
  async startCamera(videoElement, canvasElement, overlayCanvasElement) {
    const ready = this.isInitialized || (await this.initialize());
    this.videoElement = videoElement;
    this.canvasElement = canvasElement;
    this.overlayCanvasElement = overlayCanvasElement;
    // Without the models there is nothing to feed; initialize() already logged why.
    if (!ready) return false;
    try {
      // `Camera` is only known inside initialize(); use the reference it stored.
      const CameraClass = this.CameraClass ?? globalThis.Camera;
      if (typeof CameraClass !== 'function') {
        throw new Error('MediaPipe Camera helper is not loaded');
      }
      const stream = await navigator.mediaDevices.getUserMedia({
        video: { facingMode: 'user', width: 640, height: 480 }
      });
      this.stream = stream;
      videoElement.srcObject = stream;
      await videoElement.play();
      this.camera = new CameraClass(videoElement, {
        onFrame: async () => {
          if (this.hands && this.faceMesh) {
            await this.hands.send({ image: videoElement });
            await this.faceMesh.send({ image: videoElement });
          }
        },
        width: 640,
        height: 480
      });
      await this.camera.start();
      return true;
    } catch (error) {
      console.error('Failed to start camera:', error);
      // Release whatever was acquired before the failure so the device is not left open.
      this.stopCamera();
      return false;
    }
  }

  /**
   * Stops the frame loop and releases every media track this instance holds,
   * both the stream it requested and whatever is currently attached to the video.
   */
  stopCamera() {
    if (this.camera) {
      this.camera.stop();
      this.camera = null;
    }
    // A Set avoids stopping the same stream twice when both references match.
    const streams = new Set([this.stream, this.videoElement ? this.videoElement.srcObject : null]);
    for (const stream of streams) {
      if (stream && typeof stream.getTracks === 'function') {
        stream.getTracks().forEach((track) => track.stop());
      }
    }
    if (this.videoElement) {
      this.videoElement.srcObject = null;
    }
    this.stream = null;
  }

  /**
   * Handles one Hands result: records the first hand's landmarks, runs the
   * swipe / pinch / basic gesture detectors and redraws the overlay.
   * With no hand in the result the overlay is cleared instead.
   *
   * @param {object} results Object carrying `multiHandLandmarks`.
   */
  onHandResults(results) {
    const hands = results.multiHandLandmarks;
    if (!hands || hands.length === 0) {
      this.lastLandmarks = null;
      this.clearOverlay();
      return;
    }

    const landmarks = hands[0];
    this.lastLandmarks = landmarks;
    this.landmarkHistory.push({
      landmarks: landmarks.map((lm) => [lm.x, lm.y, lm.z]),
      timestamp: Date.now()
    });
    if (this.landmarkHistory.length > this.maxHistorySize) {
      this.landmarkHistory.shift();
    }
    if (this.options.onLandmarksUpdate) {
      this.options.onLandmarksUpdate(landmarks);
    }

    const fingerCount = this.fingerCounter.count(landmarks);
    const swipe = this.swipeDetector.detect(landmarks, fingerCount, this.landmarkHistory);
    const pinch = this.pinchDetector.detect(landmarks);
    if (swipe) {
      this.onSwipeGesture(swipe, fingerCount);
    }
    if (pinch) {
      this.onPinchGesture(pinch);
    }
    const basicGesture = this.detectBasicGesture(landmarks, fingerCount);
    if (basicGesture && this.options.onGestureDetected) {
      this.options.onGestureDetected(basicGesture);
    }
    this.drawHandLandmarks(landmarks);
  }

  /**
   * Handles one FaceMesh result by pixelating the first detected face.
   *
   * @param {object} results Object carrying `multiFaceLandmarks`.
   */
  onFaceResults(results) {
    if (results.multiFaceLandmarks && results.multiFaceLandmarks.length > 0) {
      this.applyFaceBlur(results.multiFaceLandmarks[0]);
    }
  }

  /**
   * Reports a swipe to the `onSwipeDetected` callback and to the backend.
   * The gesture name has the form `<fingerCount>_finger_swipe_<direction>`.
   *
   * @param {object} swipe Result of SwipeDetector#detect.
   * @param {number} fingerCount Extended fingers at the time of the swipe.
   */
  onSwipeGesture(swipe, fingerCount) {
    const gestureName = `${fingerCount}_finger_swipe_${swipe.direction}`;
    console.log(`Swipe detected: ${gestureName} (speed: ${swipe.speed.toFixed(3)})`);
    if (this.options.onSwipeDetected) {
      this.options.onSwipeDetected({
        ...swipe,
        fingerCount,
        gestureName
      });
    }
    this.sendGestureToBackend(gestureName, {
      direction: swipe.direction,
      fingerCount,
      speed: swipe.speed,
      startPosition: swipe.startPosition,
      endPosition: swipe.endPosition
    });
  }

  /**
   * Reports a pinch to the `onPinchDetected` callback and to the backend.
   * The gesture name has the form `pinch_<type>`.
   *
   * @param {object} pinch Result of PinchDetector#detect.
   */
  onPinchGesture(pinch) {
    const gestureName = `pinch_${pinch.type}`;
    console.log(`Pinch detected: ${gestureName} (distance: ${pinch.distance.toFixed(3)})`);
    if (this.options.onPinchDetected) {
      this.options.onPinchDetected(pinch);
    }
    this.sendGestureToBackend(gestureName, {
      type: pinch.type,
      distance: pinch.distance,
      thumbTip: pinch.thumbTip,
      indexTip: pinch.indexTip
    });
  }

  /**
   * Classifies a static hand pose.
   *
   * @param {Array<{x:number,y:number}>} landmarks Hand landmarks (indices 3-12 are read).
   * @param {number} fingerCount Extended finger count.
   * @returns {?{type: string, fingerCount: number}} `open_palm` for four or more
   *   fingers, `pointing` for one finger with the index tip above its base,
   *   `thumbs_up` when isThumbsUp() holds, otherwise null.
   */
  detectBasicGesture(landmarks, fingerCount) {
    if (fingerCount >= 4) {
      return { type: 'open_palm', fingerCount };
    }
    if (fingerCount === 1) {
      const indexTip = landmarks[8];
      const indexBase = landmarks[5];
      // Image y grows downwards, so a smaller y means the tip is higher up.
      if (indexTip.y < indexBase.y) {
        return { type: 'pointing', fingerCount: 1 };
      }
    }
    if (this.isThumbsUp(landmarks)) {
      return { type: 'thumbs_up', fingerCount };
    }
    return null;
  }

  /**
   * @param {Array<{x:number,y:number}>} landmarks Hand landmarks.
   * @returns {boolean} true when the thumb tip (4) is above landmark 3 while the
   *   index (8) and middle (12) tips are below landmarks 6 and 10 respectively.
   */
  isThumbsUp(landmarks) {
    const thumbTip = landmarks[4];
    const thumbIP = landmarks[3];
    const indexTip = landmarks[8];
    const middleTip = landmarks[12];
    const thumbUp = thumbTip.y < thumbIP.y;
    const fingersDown = indexTip.y > landmarks[6].y &&
      middleTip.y > landmarks[10].y;
    return thumbUp && fingersDown;
  }

  /**
   * Fire-and-forget POST of a gesture to `${apiUrl}/llm/gesture-action`.
   * Does nothing when no hand landmarks are currently known. Failures are
   * logged and never thrown.
   *
   * @param {string} gestureName Name sent as `gesture_name`.
   * @param {object} parameters Gesture details, sent as `parameters`.
   */
  sendGestureToBackend(gestureName, parameters) {
    if (!this.lastLandmarks) return;
    const landmarksArray = this.lastLandmarks.map((lm) => [lm.x, lm.y, lm.z]);
    fetch(`${this.apiUrl}/llm/gesture-action`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        user_id: 'demo',
        landmarks: landmarksArray,
        gesture_name: gestureName,
        // Previously computed by the callers but silently dropped; sent as an
        // additional field so existing consumers of the payload are unaffected.
        parameters: parameters ?? {},
        context: {
          topic: document.title || 'learning',
          timestamp: Date.now()
        }
      })
    }).catch((err) => console.log('Backend not available for gesture', err));
  }

  /**
   * Draws the current video frame onto the main canvas and pixelates the
   * bounding box of the given face landmarks (padded by 15% on each side).
   * Boxes of 10 pixels or less in either dimension are left untouched.
   *
   * @param {Array<{x:number,y:number}>} landmarks Face landmarks with
   *   coordinates expressed as fractions of the frame size.
   */
  applyFaceBlur(landmarks) {
    if (!this.canvasElement || !this.videoElement) return;
    const canvas = this.canvasElement;
    const ctx = canvas.getContext('2d');
    const video = this.videoElement;
    const frameWidth = video.videoWidth;
    const frameHeight = video.videoHeight;
    if (frameWidth === 0 || frameHeight === 0) return;

    // Assigning width/height resets the canvas, so only do it on a real size change.
    if (canvas.width !== frameWidth) canvas.width = frameWidth;
    if (canvas.height !== frameHeight) canvas.height = frameHeight;
    ctx.drawImage(video, 0, 0);

    let minX = 1, maxX = 0, minY = 1, maxY = 0;
    for (const lm of landmarks) {
      minX = Math.min(minX, lm.x);
      maxX = Math.max(maxX, lm.x);
      minY = Math.min(minY, lm.y);
      maxY = Math.max(maxY, lm.y);
    }
    const padding = 0.15;
    const padX = (maxX - minX) * padding;
    const padY = (maxY - minY) * padding;

    // Clamp each edge separately so the box neither leaves the frame nor
    // shifts sideways when the padded face touches a border.
    const left = Math.max(0, Math.floor((minX - padX) * frameWidth));
    const top = Math.max(0, Math.floor((minY - padY) * frameHeight));
    const right = Math.min(frameWidth, Math.floor((maxX + padX) * frameWidth));
    const bottom = Math.min(frameHeight, Math.floor((maxY + padY) * frameHeight));
    const w = right - left;
    const h = bottom - top;
    if (w <= 10 || h <= 10) return;

    const imageData = ctx.getImageData(left, top, w, h);
    const data = imageData.data;
    const pixelSize = 15;
    for (let py = 0; py < h; py += pixelSize) {
      for (let px = 0; px < w; px += pixelSize) {
        // Every pixel of a tile takes the colour of the tile's top-left pixel.
        const i = (py * w + px) * 4;
        const r = data[i];
        const g = data[i + 1];
        const b = data[i + 2];
        for (let dy = 0; dy < pixelSize && py + dy < h; dy++) {
          for (let dx = 0; dx < pixelSize && px + dx < w; dx++) {
            const ni = ((py + dy) * w + (px + dx)) * 4;
            data[ni] = r;
            data[ni + 1] = g;
            data[ni + 2] = b;
          }
        }
      }
    }
    ctx.putImageData(imageData, left, top);
  }

  /**
   * Redraws the overlay canvas with the hand skeleton, the landmark dots and
   * a "Fingers: N" label.
   *
   * @param {Array<{x:number,y:number}>} landmarks Hand landmarks with
   *   coordinates expressed as fractions of the frame size.
   */
  drawHandLandmarks(landmarks) {
    if (!this.overlayCanvasElement || !this.videoElement) return;
    const canvas = this.overlayCanvasElement;
    const video = this.videoElement;
    // Resize only when needed; the explicit clearRect below wipes the old frame.
    if (canvas.width !== video.videoWidth) canvas.width = video.videoWidth;
    if (canvas.height !== video.videoHeight) canvas.height = video.videoHeight;
    const ctx = canvas.getContext('2d');
    ctx.clearRect(0, 0, canvas.width, canvas.height);

    // Pairs of landmark indices joined by a line.
    const connections = [
      [0, 1], [1, 2], [2, 3], [3, 4],
      [0, 5], [5, 6], [6, 7], [7, 8],
      [0, 9], [9, 10], [10, 11], [11, 12],
      [0, 13], [13, 14], [14, 15], [15, 16],
      [0, 17], [17, 18], [18, 19], [19, 20],
      [5, 9], [9, 13], [13, 17]
    ];
    ctx.strokeStyle = 'rgba(0, 255, 136, 0.8)';
    ctx.lineWidth = 2;
    for (const [from, to] of connections) {
      const start = landmarks[from];
      const end = landmarks[to];
      ctx.beginPath();
      ctx.moveTo(start.x * canvas.width, start.y * canvas.height);
      ctx.lineTo(end.x * canvas.width, end.y * canvas.height);
      ctx.stroke();
    }

    for (let i = 0; i < landmarks.length; i++) {
      const landmark = landmarks[i];
      const x = landmark.x * canvas.width;
      const y = landmark.y * canvas.height;
      // Every fourth landmark is drawn larger and in green.
      const highlighted = i % 4 === 0;
      ctx.beginPath();
      ctx.arc(x, y, highlighted ? 6 : 4, 0, 2 * Math.PI);
      ctx.fillStyle = highlighted ? '#00ff88' : '#ffffff';
      ctx.fill();
      ctx.strokeStyle = '#000000';
      ctx.lineWidth = 1;
      ctx.stroke();
    }

    const fingerCount = this.fingerCounter.count(landmarks);
    ctx.fillStyle = '#ffffff';
    ctx.font = 'bold 16px sans-serif';
    ctx.fillText(`Fingers: ${fingerCount}`, 10, 30);
  }

  /** Erases everything drawn on the overlay canvas, if one is attached. */
  clearOverlay() {
    if (!this.overlayCanvasElement) return;
    const ctx = this.overlayCanvasElement.getContext('2d');
    ctx.clearRect(0, 0, this.overlayCanvasElement.width, this.overlayCanvasElement.height);
  }

  /** @returns {?Array} Landmarks of the most recent hand, or null when none is visible. */
  getLandmarks() {
    return this.lastLandmarks;
  }

  /** @returns {Array<{landmarks: number[][], timestamp: number}>} Recent frames, oldest first. */
  getLandmarkHistory() {
    return this.landmarkHistory;
  }
}
/**
 * Detects swipes by following landmark 9 of the hand across frames.
 *
 * A swipe is armed once the tracked point has moved more than
 * `swipeThreshold` over the last five history frames, and is reported when
 * the point has come to rest (moved less than `settleThreshold` on both axes
 * over the last eight frames) more than `swipeThreshold` away from where it
 * was armed. An armed swipe is abandoned after two seconds.
 */
class SwipeDetector {
  constructor() {
    // Distances are in the same normalized units as the landmark coordinates.
    this.swipeThreshold = 0.12;
    this.minSwipeSpeed = 0.003;
    this.minHistoryForSwipe = 10;
    // Per-axis movement below which the hand counts as having stopped.
    this.settleThreshold = 0.008;
    this.swipeStart = null;
    this.isSwipeInProgress = false;
    this.lastSwipeTime = 0;
    // Milliseconds during which no new swipe is evaluated after a detection.
    this.swipeCooldown = 500;
  }

  /**
   * Evaluates the current frame.
   *
   * @param {Array<{x:number,y:number,z:number}>} landmarks Current hand landmarks (at least 21).
   * @param {number} fingerCount Echoed back in the result.
   * @param {Array<{landmarks: number[][]}>} history Recent frames, oldest first,
   *   each holding `[x, y, z]` triples per landmark.
   * @returns {?{direction: string, speed: number, fingerCount: number,
   *   startPosition: {x:number,y:number}, endPosition: {x:number,y:number}}}
   *   The completed swipe, or null when none completed on this frame.
   */
  detect(landmarks, fingerCount, history) {
    if (!landmarks || landmarks.length < 21) return null;
    if (history.length < this.minHistoryForSwipe) return null;
    const now = Date.now();
    if (now - this.lastSwipeTime < this.swipeCooldown) return null;

    const tracked = landmarks[9];
    const currentPos = {
      x: tracked.x,
      y: tracked.y,
      z: tracked.z || 0
    };

    // Absolute x/y displacement of landmark 9 between the first and last frame given.
    const displacement = (frames) => {
      const first = frames[0].landmarks[9];
      const last = frames[frames.length - 1].landmarks[9];
      return {
        x: Math.abs(last[0] - first[0]),
        y: Math.abs(last[1] - first[1])
      };
    };

    if (!this.swipeStart && history.length >= 5) {
      const burst = displacement(history.slice(-5));
      if (burst.x > this.swipeThreshold || burst.y > this.swipeThreshold) {
        this.swipeStart = { ...currentPos, time: now };
        this.isSwipeInProgress = true;
      }
    }

    if (!this.isSwipeInProgress || !this.swipeStart) return null;

    // Keep our own reference: the state is cleared before the result is built,
    // and reading this.swipeStart afterwards would always yield null.
    const start = this.swipeStart;
    const timeDelta = (now - start.time) / 1000;
    const deltaX = currentPos.x - start.x;
    const deltaY = currentPos.y - start.y;
    const distance = Math.sqrt(deltaX * deltaX + deltaY * deltaY);
    // The 0.1 s floor keeps the speed finite for near-instant detections.
    const speed = distance / Math.max(timeDelta, 0.1);

    const recent = displacement(history.slice(-8));
    const hasSettled = recent.x < this.settleThreshold && recent.y < this.settleThreshold;
    if (hasSettled && distance > this.swipeThreshold) {
      this.swipeStart = null;
      this.isSwipeInProgress = false;
      this.lastSwipeTime = now;
      return {
        direction: this.getSwipeDirection(deltaX, deltaY),
        speed,
        fingerCount,
        startPosition: { x: start.x, y: start.y },
        endPosition: { x: currentPos.x, y: currentPos.y }
      };
    }

    if (timeDelta > 2) {
      this.swipeStart = null;
      this.isSwipeInProgress = false;
    }
    return null;
  }

  /**
   * Maps a displacement to its dominant axis direction.
   *
   * @param {number} dx Horizontal displacement (positive is right).
   * @param {number} dy Vertical displacement (positive is down).
   * @returns {'left'|'right'|'up'|'down'} Vertical wins when both magnitudes are equal.
   */
  getSwipeDirection(dx, dy) {
    if (Math.abs(dx) > Math.abs(dy)) {
      return dx > 0 ? 'right' : 'left';
    }
    return dy > 0 ? 'down' : 'up';
  }
}
/**
 * Small state machine over the thumb-tip (4) to index-tip (8) distance.
 *
 * While idle, a distance below `pinchThreshold` yields `grab` and enters the
 * pinching state. While pinching, a distance above `zoomOutThreshold` yields
 * `zoom_out` and a distance strictly between `zoomInThreshold` and
 * `zoomOutThreshold` yields `zoom_in`; both leave the pinching state. Every
 * emitted event starts a cooldown during which detect() returns null without
 * looking at the landmarks.
 */
class PinchDetector {
  constructor() {
    this.pinchThreshold = 0.08;
    this.zoomInThreshold = 0.10;
    this.zoomOutThreshold = 0.18;
    this.lastPinchTime = 0;
    // Milliseconds to stay silent after an emitted event.
    this.pinchCooldown = 800;
    this.isPinching = false;
  }

  /**
   * @param {Array<{x:number,y:number,z?:number}>} landmarks Hand landmarks (at least 21).
   * @returns {?{type: string, distance: number, thumbTip: number[], indexTip: number[]}}
   *   The pinch event for this frame, or null when nothing happened.
   */
  detect(landmarks) {
    const usable = Boolean(landmarks) && landmarks.length >= 21;
    if (!usable) return null;

    const timestamp = Date.now();
    const coolingDown = timestamp - this.lastPinchTime < this.pinchCooldown;
    if (coolingDown) return null;

    const thumb = landmarks[4];
    const index = landmarks[8];
    // A missing z component is treated as 0.
    const thumbZ = thumb.z || 0;
    const indexZ = index.z || 0;
    const distance = Math.sqrt(
      Math.pow(thumb.x - index.x, 2) +
      Math.pow(thumb.y - index.y, 2) +
      Math.pow(thumbZ - indexZ, 2)
    );

    let event = null;
    if (this.isPinching) {
      if (distance > this.zoomOutThreshold) {
        event = 'zoom_out';
      } else if (distance > this.zoomInThreshold && distance < this.zoomOutThreshold) {
        event = 'zoom_in';
      }
      // A release event ends the pinch; so does any other distance beyond
      // twice the pinch threshold, which ends it silently.
      if (event !== null || distance > this.pinchThreshold * 2) {
        this.isPinching = false;
      }
    } else if (distance < this.pinchThreshold) {
      event = 'grab';
      this.isPinching = true;
    }

    if (!event) return null;

    this.lastPinchTime = timestamp;
    return {
      type: event,
      distance,
      thumbTip: [thumb.x, thumb.y, thumbZ],
      indexTip: [index.x, index.y, indexZ]
    };
  }
}
/**
 * Counts extended fingers from hand landmarks.
 */
class FingerCounter {
  /**
   * A finger counts as extended when its tip landmark has a smaller y than
   * its paired reference landmark (4/3, 8/6, 12/10, 16/14, 20/18).
   *
   * @param {Array<{y:number}>} landmarks Hand landmarks.
   * @returns {number} Number of extended fingers (0-5); 0 when fewer than
   *   21 landmarks are supplied.
   */
  count(landmarks) {
    const hasFullHand = Boolean(landmarks) && landmarks.length >= 21;
    if (!hasFullHand) return 0;

    // [tip index, reference index] for thumb through pinky.
    const tipAndReference = [
      [4, 3],
      [8, 6],
      [12, 10],
      [16, 14],
      [20, 18]
    ];
    return tipAndReference.reduce(
      (total, [tipIndex, refIndex]) =>
        (landmarks[tipIndex].y < landmarks[refIndex].y ? total + 1 : total),
      0
    );
  }
}
export default MediaPipeProcessor;
export { MediaPipeProcessor, SwipeDetector, PinchDetector, FingerCounter };