namish10
/

contextflow-rl

+"""
+Hand Gesture Recognition Agent
+Privacy-first approach:
+1. Camera feed is processed locally
+2. Face is auto-blurred before any processing
+3. Users TRAIN the model with their own hand gestures
+4. Gestures are used as behavioral/learning signals
+Training Flow:
+1. User defines a gesture (e.g., "thinking", "confused", "pause")
+2. User performs the gesture multiple times for training
+3. Model learns the user's specific gesture pattern
+4. Real-time recognition during learning sessions
+"""
+import numpy as np
+from typing import Dict, List, Any, Optional, Tuple
+from dataclasses import dataclass, field
+from datetime import datetime
+import json
+import base64
@dataclass
class GestureTemplate:
    """Definition and learned state of one user-trainable gesture.

    A freshly created template is untrained: it carries no samples and no
    centroid. The field order below is the positional constructor order.
    """

    # Key under which the gesture is stored and looked up.
    gesture_id: str
    # Human-readable label.
    name: str
    # Free-text explanation of what the gesture looks like / means.
    description: str
    # Feature vectors collected while training; each template gets its own list.
    samples: List[List[float]] = field(default_factory=list)
    # Mean feature vector of the training samples, or None until one is computed.
    centroid: Optional[List[float]] = None
    # Distance cut-off used when matching against the centroid.
    threshold: float = 0.3
    # Whether a usable model has been produced for this gesture.
    trained: bool = False
    # Creation time (naive local time, taken when the instance is built).
    created_at: datetime = field(default_factory=datetime.now)
    # Category label for the gesture.
    gesture_type: str = "learning_signal"
@dataclass
class HandLandmark:
    """One timestamped set of hand landmark points.

    The original description states these are the 21 hand landmarks produced
    by MediaPipe; the dataclass itself does not enforce the count.
    """

    # One (x, y, z) tuple per landmark.
    landmarks: List[Tuple[float, float, float]]
    # When the landmarks were captured.
    timestamp: datetime
@dataclass
class GestureRecognition:
    """Outcome of matching a hand pose against the known gestures."""

    # Identifier of the matched gesture.
    gesture_id: str
    # Display name of the matched gesture.
    gesture_name: str
    # Match score for the gesture.
    confidence: float
    # Category label copied from the matched gesture.
    gesture_type: str
    # When the match was produced.
    timestamp: datetime
class HandGestureAgent:
    """
    Hand gesture recognition agent with user-defined gestures.

    Gestures live in ``self.gestures`` (gesture id -> ``GestureTemplate``).
    Training turns each submitted set of hand landmarks into a fixed-length
    feature vector, averages the vectors into a centroid and derives a
    distance threshold from their spread.  Recognition averages the feature
    vectors of the most recent frames and returns the trained gesture whose
    centroid is closest, provided it is within that gesture's threshold.

    Features:
    - User-defined gesture training
    - Real-time recognition over a short rolling window of frames
    - Export / import of the trained model as plain dictionaries
    """

    # Number of landmark points required per frame.
    LANDMARK_COUNT = 21
    # Length of every vector produced by _extract_features:
    # 5 fingers * (dx, dy, dz, dist) + 7 sampled landmarks * (dx, dy, dz) + 1 angle.
    FEATURE_DIM = 42
    # Fewest samples _finalize_training accepts.
    MIN_TRAINING_SAMPLES = 5
    # Lower bound for a learned threshold; identical samples would otherwise
    # produce a threshold of 0, which no frame could ever satisfy.
    MIN_THRESHOLD = 1e-6

    def __init__(self, user_id: str, config: Optional[Dict] = None):
        """Create an agent for ``user_id`` pre-populated with the default gestures.

        Args:
            user_id: Identifier stored on the agent and written by export_model.
            config: Optional settings dictionary; stored as-is (``{}`` if omitted).
        """
        self.user_id = user_id
        self.config = config or {}
        self.gestures: Dict[str, "GestureTemplate"] = {}
        # Rolling window of recent feature vectors used by recognize().
        self.landmark_buffer = []
        self.max_buffer_size = 30
        self.is_training = False
        self.current_training_gesture = None
        self.training_samples_needed = 20
        self.recognition_enabled = False
        self.last_recognition = None
        self._initialize_default_gestures()

    def _initialize_default_gestures(self):
        """Register the built-in, untrained gesture templates."""
        defaults = (
            ('thinking', 'Thinking', 'Hand on chin - signals contemplation', 'cognitive'),
            ('confused', 'Confused', 'Scratching head - signals confusion', 'emotional'),
            ('pause', 'Pause', 'Open palm toward camera - take a break', 'action'),
            ('got_it', 'Got It!', 'Thumbs up - signals understanding', 'feedback'),
            ('question', 'Question', 'Hand near ear with questioning look', 'doubt'),
        )
        for gesture_id, name, description, gesture_type in defaults:
            self.gestures[gesture_id] = GestureTemplate(
                gesture_id=gesture_id,
                name=name,
                description=description,
                gesture_type=gesture_type,
            )

    def start_training(self, gesture_id: str) -> Dict:
        """Start a training session for ``gesture_id``.

        Any samples previously stored on the gesture are discarded.

        Returns:
            ``{'error': 'Gesture not found'}`` for an unknown id, otherwise a
            dictionary with the gesture id, name, required sample count and
            user-facing instructions.
        """
        gesture = self.gestures.get(gesture_id)
        if gesture is None:
            return {'error': 'Gesture not found'}
        self.is_training = True
        self.current_training_gesture = gesture_id
        gesture.samples = []
        return {
            'gesture_id': gesture_id,
            'gesture_name': gesture.name,
            'samples_needed': self.training_samples_needed,
            'instructions': f"Perform the '{gesture.name}' gesture {self.training_samples_needed} times. Move your hand naturally."
        }

    def add_training_sample(self, landmarks: List[List[float]]) -> Dict:
        """Add one hand-landmark frame to the gesture currently being trained.

        Args:
            landmarks: At least 21 points, each with at least (x, y, z).

        Returns:
            A dictionary whose ``'status'`` is one of:

            - ``'not_training'``: no session is active (also returned, after
              resetting the session, when the gesture was removed mid-training).
            - ``'invalid_sample'``: the frame lacked usable landmarks and was
              not stored, so it cannot skew the centroid.
            - ``'collecting'``: the sample was stored and more are required.
            - ``'completed'``: enough samples were stored and the gesture is
              now trained.
        """
        if not self.is_training or not self.current_training_gesture:
            return {'status': 'not_training'}

        gesture = self.gestures.get(self.current_training_gesture)
        if gesture is None:
            # The gesture disappeared while training; drop the stale session.
            self.is_training = False
            self.current_training_gesture = None
            return {'status': 'not_training'}

        if not self._has_valid_landmarks(landmarks):
            return {
                'status': 'invalid_sample',
                'samples_collected': len(gesture.samples),
                'samples_remaining': self.training_samples_needed - len(gesture.samples)
            }

        gesture.samples.append(self._extract_features(landmarks))
        remaining = self.training_samples_needed - len(gesture.samples)
        if remaining <= 0:
            self._finalize_training(gesture)
            if gesture.trained:
                return {
                    'status': 'completed',
                    'gesture_id': gesture.gesture_id,
                    'samples_collected': len(gesture.samples),
                    'message': f"'{gesture.name}' trained successfully!"
                }
            # Finalization refused the sample set (too few samples); keep
            # collecting instead of reporting a success that did not happen.
            remaining = self.MIN_TRAINING_SAMPLES - len(gesture.samples)
        return {
            'status': 'collecting',
            'samples_collected': len(gesture.samples),
            'samples_remaining': remaining
        }

    @staticmethod
    def _has_valid_landmarks(landmarks) -> bool:
        """Return True when ``landmarks`` holds >= 21 points with >= 3 values each."""
        if landmarks is None or len(landmarks) < HandGestureAgent.LANDMARK_COUNT:
            return False
        return all(len(point) >= 3 for point in landmarks[:HandGestureAgent.LANDMARK_COUNT])

    def _extract_features(self, landmarks: List[List[float]]) -> List[float]:
        """Convert hand landmarks into a ``FEATURE_DIM``-long feature vector.

        Layout of the result:

        1. For each finger (tip/base index pairs 4/2, 8/5, 12/9, 16/13, 20/17):
           the tip-minus-base offset (dx, dy, dz) followed by its length.
        2. For every third landmark (0, 3, ..., 18): its offset from the mean
           of landmarks 0-8.
        3. The angle (radians) of the vector from landmark 0 to landmark 9 in
           the x/y plane.

        Invalid input yields an all-zero vector of the same length, so every
        vector this method returns can be stacked with any other.
        """
        if not self._has_valid_landmarks(landmarks):
            return [0.0] * self.FEATURE_DIM

        # Only the first three components of each point are used.
        points = np.asarray(
            [point[:3] for point in landmarks[:self.LANDMARK_COUNT]], dtype=float
        )
        palm_center = points[:9].mean(axis=0)

        finger_tips = [4, 8, 12, 16, 20]
        finger_bases = [2, 5, 9, 13, 17]
        finger_vectors = points[finger_tips] - points[finger_bases]
        finger_lengths = np.linalg.norm(finger_vectors, axis=1, keepdims=True)
        # Row-major flattening gives dx, dy, dz, dist per finger.
        finger_part = np.hstack([finger_vectors, finger_lengths]).ravel()

        palm_part = (points[::3] - palm_center).ravel()

        wrist = points[0]
        middle_finger_mcp = points[9]
        hand_angle = np.arctan2(
            middle_finger_mcp[1] - wrist[1],
            middle_finger_mcp[0] - wrist[0]
        )
        return np.concatenate([finger_part, palm_part, [hand_angle]]).tolist()

    def _finalize_training(self, gesture: "GestureTemplate"):
        """Compute the centroid and threshold of ``gesture`` and end the session.

        With fewer than ``MIN_TRAINING_SAMPLES`` samples the gesture is marked
        untrained and the session is left untouched.  Otherwise the centroid
        is the mean sample, and the threshold is 1.5 times the mean distance
        of the samples from that centroid (never below ``MIN_THRESHOLD``).
        """
        if len(gesture.samples) < self.MIN_TRAINING_SAMPLES:
            gesture.trained = False
            return
        samples_array = np.asarray(gesture.samples, dtype=float)
        centroid = samples_array.mean(axis=0)
        distances = np.linalg.norm(samples_array - centroid, axis=1)
        gesture.centroid = centroid.tolist()
        gesture.threshold = max(float(distances.mean()) * 1.5, self.MIN_THRESHOLD)
        gesture.trained = True
        self.is_training = False
        self.current_training_gesture = None

    def cancel_training(self):
        """Cancel the current training session and drop its collected samples."""
        gesture = self.gestures.get(self.current_training_gesture)
        if gesture is not None:
            gesture.samples = []
        self.is_training = False
        self.current_training_gesture = None

    def recognize(self, landmarks: List[List[float]]) -> Optional["GestureRecognition"]:
        """Recognize a gesture from one frame of hand landmarks.

        The frame's features join a rolling window of at most
        ``max_buffer_size`` vectors; the window mean is compared with every
        trained centroid.  Confidence is ``1 - distance / threshold``.

        Returns:
            The best ``GestureRecognition`` (also stored in
            ``last_recognition``), or ``None`` when recognition is disabled,
            the frame has no usable landmarks, fewer than 3 frames are
            buffered, or nothing is within its threshold.
        """
        if not self.recognition_enabled:
            return None
        if not self._has_valid_landmarks(landmarks):
            # Skip frames without a usable hand rather than averaging zeros in.
            return None

        self.landmark_buffer.append(self._extract_features(landmarks))
        while len(self.landmark_buffer) > self.max_buffer_size:
            self.landmark_buffer.pop(0)
        if len(self.landmark_buffer) < 3:
            return None

        avg_features = np.mean(self.landmark_buffer, axis=0)
        best_match = None
        best_confidence = 0.0
        for gesture_id, gesture in self.gestures.items():
            if not gesture.trained or not gesture.centroid:
                continue
            centroid = np.asarray(gesture.centroid, dtype=float)
            # An imported model may carry a centroid of another length or a
            # non-positive threshold; such a gesture cannot be compared.
            if centroid.shape != avg_features.shape or gesture.threshold <= 0:
                continue
            distance = float(np.linalg.norm(avg_features - centroid))
            if distance >= gesture.threshold:
                continue
            confidence = 1 - (distance / gesture.threshold)
            if confidence > best_confidence:
                best_confidence = confidence
                best_match = GestureRecognition(
                    gesture_id=gesture_id,
                    gesture_name=gesture.name,
                    confidence=confidence,
                    gesture_type=gesture.gesture_type,
                    timestamp=datetime.now()
                )
        self.last_recognition = best_match
        return best_match

    def enable_recognition(self) -> Dict:
        """Enable real-time recognition and report the known gestures."""
        self.recognition_enabled = True
        trained_count = sum(1 for g in self.gestures.values() if g.trained)
        return {
            'enabled': True,
            'trained_gestures': trained_count,
            'gestures': [
                {'id': g.gesture_id, 'name': g.name, 'trained': g.trained}
                for g in self.gestures.values()
            ]
        }

    def disable_recognition(self) -> Dict:
        """Disable recognition and forget the buffered frames.

        Clearing the window keeps frames from this session from being
        averaged into the next one.
        """
        self.recognition_enabled = False
        self.landmark_buffer.clear()
        return {'enabled': False}

    def get_trained_gestures(self) -> List[Dict]:
        """List every registered gesture, trained or not.

        Each entry carries a ``'trained'`` flag and the number of stored
        samples so callers can filter as needed.
        """
        return [
            {
                'id': g.gesture_id,
                'name': g.name,
                'description': g.description,
                'type': g.gesture_type,
                'trained': g.trained,
                'samples': len(g.samples)
            }
            for g in self.gestures.values()
        ]

    def delete_gesture(self, gesture_id: str) -> Dict:
        """Delete a gesture; ends the training session if it targeted that gesture."""
        if gesture_id not in self.gestures:
            return {'error': 'Gesture not found'}
        del self.gestures[gesture_id]
        if self.current_training_gesture == gesture_id:
            # Avoid leaving the session pointing at a gesture that is gone.
            self.is_training = False
            self.current_training_gesture = None
        return {'success': True}

    def add_custom_gesture(self, name: str, description: str, gesture_type: str = "custom") -> str:
        """Register a new, untrained gesture and return its generated id.

        The id is ``custom_<lower-cased name with '_' for spaces>_<timestamp>``.
        """
        gesture_id = f"custom_{name.lower().replace(' ', '_')}_{datetime.now().timestamp()}"
        self.gestures[gesture_id] = GestureTemplate(
            gesture_id=gesture_id,
            name=name,
            description=description,
            gesture_type=gesture_type
        )
        return gesture_id

    def export_model(self) -> Dict:
        """Export all gestures as a plain dictionary for backup.

        Raw training samples are not exported, only their count together with
        the centroid and threshold.
        """
        return {
            'user_id': self.user_id,
            'gestures': [
                {
                    'gesture_id': g.gesture_id,
                    'name': g.name,
                    'description': g.description,
                    'gesture_type': g.gesture_type,
                    'trained': g.trained,
                    'samples_count': len(g.samples),
                    'centroid': g.centroid,
                    'threshold': g.threshold,
                    'created_at': g.created_at.isoformat()
                }
                for g in self.gestures.values()
            ],
            'export_timestamp': datetime.now().isoformat()
        }

    def import_model(self, model_data: Dict):
        """Import gestures from a dictionary shaped like export_model's output.

        Gestures with an id that already exists are replaced.  Each entry must
        contain ``'gesture_id'`` and ``'name'``; other keys fall back to
        defaults (creation time defaults to now).

        Raises:
            KeyError: if an entry lacks ``'gesture_id'`` or ``'name'``.
        """
        for g_data in model_data.get('gestures', []):
            created_raw = g_data.get('created_at')
            created_at = datetime.fromisoformat(created_raw) if created_raw else datetime.now()
            gesture = GestureTemplate(
                gesture_id=g_data['gesture_id'],
                name=g_data['name'],
                description=g_data.get('description', ''),
                gesture_type=g_data.get('gesture_type', 'custom'),
                trained=g_data.get('trained', False),
                centroid=g_data.get('centroid'),
                threshold=g_data.get('threshold', 0.3),
                created_at=created_at
            )
            self.gestures[gesture.gesture_id] = gesture
class FaceBlurProcessor:
    """
    Privacy filter helper that tracks where the face is in the camera frame.

    Given face-mesh landmarks, it computes a padded bounding box clamped to
    the frame and answers whether a pixel lies inside it.  The class itself
    does not modify any image; ``blur_strength`` is only stored for whoever
    performs the actual blur.
    """

    def __init__(
        self,
        blur_strength: int = 50,
        frame_width: int = 640,
        frame_height: int = 480,
        padding: int = 20,
    ):
        """
        Args:
            blur_strength: Stored for the component that applies the blur.
            frame_width: Right-hand clamp for the face box, in pixels.
            frame_height: Bottom clamp for the face box, in pixels.
            padding: Margin added on every side of the landmark extent.
        """
        self.blur_strength = blur_strength
        self.frame_width = frame_width
        self.frame_height = frame_height
        self.padding = padding
        # True only when the most recent detect_face call found a face.
        self.face_detected = False
        # Last box that was found; kept after a failed detection so the
        # previously known face region continues to be reported for blurring.
        self.face_box = None

    def detect_face(self, landmarks: List) -> Optional[Tuple[int, int, int, int]]:
        """Compute the padded face bounding box from face-mesh landmarks.

        Args:
            landmarks: At least 468 points; only the first 468 are read, and
                points with fewer than 3 components are ignored.
                NOTE(review): coordinates are treated as pixel positions
                (truncated with ``int``) - confirm the caller does not pass
                normalized 0..1 values.

        Returns:
            ``(min_x, min_y, max_x, max_y)`` clamped to the configured frame,
            or ``None`` when no usable landmarks were supplied.  On ``None``,
            ``face_detected`` is reset to False.
        """
        if landmarks is None or len(landmarks) < 468:
            self.face_detected = False
            return None

        usable = [lm for lm in landmarks[:468] if len(lm) >= 3]
        if not usable:
            self.face_detected = False
            return None

        x_coords = [lm[0] for lm in usable]
        y_coords = [lm[1] for lm in usable]
        min_x = max(0, int(min(x_coords)) - self.padding)
        max_x = min(self.frame_width, int(max(x_coords)) + self.padding)
        min_y = max(0, int(min(y_coords)) - self.padding)
        max_y = min(self.frame_height, int(max(y_coords)) + self.padding)

        self.face_detected = True
        self.face_box = (min_x, min_y, max_x, max_y)
        return self.face_box

    def should_blur_region(self, x: int, y: int) -> bool:
        """Return True when point ``(x, y)`` lies inside the stored face box (edges included)."""
        if not self.face_box:
            return False
        min_x, min_y, max_x, max_y = self.face_box
        return min_x <= x <= max_x and min_y <= y <= max_y
class GestureSignalMapper:
    """
    Translates recognized gestures into learning signals.

    ``gesture_to_signal`` maps a gesture id to a dictionary holding the signal
    name, a weight applied to the recognition confidence, and a description.
    """

    def __init__(self):
        """Populate the table with the built-in gesture-to-signal mappings."""
        builtin_rows = (
            ('thinking', 'cognitive_load', 0.3, 'User is thinking deeply'),
            ('confused', 'confusion', 0.7, 'User seems confused'),
            ('pause', 'break_needed', 1.0, 'User wants to pause'),
            ('got_it', 'understanding', 0.5, 'User understands the concept'),
            ('question', 'doubt_intent', 0.8, 'User likely has a question'),
        )
        self.gesture_to_signal = {
            gesture_id: {'signal': signal, 'weight': weight, 'description': text}
            for gesture_id, signal, weight, text in builtin_rows
        }

    def map_to_signal(self, recognition: "GestureRecognition") -> Dict:
        """Build the learning-signal dictionary for one recognition result.

        Gesture ids without a table entry map to the ``'unknown'`` signal with
        weight 0.5.  The reported confidence is the raw confidence multiplied
        by the entry's weight.
        """
        try:
            entry = self.gesture_to_signal[recognition.gesture_id]
        except KeyError:
            entry = {'signal': 'unknown', 'weight': 0.5, 'description': 'Unknown gesture'}

        raw = recognition.confidence
        result = {}
        result['signal_type'] = entry['signal']
        result['confidence'] = raw * entry['weight']
        result['raw_confidence'] = raw
        result['gesture_name'] = recognition.gesture_name
        result['description'] = entry['description']
        result['timestamp'] = recognition.timestamp.isoformat()
        return result

    def add_custom_mapping(self, gesture_id: str, signal: str, weight: float):
        """Register (or overwrite) the signal mapping for ``gesture_id``."""
        entry = dict(signal=signal, weight=weight)
        entry['description'] = f'Custom gesture: {gesture_id}'
        self.gesture_to_signal[gesture_id] = entry