namish10 committed on
Commit
22147a6
·
verified ·
1 Parent(s): f6ddf5e

Upload app/agents/hand_gesture_agent.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app/agents/hand_gesture_agent.py +475 -0
app/agents/hand_gesture_agent.py ADDED
@@ -0,0 +1,475 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Hand Gesture Recognition Agent
3
+
4
+ Privacy-first approach:
5
+ 1. Camera feed is processed locally
6
+ 2. Face is auto-blurred before any processing
7
+ 3. Users TRAIN the model with their own hand gestures
8
+ 4. Gestures are used as behavioral/learning signals
9
+
10
+ Training Flow:
11
+ 1. User defines a gesture (e.g., "thinking", "confused", "pause")
12
+ 2. User performs the gesture multiple times for training
13
+ 3. Model learns the user's specific gesture pattern
14
+ 4. Real-time recognition during learning sessions
15
+ """
16
+
17
+ import numpy as np
18
+ from typing import Dict, List, Any, Optional, Tuple
19
+ from dataclasses import dataclass, field
20
+ from datetime import datetime
21
+ import json
22
+ import base64
23
+
24
+
25
@dataclass
class GestureTemplate:
    """A user-defined gesture together with its training state.

    A template starts untrained (no samples, no centroid). Training code
    appends one feature vector per sample to ``samples`` and, once enough
    samples exist, fills in ``centroid`` and ``threshold`` and sets
    ``trained``.
    """

    # Unique key of the gesture.
    gesture_id: str
    # Human-readable name shown to the user.
    name: str
    # Free-text explanation of what the gesture looks like / means.
    description: str
    # Collected feature vectors, one list of floats per training sample.
    # default_factory gives every instance its own list.
    samples: List[List[float]] = field(default_factory=list)
    # Mean feature vector of the training samples; None until trained.
    centroid: Optional[List[float]] = None
    # Maximum distance from the centroid that still counts as a match.
    threshold: float = 0.3
    # True once centroid/threshold have been computed from samples.
    trained: bool = False
    # Creation time (naive local time from datetime.now()).
    created_at: datetime = field(default_factory=datetime.now)
    # Category label for the gesture.
    gesture_type: str = "learning_signal"
37
+
38
+
39
@dataclass
class HandLandmark:
    """One timestamped set of hand landmarks (21 points from MediaPipe)."""

    # One (x, y, z) tuple per landmark.
    landmarks: List[Tuple[float, float, float]]
    # When the landmarks were captured.
    timestamp: datetime
44
+
45
+
46
@dataclass
class GestureRecognition:
    """Result of one successful gesture recognition."""

    # Key of the matched gesture.
    gesture_id: str
    # Display name of the matched gesture.
    gesture_name: str
    # Match confidence reported by the recogniser.
    confidence: float
    # Category label copied from the matched gesture.
    gesture_type: str
    # When the recognition was produced.
    timestamp: datetime
54
+
55
+
56
class HandGestureAgent:
    """
    Hand gesture recognition agent with user-defined gestures.

    The agent keeps a dictionary of gesture templates. A gesture is trained
    by collecting feature vectors extracted from hand landmarks; its centroid
    and distance threshold are then used for nearest-centroid recognition
    over a short rolling buffer of recent frames.

    Class constants:
        NUM_LANDMARKS: number of hand landmarks required per frame.
        FEATURE_DIM: length of every feature vector produced by
            ``_extract_features`` (5 fingers x 4 + 7 landmarks x 3 + 1 angle).
        MIN_TRAINING_SAMPLES: fewer samples than this never yields a
            trained gesture.
        MIN_THRESHOLD: lower bound for a trained threshold, so a gesture
            trained from identical samples can still be matched.
    """

    NUM_LANDMARKS = 21
    FEATURE_DIM = 42
    MIN_TRAINING_SAMPLES = 5
    MIN_THRESHOLD = 1e-6

    def __init__(self, user_id: str, config: Optional[Dict] = None):
        """Create an agent for ``user_id`` pre-populated with untrained default gestures."""
        self.user_id = user_id
        self.config = config or {}

        self.gestures: Dict[str, "GestureTemplate"] = {}
        # Rolling window of recent feature vectors used to smooth recognition.
        self.landmark_buffer = []
        self.max_buffer_size = 30

        self.is_training = False
        self.current_training_gesture = None
        self.training_samples_needed = 20

        self.recognition_enabled = False
        self.last_recognition = None

        self._initialize_default_gestures()

    def _initialize_default_gestures(self):
        """Initialize with common learning gestures users might want to train"""
        common_gestures = [
            {
                'id': 'thinking',
                'name': 'Thinking',
                'description': 'Hand on chin - signals contemplation',
                'type': 'cognitive'
            },
            {
                'id': 'confused',
                'name': 'Confused',
                'description': 'Scratching head - signals confusion',
                'type': 'emotional'
            },
            {
                'id': 'pause',
                'name': 'Pause',
                'description': 'Open palm toward camera - take a break',
                'type': 'action'
            },
            {
                'id': 'got_it',
                'name': 'Got It!',
                'description': 'Thumbs up - signals understanding',
                'type': 'feedback'
            },
            {
                'id': 'question',
                'name': 'Question',
                'description': 'Hand near ear with questioning look',
                'type': 'doubt'
            }
        ]

        for g in common_gestures:
            self.gestures[g['id']] = GestureTemplate(
                gesture_id=g['id'],
                name=g['name'],
                description=g['description'],
                gesture_type=g['type']
            )

    def start_training(self, gesture_id: str) -> Dict:
        """Start training a gesture.

        Clears any previously collected samples of that gesture and makes it
        the current training target. Returns ``{'error': ...}`` when the id
        is unknown, otherwise a dict with the gesture id/name, the number of
        samples needed and user instructions.
        """
        if gesture_id not in self.gestures:
            return {'error': 'Gesture not found'}

        self.is_training = True
        self.current_training_gesture = gesture_id
        self.gestures[gesture_id].samples = []

        return {
            'gesture_id': gesture_id,
            'gesture_name': self.gestures[gesture_id].name,
            'samples_needed': self.training_samples_needed,
            'instructions': f"Perform the '{self.gestures[gesture_id].name}' gesture {self.training_samples_needed} times. Move your hand naturally."
        }

    def add_training_sample(self, landmarks: List[List[float]]) -> Dict:
        """Add a hand landmark sample during training.

        Returns a dict whose ``status`` is ``'not_training'``,
        ``'collecting'`` or ``'completed'``. Frames without a usable hand
        (fewer than 21 landmarks, or points with fewer than 3 coordinates)
        are ignored: the status stays ``'collecting'`` and a ``'warning'``
        key is added, so empty frames never pollute the training set.
        """
        if not self.is_training or not self.current_training_gesture:
            return {'status': 'not_training'}

        gesture = self.gestures.get(self.current_training_gesture)
        if gesture is None:
            # The gesture vanished mid-session; end the session instead of
            # raising KeyError.
            self.is_training = False
            self.current_training_gesture = None
            return {'status': 'not_training'}

        if not self._has_valid_landmarks(landmarks):
            return {
                'status': 'collecting',
                'samples_collected': len(gesture.samples),
                'samples_remaining': self.training_samples_needed - len(gesture.samples),
                'warning': 'No hand detected; sample ignored'
            }

        gesture.samples.append(self._extract_features(landmarks))

        remaining = self.training_samples_needed - len(gesture.samples)

        if remaining <= 0:
            self._finalize_training(gesture)
            return {
                'status': 'completed',
                'gesture_id': gesture.gesture_id,
                'samples_collected': len(gesture.samples),
                'message': f"'{gesture.name}' trained successfully!"
            }

        return {
            'status': 'collecting',
            'samples_collected': len(gesture.samples),
            'samples_remaining': remaining
        }

    def _has_valid_landmarks(self, landmarks) -> bool:
        """Return True when ``landmarks`` holds at least 21 points with >= 3 coordinates each."""
        if landmarks is None:
            return False
        try:
            if len(landmarks) < self.NUM_LANDMARKS:
                return False
            return all(len(point) >= 3 for point in landmarks[:self.NUM_LANDMARKS])
        except TypeError:
            # Not a sequence of sequences.
            return False

    def _extract_features(self, landmarks: List[List[float]]) -> List[float]:
        """Extract a fixed-length feature vector from hand landmarks.

        The vector always has ``FEATURE_DIM`` (42) entries:
          * for each finger, tip-minus-base (dx, dy, dz) and its length;
          * for landmarks 0, 3, ..., 18, the offset from the mean of
            landmarks 0-8 (called ``palm_center`` here);
          * the 2D angle of the wrist -> landmark 9 vector.

        Invalid input yields a zero vector of the same length, so callers
        never see vectors of differing sizes.
        """
        if not self._has_valid_landmarks(landmarks):
            return [0.0] * self.FEATURE_DIM

        features = []

        palm_center = np.mean([p[:3] for p in landmarks[:9]], axis=0)

        finger_tips = [4, 8, 12, 16, 20]
        finger_bases = [2, 5, 9, 13, 17]

        for tip, base in zip(finger_tips, finger_bases):
            dx = landmarks[tip][0] - landmarks[base][0]
            dy = landmarks[tip][1] - landmarks[base][1]
            dz = landmarks[tip][2] - landmarks[base][2]

            dist = np.sqrt(dx**2 + dy**2 + dz**2)

            features.extend([dx, dy, dz, dist])

        for i in range(0, self.NUM_LANDMARKS, 3):
            dx = landmarks[i][0] - palm_center[0]
            dy = landmarks[i][1] - palm_center[1]
            dz = landmarks[i][2] - palm_center[2]
            features.extend([dx, dy, dz])

        wrist = landmarks[0]
        middle_finger_mcp = landmarks[9]

        hand_angle = np.arctan2(
            middle_finger_mcp[1] - wrist[1],
            middle_finger_mcp[0] - wrist[0]
        )
        features.append(hand_angle)

        # Plain Python floats keep samples independent of numpy scalar types.
        return [float(value) for value in features]

    def _finalize_training(self, gesture: "GestureTemplate") -> None:
        """Compute centroid and threshold for ``gesture`` and end the session.

        With fewer than ``MIN_TRAINING_SAMPLES`` samples the gesture is
        marked untrained and the training session is left untouched.
        Otherwise the centroid is the per-feature mean of the samples and
        the threshold is 1.5x the mean sample-to-centroid distance, floored
        at ``MIN_THRESHOLD`` so identical samples do not produce an
        unmatchable threshold of 0.
        """
        if len(gesture.samples) < self.MIN_TRAINING_SAMPLES:
            gesture.trained = False
            return

        samples_array = np.asarray(gesture.samples, dtype=float)
        centroid = samples_array.mean(axis=0)
        distances = np.linalg.norm(samples_array - centroid, axis=1)

        gesture.centroid = centroid.tolist()
        gesture.threshold = max(float(distances.mean()) * 1.5, self.MIN_THRESHOLD)

        gesture.trained = True
        self.is_training = False
        self.current_training_gesture = None

    def cancel_training(self):
        """Cancel current training session and drop its collected samples."""
        gesture = self.gestures.get(self.current_training_gesture)
        if gesture is not None:
            gesture.samples = []

        self.is_training = False
        self.current_training_gesture = None

    def recognize(self, landmarks: List[List[float]]) -> Optional["GestureRecognition"]:
        """Recognize a gesture from hand landmarks.

        Returns None when recognition is disabled, when the frame has no
        usable hand, while fewer than 3 frames are buffered, or when no
        trained gesture lies within its threshold of the buffer average.
        Otherwise returns the match with the highest confidence
        (``1 - distance / threshold``) and stores it in ``last_recognition``.
        """
        if not self.recognition_enabled:
            return None

        if not self._has_valid_landmarks(landmarks):
            # Do not push placeholder vectors into the smoothing buffer.
            return None

        self.landmark_buffer.append(self._extract_features(landmarks))
        if len(self.landmark_buffer) > self.max_buffer_size:
            self.landmark_buffer.pop(0)

        if len(self.landmark_buffer) < 3:
            return None

        avg_features = np.mean(self.landmark_buffer, axis=0)

        best_match = None
        best_confidence = 0

        for gesture_id, gesture in self.gestures.items():
            if not gesture.trained or gesture.centroid is None or len(gesture.centroid) == 0:
                continue

            centroid = np.asarray(gesture.centroid, dtype=float)
            if centroid.shape != avg_features.shape:
                # E.g. a centroid imported from a model with another feature
                # layout; comparing would raise a broadcasting error.
                continue

            distance = float(np.linalg.norm(avg_features - centroid))

            if distance < gesture.threshold:
                confidence = 1 - (distance / gesture.threshold)

                if confidence > best_confidence:
                    best_confidence = confidence
                    best_match = GestureRecognition(
                        gesture_id=gesture_id,
                        gesture_name=gesture.name,
                        confidence=confidence,
                        gesture_type=gesture.gesture_type,
                        timestamp=datetime.now()
                    )

        self.last_recognition = best_match
        return best_match

    def enable_recognition(self):
        """Enable real-time gesture recognition and report which gestures are trained."""
        self.recognition_enabled = True
        trained_count = sum(1 for g in self.gestures.values() if g.trained)
        return {
            'enabled': True,
            'trained_gestures': trained_count,
            'gestures': [
                {'id': g.gesture_id, 'name': g.name, 'trained': g.trained}
                for g in self.gestures.values()
            ]
        }

    def disable_recognition(self):
        """Disable gesture recognition"""
        self.recognition_enabled = False
        return {'enabled': False}

    def get_trained_gestures(self) -> List[Dict]:
        """Return a summary dict for every known gesture (trained or not)."""
        return [
            {
                'id': g.gesture_id,
                'name': g.name,
                'description': g.description,
                'type': g.gesture_type,
                'trained': g.trained,
                'samples': len(g.samples)
            }
            for g in self.gestures.values()
        ]

    def delete_gesture(self, gesture_id: str) -> Dict:
        """Delete a gesture.

        If the deleted gesture is the one currently being trained, the
        training session is ended as well so later calls do not look up a
        missing key.
        """
        if gesture_id not in self.gestures:
            return {'error': 'Gesture not found'}

        del self.gestures[gesture_id]

        if self.current_training_gesture == gesture_id:
            self.is_training = False
            self.current_training_gesture = None

        return {'success': True}

    def add_custom_gesture(self, name: str, description: str, gesture_type: str = "custom") -> str:
        """Add a new untrained custom gesture and return its generated id."""
        gesture_id = f"custom_{name.lower().replace(' ', '_')}_{datetime.now().timestamp()}"

        self.gestures[gesture_id] = GestureTemplate(
            gesture_id=gesture_id,
            name=name,
            description=description,
            gesture_type=gesture_type
        )

        return gesture_id

    def export_model(self) -> Dict:
        """Export gesture model for backup.

        Raw samples are not exported, only their count together with the
        centroid and threshold of each gesture.
        """
        return {
            'user_id': self.user_id,
            'gestures': [
                {
                    'gesture_id': g.gesture_id,
                    'name': g.name,
                    'description': g.description,
                    'gesture_type': g.gesture_type,
                    'trained': g.trained,
                    'samples_count': len(g.samples),
                    'centroid': g.centroid,
                    'threshold': g.threshold,
                    'created_at': g.created_at.isoformat()
                }
                for g in self.gestures.values()
            ],
            'export_timestamp': datetime.now().isoformat()
        }

    def import_model(self, model_data: Dict):
        """Import gesture model from backup.

        Each entry under ``'gestures'`` replaces any existing gesture with
        the same id. ``'gesture_id'`` and ``'name'`` are required; other
        fields fall back to defaults.
        """
        for g_data in model_data.get('gestures', []):
            gesture = GestureTemplate(
                gesture_id=g_data['gesture_id'],
                name=g_data['name'],
                description=g_data.get('description', ''),
                gesture_type=g_data.get('gesture_type', 'custom'),
                trained=g_data.get('trained', False),
                centroid=g_data.get('centroid'),
                threshold=g_data.get('threshold', 0.3),
                created_at=datetime.fromisoformat(g_data.get('created_at', datetime.now().isoformat()))
            )

            self.gestures[gesture.gesture_id] = gesture
367
+
368
+
369
class FaceBlurProcessor:
    """
    Privacy filter helper - tracks the face region of the camera frame.

    Given face-mesh landmarks, computes a padded bounding box clamped to
    the frame and answers whether a point falls inside it. This class only
    tracks the region; it does not itself modify any image.

    NOTE(review): coordinates are treated as pixel values (they are
    truncated with ``int`` and clamped to the frame size) - confirm that
    callers do not pass normalized [0, 1] landmarks.
    """

    def __init__(self, blur_strength: int = 50, frame_width: int = 640,
                 frame_height: int = 480, padding: int = 20):
        """
        Args:
            blur_strength: stored for use by the code that applies the blur.
            frame_width: frame width used to clamp the box (default 640).
            frame_height: frame height used to clamp the box (default 480).
            padding: margin added around the landmark extent (default 20).
        """
        self.blur_strength = blur_strength
        self.frame_width = frame_width
        self.frame_height = frame_height
        self.padding = padding
        self.face_detected = False
        self.face_box = None

    def detect_face(self, landmarks: List) -> Optional[Tuple[int, int, int, int]]:
        """Detect face region from face mesh landmarks.

        Uses the first 468 landmarks that have at least 3 components.
        Returns ``(min_x, min_y, max_x, max_y)`` and stores it in
        ``face_box``. When detection fails, returns None and clears
        ``face_detected``; ``face_box`` keeps the last detected box.
        """
        if not landmarks or len(landmarks) < 468:
            self.face_detected = False
            return None

        points = [lm for lm in landmarks[:468] if len(lm) >= 3]
        if not points:
            self.face_detected = False
            return None

        x_coords = [lm[0] for lm in points]
        y_coords = [lm[1] for lm in points]

        # Clamp both edges to the frame so an off-frame face cannot produce
        # an inverted (min > max) box.
        min_x = self._clamp(int(min(x_coords)) - self.padding, self.frame_width)
        max_x = self._clamp(int(max(x_coords)) + self.padding, self.frame_width)
        min_y = self._clamp(int(min(y_coords)) - self.padding, self.frame_height)
        max_y = self._clamp(int(max(y_coords)) + self.padding, self.frame_height)

        self.face_detected = True
        self.face_box = (min_x, min_y, max_x, max_y)

        return self.face_box

    @staticmethod
    def _clamp(value: int, upper: int) -> int:
        """Limit ``value`` to the inclusive range [0, upper]."""
        return max(0, min(upper, value))

    def should_blur_region(self, x: int, y: int) -> bool:
        """Check if a point is in the face region (box edges inclusive)."""
        if not self.face_box:
            return False

        min_x, min_y, max_x, max_y = self.face_box
        return min_x <= x <= max_x and min_y <= y <= max_y
414
+
415
+
416
class GestureSignalMapper:
    """
    Maps recognized gestures to learning signals.

    Holds a table from gesture id to a signal name, a weight and a
    description. ``map_to_signal`` scales the recognition confidence by the
    weight; gesture ids without a table entry map to the ``'unknown'``
    signal with weight 0.5.
    """

    def __init__(self):
        """Create the mapper with the built-in gesture-to-signal table."""
        self.gesture_to_signal = {
            'thinking': {
                'signal': 'cognitive_load',
                'weight': 0.3,
                'description': 'User is thinking deeply'
            },
            'confused': {
                'signal': 'confusion',
                'weight': 0.7,
                'description': 'User seems confused'
            },
            'pause': {
                'signal': 'break_needed',
                'weight': 1.0,
                'description': 'User wants to pause'
            },
            'got_it': {
                'signal': 'understanding',
                'weight': 0.5,
                'description': 'User understands the concept'
            },
            'question': {
                'signal': 'doubt_intent',
                'weight': 0.8,
                'description': 'User likely has a question'
            }
        }

    def map_to_signal(self, recognition: "GestureRecognition") -> Dict:
        """Map gesture recognition to learning signal.

        Args:
            recognition: object exposing ``gesture_id``, ``gesture_name``,
                ``confidence`` and a ``timestamp`` with ``isoformat()``.

        Returns:
            Dict with ``signal_type``, weighted ``confidence``,
            ``raw_confidence``, ``gesture_name``, ``description`` and an
            ISO-formatted ``timestamp``.
        """
        mapping = self.gesture_to_signal.get(
            recognition.gesture_id,
            {'signal': 'unknown', 'weight': 0.5, 'description': 'Unknown gesture'}
        )

        return {
            'signal_type': mapping['signal'],
            'confidence': recognition.confidence * mapping['weight'],
            'raw_confidence': recognition.confidence,
            'gesture_name': recognition.gesture_name,
            'description': mapping['description'],
            'timestamp': recognition.timestamp.isoformat()
        }

    def add_custom_mapping(self, gesture_id: str, signal: str, weight: float,
                           description: Optional[str] = None):
        """Add (or overwrite) a gesture to signal mapping.

        Args:
            gesture_id: gesture key to map.
            signal: signal name to emit for that gesture.
            weight: factor applied to the recognition confidence.
            description: optional text; defaults to
                ``'Custom gesture: <gesture_id>'``.
        """
        if description is None:
            description = f'Custom gesture: {gesture_id}'

        self.gesture_to_signal[gesture_id] = {
            'signal': signal,
            'weight': weight,
            'description': description
        }