namish10 committed on
Commit
f6ddf5e
·
verified ·
1 Parent(s): 5e38446

Upload app/agents/gesture_action_agent.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app/agents/gesture_action_agent.py +768 -0
app/agents/gesture_action_agent.py ADDED
@@ -0,0 +1,768 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Gesture Action Agent
3
+
4
+ Maps hand gestures to LLM actions with OpenClaw-RL inspired learning.
5
+
6
+ Gesture-to-Action Mapping:
7
+ - 2 fingers + swipe right: Query ChatGPT + Gemini
8
+ - 2 fingers + swipe left: Query specific topic
9
+ - 1 finger + tap: Trigger RL learning loop
10
+ - Pinch gesture: Capture and query
11
+ - Open palm: Pause/stop
12
+
13
+ Inspired by:
14
+ - GestLLM: LLM-powered gesture interpretation
15
+ - GestOS: Multi-robot gesture orchestration
16
+ - GestureGPT: Free-form gesture understanding
17
+ - OpenClaw-RL: Personal agent optimization
18
+ """
19
+
20
+ import numpy as np
21
+ from typing import Dict, List, Any, Optional, Callable
22
+ from dataclasses import dataclass, field
23
+ from datetime import datetime
24
+ from enum import Enum
25
+ import logging
26
+
27
+ logger = logging.getLogger(__name__)
28
+
29
+
30
class GestureAction(Enum):
    """Actions that a recognized gesture can trigger.

    Each member's value is the string identifier of the action.
    """

    # LLM queries
    QUERY_MULTI_LLM = "query_multi_llm"
    QUERY_CHATGPT = "query_chatgpt"
    QUERY_GEMINI = "query_gemini"

    # Learning loop / content handling
    TRIGGER_RL_LOOP = "trigger_rl_loop"
    CAPTURE_CONTENT = "capture_content"
    GENERATE_PROMPT = "generate_prompt"

    # Session control
    PAUSE_SESSION = "pause_session"
    RESUME_SESSION = "resume_session"
    SAVE_PROGRESS = "save_progress"

    # Raw gestures
    SWIPE_LEFT = "swipe_left"
    SWIPE_RIGHT = "swipe_right"
    SWIPE_UP = "swipe_up"
    SWIPE_DOWN = "swipe_down"
    PINCH = "pinch"

    CUSTOM = "custom"
47
+
48
+
49
@dataclass
class SwipeGesture:
    """Represents a swipe gesture with direction and speed."""

    direction: str  # "left", "right", "up", "down"
    speed: float  # distance travelled divided by elapsed seconds
    start_x: float  # position where the swipe started
    start_y: float
    end_x: float  # position where the swipe was recognized
    end_y: float
    finger_count: int  # number of extended fingers during the swipe
    timestamp: datetime = field(default_factory=datetime.now)
60
+
61
+
62
@dataclass
class PinchGesture:
    """Represents a pinch gesture (thumb tip relative to index tip)."""

    thumb_tip: List[float]  # thumb-tip landmark coordinates
    index_tip: List[float]  # index-tip landmark coordinates
    distance: float  # Euclidean distance between the two tips
    gesture_type: str  # "zoom_in", "zoom_out", "grab"
    timestamp: datetime = field(default_factory=datetime.now)
70
+
71
+
72
@dataclass
class GestureActionEvent:
    """An event triggered by a gesture."""

    action: GestureAction  # the action this event requests
    gesture_name: str  # label of the gesture that produced the event
    parameters: Dict[str, Any]  # action-specific payload
    confidence: float
    timestamp: datetime = field(default_factory=datetime.now)
    # Filled in once an LLM query has run for this event.
    llm_responses: List[Dict] = field(default_factory=list)
    # Filled in once the RL loop has handled this event.
    rl_feedback: Optional[Dict] = None
82
+
83
+
84
class SwipeDetector:
    """
    Detects swipe and pinch gestures from hand landmarks.

    Uses finger position and movement to detect:
    - Swipe direction (left, right, up, down)
    - Number of fingers involved
    - Swipe speed

    Landmarks are expected as a sequence of at least 21 points, each
    ``[x, y]`` or ``[x, y, z]``; shorter inputs are ignored.
    """

    def __init__(self):
        self.position_history: List[Dict] = []
        self.max_history = 30

        self.swipe_threshold = 0.15
        self.min_swipe_speed = 0.005
        self.finger_count_window = 5
        # Number of frames a swipe may remain "in progress" before it is
        # abandoned. The history itself is capped at ``max_history``, so the
        # timeout has to be counted separately.
        self.max_swipe_frames = 60

        self._reset()

    def _reset(self):
        """Reset detector state"""
        self.position_history = []
        self.swipe_start = None
        self.swipe_in_progress = False
        self._swipe_frames = 0

    def count_extended_fingers(self, landmarks: List[List[float]]) -> int:
        """Count how many fingers are extended.

        A finger counts as extended when its tip has a smaller y value than
        its base joint. Returns 0 when fewer than 21 landmarks are given.
        """
        if not landmarks or len(landmarks) < 21:
            return 0

        finger_tips = (4, 8, 12, 16, 20)
        finger_bases = (3, 6, 10, 14, 18)

        return sum(
            1
            for tip, base in zip(finger_tips, finger_bases)
            if landmarks[tip][1] < landmarks[base][1]
        )

    def detect_swipe(
        self,
        landmarks: List[List[float]],
        finger_count: Optional[int] = None
    ) -> Optional["SwipeGesture"]:
        """Detect if a swipe gesture is being performed.

        Call once per frame. Landmark 9 is tracked; a swipe starts when it
        moves more than ``swipe_threshold`` across the last three frames and
        is reported once the last ten frames are nearly stationary.

        Args:
            landmarks: Hand landmarks for the current frame.
            finger_count: Extended-finger count; computed from ``landmarks``
                when omitted.

        Returns:
            A ``SwipeGesture`` when a swipe completes on this frame,
            otherwise ``None``.
        """
        if not landmarks or len(landmarks) < 21:
            return None

        tracked = landmarks[9]
        current = {
            "x": tracked[0],
            "y": tracked[1],
            "z": tracked[2] if len(tracked) > 2 else 0,
            "timestamp": datetime.now()
        }

        self.position_history.append(current)
        if len(self.position_history) > self.max_history:
            del self.position_history[:-self.max_history]

        if finger_count is None:
            finger_count = self.count_extended_fingers(landmarks)

        # Start tracking a swipe once the hand has moved far enough.
        if not self.swipe_start and len(self.position_history) >= 3:
            first, last = self.position_history[-3], self.position_history[-1]
            moved_x = abs(last["x"] - first["x"])
            moved_y = abs(last["y"] - first["y"])

            if moved_x > self.swipe_threshold or moved_y > self.swipe_threshold:
                self.swipe_start = first.copy()
                self.swipe_in_progress = True
                self._swipe_frames = 0

        if self.swipe_in_progress and self.swipe_start:
            start = self.swipe_start
            delta_x = current["x"] - start["x"]
            delta_y = current["y"] - start["y"]
            elapsed = (current["timestamp"] - start["timestamp"]).total_seconds()

            if elapsed > 0:
                speed = np.sqrt(delta_x**2 + delta_y**2) / elapsed
            else:
                speed = 0

            if speed > self.min_swipe_speed and len(self.position_history) >= 10:
                window_first = self.position_history[-10]
                settle_x = current["x"] - window_first["x"]
                settle_y = current["y"] - window_first["y"]

                # The swipe is complete once the hand has come to rest.
                if abs(settle_x) < 0.01 and abs(settle_y) < 0.01:
                    # NOTE(review): direction is derived from start - current,
                    # i.e. the negated displacement. This may be deliberate
                    # mirroring for a front-facing camera; confirm, since it
                    # also inverts the vertical axis.
                    direction = self._get_swipe_direction(
                        start["x"] - current["x"],
                        start["y"] - current["y"]
                    )

                    swipe = SwipeGesture(
                        direction=direction,
                        speed=speed,
                        start_x=start["x"],
                        start_y=start["y"],
                        end_x=current["x"],
                        end_y=current["y"],
                        finger_count=finger_count
                    )

                    self._reset()
                    return swipe

        # Abandon a swipe that never settles.
        if self.swipe_in_progress:
            self._swipe_frames += 1
            if self._swipe_frames > self.max_swipe_frames:
                self._reset()

        return None

    def _get_swipe_direction(self, dx: float, dy: float) -> str:
        """Determine swipe direction from deltas.

        The dominant axis decides; ties go to the vertical axis. Positive
        ``dx`` maps to "right" and positive ``dy`` to "down".
        """
        if abs(dx) > abs(dy):
            return "right" if dx > 0 else "left"
        return "down" if dy > 0 else "up"

    def detect_pinch(
        self,
        landmarks: List[List[float]]
    ) -> Optional["PinchGesture"]:
        """Detect pinch gesture (thumb + index finger).

        Classifies the thumb-tip to index-tip distance: below 0.08 is
        "grab", below 0.12 is "zoom_in", above 0.2 is "zoom_out". Distances
        in between, or fewer than 21 landmarks, return ``None``.
        """
        if not landmarks or len(landmarks) < 21:
            return None

        thumb_tip = landmarks[4]
        index_tip = landmarks[8]

        # Treat a missing z coordinate as 0, matching detect_swipe.
        thumb_z = thumb_tip[2] if len(thumb_tip) > 2 else 0
        index_z = index_tip[2] if len(index_tip) > 2 else 0

        distance = np.sqrt(
            (thumb_tip[0] - index_tip[0])**2 +
            (thumb_tip[1] - index_tip[1])**2 +
            (thumb_z - index_z)**2
        )

        if distance < 0.08:
            gesture_type = "grab"
        elif distance < 0.12:
            gesture_type = "zoom_in"
        elif distance > 0.2:
            gesture_type = "zoom_out"
        else:
            return None

        return PinchGesture(
            thumb_tip=thumb_tip,
            index_tip=index_tip,
            distance=distance,
            gesture_type=gesture_type
        )
248
+
249
+
250
class GestureActionMapper:
    """
    Maps detected gestures to LLM actions.

    Events emitted by ``process_landmarks``:
    - 2 fingers + swipe right: QUERY_MULTI_LLM (ChatGPT + Gemini)
    - 2 fingers + swipe left: SWIPE_LEFT, carrying the previous topic
    - 1 finger + swipe (any direction): TRIGGER_RL_LOOP
    - 3 fingers + swipe up: SAVE_PROGRESS
    - Pinch: CAPTURE_CONTENT
    - Open palm held over several frames: PAUSE_SESSION

    NOTE(review): ``action_rules`` is a descriptive table used by
    ``get_available_actions``; it is not consulted when events are created
    and lists QUERY_CHATGPT for the 2-finger left swipe, whereas the emitted
    event is SWIPE_LEFT. Confirm which is intended.
    """

    def __init__(self):
        self.action_rules: Dict[GestureAction, Dict] = {
            GestureAction.QUERY_MULTI_LLM: {
                "trigger": {"finger_count": 2, "swipe": "right"},
                "llm_providers": ["chatgpt", "gemini"],
                "prompt_template": "Analyze this learning content and explain key concepts: {content}",
                "auto_submit": True
            },
            GestureAction.QUERY_CHATGPT: {
                "trigger": {"finger_count": 2, "swipe": "left"},
                "llm_providers": ["chatgpt"],
                "prompt_template": "Explain this topic in simple terms: {topic}",
                "auto_submit": True
            },
            GestureAction.TRIGGER_RL_LOOP: {
                "trigger": {"finger_count": 1, "gesture": "tap"},
                "rl_mode": True,
                "feedback_enabled": True
            },
            GestureAction.CAPTURE_CONTENT: {
                "trigger": {"gesture": "pinch"},
                "capture_mode": True
            },
            GestureAction.PAUSE_SESSION: {
                "trigger": {"gesture": "open_palm", "duration": 1.0},
                "action": "pause"
            },
            GestureAction.RESUME_SESSION: {
                "trigger": {"gesture": "open_palm", "duration": 1.0},
                "action": "resume"
            }
        }

        self.custom_mappings: Dict[str, Dict] = {}
        self.action_callbacks: Dict[GestureAction, List[Callable]] = {}

        self.swipe_detector = SwipeDetector()
        self.gesture_buffer: List[Dict] = []

        # Strong references to in-flight LLM query tasks so they are not
        # garbage-collected before they finish.
        self._pending_tasks: set = set()

    def register_action_callback(
        self,
        action: GestureAction,
        callback: Callable[[GestureActionEvent], None]
    ):
        """Register a callback for when an action is triggered"""
        self.action_callbacks.setdefault(action, []).append(callback)

    def add_custom_mapping(
        self,
        name: str,
        gesture_pattern: Dict,
        action: GestureAction,
        parameters: Dict
    ):
        """Add a custom gesture-to-action mapping (replaces any same-named one)."""
        self.custom_mappings[name] = {
            "pattern": gesture_pattern,
            "action": action,
            "parameters": parameters
        }

    def process_landmarks(
        self,
        landmarks: List[List[float]],
        context: Optional[Dict] = None
    ) -> List[GestureActionEvent]:
        """Process hand landmarks and return triggered actions.

        Runs swipe, pinch and open-palm detection for one frame, then records
        the frame in ``gesture_buffer``.

        Args:
            landmarks: Hand landmarks for the current frame.
            context: Optional data copied into event parameters (keys read:
                ``current_content``, ``topic``, ``previous_topic``,
                ``selected_area``).

        Returns:
            Events triggered by this frame; empty when there are none.
        """
        events: List[GestureActionEvent] = []

        if not landmarks:
            return events

        finger_count = self.swipe_detector.count_extended_fingers(landmarks)

        swipe = self.swipe_detector.detect_swipe(landmarks, finger_count)
        if swipe:
            swipe_event = self._create_swipe_event(swipe, context)
            if swipe_event:
                events.append(swipe_event)

        pinch = self.swipe_detector.detect_pinch(landmarks)
        if pinch:
            pinch_event = self._create_pinch_event(pinch, context)
            if pinch_event:
                events.append(pinch_event)

        if finger_count >= 4:
            # Evaluated against earlier frames only; the current frame is
            # appended to the buffer below.
            palm_event = self._create_open_palm_event(context)
            if palm_event:
                events.append(palm_event)

        self.gesture_buffer.append({
            "finger_count": finger_count,
            "landmarks": landmarks,
            "timestamp": datetime.now()
        })

        # Keep the buffer bounded: once past 100 entries keep the newest 50.
        if len(self.gesture_buffer) > 100:
            self.gesture_buffer = self.gesture_buffer[-50:]

        return events

    def _create_swipe_event(
        self,
        swipe: SwipeGesture,
        context: Optional[Dict]
    ) -> Optional[GestureActionEvent]:
        """Create an action event from a swipe gesture.

        Returns ``None`` for finger-count/direction combinations that have no
        mapping.
        """
        ctx = context if context else {}
        confidence = min(swipe.speed * 100, 1.0)
        fingers = swipe.finger_count
        direction = swipe.direction

        if fingers == 2 and direction == "right":
            return GestureActionEvent(
                action=GestureAction.QUERY_MULTI_LLM,
                gesture_name=f"2_finger_swipe_{direction}",
                parameters={
                    "content": ctx.get("current_content", ""),
                    "topic": ctx.get("topic", ""),
                    "swipe_speed": swipe.speed
                },
                confidence=confidence
            )

        if fingers == 2 and direction == "left":
            return GestureActionEvent(
                action=GestureAction.SWIPE_LEFT,
                gesture_name=f"2_finger_swipe_{direction}",
                parameters={
                    "topic": ctx.get("previous_topic", ""),
                    "swipe_speed": swipe.speed
                },
                confidence=confidence
            )

        if fingers == 1:
            return GestureActionEvent(
                action=GestureAction.TRIGGER_RL_LOOP,
                gesture_name=f"1_finger_swipe_{direction}",
                parameters={
                    "direction": direction,
                    "topic": ctx.get("topic", "")
                },
                confidence=confidence
            )

        if fingers == 3 and direction == "up":
            return GestureActionEvent(
                action=GestureAction.SAVE_PROGRESS,
                gesture_name=f"3_finger_swipe_{direction}",
                parameters={"auto": True},
                confidence=confidence
            )

        return None

    def _create_pinch_event(
        self,
        pinch: PinchGesture,
        context: Optional[Dict]
    ) -> Optional[GestureActionEvent]:
        """Create an action event from a pinch gesture"""
        return GestureActionEvent(
            action=GestureAction.CAPTURE_CONTENT,
            gesture_name=f"pinch_{pinch.gesture_type}",
            parameters={
                "gesture_type": pinch.gesture_type,
                "distance": pinch.distance,
                "capture_area": context.get("selected_area") if context else None
            },
            confidence=0.9
        )

    def _create_open_palm_event(
        self,
        context: Optional[Dict]
    ) -> Optional[GestureActionEvent]:
        """Create an action event from open palm gesture.

        Fires only when at least 5 of the last 10 buffered frames showed
        four or more extended fingers.
        """
        open_frames = sum(
            1 for frame in self.gesture_buffer[-10:]
            if frame["finger_count"] >= 4
        )

        if open_frames < 5:
            return None

        return GestureActionEvent(
            action=GestureAction.PAUSE_SESSION,
            gesture_name="open_palm",
            parameters={"duration": 2.0},
            confidence=0.8
        )

    def execute_action(
        self,
        event: GestureActionEvent,
        llm_orchestrator=None,
        rl_loop=None
    ) -> GestureActionEvent:
        """Execute a gesture action.

        Dispatches on ``event.action`` and then invokes every callback
        registered for that action. A failing callback is logged and does not
        stop the remaining callbacks.

        Args:
            event: The event to execute.
            llm_orchestrator: Required for QUERY_MULTI_LLM to do anything.
            rl_loop: Required for TRIGGER_RL_LOOP to do anything.

        Returns:
            The (possibly updated) event.
        """
        if event.action == GestureAction.QUERY_MULTI_LLM and llm_orchestrator:
            event = self._execute_llm_query(event, llm_orchestrator)

        elif event.action == GestureAction.TRIGGER_RL_LOOP and rl_loop:
            event = self._execute_rl_loop(event, rl_loop)

        elif event.action == GestureAction.CAPTURE_CONTENT:
            event = self._execute_capture(event)

        for callback in self.action_callbacks.get(event.action, []):
            try:
                callback(event)
            except Exception as e:
                # Best-effort: one bad callback must not break the others.
                logger.error(f"Callback error: {e}")

        return event

    def _execute_llm_query(
        self,
        event: GestureActionEvent,
        orchestrator
    ) -> GestureActionEvent:
        """Execute LLM query from gesture.

        With no event loop running, the query runs to completion and the
        returned event carries ``llm_responses``. When called from inside a
        running loop the query is scheduled as a task and ``llm_responses``
        is filled in later, once the task finishes.
        """
        import asyncio

        async def query():
            from .llm_orchestrator_agent import LLMRequest, LLMProvider

            content = event.parameters.get("content", "")
            topic = event.parameters.get("topic", "")

            if content:
                prompt = f"Analyze this content and explain key concepts:\n\n{content}"
            elif topic:
                prompt = f"Explain '{topic}' in simple terms for learning purposes."
            else:
                prompt = "What are the main concepts I should understand about this topic?"

            request = LLMRequest(
                prompt=prompt,
                system_prompt="You are a helpful learning assistant that explains concepts clearly.",
                providers=[LLMProvider.CHATGPT, LLMProvider.GEMINI]
            )

            responses = await orchestrator.query_parallel(request)

            event.llm_responses = [
                {
                    "provider": r.provider.value,
                    "content": r.content,
                    "success": r.success
                }
                for r in responses
            ]

            return event

        def finalize(task):
            # Drop the reference and surface a failure of the background task.
            self._pending_tasks.discard(task)
            if task.cancelled():
                return
            error = task.exception()
            if error is not None:
                logger.error(f"LLM query error: {error}")
                event.llm_responses = [{"error": str(error)}]

        try:
            try:
                running_loop = asyncio.get_running_loop()
            except RuntimeError:
                running_loop = None

            if running_loop is not None:
                # Cannot block inside a running loop; schedule and keep a
                # reference so the task is not collected mid-flight.
                task = running_loop.create_task(query())
                self._pending_tasks.add(task)
                task.add_done_callback(finalize)
            else:
                try:
                    loop = asyncio.get_event_loop()
                except RuntimeError:
                    # No current loop in this thread; create one.
                    loop = asyncio.new_event_loop()
                    asyncio.set_event_loop(loop)
                event = loop.run_until_complete(query())
        except Exception as e:
            logger.error(f"LLM query error: {e}")
            event.llm_responses = [{"error": str(e)}]

        return event

    def _execute_rl_loop(
        self,
        event: GestureActionEvent,
        rl_loop
    ) -> GestureActionEvent:
        """Execute RL learning loop (OpenClaw-RL style).

        Currently only stamps ``event.rl_feedback``; ``rl_loop`` itself is
        not called.
        """
        event.rl_feedback = {
            "mode": "rl_optimization",
            "action": event.action.value,
            "topic": event.parameters.get("topic", ""),
            "timestamp": datetime.now().isoformat()
        }

        return event

    def _execute_capture(
        self,
        event: GestureActionEvent
    ) -> GestureActionEvent:
        """Execute content capture by marking the event parameters as captured."""
        event.parameters["captured"] = True
        event.parameters["capture_timestamp"] = datetime.now().isoformat()
        return event

    def get_available_actions(self) -> List[Dict]:
        """Get list of available gesture actions.

        Returns one dict per entry in ``action_rules`` with the keys
        ``action``, ``gesture`` and ``description``.
        """
        actions = []

        for action, rules in self.action_rules.items():
            trigger = rules.get("trigger", {})
            swipe_label = (
                f"{trigger.get('finger_count', '?')}_finger_swipe_"
                f"{trigger.get('swipe', '')}"
            )
            actions.append({
                "action": action.value,
                "gesture": trigger.get("gesture", swipe_label),
                "description": self._get_action_description(action)
            })

        return actions

    def _get_action_description(self, action: GestureAction) -> str:
        """Get description for an action.

        Looks the action up by its string value first, then by enum member,
        and falls back to a generic message.
        """
        by_value = {
            "trigger_rl_loop": "Start the RL learning loop to optimize responses",
            "capture_content": "Capture and save the current content or selection",
            "pause_session": "Pause the current learning session",
            "resume_session": "Resume the paused learning session",
        }
        by_member = {
            GestureAction.QUERY_MULTI_LLM: "Query multiple AI models simultaneously for the best answer",
            GestureAction.QUERY_CHATGPT: "Query ChatGPT for an answer",
            GestureAction.QUERY_GEMINI: "Query Gemini for an answer",
            GestureAction.CUSTOM: "Custom action based on your gesture"
        }

        value = getattr(action, "value", None)
        if isinstance(value, str) and value in by_value:
            return by_value[value]
        return by_member.get(action, "Action description not available")
591
+
592
+
593
class RLLearningLoop:
    """
    OpenClaw-RL inspired learning loop.

    Features:
    - Next-state feedback
    - Binary reward signals
    - Personal agent optimization
    - User preference learning

    State is kept in memory only: interactions, feedback, rewards, policy
    updates and per-word preference weights.
    """

    def __init__(self, user_id: str):
        self.user_id = user_id

        self.conversation_history: List[Dict] = []
        self.feedback_history: List[Dict] = []
        self.preference_weights: Dict[str, float] = {}

        self.is_active = False
        self.current_context = None

        self.reward_history: List[float] = []
        self.policy_updates: List[Dict] = []

    def start_loop(self, context: Dict):
        """Start the RL learning loop and clear the conversation history."""
        self.is_active = True
        self.current_context = context
        self.conversation_history = []

    def end_loop(self):
        """End the RL learning loop and record a session summary."""
        self.is_active = False
        self._compute_rewards()

    def add_interaction(
        self,
        action: str,
        response: str,
        context: Optional[Dict] = None
    ):
        """Add an interaction to the learning loop.

        Falls back to the loop's current context when ``context`` is not
        given. Preference weights are refreshed once at least three
        interactions have been recorded.
        """
        self.conversation_history.append({
            "action": action,
            "response": response,
            "context": context or self.current_context,
            "timestamp": datetime.now().isoformat()
        })

        if len(self.conversation_history) >= 3:
            self._update_preferences()

    def add_feedback(self, quality: int, comment: Optional[str] = None):
        """
        Add user feedback (OpenClaw-RL style).

        Quality scale:
        - 1: Very bad
        - 2: Bad
        - 3: Neutral
        - 4: Good
        - 5: Very good

        The feedback is attached to the most recent interaction;
        ``interaction_index`` is -1 when no interaction has been recorded.
        """
        feedback = {
            "quality": quality,
            "comment": comment,
            "timestamp": datetime.now().isoformat(),
            "interaction_index": len(self.conversation_history) - 1
        }

        self.feedback_history.append(feedback)
        self._process_feedback(feedback)

    def _process_feedback(self, feedback: Dict):
        """Process feedback for RL (binary reward + OPD).

        Quality of 4 or more yields a reward of 1.0, anything else -0.5.
        """
        is_positive = feedback["quality"] >= 4
        reward = 1.0 if is_positive else -0.5

        self.reward_history.append(reward)

        if self.conversation_history:
            latest = self.conversation_history[-1]
            latest["reward"] = reward
            latest["feedback_quality"] = feedback["quality"]

        # Only comments on positively rated interactions feed the hint step.
        if is_positive and feedback["comment"]:
            self._apply_opd(feedback["comment"])

    def _apply_opd(self, corrective_hint: str):
        """
        Apply On-Policy Distillation from corrective hints.

        OpenClaw-RL OPD extracts textual hints from next state
        and constructs enhanced teacher context.

        Here: logs a policy update and raises the weight of every word in
        the hint by 0.1 (new words start at 0.5).
        """
        self.policy_updates.append({
            "corrective_hint": corrective_hint,
            "timestamp": datetime.now().isoformat(),
            "policy_version": len(self.policy_updates) + 1
        })

        for word in corrective_hint.lower().split():
            self.preference_weights[word] = self.preference_weights.get(word, 0.5) + 0.1

    def _compute_rewards(self):
        """Compute aggregate rewards from feedback.

        Appends a ``session_summary`` entry to ``policy_updates``; does
        nothing when no reward has been recorded.
        """
        if not self.reward_history:
            return

        avg_reward = sum(self.reward_history) / len(self.reward_history)

        self.policy_updates.append({
            "type": "session_summary",
            "total_interactions": len(self.conversation_history),
            "total_feedback": len(self.feedback_history),
            "average_reward": avg_reward,
            "timestamp": datetime.now().isoformat()
        })

    def _update_preferences(self):
        """Update preference weights based on recent interactions.

        Scans the last three responses for whole-word matches of a fixed
        vocabulary: positive words gain 0.05, negative words lose 0.05 with
        a floor of 0.1.
        """
        positive_words = ("helpful", "clear", "understand", "good", "thanks")
        negative_words = ("confusing", "wrong", "don't", "no", "bad")
        edge_punctuation = ".,!?;:\"'()[]{}"

        for interaction in self.conversation_history[-3:]:
            text = (interaction.get("response") or "").lower()
            # Match whole words so that e.g. "no" does not fire on "know".
            words = {token.strip(edge_punctuation) for token in text.split()}

            for word in positive_words:
                if word in words:
                    self.preference_weights[word] = (
                        self.preference_weights.get(word, 0.5) + 0.05
                    )

            for word in negative_words:
                if word in words:
                    self.preference_weights[word] = max(
                        0.1, self.preference_weights.get(word, 0.5) - 0.05
                    )

    def get_status(self) -> Dict:
        """Get current RL loop status.

        ``top_preferences`` holds up to ten ``(word, weight)`` pairs, highest
        weight first; ``average_reward`` is 0 when there are no rewards.
        """
        rewards = self.reward_history
        ranked = sorted(
            self.preference_weights.items(),
            key=lambda item: item[1],
            reverse=True
        )

        return {
            "is_active": self.is_active,
            "total_interactions": len(self.conversation_history),
            "total_feedback": len(self.feedback_history),
            "average_reward": sum(rewards) / len(rewards) if rewards else 0,
            "policy_updates": len(self.policy_updates),
            "top_preferences": ranked[:10]
        }

    def generate_personalized_prompt(self, base_prompt: str) -> str:
        """Generate a personalized prompt based on learned preferences.

        Appends the five highest-weighted preference words as style
        guidance; returns ``base_prompt`` unchanged when nothing has been
        learned yet.
        """
        top_prefs = self.get_status()["top_preferences"]

        if not top_prefs:
            return base_prompt

        style_guide = " ".join(word for word, _ in top_prefs[:5])

        return f"{base_prompt}\n\nStyle guidance: {style_guide}"